merge_edges.py: use the most recent 365 edge files (to handle out-of-memory problem).

author: Hui Lan <lanhui@zjnu.edu.cn> 2021-04-24 21:28:18 +0800
committer: Hui Lan <lanhui@zjnu.edu.cn> 2021-04-24 21:28:18 +0800
commit: b4d906ad1b5b89bd1cf58632485a87c3a2bf7e32 (patch)
tree: e05a9551f7d1e1913148990d4bd3cb5950d62090 /Code
parent: 2ea63a2ca9778c1154f7d800d3d7a08e78afd8fb (diff)
1 files changed, 6 insertions, 1 deletions
diff --git a/Code/merge_edges.py b/Code/merge_edges.py
index ff87761..7d4829a 100644
--- a/Code/merge_edges.py
+++ b/Code/merge_edges.py
@@ -149,7 +149,8 @@ def make_new_edge(d):
 write_log_file('[merge_edges.py] Go through all edge files in the edge pool %s.' % (EDGE_POOL_DIR) , UPDATE_NETWORK_LOG_FILE)
 d = {} # d will contain all edges computed so far, where the key is TargetGeneID_TFGeneID, and the value is a list of tuples.  Each tuple is a historical edge.
 file_count = 0
-for fname in sorted(glob.glob(os.path.join(EDGE_POOL_DIR, 'edges*.*'))):
+edges_file_lst = glob.glob(os.path.join(EDGE_POOL_DIR, 'edges*.*')) # list all edge files
+for fname in sorted(edges_file_lst, key=lambda t: -os.stat(t).st_mtime): # sort edge files in descending order of modification time
     file_count += 1
     print('[merge_edges.py] Including %s.  Dictionary size %d.' % (fname, len(d)))
     #write_log_file('[merge_edges.py] including file %s.' % (fname) , UPDATE_NETWORK_LOG_FILE)
@@ -198,6 +199,10 @@ for fname in sorted(glob.glob(os.path.join(EDGE_POOL_DIR, 'edges*.*'))):
 
     f.close()
 
+    # use the most recent 365 edge files because they might be the best
+    if file_count > 365:
+        break
+
 write_log_file('[merge_edges.py] BRAIN has collected edges from %d files.' % (file_count) , UPDATE_NETWORK_LOG_FILE)            
 
 # make html pages
author	Hui Lan <lanhui@zjnu.edu.cn>	2021-04-24 21:28:18 +0800
committer	Hui Lan <lanhui@zjnu.edu.cn>	2021-04-24 21:28:18 +0800
commit	b4d906ad1b5b89bd1cf58632485a87c3a2bf7e32 (patch)
tree	e05a9551f7d1e1913148990d4bd3cb5950d62090 /Code
parent	2ea63a2ca9778c1154f7d800d3d7a08e78afd8fb (diff)