summary | refs | log | tree | commit | diff
path: root/Code
diff options
context:
space:
mode:
author Hui Lan <lanhui@zjnu.edu.cn> 2021-04-24 21:28:18 +0800
committer Hui Lan <lanhui@zjnu.edu.cn> 2021-04-24 21:28:18 +0800
commit b4d906ad1b5b89bd1cf58632485a87c3a2bf7e32 (patch)
tree e05a9551f7d1e1913148990d4bd3cb5950d62090 /Code
parent 2ea63a2ca9778c1154f7d800d3d7a08e78afd8fb (diff)
merge_edges.py: use the most recent 365 edge files (to handle out-of-memory problem).
Diffstat (limited to 'Code')
-rw-r--r-- Code/merge_edges.py 7
1 files changed, 6 insertions, 1 deletions
diff --git a/Code/merge_edges.py b/Code/merge_edges.py
index ff87761..7d4829a 100644
--- a/Code/merge_edges.py
+++ b/Code/merge_edges.py
@@ -149,7 +149,8 @@ def make_new_edge(d):
write_log_file('[merge_edges.py] Go through all edge files in the edge pool %s.' % (EDGE_POOL_DIR) , UPDATE_NETWORK_LOG_FILE)
d = {} # d will contain all edges computed so far, where the key is TargetGeneID_TFGeneID, and the value is a list of tuples. Each tuple is a historical edge.
file_count = 0
-for fname in sorted(glob.glob(os.path.join(EDGE_POOL_DIR, 'edges*.*'))):
+edges_file_lst = glob.glob(os.path.join(EDGE_POOL_DIR, 'edges*.*')) # list all edge files
+for fname in sorted(edges_file_lst, key=lambda t: -os.stat(t).st_mtime): # sort edge files in descending order of modification time
file_count += 1
print('[merge_edges.py] Including %s. Dictionary size %d.' % (fname, len(d)))
#write_log_file('[merge_edges.py] including file %s.' % (fname) , UPDATE_NETWORK_LOG_FILE)
@@ -198,6 +199,10 @@ for fname in sorted(glob.glob(os.path.join(EDGE_POOL_DIR, 'edges*.*'))):
f.close()
+ # use the most recent 365 edge files because they might be the best
+ if file_count > 365:
+ break
+
write_log_file('[merge_edges.py] BRAIN has collected edges from %d files.' % (file_count) , UPDATE_NETWORK_LOG_FILE)
# make html pages