From b4d906ad1b5b89bd1cf58632485a87c3a2bf7e32 Mon Sep 17 00:00:00 2001 From: Hui Lan Date: Sat, 24 Apr 2021 21:28:18 +0800 Subject: merge_edges.py: use the most recent 365 edge files (to handle out-of-memory problem). --- Code/merge_edges.py | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/Code/merge_edges.py b/Code/merge_edges.py index ff87761..7d4829a 100644 --- a/Code/merge_edges.py +++ b/Code/merge_edges.py @@ -149,7 +149,8 @@ def make_new_edge(d): write_log_file('[merge_edges.py] Go through all edge files in the edge pool %s.' % (EDGE_POOL_DIR) , UPDATE_NETWORK_LOG_FILE) d = {} # d will contain all edges computed so far, where the key is TargetGeneID_TFGeneID, and the value is a list of tuples. Each tuple is a historical edge. file_count = 0 -for fname in sorted(glob.glob(os.path.join(EDGE_POOL_DIR, 'edges*.*'))): +edges_file_lst = glob.glob(os.path.join(EDGE_POOL_DIR, 'edges*.*')) # list all edge files +for fname in sorted(edges_file_lst, key=lambda t: -os.stat(t).st_mtime): # sort edge files in descending order of modification time file_count += 1 print('[merge_edges.py] Including %s. Dictionary size %d.' % (fname, len(d))) #write_log_file('[merge_edges.py] including file %s.' % (fname) , UPDATE_NETWORK_LOG_FILE) @@ -198,6 +199,10 @@ for fname in sorted(glob.glob(os.path.join(EDGE_POOL_DIR, 'edges*.*'))): f.close() + # read only the most recently modified edge files to limit memory use; this check runs after the file is processed, so up to 366 files are included + if file_count > 365: + break + write_log_file('[merge_edges.py] BRAIN has collected edges from %d files.' % (file_count) , UPDATE_NETWORK_LOG_FILE) # make html pages -- cgit v1.2.1