diff options
author | Hui Lan <lanhui@zjnu.edu.cn> | 2021-04-24 21:28:18 +0800 |
---|---|---|
committer | Hui Lan <lanhui@zjnu.edu.cn> | 2021-04-24 21:28:18 +0800 |
commit | b4d906ad1b5b89bd1cf58632485a87c3a2bf7e32 (patch) | |
tree | e05a9551f7d1e1913148990d4bd3cb5950d62090 /Code | |
parent | 2ea63a2ca9778c1154f7d800d3d7a08e78afd8fb (diff) |
merge_edges.py: use the most recent 365 edge files (to handle the out-of-memory problem).
Diffstat (limited to 'Code')
-rw-r--r-- | Code/merge_edges.py | 7 |
1 file changed, 6 insertions, 1 deletion
```diff
diff --git a/Code/merge_edges.py b/Code/merge_edges.py
index ff87761..7d4829a 100644
--- a/Code/merge_edges.py
+++ b/Code/merge_edges.py
@@ -149,7 +149,8 @@ def make_new_edge(d):
 write_log_file('[merge_edges.py] Go through all edge files in the edge pool %s.' % (EDGE_POOL_DIR) , UPDATE_NETWORK_LOG_FILE)
 d = {} # d will contain all edges computed so far, where the key is TargetGeneID_TFGeneID, and the value is a list of tuples. Each tuple is a historical edge.
 file_count = 0
-for fname in sorted(glob.glob(os.path.join(EDGE_POOL_DIR, 'edges*.*'))):
+edges_file_lst = glob.glob(os.path.join(EDGE_POOL_DIR, 'edges*.*')) # list all edge files
+for fname in sorted(edges_file_lst, key=lambda t: -os.stat(t).st_mtime): # sort edge files in descending order of modification time
     file_count += 1
     print('[merge_edges.py] Including %s. Dictionary size %d.' % (fname, len(d)))
     #write_log_file('[merge_edges.py] including file %s.' % (fname) , UPDATE_NETWORK_LOG_FILE)
@@ -198,6 +199,10 @@ for fname in sorted(glob.glob(os.path.join(EDGE_POOL_DIR, 'edges*.*'))):
 
     f.close()
 
+    # use the most recent 365 edge files because they might be the best
+    if file_count > 365:
+        break
+
 write_log_file('[merge_edges.py] BRAIN has collected edges from %d files.' % (file_count) , UPDATE_NETWORK_LOG_FILE)
 
 # make html pages
```