summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author: Hui Lan <lanhui@zjnu.edu.cn> 2024-08-05 18:01:26 +0800
committer: Hui Lan <lanhui@zjnu.edu.cn> 2024-08-05 18:01:26 +0800
commit: 1ba96fd1fc6fc27d24ed1ae2776a8b85ee967fc5 (patch)
tree: a40e87cfb9d775162032834dca894cb0aa27321e
parent: c839087231fbae65f0dae5d303f8d2457fa40629 (diff)
update_network_by_force.py: improve code speed
-rw-r--r-- Code/update_network_by_force.py 27
1 files changed, 24 insertions, 3 deletions
diff --git a/Code/update_network_by_force.py b/Code/update_network_by_force.py
index cf892ea..1478cc0 100644
--- a/Code/update_network_by_force.py
+++ b/Code/update_network_by_force.py
@@ -44,16 +44,37 @@ def age_of_file_in_seconds(fname):
return seconds
-def new_edge_line(line, file_lst):
+def make_edge_dict_from_files(file_lst):
+ d = {}
for fname in file_lst:
with open(fname) as f:
existing_lines = lines_with_10_fields(f.read())
- if line in existing_lines:
+ for line in existing_lines:
+ lst = line.split('\t')
+ k = lst[0] + lst[1]
+ v = hash(''.join(lst[2:]))
+ if not k in d:
+ d[k] = [v]
+ else:
+ d[k].append(v)
+ return d
+
+
+def new_edge_line(line, edge_dict):
+ lst = line.split('\t')
+ k = lst[0] + lst[1]
+ if not k in edge_dict:
+ return True
+ else:
+ existing = edge_dict[k]
+ for x in existing:
+ if x == hash(''.join(lst[2:])):
return False
return True
def concatenate_edge_files(fname_lst, dir_out, fname_out):
+ edge_dict = make_edge_dict_from_files(glob.glob(os.path.join(dir_out, 'edges.txt.*')))
fout = open(os.path.join(dir_out, fname_out), 'w')
for fname in fname_lst:
with open(fname) as f:
@@ -64,7 +85,7 @@ def concatenate_edge_files(fname_lst, dir_out, fname_out):
# do not write duplicate lines, to save space
kept_lines = []
for line in lines:
- if new_edge_line(line, glob.glob(os.path.join(dir_out, 'edges.txt.*'))):
+ if new_edge_line(line, edge_dict):
kept_lines.append(line)
if kept_lines != []:
fout.write('\n'.join(kept_lines) + '\n')