diff options
author | Hui Lan <lanhui@zjnu.edu.cn> | 2024-08-05 18:01:26 +0800 |
---|---|---|
committer | Hui Lan <lanhui@zjnu.edu.cn> | 2024-08-05 18:01:26 +0800 |
commit | 1ba96fd1fc6fc27d24ed1ae2776a8b85ee967fc5 (patch) | |
tree | a40e87cfb9d775162032834dca894cb0aa27321e | |
parent | c839087231fbae65f0dae5d303f8d2457fa40629 (diff) |
update_network_by_force.py: improve code speed
-rw-r--r-- | Code/update_network_by_force.py | 27 |
1 files changed, 24 insertions, 3 deletions
```diff
diff --git a/Code/update_network_by_force.py b/Code/update_network_by_force.py
index cf892ea..1478cc0 100644
--- a/Code/update_network_by_force.py
+++ b/Code/update_network_by_force.py
@@ -44,16 +44,37 @@ def age_of_file_in_seconds(fname):
     return seconds
 
 
-def new_edge_line(line, file_lst):
+def make_edge_dict_from_files(file_lst):
+    d = {}
     for fname in file_lst:
         with open(fname) as f:
             existing_lines = lines_with_10_fields(f.read())
-            if line in existing_lines:
+            for line in existing_lines:
+                lst = line.split('\t')
+                k = lst[0] + lst[1]
+                v = hash(''.join(lst[2:]))
+                if not k in d:
+                    d[k] = [v]
+                else:
+                    d[k].append(v)
+    return d
+
+
+def new_edge_line(line, edge_dict):
+    lst = line.split('\t')
+    k = lst[0] + lst[1]
+    if not k in edge_dict:
+        return True
+    else:
+        existing = edge_dict[k]
+        for x in existing:
+            if x == hash(''.join(lst[2:])):
                 return False
     return True
 
 
 def concatenate_edge_files(fname_lst, dir_out, fname_out):
+    edge_dict = make_edge_dict_from_files(glob.glob(os.path.join(dir_out, 'edges.txt.*')))
     fout = open(os.path.join(dir_out, fname_out), 'w')
     for fname in fname_lst:
         with open(fname) as f:
@@ -64,7 +85,7 @@ def concatenate_edge_files(fname_lst, dir_out, fname_out):
             # do not write duplicate lines, to save space
             kept_lines = []
             for line in lines:
-                if new_edge_line(line, glob.glob(os.path.join(dir_out, 'edges.txt.*'))):
+                if new_edge_line(line, edge_dict):
                     kept_lines.append(line)
             if kept_lines != []:
                 fout.write('\n'.join(kept_lines) + '\n')
```