diff options
Diffstat (limited to 'Code')
| -rw-r--r-- | Code/update_network_by_force.py | 27 | 
1 files changed, 24 insertions, 3 deletions
| diff --git a/Code/update_network_by_force.py b/Code/update_network_by_force.py index cf892ea..1478cc0 100644 --- a/Code/update_network_by_force.py +++ b/Code/update_network_by_force.py @@ -44,16 +44,37 @@ def age_of_file_in_seconds(fname):      return seconds -def new_edge_line(line, file_lst): +def make_edge_dict_from_files(file_lst): +    d = {}      for fname in file_lst:          with open(fname) as f:              existing_lines = lines_with_10_fields(f.read()) -            if line in existing_lines: +            for line in existing_lines: +                lst = line.split('\t') +                k = lst[0] + lst[1] +                v = hash(''.join(lst[2:])) +                if not k in d: +                    d[k] = [v] +                else: +                    d[k].append(v) +    return d + + +def new_edge_line(line, edge_dict): +    lst = line.split('\t') +    k = lst[0] + lst[1] +    if not k in edge_dict: +        return True +    else: +        existing = edge_dict[k] +        for x in existing: +            if x == hash(''.join(lst[2:])):                  return False      return True  def concatenate_edge_files(fname_lst, dir_out, fname_out): +    edge_dict = make_edge_dict_from_files(glob.glob(os.path.join(dir_out, 'edges.txt.*')))      fout = open(os.path.join(dir_out, fname_out), 'w')      for fname in fname_lst:          with open(fname) as f: @@ -64,7 +85,7 @@ def concatenate_edge_files(fname_lst, dir_out, fname_out):              # do not write duplicate lines, to save space              kept_lines = []              for line in lines: -                if new_edge_line(line, glob.glob(os.path.join(dir_out, 'edges.txt.*'))): +                if new_edge_line(line, edge_dict):                      kept_lines.append(line)              if kept_lines != []:                  fout.write('\n'.join(kept_lines) + '\n') | 
