diff options
Diffstat (limited to 'Code/update_network_by_force.py')
-rw-r--r-- | Code/update_network_by_force.py | 24 |
1 files changed, 18 insertions, 6 deletions
diff --git a/Code/update_network_by_force.py b/Code/update_network_by_force.py
index 252684c..079f677 100644
--- a/Code/update_network_by_force.py
+++ b/Code/update_network_by_force.py
@@ -52,8 +52,8 @@ def age_of_file_in_seconds(fname):
     seconds = time.time() - st.st_mtime
     return seconds
 
-def concatenate_edge_files(fname_lst, fname_out):
-    fout = open(fname_out, 'w')
+def concatenate_edge_files(fname_lst, dir_out, fname_out):
+    fout = open(os.path.join(dir_out, fname_out), 'w')
     for fname in fname_lst:
         f = open(fname)
         s = f.read()
@@ -61,10 +61,22 @@ def concatenate_edge_files(fname_lst, fname_out):
         # Make sure each edge has 10 fields before writing.
         lines = lines_with_10_fields(s)
         if lines != []:
-            write_log_file('[update_network_by_force.py] In function concatenate_edge_files. File %s has %d rows with 10 columns.' % (fname, len(lines)), UPDATE_NETWORK_LOG_FILE)
-            fout.write('\n'.join(lines) + '\n')
+            write_log_file('[update_network_by_force.py] In function concatenate_edge_files(). File %s has %d rows with 10 columns.' % (fname, len(lines)), UPDATE_NETWORK_LOG_FILE)
+            # do not write duplicate lines, to save space
+            kept_lines = []
+            for line in lines:
+                is_new = True
+                for fn in glob.glob(os.path.join(dir_out, 'edges.txt.*')):
+                    with open(fn) as f2:
+                        existing_lines = lines_with_10_fields(f2.read())
+                    if line in existing_lines:
+                        is_new = False
+                        break
+                if is_new:
+                    kept_lines.append(line)
+            fout.write('\n'.join(kept_lines) + '\n')
         else:
-            write_log_file('[update_network_by_force.py] In function concatenate_edge_files. Check file %s. It has no rows with 10 fields.' % (fname), UPDATE_NETWORK_LOG_FILE)
+            write_log_file('[update_network_by_force.py] In function concatenate_edge_files(). Check file %s. It has no rows with 10 fields.' % (fname), UPDATE_NETWORK_LOG_FILE)
     fout.close()
 
 
@@ -129,7 +141,7 @@ elif os.path.getmtime(MERGED_EDGE_FILE) < most_recent_edge_modification_time: #
     # concatenate edge files into one
     write_log_file('[update_network_by_force.py] Concatenate edge files in %s and %s into one file.' % (HISTORY_DIR, HISTORY_DIR2), UPDATE_NETWORK_LOG_FILE)
     curr_time = datetime.now().strftime('%Y%m%d_%H%M')
-    concatenate_edge_files(edge_file_lst, os.path.join(EDGE_POOL_DIR, 'edges.txt.many.one.targets.' + curr_time))
+    concatenate_edge_files(edge_file_lst, EDGE_POOL_DIR, 'edges.txt.many.one.targets.' + curr_time)
     delete_edge_files(edge_file_lst) # delete these files only when they are no longer being written.
 
 if os.path.getmtime(MERGED_EDGE_FILE) < os.path.getmtime(EDGE_POOL_DIR): # edge pool directory has been updated, create a new edges.txt