diff options
author | Hui Lan <lanhui@zjnu.edu.cn> | 2024-08-04 15:59:39 +0800 |
---|---|---|
committer | Hui Lan <lanhui@zjnu.edu.cn> | 2024-08-04 15:59:39 +0800 |
commit | 0d5e23b870ef9fa21cc90e31150dc1c27d56b3f8 (patch) | |
tree | 2a6c3743a4085f8113c661e81c8c545b4945705a /Code | |
parent | 890ba0ffa0ad91ba7e1fc3f5351aa2e644ac05b5 (diff) |
Do not record duplicate edges (to save space). May take longer to run.
Diffstat (limited to 'Code')
-rw-r--r-- | Code/update_network_by_force.py | 24 |
1 files changed, 18 insertions, 6 deletions
diff --git a/Code/update_network_by_force.py b/Code/update_network_by_force.py index 252684c..079f677 100644 --- a/Code/update_network_by_force.py +++ b/Code/update_network_by_force.py @@ -52,8 +52,8 @@ def age_of_file_in_seconds(fname): seconds = time.time() - st.st_mtime return seconds -def concatenate_edge_files(fname_lst, fname_out): - fout = open(fname_out, 'w') +def concatenate_edge_files(fname_lst, dir_out, fname_out): + fout = open(os.path.join(dir_out, fname_out), 'w') for fname in fname_lst: f = open(fname) s = f.read() @@ -61,10 +61,22 @@ def concatenate_edge_files(fname_lst, fname_out): # Make sure each edge has 10 fields before writing. lines = lines_with_10_fields(s) if lines != []: - write_log_file('[update_network_by_force.py] In function concatenate_edge_files. File %s has %d rows with 10 columns.' % (fname, len(lines)), UPDATE_NETWORK_LOG_FILE) - fout.write('\n'.join(lines) + '\n') + write_log_file('[update_network_by_force.py] In function concatenate_edge_files(). File %s has %d rows with 10 columns.' % (fname, len(lines)), UPDATE_NETWORK_LOG_FILE) + # do not write duplicate lines, to save space + kept_lines = [] + for line in lines: + is_new = True + for fn in glob.glob(os.path.join(dir_out, 'edges.txt.*')): + with open(fn) as f2: + existing_lines = lines_with_10_fields(f2.read()) + if line in existing_lines: + is_new = False + break + if is_new: + kept_lines.append(line) + fout.write('\n'.join(kept_lines) + '\n') else: - write_log_file('[update_network_by_force.py] In function concatenate_edge_files. Check file %s. It has no rows with 10 fields.' % (fname), UPDATE_NETWORK_LOG_FILE) + write_log_file('[update_network_by_force.py] In function concatenate_edge_files(). Check file %s. It has no rows with 10 fields.' % (fname), UPDATE_NETWORK_LOG_FILE) fout.close() @@ -129,7 +141,7 @@ elif os.path.getmtime(MERGED_EDGE_FILE) < most_recent_edge_modification_time: # # concatenate edge files into one write_log_file('[update_network_by_force.py] Concatenate edge files in %s and %s into one file.' % (HISTORY_DIR, HISTORY_DIR2), UPDATE_NETWORK_LOG_FILE) curr_time = datetime.now().strftime('%Y%m%d_%H%M') - concatenate_edge_files(edge_file_lst, os.path.join(EDGE_POOL_DIR, 'edges.txt.many.one.targets.' + curr_time)) + concatenate_edge_files(edge_file_lst, EDGE_POOL_DIR, 'edges.txt.many.one.targets.' + curr_time) delete_edge_files(edge_file_lst) # delete these files only when they are no longer being written. if os.path.getmtime(MERGED_EDGE_FILE) < os.path.getmtime(EDGE_POOL_DIR): # edge pool directory has been updated, create a new edges.txt |