# Usage: python3 update_network_by_force.py
# Purpose: update_network.py could take a few days to run. Run this script to harvest new edges everyday.
#
# Revision history:
# Last modified: 24 Nov 2019, hui

import os, sys
import glob
import time
from datetime import datetime

from configure import HISTORY_DIR, HISTORY_DIR2, UPDATE_NETWORK_LOG_FILE, MERGED_EDGE_FILE, EDGE_POOL_DIR
from backup_files import copy_and_backup_file


########## Helper functions #######################

def write_log_file(s, fname):
    ''' Append message s, prefixed with a timestamp, to log file fname and echo it to stdout. '''
    curr_time = datetime.now().strftime('%Y-%m-%d %H:%M')
    s = '[' + curr_time + ']: ' + s
    if '\n' not in s:
        s += '\n'
    # Context manager guarantees the log file is closed even if write fails.
    with open(fname, 'a') as f:
        f.write(s)
    print('Log: %s' % (s.strip()))


def num_line(fname):
    ''' Return number of lines in file fname (0 if fname does not exist). '''
    if not os.path.exists(fname):
        return 0
    # Count lines lazily instead of loading the whole file into memory.
    with open(fname) as f:
        return sum(1 for _ in f)


def lines_with_10_fields(s):
    ''' Return the (stripped) lines of s that have exactly 10 tab-separated fields.

    Used to filter out truncated/corrupt edge rows before merging.
    '''
    result = []
    for line in s.split('\n'):
        line = line.strip()
        if len(line.split('\t')) == 10:
            result.append(line)
    return result


def age_of_file_in_seconds(fname):
    ''' Return age of fname in seconds (time elapsed since last modification). '''
    # BUGFIX (doc): the old docstring claimed days; the value has always been seconds.
    st = os.stat(fname)
    seconds = time.time() - st.st_mtime
    return seconds


def concatenate_edge_files(fname_lst, fname_out):
    ''' Concatenate the valid rows (10 tab-separated fields) of each file in
    fname_lst into a single file fname_out, logging a line count per input file. '''
    with open(fname_out, 'w') as fout:
        for fname in fname_lst:
            with open(fname) as f:
                s = f.read()
            # Make sure each edge has 10 fields before writing.
            lines = lines_with_10_fields(s)
            if lines != []:
                write_log_file('[update_network_by_force.py] In function concatenate_edge_files. File %s has %d rows with 10 columns.' % (fname, len(lines)), UPDATE_NETWORK_LOG_FILE)
                fout.write('\n'.join(lines) + '\n')
            else:
                write_log_file('[update_network_by_force.py] In function concatenate_edge_files. Check file %s. It has no rows with 10 fields.' % (fname), UPDATE_NETWORK_LOG_FILE)


def delete_edge_files(fname_lst):
    ''' Delete each file in fname_lst, but only if it is older than 6 hours —
    a younger file may still be being written by another process. '''
    age_in_hours = 6
    for fname in fname_lst:
        # Before we delete, we should make sure it is not being written. Make sure it is old enough. Otherwise, don't delete.
        if age_of_file_in_seconds(fname) > age_in_hours*60*60:  # 6 hours
            os.remove(fname)
        else:
            write_log_file('[update_network_by_force.py] In function delete_edge_files. Check file %s. It is probably still being written (age less than %d hours). So I don\'t delete it.' % (fname, age_in_hours), UPDATE_NETWORK_LOG_FILE)


########## Merge edges #######################

# update edges.txt, a merged file from two sources, HISTORY_DIR and HISTORY_DIR2.

# Some new edge files are being generated ...
time.sleep(3)

edge_file_lst = []  # collect edge files.
most_recent_edge_modification_time = 0

write_log_file('[update_network_by_force.py] Look at edge files in %s.' % (HISTORY_DIR), UPDATE_NETWORK_LOG_FILE)
for fname in glob.glob(os.path.join(HISTORY_DIR, 'edges.txt.*')):  # many small edges.txt.* are to be merged
    edge_file_lst.append(fname)
    if os.path.getmtime(fname) > most_recent_edge_modification_time:
        most_recent_edge_modification_time = os.path.getmtime(fname)

write_log_file('[update_network_by_force.py] Look at edge files in %s.' % (HISTORY_DIR2), UPDATE_NETWORK_LOG_FILE)
for fname in glob.glob(os.path.join(HISTORY_DIR2, 'edges.txt.*')):  # edges.txt.* are to be merged
    edge_file_lst.append(fname)
    if os.path.getmtime(fname) > most_recent_edge_modification_time:
        most_recent_edge_modification_time = os.path.getmtime(fname)

if edge_file_lst == []:
    write_log_file('[update_network_by_force.py] No edge files to merge in %s and %s.' % (HISTORY_DIR, HISTORY_DIR2), UPDATE_NETWORK_LOG_FILE)
# BUGFIX: guard against MERGED_EDGE_FILE not existing yet (first run), which
# previously made os.path.getmtime raise FileNotFoundError and abort the script.
elif not os.path.exists(MERGED_EDGE_FILE) or os.path.getmtime(MERGED_EDGE_FILE) < most_recent_edge_modification_time:
    # update edges.txt only if there are newer edges to add.
    # concatenate edge files into one
    write_log_file('[update_network_by_force.py] Concatenate edge files in %s and %s into one file.' % (HISTORY_DIR, HISTORY_DIR2), UPDATE_NETWORK_LOG_FILE)
    curr_time = datetime.now().strftime('%Y%m%d_%H%M')
    concatenate_edge_files(edge_file_lst, os.path.join(EDGE_POOL_DIR, 'edges.txt.many.one.targets.' + curr_time))
    delete_edge_files(edge_file_lst)

# Same first-run guard as above: a missing edges.txt means it must be (re)built.
if not os.path.exists(MERGED_EDGE_FILE) or os.path.getmtime(MERGED_EDGE_FILE) < os.path.getmtime(EDGE_POOL_DIR):
    # edge pool directory has been updated, create new edges.txt
    write_log_file('[update_network_by_force.py] Make a new edges.txt from edge files in %s.' % (EDGE_POOL_DIR), UPDATE_NETWORK_LOG_FILE)
    write_log_file('[update_network_by_force.py] Number of lines in the old edges.txt: %d.' % (num_line(MERGED_EDGE_FILE)), UPDATE_NETWORK_LOG_FILE)
    cmd = 'python3 merge_edges.py'
    os.system(cmd)
    write_log_file('[update_network_by_force.py] Number of lines in the new edges.txt: %d.' % (num_line(MERGED_EDGE_FILE)), UPDATE_NETWORK_LOG_FILE)
    manual_copy_commands = 'Please copy files to the web application: sudo cp /home/lanhui/brain/Data/temp/edges.txt /var/www/brain/brain/static/edges/edges.txt sudo find /home/lanhui/brain/Data/temp/html_edges -name "*.html" -exec mv -t /var/www/brain/brain/static/edges {} +'
    write_log_file('[update_network_by_force.py] %s' % (manual_copy_commands), UPDATE_NETWORK_LOG_FILE)
    copy_and_backup_file(MERGED_EDGE_FILE, '../Analysis')  # the backup file will be used for further analysis

write_log_file('[update_network_by_force.py] Update done at %s.\n\n' % (datetime.now().strftime('%Y-%m-%d %H:%M:%S')), UPDATE_NETWORK_LOG_FILE)