summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorHui Lan <lanhui@zjnu.edu.cn>2024-08-05 15:26:04 +0800
committerHui Lan <lanhui@zjnu.edu.cn>2024-08-05 15:26:04 +0800
commitf0104defa63e757b3beb93959ae60f06dbc0ca24 (patch)
tree32d64ee7c279306c716f6f25515c60745928eeeb
parent62730363eaf24647b9505f87f553a0ffdae8a2ab (diff)
update_network_by_force.py: review and make updates
-rw-r--r--Code/update_network_by_force.py116
1 files changed, 54 insertions, 62 deletions
diff --git a/Code/update_network_by_force.py b/Code/update_network_by_force.py
index fae2965..125ca60 100644
--- a/Code/update_network_by_force.py
+++ b/Code/update_network_by_force.py
@@ -44,38 +44,37 @@ def age_of_file_in_seconds(fname):
return seconds
+def new_edge_line(line, file_lst):
+ for fname in file_lst:
+ with open(fname) as f:
+ existing_lines = lines_with_10_fields(f.read())
+ if line in existing_lines:
+ return False
+ return True
+
+
def concatenate_edge_files(fname_lst, dir_out, fname_out):
fout = open(os.path.join(dir_out, fname_out), 'w')
for fname in fname_lst:
- f = open(fname)
- s = f.read()
- f.close()
+ with open(fname) as f:
+ s = f.read()
# Make sure each edge has 10 fields before writing.
lines = lines_with_10_fields(s)
if lines != []:
- write_log_file('[update_network_by_force.py] In function concatenate_edge_files(). File %s has %d rows with 10 columns.' % (fname, len(lines)), UPDATE_NETWORK_LOG_FILE)
# do not write duplicate lines, to save space
kept_lines = []
for line in lines:
- is_new = True
- for fn in glob.glob(os.path.join(dir_out, 'edges.txt.*')):
- with open(fn) as f2:
- existing_lines = lines_with_10_fields(f2.read())
- if line in existing_lines:
- is_new = False
- break
- if is_new:
+ if new_edge_line(line, glob.glob(os.path.join(dir_out, 'edges.txt.*'))):
kept_lines.append(line)
- fout.write('\n'.join(kept_lines) + '\n')
- else:
- write_log_file('[update_network_by_force.py] In function concatenate_edge_files(). Check file %s. It has no rows with 10 fields.' % (fname), UPDATE_NETWORK_LOG_FILE)
+ if kept_lines != []:
+ fout.write('\n'.join(kept_lines) + '\n')
fout.close()
def delete_edge_files(fname_lst):
age_in_hours = 6
for fname in fname_lst:
- # Before we delete, we should make sure it is not being written. Make sure it is old enough. Otherwise, don't delete.
+ # Before we delete a file, we should make sure it is not being updated. Make sure it is old enough. Otherwise, don't delete.
if age_of_file_in_seconds(fname) > age_in_hours*60*60: # 6 hours
os.remove(fname)
else:
@@ -109,31 +108,26 @@ def summarize_edge_file(fname):
########## Merge edges #######################
-# Update edges.txt, a merged file from two sources, HISTORY_DIR and HISTORY_DIR2. Some new edge files are being generated ...
+# Update edges.txt, a merged file from two sources, HISTORY_DIR and HISTORY_DIR2. Some new edge files are being generated there ...
# Definition of HISTORY_DIR and HISTORY_DIR2 could be found in configure.py
-time.sleep(3)
-edge_file_lst = [] # collect edge files.
+time.sleep(10)
+edge_file_lst = [] # collect edge files (file names).
most_recent_edge_modification_time = 0
-write_log_file('[update_network_by_force.py] Look at edge files in %s.' % (HISTORY_DIR), UPDATE_NETWORK_LOG_FILE)
-for fname in glob.glob(os.path.join(HISTORY_DIR, 'edges.txt.*')): # many small edges.txt.* are to be merged
- edge_file_lst.append(fname)
- if os.path.getmtime(fname) > most_recent_edge_modification_time:
- most_recent_edge_modification_time = os.path.getmtime(fname)
-
-write_log_file('[update_network_by_force.py] Look at edge files in %s.' % (HISTORY_DIR2), UPDATE_NETWORK_LOG_FILE)
-for fname in glob.glob(os.path.join(HISTORY_DIR2, 'edges.txt.*')): # many edges.txt.* are to be merged
- edge_file_lst.append(fname)
- if os.path.getmtime(fname) > most_recent_edge_modification_time:
- most_recent_edge_modification_time = os.path.getmtime(fname)
+for history_directory in [HISTORY_DIR, HISTORY_DIR2]:
+        write_log_file('[update_network_by_force.py] Look at edge files in %s.' % (history_directory), UPDATE_NETWORK_LOG_FILE)
+ for fname in glob.glob(os.path.join(history_directory, 'edges.txt.*')): # many small edges.txt.* are to be merged
+ edge_file_lst.append(fname)
+ if os.path.getmtime(fname) > most_recent_edge_modification_time:
+ most_recent_edge_modification_time = os.path.getmtime(fname)
if edge_file_lst == []:
write_log_file('[update_network_by_force.py] No edge files to merge in %s and %s.' % (HISTORY_DIR, HISTORY_DIR2), UPDATE_NETWORK_LOG_FILE)
elif os.path.getmtime(MERGED_EDGE_FILE) < most_recent_edge_modification_time: # update edges.txt only if there are newer edges to add.
- # concatenate edge files into one
+ # concatenate edge files into one and store in EDGE_POOL_DIR
write_log_file('[update_network_by_force.py] Concatenate edge files in %s and %s into one file.' % (HISTORY_DIR, HISTORY_DIR2), UPDATE_NETWORK_LOG_FILE)
curr_time = datetime.now().strftime('%Y%m%d_%H%M')
- concatenate_edge_files(edge_file_lst, EDGE_POOL_DIR, 'edges.txt.many.one.targets.' + curr_time)
+ concatenate_edge_files(edge_file_lst, EDGE_POOL_DIR, 'edges.txt.many.one.targets.' + curr_time) # this will update EDGE_POOL_DIR
delete_edge_files(edge_file_lst) # delete these files only when they are no longer being written.
if os.path.getmtime(MERGED_EDGE_FILE) < os.path.getmtime(EDGE_POOL_DIR): # edge pool directory has been updated, create a new edges.txt
@@ -141,49 +135,47 @@ if os.path.getmtime(MERGED_EDGE_FILE) < os.path.getmtime(EDGE_POOL_DIR): # edge
write_log_file('[update_network_by_force.py] Number of lines in the old edges.txt: %d.' % (num_line(MERGED_EDGE_FILE)), UPDATE_NETWORK_LOG_FILE)
write_log_file('[update_network_by_force.py] %s' % (summarize_edge_file(MERGED_EDGE_FILE)), UPDATE_NETWORK_LOG_FILE)
Sold = utils.get_edge_set(MERGED_EDGE_FILE) # all old edges stored in a set
- cmd = 'python3 merge_edges.py' # invoke another script the merge all edge files in EDGE_POOL_DIR
+ cmd = 'python3 merge_edges.py' # invoke another script to merge all edge files in EDGE_POOL_DIR
return_value = os.system(cmd)
if return_value != 0:
- write_log_file('[update_network_by_force.py] Something wrong occurred to merge_edges.py. Perhaps your computer is running out of memory.', UPDATE_NETWORK_LOG_FILE)
+ write_log_file('[update_network_by_force.py] WARNING: something wrong occurred to merge_edges.py. Perhaps your computer is running out of memory.', UPDATE_NETWORK_LOG_FILE)
write_log_file('[update_network_by_force.py] Number of lines in the new edges.txt: %d.' % (num_line(MERGED_EDGE_FILE)), UPDATE_NETWORK_LOG_FILE)
write_log_file('[update_network_by_force.py] %s' % (summarize_edge_file(MERGED_EDGE_FILE)), UPDATE_NETWORK_LOG_FILE)
Snew = utils.get_edge_set(MERGED_EDGE_FILE) # all new edges stored in a set. Note that MERGED_EDGE_FILE has been updated by 'python3 merge_edges.py'
utils.make_new_edges_file(Sold, Snew, MERGED_EDGE_FILE, DIFF_EDGE_FILE)
- manual_copy_commands = 'Please copy files to the web application: sudo cp /home/lanhui/brain/Data/temp/edges.txt /var/www/brain/brain/static/edges/edges.txt sudo cp /home/lanhui/brain/Data/temp/html_edges/edges.sqlite /var/www/brain/brain/static/edges curl http://118.25.96.118/brain/before'
+ manual_copy_commands = 'MANUAL: Please copy edges.txt to the web application: sudo cp /home/lanhui/brain/Data/temp/edges.txt /var/www/brain/brain/static/edges/edges.txt sudo cp /home/lanhui/brain/Data/temp/html_edges/edges.sqlite /var/www/brain/brain/static/edges curl http://118.25.96.118/brain/before'
write_log_file('[update_network_by_force.py] %s' % (manual_copy_commands), UPDATE_NETWORK_LOG_FILE)
- write_log_file('[update_network_by_force.py] Make html files for the web application.', UPDATE_NETWORK_LOG_FILE)
+ write_log_file('[update_network_by_force.py] Make HTML files for the web application.', UPDATE_NETWORK_LOG_FILE)
cmd = 'python3 html_network.py -f %s -r %s -c %s -n %s' % (MERGED_EDGE_FILE, PARAMETER_FOR_BUILDRMATRIX, PARAMETER_FOR_BUILDCMATRIX, PARAMETER_FOR_NET)
os.system(cmd)
if datetime.now().day % 28 == 0:
copy_and_backup_file(MERGED_EDGE_FILE, '../Analysis') # the backup file will be used for further analysis
-
# Compute overlap
-f = open('../Data/temp/AtRegNet.20210208.csv')
-AtRegNet_dict = {}
-for line in f:
- line = line.strip()
- lst = line.split(',')
- if lst[0] != 'TFName' and len(lst) > 4:
- tf = lst[1].upper().strip()
- target = lst[4].upper().strip()
- AtRegNet_dict[tf+target] = 100
-f.close()
-
-f = open(MERGED_EDGE_FILE)
-BrainEdges_dict = {}
-for line in f:
- line = line.strip()
- lst = line.split('\t')
- tf = lst[1].split()[0]
- target = lst[0].split()[0]
- score = float(lst[8])
- BrainEdges_dict[tf+target] = score
-f.close()
-
-overlap = Overlap(BrainEdges_dict, 3, AtRegNet_dict, 0)
-write_log_file('[update_network_by_force.py] Performance stats - TP:%d, PP:%d, Hit rate: %4.7f while comparing with AtRegNet.20210208.csv.' % (overlap.getTP(), overlap.getNumberOfPositivesInPred(), overlap.getTP()/overlap.getNumberOfPositivesInPred()), UPDATE_NETWORK_LOG_FILE)
+gold_standard_file = '../Data/temp/AtRegNet.20210208.csv'
+if os.path.exists(gold_standard_file) and os.path.exists(MERGED_EDGE_FILE):
+ AtRegNet_dict = {}
+ with open(gold_standard_file) as f:
+ for line in f:
+ line = line.strip()
+ lst = line.split(',')
+ if lst[0] != 'TFName' and len(lst) > 4:
+ tf = lst[1].upper().strip()
+ target = lst[4].upper().strip()
+ AtRegNet_dict[tf+target] = 100
+
+ BrainEdges_dict = {}
+ with open(MERGED_EDGE_FILE) as f:
+ for line in f:
+ line = line.strip()
+ lst = line.split('\t')
+ tf = lst[1].split()[0]
+ target = lst[0].split()[0]
+ score = float(lst[8])
+ BrainEdges_dict[tf+target] = score
+
+ overlap = Overlap(BrainEdges_dict, 3, AtRegNet_dict, 0)
+ write_log_file('[update_network_by_force.py] Performance stats - TP:%d, PP:%d, Hit rate: %4.7f while comparing with AtRegNet.20210208.csv.' % (overlap.getTP(), overlap.getNumberOfPositivesInPred(), overlap.getTP()/overlap.getNumberOfPositivesInPred()), UPDATE_NETWORK_LOG_FILE)
write_log_file('[update_network_by_force.py] Update done at %s.\n\n' % (datetime.now().strftime('%Y-%m-%d %H:%M:%S')), UPDATE_NETWORK_LOG_FILE)
-