1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
|
# Usage: python3 update_network_by_force.py
# Purpose: update_network.py could take a few days to run. Run this script to harvest new edges everyday.
#
# Revision history:
# Last modified: 24 Nov 2019, hui <lanhui@zjnu.edu.cn>
import os, sys
import glob
import time
from datetime import datetime
from configure import HISTORY_DIR, HISTORY_DIR2, UPDATE_NETWORK_LOG_FILE, MERGED_EDGE_FILE, EDGE_POOL_DIR
from configure import PARAMETER_FOR_BUILDCMATRIX, PARAMETER_FOR_BUILDRMATRIX, PARAMETER_FOR_NET
from backup_files import copy_and_backup_file
########## Helper functions #######################
def write_log_file(s, fname):
    ''' Append message s, prefixed with the current time, to log file fname,
    and echo the message to stdout. A trailing newline is added if s has none. '''
    curr_time = datetime.now().strftime('%Y-%m-%d %H:%M')
    s = '[' + curr_time + ']: ' + s
    if '\n' not in s:
        s += '\n'
    # 'with' guarantees the handle is closed even if the write raises.
    with open(fname, 'a') as f:
        f.write(s)
    print('Log: %s' % (s.strip()))
def num_line(fname):
    ''' Return the number of lines in file fname, or 0 if the file does not exist. '''
    if not os.path.exists(fname):
        return 0
    # Count lazily instead of materializing all lines in memory;
    # edges.txt can be large.
    with open(fname) as f:
        return sum(1 for _ in f)
def lines_with_10_fields(s):
    ''' Return the stripped lines of s that contain exactly 10 tab-separated fields. '''
    stripped_lines = (raw_line.strip() for raw_line in s.split('\n'))
    return [line for line in stripped_lines if len(line.split('\t')) == 10]
def age_of_file_in_seconds(fname):
    ''' Return age of fname in seconds (now minus last-modification time).
    NOTE: the old docstring said "days"; the value has always been seconds,
    as the function name states. '''
    return time.time() - os.path.getmtime(fname)
def concatenate_edge_files(fname_lst, fname_out):
    ''' Concatenate the edge files in fname_lst into a single file fname_out,
    keeping only rows that have exactly 10 tab-separated fields.
    Logs a line per input file stating how many valid rows it contributed. '''
    # 'with' ensures the output handle is closed even if reading an input fails.
    with open(fname_out, 'w') as fout:
        for fname in fname_lst:
            with open(fname) as f:
                s = f.read()
            # Make sure each edge has 10 fields before writing.
            lines = lines_with_10_fields(s)
            if lines != []:
                write_log_file('[update_network_by_force.py] In function concatenate_edge_files. File %s has %d rows with 10 columns.' % (fname, len(lines)), UPDATE_NETWORK_LOG_FILE)
                fout.write('\n'.join(lines) + '\n')
            else:
                write_log_file('[update_network_by_force.py] In function concatenate_edge_files. Check file %s. It has no rows with 10 fields.' % (fname), UPDATE_NETWORK_LOG_FILE)
def delete_edge_files(fname_lst):
    ''' Delete each file in fname_lst, but only if it is at least 6 hours old;
    a younger file may still be being written, so it is kept and a log entry made. '''
    age_in_hours = 6
    min_age_seconds = age_in_hours * 60 * 60
    for fname in fname_lst:
        # Before we delete, we should make sure it is not being written. Make sure it is old enough. Otherwise, don't delete.
        if age_of_file_in_seconds(fname) <= min_age_seconds:
            write_log_file('[update_network_by_force.py] In function delete_edge_files. Check file %s. It is probably still being written (age less than %d hours). So I don\'t delete it.' % (fname, age_in_hours), UPDATE_NETWORK_LOG_FILE)
            continue
        os.remove(fname)
def summarize_edge_file(fname):
    ''' Return a one-line summary of fname: how many 10-field edge rows have an
    association strength (column 9) above/below the threshold tau, with percentages.
    Returns an explanatory string if the file is missing or has no valid rows.
    NOTE: the old docstring ("Return number of lines...") was copy-pasted from num_line. '''
    if not os.path.exists(fname):
        return 'File %s does not exist.' % (fname)
    tau = 2.0  # association-strength threshold separating "strong" from "weak" edges
    count_below = 0
    count_above = 0
    count_total = 0
    with open(fname) as f:
        for line in f:
            lst = line.strip().split('\t')
            # Only well-formed rows (exactly 10 tab-separated fields) are counted.
            if len(lst) == 10:
                association_strength = float(lst[8])
                count_total += 1
                if association_strength > tau:
                    count_above += 1
                else:
                    count_below += 1
    if count_total > 0:
        return '#edges above %4.1f: %d (%4.3f percent), #edges below %4.1f: %d (%4.3f percent).' % (tau, count_above, 100.0*count_above/count_total, tau, count_below, 100.0*count_below/count_total)
    else:
        return 'Total edges is 0.'
########## Renew saved G.pickle and SOURCE_NODES.pickle in Webapp #######################
cmd = 'curl http://118.25.96.118/brain/before'
os.system(cmd)
########## Merge edges #######################
# update edges.txt, a merged file from two sources, HISTORY_DIR and HISTORY_DIR2. Some new edge files are being generated ...
time.sleep(3)
edge_file_lst = [] # collect edge files from both history directories
most_recent_edge_modification_time = 0
write_log_file('[update_network_by_force.py] Look at edge files in %s.' % (HISTORY_DIR), UPDATE_NETWORK_LOG_FILE)
for fname in glob.glob(os.path.join(HISTORY_DIR, 'edges.txt.*')): # many small edges.txt.* are to be merged
    edge_file_lst.append(fname)
    # Track the newest modification time seen among all candidate edge files.
    most_recent_edge_modification_time = max(most_recent_edge_modification_time, os.path.getmtime(fname))
write_log_file('[update_network_by_force.py] Look at edge files in %s.' % (HISTORY_DIR2), UPDATE_NETWORK_LOG_FILE)
for fname in glob.glob(os.path.join(HISTORY_DIR2, 'edges.txt.*')): # edges.txt.* are to be merged
    edge_file_lst.append(fname)
    most_recent_edge_modification_time = max(most_recent_edge_modification_time, os.path.getmtime(fname))
if edge_file_lst == []:
    write_log_file('[update_network_by_force.py] No edge files to merge in %s and %s.' % (HISTORY_DIR, HISTORY_DIR2), UPDATE_NETWORK_LOG_FILE)
elif not os.path.exists(MERGED_EDGE_FILE) or os.path.getmtime(MERGED_EDGE_FILE) < most_recent_edge_modification_time:
    # Update edges.txt only if there are newer edges to add. The existence guard
    # prevents a FileNotFoundError on the very first run, before edges.txt exists.
    # concatenate edge files into one
    write_log_file('[update_network_by_force.py] Concatenate edge files in %s and %s into one file.' % (HISTORY_DIR, HISTORY_DIR2), UPDATE_NETWORK_LOG_FILE)
    curr_time = datetime.now().strftime('%Y%m%d_%H%M')
    concatenate_edge_files(edge_file_lst, os.path.join(EDGE_POOL_DIR, 'edges.txt.many.one.targets.' + curr_time))
    delete_edge_files(edge_file_lst)
if not os.path.exists(MERGED_EDGE_FILE) or os.path.getmtime(MERGED_EDGE_FILE) < os.path.getmtime(EDGE_POOL_DIR): # edge pool directory has been updated, create new edges.txt
    write_log_file('[update_network_by_force.py] Make a new edges.txt from edge files in %s.' % (EDGE_POOL_DIR), UPDATE_NETWORK_LOG_FILE)
    write_log_file('[update_network_by_force.py] Number of lines in the old edges.txt: %d.' % (num_line(MERGED_EDGE_FILE)), UPDATE_NETWORK_LOG_FILE)
    write_log_file('[update_network_by_force.py] %s' % (summarize_edge_file(MERGED_EDGE_FILE)), UPDATE_NETWORK_LOG_FILE)
    cmd = 'python3 merge_edges.py'
    os.system(cmd)
    write_log_file('[update_network_by_force.py] Number of lines in the new edges.txt: %d.' % (num_line(MERGED_EDGE_FILE)), UPDATE_NETWORK_LOG_FILE)
    write_log_file('[update_network_by_force.py] %s' % (summarize_edge_file(MERGED_EDGE_FILE)), UPDATE_NETWORK_LOG_FILE)
    manual_copy_commands = 'Please copy files to the web application: sudo cp /home/lanhui/brain/Data/temp/edges.txt /var/www/brain/brain/static/edges/edges.txt sudo cp /home/lanhui/brain/Data/temp/html_edges/edges.sqlite /var/www/brain/brain/static/edges curl http://118.25.96.118/brain/before'
    write_log_file('[update_network_by_force.py] %s' % (manual_copy_commands), UPDATE_NETWORK_LOG_FILE)
    # BUG FIX: the format string used to contain a single %s but four arguments,
    # which raised "TypeError: not all arguments converted during string
    # formatting" before the command could run. Each configured path now has
    # its own placeholder, and the script is launched with python3 for
    # consistency with merge_edges.py above.
    cmd = 'python3 html_network.py -f %s -r %s -c %s -n %s' % (MERGED_EDGE_FILE, PARAMETER_FOR_BUILDRMATRIX, PARAMETER_FOR_BUILDCMATRIX, PARAMETER_FOR_NET)
    os.system(cmd)
    copy_and_backup_file(MERGED_EDGE_FILE, '../Analysis') # the backup file will be used for further analysis
write_log_file('[update_network_by_force.py] Update done at %s.\n\n' % (datetime.now().strftime('%Y-%m-%d %H:%M:%S')), UPDATE_NETWORK_LOG_FILE)
|