author    Hui Lan <lanhui@zjnu.edu.cn>    2019-12-07 09:58:54 +0800
committer Hui Lan <lanhui@zjnu.edu.cn>    2019-12-07 09:58:54 +0800
commit    e7f996c480b3be437794041077b297f3e9ddc1bb (patch)
tree      1d54ee18eb55627a648ecf3964ac385fa3e0550d
parent    33da62c9fe9aebe1f28d89bcb58ea2c0390db00d (diff)
create backup_files.py for backing up files
Define a function copy_and_backup_file(src_file, dest_dir) to do backup and compression work. The function copy_and_backup_file is used in update_network_by_force.py. -Hui
-rw-r--r--  Code/backup_files.py             54
-rwxr-xr-x  Code/update_network.py            1
-rw-r--r--  Code/update_network_by_force.py 227
3 files changed, 169 insertions(+), 113 deletions(-)
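As the commit message notes, update_network_by_force.py now backs up the merged edge file right after regenerating it; the call added by this diff boils down to:

    from backup_files import copy_and_backup_file
    copy_and_backup_file(MERGED_EDGE_FILE, '../Analysis')  # MERGED_EDGE_FILE is imported from configure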
diff --git a/Code/backup_files.py b/Code/backup_files.py
new file mode 100644
index 0000000..39a0d30
--- /dev/null
+++ b/Code/backup_files.py
@@ -0,0 +1,54 @@
+# Usage: use copy_and_backup_file() as a utility function for backing up a file.
+#
+# Purpose: copy_and_backup_file(src_file, dest_dir) copies the file src_file to the destination
+# directory (creating it if it does not exist) and compresses the copy there (to save space).
+#
+#
+# Created on 7 December 2019 by Hui Lan (lanhui@zjnu.edu.cn)
+
+import os, sys
+from configure import UPDATE_NETWORK_LOG_FILE
+from datetime import datetime
+
+MINIMUM_SPACE_REQUIREMENT = 1 # Gigabytes
+
+def write_log_file(s, fname):
+    if not os.path.exists(fname):  # append to an existing log file only; never create it
+        return None
+    f = open(fname, 'a')
+    curr_time = datetime.now().strftime('%Y-%m-%d %H:%M')
+    s = '[' + curr_time + ']: ' + s
+    if '\n' not in s:
+        s += '\n'
+    f.write(s)
+    f.close()
+
+
+def make_paths(s):
+    if not os.path.isdir(s):
+        os.makedirs(s)
+
+
+def disk_has_enough_space():
+    available_G = 4 * os.statvfs('/home').f_bavail / (1024*1024)  # available space in gigabytes, assuming 4 KiB blocks; os.statvfs works on Linux/UNIX only
+    if available_G < MINIMUM_SPACE_REQUIREMENT:
+        print('[backup_files.py] home directory does not have enough space (only %4.1f G is available)' % (available_G))
+        write_log_file('[backup_files.py] WARNING: home directory does not have enough space (only %4.1f G is available)! No backup is carried out.' % (available_G), UPDATE_NETWORK_LOG_FILE)
+        sys.exit()
+
+
+def copy_and_backup_file(src_file, dest_dir):
+    disk_has_enough_space()  # make sure we have enough space first
+    if not os.path.exists(src_file):  # nothing to back up
+        sys.exit()
+    make_paths(dest_dir)  # if dest_dir does not exist, create it
+    curr_date = datetime.now().strftime('%Y%m%d')
+    dest_file = os.path.join(dest_dir, os.path.basename(src_file) + '.' + curr_date)
+    cmd = 'cp %s %s && cd %s && gzip -f %s' % (src_file, dest_file, dest_dir, dest_file)  # assumes paths without spaces or shell metacharacters (a shell-free alternative is sketched below)
+    os.system(cmd)
+    write_log_file('[backup_files.py] File %s has been backed up to %s and zipped (.gz)' % (src_file, dest_file), UPDATE_NETWORK_LOG_FILE)
+
+
+## main
+if __name__ == '__main__':
+    copy_and_backup_file('../Data/temp/edges.txt', '../Analysis')
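The cp/gzip pipeline in copy_and_backup_file assumes paths without spaces or shell metacharacters and a Unix userland. For reference, here is a minimal shell-free sketch of the same copy-and-compress step using only the standard library; copy_and_backup_file_portable and min_free_gb are illustrative names, not part of this commit:

    import gzip, os, shutil
    from datetime import datetime

    def copy_and_backup_file_portable(src_file, dest_dir, min_free_gb=1):
        # shutil.disk_usage reports free bytes directly, so no filesystem block-size assumption is needed
        if shutil.disk_usage(os.path.expanduser('~')).free / (1024**3) < min_free_gb:
            raise RuntimeError('not enough free space for a backup')
        os.makedirs(dest_dir, exist_ok=True)
        dest_file = os.path.join(dest_dir, os.path.basename(src_file) + '.' + datetime.now().strftime('%Y%m%d'))
        with open(src_file, 'rb') as fin, gzip.open(dest_file + '.gz', 'wb') as fout:
            shutil.copyfileobj(fin, fout)  # copy and compress in one pass, without invoking a shell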
diff --git a/Code/update_network.py b/Code/update_network.py
index e29eac1..f65b3ba 100755
--- a/Code/update_network.py
+++ b/Code/update_network.py
@@ -593,6 +593,7 @@ def check_rnaseq_info():
# sys.exit()
+
FILE_LIST_TO_CHECK = [PARAMETER_FOR_BUILDCMATRIX, PARAMETER_FOR_BUILDRMATRIX, PARAMETER_FOR_NET, \
MERGED_EDGE_FILE, BINDING_FILE, TPM_FILE] # a list of very important files
diff --git a/Code/update_network_by_force.py b/Code/update_network_by_force.py
index 7ba8c87..11e8d72 100644
--- a/Code/update_network_by_force.py
+++ b/Code/update_network_by_force.py
@@ -1,113 +1,114 @@
-# Usage: python3 update_network_by_force.py
-# Purpose: update_network.py could take a few days to run. Run this script to harvest new edges every day.
-#
-# Revision history:
-# Last modified: 24 Nov 2019, hui <lanhui@zjnu.edu.cn>
-
-import os, sys
-import glob
-import time
-from datetime import datetime
-from configure import HISTORY_DIR, HISTORY_DIR2, UPDATE_NETWORK_LOG_FILE, MERGED_EDGE_FILE, EDGE_POOL_DIR
-
-########## Helper functions #######################
-def write_log_file(s, fname):
-    f = open(fname, 'a')
-    curr_time = datetime.now().strftime('%Y-%m-%d %H:%M')
-    s = '[' + curr_time + ']: ' + s
-    if '\n' not in s:
-        s += '\n'
-    f.write(s)
-    f.close()
-    print('Log: %s' % (s.strip()))
-
-
-def num_line(fname):
-    ''' Return number of lines in file fname. '''
-    if not os.path.exists(fname):
-        return 0
-    f = open(fname)
-    lines = f.readlines()
-    f.close()
-    return len(lines)
-
-
-def lines_with_10_fields(s):
-    result = []
-    for line in s.split('\n'):
-        line = line.strip()
-        if len(line.split('\t')) == 10:
-            result.append(line)
-    return result
-
-
-def age_of_file_in_seconds(fname):
-    ''' Return the age of file fname in seconds. '''
-    st = os.stat(fname)
-    seconds = time.time() - st.st_mtime
-    return seconds
-
-def concatenate_edge_files(fname_lst, fname_out):
-    fout = open(fname_out, 'w')
-    for fname in fname_lst:
-        f = open(fname)
-        s = f.read()
-        f.close()
-        # Make sure each edge has 10 fields before writing.
-        lines = lines_with_10_fields(s)
-        if lines != []:
-            write_log_file('[update_network_by_force.py] In function concatenate_edge_files. File %s has %d rows with 10 columns.' % (fname, len(lines)), UPDATE_NETWORK_LOG_FILE)
-            fout.write('\n'.join(lines) + '\n')
-        else:
-            write_log_file('[update_network_by_force.py] In function concatenate_edge_files. Check file %s. It has no rows with 10 fields.' % (fname), UPDATE_NETWORK_LOG_FILE)
-    fout.close()
-
-
-def delete_edge_files(fname_lst):
-    age_in_hours = 6
-    for fname in fname_lst:
-        # Before deleting a file, make sure it is old enough that it is no longer being written.
-        if age_of_file_in_seconds(fname) > age_in_hours*60*60:
-            os.remove(fname)
-        else:
-            write_log_file('[update_network_by_force.py] In function delete_edge_files. Check file %s. It is probably still being written (less than %d hours old), so it is not deleted.' % (fname, age_in_hours), UPDATE_NETWORK_LOG_FILE)
-
-########## Merge edges #######################
-# Update edges.txt, a file merged from two sources, HISTORY_DIR and HISTORY_DIR2. New edge files may still be appearing, so pause briefly first.
-time.sleep(3)
-edge_file_lst = []  # collect edge files
-most_recent_edge_modification_time = 0
-write_log_file('[update_network_by_force.py] Look at edge files in %s.' % (HISTORY_DIR), UPDATE_NETWORK_LOG_FILE)
-for fname in glob.glob(os.path.join(HISTORY_DIR, 'edges.txt.*')):  # many small edges.txt.* files are to be merged
-    edge_file_lst.append(fname)
-    if os.path.getmtime(fname) > most_recent_edge_modification_time:
-        most_recent_edge_modification_time = os.path.getmtime(fname)
-
-write_log_file('[update_network_by_force.py] Look at edge files in %s.' % (HISTORY_DIR2), UPDATE_NETWORK_LOG_FILE)
-for fname in glob.glob(os.path.join(HISTORY_DIR2, 'edges.txt.*')):  # edges.txt.* files are to be merged
-    edge_file_lst.append(fname)
-    if os.path.getmtime(fname) > most_recent_edge_modification_time:
-        most_recent_edge_modification_time = os.path.getmtime(fname)
-
-
-if edge_file_lst == []:
-    write_log_file('[update_network_by_force.py] No edge files to merge in %s and %s.' % (HISTORY_DIR, HISTORY_DIR2), UPDATE_NETWORK_LOG_FILE)
-elif os.path.getmtime(MERGED_EDGE_FILE) < most_recent_edge_modification_time:  # update edges.txt only if there are newer edges to add
-    # concatenate edge files into one
-    write_log_file('[update_network_by_force.py] Concatenate edge files in %s and %s into one file.' % (HISTORY_DIR, HISTORY_DIR2), UPDATE_NETWORK_LOG_FILE)
-    curr_time = datetime.now().strftime('%Y%m%d_%H%M')
-    concatenate_edge_files(edge_file_lst, os.path.join(EDGE_POOL_DIR, 'edges.txt.many.one.targets.' + curr_time))
-    delete_edge_files(edge_file_lst)
-
-if os.path.getmtime(MERGED_EDGE_FILE) < os.path.getmtime(EDGE_POOL_DIR):  # the edge pool directory has been updated (its mtime changes when entries are added or removed), so create a new edges.txt
-    write_log_file('[update_network_by_force.py] Make a new edges.txt from edge files in %s.' % (EDGE_POOL_DIR), UPDATE_NETWORK_LOG_FILE)
-    write_log_file('[update_network_by_force.py] Number of lines in the old edges.txt: %d.' % (num_line(MERGED_EDGE_FILE)), UPDATE_NETWORK_LOG_FILE)
-    cmd = 'python3 merge_edges.py'
-    os.system(cmd)
-    write_log_file('[update_network_by_force.py] Number of lines in the new edges.txt: %d.' % (num_line(MERGED_EDGE_FILE)), UPDATE_NETWORK_LOG_FILE)
-    manual_copy_commands = 'Please copy files to the web application: sudo cp /home/lanhui/brain/Data/temp/edges.txt /var/www/brain/brain/static/edges/edges.txt ; sudo find /home/lanhui/brain/Data/temp/html_edges -name "*.html" -exec mv -t /var/www/brain/brain/static/edges {} +'
-    write_log_file('[update_network_by_force.py] %s' % (manual_copy_commands), UPDATE_NETWORK_LOG_FILE)
-
-
-
-write_log_file('[update_network_by_force.py] Update done at %s.\n\n' % (datetime.now().strftime('%Y-%m-%d %H:%M:%S')), UPDATE_NETWORK_LOG_FILE)
+# Usage: python3 update_network_by_force.py
+# Purpose: update_network.py could take a few days to run. Run this script to harvest new edges every day.
+#
+# Revision history:
+# Last modified: 24 Nov 2019, hui <lanhui@zjnu.edu.cn>
+
+import os, sys
+import glob
+import time
+from datetime import datetime
+from configure import HISTORY_DIR, HISTORY_DIR2, UPDATE_NETWORK_LOG_FILE, MERGED_EDGE_FILE, EDGE_POOL_DIR
+from backup_files import copy_and_backup_file
+
+########## Helper functions #######################
+def write_log_file(s, fname):
+    f = open(fname, 'a')
+    curr_time = datetime.now().strftime('%Y-%m-%d %H:%M')
+    s = '[' + curr_time + ']: ' + s
+    if '\n' not in s:
+        s += '\n'
+    f.write(s)
+    f.close()
+    print('Log: %s' % (s.strip()))
+
+
+def num_line(fname):
+    ''' Return number of lines in file fname. '''
+    if not os.path.exists(fname):
+        return 0
+    f = open(fname)
+    lines = f.readlines()
+    f.close()
+    return len(lines)
+
+
+def lines_with_10_fields(s):
+    result = []
+    for line in s.split('\n'):
+        line = line.strip()
+        if len(line.split('\t')) == 10:
+            result.append(line)
+    return result
+
+
+def age_of_file_in_seconds(fname):
+    ''' Return the age of file fname in seconds. '''
+    st = os.stat(fname)
+    seconds = time.time() - st.st_mtime
+    return seconds
+
+def concatenate_edge_files(fname_lst, fname_out):
+    fout = open(fname_out, 'w')
+    for fname in fname_lst:
+        f = open(fname)
+        s = f.read()
+        f.close()
+        # Make sure each edge has 10 fields before writing.
+        lines = lines_with_10_fields(s)
+        if lines != []:
+            write_log_file('[update_network_by_force.py] In function concatenate_edge_files. File %s has %d rows with 10 columns.' % (fname, len(lines)), UPDATE_NETWORK_LOG_FILE)
+            fout.write('\n'.join(lines) + '\n')
+        else:
+            write_log_file('[update_network_by_force.py] In function concatenate_edge_files. Check file %s. It has no rows with 10 fields.' % (fname), UPDATE_NETWORK_LOG_FILE)
+    fout.close()
+
+
+def delete_edge_files(fname_lst):
+    age_in_hours = 6
+    for fname in fname_lst:
+        # Before deleting a file, make sure it is old enough that it is no longer being written.
+        if age_of_file_in_seconds(fname) > age_in_hours*60*60:
+            os.remove(fname)
+        else:
+            write_log_file('[update_network_by_force.py] In function delete_edge_files. Check file %s. It is probably still being written (less than %d hours old), so it is not deleted.' % (fname, age_in_hours), UPDATE_NETWORK_LOG_FILE)
+
+########## Merge edges #######################
+# Update edges.txt, a file merged from two sources, HISTORY_DIR and HISTORY_DIR2. New edge files may still be appearing, so pause briefly first.
+time.sleep(3)
+edge_file_lst = []  # collect edge files
+most_recent_edge_modification_time = 0
+write_log_file('[update_network_by_force.py] Look at edge files in %s.' % (HISTORY_DIR), UPDATE_NETWORK_LOG_FILE)
+for fname in glob.glob(os.path.join(HISTORY_DIR, 'edges.txt.*')):  # many small edges.txt.* files are to be merged
+    edge_file_lst.append(fname)
+    if os.path.getmtime(fname) > most_recent_edge_modification_time:
+        most_recent_edge_modification_time = os.path.getmtime(fname)
+
+write_log_file('[update_network_by_force.py] Look at edge files in %s.' % (HISTORY_DIR2), UPDATE_NETWORK_LOG_FILE)
+for fname in glob.glob(os.path.join(HISTORY_DIR2, 'edges.txt.*')):  # edges.txt.* files are to be merged
+    edge_file_lst.append(fname)
+    if os.path.getmtime(fname) > most_recent_edge_modification_time:
+        most_recent_edge_modification_time = os.path.getmtime(fname)
+
+
+if edge_file_lst == []:
+    write_log_file('[update_network_by_force.py] No edge files to merge in %s and %s.' % (HISTORY_DIR, HISTORY_DIR2), UPDATE_NETWORK_LOG_FILE)
+elif os.path.getmtime(MERGED_EDGE_FILE) < most_recent_edge_modification_time:  # update edges.txt only if there are newer edges to add
+    # concatenate edge files into one
+    write_log_file('[update_network_by_force.py] Concatenate edge files in %s and %s into one file.' % (HISTORY_DIR, HISTORY_DIR2), UPDATE_NETWORK_LOG_FILE)
+    curr_time = datetime.now().strftime('%Y%m%d_%H%M')
+    concatenate_edge_files(edge_file_lst, os.path.join(EDGE_POOL_DIR, 'edges.txt.many.one.targets.' + curr_time))
+    delete_edge_files(edge_file_lst)
+
+if os.path.getmtime(MERGED_EDGE_FILE) < os.path.getmtime(EDGE_POOL_DIR):  # the edge pool directory has been updated (its mtime changes when entries are added or removed), so create a new edges.txt
+    write_log_file('[update_network_by_force.py] Make a new edges.txt from edge files in %s.' % (EDGE_POOL_DIR), UPDATE_NETWORK_LOG_FILE)
+    write_log_file('[update_network_by_force.py] Number of lines in the old edges.txt: %d.' % (num_line(MERGED_EDGE_FILE)), UPDATE_NETWORK_LOG_FILE)
+    cmd = 'python3 merge_edges.py'
+    os.system(cmd)
+    write_log_file('[update_network_by_force.py] Number of lines in the new edges.txt: %d.' % (num_line(MERGED_EDGE_FILE)), UPDATE_NETWORK_LOG_FILE)
+    manual_copy_commands = 'Please copy files to the web application: sudo cp /home/lanhui/brain/Data/temp/edges.txt /var/www/brain/brain/static/edges/edges.txt ; sudo find /home/lanhui/brain/Data/temp/html_edges -name "*.html" -exec mv -t /var/www/brain/brain/static/edges {} +'
+    write_log_file('[update_network_by_force.py] %s' % (manual_copy_commands), UPDATE_NETWORK_LOG_FILE)
+    copy_and_backup_file(MERGED_EDGE_FILE, '../Analysis')  # the backup file will be used for further analysis
+
+
+write_log_file('[update_network_by_force.py] Update done at %s.\n\n' % (datetime.now().strftime('%Y-%m-%d %H:%M:%S')), UPDATE_NETWORK_LOG_FILE)
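A side note on the mtime gate above: on POSIX systems a directory's modification time is updated when an entry is created, renamed, or removed in it, not when an existing file's contents change, which is why comparing MERGED_EDGE_FILE against EDGE_POOL_DIR detects newly pooled edge files. A small self-contained illustration (temporary paths only):

    import os, tempfile, time

    d = tempfile.mkdtemp()
    before = os.path.getmtime(d)
    time.sleep(1)                                         # make the timestamp difference visible
    open(os.path.join(d, 'edges.txt.new'), 'w').close()  # creating an entry bumps the directory's mtime
    assert os.path.getmtime(d) > before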