# Backup utility (from commit e7f996c, Sat 7 Dec 2019, Hui Lan <lanhui@zjnu.edu.cn>:
# "create backup_files.py for backing up files").
#
# Usage: use copy_and_backup_file() as a utility function for backing up a file.
#
# Purpose: copy_and_backup_file(src_file, dest_dir) copies src_file to the
# destination directory (creating it if necessary) and compresses the copied
# file in the destination directory (to save space).
#
# Created on 7 December 2019 by Hui Lan (lanhui@zjnu.edu.cn)

import os, sys
from configure import UPDATE_NETWORK_LOG_FILE
from datetime import datetime

MINIMUM_SPACE_REQUIREMENT = 1  # Gigabytes


def write_log_file(s, fname):
    ''' Append a timestamped message s to log file fname.

    Silently does nothing (returns None) if fname does not exist yet.
    '''
    if not os.path.exists(fname):
        return None
    f = open(fname, 'a')
    curr_time = datetime.now().strftime('%Y-%m-%d %H:%M')
    s = '[' + curr_time + ']: ' + s
    if not '\n' in s:
        s += '\n'
    f.write(s)
    f.close()


def make_paths(s):
    ''' Create directory s (including intermediate directories) if it does not exist. '''
    if not os.path.isdir(s):
        os.makedirs(s)


def disk_has_enough_space():
    ''' Exit the program if /home has less than MINIMUM_SPACE_REQUIREMENT gigabytes free.

    Works on Linux/UNIX systems only (uses os.statvfs).
    '''
    vfs = os.statvfs('/home')
    # Compute available space in gigabytes.  Fix: use the file system's actual
    # fragment size (f_frsize) rather than assuming 4K blocks, so the figure
    # is correct on any block size.
    available_G = vfs.f_bavail * vfs.f_frsize / (1024 ** 3)
    if available_G < MINIMUM_SPACE_REQUIREMENT:
        print('[backup_files.py] home directory does not have enough space (only %4.1f G is available) ' % (available_G))
        write_log_file('[backup_files.py] WARNING: home directory does not have enough space (only %4.1f G is available)! No backup is carried out.' % (available_G), UPDATE_NETWORK_LOG_FILE)
        sys.exit()


def copy_and_backup_file(src_file, dest_dir):
    ''' Copy src_file into dest_dir with a .YYYYMMDD suffix and gzip the copy.

    Exits (silently) if src_file does not exist or the disk is nearly full.
    '''
    disk_has_enough_space()  # make sure we have enough space first
    if not os.path.exists(src_file):
        sys.exit()
    make_paths(dest_dir)  # if dest_dir does not exist, create it
    curr_date = datetime.now().strftime('%Y%m%d')
    dest_file = os.path.join(dest_dir, os.path.basename(src_file) + '.' + curr_date)
    # NOTE(review): paths come from trusted configuration, but os.system with
    # interpolated paths breaks on spaces/shell metacharacters — consider
    # shutil.copy + the gzip module if these paths ever become untrusted.
    cmd = 'cp %s %s && cd %s && gzip -f %s' % (src_file, dest_file, dest_dir, dest_file)
    os.system(cmd)
    write_log_file('[backup_files.py] File %s has been backed up to %s and zipped (.gz)' % (src_file, dest_file), UPDATE_NETWORK_LOG_FILE)


## main
if __name__ == '__main__':
    copy_and_backup_file('../Data/temp/edges.txt', '../Analysis')
# Usage: python3 update_network_by_force.py
# Purpose: update_network.py could take a few days to run. Run this script to harvest new edges every day.
#
# Revision history:
# Last modified: 24 Nov 2019, hui

import os, sys
import glob
import time
from datetime import datetime
from configure import HISTORY_DIR, HISTORY_DIR2, UPDATE_NETWORK_LOG_FILE, MERGED_EDGE_FILE, EDGE_POOL_DIR
from backup_files import copy_and_backup_file

########## Helper functions #######################
def write_log_file(s, fname):
    ''' Append a timestamped message s to log file fname and echo it to stdout. '''
    f = open(fname, 'a')
    curr_time = datetime.now().strftime('%Y-%m-%d %H:%M')
    s = '[' + curr_time + ']: ' + s
    if not '\n' in s:
        s += '\n'
    f.write(s)
    f.close()
    print('Log: %s' % (s.strip()))


def num_line(fname):
    ''' Return number of lines in file fname (0 if fname does not exist). '''
    if not os.path.exists(fname):
        return 0
    f = open(fname)
    lines = f.readlines()
    f.close()
    return len(lines)


def lines_with_10_fields(s):
    ''' Return the lines of s that contain exactly 10 tab-separated fields. '''
    result = []
    for line in s.split('\n'):
        line = line.strip()
        if len(line.split('\t')) == 10:
            result.append(line)
    return result


def age_of_file_in_seconds(fname):
    ''' Return age of fname in seconds (time since its last modification). '''
    # Fix: the docstring previously said "in days"; the value returned is seconds.
    st = os.stat(fname)
    seconds = time.time() - st.st_mtime
    return seconds


def concatenate_edge_files(fname_lst, fname_out):
    ''' Concatenate the edge files in fname_lst into fname_out, keeping only well-formed rows. '''
    fout = open(fname_out, 'w')
    for fname in fname_lst:
        f = open(fname)
        s = f.read()
        f.close()
        # Make sure each edge has 10 fields before writing.
        lines = lines_with_10_fields(s)
        if lines != []:
            write_log_file('[update_network_by_force.py] In function concatenate_edge_files. File %s has %d rows with 10 columns.' % (fname, len(lines)), UPDATE_NETWORK_LOG_FILE)
            fout.write('\n'.join(lines) + '\n')
        else:
            write_log_file('[update_network_by_force.py] In function concatenate_edge_files. Check file %s. It has no rows with 10 fields.' % (fname), UPDATE_NETWORK_LOG_FILE)
    fout.close()


def delete_edge_files(fname_lst):
    ''' Delete the given edge files, skipping any modified within the last six hours. '''
    age_in_hours = 6
    for fname in fname_lst:
        # Before we delete, we should make sure it is not being written.
        # Make sure it is old enough.  Otherwise, don't delete.
        if age_of_file_in_seconds(fname) > age_in_hours*60*60:  # 6 hours
            os.remove(fname)
        else:
            write_log_file('[update_network_by_force.py] In function delete_edge_files. Check file %s. It is probably still being written (age less than %d hours). So I don\'t delete it.' % (fname, age_in_hours), UPDATE_NETWORK_LOG_FILE)

########## Merge edges #######################
# update edges.txt, a merged file from two sources, HISTORY_DIR and HISTORY_DIR2. Some new edge files are being generated ...
time.sleep(3)
edge_file_lst = []  # collect edge files.
most_recent_edge_modification_time = 0
write_log_file('[update_network_by_force.py] Look at edge files in %s.' % (HISTORY_DIR), UPDATE_NETWORK_LOG_FILE)
for fname in glob.glob(os.path.join(HISTORY_DIR, 'edges.txt.*')):  # many small edges.txt.* are to be merged
    edge_file_lst.append(fname)
    if os.path.getmtime(fname) > most_recent_edge_modification_time:
        most_recent_edge_modification_time = os.path.getmtime(fname)

write_log_file('[update_network_by_force.py] Look at edge files in %s.' % (HISTORY_DIR2), UPDATE_NETWORK_LOG_FILE)
for fname in glob.glob(os.path.join(HISTORY_DIR2, 'edges.txt.*')):  # edges.txt.* are to be merged
    edge_file_lst.append(fname)
    if os.path.getmtime(fname) > most_recent_edge_modification_time:
        most_recent_edge_modification_time = os.path.getmtime(fname)


# NOTE(review): os.path.getmtime(MERGED_EDGE_FILE) below raises FileNotFoundError
# if edges.txt does not exist yet — presumably update_network.py has always
# created it before this script runs; confirm before hardening.
if edge_file_lst == []:
    write_log_file('[update_network_by_force.py] No edge files to merge in %s and %s.' % (HISTORY_DIR, HISTORY_DIR2), UPDATE_NETWORK_LOG_FILE)
elif os.path.getmtime(MERGED_EDGE_FILE) < most_recent_edge_modification_time:  # update edges.txt only if there are newer edges to add.
    # concatenate edge files into one
    write_log_file('[update_network_by_force.py] Concatenate edge files in %s and %s into one file.' % (HISTORY_DIR, HISTORY_DIR2), UPDATE_NETWORK_LOG_FILE)
    curr_time = datetime.now().strftime('%Y%m%d_%H%M')
    concatenate_edge_files(edge_file_lst, os.path.join(EDGE_POOL_DIR, 'edges.txt.many.one.targets.' + curr_time))
    delete_edge_files(edge_file_lst)

if os.path.getmtime(MERGED_EDGE_FILE) < os.path.getmtime(EDGE_POOL_DIR):  # edge pool directory has been updated, create new edges.txt
    write_log_file('[update_network_by_force.py] Make a new edges.txt from edge files in %s.' % (EDGE_POOL_DIR), UPDATE_NETWORK_LOG_FILE)
    write_log_file('[update_network_by_force.py] Number of lines in the old edges.txt: %d.' % (num_line(MERGED_EDGE_FILE)), UPDATE_NETWORK_LOG_FILE)
    cmd = 'python3 merge_edges.py'
    os.system(cmd)
    write_log_file('[update_network_by_force.py] Number of lines in the new edges.txt: %d.' % (num_line(MERGED_EDGE_FILE)), UPDATE_NETWORK_LOG_FILE)
    manual_copy_commands = 'Please copy files to the web application: sudo cp /home/lanhui/brain/Data/temp/edges.txt /var/www/brain/brain/static/edges/edges.txt sudo find /home/lanhui/brain/Data/temp/html_edges -name "*.html" -exec mv -t /var/www/brain/brain/static/edges {} +'
    write_log_file('[update_network_by_force.py] %s' % (manual_copy_commands), UPDATE_NETWORK_LOG_FILE)
    copy_and_backup_file(MERGED_EDGE_FILE, '../Analysis')  # the backup file will be used for further analysis


write_log_file('[update_network_by_force.py] Update done at %s.\n\n' % (datetime.now().strftime('%Y-%m-%d %H:%M:%S')), UPDATE_NETWORK_LOG_FILE)