From 9cea760a0d3e7b64eb16d99166b8b742bdc866dc Mon Sep 17 00:00:00 2001 From: Hui Lan Date: Tue, 20 Jul 2021 11:15:18 +0800 Subject: update_network_by_force.py: get the difference in edges (yesterday vs. today). --- Code/configure.py | 1 + Code/update_network_by_force.py | 7 ++++++- Code/utils.py | 39 +++++++++++++++++++++++++++++++++++++++ 3 files changed, 46 insertions(+), 1 deletion(-) create mode 100644 Code/utils.py (limited to 'Code') diff --git a/Code/configure.py b/Code/configure.py index d642d09..b7cb1c9 100644 --- a/Code/configure.py +++ b/Code/configure.py @@ -52,5 +52,6 @@ MAPPED_CDATA_DIR = '../Data/C/Mapped' # mapped ChIp-seq data EDGE_POOL_DIR = '/disk1/edge_pool' MERGED_EDGE_FILE = '../Data/temp/edges.txt' +DIFF_EDGE_FILE = '../Data/temp/edges-diff.txt' # the difference between two edge files from yesterday and from today TARGET_TF_FILE = '../Data/information/target_tf.txt' diff --git a/Code/update_network_by_force.py b/Code/update_network_by_force.py index c47002c..4d72463 100644 --- a/Code/update_network_by_force.py +++ b/Code/update_network_by_force.py @@ -10,8 +10,10 @@ import time from datetime import datetime from configure import HISTORY_DIR, HISTORY_DIR2, UPDATE_NETWORK_LOG_FILE, MERGED_EDGE_FILE, EDGE_POOL_DIR from configure import PARAMETER_FOR_BUILDCMATRIX, PARAMETER_FOR_BUILDRMATRIX, PARAMETER_FOR_NET +from configure import DIFF_EDGE_FILE from backup_files import copy_and_backup_file from overlap import Overlap +import utils ########## Helper functions ####################### def write_log_file(s, fname): @@ -134,12 +136,15 @@ if os.path.getmtime(MERGED_EDGE_FILE) < os.path.getmtime(EDGE_POOL_DIR): # edge write_log_file('[update_network_by_force.py] Make a new edges.txt from edge files in %s.' % (EDGE_POOL_DIR), UPDATE_NETWORK_LOG_FILE) write_log_file('[update_network_by_force.py] Number of lines in the old edges.txt: %d.' 
% (num_line(MERGED_EDGE_FILE)), UPDATE_NETWORK_LOG_FILE) write_log_file('[update_network_by_force.py] %s' % (summarize_edge_file(MERGED_EDGE_FILE)), UPDATE_NETWORK_LOG_FILE) + Sold = utils.get_edge_set(MERGED_EDGE_FILE) # all old edges stored in a set cmd = 'python3 merge_edges.py' # invoke another script the merge all edge files in EDGE_POOL_DIR return_value = os.system(cmd) if return_value != 0: write_log_file('[update_network_by_force.py] Something wrong occurred to merge_edges.py. Perhaps your computer is running out of memory.', UPDATE_NETWORK_LOG_FILE) write_log_file('[update_network_by_force.py] Number of lines in the new edges.txt: %d.' % (num_line(MERGED_EDGE_FILE)), UPDATE_NETWORK_LOG_FILE) - write_log_file('[update_network_by_force.py] %s' % (summarize_edge_file(MERGED_EDGE_FILE)), UPDATE_NETWORK_LOG_FILE) + write_log_file('[update_network_by_force.py] %s' % (summarize_edge_file(MERGED_EDGE_FILE)), UPDATE_NETWORK_LOG_FILE) + Snew = utils.get_edge_set(MERGED_EDGE_FILE) # all new edges stored in a set. Note that MERGED_EDGE_FILE has been updated by 'python3 merge_edges.py' + utils.make_new_edges_file(Sold, Snew, MERGED_EDGE_FILE, DIFF_EDGE_FILE) manual_copy_commands = 'Please copy files to the web application: sudo cp /home/lanhui/brain/Data/temp/edges.txt /var/www/brain/brain/static/edges/edges.txt sudo cp /home/lanhui/brain/Data/temp/html_edges/edges.sqlite /var/www/brain/brain/static/edges curl http://118.25.96.118/brain/before' write_log_file('[update_network_by_force.py] %s' % (manual_copy_commands), UPDATE_NETWORK_LOG_FILE) write_log_file('[update_network_by_force.py] Make html files for the web application.', UPDATE_NETWORK_LOG_FILE) diff --git a/Code/utils.py b/Code/utils.py new file mode 100644 index 0000000..f78980a --- /dev/null +++ b/Code/utils.py @@ -0,0 +1,39 @@ +# Utility functions +# Purpose: check what new edges have been created today. 
# Created by Hui on 20 July 2021

# A line is treated as an edge only if it has exactly this many
# tab-separated fields; any other line is ignored.
_EDGE_FIELD_COUNT = 10


def _edge_key(line):
    """Return the 'TARGET_TF' key of an edge line, or None if it is not an edge.

    The line is stripped and split on tabs.  The key is built from the first
    whitespace-separated token of field 0 (target) and of field 1 (TF), both
    upper-cased and joined with an underscore.

    None is returned when the line does not have exactly 10 fields, or when
    either ID field is blank.  A blank ID field used to raise IndexError and
    abort the whole run; such a line is now skipped like any other
    malformed line.
    """
    fields = line.strip().split('\t')
    if len(fields) != _EDGE_FIELD_COUNT:
        return None
    target_tokens = fields[0].split()
    tf_tokens = fields[1].split()
    if not target_tokens or not tf_tokens:
        return None
    return target_tokens[0].upper() + '_' + tf_tokens[0].upper()


def get_edge_set(fname):
    """Return the set of 'TARGET_TF' keys for all edge lines in fname.

    fname -- path to a tab-separated edge file.

    Lines that are not valid edge lines (see _edge_key) are ignored.
    Raises OSError if fname cannot be opened.
    """
    result = set()
    with open(fname) as f:
        for line in f:
            key = _edge_key(line)
            if key is not None:
                result.add(key)
    return result


def make_new_edges_file(Sold, Snew, fname_new, output_file):
    """Write to output_file the lines of fname_new whose edge is new.

    Sold        -- set of edge keys from the old edge file (see get_edge_set).
    Snew        -- set of edge keys from the new edge file.
    fname_new   -- path to the new edge file; its lines are the candidates.
    output_file -- path to write; overwritten if it already exists.

    An edge is new if its key is in Snew but not in Sold.  Each kept line is
    written stripped of surrounding whitespace and terminated by a newline.
    Raises OSError if either file cannot be opened.
    """
    Sdiff = Snew.difference(Sold)
    result = []
    # Read all of the input before opening the output, so the input is
    # fully consumed before anything is written.
    with open(fname_new) as f:
        for line in f:
            key = _edge_key(line)
            if key is not None and key in Sdiff:  # this is a new edge line, keep it
                result.append(line.strip())
    with open(output_file, 'w') as f:
        f.writelines(line + '\n' for line in result)


if __name__ == '__main__':
    S2 = get_edge_set('/home/lanhui/brain/Data/temp/edges.txt')
    S1 = get_edge_set('/home/lanhui/brain/Data/temp/edges.txt.old')
    make_new_edges_file(S1, S2, '/home/lanhui/brain/Data/temp/edges.txt', 'mynewedges.txt')