summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
-rw-r--r-- Code/configure.py 4
-rwxr-xr-x Code/update_network.py 83
2 files changed, 15 insertions, 72 deletions
diff --git a/Code/configure.py b/Code/configure.py
index 9f159c1..5e329af 100644
--- a/Code/configure.py
+++ b/Code/configure.py
@@ -34,8 +34,8 @@ PARAMETER_FOR_NET_WIGGELAB_DIURNAL = '../Data/parameter/parameter_for_net_wig
BINDING_FILE = '../Data/history/bind/binding.txt'
TPM_FILE = '../Data/history/expr/TPM.txt' # gene expression data
-PARAMETER_FOR_BUILDRMATRIX_RENEW_INTERVAL = 1 # check every 28 days for updating TPM.txt
-MIN_RNA_SEQ_INCREASE = 2 # minimum RNA-seq experiments needed when updating parameter_for_buildRmatrix.txt
+PARAMETER_FOR_BUILDRMATRIX_RENEW_INTERVAL = 14 # check for updating TPM.txt on days of the month divisible by this value (i.e., the 14th and 28th)
+MIN_RNA_SEQ_INCREASE = 60 # minimum RNA-seq experiments needed when updating parameter_for_buildRmatrix.txt
UPDATE_NETWORK_LOG_FILE = '../Data/log/update.network.log.txt' # network update log. We should check this file from time to time.
NEW_OR_UPDATED_CHIP_FILE = '../Data/log/new.or.updated.chip.file.txt'
diff --git a/Code/update_network.py b/Code/update_network.py
index f0e4ec2..13e1c11 100755
--- a/Code/update_network.py
+++ b/Code/update_network.py
@@ -8,7 +8,7 @@
# 1. crontab -e.
# 2. Add this line: 01 05 * * * cd /home/hui/network/v03/Code && python3 update_network.py
#
-# IMPORTANT: Make sure execute this script (update_network.py) under the directory Code.
+# IMPORTANT: Make sure that you execute this script (update_network.py) under the directory Code.
#
# Purpose: periodically (e.g., per week) run this script to see if the network needs update. If yes, update it.
#
@@ -16,7 +16,7 @@
# parameter_for_buildRmatrix.txt and parameter_for_net.txt to make
# changes in these file effective.
#
-# parameter_for_buildCmatrix.txt will be updated automatically (I
+# parameter_for_buildRmatrix.txt will be updated automatically (I
# hope). However, we need to update parameter_for_buildCmatrix.txt
# manually.
#
@@ -557,45 +557,23 @@ def correlation_mixtools(num_component):
def check_rnaseq_info():
- # check rnaseq_info_database.txt and rnaseq_info_database.json, if they are outdated, then remind us to update it in log file.
+ # check rnaseq_info_database.txt and rnaseq_info_database.json. If they are outdated, then remind us in the log file to update them.
if os.path.exists(RNA_SEQ_INFO_DATABASE):
- if age_of_file_in_days(RNA_SEQ_INFO_DATABASE) > 90: # older than 120 days
+ if age_of_file_in_days(RNA_SEQ_INFO_DATABASE) > 120: # older than 120 days
write_log_file('[update_network.py] Need update %s. It is %d days old.' % (RNA_SEQ_INFO_DATABASE, age_of_file_in_days(RNA_SEQ_INFO_DATABASE)), UPDATE_NETWORK_LOG_FILE)
else:
write_log_file('[update_network.py] [MISSING] Must create %s.' % (RNA_SEQ_INFO_DATABASE), UPDATE_NETWORK_LOG_FILE)
if os.path.exists(RNA_SEQ_INFO_DATABASE_JSON):
- if age_of_file_in_days(RNA_SEQ_INFO_DATABASE_JSON) > 90:
+ if age_of_file_in_days(RNA_SEQ_INFO_DATABASE_JSON) > 120:
write_log_file('[update_network.py] Need update %s. It is %d days old.' % (RNA_SEQ_INFO_DATABASE_JSON, age_of_file_in_days(RNA_SEQ_INFO_DATABASE_JSON)), UPDATE_NETWORK_LOG_FILE)
else:
write_log_file('[update_network.py] [MISSING] Must create %s.' % (RNA_SEQ_INFO_DATABASE_JSON), UPDATE_NETWORK_LOG_FILE)
-# def check_process(name):
-# ''' If a process name exists, return 1; otherwise return 0.'''
-# os.system('ps -eF | grep \'%s\' > ../Data/running_processes.txt' % (name))
-# f = open('../Data/running_processes.txt')
-# lines = f.readlines()
-# f.close()
-# for line in lines:
-# line = line.strip()
-# lst = line.split()
-# if 'python' in lst[-2] and name in lst[-1]:
-# return 1
-# return 0
-
-
-
-## main
-
-# if check_process('update_network.py') == 1: # the old update_network.py is running
-# write_log_file('[update_network.py] update_network.py has not finished yet.', UPDATE_NETWORK_LOG_FILE)
-# sys.exit()
-
-
-
+# main
FILE_LIST_TO_CHECK = [PARAMETER_FOR_BUILDCMATRIX, PARAMETER_FOR_BUILDRMATRIX, PARAMETER_FOR_NET, \
- MERGED_EDGE_FILE, BINDING_FILE, TPM_FILE] # a list of very important files
+ MERGED_EDGE_FILE, BINDING_FILE, TPM_FILE] # a list of important files
make_important_dirs() # make important directories (if non-existent) for holding various kinds of files, must be put after os.chdir(CODE_DIR)
#validate_webapp_dir(PARAMETER_FOR_NET) # make sure the directory Webapp contains necessary files, e.g., genes.json.
@@ -650,7 +628,7 @@ if miss_lst != []: # miss_lst is non-empty in the beginning.
# Make json2 (sliced binding.txt) if it does not exist. Copy json2 to
-# the web application folder static/edges [manual] for displaying
+# the web application folder static/edges [do it manually] for displaying
# binding strength plots.
if not os.path.isdir('../Data/history/bind/json2') and os.path.exists(BINDING_FILE):
write_log_file('Make directory ../Data/history/bind/json2. Don\'t forget to copy json2 to static/edges in the web application.', UPDATE_NETWORK_LOG_FILE)
@@ -677,7 +655,7 @@ validate_parameter_for_net(PARAMETER_FOR_NET)
if not os.path.exists(FILE_TIMESTAMP):
record_file_time(FILE_LIST_TO_CHECK, FILE_TIMESTAMP)
-# get update time of mandatory files
+# Get update time of mandatory files
timestamp_dict = read_file_timestamp(FILE_TIMESTAMP)
@@ -692,31 +670,6 @@ if 'parameter_for_buildCmatrix.txt' in updated_file_list and not hold_on(PARAMET
os.system(cmd)
- # # We will only consider ChIP-seq IDs that are less than 7 days
- # # old. Make sure put 'update:yymmdd' in the 'NOTE:' field in
- # # parameter_for_buildCmatrix.txt for each newly added ChIP-seq
- # # data.
- # write_log_file('[update_network.py] Build binding matrix from recently added/modified ChIP-seq data.', UPDATE_NETWORK_LOG_FILE)
- # TEMP_BINDING_FILE = BINDING_FILE + '.temp'
- # cmd = 'python3 buildCmatrix.py %s > %s' % (PARAMETER_FOR_BUILDCMATRIX, TEMP_BINDING_FILE)
- # os.system(cmd)
-
- # # If someone just touched prameter_for_buildCmatrix.txt without
- # # adding any new ChIP-seq data, we should do nothing.
- # if validate_binding_file(TEMP_BINDING_FILE):
- # write_log_file('[update_network.py] Overwrite binding.txt.', UPDATE_NETWORK_LOG_FILE)
- # cm = 'mv %s %s' (TEMP_BINDING_FILE, BINDING_FILE) # Overwrite binding.txt. Make it formal.
- # os.system(cmd)
- # write_log_file('[update_network.py] binding.txt is updated. Number of columns in %s = %d.' % (BINDING_FILE, num_ids(BINDING_FILE)), UPDATE_NETWORK_LOG_FILE)
-
- # write_log_file('[update_network.py] Update target tf file %s.' % (TARGET_TF_FILE), UPDATE_NETWORK_LOG_FILE)
- # cmd = 'python3 make_target_tf.py %s > %s' % (PARAMETER_FOR_NET, TARGET_TF_FILE)
- # os.system(cmd)
- # else:
- # write_log_file('[update_network.py] [WARNING] Invalid binding matrix.', UPDATE_NETWORK_LOG_FILE)
- # os.remove(TEMP_BINDING_FILE)
-
-
updated_file_list = get_updated_files(FILE_LIST_TO_CHECK, timestamp_dict)
if 'binding.txt' in updated_file_list:
write_log_file('[update_network.py] binding.txt has been updated. This update will take effect next time TPM.txt is updated.', UPDATE_NETWORK_LOG_FILE)
@@ -748,16 +701,6 @@ if datetime.now().day % PARAMETER_FOR_BUILDRMATRIX_RENEW_INTERVAL == 0: # check
write_log_file('[update_network.py] Update %s' % (PARAMETER_FOR_BUILDRMATRIX), UPDATE_NETWORK_LOG_FILE)
cmd = 'cp %s %s' % (new_parameter_file, PARAMETER_FOR_BUILDRMATRIX)
os.system(cmd)
-
- # Before we rewrite TPM.txt, we should backup the old TPM.txt
- # write_log_file('[update_network.py] Backup %s' % (TPM_FILE), UPDATE_NETWORK_LOG_FILE)
- # cmd = 'cp %s %s' % (TPM_FILE, TPM_FILE + '.backup.at.' + curr_time)
- # os.system(cmd)
-
- # write_log_file('[update_network.py] Rebuild %s' % (TPM_FILE), UPDATE_NETWORK_LOG_FILE)
- # cmd = 'python3 buildRmatrix.py ../Data/parameter/parameter_for_buildRmatrix.txt'
- # os.system(cmd)
-
else:
write_log_file('[update_network.py] You have downloaded %d RNA-seq since last build of TPM.txt. TPM.txt will be rebuilt if this number reaches %d.' % (num, MIN_RNA_SEQ_INCREASE), UPDATE_NETWORK_LOG_FILE)
@@ -778,7 +721,7 @@ if 'parameter_for_buildRmatrix.txt' in updated_file_list and not hold_on(PARAMET
cmd = 'gzip %s' % (backup_file_name)
os.system(cmd)
- cmd = 'python3 buildRmatrix.py %s' % (PARAMETER_FOR_BUILDRMATRIX) # produce TPM.txt, the location of which is specified in TPM_TABLE in buidlRmatrix.py
+ cmd = 'python3 buildRmatrix.py %s' % (PARAMETER_FOR_BUILDRMATRIX) # produce TPM.txt, whose location is specified in TPM_TABLE in buildRmatrix.py
os.system(cmd)
curr_date = datetime.now().strftime('%Y%m%d')
@@ -789,7 +732,7 @@ if 'parameter_for_buildRmatrix.txt' in updated_file_list and not hold_on(PARAMET
# Create edges using all RNA-seq experiments
updated_file_list = get_updated_files(FILE_LIST_TO_CHECK, timestamp_dict)
-if 'TPM.txt' in updated_file_list: # we could touch TPM.txt to make it recent. We will recompute edges using the full binding.txt.
+if 'TPM.txt' in updated_file_list: # we could _touch_ TPM.txt to make it recent. We will recompute edges using the full binding.txt.
# Make a full binding.txt since we are going to use the new TPM.txt to recompute all edges
write_log_file('[update_network.py] Build full binding matrix for the new TPM.txt.', UPDATE_NETWORK_LOG_FILE)
cmd = 'python3 buildCmatrix.py %s include-all > %s' % (PARAMETER_FOR_BUILDCMATRIX, BINDING_FILE) # include all ChIP-seq IDs. Pay attention to include-all in the command-line argument.
@@ -806,7 +749,7 @@ if 'TPM.txt' in updated_file_list: # we could touch TPM.txt to make it recent.
## os.system(cmd) # turn this on if we are going to use this TPM.txt for displaying scatterplots
write_log_file('[update_network.py] Update directory ../Data/history/bind/json2. Don\'t forget to copy json2 to static/edges in the web application.', UPDATE_NETWORK_LOG_FILE)
cmd = 'python3 slice_binding_to_JSON.py %s' % (PARAMETER_FOR_NET)
- #os.system(cmd) # turn this on if we are going to use this bindingtxt for displaying bar charts of binding strengths
+ #os.system(cmd) # turn this on if we are going to use this binding.txt for displaying bar charts of binding strengths
## copy ../Data/history/bind/json2 and ../Data/history/expr/json to the web application folder 'static/edges' [manual]
if False: # TODO For now I will always use travadb's TPM.txt (138 columns) to display scatterplots. Simpler and faster.
@@ -828,7 +771,7 @@ if 'TPM.txt' in updated_file_list: # we could touch TPM.txt to make it recent.
wedge()
correlation_per_group()
correlation_per_group_fixed_number()
- correlation_mixtools(2)
+ correlation_mixtools(2) # two components
#correlation_mixtools(3)