summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorHui Lan <lanhui@zjnu.edu.cn>2024-08-07 15:01:31 +0800
committerHui Lan <lanhui@zjnu.edu.cn>2024-08-07 15:01:31 +0800
commit7cbaecaa81bf55dd36f985c670f5bcb038780895 (patch)
tree0fd9718eb932fc91cd0f338cee4ffdeb8111ef08
parente48a12c582bd8df222a74747c32c5a691b7e7782 (diff)
Review update_network.py
-rw-r--r--Code/configure.py2
-rw-r--r--Code/create_edges0.py3
-rwxr-xr-xCode/update_network.py79
3 files changed, 41 insertions, 43 deletions
diff --git a/Code/configure.py b/Code/configure.py
index 4ce159a..41c791a 100644
--- a/Code/configure.py
+++ b/Code/configure.py
@@ -34,7 +34,7 @@ BINDING_FILE = '../Data/history/bind/binding.txt'
TPM_FILE = '../Data/history/expr/TPM.txt' # gene expression data
BUILDRMATRIX_RENEW_INTERVAL = 14 # check every 15 days for updating TPM.txt
-MIN_RNA_SEQ_INCREASE = -10000 # minimum RNA-seq experiments needed when updating parameter_for_buildRmatrix.txt
+MIN_RNA_SEQ_INCREASE = -999 # minimum RNA-seq experiments needed when updating parameter_for_buildRmatrix.txt
UPDATE_NETWORK_LOG_FILE = '../Data/log/update.network.log.txt' # network update log. We should check this file from time to time.
NEW_OR_UPDATED_CHIP_FILE = '../Data/log/new.or.updated.chip.file.txt'
diff --git a/Code/create_edges0.py b/Code/create_edges0.py
index f124370..16c014a 100644
--- a/Code/create_edges0.py
+++ b/Code/create_edges0.py
@@ -21,8 +21,7 @@ TARGET_FILE = '../Data/temp/all_targets.txt'
TF_FILE = '../Data/temp/all_tfs.txt'
RESULT_FILE = '../Data/temp/corr_all.txt'
R_SCRIPT_FILE = '../Data/temp/compute_simple_correlation.r'
-
-HISTORY_DIR = EDGE_POOL_DIR # edges.txt.* files are here
+HISTORY_DIR = EDGE_POOL_DIR # edges.txt.* files are here
def get_value(s, delimit):
diff --git a/Code/update_network.py b/Code/update_network.py
index 74e5599..373703d 100755
--- a/Code/update_network.py
+++ b/Code/update_network.py
@@ -80,7 +80,9 @@ def make_important_dirs():
make_paths('../Data/C/Raw')
make_paths('../Data/history/edges')
make_paths('../Data/history/bind')
+ make_paths('../Data/history/bind/json2')
make_paths('../Data/history/expr')
+ make_paths('../Data/history/expr/json')
make_paths('../Webapp/static/json')
make_paths('../Webapp/static/edges')
make_paths('../Webapp/templates')
@@ -244,7 +246,8 @@ def number_rnaseq_diff(para_file, tpm_file):
b = number_rnaseq_id(tpm_file)
- return a - b
+ return max(0, a - b)
+
def validate_gene_file(fname):
@@ -451,24 +454,24 @@ def create_edges0():
# The following commands are optional. For example, a user who wants to run it locally does not have to provide these TPM tables.
if os.path.exists(PARAMETER_FOR_NET_TRAVADB_STRESS):
- #write_log_file('[update_network.py] Create simple edges.txt using create_edges0.py with %s' % (PARAMETER_FOR_NET_TRAVADB_STRESS), UPDATE_NETWORK_LOG_FILE)
+ write_log_file('[update_network.py] Create simple edges.txt using create_edges0.py with %s' % (PARAMETER_FOR_NET_TRAVADB_STRESS), UPDATE_NETWORK_LOG_FILE)
cmd = 'python3 create_edges0.py %s' % (PARAMETER_FOR_NET_TRAVADB_STRESS)
- #os.system(cmd)
+ os.system(cmd)
if os.path.exists(PARAMETER_FOR_NET_TRAVADB_MAP):
- #write_log_file('[update_network.py] Create simple edges.txt using create_edges0.py with %s' % (PARAMETER_FOR_NET_TRAVADB_MAP), UPDATE_NETWORK_LOG_FILE)
+ write_log_file('[update_network.py] Create simple edges.txt using create_edges0.py with %s' % (PARAMETER_FOR_NET_TRAVADB_MAP), UPDATE_NETWORK_LOG_FILE)
cmd = 'python3 create_edges0.py %s' % (PARAMETER_FOR_NET_TRAVADB_MAP)
- #os.system(cmd)
+ os.system(cmd)
if os.path.exists(PARAMETER_FOR_NET_MILD_DROUGHT):
- #write_log_file('[update_network.py] Create simple edges.txt using create_edges0.py with %s' % (PARAMETER_FOR_NET_MILD_DROUGHT), UPDATE_NETWORK_LOG_FILE)
+ write_log_file('[update_network.py] Create simple edges.txt using create_edges0.py with %s' % (PARAMETER_FOR_NET_MILD_DROUGHT), UPDATE_NETWORK_LOG_FILE)
cmd = 'python3 create_edges0.py %s' % (PARAMETER_FOR_NET_MILD_DROUGHT)
- #os.system(cmd)
+ os.system(cmd)
if os.path.exists(PARAMETER_FOR_NET_WIGGELAB_DIURNAL):
- #write_log_file('[update_network.py] Create simple edges.txt using create_edges0.py with %s' % (PARAMETER_FOR_NET_WIGGELAB_DIURNAL), UPDATE_NETWORK_LOG_FILE)
+ write_log_file('[update_network.py] Create simple edges.txt using create_edges0.py with %s' % (PARAMETER_FOR_NET_WIGGELAB_DIURNAL), UPDATE_NETWORK_LOG_FILE)
cmd = 'python3 create_edges0.py %s' % (PARAMETER_FOR_NET_WIGGELAB_DIURNAL)
- #os.system(cmd)
+ os.system(cmd)
def create_edges0B():
@@ -533,22 +536,22 @@ check_rnaseq_info() # rnaseq information is useful for displaying scatterplots
# Make sure all necessary files are present, if not, make them if possible
miss_lst = all_files_present(FILE_LIST_TO_CHECK) # check if any of them are missing
-if miss_lst != []: # miss_lst is non-empty in the beginning.
+if miss_lst != []: # miss_lst is non-empty when we first run this script.
print('These mandatory files are missing: %s.\nPrepare them first.' % (' '.join(miss_lst)))
- write_log_file('[update_network.py] Cannot find these required files:%s' % (' '.join(miss_lst)), UPDATE_NETWORK_LOG_FILE)
+ write_log_file('[update_network.py] ERROR: cannot find these required files:%s' % (' '.join(miss_lst)), UPDATE_NETWORK_LOG_FILE)
# initially, we (at most) only have three parameter files, no binding.txt, TPM.txt or edges.txt ...
important_miss_number = 0
if PARAMETER_FOR_BUILDCMATRIX in miss_lst:
- print('[update_network.py]: must prepare %s first.' % (PARAMETER_FOR_BUILDCMATRIX))
+ print('[update_network.py]: ERROR: must prepare %s first.' % (PARAMETER_FOR_BUILDCMATRIX))
important_miss_number += 1
if PARAMETER_FOR_BUILDRMATRIX in miss_lst:
- print('[update_network.py]: must prepare %s first.' % (PARAMETER_FOR_BUILDRMATRIX))
+ print('[update_network.py]: ERROR: must prepare %s first.' % (PARAMETER_FOR_BUILDRMATRIX))
important_miss_number += 1
if PARAMETER_FOR_NET in miss_lst:
- print('[update_network.py]: must prepare %s first.' % (PARAMETER_FOR_NET))
+ print('[update_network.py]: ERROR: must prepare %s first.' % (PARAMETER_FOR_NET))
important_miss_number += 1
if important_miss_number > 0:
@@ -556,45 +559,44 @@ if miss_lst != []: # miss_lst is non-empty in the beginning.
if BINDING_FILE in miss_lst:
print('[update_network.py]: make initial binding.txt ... wait')
- write_log_file('[update_network.py] Make initial binding.txt', UPDATE_NETWORK_LOG_FILE)
+ write_log_file('[update_network.py] INFO: make initial binding.txt', UPDATE_NETWORK_LOG_FILE)
cmd = 'python3 get_binding.py %s' % (PARAMETER_FOR_BUILDCMATRIX)
- #os.system(cmd)
+ os.system(cmd)
cmd = 'python3 buildCmatrix.py %s > %s' % (PARAMETER_FOR_BUILDCMATRIX, BINDING_FILE)
- #os.system(cmd)
+ os.system(cmd)
print('[update_network.py]: IMPORATNT: make sure BINDING_MATRIX in %s was set %s and rerun update_network.py.' % (PARAMETER_FOR_NET, BINDING_FILE))
sys.exit()
if TPM_FILE in miss_lst:
print('[update_network.py]: make initial TPM.txt ... wait')
- write_log_file('[update_network.py] Make initial TPM.txt', UPDATE_NETWORK_LOG_FILE)
+ write_log_file('[update_network.py] INFO: make initial TPM.txt', UPDATE_NETWORK_LOG_FILE)
cmd = 'python3 buildRmatrix.py %s' % (PARAMETER_FOR_BUILDRMATRIX) # produce TPM.txt
os.system(cmd)
print('[update_network.py]:IMPORTANT: make sure EXPRESSION_MATRIX in %s was set %s and rerun update_network.py.' % (PARAMETER_FOR_NET, TPM_FILE))
sys.exit()
miss_lst2 = all_files_present(FILE_LIST_TO_CHECK) # check files again
- if len(miss_lst2) == 1 and miss_lst2[0] == MERGED_EDGE_FILE: # all other files are ready except edges.txt, make one.
+ if miss_lst2 [MERGED_EDGE_FILE]: # all other files are ready except edges.txt, make one.
print('[update_network.py]: make initial edges.txt ... wait')
- create_edgeds0()
+ create_edges0()
-# Make json2 (sliced binding.txt) if it does not exist. Copy json2 to
-# the web application folder static/edges [do it manually] for displaying
-# binding strength plots.
-if not os.path.isdir('../Data/history/bind/json2') and os.path.exists(BINDING_FILE):
- write_log_file('Make directory ../Data/history/bind/json2. Don\'t forget to copy json2 to static/edges in the web application.', UPDATE_NETWORK_LOG_FILE)
- cmd = 'python3 slice_binding_to_JSON.py %s' % (PARAMETER_FOR_NET)
- os.system(cmd)
-
-
-# Make json (sliced TPM.txt) if it does not exist. Copy json to the
+# Make json (sliced TPM.txt). Copy json to the
# web application folder static/edges [manual] for displaying gene
# expression scatterplots.
-if not os.path.isdir('../Data/history/expr/json') and os.path.exists(TPM_FILE):
- write_log_file('Make directory ../Data/history/expr/json. Don\'t forget to copy json to static/edges in the web application.', UPDATE_NETWORK_LOG_FILE)
+if os.path.exists(TPM_FILE):
+ write_log_file('[update_network.py] Make individual json files for gene expression information. Don\'t forget to copy these files to folder static/edges in the web application.', UPDATE_NETWORK_LOG_FILE)
cmd = 'python3 slice_TPM_to_JSON.py %s' % (PARAMETER_FOR_NET)
os.system(cmd)
+# Make json2 (sliced binding.txt). Copy json2 to
+# the web application folder static/edges [do it manually] for displaying
+# binding strength plots.
+if os.path.exists(BINDING_FILE):
+ write_log_file('[update_network.py] Make individual json files for binding information. Don\'t forget to copy these files to folder static/edges in the web application.', UPDATE_NETWORK_LOG_FILE)
+ cmd = 'python3 slice_binding_to_JSON.py %s' % (PARAMETER_FOR_NET)
+ os.system(cmd)
+
# Make sure parameter files are present and valid (rudimentary check but important)
validate_parameter_for_buildcmatrix(PARAMETER_FOR_BUILDCMATRIX)
@@ -678,15 +680,14 @@ if 'parameter_for_buildRmatrix.txt' in updated_file_list and not hold_on(PARAMET
curr_date = datetime.now().strftime('%Y%m%d')
tpm_sample_size = number_rnaseq_id(TPM_FILE)
write_sample_size_file(SAMPLE_SIZE_FILE, curr_date, tpm_sample_size)
-
# Create edges using all RNA-seq experiments
updated_file_list = get_updated_files(FILE_LIST_TO_CHECK, timestamp_dict)
-if 'TPM.txt' in updated_file_list: # we could _touch_ TPM.txt to make it recent. We will recompute edges using the full binding.txt.
+if 'TPM.txt' in updated_file_list: # we could _touch_ TPM.txt to make it recent. In this case, we will recompute edges using the full binding.txt.
# Make a full binding.txt since we are going to use the new TPM.txt to recompute all edges
write_log_file('[update_network.py] Build full binding matrix for the new TPM.txt.', UPDATE_NETWORK_LOG_FILE)
- cmd = 'python3 buildCmatrix.py %s include-all > %s' % (PARAMETER_FOR_BUILDCMATRIX, BINDING_FILE) # include all ChIP-seq IDs. Pay attention to include-all in the command-line argument.
+ cmd = 'python3 buildCmatrix.py %s include-all > %s' % (PARAMETER_FOR_BUILDCMATRIX, BINDING_FILE) # include all ChIP-seq IDs. Pay attention to 'include-all' in the command-line argument.
os.system(cmd)
# target_tf.txt
@@ -694,13 +695,13 @@ if 'TPM.txt' in updated_file_list: # we could _touch_ TPM.txt to make it recent.
cmd = 'python3 make_target_tf.py %s > %s' % (PARAMETER_FOR_NET, TARGET_TF_FILE)
os.system(cmd)
- write_log_file('[update_network.py] Update ../Data/history/expr/json using the new TPM.txt. Don\'t forget to update the static/edges/json folder in the web application.', UPDATE_NETWORK_LOG_FILE)
+ write_log_file('[update_network.py] Update ../Data/history/expr/json using the new TPM.txt. Don\'t forget to update the static/edges/json folder in the web application.', UPDATE_NETWORK_LOG_FILE)
## json -- make/renew json directory for displaying scatterplots
cmd = 'python3 slice_TPM_to_JSON.py %s' % (PARAMETER_FOR_NET)
- ## os.system(cmd) # turn this on if we are going to use this TPM.txt for displaying scatterplots
+ os.system(cmd) # keep this enabled only if we are going to use this TPM.txt for displaying scatterplots
write_log_file('[update_network.py] Update directory ../Data/history/bind/json2. Don\'t forget to copy json2 to static/edges in the web application.', UPDATE_NETWORK_LOG_FILE)
cmd = 'python3 slice_binding_to_JSON.py %s' % (PARAMETER_FOR_NET)
- #os.system(cmd) # turn this on if we are going to use this binding.txt for displaying bar charts of binding strengths
+ os.system(cmd) # keep this enabled only if we are going to use this binding.txt for displaying bar charts of binding strengths
## copy ../Data/history/bind/json2 and ../Data/history/expr/json to the web application folder 'static/edges' [manual]
if False: # TODO For now I will always use travadb's TPM.txt (138 columns) to display scatterplots. Simpler and faster.
@@ -713,8 +714,6 @@ if 'TPM.txt' in updated_file_list: # we could _touch_ TPM.txt to make it recent.
cmd = 'python3 update_rnaseq_info_json.py'
os.system(cmd)
-
-
# Compute edges. This could take a lot of time so update FILE_TIMESTAMP first.
record_file_time(FILE_LIST_TO_CHECK, FILE_TIMESTAMP)
create_edges0()