diff options
author | Hui Lan <lanhui@zjnu.edu.cn> | 2024-08-07 15:01:31 +0800 |
---|---|---|
committer | Hui Lan <lanhui@zjnu.edu.cn> | 2024-08-07 15:01:31 +0800 |
commit | 7cbaecaa81bf55dd36f985c670f5bcb038780895 (patch) | |
tree | 0fd9718eb932fc91cd0f338cee4ffdeb8111ef08 | |
parent | e48a12c582bd8df222a74747c32c5a691b7e7782 (diff) |
Review update_network.py
-rw-r--r-- | Code/configure.py | 2 |
-rw-r--r-- | Code/create_edges0.py | 3 |
-rwxr-xr-x | Code/update_network.py | 79 |
3 files changed, 41 insertions, 43 deletions
diff --git a/Code/configure.py b/Code/configure.py index 4ce159a..41c791a 100644 --- a/Code/configure.py +++ b/Code/configure.py @@ -34,7 +34,7 @@ BINDING_FILE = '../Data/history/bind/binding.txt' TPM_FILE = '../Data/history/expr/TPM.txt' # gene expression data
BUILDRMATRIX_RENEW_INTERVAL = 14 # check every 15 days for updating TPM.txt
-MIN_RNA_SEQ_INCREASE = -10000 # minimum RNA-seq experiments needed when updating parameter_for_buildRmatrix.txt
+MIN_RNA_SEQ_INCREASE = -999 # minimum RNA-seq experiments needed when updating parameter_for_buildRmatrix.txt
UPDATE_NETWORK_LOG_FILE = '../Data/log/update.network.log.txt' # network update log. We should check this file from time to time.
NEW_OR_UPDATED_CHIP_FILE = '../Data/log/new.or.updated.chip.file.txt'
diff --git a/Code/create_edges0.py b/Code/create_edges0.py index f124370..16c014a 100644 --- a/Code/create_edges0.py +++ b/Code/create_edges0.py @@ -21,8 +21,7 @@ TARGET_FILE = '../Data/temp/all_targets.txt' TF_FILE = '../Data/temp/all_tfs.txt'
RESULT_FILE = '../Data/temp/corr_all.txt'
R_SCRIPT_FILE = '../Data/temp/compute_simple_correlation.r'
-
-HISTORY_DIR = EDGE_POOL_DIR # edges.txt.* files are here
+HISTORY_DIR = EDGE_POOL_DIR # edges.txt.* files are here
def get_value(s, delimit):
diff --git a/Code/update_network.py b/Code/update_network.py index 74e5599..373703d 100755 --- a/Code/update_network.py +++ b/Code/update_network.py @@ -80,7 +80,9 @@ def make_important_dirs(): make_paths('../Data/C/Raw') make_paths('../Data/history/edges') make_paths('../Data/history/bind') + make_paths('../Data/history/bind/json2') make_paths('../Data/history/expr') + make_paths('../Data/history/expr/json') make_paths('../Webapp/static/json') make_paths('../Webapp/static/edges') make_paths('../Webapp/templates') @@ -244,7 +246,8 @@ def number_rnaseq_diff(para_file, tpm_file): b = number_rnaseq_id(tpm_file) - return a - b + return max(0, a - b) + def validate_gene_file(fname): @@ -451,24 +454,24 @@ def create_edges0(): # The following commands are optional. For example, if a user wants to run it locally, he don't have to provide these TPM tables. if os.path.exists(PARAMETER_FOR_NET_TRAVADB_STRESS): - #write_log_file('[update_network.py] Create simple edges.txt using create_edges0.py with %s' % (PARAMETER_FOR_NET_TRAVADB_STRESS), UPDATE_NETWORK_LOG_FILE) + write_log_file('[update_network.py] Create simple edges.txt using create_edges0.py with %s' % (PARAMETER_FOR_NET_TRAVADB_STRESS), UPDATE_NETWORK_LOG_FILE) cmd = 'python3 create_edges0.py %s' % (PARAMETER_FOR_NET_TRAVADB_STRESS) - #os.system(cmd) + os.system(cmd) if os.path.exists(PARAMETER_FOR_NET_TRAVADB_MAP): - #write_log_file('[update_network.py] Create simple edges.txt using create_edges0.py with %s' % (PARAMETER_FOR_NET_TRAVADB_MAP), UPDATE_NETWORK_LOG_FILE) + write_log_file('[update_network.py] Create simple edges.txt using create_edges0.py with %s' % (PARAMETER_FOR_NET_TRAVADB_MAP), UPDATE_NETWORK_LOG_FILE) cmd = 'python3 create_edges0.py %s' % (PARAMETER_FOR_NET_TRAVADB_MAP) - #os.system(cmd) + os.system(cmd) if os.path.exists(PARAMETER_FOR_NET_MILD_DROUGHT): - #write_log_file('[update_network.py] Create simple edges.txt using create_edges0.py with %s' % (PARAMETER_FOR_NET_MILD_DROUGHT), 
UPDATE_NETWORK_LOG_FILE) + write_log_file('[update_network.py] Create simple edges.txt using create_edges0.py with %s' % (PARAMETER_FOR_NET_MILD_DROUGHT), UPDATE_NETWORK_LOG_FILE) cmd = 'python3 create_edges0.py %s' % (PARAMETER_FOR_NET_MILD_DROUGHT) - #os.system(cmd) + os.system(cmd) if os.path.exists(PARAMETER_FOR_NET_WIGGELAB_DIURNAL): - #write_log_file('[update_network.py] Create simple edges.txt using create_edges0.py with %s' % (PARAMETER_FOR_NET_WIGGELAB_DIURNAL), UPDATE_NETWORK_LOG_FILE) + write_log_file('[update_network.py] Create simple edges.txt using create_edges0.py with %s' % (PARAMETER_FOR_NET_WIGGELAB_DIURNAL), UPDATE_NETWORK_LOG_FILE) cmd = 'python3 create_edges0.py %s' % (PARAMETER_FOR_NET_WIGGELAB_DIURNAL) - #os.system(cmd) + os.system(cmd) def create_edges0B(): @@ -533,22 +536,22 @@ check_rnaseq_info() # rnaseq informtion is useful for displaying scatterplots # Make sure all necessary files are present, if not, make them if possible miss_lst = all_files_present(FILE_LIST_TO_CHECK) # check if any of them are missing -if miss_lst != []: # miss_lst is non-empty in the beginning. +if miss_lst != []: # miss_lst is non-empty when we first run this script. print('These mandatory files are missing: %s.\nPrepare them first.' % (' '.join(miss_lst))) - write_log_file('[update_network.py] Cannot find these required files:%s' % (' '.join(miss_lst)), UPDATE_NETWORK_LOG_FILE) + write_log_file('[update_network.py] ERROR: cannot find these required files:%s' % (' '.join(miss_lst)), UPDATE_NETWORK_LOG_FILE) # initially, we (at most) only have three parameter files, no binding.txt, TPM.txt or edges.txt ... important_miss_number = 0 if PARAMETER_FOR_BUILDCMATRIX in miss_lst: - print('[update_network.py]: must prepare %s first.' % (PARAMETER_FOR_BUILDCMATRIX)) + print('[update_network.py]: ERROR: must prepare %s first.' 
% (PARAMETER_FOR_BUILDCMATRIX)) important_miss_number += 1 if PARAMETER_FOR_BUILDRMATRIX in miss_lst: - print('[update_network.py]: must prepare %s first.' % (PARAMETER_FOR_BUILDRMATRIX)) + print('[update_network.py]: ERROR: must prepare %s first.' % (PARAMETER_FOR_BUILDRMATRIX)) important_miss_number += 1 if PARAMETER_FOR_NET in miss_lst: - print('[update_network.py]: must prepare %s first.' % (PARAMETER_FOR_NET)) + print('[update_network.py]: ERROR: must prepare %s first.' % (PARAMETER_FOR_NET)) important_miss_number += 1 if important_miss_number > 0: @@ -556,45 +559,44 @@ if miss_lst != []: # miss_lst is non-empty in the beginning. if BINDING_FILE in miss_lst: print('[update_network.py]: make initial binding.txt ... wait') - write_log_file('[update_network.py] Make initial binding.txt', UPDATE_NETWORK_LOG_FILE) + write_log_file('[update_network.py] INFO: make initial binding.txt', UPDATE_NETWORK_LOG_FILE) cmd = 'python3 get_binding.py %s' % (PARAMETER_FOR_BUILDCMATRIX) - #os.system(cmd) + os.system(cmd) cmd = 'python3 buildCmatrix.py %s > %s' % (PARAMETER_FOR_BUILDCMATRIX, BINDING_FILE) - #os.system(cmd) + os.system(cmd) print('[update_network.py]: IMPORATNT: make sure BINDING_MATRIX in %s was set %s and rerun update_network.py.' % (PARAMETER_FOR_NET, BINDING_FILE)) sys.exit() if TPM_FILE in miss_lst: print('[update_network.py]: make initial TPM.txt ... wait') - write_log_file('[update_network.py] Make initial TPM.txt', UPDATE_NETWORK_LOG_FILE) + write_log_file('[update_network.py] INFO: make initial TPM.txt', UPDATE_NETWORK_LOG_FILE) cmd = 'python3 buildRmatrix.py %s' % (PARAMETER_FOR_BUILDRMATRIX) # produce TPM.txt os.system(cmd) print('[update_network.py]:IMPORTANT: make sure EXPRESSION_MATRIX in %s was set %s and rerun update_network.py.' 
% (PARAMETER_FOR_NET, TPM_FILE)) sys.exit() miss_lst2 = all_files_present(FILE_LIST_TO_CHECK) # check files again - if len(miss_lst2) == 1 and miss_lst2[0] == MERGED_EDGE_FILE: # all other files are ready except edges.txt, make one. + if miss_lst2 [MERGED_EDGE_FILE]: # all other files are ready except edges.txt, make one. print('[update_network.py]: make initial edges.txt ... wait') - create_edgeds0() + create_edges0() -# Make json2 (sliced binding.txt) if it does not exist. Copy json2 to -# the web application folder static/edges [do it manually] for displaying -# binding strength plots. -if not os.path.isdir('../Data/history/bind/json2') and os.path.exists(BINDING_FILE): - write_log_file('Make directory ../Data/history/bind/json2. Don\'t forget to copy json2 to static/edges in the web application.', UPDATE_NETWORK_LOG_FILE) - cmd = 'python3 slice_binding_to_JSON.py %s' % (PARAMETER_FOR_NET) - os.system(cmd) - - -# Make json (sliced TPM.txt) if it does not exist. Copy json to the +# Make json (sliced TPM.txt). Copy json to the # web application folder static/edges [manual] for displaying gene # expression scatterplots. -if not os.path.isdir('../Data/history/expr/json') and os.path.exists(TPM_FILE): - write_log_file('Make directory ../Data/history/expr/json. Don\'t forget to copy json to static/edges in the web application.', UPDATE_NETWORK_LOG_FILE) +if os.path.exists(TPM_FILE): + write_log_file('[update_network.py] Make individual json files for gene expression information. Don\'t forget to copy these files to folder static/edges in the web application.', UPDATE_NETWORK_LOG_FILE) cmd = 'python3 slice_TPM_to_JSON.py %s' % (PARAMETER_FOR_NET) os.system(cmd) +# Make json2 (sliced binding.txt). Copy json2 to +# the web application folder static/edges [do it manually] for displaying +# binding strength plots. +if os.path.exists(BINDING_FILE): + write_log_file('[update_network.py] Make individual json files for binding information. 
Don\'t forget to copy these files to folder static/edges in the web application.', UPDATE_NETWORK_LOG_FILE) + cmd = 'python3 slice_binding_to_JSON.py %s' % (PARAMETER_FOR_NET) + os.system(cmd) + # Make sure parameter files are present and valid (rudimentary check but important) validate_parameter_for_buildcmatrix(PARAMETER_FOR_BUILDCMATRIX) @@ -678,15 +680,14 @@ if 'parameter_for_buildRmatrix.txt' in updated_file_list and not hold_on(PARAMET curr_date = datetime.now().strftime('%Y%m%d') tpm_sample_size = number_rnaseq_id(TPM_FILE) write_sample_size_file(SAMPLE_SIZE_FILE, curr_date, tpm_sample_size) - # Create edges using all RNA-seq experiments updated_file_list = get_updated_files(FILE_LIST_TO_CHECK, timestamp_dict) -if 'TPM.txt' in updated_file_list: # we could _touch_ TPM.txt to make it recent. We will recompute edges using the full binding.txt. +if 'TPM.txt' in updated_file_list: # we could _touch_ TPM.txt to make it recent. In this case, we will recompute edges using the full binding.txt. # Make a full binding.txt since we are going to use the new TPM.txt to recompute all edges write_log_file('[update_network.py] Build full binding matrix for the new TPM.txt.', UPDATE_NETWORK_LOG_FILE) - cmd = 'python3 buildCmatrix.py %s include-all > %s' % (PARAMETER_FOR_BUILDCMATRIX, BINDING_FILE) # include all ChIP-seq IDs. Pay attention to include-all in the command-line argument. + cmd = 'python3 buildCmatrix.py %s include-all > %s' % (PARAMETER_FOR_BUILDCMATRIX, BINDING_FILE) # include all ChIP-seq IDs. Pay attention to 'include-all' in the command-line argument. os.system(cmd) # target_tf.txt @@ -694,13 +695,13 @@ if 'TPM.txt' in updated_file_list: # we could _touch_ TPM.txt to make it recent. cmd = 'python3 make_target_tf.py %s > %s' % (PARAMETER_FOR_NET, TARGET_TF_FILE) os.system(cmd) - write_log_file('[update_network.py] Update ../Data/history/expr/json using the new TPM.txt. 
Don\'t forget to update the static/edges/json folder in the web application.', UPDATE_NETWORK_LOG_FILE) + write_log_file('[update_network.py] Update ../Data/history/expr/json using the new TPM.txt. Don\'t forget to update the static/edges/json folder in the web application.', UPDATE_NETWORK_LOG_FILE) ## json -- make/renew json directory for displaying scatterplots cmd = 'python3 slice_TPM_to_JSON.py %s' % (PARAMETER_FOR_NET) - ## os.system(cmd) # turn this on if we are going to use this TPM.txt for displaying scatterplots + os.system(cmd) # turn this on if we are going to use this TPM.txt for displaying scatterplots write_log_file('[update_network.py] Update directory ../Data/history/bind/json2. Don\'t forget to copy json2 to static/edges in the web application.', UPDATE_NETWORK_LOG_FILE) cmd = 'python3 slice_binding_to_JSON.py %s' % (PARAMETER_FOR_NET) - #os.system(cmd) # turn this on if we are going to use this binding.txt for displaying bar charts of binding strengths + os.system(cmd) # turn this on if we are going to use this binding.txt for displaying bar charts of binding strengths ## copy ../Data/history/bind/json2 and ../Data/history/expr/json to the web application folder 'static/edges' [manual] if False: # TODO For now I will always use travadb's TPM.txt (138 columns) to display scatterplots. Simpler and faster. @@ -713,8 +714,6 @@ if 'TPM.txt' in updated_file_list: # we could _touch_ TPM.txt to make it recent. cmd = 'python3 update_rnaseq_info_json.py' os.system(cmd) - - # Compute edges. This could take a lot of time so update FILE_TIMESTAMP first. record_file_time(FILE_LIST_TO_CHECK, FILE_TIMESTAMP) create_edges0() |