diff options
| -rw-r--r-- | Code/configure.py | 2 | ||||
| -rw-r--r-- | Code/create_edges0.py | 3 | ||||
| -rwxr-xr-x | Code/update_network.py | 79 | 
3 files changed, 41 insertions, 43 deletions
| diff --git a/Code/configure.py b/Code/configure.py index 4ce159a..41c791a 100644 --- a/Code/configure.py +++ b/Code/configure.py @@ -34,7 +34,7 @@ BINDING_FILE               = '../Data/history/bind/binding.txt'  TPM_FILE                   = '../Data/history/expr/TPM.txt' # gene expression data
  BUILDRMATRIX_RENEW_INTERVAL = 14 # check every 15 days for updating TPM.txt
 -MIN_RNA_SEQ_INCREASE = -10000 # minimum RNA-seq experiments needed when updating parameter_for_buildRmatrix.txt
 +MIN_RNA_SEQ_INCREASE = -999 # minimum RNA-seq experiments needed when updating parameter_for_buildRmatrix.txt
  UPDATE_NETWORK_LOG_FILE  = '../Data/log/update.network.log.txt' # network update log. We should check this file from time to time.
  NEW_OR_UPDATED_CHIP_FILE = '../Data/log/new.or.updated.chip.file.txt'
 diff --git a/Code/create_edges0.py b/Code/create_edges0.py index f124370..16c014a 100644 --- a/Code/create_edges0.py +++ b/Code/create_edges0.py @@ -21,8 +21,7 @@ TARGET_FILE   = '../Data/temp/all_targets.txt'  TF_FILE       = '../Data/temp/all_tfs.txt'
  RESULT_FILE   = '../Data/temp/corr_all.txt'
  R_SCRIPT_FILE = '../Data/temp/compute_simple_correlation.r'
 -
 -HISTORY_DIR       = EDGE_POOL_DIR   # edges.txt.* files are here
 +HISTORY_DIR   = EDGE_POOL_DIR   # edges.txt.* files are here
  def get_value(s, delimit):
 diff --git a/Code/update_network.py b/Code/update_network.py index 74e5599..373703d 100755 --- a/Code/update_network.py +++ b/Code/update_network.py @@ -80,7 +80,9 @@ def make_important_dirs():      make_paths('../Data/C/Raw')          make_paths('../Data/history/edges')      make_paths('../Data/history/bind') +    make_paths('../Data/history/bind/json2')      make_paths('../Data/history/expr') +    make_paths('../Data/history/expr/json')      make_paths('../Webapp/static/json')      make_paths('../Webapp/static/edges')          make_paths('../Webapp/templates')     @@ -244,7 +246,8 @@ def number_rnaseq_diff(para_file, tpm_file):      b = number_rnaseq_id(tpm_file) -    return a - b +    return max(0, a - b) +      def validate_gene_file(fname): @@ -451,24 +454,24 @@ def create_edges0():      # The following commands are optional. For example, if a user wants to run it locally, he don't have to provide these TPM tables.      if os.path.exists(PARAMETER_FOR_NET_TRAVADB_STRESS): -        #write_log_file('[update_network.py] Create simple edges.txt using create_edges0.py with %s' % (PARAMETER_FOR_NET_TRAVADB_STRESS), UPDATE_NETWORK_LOG_FILE) +        write_log_file('[update_network.py] Create simple edges.txt using create_edges0.py with %s' % (PARAMETER_FOR_NET_TRAVADB_STRESS), UPDATE_NETWORK_LOG_FILE)          cmd = 'python3 create_edges0.py %s' % (PARAMETER_FOR_NET_TRAVADB_STRESS) -        #os.system(cmd) +        os.system(cmd)      if os.path.exists(PARAMETER_FOR_NET_TRAVADB_MAP): -        #write_log_file('[update_network.py] Create simple edges.txt using create_edges0.py with %s' % (PARAMETER_FOR_NET_TRAVADB_MAP), UPDATE_NETWORK_LOG_FILE) +        write_log_file('[update_network.py] Create simple edges.txt using create_edges0.py with %s' % (PARAMETER_FOR_NET_TRAVADB_MAP), UPDATE_NETWORK_LOG_FILE)          cmd = 'python3 create_edges0.py %s' % (PARAMETER_FOR_NET_TRAVADB_MAP) -        #os.system(cmd) +        os.system(cmd)      if os.path.exists(PARAMETER_FOR_NET_MILD_DROUGHT): -        #write_log_file('[update_network.py] Create simple edges.txt using create_edges0.py with %s' % (PARAMETER_FOR_NET_MILD_DROUGHT), UPDATE_NETWORK_LOG_FILE) +        write_log_file('[update_network.py] Create simple edges.txt using create_edges0.py with %s' % (PARAMETER_FOR_NET_MILD_DROUGHT), UPDATE_NETWORK_LOG_FILE)          cmd = 'python3 create_edges0.py %s' % (PARAMETER_FOR_NET_MILD_DROUGHT) -        #os.system(cmd) +        os.system(cmd)      if os.path.exists(PARAMETER_FOR_NET_WIGGELAB_DIURNAL): -        #write_log_file('[update_network.py] Create simple edges.txt using create_edges0.py with %s' % (PARAMETER_FOR_NET_WIGGELAB_DIURNAL), UPDATE_NETWORK_LOG_FILE) +        write_log_file('[update_network.py] Create simple edges.txt using create_edges0.py with %s' % (PARAMETER_FOR_NET_WIGGELAB_DIURNAL), UPDATE_NETWORK_LOG_FILE)          cmd = 'python3 create_edges0.py %s' % (PARAMETER_FOR_NET_WIGGELAB_DIURNAL) -        #os.system(cmd) +        os.system(cmd)  def create_edges0B(): @@ -533,22 +536,22 @@ check_rnaseq_info() # rnaseq informtion is useful for displaying scatterplots  # Make sure all necessary files are present, if not, make them if possible  miss_lst = all_files_present(FILE_LIST_TO_CHECK) # check if any of them are missing -if miss_lst != []: # miss_lst is non-empty in the beginning. +if miss_lst != []: # miss_lst is non-empty when we first run this script.      print('These mandatory files are missing: %s.\nPrepare them first.' % (' '.join(miss_lst)))     -    write_log_file('[update_network.py] Cannot find these required files:%s' % (' '.join(miss_lst)), UPDATE_NETWORK_LOG_FILE) +    write_log_file('[update_network.py] ERROR: cannot find these required files:%s' % (' '.join(miss_lst)), UPDATE_NETWORK_LOG_FILE)      # initially, we (at most) only have three parameter files, no binding.txt, TPM.txt or edges.txt ...      important_miss_number = 0      if PARAMETER_FOR_BUILDCMATRIX in miss_lst: -        print('[update_network.py]: must prepare %s first.' % (PARAMETER_FOR_BUILDCMATRIX)) +        print('[update_network.py]: ERROR: must prepare %s first.' % (PARAMETER_FOR_BUILDCMATRIX))          important_miss_number += 1      if PARAMETER_FOR_BUILDRMATRIX in miss_lst: -        print('[update_network.py]: must prepare %s first.' % (PARAMETER_FOR_BUILDRMATRIX)) +        print('[update_network.py]: ERROR: must prepare %s first.' % (PARAMETER_FOR_BUILDRMATRIX))          important_miss_number += 1      if PARAMETER_FOR_NET in miss_lst: -        print('[update_network.py]: must prepare %s first.' % (PARAMETER_FOR_NET)) +        print('[update_network.py]: ERROR: must prepare %s first.' % (PARAMETER_FOR_NET))          important_miss_number += 1      if important_miss_number > 0: @@ -556,45 +559,44 @@ if miss_lst != []: # miss_lst is non-empty in the beginning.      if BINDING_FILE in miss_lst:          print('[update_network.py]: make initial binding.txt ... wait') -        write_log_file('[update_network.py] Make initial binding.txt', UPDATE_NETWORK_LOG_FILE) +        write_log_file('[update_network.py] INFO: make initial binding.txt', UPDATE_NETWORK_LOG_FILE)          cmd = 'python3 get_binding.py %s' % (PARAMETER_FOR_BUILDCMATRIX) -        #os.system(cmd) +        os.system(cmd)          cmd = 'python3 buildCmatrix.py %s > %s' % (PARAMETER_FOR_BUILDCMATRIX, BINDING_FILE) -        #os.system(cmd) +        os.system(cmd)          print('[update_network.py]: IMPORATNT: make sure BINDING_MATRIX in %s was set %s and rerun update_network.py.' % (PARAMETER_FOR_NET, BINDING_FILE))          sys.exit()      if TPM_FILE in miss_lst:          print('[update_network.py]: make initial TPM.txt ... wait')         -        write_log_file('[update_network.py] Make initial TPM.txt', UPDATE_NETWORK_LOG_FILE)         +        write_log_file('[update_network.py] INFO: make initial TPM.txt', UPDATE_NETWORK_LOG_FILE)                  cmd = 'python3 buildRmatrix.py %s' % (PARAMETER_FOR_BUILDRMATRIX) # produce TPM.txt          os.system(cmd)          print('[update_network.py]:IMPORTANT: make sure EXPRESSION_MATRIX in %s was set %s and rerun update_network.py.' % (PARAMETER_FOR_NET, TPM_FILE))          sys.exit()      miss_lst2 = all_files_present(FILE_LIST_TO_CHECK) # check files again -    if len(miss_lst2) == 1 and miss_lst2[0] == MERGED_EDGE_FILE: # all other files are ready except edges.txt, make one. +    if miss_lst2 [MERGED_EDGE_FILE]: # all other files are ready except edges.txt, make one.          print('[update_network.py]: make initial edges.txt ... wait') -        create_edgeds0() +        create_edges0() -# Make json2 (sliced binding.txt) if it does not exist.  Copy json2 to -# the web application folder static/edges [do it manually] for displaying -# binding strength plots. -if not os.path.isdir('../Data/history/bind/json2') and os.path.exists(BINDING_FILE): -    write_log_file('Make directory ../Data/history/bind/json2.  Don\'t forget to copy json2 to static/edges in the web application.', UPDATE_NETWORK_LOG_FILE) -    cmd = 'python3 slice_binding_to_JSON.py %s' % (PARAMETER_FOR_NET) -    os.system(cmd) - - -# Make json (sliced TPM.txt) if it does not exist.  Copy json to the +# Make json (sliced TPM.txt).  Copy json to the  # web application folder static/edges [manual] for displaying gene  # expression scatterplots. -if not os.path.isdir('../Data/history/expr/json') and os.path.exists(TPM_FILE): -    write_log_file('Make directory ../Data/history/expr/json.  Don\'t forget to copy json to static/edges in the web application.', UPDATE_NETWORK_LOG_FILE) +if os.path.exists(TPM_FILE): +    write_log_file('[update_network.py] Make individual json files for gene expression information.  Don\'t forget to copy these files to folder static/edges in the web application.', UPDATE_NETWORK_LOG_FILE)      cmd = 'python3 slice_TPM_to_JSON.py %s' % (PARAMETER_FOR_NET)      os.system(cmd) +# Make json2 (sliced binding.txt).  Copy json2 to +# the web application folder static/edges [do it manually] for displaying +# binding strength plots. +if os.path.exists(BINDING_FILE): +    write_log_file('[update_network.py] Make individual json files for binding information.  Don\'t forget to copy these files to folder static/edges in the web application.', UPDATE_NETWORK_LOG_FILE) +    cmd = 'python3 slice_binding_to_JSON.py %s' % (PARAMETER_FOR_NET) +    os.system(cmd) +  # Make sure parameter files are present and valid (rudimentary check but important)  validate_parameter_for_buildcmatrix(PARAMETER_FOR_BUILDCMATRIX) @@ -678,15 +680,14 @@ if 'parameter_for_buildRmatrix.txt' in updated_file_list and not hold_on(PARAMET      curr_date = datetime.now().strftime('%Y%m%d')      tpm_sample_size = number_rnaseq_id(TPM_FILE)      write_sample_size_file(SAMPLE_SIZE_FILE, curr_date, tpm_sample_size) -      # Create edges using all RNA-seq experiments  updated_file_list = get_updated_files(FILE_LIST_TO_CHECK, timestamp_dict) -if 'TPM.txt' in updated_file_list: # we could _touch_ TPM.txt to make it recent.  We will recompute edges using the full binding.txt. +if 'TPM.txt' in updated_file_list: # we could _touch_ TPM.txt to make it recent.  In this case, we will recompute edges using the full binding.txt.      # Make a full binding.txt since we are going to use the new TPM.txt to recompute all edges      write_log_file('[update_network.py] Build full binding matrix for the new TPM.txt.', UPDATE_NETWORK_LOG_FILE) -    cmd = 'python3 buildCmatrix.py %s include-all > %s' % (PARAMETER_FOR_BUILDCMATRIX, BINDING_FILE) # include all ChIP-seq IDs.  Pay attention to include-all in the command-line argument. +    cmd = 'python3 buildCmatrix.py %s include-all > %s' % (PARAMETER_FOR_BUILDCMATRIX, BINDING_FILE) # include all ChIP-seq IDs.  Pay attention to 'include-all' in the command-line argument.      os.system(cmd)      # target_tf.txt @@ -694,13 +695,13 @@ if 'TPM.txt' in updated_file_list: # we could _touch_ TPM.txt to make it recent.      cmd = 'python3 make_target_tf.py %s > %s' % (PARAMETER_FOR_NET, TARGET_TF_FILE)      os.system(cmd) -    write_log_file('[update_network.py] Update ../Data/history/expr/json using the new TPM.txt.  Don\'t forget to update the static/edges/json folder in the web application.', UPDATE_NETWORK_LOG_FILE)     +    write_log_file('[update_network.py] Update ../Data/history/expr/json using the new TPM.txt.  Don\'t forget to update the static/edges/json folder in the web application.', UPDATE_NETWORK_LOG_FILE)      ## json -- make/renew json directory for displaying scatterplots      cmd = 'python3 slice_TPM_to_JSON.py %s' % (PARAMETER_FOR_NET) -    ## os.system(cmd) # turn this on if we are going to use this TPM.txt for displaying scatterplots +    os.system(cmd) # turn this on if we are going to use this TPM.txt for displaying scatterplots      write_log_file('[update_network.py] Update directory ../Data/history/bind/json2.  Don\'t forget to copy json2 to static/edges in the web application.', UPDATE_NETWORK_LOG_FILE)      cmd = 'python3 slice_binding_to_JSON.py %s' % (PARAMETER_FOR_NET) -    #os.system(cmd) # turn this on if we are going to use this binding.txt for displaying bar charts of binding strengths +    os.system(cmd) # turn this on if we are going to use this binding.txt for displaying bar charts of binding strengths      ## copy ../Data/history/bind/json2 and ../Data/history/expr/json to the web application folder 'static/edges' [manual]      if False:  # TODO For now I will always use travadb's TPM.txt (138 columns) to display scatterplots. Simpler and faster. @@ -713,8 +714,6 @@ if 'TPM.txt' in updated_file_list: # we could _touch_ TPM.txt to make it recent.          cmd = 'python3 update_rnaseq_info_json.py'          os.system(cmd) - -      # Compute edges.  This could take a lot of time so update FILE_TIMESTAMP first.      record_file_time(FILE_LIST_TO_CHECK, FILE_TIMESTAMP)      create_edges0() | 
