From 824629706b75b3c5b4275c2f6d6ce540bf6dfc73 Mon Sep 17 00:00:00 2001 From: Lan Hui Date: Sun, 11 Aug 2024 15:01:58 +0800 Subject: dos2unix --- Code/configure.py | 112 +++++++++++++++++++++++++++--------------------------- 1 file changed, 56 insertions(+), 56 deletions(-) (limited to 'Code/configure.py') diff --git a/Code/configure.py b/Code/configure.py index 73fc9cc..2f0fbd9 100644 --- a/Code/configure.py +++ b/Code/configure.py @@ -1,56 +1,56 @@ -# From get_TPM_by_salmon.py -SALMON = '/home/lanhui/brain/Salmon/Salmon-0.7.2_linux_x86_64/bin/salmon' # salmon software path -SALMON_INDEX = '/home/lanhui/brain/Salmon/salmon_index' -TRANSCRIPTOME = '/home/lanhui/brain/Salmon/Arabidopsis_thaliana.TAIR10.cdna.all.fa' -SALMON_MAP_RESULT_DIR = '../Data/temp/salmon_map_result' -KMER = 31 - -# From download_and_map.py -DAILY_MAP_NUMBER = 4 # download this many samples each time. I have tested the values of 3, 4, 5, 8. -MIN_FASTQ_FILE_SIZE = 200000000 # in bytes, approximately 200MB -RNA_SEQ_INFO_FILE = '../Data/information/rnaseq_info_database.json' # some data downloaded from ENA are not RNA-seq (they are ChIP-seq). Use this file to tell whether the file is RNA-seq -DOWNLOADED_SRA_ID_LOG_FILE = '../Data/log/download_log.txt' # a list of downloaded SRA IDs -IGNORED_SRA_ID_LOG_FILE = '../Data/log/download_log_small_sized_ids.txt' # store SRA IDs with small file size. -MAPPED_RDATA_DIR = '../Data/R/Mapped/public' # mapped RNA-seq (file names ended with _quant.txt) go here -RAW_RDATA_DIR = '/disk1/Data/R/Raw' # downloaded files go here, was "../Data/R/Raw" (now almost full). - -# From update_network.py -# Don'T change the following paths and names -HISTORY_DIR = '../Data/history/edges/many_targets' # each edge file contains edges for many targets -HISTORY_DIR2 = '../Data/history/edges/one_target' # edges.txt.* files are here, all edge files have the name edges.txt.*, the leading string 'edges.txt' must be present. -TIMESTAMP_FILE = '../Data/log/file_timestamp.txt' # record last modified time of several important files -SAMPLE_SIZE_FILE = '../Data/log/total.samples.txt' # each line contains a date and the number of samples on and after that date -TEMP_DIR = '../Data/temp' - -PARAMETER_FOR_BUILDCMATRIX = '../Data/parameter/parameter_for_buildCmatrix.txt' -PARAMETER_FOR_BUILDRMATRIX = '../Data/parameter/parameter_for_buildRmatrix.txt' -PARAMETER_FOR_NET = '../Data/parameter/parameter_for_net.txt' -PARAMETER_FOR_NET_TRAVADB_STRESS = '../Data/parameter/parameter_for_net_travadb_stress.txt' -PARAMETER_FOR_NET_TRAVADB_MAP = '../Data/parameter/parameter_for_net_travadb_map.txt' -PARAMETER_FOR_NET_MILD_DROUGHT = '../Data/parameter/parameter_for_net_mild_drought.txt' -PARAMETER_FOR_NET_WIGGELAB_DIURNAL = '../Data/parameter/parameter_for_net_wiggelab_diurnal.txt' - -BINDING_FILE = '../Data/history/bind/binding.txt' -TPM_FILE = '../Data/history/expr/TPM.txt' # gene expression data - -BUILDRMATRIX_RENEW_INTERVAL = 28 # check every 28 days for updating TPM.txt -MIN_RNA_SEQ_INCREASE = -999 # minimum RNA-seq experiments needed when updating parameter_for_buildRmatrix.txt -UPDATE_NETWORK_LOG_FILE = '../Data/log/update.network.log.txt' # network update log. We should check this file from time to time. -NEW_OR_UPDATED_CHIP_FILE = '../Data/log/new.or.updated.chip.file.txt' - -RNA_SEQ_INFO_DATABASE = '../Data/information/rnaseq_info_database.txt' # same as RNA_SEQ_INFO_FILE -RNA_SEQ_INFO_DATABASE_JSON = '../Data/information/rnaseq_info_database.json' - -GENE_ID_FIRST_TWO_LETTERS = 'AT' -MEMORY_STRENGTH = 365 # memory retention power (larger value means better memory) - -# -MAPPED_CDATA_DIR = '../Data/C/Mapped' # mapped ChIp-seq data - -# Used in merge_edges.py -EDGE_POOL_DIR = '/disk1/edge_pool' -MERGED_EDGE_FILE = '../Data/temp/edges.txt' -SQLITE_EDGE_FILE = '../Data/temp/edges.sqlite' -DIFF_EDGE_FILE = '../Data/temp/edges-diff.txt' # the difference between two edge files from yesterday and from today - -TARGET_TF_FILE = '../Data/information/target_tf.txt' +# From get_TPM_by_salmon.py +SALMON = '/home/lanhui/brain/Salmon/Salmon-0.7.2_linux_x86_64/bin/salmon' # salmon software path +SALMON_INDEX = '/home/lanhui/brain/Salmon/salmon_index' +TRANSCRIPTOME = '/home/lanhui/brain/Salmon/Arabidopsis_thaliana.TAIR10.cdna.all.fa' +SALMON_MAP_RESULT_DIR = '../Data/temp/salmon_map_result' +KMER = 31 + +# From download_and_map.py +DAILY_MAP_NUMBER = 4 # download this many samples each time. I have tested the values of 3, 4, 5, 8. +MIN_FASTQ_FILE_SIZE = 200000000 # in bytes, approximately 200MB +RNA_SEQ_INFO_FILE = '../Data/information/rnaseq_info_database.json' # some data downloaded from ENA are not RNA-seq (they are ChIP-seq). Use this file to tell whether the file is RNA-seq +DOWNLOADED_SRA_ID_LOG_FILE = '../Data/log/download_log.txt' # a list of downloaded SRA IDs +IGNORED_SRA_ID_LOG_FILE = '../Data/log/download_log_small_sized_ids.txt' # store SRA IDs with small file size. +MAPPED_RDATA_DIR = '../Data/R/Mapped/public' # mapped RNA-seq (file names ended with _quant.txt) go here +RAW_RDATA_DIR = '/disk1/Data/R/Raw' # downloaded files go here, was "../Data/R/Raw" (now almost full). + +# From update_network.py +# Don'T change the following paths and names +HISTORY_DIR = '../Data/history/edges/many_targets' # each edge file contains edges for many targets +HISTORY_DIR2 = '../Data/history/edges/one_target' # edges.txt.* files are here, all edge files have the name edges.txt.*, the leading string 'edges.txt' must be present. +TIMESTAMP_FILE = '../Data/log/file_timestamp.txt' # record last modified time of several important files +SAMPLE_SIZE_FILE = '../Data/log/total.samples.txt' # each line contains a date and the number of samples on and after that date +TEMP_DIR = '../Data/temp' + +PARAMETER_FOR_BUILDCMATRIX = '../Data/parameter/parameter_for_buildCmatrix.txt' +PARAMETER_FOR_BUILDRMATRIX = '../Data/parameter/parameter_for_buildRmatrix.txt' +PARAMETER_FOR_NET = '../Data/parameter/parameter_for_net.txt' +PARAMETER_FOR_NET_TRAVADB_STRESS = '../Data/parameter/parameter_for_net_travadb_stress.txt' +PARAMETER_FOR_NET_TRAVADB_MAP = '../Data/parameter/parameter_for_net_travadb_map.txt' +PARAMETER_FOR_NET_MILD_DROUGHT = '../Data/parameter/parameter_for_net_mild_drought.txt' +PARAMETER_FOR_NET_WIGGELAB_DIURNAL = '../Data/parameter/parameter_for_net_wiggelab_diurnal.txt' + +BINDING_FILE = '../Data/history/bind/binding.txt' +TPM_FILE = '../Data/history/expr/TPM.txt' # gene expression data + +BUILDRMATRIX_RENEW_INTERVAL = 28 # check every 28 days for updating TPM.txt +MIN_RNA_SEQ_INCREASE = -999 # minimum RNA-seq experiments needed when updating parameter_for_buildRmatrix.txt +UPDATE_NETWORK_LOG_FILE = '../Data/log/update.network.log.txt' # network update log. We should check this file from time to time. +NEW_OR_UPDATED_CHIP_FILE = '../Data/log/new.or.updated.chip.file.txt' + +RNA_SEQ_INFO_DATABASE = '../Data/information/rnaseq_info_database.txt' # same as RNA_SEQ_INFO_FILE +RNA_SEQ_INFO_DATABASE_JSON = '../Data/information/rnaseq_info_database.json' + +GENE_ID_FIRST_TWO_LETTERS = 'AT' +MEMORY_STRENGTH = 365 # memory retention power (larger value means better memory) + +# +MAPPED_CDATA_DIR = '../Data/C/Mapped' # mapped ChIp-seq data + +# Used in merge_edges.py +EDGE_POOL_DIR = '../Data/history/edge_pool' +MERGED_EDGE_FILE = '../Data/temp/edges.txt' +SQLITE_EDGE_FILE = '../Data/temp/edges.sqlite' +DIFF_EDGE_FILE = '../Data/temp/edges-diff.txt' # the difference between two edge files from yesterday and from today + +TARGET_TF_FILE = '../Data/information/target_tf.txt' -- cgit v1.2.1