diff options
author | Hui Lan <lanhui@zjnu.edu.cn> | 2025-04-13 16:08:17 +0800 |
---|---|---|
committer | Hui Lan <lanhui@zjnu.edu.cn> | 2025-04-13 16:08:17 +0800 |
commit | 7d161d428463ac865459c251a820d85085a2c5fb (patch) | |
tree | b334d1d0b44e47b81b1ba6043250429da6c5f559 /Code/configure.py | |
parent | 36891c55666c009a3c2e106badd81bf97d971abe (diff) |
Parse ENA records XML files. It seems that the XML structure for experiment, study and sample records has changed.
Diffstat (limited to 'Code/configure.py')
-rw-r--r-- | Code/configure.py | 5 |
1 file changed, 5 insertions, 0 deletions
```diff
diff --git a/Code/configure.py b/Code/configure.py
index a798e4d..34446c3 100644
--- a/Code/configure.py
+++ b/Code/configure.py
@@ -8,6 +8,11 @@ KMER = 31
 # From download_and_map.py
 DAILY_MAP_NUMBER = 10 # download this many samples each time. I have tested the values of 3, 4, 5, 8.
 MIN_FASTQ_FILE_SIZE = 200000000 # in bytes, approximately 200MB
+INFO_DIR = '../Data/information/'
+ENA_RECORDS_READ_RUN = '../Data/information/ena_read_run.xml'
+ENA_RECORDS_READ_EXPERIMENT = '../Data/information/ena_read_experiment.xml'
+ENA_RECORDS_SAMPLE = '../Data/information/ena_sample.xml'
+ENA_RECORDS_STUDY = '../Data/information/ena_study.xml'
 RNA_SEQ_INFO_FILE = '../Data/information/rnaseq_info_database.json' # some data downloaded from ENA are not RNA-seq (they are ChIP-seq). Use this file to tell whether the file is RNA-seq
 DOWNLOADED_SRA_ID_LOG_FILE = '../Data/log/download_log.txt' # a list of downloaded SRA IDs
 IGNORED_SRA_ID_LOG_FILE = '../Data/log/download_log_small_sized_ids.txt' # store SRA IDs with small file size.
```