summary | refs | log | tree | commit | diff
path: root/Code/configure.py
diff options
context:
space:
mode:
author Hui Lan <lanhui@zjnu.edu.cn> 2025-04-13 16:08:17 +0800
committer Hui Lan <lanhui@zjnu.edu.cn> 2025-04-13 16:08:17 +0800
commit 7d161d428463ac865459c251a820d85085a2c5fb (patch)
tree b334d1d0b44e47b81b1ba6043250429da6c5f559 /Code/configure.py
parent 36891c55666c009a3c2e106badd81bf97d971abe (diff)
Parse ENA records XML files. It seems that the XML structure for experiment, study and sample has changed.
Diffstat (limited to 'Code/configure.py')
-rw-r--r-- Code/configure.py 5
1 files changed, 5 insertions, 0 deletions
diff --git a/Code/configure.py b/Code/configure.py
index a798e4d..34446c3 100644
--- a/Code/configure.py
+++ b/Code/configure.py
@@ -8,6 +8,11 @@ KMER = 31
# From download_and_map.py
DAILY_MAP_NUMBER = 10 # download this many samples each time. I have tested the values of 3, 4, 5, 8.
MIN_FASTQ_FILE_SIZE = 200000000 # in bytes, approximately 200MB
+INFO_DIR = '../Data/information/'
+ENA_RECORDS_READ_RUN = '../Data/information/ena_read_run.xml'
+ENA_RECORDS_READ_EXPERIMENT = '../Data/information/ena_read_experiment.xml'
+ENA_RECORDS_SAMPLE = '../Data/information/ena_sample.xml'
+ENA_RECORDS_STUDY = '../Data/information/ena_study.xml'
RNA_SEQ_INFO_FILE = '../Data/information/rnaseq_info_database.json' # some data downloaded from ENA are not RNA-seq (they are ChIP-seq). Use this file to tell whether the file is RNA-seq
DOWNLOADED_SRA_ID_LOG_FILE = '../Data/log/download_log.txt' # a list of downloaded SRA IDs
IGNORED_SRA_ID_LOG_FILE = '../Data/log/download_log_small_sized_ids.txt' # store SRA IDs with small file size.