From 7d161d428463ac865459c251a820d85085a2c5fb Mon Sep 17 00:00:00 2001
From: Hui Lan
Date: Sun, 13 Apr 2025 16:08:17 +0800
Subject: Parse ENA records XML files. It seems that XML structure for experiment, study and sample has changed.

---
 Code/configure.py | 5 +++++
 1 file changed, 5 insertions(+)

(limited to 'Code/configure.py')

diff --git a/Code/configure.py b/Code/configure.py
index a798e4d..34446c3 100644
--- a/Code/configure.py
+++ b/Code/configure.py
@@ -8,6 +8,11 @@ KMER = 31
 # From download_and_map.py
 DAILY_MAP_NUMBER = 10 # download this many samples each time. I have tested the values of 3, 4, 5, 8.
 MIN_FASTQ_FILE_SIZE = 200000000 # in bytes, approximately 200MB
+INFO_DIR = '../Data/information/'
+ENA_RECORDS_READ_RUN = '../Data/information/ena_read_run.xml'
+ENA_RECORDS_READ_EXPERIMENT = '../Data/information/ena_read_experiment.xml'
+ENA_RECORDS_SAMPLE = '../Data/information/ena_sample.xml'
+ENA_RECORDS_STUDY = '../Data/information/ena_study.xml'
 RNA_SEQ_INFO_FILE = '../Data/information/rnaseq_info_database.json' # some data downloaded from ENA are not RNA-seq (they are ChIP-seq). Use this file to tell whether the file is RNA-seq
 DOWNLOADED_SRA_ID_LOG_FILE = '../Data/log/download_log.txt' # a list of downloaded SRA IDs
 IGNORED_SRA_ID_LOG_FILE = '../Data/log/download_log_small_sized_ids.txt' # store SRA IDs with small file size.
-- 
cgit v1.2.1