summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorHui Lan <lanhui@zjnu.edu.cn>2025-04-09 15:45:21 +0800
committerHui Lan <lanhui@zjnu.edu.cn>2025-04-09 15:45:21 +0800
commit75ec1d1e8c10fec0d4a29029fe0e35813271e020 (patch)
tree7ff18d679da0806dbdc81b8c5d3caa197673e840
parent078feb13937fe6566307e4ad612a7bc93b7fcf9c (diff)
Maintenance
-rw-r--r--Code/configure.py2
-rw-r--r--Code/download_and_map.py5
2 files changed, 3 insertions, 4 deletions
diff --git a/Code/configure.py b/Code/configure.py
index 5c89cae..a798e4d 100644
--- a/Code/configure.py
+++ b/Code/configure.py
@@ -12,7 +12,7 @@ RNA_SEQ_INFO_FILE = '../Data/information/rnaseq_info_database.json' # some data
DOWNLOADED_SRA_ID_LOG_FILE = '../Data/log/download_log.txt' # a list of downloaded SRA IDs
IGNORED_SRA_ID_LOG_FILE = '../Data/log/download_log_small_sized_ids.txt' # store SRA IDs with small file size.
MAPPED_RDATA_DIR = '../Data/R/Mapped/public' # mapped RNA-seq (file names ended with _quant.txt) go here
-RAW_RDATA_DIR = '/disk1/Data/R/Raw' # downloaded files go here, was "../Data/R/Raw" (now almost full).
+RAW_RDATA_DIR = '/disk1/Data/R/Raw' # downloaded files go here, was "../Data/R/Raw"
# From update_network.py
# Don't change the following paths and names
diff --git a/Code/download_and_map.py b/Code/download_and_map.py
index 3f6e14b..c21e114 100644
--- a/Code/download_and_map.py
+++ b/Code/download_and_map.py
@@ -1,7 +1,6 @@
# Usage: python download_and_map.py
# python download_and_map.py run_ids.txt
#
-# Edit DAILY_DOWNLOAD_NUMBER and MIN_FILE_SIZE
#
# This program checks RNA_SEQ_INFO_FILE for not yet downloaded, *public* RNA-seq data, makes a list of them, then downloads and maps them using Salmon. It is very important to prepare
# RNA_SEQ_INFO_FILE (see parse_end_xlm.py). In fact, only the first column of RNA_SEQ_INFO_FILE is required by this program, that is, a list of RNA-seq IDs.
@@ -25,8 +24,6 @@ from datetime import datetime
##########################################################################################
from configure import DAILY_MAP_NUMBER, MIN_FASTQ_FILE_SIZE, RNA_SEQ_INFO_FILE, DOWNLOADED_SRA_ID_LOG_FILE, IGNORED_SRA_ID_LOG_FILE, UPDATE_NETWORK_LOG_FILE, MAPPED_RDATA_DIR, RAW_RDATA_DIR, SALMON_MAP_RESULT_DIR
-FASTQ_DUMP_PATH = '/home/hui/software/sratoolkit/sratoolkit.2.8.0-ubuntu64/bin/fastq-dump'
-
##########################################################################################
def glob_files(directory, pattern):
@@ -229,8 +226,10 @@ def download_and_map_data(lst, daily_map_num, dest):
return downloaded_files, map_list
+
def download_data2(lst, dest):
''' Download data from SRA, slow '''
+ FASTQ_DUMP_PATH = '/home/hui/software/sratoolkit/sratoolkit.2.8.0-ubuntu64/bin/fastq-dump'
if not os.path.exists(FASTQ_DUMP_PATH):
print('%s not exists.' % (FASTQ_DUMP_PATH))
sys.exit()