Regular maintenance

author: Lan Hui <lanhui@zjnu.edu.cn> 2025-10-26 15:13:16 +0800
committer: Lan Hui <lanhui@zjnu.edu.cn> 2025-10-26 15:13:16 +0800
commit: 9ce7c87b6cb3e011c59754c184bfeaf66c60e4d0 (patch)
tree: f968da69b06425fc18e19b4d9437e0d401d52f02
parent: 44fec3d53dbd961c3a6598c57bd39f9fc9358171 (diff)
1 files changed, 4 insertions, 4 deletions
diff --git a/Code/download_and_map.py b/Code/download_and_map.py
index 8c5c4a7..e60b150 100644
--- a/Code/download_and_map.py
+++ b/Code/download_and_map.py
@@ -78,7 +78,7 @@ def make_download_list(mapped_dir, rna_data_info_dict):
     '''
     Make next n sample IDs.  These samples must have not been downloaded yet.
 
-    all_run_ids - a list of NextGen-Seq IDs to select from
+    all_run_ids - a list of NextGen-Seq IDs to select from, not necessarily RNA-seq
     mapped_dir - contain all mapped samples
     rna_data_info_dict - a dictionary containing all RNA-seq samples from ENA.
     '''
@@ -86,10 +86,10 @@ def make_download_list(mapped_dir, rna_data_info_dict):
     result = []
     mapped_files = glob_files(mapped_dir, '*_quant.txt')
     mapped_run_ids = get_list(DOWNLOADED_SRA_ID_LOG_FILE)
-    small_ids = get_list(IGNORED_SRA_ID_LOG_FILE) # these files are too small
+    small_ids = get_list(IGNORED_SRA_ID_LOG_FILE) # these data files are too small
     for run_id in sorted(rna_data_info_dict.keys(), reverse=True): # SRR first, then ERR, then DRR
-        include_me = True if rna_data_info_dict[run_id]['library_strategy'].lower() == 'rna-seq' and  rna_data_info_dict[run_id]['library_source'].lower() == 'transcriptomic' else False
-        if not (run_id + '_quant.txt') in mapped_files and (not run_id in result) and (not run_id in small_ids) and (not run_id in mapped_run_ids) and include_me: # not mapped yet and is RNA-seq
+        include_me_because_i_am_rnaseq = True if rna_data_info_dict[run_id]['library_strategy'].lower() == 'rna-seq' and  rna_data_info_dict[run_id]['library_source'].lower() == 'transcriptomic' else False
+        if not (run_id + '_quant.txt') in mapped_files and (not run_id in result) and (not run_id in small_ids) and (not run_id in mapped_run_ids) and include_me_because_i_am_rnaseq: # not mapped yet and is RNA-seq
             result.append(run_id)
     return result
author	Lan Hui <lanhui@zjnu.edu.cn>	2025-10-26 15:13:16 +0800
committer	Lan Hui <lanhui@zjnu.edu.cn>	2025-10-26 15:13:16 +0800
commit	9ce7c87b6cb3e011c59754c184bfeaf66c60e4d0 (patch)
tree	f968da69b06425fc18e19b4d9437e0d401d52f02
parent	44fec3d53dbd961c3a6598c57bd39f9fc9358171 (diff)