summary refs log tree commit diff
diff options
context:
space:
mode:
-rw-r--r-- Code/download_and_map.py 8
1 files changed, 4 insertions, 4 deletions
diff --git a/Code/download_and_map.py b/Code/download_and_map.py
index 8c5c4a7..e60b150 100644
--- a/Code/download_and_map.py
+++ b/Code/download_and_map.py
@@ -78,7 +78,7 @@ def make_download_list(mapped_dir, rna_data_info_dict):
'''
Make next n sample IDs. These samples must have not been downloaded yet.
- all_run_ids - a list of NextGen-Seq IDs to select from
+ all_run_ids - a list of NextGen-Seq IDs to select from, not necessarily RNA-seq
mapped_dir - contain all mapped samples
rna_data_info_dict - a dictionary containing all RNA-seq samples from ENA.
'''
@@ -86,10 +86,10 @@ def make_download_list(mapped_dir, rna_data_info_dict):
result = []
mapped_files = glob_files(mapped_dir, '*_quant.txt')
mapped_run_ids = get_list(DOWNLOADED_SRA_ID_LOG_FILE)
- small_ids = get_list(IGNORED_SRA_ID_LOG_FILE) # these files are too small
+ small_ids = get_list(IGNORED_SRA_ID_LOG_FILE) # these data files are too small
for run_id in sorted(rna_data_info_dict.keys(), reverse=True): # SRR first, then ERR, then DRR
- include_me = True if rna_data_info_dict[run_id]['library_strategy'].lower() == 'rna-seq' and rna_data_info_dict[run_id]['library_source'].lower() == 'transcriptomic' else False
- if not (run_id + '_quant.txt') in mapped_files and (not run_id in result) and (not run_id in small_ids) and (not run_id in mapped_run_ids) and include_me: # not mapped yet and is RNA-seq
+ include_me_because_i_am_rnaseq = True if rna_data_info_dict[run_id]['library_strategy'].lower() == 'rna-seq' and rna_data_info_dict[run_id]['library_source'].lower() == 'transcriptomic' else False
+ if not (run_id + '_quant.txt') in mapped_files and (not run_id in result) and (not run_id in small_ids) and (not run_id in mapped_run_ids) and include_me_because_i_am_rnaseq: # not mapped yet and is RNA-seq
result.append(run_id)
return result