diff options
| author | Lan Hui <lanhui@zjnu.edu.cn> | 2025-10-26 15:13:16 +0800 | 
|---|---|---|
| committer | Lan Hui <lanhui@zjnu.edu.cn> | 2025-10-26 15:13:16 +0800 | 
| commit | 9ce7c87b6cb3e011c59754c184bfeaf66c60e4d0 (patch) | |
| tree | f968da69b06425fc18e19b4d9437e0d401d52f02 | |
| parent | 44fec3d53dbd961c3a6598c57bd39f9fc9358171 (diff) | |
Regular maintenance
| -rw-r--r-- | Code/download_and_map.py | 8 | 
1 files changed, 4 insertions, 4 deletions
| diff --git a/Code/download_and_map.py b/Code/download_and_map.py index 8c5c4a7..e60b150 100644 --- a/Code/download_and_map.py +++ b/Code/download_and_map.py @@ -78,7 +78,7 @@ def make_download_list(mapped_dir, rna_data_info_dict):      '''      Make next n sample IDs.  These samples must have not been downloaded yet. -    all_run_ids - a list of NextGen-Seq IDs to select from +    all_run_ids - a list of NextGen-Seq IDs to select from, not necessarily RNA-seq       mapped_dir - contain all mapped samples      rna_data_info_dict - a dictionary containing all RNA-seq samples from ENA.      ''' @@ -86,10 +86,10 @@ def make_download_list(mapped_dir, rna_data_info_dict):      result = []      mapped_files = glob_files(mapped_dir, '*_quant.txt')      mapped_run_ids = get_list(DOWNLOADED_SRA_ID_LOG_FILE) -    small_ids = get_list(IGNORED_SRA_ID_LOG_FILE) # these files are too small +    small_ids = get_list(IGNORED_SRA_ID_LOG_FILE) # these data files are too small      for run_id in sorted(rna_data_info_dict.keys(), reverse=True): # SRR first, then ERR, then DRR -        include_me = True if rna_data_info_dict[run_id]['library_strategy'].lower() == 'rna-seq' and  rna_data_info_dict[run_id]['library_source'].lower() == 'transcriptomic' else False -        if not (run_id + '_quant.txt') in mapped_files and (not run_id in result) and (not run_id in small_ids) and (not run_id in mapped_run_ids) and include_me: # not mapped yet and is RNA-seq +        include_me_because_i_am_rnaseq = True if rna_data_info_dict[run_id]['library_strategy'].lower() == 'rna-seq' and  rna_data_info_dict[run_id]['library_source'].lower() == 'transcriptomic' else False +        if not (run_id + '_quant.txt') in mapped_files and (not run_id in result) and (not run_id in small_ids) and (not run_id in mapped_run_ids) and include_me_because_i_am_rnaseq: # not mapped yet and is RNA-seq              result.append(run_id)      return result | 
