diff options
| -rw-r--r-- | Code/download_and_map.py | 8 | 
1 file changed, 4 insertions, 4 deletions
| diff --git a/Code/download_and_map.py b/Code/download_and_map.py index 8c5c4a7..e60b150 100644 --- a/Code/download_and_map.py +++ b/Code/download_and_map.py @@ -78,7 +78,7 @@ def make_download_list(mapped_dir, rna_data_info_dict):      '''      Make next n sample IDs.  These samples must have not been downloaded yet. -    all_run_ids - a list of NextGen-Seq IDs to select from +    all_run_ids - a list of NextGen-Seq IDs to select from, not necessarily RNA-seq       mapped_dir - contain all mapped samples      rna_data_info_dict - a dictionary containing all RNA-seq samples from ENA.      ''' @@ -86,10 +86,10 @@ def make_download_list(mapped_dir, rna_data_info_dict):      result = []      mapped_files = glob_files(mapped_dir, '*_quant.txt')      mapped_run_ids = get_list(DOWNLOADED_SRA_ID_LOG_FILE) -    small_ids = get_list(IGNORED_SRA_ID_LOG_FILE) # these files are too small +    small_ids = get_list(IGNORED_SRA_ID_LOG_FILE) # these data files are too small      for run_id in sorted(rna_data_info_dict.keys(), reverse=True): # SRR first, then ERR, then DRR -        include_me = True if rna_data_info_dict[run_id]['library_strategy'].lower() == 'rna-seq' and  rna_data_info_dict[run_id]['library_source'].lower() == 'transcriptomic' else False -        if not (run_id + '_quant.txt') in mapped_files and (not run_id in result) and (not run_id in small_ids) and (not run_id in mapped_run_ids) and include_me: # not mapped yet and is RNA-seq +        include_me_because_i_am_rnaseq = True if rna_data_info_dict[run_id]['library_strategy'].lower() == 'rna-seq' and  rna_data_info_dict[run_id]['library_source'].lower() == 'transcriptomic' else False +        if not (run_id + '_quant.txt') in mapped_files and (not run_id in result) and (not run_id in small_ids) and (not run_id in mapped_run_ids) and include_me_because_i_am_rnaseq: # not mapped yet and is RNA-seq              result.append(run_id)      return result | 
