summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author Lan Hui <lanhui@zjnu.edu.cn> 2025-10-30 15:38:31 +0800
committer Lan Hui <lanhui@zjnu.edu.cn> 2025-10-30 15:38:31 +0800
commit 971ba6590a8187c94bebf71234bdd5c4bd7c5db0 (patch)
tree 26fba651375d69fa4889e5d0c98445c07449aae6
parent c932d81d3ed160f07c758180aada5efb51899db0 (diff)
Regular maintenance (HEAD, master)
-rw-r--r-- Code/download_and_map.py | 6
1 files changed, 4 insertions, 2 deletions
diff --git a/Code/download_and_map.py b/Code/download_and_map.py
index bb46c81..42e9fb0 100644
--- a/Code/download_and_map.py
+++ b/Code/download_and_map.py
@@ -81,6 +81,7 @@ def make_download_list(mapped_dir, rna_data_info_dict, experiment_library_info_d
all_run_ids - a list of NextGen-Seq IDs to select from, not necessarily RNA-seq
mapped_dir - contain all mapped samples
rna_data_info_dict - a dictionary containing all RNA-seq samples from ENA.
+ experiment_library_info_dict - a dictionary containing library strategy/source info for experiment ids
'''
result = []
@@ -91,9 +92,9 @@ def make_download_list(mapped_dir, rna_data_info_dict, experiment_library_info_d
include_me_because_i_am_rnaseq = True if rna_data_info_dict[run_id]['library_strategy'].lower() == 'rna-seq' and rna_data_info_dict[run_id]['library_source'].lower() == 'transcriptomic' else False
associated_experiment_id = rna_data_info_dict[run_id]['experiment_id']
if associated_experiment_id in experiment_library_info_dict:
- is_rnaseq = 'rna-seq' in experiment_library_info_dict[associated_experiment_id]['library_strategy'].lower() and 'transcriptomic' in experiment_library_info_dict[associated_experiment_id]['library_source']
+ is_rnaseq = 'rna-seq' in experiment_library_info_dict[associated_experiment_id]['library_strategy'].lower() and 'transcriptomic' in experiment_library_info_dict[associated_experiment_id]['library_source'].lower()
include_me_because_i_am_rnaseq = include_me_because_i_am_rnaseq or is_rnaseq
- if not (run_id + '_quant.txt') in mapped_files and (not run_id in result) and (not run_id in small_ids) and (not run_id in mapped_run_ids) and include_me_because_i_am_rnaseq: # not mapped yet and is RNA-seq
+ if include_me_because_i_am_rnaseq and not (run_id + '_quant.txt') in mapped_files and (not run_id in result) and (not run_id in small_ids) and (not run_id in mapped_run_ids): # is RNA-seq and not mapped yet
result.append(run_id)
return result
@@ -437,6 +438,7 @@ else:
download_list = make_download_list(MAPPED_RDATA_DIR, rna_data_info_dict, experiment_library_info_dict)
print('[download_and_map.py] There are %d run IDs from which you could select %d of them.' % (len(download_list), DAILY_MAP_NUMBER))
+
# Make a record in log.txt
curr_time = datetime.now().strftime('%Y-%m-%d_%H%M') # append date info to newly created directories
write_download_log_file(DOWNLOADED_SRA_ID_LOG_FILE, 'START at %s (remaining %d)\n' % (curr_time, len(download_list)))