diff options
author | Lan Hui <lanhui@zjnu.edu.cn> | 2025-05-03 16:04:58 +0800 |
---|---|---|
committer | Lan Hui <lanhui@zjnu.edu.cn> | 2025-05-03 16:04:58 +0800 |
commit | 809c860264218a5e2605424af28781871b723f4f (patch) | |
tree | bf36f1ae6a696cefc1c92ac57e23dc6f861928ef | |
parent | afd2850262ec90208a439b66a1876041c6d9c7d8 (diff) |
download_and_map.py: download and map more if there is still time, even after having processed DAILY_MAP_NUM
-rw-r--r-- | Code/download_and_map.py | 22 |
1 file changed, 19 insertions, 3 deletions
```diff
diff --git a/Code/download_and_map.py b/Code/download_and_map.py
index 178797b..cf34026 100644
--- a/Code/download_and_map.py
+++ b/Code/download_and_map.py
@@ -12,7 +12,7 @@
 # 23 DEC 2016, hui, slcu. Updated: 9 Feb 2017
 # Last modified 10 APR 2017, hui, slcu
 # Last reviewed 31 July 2018
-# Last revised 10 Feb 2021
+# Last revised 3 May 2025
 
 import os, signal, sys, glob, json
 import fnmatch
@@ -149,6 +149,18 @@ def get_sample_id(fname):
     return lst[0]
 
 
+def has_no_more_time(times):
+    ''' Return True if the start time is 18 hours ago. '''
+    if len(times) < 1:
+        return False
+    first_time = times[0]
+    now_time = datetime.now()
+    time_diff = now_time - first_time
+    if time_diff.total_seconds()/3600 > 18: # started 18 hours ago, should stop now, let the machine rest for 6 hours
+        return True
+    return False
+
+
 def download_and_map_data(lst, daily_map_num, dest):
     ''' Download data from ENA; fast (but can be interruptive) '''
     downloaded_files = [] # a list of paths to downloaded files, small files (size less than MIN_FASTQ_FILE_SIZE) won't be included in the list
@@ -158,6 +170,7 @@ def download_and_map_data(lst, daily_map_num, dest):
         return downloaded_files, map_list
 
     count = 0
+    times = [datetime.now()]
     for line in lst: # lst - a list of run IDs
         run_id = line
         dir1 = line[0:6]
@@ -189,6 +202,7 @@ def download_and_map_data(lst, daily_map_num, dest):
         time.sleep(1)
 
 
+        # download
         curr_lst = []
         for link in url_lst:
             sz = get_remote_file_size(link)
@@ -204,7 +218,7 @@ def download_and_map_data(lst, daily_map_num, dest):
                 sample_id = get_sample_id(file_name)
                 write_download_log_file(IGNORED_SRA_ID_LOG_FILE, sample_id+'\n')
 
-        
+        # map
         print(curr_lst)
         if curr_lst != []:
             salmon_map(curr_lst)
@@ -218,7 +232,9 @@ def download_and_map_data(lst, daily_map_num, dest):
                 os.remove(f)
             time.sleep(1)
 
-        if count >= daily_map_num:
+        times.append(datetime.now())
+
+        if count >= daily_map_num and has_no_more_time(times):
             return downloaded_files, map_list
 
         time.sleep(3)
```