summaryrefslogtreecommitdiff
path: root/Code
diff options
context:
space:
mode:
Diffstat (limited to 'Code')
-rw-r--r--Code/download_and_map.py22
1 files changed, 19 insertions, 3 deletions
diff --git a/Code/download_and_map.py b/Code/download_and_map.py
index 178797b..cf34026 100644
--- a/Code/download_and_map.py
+++ b/Code/download_and_map.py
@@ -12,7 +12,7 @@
# 23 DEC 2016, hui, slcu. Updated: 9 Feb 2017
# Last modified 10 APR 2017, hui, slcu
# Last reviewed 31 July 2018
-# Last revised 10 Feb 2021
+# Last revised 3 May 2025
import os, signal, sys, glob, json
import fnmatch
@@ -149,6 +149,18 @@ def get_sample_id(fname):
return lst[0]
def has_no_more_time(times, max_hours=18):
    ''' Return True if more than max_hours hours have elapsed since times[0].

    times     -- list of datetime objects; only the first entry (the start
                 time) is inspected.  An empty list yields False.
    max_hours -- length of the working window in hours.  The default of 18
                 means: once the run started more than 18 hours ago it should
                 stop, letting the machine rest for the remaining 6 hours.
    '''
    if not times:
        return False
    # Compared against a naive datetime.now(), so the entries in times must be
    # naive datetimes as well (mixing naive and aware values raises TypeError).
    elapsed = datetime.now() - times[0]
    return elapsed.total_seconds() / 3600 > max_hours
+
+
def download_and_map_data(lst, daily_map_num, dest):
''' Download data from ENA; fast (but can be interruptive) '''
downloaded_files = [] # a list of paths to downloaded files, small files (size less than MIN_FASTQ_FILE_SIZE) won't be included in the list
@@ -158,6 +170,7 @@ def download_and_map_data(lst, daily_map_num, dest):
return downloaded_files, map_list
count = 0
+ times = [datetime.now()]
for line in lst: # lst - a list of run IDs
run_id = line
dir1 = line[0:6]
@@ -189,6 +202,7 @@ def download_and_map_data(lst, daily_map_num, dest):
time.sleep(1)
+ # download
curr_lst = []
for link in url_lst:
sz = get_remote_file_size(link)
@@ -204,7 +218,7 @@ def download_and_map_data(lst, daily_map_num, dest):
sample_id = get_sample_id(file_name)
write_download_log_file(IGNORED_SRA_ID_LOG_FILE, sample_id+'\n')
-
+ # map
print(curr_lst)
if curr_lst != []:
salmon_map(curr_lst)
@@ -218,7 +232,9 @@ def download_and_map_data(lst, daily_map_num, dest):
os.remove(f)
time.sleep(1)
- if count >= daily_map_num:
+ times.append(datetime.now())
+
+ if count >= daily_map_num and has_no_more_time(times):
return downloaded_files, map_list
time.sleep(3)