summary refs log tree commit diff
path: root/Code/download_and_map.py
diff options
context:
space:
mode:
Diffstat (limited to 'Code/download_and_map.py')
-rw-r--r-- Code/download_and_map.py 27
1 files changed, 19 insertions, 8 deletions
diff --git a/Code/download_and_map.py b/Code/download_and_map.py
index 95a4753..3a22315 100644
--- a/Code/download_and_map.py
+++ b/Code/download_and_map.py
@@ -21,7 +21,7 @@ import re
from datetime import datetime
##########################################################################################
-from configure import DAILY_MAP_NUMBER, MIN_FASTQ_FILE_SIZE, RNA_SEQ_INFO_FILE, DOWNLOADED_SRA_ID_LOG_FILE, IGNORED_SRA_ID_LOG_FILE, MAPPED_RDATA_DIR, RAW_RDATA_DIR, SALMON_MAP_RESULT_DIR
+from configure import DAILY_MAP_NUMBER, MIN_FASTQ_FILE_SIZE, RNA_SEQ_INFO_FILE, DOWNLOADED_SRA_ID_LOG_FILE, IGNORED_SRA_ID_LOG_FILE, UPDATE_NETWORK_LOG_FILE, MAPPED_RDATA_DIR, RAW_RDATA_DIR, SALMON_MAP_RESULT_DIR
FASTQ_DUMP_PATH = '/home/hui/software/sratoolkit/sratoolkit.2.8.0-ubuntu64/bin/fastq-dump'
@@ -185,7 +185,7 @@ def download_and_map_data(lst, daily_map_num, dest):
url_lst = get_file_url('../Data/temp/wget_temp_file0.txt')
if url_lst == []:
- write_log_file(IGNORED_SRA_ID_LOG_FILE, run_id+'\n')
+ write_download_log_file(IGNORED_SRA_ID_LOG_FILE, run_id+'\n')
time.sleep(1)
@@ -202,7 +202,7 @@ def download_and_map_data(lst, daily_map_num, dest):
print('[download_and_map.py] IGNORE [%d MB] %s' % (int(sz/1000000.0), link))
file_name = os.path.basename(link)
sample_id = get_sample_id(file_name)
- write_log_file(IGNORED_SRA_ID_LOG_FILE, sample_id+'\n')
+ write_download_log_file(IGNORED_SRA_ID_LOG_FILE, sample_id+'\n')
print(curr_lst)
@@ -253,7 +253,7 @@ def download_data2(lst, dest):
for fname in glob.glob( os.path.join(dest, '%s*gz' % (run_id)) ) :
downloaded_files.append(fname)
else:
- write_log_file(IGNORED_SRA_ID_LOG_FILE, run_id+'\n')
+ write_download_log_file(IGNORED_SRA_ID_LOG_FILE, run_id+'\n')
return downloaded_files
@@ -273,7 +273,7 @@ def salmon_map(lst):
os.system(cmd)
-def write_log_file(fname, s):
+def write_download_log_file(fname, s):
if not os.path.exists(fname):
f = open(fname, 'w')
else:
@@ -282,6 +282,16 @@ def write_log_file(fname, s):
f.close()
+def write_network_log_file(s, fname):
+    ''' Append message s, prefixed with the current time, to log file fname.
+
+    Each record has the form '[YYYY-mm-dd HH:MM]: message'.  A newline is added
+    when the record does not already end with one, so every record finishes its
+    own line.  Note the argument order (message first, file name second) is the
+    reverse of write_download_log_file(fname, s).
+    '''
+    curr_time = datetime.now().strftime('%Y-%m-%d %H:%M')
+    record = '[' + curr_time + ']: ' + s
+    # Look at the end of the record, not at any position: a message with an
+    # embedded newline but no trailing one would otherwise run into the next record.
+    if not record.endswith('\n'):
+        record += '\n'
+    # Mode 'a' creates the file when it is missing; the with-block closes the
+    # handle even if the write fails.
+    with open(fname, 'a') as f:
+        f.write(record)
+
+
def last_session_finished(fname):
''' return true if log file ends with DONE. '''
if not os.path.exists(fname):
@@ -345,6 +355,7 @@ if not os.path.exists(RNA_SEQ_INFO_FILE):
available_G = 4 * os.statvfs('/home').f_bavail / (1024*1024) # compute available space (in G). Each block has 4k bytes, work for Linux/UNIX systems only
if available_G < 3 * DAILY_MAP_NUMBER:
print('[download_and_map.py] home directory does not have enough space (only %d G available) ' % (available_G))
+ write_network_log_file('[download_and_map.py] home directory does not have enough space (only %d G available).' % (available_G), UPDATE_NETWORK_LOG_FILE)
sys.exit()
if not last_session_finished(DOWNLOADED_SRA_ID_LOG_FILE): # last session not finished
@@ -365,7 +376,7 @@ else:
# Make a record in log.txt
curr_time = datetime.now().strftime('%Y-%m-%d_%H%M') # append date info to newly created directories
-write_log_file(DOWNLOADED_SRA_ID_LOG_FILE, 'START at %s\n' % (curr_time))
+write_download_log_file(DOWNLOADED_SRA_ID_LOG_FILE, 'START at %s\n' % (curr_time))
# Download these RNA-seq IDs and map them using salmon
print('[download_and_map.py] Start downloading and mapping ...')
@@ -386,5 +397,5 @@ else:
print('[download_and_map.py] No quant files to move.')
-write_log_file(DOWNLOADED_SRA_ID_LOG_FILE, '%s\n' % ('\n'.join(map_list)))
-write_log_file(DOWNLOADED_SRA_ID_LOG_FILE, 'DONE at %s\n' % (curr_time))
+write_download_log_file(DOWNLOADED_SRA_ID_LOG_FILE, '%s\n' % ('\n'.join(map_list)))
+write_download_log_file(DOWNLOADED_SRA_ID_LOG_FILE, 'DONE at %s\n' % (curr_time))