diff options
author | Hui Lan <lanhui@zjnu.edu.cn> | 2020-01-18 18:45:28 +0800 |
---|---|---|
committer | Hui Lan <lanhui@zjnu.edu.cn> | 2020-01-18 18:45:28 +0800 |
commit | 5a70b3b498e64dc903b017d45be09a808cfb2b89 (patch) | |
tree | 6da82bf54d1673bd8ff72cf1d8d83666467d2736 /Code | |
parent | 6db297407cad6a49ae977498a4ea4749dc98059f (diff) |
download_and_map.py: write the "not enough space" warning message to the network log file
If there is not enough space left on the disk, download_and_map.py refuses to download any data.
Because the script simply exits, this can be quite mysterious for a maintainer.
So, also write the reason to the network log file.
The reason is something like:
"[download_and_map.py] home directory does not have enough space (only 13 G available)."
-Hui
Diffstat (limited to 'Code')
-rw-r--r-- | Code/configure.py | 2 | ||||
-rw-r--r-- | Code/download_and_map.py | 27 |
2 files changed, 20 insertions, 9 deletions
diff --git a/Code/configure.py b/Code/configure.py index c740e98..ed6574d 100644 --- a/Code/configure.py +++ b/Code/configure.py @@ -6,7 +6,7 @@ SALMON_MAP_RESULT_DIR = '../Data/temp/salmon_map_result' KMER = 31
# From download_and_map.py
-DAILY_MAP_NUMBER = 5 # download this many samples each time. I have tested the values of 3, 4, 5, 8.
+DAILY_MAP_NUMBER = 4 # download this many samples each time. I have tested the values of 3, 4, 5, 8.
MIN_FASTQ_FILE_SIZE = 200000000 # in bytes, approximately 200MB
RNA_SEQ_INFO_FILE = '../Data/information/rnaseq_info_database.json' # some data downloaded from ENA are not RNA-seq (they are ChIP-seq). Use this file to tell whether the file is RNA-seq
DOWNLOADED_SRA_ID_LOG_FILE = '../Data/log/download_log.txt' # a list of downloaded SRA IDs
diff --git a/Code/download_and_map.py b/Code/download_and_map.py index 95a4753..3a22315 100644 --- a/Code/download_and_map.py +++ b/Code/download_and_map.py @@ -21,7 +21,7 @@ import re from datetime import datetime
##########################################################################################
-from configure import DAILY_MAP_NUMBER, MIN_FASTQ_FILE_SIZE, RNA_SEQ_INFO_FILE, DOWNLOADED_SRA_ID_LOG_FILE, IGNORED_SRA_ID_LOG_FILE, MAPPED_RDATA_DIR, RAW_RDATA_DIR, SALMON_MAP_RESULT_DIR
+from configure import DAILY_MAP_NUMBER, MIN_FASTQ_FILE_SIZE, RNA_SEQ_INFO_FILE, DOWNLOADED_SRA_ID_LOG_FILE, IGNORED_SRA_ID_LOG_FILE, UPDATE_NETWORK_LOG_FILE, MAPPED_RDATA_DIR, RAW_RDATA_DIR, SALMON_MAP_RESULT_DIR
FASTQ_DUMP_PATH = '/home/hui/software/sratoolkit/sratoolkit.2.8.0-ubuntu64/bin/fastq-dump'
@@ -185,7 +185,7 @@ def download_and_map_data(lst, daily_map_num, dest): url_lst = get_file_url('../Data/temp/wget_temp_file0.txt')
if url_lst == []:
- write_log_file(IGNORED_SRA_ID_LOG_FILE, run_id+'\n')
+ write_download_log_file(IGNORED_SRA_ID_LOG_FILE, run_id+'\n')
time.sleep(1)
@@ -202,7 +202,7 @@ def download_and_map_data(lst, daily_map_num, dest): print('[download_and_map.py] IGNORE [%d MB] %s' % (int(sz/1000000.0), link))
file_name = os.path.basename(link)
sample_id = get_sample_id(file_name)
- write_log_file(IGNORED_SRA_ID_LOG_FILE, sample_id+'\n')
+ write_download_log_file(IGNORED_SRA_ID_LOG_FILE, sample_id+'\n')
print(curr_lst)
@@ -253,7 +253,7 @@ def download_data2(lst, dest): for fname in glob.glob( os.path.join(dest, '%s*gz' % (run_id)) ) :
downloaded_files.append(fname)
else:
- write_log_file(IGNORED_SRA_ID_LOG_FILE, run_id+'\n')
+ write_download_log_file(IGNORED_SRA_ID_LOG_FILE, run_id+'\n')
return downloaded_files
@@ -273,7 +273,7 @@ def salmon_map(lst): os.system(cmd)
-def write_log_file(fname, s):
+def write_download_log_file(fname, s):
if not os.path.exists(fname):
f = open(fname, 'w')
else:
@@ -282,6 +282,16 @@ def write_log_file(fname, s): f.close()
+def write_network_log_file(s, fname):
+ f = open(fname, 'a')
+ curr_time = datetime.now().strftime('%Y-%m-%d %H:%M')
+ s = '[' + curr_time + ']: ' + s
+ if not '\n' in s:
+ s += '\n'
+ f.write(s)
+ f.close()
+
+
def last_session_finished(fname):
''' return true if log file ends with DONE. '''
if not os.path.exists(fname):
@@ -345,6 +355,7 @@ if not os.path.exists(RNA_SEQ_INFO_FILE): available_G = 4 * os.statvfs('/home').f_bavail / (1024*1024) # compute available space (in G). Each block has 4k bytes, work for Linux/UNIX systems only
if available_G < 3 * DAILY_MAP_NUMBER:
print('[download_and_map.py] home directory does not have enough space (only %d G available) ' % (available_G))
+ write_network_log_file('[download_and_map.py] home directory does not have enough space (only %d G available).' % (available_G), UPDATE_NETWORK_LOG_FILE)
sys.exit()
if not last_session_finished(DOWNLOADED_SRA_ID_LOG_FILE): # last session not finished
@@ -365,7 +376,7 @@ else: # Make a record in log.txt
curr_time = datetime.now().strftime('%Y-%m-%d_%H%M') # append date info to newly created directories
-write_log_file(DOWNLOADED_SRA_ID_LOG_FILE, 'START at %s\n' % (curr_time))
+write_download_log_file(DOWNLOADED_SRA_ID_LOG_FILE, 'START at %s\n' % (curr_time))
# Download these RNA-seq IDs and map them using salmon
print('[download_and_map.py] Start downloading and mapping ...')
@@ -386,5 +397,5 @@ else: print('[download_and_map.py] No quant files to move.')
-write_log_file(DOWNLOADED_SRA_ID_LOG_FILE, '%s\n' % ('\n'.join(map_list)))
-write_log_file(DOWNLOADED_SRA_ID_LOG_FILE, 'DONE at %s\n' % (curr_time))
+write_download_log_file(DOWNLOADED_SRA_ID_LOG_FILE, '%s\n' % ('\n'.join(map_list)))
+write_download_log_file(DOWNLOADED_SRA_ID_LOG_FILE, 'DONE at %s\n' % (curr_time))
|