From 3c41b72494d347e8c04ea4ad745c54b4afcbd1dd Mon Sep 17 00:00:00 2001 From: Hui Lan Date: Sun, 3 May 2020 20:56:52 +0800 Subject: download_and_map.py: when download_log.txt contains blank lines, this script will stop working. Make it work when the log file contains blank lines. These blank lines usually appear at the end of the log file. --- Code/download_and_map.py | 35 +++++++++++++++++++---------------- 1 file changed, 19 insertions(+), 16 deletions(-) diff --git a/Code/download_and_map.py b/Code/download_and_map.py index 983e98f..b32b4e5 100644 --- a/Code/download_and_map.py +++ b/Code/download_and_map.py @@ -65,11 +65,12 @@ def get_list(fname): d = {} for line in f: line = line.strip() - lst = line.split() - s = lst[0].strip() # SRR, ERR, or DRR id - if (not s in d) and ('SRR' in s or 'ERR' in s or 'DRR' in s): - d[s] = 1 - result.append(s) + if line != '': + lst = line.split() + s = lst[0].strip() # SRR, ERR, or DRR id + if (not s in d) and ('SRR' in s or 'ERR' in s or 'DRR' in s): + d[s] = 1 + result.append(s) f.close() return result # only return unique elements @@ -293,21 +294,22 @@ def write_network_log_file(s, fname): def last_session_finished(fname): - ''' return true if log file ends with DONE. ''' + ''' Return True iff the last non-empty line of fname starts with DONE. ''' if not os.path.exists(fname): return True f = open(fname) lines = f.readlines() f.close() - last_line = lines[-1] - if last_line.strip() == '': # a newline - print('[download_and_map.py] Last line in file %s is empty. The last line must start with DONE.' 
% (fname)) - sys.exit() - lst = last_line.split() - if lst[0] == 'DONE': - return True - else: - return False + # Check last status + last_status = '' + for line in lines: + line = line.strip() + if line.upper().startswith('START'): + last_status = 'START' + if line.upper().startswith('DONE'): + last_status = 'DONE' + return last_status == 'DONE' + def read_ena_data_info(fname): d = {} @@ -359,7 +361,8 @@ if available_G < 2 * DAILY_MAP_NUMBER: sys.exit() if not last_session_finished(DOWNLOADED_SRA_ID_LOG_FILE): # last session not finished - print('[download_and_map.py] last downloading and mapping session not finished yet. You can edit file %s to remove last START at.' % (DOWNLOADED_SRA_ID_LOG_FILE)) + s = '[download_and_map.py] last downloading and mapping session not finished yet. Check file %s for details.' % (DOWNLOADED_SRA_ID_LOG_FILE) + write_network_log_file(s, UPDATE_NETWORK_LOG_FILE) sys.exit() rna_data_info_dict = read_ena_data_info_json(RNA_SEQ_INFO_FILE) # rna_data_info_dict contains only RNA-seq IDs. -- cgit v1.2.1