diff options
-rw-r--r-- | Code/download_and_map.py | 26 | ||||
-rw-r--r-- | Code/publish_mapped_data.py | 33 | ||||
-rw-r--r-- | Code/test_redis_publish.py | 4 |
3 files changed, 41 insertions, 22 deletions
diff --git a/Code/download_and_map.py b/Code/download_and_map.py index 6372248..8c5c4a7 100644 --- a/Code/download_and_map.py +++ b/Code/download_and_map.py @@ -19,12 +19,11 @@ import fnmatch import time import re import shutil -import redis import json from datetime import datetime ########################################################################################## -from configure import DAILY_MAP_NUMBER, MIN_FASTQ_FILE_SIZE, RNA_SEQ_INFO_FILE, DOWNLOADED_SRA_ID_LOG_FILE, IGNORED_SRA_ID_LOG_FILE, UPDATE_NETWORK_LOG_FILE, MAPPED_RDATA_DIR, RAW_RDATA_DIR, SALMON_MAP_RESULT_DIR, REDIS_CHANNEL +from configure import DAILY_MAP_NUMBER, MIN_FASTQ_FILE_SIZE, RNA_SEQ_INFO_FILE, DOWNLOADED_SRA_ID_LOG_FILE, IGNORED_SRA_ID_LOG_FILE, UPDATE_NETWORK_LOG_FILE, MAPPED_RDATA_DIR, RAW_RDATA_DIR, SALMON_MAP_RESULT_DIR ########################################################################################## def glob_files(directory, pattern): @@ -387,20 +386,6 @@ def reverse_lines(fname, fname2): f2.write(content) -def publish(mapped_data_directory): - redis_host = os.getenv('REDIS_HOST', '127.0.0.1') - redis_password = os.getenv('REDIS_PASSWORD', '123456') - r = redis.Redis(host=redis_host, port=6379, password=redis_password, db=0) - for fname in glob.glob('%s/*_quant.txt' % (mapped_data_directory.rstrip('/'))): - try: - file_basename = os.path.basename(fname) - with open(fname, 'r') as f: - data = f.read() - r.publish(REDIS_CHANNEL, json.dumps({'filename':file_basename, 'data':data})) - time.sleep(1) - except Exception as e: - r.publish(REDIS_CHANNEL, json.dumps({'filename':'REDIS_ERROR.txt', 'data':str(e)})) - ## main # For filtering RNA-seq data @@ -420,6 +405,8 @@ if not last_session_finished(DOWNLOADED_SRA_ID_LOG_FILE): # last session not fin write_network_log_file(s, UPDATE_NETWORK_LOG_FILE) if age_of_file_in_hours(DOWNLOADED_SRA_ID_LOG_FILE) > 24: # add DONE to the log file anyway append_done(DOWNLOADED_SRA_ID_LOG_FILE) + write_download_log_file(DOWNLOADED_SRA_ID_LOG_FILE, 'DONE at %s\n' % (curr_time)) + reverse_lines(DOWNLOADED_SRA_ID_LOG_FILE, os.path.splitext(DOWNLOADED_SRA_ID_LOG_FILE)[0] + '_reversed.txt') kill_process('wget') sys.exit() @@ -447,15 +434,14 @@ curr_time = datetime.now().strftime('%Y-%m-%d_%H%M') # append date info to newly if not os.path.isdir(MAPPED_RDATA_DIR): os.makedirs(MAPPED_RDATA_DIR) -# after mapping is finished, move all resulting files to MAPPED_RDATA_DIR +# after mapping is finished, copy all resulting files to MAPPED_RDATA_DIR if glob.glob('%s/*_quant.txt' % (SALMON_MAP_RESULT_DIR.rstrip('/'))) != []: - publish(SALMON_MAP_RESULT_DIR) - cmd = 'mv %s/*_quant.txt %s' % (SALMON_MAP_RESULT_DIR.rstrip('/'), MAPPED_RDATA_DIR) + cmd = 'cp %s/*_quant.txt %s' % (SALMON_MAP_RESULT_DIR.rstrip('/'), MAPPED_RDATA_DIR) os.system(cmd) print('[download_and_map.py] Done. Check directory %s.' % (os.path.abspath(MAPPED_RDATA_DIR))) else: print('[download_and_map.py] No quant files to move.') - + write_network_log_file('[download_and_map.py] No quant files to move.', UPDATE_NETWORK_LOG_FILE) write_download_log_file(DOWNLOADED_SRA_ID_LOG_FILE, '%s\n' % ('\n'.join(map_list))) write_download_log_file(DOWNLOADED_SRA_ID_LOG_FILE, 'DONE at %s\n' % (curr_time)) diff --git a/Code/publish_mapped_data.py b/Code/publish_mapped_data.py new file mode 100644 index 0000000..2bd655c --- /dev/null +++ b/Code/publish_mapped_data.py @@ -0,0 +1,33 @@ +import redis +from configure import DAILY_MAP_NUMBER, MIN_FASTQ_FILE_SIZE, RNA_SEQ_INFO_FILE, DOWNLOADED_SRA_ID_LOG_FILE, IGNORED_SRA_ID_LOG_FILE, UPDATE_NETWORK_LOG_FILE, MAPPED_RDATA_DIR, RAW_RDATA_DIR, SALMON_MAP_RESULT_DIR, REDIS_CHANNEL +import json, glob, os, time +from datetime import datetime +def publish(mapped_files): + redis_host = os.getenv('REDIS_HOST', '127.0.0.1') + redis_password = os.getenv('REDIS_PASSWORD', '123456') + print(redis_password) + r = redis.Redis(host=redis_host, port=6379, password=redis_password, db=0) + r.publish(REDIS_CHANNEL, json.dumps({'filename':'REDIS_START.txt', 'data':str(datetime.now())})) + for fname in mapped_files: + print(fname) + try: + file_basename = os.path.basename(fname) + with open(fname, 'r') as f: + data = f.read() + r.publish(REDIS_CHANNEL, json.dumps({'filename':file_basename, 'data':data})) + time.sleep(3) + except Exception as e: + print(f'ERROR {e}') + r.publish(REDIS_CHANNEL, json.dumps({'filename':'REDIS_ERROR.txt', 'data':str(e)})) + + +# after mapping is finished, move all resulting files to MAPPED_RDATA_DIR +mapped_files = glob.glob('%s/*_quant.txt' % (SALMON_MAP_RESULT_DIR.rstrip('/'))) +print(mapped_files) +publish(mapped_files) +if mapped_files != []: + cmd = 'mv %s/*_quant.txt %s' % (SALMON_MAP_RESULT_DIR.rstrip('/'), MAPPED_RDATA_DIR) + os.system(cmd) + print('[download_and_map.py] Done. Check directory %s.' % (os.path.abspath(MAPPED_RDATA_DIR))) +else: + print('[download_and_map.py] No quant files to move.') diff --git a/Code/test_redis_publish.py b/Code/test_redis_publish.py index 937833f..722e80e 100644 --- a/Code/test_redis_publish.py +++ b/Code/test_redis_publish.py @@ -8,7 +8,7 @@ def publish(number): r.publish(REDIS_CHANNEL, json.dumps({'filename':'a.txt', 'data':str(number)})) n = 0 -while True: +while n < 10: publish(n) - time.sleep(3) + time.sleep(2) n += 1 |