summary | refs | log | tree | commit | diff
path: root/Code
diff options
context:
space:
mode:
Diffstat (limited to 'Code')
-rw-r--r-- Code/make_parameter_rnaseq.py 17
1 files changed, 16 insertions, 1 deletions
diff --git a/Code/make_parameter_rnaseq.py b/Code/make_parameter_rnaseq.py
index 1fe9c6e..18ef568 100644
--- a/Code/make_parameter_rnaseq.py
+++ b/Code/make_parameter_rnaseq.py
@@ -8,7 +8,8 @@
import sys, os, glob, json
import fnmatch, re
-from configure import RNA_SEQ_INFO_FILE
+from datetime import datetime
+from configure import RNA_SEQ_INFO_FILE, UPDATE_NETWORK_LOG_FILE
NON_ZERO_RATIO = 0.2 # omit *_quant.txt files with too many zeros.
QUANT_PATH = ['../Data/R/Mapped/public', '../Data/R/Mapped/inhouse', '../Data/R/Mapped/other'] # places where all _quant.txt reside. _quant.txt in sub-directories will also be used.
@@ -76,6 +77,8 @@ def non_zero_ratio(fname):
for line in lines[1:]:
line = line.strip()
lst = line.split()
+ if len(lst) < 4: # this should not occur. Report error if occurred.
+ return -1
tpm = lst[3]
if not tpm == '0' and not 'nan' in tpm:
non_zero_count += 1
@@ -98,6 +101,16 @@ def read_ena_data_info_json(fname):
d[run_id] = 1
return d
+
+def write_log_file(s, fname):
+    """Append message s to log file fname as one entry prefixed with '[YYYY-MM-DD HH:MM]: '."""
+    curr_time = datetime.now().strftime('%Y-%m-%d %H:%M')
+    s = '[' + curr_time + ']: ' + s
+    if not s.endswith('\n'): # terminate the entry even when s contains embedded newlines
+        s += '\n'
+    with open(fname, 'a') as f: # context manager closes the file even if the write fails
+        f.write(s)
+
### main
if not os.path.exists(RNA_SEQ_INFO_FILE):
print('make_parameter_rnaseq.py: you must provide %s. See parse_ena_xml.py on how to make it.' % (RNA_SEQ_INFO_FILE))
@@ -156,6 +169,8 @@ for fn in sorted(quant_files):
print('')
include_count += 1
already_added_dict[myid2] = 'yes'
+ elif nzr < 0:
+ write_log_file('[make_parameter_rnaseq.py] Warning: incomplete line in file %s' % (fn), UPDATE_NETWORK_LOG_FILE)
else:
#print('%s has too many zeroes. ignore.' % (fn))
pass