summaryrefslogtreecommitdiff
path: root/Code/validate_parameter_for_buildCmatrix.py
diff options
context:
space:
mode:
Diffstat (limited to 'Code/validate_parameter_for_buildCmatrix.py')
-rw-r--r--Code/validate_parameter_for_buildCmatrix.py85
1 files changed, 85 insertions, 0 deletions
diff --git a/Code/validate_parameter_for_buildCmatrix.py b/Code/validate_parameter_for_buildCmatrix.py
new file mode 100644
index 0000000..ced6062
--- /dev/null
+++ b/Code/validate_parameter_for_buildCmatrix.py
@@ -0,0 +1,85 @@
+# Usage: python validate_parameter_for_buildCmatrix.py
+# Purpose: make sure all files exist.
+# Hui 24 Jan 2018 Jinhua
+
+import os, sys
+import numpy as np
+import glob
+import time
+import subprocess
+from datetime import datetime
+
def get_value(s, delimit):
    """Return the text that follows the first occurrence of ``delimit`` in ``s``.

    Only the first delimiter splits the string, so any later occurrences stay
    inside the returned value.

    Args:
        s: The string to split, e.g. ``'LOCATION: /path/to/file'``.
        delimit: The separator string.

    Returns:
        The part of ``s`` after the first delimiter, with surrounding
        whitespace removed.

    Raises:
        IndexError: If ``delimit`` does not occur in ``s``.
    """
    pieces = s.split(delimit, 1)  # only split at the first delimit
    if len(pieces) < 2:
        # Same exception type the bare index lookup would raise, but with a
        # message that names the offending input.
        raise IndexError('delimiter %r not found in %r' % (delimit, s))
    return pieces[1].strip()
+
def get_key_value(s):
    """Split a ``KEY=VALUE`` string into a ``(key, value)`` tuple.

    The string is split at the first ``=`` only, so a value that itself
    contains ``=`` (for example a path such as ``/data/run=3``) is returned
    intact instead of being cut off at its first ``=``.

    Args:
        s: Text of the form ``KEY=VALUE``; whitespace around either part is
            ignored.

    Returns:
        A ``(key, value)`` tuple of stripped strings.

    Raises:
        IndexError: If ``s`` contains no ``=``.
    """
    pieces = s.split('=', 1)
    if len(pieces) < 2:
        # Keep the exception type of the plain index lookup, with a clearer message.
        raise IndexError("no '=' found in %r" % (s,))
    return (pieces[0].strip(), pieces[1].strip())
+
def validate_gene_file(fname):
    """Check that every line of the gene file has at least six tab-separated fields.

    Each line is stripped of surrounding whitespace and split on tabs. On the
    first line with fewer than six fields, a message is printed and the
    process exits with status 1 so that callers can detect the failure.

    Args:
        fname: Path of the gene file to check.

    Returns:
        None when every line passes the check.

    Raises:
        SystemExit: With code 1 on the first line that has too few fields.
    """
    with open(fname) as f:
        for raw_line in f:  # check all lines
            line = raw_line.strip()
            fields = line.split('\t')
            if len(fields) < 6:
                print('Not enought fields: %s. Only %d are given. Each line must have gene_id, gene_name, chr, start, end, strand, description (optional). See prepare_gene_file.py in the documentation on how to prepare this file.' % (line, len(fields)))
                # Non-zero status: a failed validation must not look like success.
                sys.exit(1)
+
def validate_parameter_for_buildcmatrix(fname):
    """Validate the parameter file used for building the C matrix.

    The file is read line by line. Two kinds of lines are inspected:

    * ``%%KEY=VALUE`` lines. ``GENE_FILE`` and ``CHR_INFO`` must name existing
      paths (the gene file's contents are checked as well), ``DESTINATION``
      must be an existing directory, and ``TARGET_RANGE`` must be an integer
      greater than 0.
    * ``LOCATION:<path>`` lines. Each one is counted; a path that does not
      exist is reported, but validation continues.

    After all lines are read, ``GENE_FILE``, ``DESTINATION`` and ``CHR_INFO``
    must have been given and at least one ``LOCATION:`` line must be present.

    Args:
        fname: Path of the parameter file.

    Returns:
        None when the file passes every check.

    Raises:
        SystemExit: With code 1, after printing a message, on the first
            failed check.
    """
    # first the file must exist
    if not os.path.exists(fname):
        print('CANNOT FIND %s.' % (fname))
        sys.exit(1)
    with open(fname) as f:
        lines = f.readlines()

    d = {}
    location_count = 0
    for line in lines:
        line = line.strip()
        if line.startswith('%%'):
            try:
                k, v = get_key_value(line[2:])
            except IndexError:
                # A '%%' line without '=' cannot be split into key and value.
                print('Invalid parameter line (expected %%%%KEY=VALUE): %s' % (line))
                sys.exit(1)
            d[k] = v
            if k == 'GENE_FILE' or k == 'CHR_INFO':
                if not os.path.exists(v):
                    print('%s not exists.' % (v))
                    sys.exit(1)
                if k == 'GENE_FILE':
                    validate_gene_file(v)
            if k == 'DESTINATION':
                if not os.path.isdir(v):
                    print('%s not exists.' % (v))
                    sys.exit(1)
            if k == 'TARGET_RANGE':
                try:
                    target_range = int(v)
                except ValueError:
                    print('Target range (%s) must be an integer greater than 0.' % (v))
                    sys.exit(1)
                if target_range <= 0:
                    # Format the parsed integer: v itself is a string and
                    # cannot be used with %d.
                    print('Target range (%d) must be greater than 0.' % (target_range))
                    sys.exit(1)
        elif line.startswith('LOCATION:'):
            v = get_value(line, ':')
            location_count += 1
            if not os.path.exists(v):
                # Reported only; a missing location does not stop validation.
                print('Location %s does not exists.' % (v))

    if 'GENE_FILE' not in d:
        print('Must specify GENE_FILE.')
        sys.exit(1)
    if 'DESTINATION' not in d:
        print('Must specify DESTINATION.')
        sys.exit(1)
    if 'CHR_INFO' not in d:
        print('Must specify CHR_INFO.')
        sys.exit(1)
    if location_count == 0:
        print('Must contain at least one ChIP-seq.')
        sys.exit(1)
+
## main

# Parameter file to validate. The path is relative, so it is resolved against
# the directory the script is run from.
PARAMETER_FOR_BUILDCMATRIX = '../Data/parameter/parameter_for_buildCmatrix.txt'

# Run the validation; a failed check prints a message and ends the process.
validate_parameter_for_buildcmatrix(PARAMETER_FOR_BUILDCMATRIX)
+