# Provenance: Code/validate_parameter_for_buildCmatrix.py, added in commit
# 97fdefab064f63642fa3ece05b807d29b459df31 (Hui Lan, Wed, 4 Dec 2019),
# "brain: add python and R code to local repository."
#
# Usage: python validate_parameter_for_buildCmatrix.py
# Purpose: make sure all files exist.
# Hui 24 Jan 2018 Jinhua

import os, sys
import numpy as np
import glob
import time
import subprocess
from datetime import datetime


def get_value(s, delimit):
    """Return the text after the first *delimit* in *s*, stripped.

    Only the first occurrence of *delimit* is used as the separator, so
    the returned value may itself contain *delimit*.

    Raises:
        IndexError: if *delimit* does not occur in *s*.
    """
    lst = s.split(delimit, 1)  # only split at the first delimit
    return lst[1].strip()


def get_key_value(s):
    """Split a ``KEY=VALUE`` string into a ``(key, value)`` tuple.

    Both parts are stripped of surrounding whitespace.  Only the first
    ``=`` separates key from value, so a value that contains ``=``
    (for example a path) is returned intact.

    Raises:
        IndexError: if *s* contains no ``=``.
    """
    lst = s.split('=', 1)  # only split at the first '='
    return (lst[0].strip(), lst[1].strip())


def validate_gene_file(fname):
    """Check that every line of the gene file has at least six fields.

    Each line is stripped and split on tabs.  On the first line with
    fewer than six fields an explanatory message is printed and the
    program exits via ``sys.exit()``.
    """
    with open(fname) as f:
        for raw_line in f:  # check all lines
            line = raw_line.strip()
            lst = line.split('\t')
            if len(lst) < 6:
                print('Not enought fields: %s. Only %d are given. Each line must have gene_id, gene_name, chr, start, end, strand, description (optional). See prepare_gene_file.py in the documentation on how to prepare this file.' % (line, len(lst)))
                sys.exit()


def validate_parameter_for_buildcmatrix(fname):
    """Validate the parameter file *fname* used for building the C matrix.

    The file is read line by line:

    * Lines starting with ``%%`` are parsed as ``KEY=VALUE``.
      ``GENE_FILE`` and ``CHR_INFO`` must name existing paths (and the
      gene file is checked with ``validate_gene_file``), ``DESTINATION``
      must be an existing directory, and ``TARGET_RANGE`` must be an
      integer greater than 0.
    * Lines starting with ``LOCATION:`` name a path; a missing path is
      only reported, not fatal.

    ``GENE_FILE``, ``DESTINATION`` and ``CHR_INFO`` are mandatory, and at
    least one ``LOCATION:`` line must be present.

    On any fatal problem a message is printed and the program exits via
    ``sys.exit()``.  Returns ``None`` when the file passes all checks.

    NOTE(review): ``sys.exit()`` without an argument exits with status 0,
    so a shell caller cannot detect failure from the exit code.  This is
    kept as in the original for compatibility -- confirm whether callers
    would prefer a non-zero status.
    """
    # first the file must exist
    if not os.path.exists(fname):
        print('CANNOT FIND %s.' % (fname))
        sys.exit()

    with open(fname) as f:
        lines = f.readlines()

    d = {}
    location_count = 0
    for line in lines:
        line = line.strip()
        if line.startswith('%%'):
            if '=' not in line:
                # Report a malformed line instead of crashing with IndexError.
                print('Malformed parameter line (missing =): %s' % (line))
                sys.exit()
            k, v = get_key_value(line[2:])
            d[k] = v
            if k == 'GENE_FILE' or k == 'CHR_INFO':
                if not os.path.exists(v):
                    print('%s not exists.' % (v))
                    sys.exit()
                if k == 'GENE_FILE':
                    validate_gene_file(v)
            if k == 'DESTINATION':
                if not os.path.isdir(v):
                    print('%s not exists.' % (v))
                    sys.exit()
            if k == 'TARGET_RANGE':
                try:
                    target_range = int(v)
                except ValueError:
                    print('Target range (%s) must be an integer.' % (v))
                    sys.exit()
                if target_range <= 0:
                    # Format the parsed int: v itself is a str and would
                    # make '%d' raise TypeError.
                    print('Target range (%d) must be greater than 0.' % (target_range))
                    sys.exit()
        if line.startswith('LOCATION:'):
            v = get_value(line, ':')
            location_count += 1
            if not os.path.exists(v):
                # Deliberately non-fatal: only report the missing location.
                print('Location %s does not exists.' % (v))

    if 'GENE_FILE' not in d:
        print('Must specify GENE_FILE.')
        sys.exit()
    if 'DESTINATION' not in d:
        print('Must specify DESTINATION.')
        sys.exit()
    if 'CHR_INFO' not in d:
        print('Must specify CHR_INFO.')
        sys.exit()
    if location_count == 0:
        print('Must contain at least one ChIP-seq.')
        sys.exit()


## main

PARAMETER_FOR_BUILDCMATRIX = '../Data/parameter/parameter_for_buildCmatrix.txt'

if __name__ == '__main__':
    validate_parameter_for_buildcmatrix(PARAMETER_FOR_BUILDCMATRIX)