# Usage: python exclude_edges.py edges.txt
#
# Purpose: Exclude the edges whose TF is in exclude_tf_list, since we want to
#          hide unpublished data.  Don't distinguish +/- edges.
#          For a TF-Target pair, use the edge that has the largest value of
#          metric.  If correlation is negative, use its absolute value.
#
# NOTE: edges.txt is overwritten in place with the filtered edges.
#
# Created by Hui on 5 Jan 2018
# Added to the local repository as Code/exclude_edges.py (commit 97fdefab064f,
# Hui Lan, 4 Dec 2019: "brain: add python and R code to local repository.").

import os
import sys


# TFs whose edges must not appear in the output (unpublished data).
exclude_tf_list = ['AT4G26840', 'AT3G18550']


def remove_minus(s):
    ''' Remove the minus sign in s.

    Everything up to and including the FIRST '-' is dropped; if s contains no
    '-', s is returned unchanged.  Only call this on the text of a negative
    number: on a positive number in scientific notation (e.g. '1e-05') the
    first '-' is the exponent sign and the mantissa would be cut off.
    '''
    _head, sep, tail = s.partition('-')
    return tail if sep else s


def neg2pos(s):
    ''' Return the tab-delimited line s with its third field made non-negative.

    s is stripped of surrounding whitespace first.  The third field
    (correlation) must parse as a float; if it is negative its minus sign is
    removed textually, so the remaining digits are kept exactly as written.
    All other fields are returned untouched.

    Raises IndexError if s has fewer than three fields and ValueError if the
    third field is not a number.
    '''
    lst = s.strip().split('\t')
    if float(lst[2]) < 0:
        lst[2] = remove_minus(lst[2])
    return '\t'.join(lst)


def make_edge_dict(fname, exclude_lst):
    ''' Read the edge file fname and keep the best edge of each TF-Target pair.

    Each non-blank line is tab-delimited.  The first whitespace-separated
    token of field 1 is the target ID, the first token of field 2 is the TF
    ID, field 3 is the correlation and field 9 is the metric.  Lines whose TF
    ID is in exclude_lst are dropped.  For the remaining lines, only the one
    with the largest metric is kept per TF-Target pair (on a tie the first
    one seen wins).

    Returns a dict mapping 'TF_ID.TARGET_ID' to
    {'metric': float, 'line': str}, where 'line' is the stripped input line
    with its correlation made non-negative (it indicates influence, not
    activation/repression).

    Raises IndexError / ValueError on a malformed non-blank line.
    '''
    d = {}
    # 'with' guarantees the file is closed even if a malformed line raises.
    with open(fname) as f:
        for line in f:
            line = line.strip()
            if not line:
                # Tolerate blank lines (e.g. a trailing empty line).
                continue
            lst = line.split('\t')
            tf_id = lst[1].split()[0]
            if tf_id in exclude_lst:
                continue
            target_id = lst[0].split()[0]
            metric = float(lst[8])
            k = tf_id + '.' + target_id
            # Strict '<' keeps the first line seen when metrics are equal.
            if k not in d or d[k]['metric'] < metric:
                d[k] = {'metric': metric, 'line': neg2pos(line)}
    return d


def main(argv=None):
    ''' Filter the edge file named on the command line, rewriting it in place. '''
    if argv is None:
        argv = sys.argv
    if len(argv) < 2:
        sys.exit('Usage: python exclude_edges.py edges.txt')
    edge_file_name = argv[1]
    # The whole file is read into d before it is reopened for writing, so
    # truncating it here does not lose any edge that should be kept.
    d = make_edge_dict(edge_file_name, exclude_tf_list)
    with open(edge_file_name, 'w') as f:
        for k in sorted(d):
            f.write('%s\n' % d[k]['line'])


if __name__ == '__main__':
    main()