summary refs log tree commit diff
path: root/Code/exclude_edges.py
diff options
context:
space:
mode:
author Hui Lan <lanhui@zjnu.edu.cn> 2019-12-04 19:03:19 +0800
committer Hui Lan <lanhui@zjnu.edu.cn> 2019-12-04 19:03:19 +0800
commit 97fdefab064f63642fa3ece05b807d29b459df31 (patch)
tree a058530023224f3e35b1783996f3530c80c04bc5 /Code/exclude_edges.py
brain: add python and R code to local repository.
Diffstat (limited to 'Code/exclude_edges.py')
-rw-r--r-- Code/exclude_edges.py 55
1 files changed, 55 insertions, 0 deletions
diff --git a/Code/exclude_edges.py b/Code/exclude_edges.py
new file mode 100644
index 0000000..1fb5f77
--- /dev/null
+++ b/Code/exclude_edges.py
@@ -0,0 +1,55 @@
+# Usage: python exclude_edges.py edges.txt
+#
+# Purpose: Exclude the edge whose TF is in exclude_tf_list, since we want to hide unpublished data. Don't distinguish +/- edges.
+# For a TF-Target pair, use the edge that has largest value of metric. If correlation is negative, use its absolute value.
+#
+# Created by Hui on 5 Jan 2018
+
+import os, sys
+
def remove_minus(s):
    ''' Return s without its leading minus sign.

    Only a minus sign that begins the value (optionally preceded by
    whitespace) is removed, together with that leading whitespace.  A
    minus sign elsewhere in the string, such as the exponent sign in
    '1e-05', is left alone.  A string that does not start with a minus
    sign is returned unchanged.
    '''
    stripped = s.lstrip()
    if stripped.startswith('-'):
        return stripped[1:]
    return s
+
+
def neg2pos(s):
    ''' Return the tab-separated line s with the sign of its third field removed.

    Surrounding whitespace (including the trailing newline) is stripped
    from s before it is split on tabs.  If the third field is written
    with a leading minus sign, that sign is dropped; every other field is
    returned untouched.

    Raises IndexError if s has fewer than three tab-separated fields and
    ValueError if the third field is not a number.
    '''
    lst = s.strip().split('\t')
    value = lst[2]
    float(value)  # validation only: reject a non-numeric third field
    # Decide on the written sign rather than on "float(value) < 0", so that
    # a negative zero such as '-0.00' also loses its minus sign.
    unsigned = value.lstrip()
    if unsigned.startswith('-'):
        lst[2] = unsigned[1:]
    return '\t'.join(lst)
+
+
def make_edge_dict(fname, exclude_lst):
    ''' Read the edge file fname and keep one edge per TF-target pair.

    Each non-blank line is split on tabs.  The first whitespace-separated
    token of the second field is the TF id, the first token of the first
    field is the target id, and the ninth field is the metric.  Lines
    whose TF id is in exclude_lst are dropped.  For every remaining
    TF-target pair, the line with the largest metric is kept; on a tie
    the line seen first wins.

    Returns a dictionary keyed by 'TF_ID.TARGET_ID'.  Each value is a
    dictionary with two entries: 'metric' (a float) and 'line' (the line
    after neg2pos has been applied to it).

    Raises IndexError or ValueError if a non-blank line is malformed.
    '''
    d = {}
    # "with" closes the file even if a malformed line raises.
    with open(fname) as f:
        for line in f:
            line = line.strip()
            if not line:
                # Blank lines (e.g. a trailing one) carry no edge.
                continue
            lst = line.split('\t')
            tf_id = lst[1].split()[0]
            if tf_id in exclude_lst:
                continue
            target_id = lst[0].split()[0]
            metric = float(lst[8])
            k = tf_id + '.' + target_id
            if k not in d or d[k]['metric'] < metric:
                # Make the third field (correlation) positive if it is
                # negative.  Indicate influence, not activation/repression.
                d[k] = {'metric': metric, 'line': neg2pos(line)}
    return d
+
+
# main
# TFs whose edges are removed from the edge file (hides unpublished data).
exclude_tf_list = ['AT4G26840', 'AT3G18550']

# Run only when executed as a script, so that importing this module never
# rewrites a file.
if __name__ == '__main__':
    if len(sys.argv) < 2:
        sys.exit('Usage: python exclude_edges.py edges.txt')
    edge_file_name = sys.argv[1]
    # The whole file is read into memory first, because the same file is
    # then overwritten with the filtered edges.
    d = make_edge_dict(edge_file_name, exclude_tf_list)
    with open(edge_file_name, 'w') as f:  # this truncates the edge file
        for k in sorted(d):
            f.write('%s\n' % d[k]['line'])