diff options
author | Hui Lan <lanhui@zjnu.edu.cn> | 2019-12-04 19:03:19 +0800 |
---|---|---|
committer | Hui Lan <lanhui@zjnu.edu.cn> | 2019-12-04 19:03:19 +0800 |
commit | 97fdefab064f63642fa3ece05b807d29b459df31 (patch) | |
tree | a058530023224f3e35b1783996f3530c80c04bc5 /Code/exclude_edges.py |
brain: add python and R code to local repository.
Diffstat (limited to 'Code/exclude_edges.py')
-rw-r--r-- | Code/exclude_edges.py | 55 |
1 files changed, 55 insertions, 0 deletions
diff --git a/Code/exclude_edges.py b/Code/exclude_edges.py new file mode 100644 index 0000000..1fb5f77 --- /dev/null +++ b/Code/exclude_edges.py @@ -0,0 +1,55 @@ +# Usage: python exclude_edges.py edges.txt
+#
+# Purpose: Exclude edges whose TF is in exclude_tf_list, since we want to hide unpublished data. Do not distinguish +/- edges.
+# For each TF-Target pair, keep the edge that has the largest metric value. If the correlation is negative, use its absolute value.
+#
+# Created by Hui on 5 Jan 2018
+
+import os, sys
+
def remove_minus(s):
    ''' Return the part of s that follows its first minus sign.

    Everything up to and including the first '-' is dropped.  When s
    contains no '-' at all, s is returned unchanged.
    '''
    _, minus, rest = s.partition('-')
    if minus:
        return rest
    return s
+
+
def neg2pos(s):
    ''' Return the tab-separated line s with a negative third field made positive.

    Leading and trailing whitespace of s is stripped first.  The third
    field is parsed as a float; if it is below zero, its minus sign is
    removed with remove_minus().  All other fields are passed through
    untouched and the fields are re-joined with tabs.
    '''
    fields = s.strip().split('\t')
    value = fields[2]
    if float(value) < 0:
        fields[2] = remove_minus(value)
    return '\t'.join(fields)
+
+
def make_edge_dict(fname, exclude_lst):
    ''' Build a dictionary holding the best edge line for each TF-target pair.

    fname is a tab-separated edge file.  For every line, the target id is
    the first whitespace-separated token of field 1, the TF id is the
    first token of field 2, and the metric is field 9 parsed as a float.

    Lines whose TF id is in exclude_lst are dropped.  Blank lines are
    skipped.  When several lines share the same TF-target pair, only the
    line with the largest metric is kept (the first one wins on ties).

    Returns a dictionary keyed by '<tf_id>.<target_id>'.  Each value is a
    dictionary with two entries: 'metric' (float) and 'line' (the line
    after neg2pos() has been applied to it).

    Raises IndexError or ValueError if a non-blank line has too few fields
    or a non-numeric metric/correlation.
    '''
    d = {}
    # The with-statement closes the file even if a malformed line raises.
    with open(fname) as f:
        for line in f:
            line = line.strip()
            if not line:
                continue  # tolerate blank lines, e.g. a trailing newline

            lst = line.split('\t')
            tf_id = lst[1].split()[0]
            if tf_id in exclude_lst:
                continue

            target_id = lst[0].split()[0]
            metric = float(lst[8])
            k = tf_id + '.' + target_id
            # Keep the first line seen for a pair, and replace it only when
            # a strictly larger metric shows up.
            if k not in d or d[k]['metric'] < metric:
                # neg2pos makes the third field (correlation) positive if it
                # is negative.  Indicate influence, not activation/repression.
                d[k] = {'metric': metric, 'line': neg2pos(line)}
    return d
+
+
# main
# Rewrite the edge file given on the command line in place, keeping only
# the best edge per TF-target pair and dropping edges of the excluded TFs.
if len(sys.argv) < 2:
    sys.exit('Usage: python exclude_edges.py edges.txt')

exclude_tf_list = ['AT4G26840', 'AT3G18550']  # TFs whose edges must be hidden
edge_file_name = sys.argv[1]

# Read everything into memory first: opening the same file with 'w' below
# truncates it, so all kept edges must already be in d.
d = make_edge_dict(edge_file_name, exclude_tf_list)

# The with-statement closes (and flushes) the file even if a write fails.
with open(edge_file_name, 'w') as f:
    for k in sorted(d):
        f.write('%s\n' % d[k]['line'])
|