summaryrefslogtreecommitdiff
path: root/Code/test_network4.py
diff options
context:
space:
mode:
authorHui Lan <lanhui@zjnu.edu.cn>2019-12-04 19:03:19 +0800
committerHui Lan <lanhui@zjnu.edu.cn>2019-12-04 19:03:19 +0800
commit97fdefab064f63642fa3ece05b807d29b459df31 (patch)
treea058530023224f3e35b1783996f3530c80c04bc5 /Code/test_network4.py
brain: add python and R code to local repository.
Diffstat (limited to 'Code/test_network4.py')
-rw-r--r--Code/test_network4.py205
1 files changed, 205 insertions, 0 deletions
diff --git a/Code/test_network4.py b/Code/test_network4.py
new file mode 100644
index 0000000..44ce492
--- /dev/null
+++ b/Code/test_network4.py
@@ -0,0 +1,205 @@
+# Make tissue specific networks
+
+import os, sys
+from geneid2name import make_gene_name_AGI_map_dict
+
def get_tfs(fname_lst):
    """Count how many edge lines name each TF across a list of edge files.

    Every non-blank line is split on tabs; the TF ID is the first
    whitespace-separated token of the second column.

    fname_lst -- iterable of paths to edge files.

    Returns a dict mapping TF ID -> number of lines (over all files) whose
    second column starts with that ID.

    Raises IndexError when a non-blank line has fewer than two tab-separated
    columns or an empty second column.
    """
    d = {}
    for fname in fname_lst:
        # The context manager closes the file even when a line fails to parse.
        with open(fname) as f:
            for line in f:
                line = line.strip()
                if not line:
                    # Blank (e.g. trailing) lines carry no edge; skip them.
                    continue
                lst = line.split('\t')
                tf = lst[1].split()[0]
                d[tf] = d.get(tf, 0) + 1
    return d
+
def get_tissue_from_fname(fname):
    """Return the tissue keyword found in an edge file name.

    The base name of fname is searched first, so that a directory component
    that happens to contain a keyword (e.g. '/root/...') cannot mask the
    tissue named in the file itself.  When the base name contains no
    keyword, the whole path is searched instead, which keeps the result
    unchanged for paths that only carry the tissue in a directory name.

    Keywords are tried in list order and the first hit wins; 'seedling'
    therefore has to stay ahead of 'seed'.

    fname -- file name or path (string).

    Returns the matching keyword, or 'unknown' when none matches.
    """
    tissue_lst = [
        'seedling',
        'meristem',
        'flower',
        'aerial',
        'shoot',
        'seed',
        'leaf',
        'root',
        'stem']
    for candidate in (os.path.basename(fname), fname):
        for x in tissue_lst:
            if x in candidate:
                return x
    return 'unknown'
+
def get_edges_consisting_of_tfs(fname_lst, tf_dict):
    """Collect, per tissue, the edges whose target and TF are both in tf_dict.

    Every non-blank line is split on tabs: column 1 is the target, column 2
    the TF and column 3 a score parsed with float().  The ID of a column is
    its first whitespace-separated token.

    fname_lst -- iterable of edge file paths; the tissue of each file is
                 taken from get_tissue_from_fname(fname).
    tf_dict   -- container of TF IDs, used for membership tests only.

    Returns {tissue: {'<target>_<tf>': [(column1, column2, score), ...]}}
    where column1/column2 are the unmodified column texts.

    Raises IndexError for a non-blank line with fewer than three columns and
    ValueError when the third column is not a number.
    """
    d = {}
    for fname in fname_lst:
        kt = get_tissue_from_fname(fname)
        # A later file that maps to the same tissue replaces earlier entries.
        d[kt] = {}
        # The context manager closes the file even when a line fails to parse.
        with open(fname) as f:
            for line in f:
                line = line.strip()
                if not line:
                    # Blank (e.g. trailing) lines carry no edge; skip them.
                    continue
                lst = line.split('\t')
                target = lst[0].split()[0].strip()
                tf = lst[1].split()[0].strip()
                # Parsed before filtering so that a malformed score is
                # reported for every line, not just for TF-TF edges.
                score = float(lst[2])
                if tf in tf_dict and target in tf_dict:
                    k = target + '_' + tf
                    d[kt].setdefault(k, []).append((lst[0], lst[1], score))
    return d
+
def get_degree(fname_lst, tf_dict):
    """Count per-tissue out-, in- and total degree of every node in the edge files.

    Every non-blank line is split on tabs: column 1 is the target and
    column 2 the TF; the ID of a column is its first whitespace-separated
    token.  Each line adds one to the TF's out-degree, one to the target's
    in-degree, and one to the total degree of both (so a line whose target
    and TF are the same ID adds two to that ID's total).

    fname_lst -- iterable of edge file paths; the tissue of each file is
                 taken from get_tissue_from_fname(fname).
    tf_dict   -- accepted for interface compatibility but not consulted: the
                 original guard was `if True or ...`, i.e. every edge was
                 always counted.  That behaviour is kept here.

    Returns (d_all, d_out, d_in), each shaped {tissue: {ID: count}}.

    Raises IndexError for a non-blank line with fewer than two columns.
    """
    d_out = {}
    d_in = {}
    d_all = {}
    for fname in fname_lst:
        kt = get_tissue_from_fname(fname)
        # A later file that maps to the same tissue restarts its counters.
        out_cnt = d_out[kt] = {}
        in_cnt = d_in[kt] = {}
        all_cnt = d_all[kt] = {}
        # The context manager closes the file even when a line fails to parse.
        with open(fname) as f:
            for line in f:
                line = line.strip()
                if not line:
                    # Blank (e.g. trailing) lines carry no edge; skip them.
                    continue
                lst = line.split('\t')
                target = lst[0].split()[0].strip()
                tf = lst[1].split()[0].strip()

                out_cnt[tf] = out_cnt.get(tf, 0) + 1
                in_cnt[target] = in_cnt.get(target, 0) + 1
                all_cnt[target] = all_cnt.get(target, 0) + 1
                all_cnt[tf] = all_cnt.get(tf, 0) + 1
    return d_all, d_out, d_in
+
+
def _simplify_gene(field):
    """Reduce one 'ID name1;name2;...' column to 'ID' or 'ID_name1'."""
    tokens = field.split()
    gene_id = tokens[0]
    # A column that carries only an ID is treated like the '.' placeholder
    # instead of raising IndexError.
    name = tokens[1].split(';')[0] if len(tokens) > 1 else '.'
    if name == '.':
        return gene_id
    return gene_id + '_' + name


def simplify(s):
    """Shorten the two gene columns of a tab-separated edge line.

    s -- 'col1<TAB>col2<TAB>score[...]' where col1 and col2 look like
         'ID name1;name2;...' and a name of '.' means "no name".

    Each gene column becomes 'ID' when the name is '.' (or absent) and
    'ID_name1' otherwise; the third column is passed through unchanged and
    any further columns are dropped.

    Returns the simplified 'a<TAB>b<TAB>score' string.

    Raises IndexError when s has fewer than three tab-separated columns or
    a gene column is empty.
    """
    lst = s.split('\t')
    return '%s\t%s\t%s' % (_simplify_gene(lst[0]), _simplify_gene(lst[1]), lst[2])
+
# main
# Mapping file handed to make_gene_name_AGI_map_dict; the resulting
# agi2name_dict is used further down to label TFs in the degree tables.
GENE_ID_TO_GENE_NAME = '../Data/information/AGI-to-gene-names_v2.txt'
agi2name_dict = make_gene_name_AGI_map_dict(GENE_ID_TO_GENE_NAME)

# One edge file per tissue; the tissue is recovered from the file name by
# get_tissue_from_fname.
edge_file_lst = [
    '/home/hui/network/v03/Data/history/edges/many_targets/edges.txt.simple.correlation.seedling.txt.20170629_203729',
    '/home/hui/network/v03/Data/history/edges/many_targets/edges.txt.simple.correlation.meristem.txt.20170629_203729',
    '/home/hui/network/v03/Data/history/edges/many_targets/edges.txt.simple.correlation.flower.txt.20170629_203729',
    '/home/hui/network/v03/Data/history/edges/many_targets/edges.txt.simple.correlation.aerial.txt.20170629_203729',
    '/home/hui/network/v03/Data/history/edges/many_targets/edges.txt.simple.correlation.shoot.txt.20170629_203729',
    '/home/hui/network/v03/Data/history/edges/many_targets/edges.txt.simple.correlation.seed.txt.20170629_203729',
    '/home/hui/network/v03/Data/history/edges/many_targets/edges.txt.simple.correlation.leaf.txt.20170629_203729',
    '/home/hui/network/v03/Data/history/edges/many_targets/edges.txt.simple.correlation.root.txt.20170629_203729',
    '/home/hui/network/v03/Data/history/edges/many_targets/edges.txt.simple.correlation.stem.txt.20170629_203729'
]

tf_dict = get_tfs(edge_file_lst)

print('Total number of TFs: %d' % (len(tf_dict)))
# d0 looks like {'shoot': {'target_tf': [(col1, col2, score), ...]}, 'flower': {...}}
d0 = get_edges_consisting_of_tfs(edge_file_lst, tf_dict)
with open('result.skeleton.txt', 'w') as f:
    for kt in d0:  # kt is tissue
        d = d0[kt]
        f.write('##TF skeleton size in %s: %d.\n' % (kt, len(d)))
        for k in d:
            # Write exactly one line per target/TF pair: the record with the
            # largest absolute score (the first one wins on ties).  The
            # previous loop reset its running maximum for every record, so it
            # wrote all records and the line count disagreed with the
            # "skeleton size" header above.
            best = max(d[k], key=lambda x: abs(x[2]))
            s = '%s\t%s\t%4.2f' % (best[0], best[1], best[2])
            f.write(simplify(s) + '\n')
+
# for each TF, get its out-degree and in-degree in each tissue
def _write_degree_table(out_fname, degree_by_tissue, tf_ids, id2name):
    """Write one tab-separated table of per-tissue degrees, one row per TF.

    out_fname        -- path of the table to (over)write.
    degree_by_tissue -- {tissue: {ID: count}} as returned by get_degree.
    tf_ids           -- iterable of TF IDs; one row is written per ID.
    id2name          -- mapping ID -> gene name used for the row label.

    The header is 'TF' followed by the tissues in the iteration order of
    degree_by_tissue.  Each row starts with 'ID name' (name is '.' when the
    ID has no entry in id2name or maps to itself), followed by the TF's
    count in every tissue, 0 where it does not occur.
    """
    tissues = list(degree_by_tissue)
    # The context manager guarantees the table is flushed and closed even if
    # a row fails to format.
    with open(out_fname, 'w') as f:
        f.write('%s\n' % ('\t'.join(['TF'] + tissues)))
        for tf in tf_ids:
            name = '.'
            if tf in id2name and id2name[tf] != tf:
                name = id2name[tf]
            cells = [tf + ' ' + name]
            for k in tissues:
                cells.append('%d' % (degree_by_tissue[k].get(tf, 0)))
            f.write('\t'.join(cells) + '\n')


dd_all, dd_out, dd_in = get_degree(edge_file_lst, tf_dict)
_write_degree_table('result.out.txt', dd_out, tf_dict, agi2name_dict)
_write_degree_table('result.in.txt', dd_in, tf_dict, agi2name_dict)
_write_degree_table('result.all.txt', dd_all, tf_dict, agi2name_dict)