# Make tissue specific networks
#
# Provenance: Code/test_network4.py as added by commit
# 97fdefab064f63642fa3ece05b807d29b459df31 (Hui Lan, Wed, 4 Dec 2019),
# "brain: add python and R code to local repository."
#
# Input edge files are read as tab-separated lines whose first field
# starts with the target gene id, whose second field starts with the TF
# gene id, and whose third field is a numeric score.

import os
import sys


# Tissue names recognised in edge file names.  Order matters because the
# lookup is a substring test: 'seedling' must be tried before 'seed'.
TISSUE_LST = [
    'seedling',
    'meristem',
    'flower',
    'aerial',
    'shoot',
    'seed',
    'leaf',
    'root',
    'stem',
]

GENE_ID_TO_GENE_NAME = '../Data/information/AGI-to-gene-names_v2.txt'

# One edge file per tissue, listed in the same order as TISSUE_LST.
EDGE_FILE_TEMPLATE = (
    '/home/hui/network/v03/Data/history/edges/many_targets/'
    'edges.txt.simple.correlation.%s.txt.20170629_203729'
)
edge_file_lst = [EDGE_FILE_TEMPLATE % tissue for tissue in TISSUE_LST]


def _edge_fields(line):
    """Return the tab-separated fields of one edge line, or None if blank.

    The line is stripped of surrounding whitespace before splitting.
    Blank lines yield None so that callers can skip them instead of
    failing with an IndexError.
    """
    line = line.strip()
    if not line:
        return None
    return line.split('\t')


def get_tfs(fname_lst):
    """Count how many edge lines name each TF across the given files.

    fname_lst -- iterable of edge file paths.

    The TF id is the first whitespace-separated token of the second
    tab-separated field.  Returns a dict mapping TF id to its line count.
    """
    d = {}
    for fname in fname_lst:
        with open(fname) as f:
            for line in f:
                lst = _edge_fields(line)
                if lst is None:
                    continue
                tf = lst[1].split()[0]
                d[tf] = d.get(tf, 0) + 1
    return d


def get_tissue_from_fname(fname):
    """Return the first tissue in TISSUE_LST found in fname, else 'unknown'.

    The whole string (directories included) is searched, not only the
    base name.
    """
    for x in TISSUE_LST:
        if x in fname:
            return x
    return 'unknown'


def get_edges_consisting_of_tfs(fname_lst, tf_dict):
    """Collect, per tissue, the edges whose both endpoints are TFs.

    fname_lst -- iterable of edge file paths; the tissue is derived from
                 each path with get_tissue_from_fname().
    tf_dict   -- container of TF ids (only membership is tested).

    Returns {tissue: {'<target>_<tf>': [(field0, field1, score), ...]}}
    where field0/field1 are the unmodified first two fields of the line
    and score is the third field converted to float.  A tissue's entry is
    started afresh for every file, so a later file that maps to the same
    tissue replaces the earlier one.

    Raises ValueError if a score field is not a valid float.
    """
    d = {}
    for fname in fname_lst:
        kt = get_tissue_from_fname(fname)
        edges = d[kt] = {}
        with open(fname) as f:
            for line in f:
                lst = _edge_fields(line)
                if lst is None:
                    continue
                target = lst[0].split()[0].strip()
                tf = lst[1].split()[0].strip()
                # Converted before filtering so that a malformed score is
                # reported even on lines that would be filtered out.
                score = float(lst[2])
                if tf in tf_dict and target in tf_dict:
                    k = target + '_' + tf
                    edges.setdefault(k, []).append((lst[0], lst[1], score))
    return d


def get_degree(fname_lst, tf_dict, tfs_only=False):
    """Count per-tissue degrees of every gene appearing in the edge files.

    fname_lst -- iterable of edge file paths.
    tf_dict   -- container of TF ids; consulted only when tfs_only is true.
    tfs_only  -- when true, count only edges whose target and TF are both
                 in tf_dict.  The default (False) counts every edge.

    Returns a tuple (d_all, d_out, d_in) of {tissue: {gene_id: count}}:
    d_out counts lines in which the gene is the TF, d_in counts lines in
    which it is the target, and d_all counts both roles.  A tissue's
    counters are started afresh for every file.
    """
    d_out = {}
    d_in = {}
    d_all = {}
    for fname in fname_lst:
        kt = get_tissue_from_fname(fname)
        out_cnt = d_out[kt] = {}
        in_cnt = d_in[kt] = {}
        all_cnt = d_all[kt] = {}
        with open(fname) as f:
            for line in f:
                lst = _edge_fields(line)
                if lst is None:
                    continue
                target = lst[0].split()[0].strip()
                tf = lst[1].split()[0].strip()
                if tfs_only and not (tf in tf_dict and target in tf_dict):
                    continue
                out_cnt[tf] = out_cnt.get(tf, 0) + 1
                in_cnt[target] = in_cnt.get(target, 0) + 1
                all_cnt[target] = all_cnt.get(target, 0) + 1
                all_cnt[tf] = all_cnt.get(tf, 0) + 1
    return d_all, d_out, d_in


def _node_label(field):
    """Return '<id>' or '<id>_<name>' for one 'id name1;name2...' field.

    The name is the part of the second whitespace-separated token before
    the first ';'.  A name of '.' is treated as "no name".
    """
    tokens = field.split()
    name = tokens[1].split(';')[0]
    if name == '.':
        return tokens[0]
    return tokens[0] + '_' + name


def simplify(s):
    """Shorten an edge line 'target-field<TAB>tf-field<TAB>score'.

    Both gene fields are reduced with _node_label(); the third field is
    passed through unchanged.  Returns the three parts joined by tabs.
    """
    lst = s.split('\t')
    return '%s\t%s\t%s' % (_node_label(lst[0]), _node_label(lst[1]), lst[2])


def write_skeleton(fname, edges_by_tissue):
    """Write the TF-only network of each tissue to fname.

    edges_by_tissue -- result of get_edges_consisting_of_tfs().

    For every tissue a '##TF skeleton size' header line is written,
    followed by one simplified line per target/TF pair.
    """
    with open(fname, 'w') as f:
        for kt in edges_by_tissue:  # kt is tissue
            d = edges_by_tissue[kt]
            f.write('##TF skeleton size in %s: %d.\n' % (kt, len(d)))
            for k in d:
                # NOTE: a pair may carry several scored edges; report the
                # one with the largest absolute score.  The running
                # maximum used to be reset for every edge, which made the
                # comparison a no-op.
                x = max(d[k], key=lambda edge: abs(edge[2]))
                s = '%s\t%s\t%4.2f' % (x[0], x[1], x[2])
                f.write(simplify(s) + '\n')


def write_degree_table(fname, degree_by_tissue, tf_dict, agi2name_dict):
    """Write a TF x tissue table of degree counts to fname.

    degree_by_tissue -- {tissue: {gene_id: count}}, one column per tissue.
    tf_dict          -- iterable of TF ids, one row per TF.
    agi2name_dict    -- mapping from gene id to gene name; a TF that is
                        missing or maps to itself is labelled '.'.

    A TF absent from a tissue gets a count of 0.
    """
    with open(fname, 'w') as f:
        head_lst = ['TF'] + list(degree_by_tissue)
        f.write('%s\n' % ('\t'.join(head_lst)))
        for tf in tf_dict:
            name = '.'
            if tf in agi2name_dict and agi2name_dict[tf] != tf:
                name = agi2name_dict[tf]
            cols = [tf + ' ' + name]
            for k in degree_by_tissue:
                cols.append('%d' % (degree_by_tissue[k].get(tf, 0)))
            f.write('\t'.join(cols) + '\n')


def main():
    """Build the skeleton and degree tables for all tissue edge files."""
    # Project-local module, imported here so that the helper functions in
    # this file can be imported without it being on the path.
    from geneid2name import make_gene_name_AGI_map_dict

    agi2name_dict = make_gene_name_AGI_map_dict(GENE_ID_TO_GENE_NAME)

    tf_dict = get_tfs(edge_file_lst)
    print('Total number of TFs: %d' % (len(tf_dict)))

    d0 = get_edges_consisting_of_tfs(edge_file_lst, tf_dict)
    write_skeleton('result.skeleton.txt', d0)

    # for each TF, get its out-degree and in-degree in each tissue
    dd_all, dd_out, dd_in = get_degree(edge_file_lst, tf_dict)
    write_degree_table('result.out.txt', dd_out, tf_dict, agi2name_dict)
    write_degree_table('result.in.txt', dd_in, tf_dict, agi2name_dict)
    write_degree_table('result.all.txt', dd_all, tf_dict, agi2name_dict)


if __name__ == '__main__':
    main()