# Purpose: convert results from three-way interaction analysis to edge format.
# Usage: python process_3way_interaction.py ../Data/information/summary.txt > edges.txt.interaction.seeddata
# Created on 9 Aug 2019 by Hui Lan

import sys
from datetime import datetime

# Lookup table mapping AGI identifiers to gene names (path relative to the CWD).
AGI_TO_NAME_FILE = '../Data/information/AGI-to-gene-names_v2.txt'

# A record is indexed up to field 6 (the score), so it needs at least 7 fields.
MIN_FIELDS = 7


def get_2tf_1target_1score(s):
    '''
    Expand one summary record into (tf1, tf2, target, score) tuples.

    s looks like '398: ( AT3G10490;AT3G10480, AT1G03970, AT5G20910 ) 10.41 2'.
    After whitespace splitting, field 2 holds the first TF group, field 3 the
    second TF group, field 4 the target and field 6 the score.  A TF group may
    list several IDs separated by ';'; one tuple is returned for every
    tf1/tf2 combination.  All values are returned as strings.

    Returns an empty list for blank or truncated records (fewer than 7
    whitespace-separated fields) instead of raising IndexError.
    '''
    lst = s.split()
    if len(lst) < MIN_FIELDS:
        return []

    tf1_all, tf2_all, target, score = lst[2], lst[3], lst[4], lst[6]

    # The TF fields carry the ', ' separator of the tuple notation; drop it.
    tf1_ids = [tf.replace(',', '') for tf in tf1_all.split(';')]
    tf2_ids = [tf.replace(',', '') for tf in tf2_all.split(';')]

    return [(tf1, tf2, target, score) for tf1 in tf1_ids for tf2 in tf2_ids]


def format_edge_rows(tf1, tf2, target, score, name_of, curr_date):
    '''
    Build the two newline-terminated edge lines for one (tf1, tf2, target) triple.

    The first line is the edge target <- tf1 annotated with tf2 as the
    interacting partner, the second is target <- tf2 annotated with tf1.

    name_of is a callable mapping a gene ID to its gene name; curr_date is the
    date string written into every line.
    '''
    target_name, tf1_name, tf2_name = name_of(target), name_of(tf1), name_of(tf2)

    target_str = target + ' ' + target_name
    tf1_str = tf1 + ' ' + tf1_name
    tf2_str = tf2 + ' ' + tf2_name
    score_str = '0.6'
    cond_str = '.'

    # Only the sign is dropped.  Removing every '-' would also eat the exponent
    # sign of values in scientific notation ('-1.5e-05' -> '1.5e05').
    unsigned_score = score.lstrip('-')

    rows = []
    for tf_str, partner, partner_name in ((tf1_str, tf2, tf2_name),
                                          (tf2_str, tf1, tf1_name)):
        method_or_tissue = 'interact.with.%s' % (partner + '(' + partner_name + ')')
        rows.append('\t'.join([target_str, tf_str, score_str, 'mix', '138',
                               cond_str, '.', curr_date, unsigned_score,
                               method_or_tissue]) + '\n')
    return rows


def main(summary_path):
    '''
    Read the three-way interaction summary and print its edges to stdout.

    The first two lines of the summary file are skipped.  Only triples whose
    three IDs all start with 'AT' are emitted.
    '''
    # Imported here rather than at module level so that the parsing helpers
    # above can be imported without the project modules on the path.
    from geneid2name import make_gene_name_AGI_map_dict, get_gene_name

    with open(summary_path) as f:
        lines = f.readlines()

    agi2name_dict = make_gene_name_AGI_map_dict(AGI_TO_NAME_FILE)

    def name_of(gene_id):
        return get_gene_name(gene_id, agi2name_dict)

    # Computed once so that every row of a run carries the same date.
    curr_date = datetime.now().strftime('%Y%m%d')

    rows = []
    for line in lines[2:]:
        for tf1, tf2, target, score in get_2tf_1target_1score(line.strip()):
            if tf1.startswith('AT') and tf2.startswith('AT') and target.startswith('AT'):
                rows.extend(format_edge_rows(tf1, tf2, target, score,
                                             name_of, curr_date))

    # print() appends one more newline after the last row, as the script
    # always did; kept so the generated file stays byte-compatible.
    print(''.join(rows))


if __name__ == '__main__':
    if len(sys.argv) < 2:
        sys.exit('Usage: python process_3way_interaction.py summary.txt > edges.txt.interaction.seeddata')
    main(sys.argv[1])