From cc6858ae8e1a3eb24f68047ec4009b0bb9ab10ad Mon Sep 17 00:00:00 2001 From: Hui Lan Date: Tue, 11 Feb 2020 18:03:52 +0800 Subject: merge_edges.py: make a better key Use a combination of target gene ID and tf gene ID as a key. So if we have the following: Target: AT5G09445 AT5G09445 TF: AT1G53910 RAP2.12 Then the key will be "AT5G09445_AT1G53910". Before, it was "AT5G09445 AT5G09445 AT1G53910 RAP2.12". The old key is OK in most cases, as long as a gene ID's corresponding gene name is consistent. But if "AT1G53910" has a different gene name, then we will have a DIFFERENT key, which is not what we want. --- Code/merge_edges.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'Code') diff --git a/Code/merge_edges.py b/Code/merge_edges.py index 84535d7..62a958a 100644 --- a/Code/merge_edges.py +++ b/Code/merge_edges.py @@ -160,7 +160,7 @@ for fname in sorted(glob.glob(os.path.join(EDGE_POOL_DIR, 'edges*.*'))): strength = lst[8] method_or_tissue = lst[9] - key = target + tf + key = target.split()[0] + '_' + tf.split()[0] # target or tf has two fields, Gene ID and Gene Name, split()[0] means using Gene ID only. t = (target, tf, score, type_of_score, rids, cids, ll, date, strength, method_or_tissue) if not key in d: -- cgit v1.2.1