summaryrefslogtreecommitdiff
path: root/Code
diff options
context:
space:
mode:
Diffstat (limited to 'Code')
-rw-r--r--Code/merge_edges.py56
1 files changed, 51 insertions, 5 deletions
diff --git a/Code/merge_edges.py b/Code/merge_edges.py
index ffe338c..78bef5c 100644
--- a/Code/merge_edges.py
+++ b/Code/merge_edges.py
@@ -152,6 +152,34 @@ def make_new_edge(lst_tuple):
return best_edge
def get_unique_cids(lst):
    '''Return the unique ChIP-seq IDs in lst as one space-separated string.

    Each element of lst is a string holding zero or more whitespace-separated
    IDs.  The IDs from all elements are pooled, de-duplicated and sorted.

    A lone '.' entry is kept as-is, but when at least one other ID is present
    the '.' entry is dropped (it appears to be a "no ID" placeholder -- confirm
    against the edge file format).  The drop happens regardless of where '.'
    would fall in the sort order, so IDs starting with characters that sort
    before '.' (such as '-' or '+') do not leave a stray '.' in the result.

    An empty lst yields an empty string.
    '''
    cids = set()
    for x in lst:
        cids.update(x.split())
    # Only strip the placeholder when something else remains to be reported.
    if len(cids) > 1:
        cids.discard('.')
    return ' '.join(sorted(cids))
+
+
def make_new_edge2(d):
    '''Build the merged output record for one edge from its accumulated entry.

    d is a dict with two keys:
      'best_edge' -- the edge tuple (target, tf, score, type_of_score, rids,
                     cids, ll, date, strength, method_or_tissue) to use as
                     the template for the merged record.
      'stat'      -- running statistics over all occurrences of the edge:
                     'sumr' (sum of absolute scores), 'sumRN' and 'maxRN'
                     (sum and maximum of the RNA-seq ID counts), 'F' (number
                     of occurrences), 'allCID' (list of ChIP-seq ID strings),
                     'most_recent_edge_date' and 'method_or_tissue' (list).

    Returns a list copy of the best edge in which fields 4 (rids), 5 (cids),
    7 (date), 8 (strength) and 9 (method_or_tissue) are replaced by values
    derived from the statistics.  Every replaced field is a string so the
    result can be passed directly to str.join().
    '''
    stat = d['stat']
    best_edge = list(d['best_edge'])

    # Scale for the time term, in days (ten years).  Kept as a float so that
    # time_diff / S is a true division under Python 2 as well.
    S = 365.0 * 10
    curr_date = datetime.datetime.now().strftime('%Y%m%d')
    most_recent_edge_date = stat['most_recent_edge_date']
    # NOTE(review): the sign of time_diff depends on
    # compute_time_difference_in_days (defined elsewhere) -- confirm that it
    # makes older edges weaker rather than stronger.
    time_diff = compute_time_difference_in_days(most_recent_edge_date, curr_date)

    # Float so the averages below are not truncated by integer division
    # under Python 2; results under Python 3 are unchanged.
    F = float(stat['F'])
    mean_abs_score = stat['sumr'] / F
    mean_rn = stat['sumRN'] / F
    strength = (mean_abs_score
                * math.log(mean_rn + 1, 10)
                * math.log(F + 1, 2)
                * math.exp(time_diff / S))

    # maxRN is a number; convert it so '\t'.join() on the returned list
    # does not raise TypeError.
    best_edge[4] = str(stat['maxRN'])
    best_edge[5] = get_unique_cids(stat['allCID'])
    best_edge[7] = most_recent_edge_date
    best_edge[8] = '%.2f' % strength
    best_edge[9] = ','.join(sorted(set(stat['method_or_tissue'])))
    return best_edge
+
##main
@@ -182,9 +210,27 @@ for fname in sorted(glob.glob(os.path.join(EDGE_POOL_DIR, 'edges*.*'))):
t = (target, tf, score, type_of_score, rids, cids, ll, date, strength, method_or_tissue)
if not key in d:
- d[key] = [t]
- elif not t in d[key]: # make sure the tuple to be added to d[key] (a list) does not alreay exist.
- d[key].append(t)
+ d[key] = {'best_edge':t, 'stat':{'sumr':abs(float(score)) , 'allCID':[cids], 'maxRN':get_number_of_RNAseq_ids(rids), 'sumRN':get_number_of_RNAseq_ids(rids), 'F':1, 'most_recent_edge_date':date, 'method_or_tissue':[method_or_tissue]}}
+ else:
+ # update best edge
+ old_score = float(d[key]['best_edge'][2])
+ new_score = float(score)
+ if abs(new_score) > abs(old_score):
+ d[key]['best_edge'] = t
+
+ # update stat information
+ d[key]['stat']['sumr'] += abs(new_score)
+ d[key]['stat']['sumRN'] += get_number_of_RNAseq_ids(rids)
+ d[key]['stat']['F'] += 1
+
+ if get_number_of_RNAseq_ids(rids) > d[key]['stat']['maxRN']:
+ d[key]['stat']['maxRN'] = get_number_of_RNAseq_ids(rids)
+ if date > d[key]['stat']['most_recent_edge_date']:
+ d[key]['stat']['most_recent_edge_date'] = date
+ if not cids in d[key]['stat']['allCID']:
+ d[key]['stat']['allCID'].append(cids)
+ if not method_or_tissue in d[key]['stat']['method_or_tissue']:
+ d[key]['stat']['method_or_tissue'].append(method_or_tissue)
f.close()
@@ -199,7 +245,7 @@ if not os.path.isdir(folder_path):
write_log_file('[merge_edges.py]: Make text edge file %s ...' % (MERGED_EDGE_FILE), UPDATE_NETWORK_LOG_FILE)
fout = open(MERGED_EDGE_FILE, 'w')
for k in d:
- lst = make_new_edge(d[k])
+ lst = make_new_edge2(d[k])
fout.write('\t'.join(lst) + '\n')
fout.close()
@@ -213,7 +259,7 @@ if os.path.exists(db_fname):
conn = sqlite3.connect(db_fname)
for k in d:
- lst = make_new_edge(d[k])
+ lst = make_new_edge2(d[k])
# Making an HTML page for each edge takes a lot of disk space: about 5 GB
# for 1.3 million edges, which is not very disk-space friendly. So I use a database-driven
# dynamic method to save space.