summaryrefslogtreecommitdiff
path: root/Code/merge_edges.py
diff options
context:
space:
mode:
Diffstat (limited to 'Code/merge_edges.py')
-rw-r--r--Code/merge_edges.py38
1 files changed, 14 insertions, 24 deletions
diff --git a/Code/merge_edges.py b/Code/merge_edges.py
index ef870fb..e0b1c61 100644
--- a/Code/merge_edges.py
+++ b/Code/merge_edges.py
@@ -1,23 +1,24 @@
-# Purpose: When edges.txt contains multiple lines representing the
-# same edge, choose only one edge.
+# Purpose: When the edges.txt.* files contain multiple lines representing
+# the same edge, merge them and keep only one edge.
#
# Usage: python merge_edges.py
#
-# This script is used to produce the edges.txt for the brain
-# web application. It searches in EDGE_POOL_DIR for edge files
-# (with 10 columns) from many sources, most likely with
-# duplicated edges. It removes duplication and computes
+# This script is used to produce a single file edges.txt for
+# the brain web application. It searches in EDGE_POOL_DIR for
+# edge files (with 10 columns) from many sources, most likely
+# having duplicated edges. It removes duplication and computes
# strength for each edge.
#
-# Note: make sure fname is edges.txt Rationale: to save place, I am no
-# longer going to use a full list of RNA-seq experiment IDs in the
-# fifth column. Use a number instead. This number is the length of
-# RNA-seq experiment IDs. If no IDs are available, this number is 1.
-# However, I am still going to keep a full list of ChIP-seq experiment
-# IDs (the sixth column).
+# Note: make sure fname is edges.txt.
+#
+# Rationale: to save space, I am no longer going to use a full list of
+# RNA-seq experiment IDs in the fifth column. Use a number, i.e., the
+# number of RNA-seq IDs, instead. If no IDs are available, this
+# number is 1 (very conservative). However, I am still going to keep
+# a full list of ChIP-seq experiment IDs (the sixth column).
#
# Created on 3 August 2019 by Hui Lan <lanhui@zjnu.edu.cn>
-# Last modified on 5 August 2019 by Hui Lan <lanhui@zjnu.edu.cn>
+
import os, operator, sys, math, datetime, glob
from configure import EDGE_POOL_DIR, MERGED_EDGE_FILE
@@ -69,10 +70,6 @@ def make_html_page(lst, fname):
body += '<a id="myLink" href="javascript:void(0);" onclick="drawScatterPlot(\'json/%s.json\', \'json/%s.json\', \'rnaseq_info_database.json\', [\'.\']);">Click for gene expression scatter plot</a>\n' % (tf, target)
body += '<p>For more detailed analysis, <a href="gene-expression-level-scatterplot-by-XuMengqi.zip">download</a> our gene expression scatter plotting tool. No installation is required. Input data: <a href="json/%s.json">TF gene expression</a> <a href="json/%s.json">Target gene expression</a> <a href="rnaseq_info_database.json">RNA-seq annotation</a></p>\n' % (tf, target)
body += '<p id="chart"></p>\n'
-## if 'AT2G44304' in lst[0] and 'AT2G24700' in lst[1]:
-## print(lst)
-## sys.exit()
-
s += '<body>%s</body>\n' % (body)
s += '</html>'
f = open(fname, 'w')
@@ -116,19 +113,12 @@ def make_new_edge(lst_tuple):
method_or_tissue.append(t[9])
S = 365 * 10
curr_date = datetime.datetime.now().strftime('%Y%m%d')
- #time_diff = int(most_recent_edge_date) - int(curr_date)
time_diff = compute_time_difference_in_days(most_recent_edge_date, curr_date)
strength = sum(r_lst)/len(r_lst) * math.log(sum(RN_lst)/len(RN_lst)+1, 10) * math.log(F+1, 2) * math.exp(time_diff/S)
best_edge[4] = '%d' % max(RN_lst)
best_edge[5] = cids
best_edge[8] = '%.2f' % strength
best_edge[9] = ','.join(sorted(list(set(method_or_tissue)))) # unique methods or tissues, in string format
-
-## if 'AT2G44304' in best_edge[0] and 'AT2G24700' in best_edge[1]:
-## print(strength)
-## print(best_edge)
-## sys.exit()
-
return best_edge