1 files changed, 14 insertions, 24 deletions
diff --git a/Code/merge_edges.py b/Code/merge_edges.py
index ef870fb..e0b1c61 100644
--- a/Code/merge_edges.py
+++ b/Code/merge_edges.py
@@ -1,23 +1,24 @@
-# Purpose: When edges.txt contains multiple lines representing the
-#          same edge, choose only one edge.
+# Purpose: When the edges.txt.* files contain multiple lines
+#          representing the same edge, merge them into a single edge.
 #
 # Usage: python merge_edges.py
 #
-#        This script is used to produce the edges.txt for the brain
-#        web application.  It searches in EDGE_POOL_DIR for edge files
-#        (with 10 columns) from many sources, most likely with
-#        duplicated edges.  It removes duplication and computes
+#        This script is used to produce a single file, edges.txt, for
+#        the brain web application.  It searches EDGE_POOL_DIR for
+#        edge files (with 10 columns) from many sources, which most
+#        likely contain duplicated edges.  It removes duplicates and computes
 #        strength for each edge.
 #
-# Note: make sure fname is edges.txt Rationale: to save place, I am no
-# longer going to use a full list of RNA-seq experiment IDs in the
-# fifth column. Use a number instead.  This number is the length of
-# RNA-seq experiment IDs.  If no IDs are available, this number is 1.
-# However, I am still going to keep a full list of ChIP-seq experiment
-# IDs (the sixth column).
+# Note: make sure fname is edges.txt.
+#
+# Rationale: to save space, I no longer store the full list of
+# RNA-seq experiment IDs in the fifth column.  Instead, I store a
+# number, i.e., the number of RNA-seq IDs.  If no IDs are available,
+# this number is 1 (very conservative).  However, I still keep the
+# full list of ChIP-seq experiment IDs (the sixth column).
 #
 # Created on 3 August 2019 by Hui Lan <lanhui@zjnu.edu.cn>
-# Last modified on 5 August 2019 by Hui Lan <lanhui@zjnu.edu.cn>
+
 
 import os, operator, sys, math, datetime, glob
 from configure import EDGE_POOL_DIR, MERGED_EDGE_FILE
@@ -69,10 +70,6 @@ def make_html_page(lst, fname):
     body += '<a id="myLink" href="javascript:void(0);" onclick="drawScatterPlot(\'json/%s.json\', \'json/%s.json\', \'rnaseq_info_database.json\', [\'.\']);">Click for gene expression scatter plot</a>\n' % (tf, target)
     body += '<p>For more detailed analysis, <a href="gene-expression-level-scatterplot-by-XuMengqi.zip">download</a> our gene expression scatter plotting tool.  No installation is required.  Input data: <a href="json/%s.json">TF gene expression</a>  <a href="json/%s.json">Target gene expression</a>  <a href="rnaseq_info_database.json">RNA-seq annotation</a></p>\n' % (tf, target)    
     body += '<p id="chart"></p>\n'
-##    if 'AT2G44304' in lst[0] and 'AT2G24700' in lst[1]:
-##        print(lst)
-##        sys.exit()
-        
     s += '<body>%s</body>\n' % (body)
     s += '</html>'
     f = open(fname, 'w')
@@ -116,19 +113,12 @@ def make_new_edge(lst_tuple):
         method_or_tissue.append(t[9])
     S = 365 * 10
     curr_date = datetime.datetime.now().strftime('%Y%m%d')
-    #time_diff = int(most_recent_edge_date) - int(curr_date)
     time_diff = compute_time_difference_in_days(most_recent_edge_date, curr_date)
     strength = sum(r_lst)/len(r_lst) * math.log(sum(RN_lst)/len(RN_lst)+1, 10) * math.log(F+1, 2) * math.exp(time_diff/S)
     best_edge[4] = '%d' % max(RN_lst)
     best_edge[5] = cids
     best_edge[8] = '%.2f' % strength
     best_edge[9] = ','.join(sorted(list(set(method_or_tissue)))) # unique methods or tissues, in string format
-
-##    if 'AT2G44304' in best_edge[0] and 'AT2G24700' in best_edge[1]:
-##        print(strength)
-##        print(best_edge)
-##        sys.exit()
-        
     return best_edge