summaryrefslogtreecommitdiff
path: root/Code/merge_edges.py
diff options
context:
space:
mode:
Diffstat (limited to 'Code/merge_edges.py')
-rw-r--r--Code/merge_edges.py46
1 files changed, 38 insertions, 8 deletions
diff --git a/Code/merge_edges.py b/Code/merge_edges.py
index a8bd3b9..5ea1485 100644
--- a/Code/merge_edges.py
+++ b/Code/merge_edges.py
@@ -22,6 +22,7 @@
import os, operator, sys, math, datetime, glob
from configure import EDGE_POOL_DIR, MERGED_EDGE_FILE
+import sqlite3
def get_number_of_RNAseq_ids(s):
if s == '.':
@@ -61,14 +62,14 @@ def make_html_page(lst, fname):
head += '<link href="./c3.min.css" rel="stylesheet" />\n<script src="./d3.min.js"></script>\n<script src="./c3.min.js"></script>\n<script src="./scatterplot.js"></script>'
s = '<html>'
s += '<head>%s</head>\n' % (head)
- body = '<p>TF is %s. </p>\n' % (split_id_and_name(lst[1]))
- body += '<p>Target is %s. </p>\n' % (split_id_and_name(lst[0]))
- body += '<p>Association strength: %s.</p>\n' % (lst[8])
+ body = '<p>TF: %s</p>\n' % (split_id_and_name(lst[1]))
+ body += '<p>Target: %s</p>\n' % (split_id_and_name(lst[0]))
+ body += '<p>Association strength: %s</p>\n' % (lst[8])
body += '<p>Edge made on %s. </p>\n' % (add_dashes_to_date(lst[7]))
body += '<p>Methods: %s</p>\n' % (make_html_list(lst[9]))
body += '<p>Evidence of binding: %s.</p>\n' % (lst[5] if lst[5] != '.' else 'TBA')
- body += '<a id="myLink" href="javascript:void(0);" onclick="drawScatterPlot(\'json/%s.json\', \'json/%s.json\', \'rnaseq_info_database.json\', [\'.\']);">Click for gene expression scatter plot</a>\n' % (tf, target)
- body += '<p>For more detailed analysis, <a href="gene-expression-level-scatterplot-by-XuMengqi.zip">download</a> our gene expression scatter plotting tool. No installation is required. Input data: <a href="json/%s.json">TF gene expression</a> <a href="json/%s.json">Target gene expression</a> <a href="rnaseq_info_database.json">RNA-seq annotation</a></p>\n' % (tf, target)
+ body += '<a id="myLink" href="javascript:void(0);" onclick="drawScatterPlot(\'json/%s.json\', \'json/%s.json\', \'rnaseq_info_database.json\', [\'.\']);">Gene expression scatter plot</a>\n' % (tf, target)
+ body += '<p>For more detailed analysis, <a href="gene-expression-level-scatterplot-by-XuMengqi.zip">download</a> our gene expression scatter plotting tool. No installation is required. Input: <a href="json/%s.json">TF gene expression</a> <a href="json/%s.json">Target gene expression</a> <a href="rnaseq_info_database.json">RNA-seq annotation</a></p>\n' % (tf, target)
body += '<p id="chart"></p>\n'
s += '<body>%s</body>\n' % (body)
s += '</html>'
@@ -77,6 +78,21 @@ def make_html_page(lst, fname):
f.close()
+def fill_database(lst, conn):
+ ''' Store one edge as a row of the SQLite table 'edge', which can be retrieved in the Webapp.
+
+ lst  -- edge record, read by position: lst[0] is the target, lst[1] is the TF,
+         lst[5] is the evidence of binding, lst[7] is the edge date,
+         lst[8] is the association strength and lst[9] is the methods field.
+ conn -- database connection; only execute() and commit() are called on it.
+
+ The table is created if it does not exist yet, one row is inserted, and the
+ insert is committed before returning. Nothing is returned.
+ '''
+ tf = lst[1].split()[0] # ID only, no name
+ tf_name = split_id_and_name(lst[1]) # helper not shown here; presumably a display form of ID and name -- confirm
+ target = lst[0].split()[0] # first whitespace-separated token, same rule as for the TF
+ target_name = split_id_and_name(lst[0])
+ strength = lst[8]
+ edge_date = add_dashes_to_date(lst[7])
+ method = make_html_list(lst[9]) # NOTE(review): the helper name suggests HTML markup ends up in the 'method' column -- confirm
+ evidence = lst[5] if lst[5] != '.' else 'TBA' # a '.' placeholder is stored as 'TBA'
+ # IF NOT EXISTS makes this harmless after the first call, but it is still executed on every call.
+ conn.execute('CREATE TABLE IF NOT EXISTS edge (target_id text, target_name text, tf_id text, tf_name text, strength text, date text, method text, evidence text)')
+ # Values are bound through ? placeholders instead of being formatted into the SQL text.
+ conn.execute('INSERT INTO edge (target_id, target_name, tf_id, tf_name, strength, date, method, evidence) VALUES (?,?,?,?,?,?,?,?)', (target, target_name, tf, tf_name, strength, edge_date, method, evidence))
+ # NOTE(review): one commit per inserted row; this may be slow when the function is called once per edge
+ # for a large edge set -- consider committing once in the caller instead.
+ conn.commit()
+
+
def compute_time_difference_in_days(t1, t2):
''' t1 and t2 has this format: yyyymmdd. '''
if not t1.isnumeric() and length(t1) != 8:
@@ -170,8 +186,22 @@ for k in d:
fout.close()
-print('[merge_edges.py]: Make html edge files. May take a while...')
+print('[merge_edges.py]: Make html edge files. May take a while ...')
+
+db_fname = folder_path + '/' + 'edges.sqlite'
+if os.path.exists(db_fname):
+ os.remove(db_fname)
+
+conn = sqlite3.connect(db_fname)
for k in d:
lst = make_new_edge(d[k])
- pagename = lst[1].split()[0] + '_' + lst[0].split()[0] + '_0.html' # TF_Target.html
- make_html_page(lst, folder_path + '/' + pagename)
+ # Making a static html page for each edge takes a lot of disk space (about 5GB for
+ # 1.3 million edges), which is not disk space friendly. So the pages are no longer written here;
+ # a database-driven dynamic method is used instead to save space.
+ # pagename = lst[1].split()[0] + '_' + lst[0].split()[0] + '_0.html' # TF_Target.html
+ # make_html_page(lst, folder_path + '/' + pagename)
+ # Write each edge to a SQLite database file called edges.sqlite, which is used by the Webapp.
+ # edges.sqlite is to be put under static/edges/ for querying.
+ fill_database(lst, conn)
+
+conn.close()