diff options
| author | Hui Lan <lanhui@zjnu.edu.cn> | 2020-05-04 12:32:46 +0800 | 
|---|---|---|
| committer | Hui Lan <lanhui@zjnu.edu.cn> | 2020-05-04 12:32:46 +0800 | 
| commit | a2504c8825d5bb22a377860083a49d0628630d82 (patch) | |
| tree | 6e6d48c57f41d28a93367f5d7704b3c4b5688586 /Code | |
| parent | 749e1f3e3040edc266d4c8d5acd9d3367d3e9fbe (diff) | |
merge_edges.py: make SQLite inserts really fast.  Commit once at the end instead of committing after each insertion.
Diffstat (limited to 'Code')
| -rw-r--r-- | Code/merge_edges.py | 15 | 
1 files changed, 8 insertions, 7 deletions
| diff --git a/Code/merge_edges.py b/Code/merge_edges.py index 08e76fb..842b27f 100644 --- a/Code/merge_edges.py +++ b/Code/merge_edges.py @@ -91,7 +91,7 @@ def make_html_page(lst, fname):      f.close() -def fill_database(lst, conn): +def fill_database(lst, cursor):      ''' Store all edge information in a SQLite database, which can be retrieved in the Webapp.'''      tf     = lst[1].split()[0] # ID only, no name      tf_name = split_id_and_name(lst[1])     @@ -101,9 +101,7 @@ def fill_database(lst, conn):      edge_date = add_dashes_to_date(lst[7])      method = make_html_list(lst[9])      evidence = lst[5] if lst[5] != '.' else 'TBA' -    conn.execute('CREATE TABLE IF NOT EXISTS edge (target_id text, target_name text, tf_id text, tf_name text, strength text, date text, method text, evidence text)') -    conn.execute('INSERT INTO edge (target_id, target_name, tf_id, tf_name, strength, date, method, evidence) VALUES (?,?,?,?,?,?,?,?)', (target, target_name, tf, tf_name, strength, edge_date, method, evidence)) -    conn.commit() +    cursor.execute('INSERT INTO edge (target_id, target_name, tf_id, tf_name, strength, date, method, evidence) VALUES (?,?,?,?,?,?,?,?)', (target, target_name, tf, tf_name, strength, edge_date, method, evidence))  def compute_time_difference_in_days(t1, t2): @@ -148,7 +146,7 @@ def make_new_edge2(d):  ##main -write_log_file('[merge_edges.py] Go through all edge files in the edge pool %s.' % (EDGE_POOL_DIR) , UPDATE_NETWORK_LOG_FILE) +write_log_file('[merge_edges.py]: Go through all edge files in the edge pool %s.' % (EDGE_POOL_DIR) , UPDATE_NETWORK_LOG_FILE)  d = {} # d will contain all edges computed so far, where the key is TargetGeneID_TFGeneID, and the value is a list of tuples.  Each tuple is a historical edge.  
file_count = 0  for fname in sorted(glob.glob(os.path.join(EDGE_POOL_DIR, 'edges*.*'))): @@ -200,7 +198,7 @@ for fname in sorted(glob.glob(os.path.join(EDGE_POOL_DIR, 'edges*.*'))):      f.close() -write_log_file('[merge_edges.py] BRAIN has collected edges from %d files.' % (file_count) , UPDATE_NETWORK_LOG_FILE)             +write_log_file('[merge_edges.py]: BRAIN has collected edges from %d files.' % (file_count) , UPDATE_NETWORK_LOG_FILE)              # make html pages  folder_path = '../Data/temp/html_edges' @@ -224,6 +222,8 @@ if os.path.exists(db_fname):      os.remove(db_fname)  conn = sqlite3.connect(db_fname) +c = conn.cursor() +c.execute('CREATE TABLE IF NOT EXISTS edge (target_id text, target_name text, tf_id text, tf_name text, strength text, date text, method text, evidence text)')  for k in d:      lst = make_new_edge2(d[k])      # Make an html page for each edge (taking Big disk space).  This will take about 5GB disk space @@ -233,6 +233,7 @@ for k in d:      # make_html_page(lst, folder_path + '/' + pagename)        # Write to a SQLite database file called edges.sqlite, which will be used for the Webapp.      # edges.sqlite will be put under static/edges/ for querying. -    fill_database(lst, conn) +    fill_database(lst, c) +conn.commit()  conn.close() | 
