From 3dfa80be41cf530b4bc0845eab5e7f294fd4441b Mon Sep 17 00:00:00 2001
From: Hui Lan
Date: Wed, 4 Dec 2019 19:18:38 +0800
Subject: merge_edges.py: clean up source code by removing commented lines and editing the head comments.
---
 Code/merge_edges.py | 38 ++++++++++++++------------------------
 1 file changed, 14 insertions(+), 24 deletions(-)
(limited to 'Code')

diff --git a/Code/merge_edges.py b/Code/merge_edges.py
index ef870fb..e0b1c61 100644
--- a/Code/merge_edges.py
+++ b/Code/merge_edges.py
@@ -1,23 +1,24 @@
-# Purpose: When edges.txt contains multiple lines representing the
-# same edge, choose only one edge.
+# Purpose: When edges.txt.* contains multiple lines representing the
+# same edge, merge them and keep only one edge.
 #
 # Usage: python merge_edges.py
 #
-# This script is used to produce the edges.txt for the brain
-# web application. It searches in EDGE_POOL_DIR for edge files
-# (with 10 columns) from many sources, most likely with
-# duplicated edges. It removes duplication and computes
+# This script is used to produce a single file edges.txt for
+# the brain web application. It searches in EDGE_POOL_DIR for
+# edge files (with 10 columns) from many sources, most likely
+# having duplicated edges. It removes duplication and computes
 # strength for each edge.
 #
-# Note: make sure fname is edges.txt Rationale: to save place, I am no
-# longer going to use a full list of RNA-seq experiment IDs in the
-# fifth column. Use a number instead. This number is the length of
-# RNA-seq experiment IDs. If no IDs are available, this number is 1.
-# However, I am still going to keep a full list of ChIP-seq experiment
-# IDs (the sixth column).
+# Note: make sure fname is edges.txt.
+#
+# Rationale: to save place, I am no longer going to use a full list of
+# RNA-seq experiment IDs in the fifth column. Use a number, i.e., the
+# number of RNA-seq IDs, instead. If no IDs are available, this
+# number is 1 (very conservative). 
However, I am still going to keep +# a full list of ChIP-seq experiment IDs (the sixth column). # # Created on 3 August 2019 by Hui Lan -# Last modified on 5 August 2019 by Hui Lan + import os, operator, sys, math, datetime, glob from configure import EDGE_POOL_DIR, MERGED_EDGE_FILE @@ -69,10 +70,6 @@ def make_html_page(lst, fname): body += 'Click for gene expression scatter plot\n' % (tf, target) body += '

For more detailed analysis, download our gene expression scatter plotting tool. No installation is required. Input data: TF gene expression Target gene expression RNA-seq annotation

\n' % (tf, target) body += '

\n' -## if 'AT2G44304' in lst[0] and 'AT2G24700' in lst[1]: -## print(lst) -## sys.exit() - s += '%s\n' % (body) s += '' f = open(fname, 'w') @@ -116,19 +113,12 @@ def make_new_edge(lst_tuple): method_or_tissue.append(t[9]) S = 365 * 10 curr_date = datetime.datetime.now().strftime('%Y%m%d') - #time_diff = int(most_recent_edge_date) - int(curr_date) time_diff = compute_time_difference_in_days(most_recent_edge_date, curr_date) strength = sum(r_lst)/len(r_lst) * math.log(sum(RN_lst)/len(RN_lst)+1, 10) * math.log(F+1, 2) * math.exp(time_diff/S) best_edge[4] = '%d' % max(RN_lst) best_edge[5] = cids best_edge[8] = '%.2f' % strength best_edge[9] = ','.join(sorted(list(set(method_or_tissue)))) # unique methods or tissues, in string format - -## if 'AT2G44304' in best_edge[0] and 'AT2G24700' in best_edge[1]: -## print(strength) -## print(best_edge) -## sys.exit() - return best_edge -- cgit v1.2.1