brain: add python and R code to local repository.

author: Hui Lan <lanhui@zjnu.edu.cn> 2019-12-04 19:03:19 +0800
committer: Hui Lan <lanhui@zjnu.edu.cn> 2019-12-04 19:03:19 +0800
commit: 97fdefab064f63642fa3ece05b807d29b459df31 (patch)
tree: a058530023224f3e35b1783996f3530c80c04bc5 /Code/TPM2JSON.py
1 files changed, 115 insertions, 0 deletions
diff --git a/Code/TPM2JSON.py b/Code/TPM2JSON.py
new file mode 100644
index 0000000..6d5a423
--- /dev/null
+++ b/Code/TPM2JSON.py
@@ -0,0 +1,115 @@
+# Usage: python TPM2JSON.py parameter_for_net.txt
+# Purpose:
+#   For each gene in TPM.txt, make a json file in directory JSON_DIR.  So we don't need to load the whole TPM.txt later (more memory efficient).
+# 4 APR 2017, hui, slcu
+
+import sys, os, operator, itertools
+import numpy as np
+import json
+from param4net import make_global_param_dict
+
+JSON_DIR = '../Data/history/expr/jsonTPM' # Don't change this
+
+def read_matrix_data(fname):
+    '''
+    Read a whitespace-delimited expression matrix into several lookup tables.
+
+    fname - a file, first line is head, first column is row name.  Blank
+            lines are skipped.  Every value must parse as a float.
+
+    Return a dict with these keys:
+      'xy'        {gene: {cond: val}}   first gene, then condition
+      'yx'        {cond: {gene: val}}   first condition, then gene
+      'xx'        {gene: [val, ...]}    each item is a row
+      'yy'        {cond: [val, ...]}    each item is a column
+      'yy_sorted' like 'yy', with each column sorted in descending order
+      'nrow', 'ncol'    number of data rows (head excluded) and of columns
+      'rowid', 'colid'  row names and column names, in file order
+
+    Exit (SystemExit) with status 1 if the file is empty or if a row does
+    not have exactly one value per column.
+    '''
+
+    with open(fname) as f:  # closed even if reading fails
+        lines = f.readlines()
+
+    if not lines:
+        print('Empty file %s' % (fname))
+        sys.exit(1)
+
+    colid = lines[0].split()[1:]
+    rowid = []
+    d = {}                           # {gene1:{cond1:val1, cond2:val2, ...}, ...}
+    d2 = dict((c, {}) for c in colid)  # {cond1:{gene1:val1, gene2:val2, ...}, ...}
+    d3 = {}                          # {gene1: [], gene2: [], ...}
+    d4 = dict((c, []) for c in colid)  # {cond1:[], cond2:[], ...}
+
+    for line in lines[1:]:
+        lst = line.split()
+        if not lst:  # blank line (e.g., at end of file) carries no data
+            continue
+        g = lst[0]
+        if len(lst) - 1 != len(colid):
+            print('Incomplete columns at row %s' % (g))
+            sys.exit(1)  # non-zero status, so that callers can see the failure
+
+        values = [float(x) for x in lst[1:]]  # convert each value only once
+        rowid.append(g)
+        d[g] = dict(zip(colid, values))
+        d3[g] = values
+        for c, v in zip(colid, values):
+            d2[c][g] = v
+            d4[c].append(v)
+
+    d_return = {
+        'xy': d, 'yx': d2,   # nested dicts
+        'xx': d3, 'yy': d4,  # rows and columns as lists
+    }
+    d_return['nrow'] = len(rowid)  # head is never counted, so no "- 1" here
+    d_return['ncol'] = len(colid)
+    d_return['rowid'] = rowid
+    d_return['colid'] = colid
+    d_return['yy_sorted'] = dict((c, sorted(d4[c], reverse=True)) for c in colid)
+    return d_return
+
+def check_json_file(expr_dict, dir_name):
+    ''' Check if json files are good, return True if yes.
+
+    Only the first 10 genes in expr_dict['rowid'] are checked.  A gene is
+    good if dir_name/<gene>.json can be read, is valid JSON, and has as
+    many items as the gene has conditions in expr_dict['xy'].
+    '''
+    if not os.path.isdir(dir_name):
+        return False
+
+    for g in expr_dict['rowid'][:10]:  # [:10], not [1:10]: include first gene
+        filename = os.path.join(dir_name, g + '.json')
+        try:
+            with open(filename) as f:
+                d3 = json.load(f)
+        except (IOError, ValueError):  # missing/unreadable file, or bad JSON
+            return False
+        if len(d3) != len(expr_dict['xy'][g]):
+            return False
+    return True
+
+def make_json_file(expr_dict, dir_name):
+    ''' Write one file dir_name/<gene>.json for each gene in expr_dict['rowid'].
+
+    Each file holds that gene's {condition: value} dict from expr_dict['xy'].
+    dir_name is created (with any missing parent directories) if not exist.
+    '''
+    if not os.path.isdir(dir_name):
+        os.makedirs(dir_name)  # spaces, not a tab: Python 3 rejects the mix
+
+    for g in expr_dict['rowid']:
+        with open(os.path.join(dir_name, g + '.json'), 'w') as f:
+            json.dump(expr_dict['xy'][g], f)
+
+
+## main: make the json files unless the ones in JSON_DIR already look good
+param_file = sys.argv[1] # a single parameter file
+glb_param_dict = make_global_param_dict(param_file)
+expr_dict = read_matrix_data(glb_param_dict['EXPRESSION_MATRIX']) # matrix file named by the EXPRESSION_MATRIX entry
+if not check_json_file(expr_dict, JSON_DIR):
+    make_json_file(expr_dict, JSON_DIR)
author	Hui Lan <lanhui@zjnu.edu.cn>	2019-12-04 19:03:19 +0800
committer	Hui Lan <lanhui@zjnu.edu.cn>	2019-12-04 19:03:19 +0800
commit	97fdefab064f63642fa3ece05b807d29b459df31 (patch)
tree	a058530023224f3e35b1783996f3530c80c04bc5 /Code/TPM2JSON.py