From 824629706b75b3c5b4275c2f6d6ce540bf6dfc73 Mon Sep 17 00:00:00 2001
From: Lan Hui <lanhui@zjnu.edu.cn>
Date: Sun, 11 Aug 2024 15:01:58 +0800
Subject: dos2unix

---
 Code/.#update_network_by_force.py |   1 +
 Code/configure.py                 | 112 +++++++++++++++++++-------------------
 Code/merge_edges.py               |   4 ++
 Code/requirements.txt             |   1 +
 4 files changed, 62 insertions(+), 56 deletions(-)
 create mode 120000 Code/.#update_network_by_force.py
 create mode 100644 Code/requirements.txt

(limited to 'Code')

diff --git a/Code/.#update_network_by_force.py b/Code/.#update_network_by_force.py
new file mode 120000
index 0000000..212abbe
--- /dev/null
+++ b/Code/.#update_network_by_force.py
@@ -0,0 +1 @@
+lanhui@lh-ubuntu22.3552:1722838551
\ No newline at end of file
diff --git a/Code/configure.py b/Code/configure.py
index 73fc9cc..2f0fbd9 100644
--- a/Code/configure.py
+++ b/Code/configure.py
@@ -1,56 +1,56 @@
-# From get_TPM_by_salmon.py
-SALMON          = '/home/lanhui/brain/Salmon/Salmon-0.7.2_linux_x86_64/bin/salmon' # salmon software path
-SALMON_INDEX    = '/home/lanhui/brain/Salmon/salmon_index'
-TRANSCRIPTOME   = '/home/lanhui/brain/Salmon/Arabidopsis_thaliana.TAIR10.cdna.all.fa'
-SALMON_MAP_RESULT_DIR = '../Data/temp/salmon_map_result'
-KMER            = 31
-
-# From download_and_map.py
-DAILY_MAP_NUMBER = 4   # download this many samples each time.  I have tested the values of 3, 4, 5, 8.
-MIN_FASTQ_FILE_SIZE = 200000000    # in bytes, approximately 200MB
-RNA_SEQ_INFO_FILE = '../Data/information/rnaseq_info_database.json'  # some data downloaded from ENA are not RNA-seq (they are ChIP-seq). Use this file to tell whether the file is RNA-seq
-DOWNLOADED_SRA_ID_LOG_FILE = '../Data/log/download_log.txt' # a list of downloaded SRA IDs
-IGNORED_SRA_ID_LOG_FILE = '../Data/log/download_log_small_sized_ids.txt'  # store SRA IDs with small file size.
-MAPPED_RDATA_DIR = '../Data/R/Mapped/public'          # mapped RNA-seq (file names ended with _quant.txt) go here
-RAW_RDATA_DIR    = '/disk1/Data/R/Raw'                # downloaded files go here, was "../Data/R/Raw" (now almost full).
-
-# From update_network.py
-# Don'T change the following paths and names
-HISTORY_DIR       = '../Data/history/edges/many_targets' # each edge file contains edges for many targets
-HISTORY_DIR2      = '../Data/history/edges/one_target'   # edges.txt.* files are here, all edge files have the name edges.txt.*, the leading string 'edges.txt' must be present.
-TIMESTAMP_FILE    = '../Data/log/file_timestamp.txt'     # record last modified time of several important files
-SAMPLE_SIZE_FILE  = '../Data/log/total.samples.txt'      # each line contains a date and the number of samples on and after that date
-TEMP_DIR          = '../Data/temp'
-
-PARAMETER_FOR_BUILDCMATRIX = '../Data/parameter/parameter_for_buildCmatrix.txt'
-PARAMETER_FOR_BUILDRMATRIX = '../Data/parameter/parameter_for_buildRmatrix.txt'
-PARAMETER_FOR_NET          = '../Data/parameter/parameter_for_net.txt'
-PARAMETER_FOR_NET_TRAVADB_STRESS      = '../Data/parameter/parameter_for_net_travadb_stress.txt'
-PARAMETER_FOR_NET_TRAVADB_MAP         = '../Data/parameter/parameter_for_net_travadb_map.txt'
-PARAMETER_FOR_NET_MILD_DROUGHT        = '../Data/parameter/parameter_for_net_mild_drought.txt'
-PARAMETER_FOR_NET_WIGGELAB_DIURNAL    = '../Data/parameter/parameter_for_net_wiggelab_diurnal.txt'
-
-BINDING_FILE               = '../Data/history/bind/binding.txt'
-TPM_FILE                   = '../Data/history/expr/TPM.txt' # gene expression data
-
-BUILDRMATRIX_RENEW_INTERVAL = 28 # check every 28 days for updating TPM.txt
-MIN_RNA_SEQ_INCREASE = -999 # minimum RNA-seq experiments needed when updating parameter_for_buildRmatrix.txt
-UPDATE_NETWORK_LOG_FILE  = '../Data/log/update.network.log.txt' # network update log. We should check this file from time to time.
-NEW_OR_UPDATED_CHIP_FILE = '../Data/log/new.or.updated.chip.file.txt'
-
-RNA_SEQ_INFO_DATABASE   = '../Data/information/rnaseq_info_database.txt' # same as RNA_SEQ_INFO_FILE
-RNA_SEQ_INFO_DATABASE_JSON   = '../Data/information/rnaseq_info_database.json'
-
-GENE_ID_FIRST_TWO_LETTERS = 'AT'
-MEMORY_STRENGTH = 365 # memory retention power (larger value means better memory)
-
-#
-MAPPED_CDATA_DIR = '../Data/C/Mapped' # mapped ChIp-seq data
-
-# Used in merge_edges.py
-EDGE_POOL_DIR = '/disk1/edge_pool'
-MERGED_EDGE_FILE = '../Data/temp/edges.txt'
-SQLITE_EDGE_FILE = '../Data/temp/edges.sqlite'
-DIFF_EDGE_FILE = '../Data/temp/edges-diff.txt' # the difference between two edge files from yesterday and from today
-
-TARGET_TF_FILE = '../Data/information/target_tf.txt'
+# From get_TPM_by_salmon.py
+SALMON          = '/home/lanhui/brain/Salmon/Salmon-0.7.2_linux_x86_64/bin/salmon' # salmon software path
+SALMON_INDEX    = '/home/lanhui/brain/Salmon/salmon_index'
+TRANSCRIPTOME   = '/home/lanhui/brain/Salmon/Arabidopsis_thaliana.TAIR10.cdna.all.fa'
+SALMON_MAP_RESULT_DIR = '../Data/temp/salmon_map_result'
+KMER            = 31
+
+# From download_and_map.py
+DAILY_MAP_NUMBER = 4   # download this many samples each time.  I have tested the values of 3, 4, 5, 8.
+MIN_FASTQ_FILE_SIZE = 200000000    # in bytes, approximately 200MB
+RNA_SEQ_INFO_FILE = '../Data/information/rnaseq_info_database.json'  # some data downloaded from ENA are not RNA-seq (they are ChIP-seq). Use this file to tell whether the file is RNA-seq
+DOWNLOADED_SRA_ID_LOG_FILE = '../Data/log/download_log.txt' # a list of downloaded SRA IDs
+IGNORED_SRA_ID_LOG_FILE = '../Data/log/download_log_small_sized_ids.txt'  # store SRA IDs with small file size.
+MAPPED_RDATA_DIR = '../Data/R/Mapped/public'          # mapped RNA-seq (file names ended with _quant.txt) go here
+RAW_RDATA_DIR    = '/disk1/Data/R/Raw'                # downloaded files go here, was "../Data/R/Raw" (now almost full).
+
+# From update_network.py
+# Don't change the following paths and names
+HISTORY_DIR       = '../Data/history/edges/many_targets' # each edge file contains edges for many targets
+HISTORY_DIR2      = '../Data/history/edges/one_target'   # edges.txt.* files are here, all edge files have the name edges.txt.*, the leading string 'edges.txt' must be present.
+TIMESTAMP_FILE    = '../Data/log/file_timestamp.txt'     # record last modified time of several important files
+SAMPLE_SIZE_FILE  = '../Data/log/total.samples.txt'      # each line contains a date and the number of samples on and after that date
+TEMP_DIR          = '../Data/temp'
+
+PARAMETER_FOR_BUILDCMATRIX = '../Data/parameter/parameter_for_buildCmatrix.txt'
+PARAMETER_FOR_BUILDRMATRIX = '../Data/parameter/parameter_for_buildRmatrix.txt'
+PARAMETER_FOR_NET          = '../Data/parameter/parameter_for_net.txt'
+PARAMETER_FOR_NET_TRAVADB_STRESS      = '../Data/parameter/parameter_for_net_travadb_stress.txt'
+PARAMETER_FOR_NET_TRAVADB_MAP         = '../Data/parameter/parameter_for_net_travadb_map.txt'
+PARAMETER_FOR_NET_MILD_DROUGHT        = '../Data/parameter/parameter_for_net_mild_drought.txt'
+PARAMETER_FOR_NET_WIGGELAB_DIURNAL    = '../Data/parameter/parameter_for_net_wiggelab_diurnal.txt'
+
+BINDING_FILE               = '../Data/history/bind/binding.txt'
+TPM_FILE                   = '../Data/history/expr/TPM.txt' # gene expression data
+
+BUILDRMATRIX_RENEW_INTERVAL = 28 # check every 28 days for updating TPM.txt
+MIN_RNA_SEQ_INCREASE = -999 # minimum RNA-seq experiments needed when updating parameter_for_buildRmatrix.txt
+UPDATE_NETWORK_LOG_FILE  = '../Data/log/update.network.log.txt' # network update log. We should check this file from time to time.
+NEW_OR_UPDATED_CHIP_FILE = '../Data/log/new.or.updated.chip.file.txt'
+
+RNA_SEQ_INFO_DATABASE   = '../Data/information/rnaseq_info_database.txt' # NOTE: RNA_SEQ_INFO_FILE points to the .json file (same path as RNA_SEQ_INFO_DATABASE_JSON), not to this .txt file
+RNA_SEQ_INFO_DATABASE_JSON   = '../Data/information/rnaseq_info_database.json'
+
+GENE_ID_FIRST_TWO_LETTERS = 'AT'
+MEMORY_STRENGTH = 365 # memory retention power (larger value means better memory)
+
+#
+MAPPED_CDATA_DIR = '../Data/C/Mapped' # mapped ChIP-seq data
+
+# Used in merge_edges.py
+EDGE_POOL_DIR = '../Data/history/edge_pool'
+MERGED_EDGE_FILE = '../Data/temp/edges.txt'
+SQLITE_EDGE_FILE = '../Data/temp/edges.sqlite'
+DIFF_EDGE_FILE = '../Data/temp/edges-diff.txt' # the difference between two edge files from yesterday and from today
+
+TARGET_TF_FILE = '../Data/information/target_tf.txt'
diff --git a/Code/merge_edges.py b/Code/merge_edges.py
index 6bbd2f0..872faa9 100644
--- a/Code/merge_edges.py
+++ b/Code/merge_edges.py
@@ -23,6 +23,7 @@
 import os, operator, sys, math, datetime, glob
 from log import write_log_file
 from configure import EDGE_POOL_DIR, MERGED_EDGE_FILE, SQLITE_EDGE_FILE, UPDATE_NETWORK_LOG_FILE
+from utils import make_paths
 import sqlite3
 
 def get_number_of_RNAseq_ids(s):
@@ -134,6 +135,9 @@ def make_new_edge(d):
     
 
 ##main
+
+make_paths(EDGE_POOL_DIR)
+
 write_log_file('[merge_edges.py] Go through all edge files in the edge pool %s.' % (EDGE_POOL_DIR) , UPDATE_NETWORK_LOG_FILE)
 d = {} # d will contain all edges computed so far, where the key is TargetGeneID_TFGeneID, and the value is a list of tuples.  Each tuple is a historical edge.
 file_count = 0
diff --git a/Code/requirements.txt b/Code/requirements.txt
new file mode 100644
index 0000000..4d07dfe
--- /dev/null
+++ b/Code/requirements.txt
@@ -0,0 +1 @@
+networkx
-- 
cgit v1.2.1


From 84ecd1cadec670fb1e7dcbbdae04a982e77494da Mon Sep 17 00:00:00 2001
From: Lan Hui <lanhui@zjnu.edu.cn>
Date: Sun, 11 Aug 2024 15:02:46 +0800
Subject: Remove temporary file

---
 Code/.#update_network_by_force.py | 1 -
 1 file changed, 1 deletion(-)
 delete mode 120000 Code/.#update_network_by_force.py

(limited to 'Code')

diff --git a/Code/.#update_network_by_force.py b/Code/.#update_network_by_force.py
deleted file mode 120000
index 212abbe..0000000
--- a/Code/.#update_network_by_force.py
+++ /dev/null
@@ -1 +0,0 @@
-lanhui@lh-ubuntu22.3552:1722838551
\ No newline at end of file
-- 
cgit v1.2.1


From 9421dea6c34d2302df8b1c2a69b4f82afaedf70c Mon Sep 17 00:00:00 2001
From: Lan Hui <lanhui@zjnu.edu.cn>
Date: Sun, 11 Aug 2024 15:15:02 +0800
Subject: Required Python modules

---
 Code/requirements.txt | 5 +++++
 1 file changed, 5 insertions(+)

(limited to 'Code')

diff --git a/Code/requirements.txt b/Code/requirements.txt
index 4d07dfe..bd9cf96 100644
--- a/Code/requirements.txt
+++ b/Code/requirements.txt
@@ -1 +1,6 @@
+# install command: pip3 install -r requirements.txt -i https://pypi.tuna.tsinghua.edu.cn/simple
 networkx
+flask
+pylablib
+pyBigWig
+scipy
-- 
cgit v1.2.1


From 2f3485a486100d1731229ac018f24cee33734777 Mon Sep 17 00:00:00 2001
From: Lan Hui <lanhui@zjnu.edu.cn>
Date: Fri, 16 Aug 2024 15:59:54 +0800
Subject: Count all downloaded RUNs

---
 Code/count_runs.py | 27 +++++++++++++++++++++++++++
 1 file changed, 27 insertions(+)
 create mode 100644 Code/count_runs.py

(limited to 'Code')

diff --git a/Code/count_runs.py b/Code/count_runs.py
new file mode 100644
index 0000000..c254c31
--- /dev/null
+++ b/Code/count_runs.py
@@ -0,0 +1,27 @@
+# Purpose: count the total number of unique run IDs in all TPM files
+# Usage: python3 count_runs.py
+# 16 Aug 2024, zjnu, hui
+
+import glob, gzip
+
+runs = set()
+
+for filename in glob.glob('../Data/history/expr/TPM*'):
+    print(filename)
+    if filename.endswith('txt'):
+        with open(filename) as f:
+            line = f.readlines()[0]
+            line = line.strip()
+            lst = line.split('\t')
+            for runid in lst[1:]:
+                runs.add(runid)
+    elif filename.endswith('gz'):
+        with gzip.open(filename, 'rt') as f:
+            line = f.readlines()[0]
+            line = line.strip()
+            lst = line.split('\t')
+            for runid in lst[1:]:
+                runs.add(runid)
+
+print(runs)
+print('Total unique run IDs: %d' % len(runs))
-- 
cgit v1.2.1


From f4ce5d7a1b5fa133677219c951ff01d56ce4cb44 Mon Sep 17 00:00:00 2001
From: Lan Hui <lanhui@zjnu.edu.cn>
Date: Sat, 17 Aug 2024 10:11:30 +0800
Subject: merge all TPM files and put the merged file in folder assemble

---
 Code/mergeTPM.py | 96 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 96 insertions(+)
 create mode 100644 Code/mergeTPM.py

(limited to 'Code')

diff --git a/Code/mergeTPM.py b/Code/mergeTPM.py
new file mode 100644
index 0000000..b7ae49b
--- /dev/null
+++ b/Code/mergeTPM.py
@@ -0,0 +1,96 @@
+# Purpose: merge all TPM files in PATH_TO_TPM and put the merge file to PATH_TO_TPM/assemble/
+# Usage: python3 mergeTPM.py
+# 17 Aug 2024, zjnu, hui
+
+import os, glob, gzip
+
+PATH_TO_TPM = '../Data/history/expr/'
+
+tpm_files_to_include = glob.glob(os.path.join(PATH_TO_TPM, 'TPM*'))
+#tpm_files_to_include = ['../Data/history/expr/TPM.inhouse.diurnal.txt', '../Data/history/expr/TPM.20190919.txt']
+
+# Get all run IDs
+run_ids = set()
+for filename in tpm_files_to_include:
+    print(filename)
+    if filename.endswith('txt'):
+        with open(filename) as f:
+            line = f.readlines()[0]
+            line = line.strip()
+            lst = line.split('\t')
+            for runid in lst[1:]:
+                run_ids.add(runid)
+    elif filename.endswith('gz'):
+        with gzip.open(filename, 'rt') as f:
+            line = f.readlines()[0]
+            line = line.strip()
+            lst = line.split('\t')
+            for runid in lst[1:]:
+                run_ids.add(runid)
+
+print('Total unique run IDs: %d' % len(run_ids))
+
+# Get gene IDs
+gene_ids = []
+with open(os.path.join(PATH_TO_TPM, 'TPM.txt')) as f:
+    for line in f:
+        line = line.strip()
+        if line.startswith('AT'):
+            lst = line.split('\t')
+            gene_ids.append(lst[0])
+
+assert len(gene_ids) == 33602
+
+
+# Assemble gene expressions
+g = {} # {'run':{'g1':1, 'g2':2}}
+def populate_dictionary(d, lines):
+    line = lines[0].strip()
+    runs = line.split('\t')[1:]
+    for line in lines[1:]:
+        line = line.strip()
+        lst = line.split('\t')
+        gene = lst[0]
+        expression_levels = lst[1:]
+        run_index = 0
+        for x in expression_levels:
+            run = runs[run_index]
+            if not run in d:
+                d[run] = {}
+            d[run][gene] = x
+            run_index += 1
+    
+
+for filename in tpm_files_to_include:
+    print('Assemble ' + filename)
+    if filename.endswith('txt'):
+        with open(filename) as f:
+            lines = f.readlines()
+            populate_dictionary(g, lines)            
+    elif filename.endswith('gz'):
+        with gzip.open(filename, 'rt') as f:
+            lines = f.readlines()
+            populate_dictionary(g, lines)            
+    
+
+# Write to PATH_TO_TPM/assemble/assemble.<number of unique run IDs>.txt
+run_ids_sorted = sorted(list(run_ids))
+assemble_dir = os.path.join(PATH_TO_TPM, 'assemble')
+if not os.path.exists(assemble_dir):
+    os.mkdir(assemble_dir)
+fname = os.path.join(assemble_dir, 'assemble.%d.txt' % (len(run_ids)))
+print(f'Write to {fname}')
+with open(fname, 'w') as f:
+    # write first line
+    lst1 = ['gene_id'] + run_ids_sorted
+    f.write('\t'.join(lst1) + '\n')
+    for gene in gene_ids:
+        lst2 = [gene]
+        for run in run_ids_sorted:
+            if gene in g[run]:
+                lst2.append(g[run][gene])
+            else:
+                lst2.append('NA')
+        f.write('\t'.join(lst2) + '\n')
+        assert len(lst1) == len(lst2)
+
-- 
cgit v1.2.1


From f312d8bbef849370aacefba1f6e8b1b420047cbb Mon Sep 17 00:00:00 2001
From: Lan Hui <lanhui@zjnu.edu.cn>
Date: Sat, 17 Aug 2024 10:12:00 +0800
Subject: [mergeTPM.py] correct grammar error

---
 Code/mergeTPM.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'Code')

diff --git a/Code/mergeTPM.py b/Code/mergeTPM.py
index b7ae49b..f37f438 100644
--- a/Code/mergeTPM.py
+++ b/Code/mergeTPM.py
@@ -1,4 +1,4 @@
-# Purpose: merge all TPM files in PATH_TO_TPM and put the merge file to PATH_TO_TPM/assemble/
+# Purpose: merge all TPM files in PATH_TO_TPM and put the merged file to PATH_TO_TPM/assemble/
 # Usage: python3 mergeTPM.py
 # 17 Aug 2024, zjnu, hui
 
-- 
cgit v1.2.1


From 965c52946eef4cd76f0a4119be63d82348cfee23 Mon Sep 17 00:00:00 2001
From: Lan Hui <lanhui@zjnu.edu.cn>
Date: Mon, 26 Aug 2024 16:20:34 +0800
Subject: Use R package jsonlite instead of rjson, to support the 'digits'
 option, which allows keeping fewer digits after the decimal point, thus
 saving disk storage space

---
 Code/getTF.py             | 19 +++++++++++++++++++
 Code/slice_TPM_to_JSON.R  | 20 ++++++++++++++++++++
 Code/slice_TPM_to_JSON.py |  4 ++--
 3 files changed, 41 insertions(+), 2 deletions(-)
 create mode 100644 Code/getTF.py
 create mode 100644 Code/slice_TPM_to_JSON.R

(limited to 'Code')

diff --git a/Code/getTF.py b/Code/getTF.py
new file mode 100644
index 0000000..1090bd2
--- /dev/null
+++ b/Code/getTF.py
@@ -0,0 +1,19 @@
+import json
+
+tfs = set()
+
+with open('../Data/information/target_tf.txt') as f:
+    for line in f:
+        line = line.strip()
+        lst = line.split('\t')
+        tf = lst[1]
+        tfs.add(tf)
+
+with open('../Data/information/target_tf_agris.txt') as f:
+    for line in f:
+        line = line.strip()
+        lst = line.split('\t')
+        tf = lst[1]
+        tfs.add(tf)
+
+print(json.dumps(sorted(list(tfs))))
diff --git a/Code/slice_TPM_to_JSON.R b/Code/slice_TPM_to_JSON.R
new file mode 100644
index 0000000..e7021ef
--- /dev/null
+++ b/Code/slice_TPM_to_JSON.R
@@ -0,0 +1,20 @@
+
+#library(rjson)
+library(jsonlite)
+dir.name <- '../Data/history/expr/json'
+tpm.file <- '../Data/history/expr/assemble/TPM.6604.txt'
+take.log <- 'YES'
+X <- read.table(tpm.file, header=T, check.names=FALSE, sep="\t")
+gene.id <- as.vector(X[,1])
+X[,1] <- NULL # remove first column
+if (take.log == 'YES') {
+    X <- log(X+1)
+}
+if (!dir.exists(dir.name)) {
+    dir.create(dir.name)
+}
+for (i in 1:dim(X)[1]) {
+            y <- toJSON(X[i,], digits=I(3))
+    file.name = paste(dir.name, paste(gene.id[i], 'json', sep='.'), sep='/')
+    cat(y, file=file.name)
+}
diff --git a/Code/slice_TPM_to_JSON.py b/Code/slice_TPM_to_JSON.py
index e597b78..618310f 100644
--- a/Code/slice_TPM_to_JSON.py
+++ b/Code/slice_TPM_to_JSON.py
@@ -127,7 +127,7 @@ def make_json_file(expr_dict, dir_name, glb_param_dict):
 
 def make_json_file_using_r(dir_name, glb_param_dict): # use r script to make it faster
     r_code = '''
-	library(rjson)
+	library(jsonlite)
 	dir.name <- '%s'
 	tpm.file <- '%s'
 	take.log <- '%s'
@@ -141,7 +141,7 @@ def make_json_file_using_r(dir_name, glb_param_dict): # use r script to make it
 	    dir.create(dir.name)
 	}
 	for (i in 1:dim(X)[1]) {
-	    y <- toJSON(X[i,])
+	    y <- toJSON(X[i,], digits=I(3))
 	    file.name = paste(dir.name, paste(gene.id[i], 'json', sep='.'), sep='/')
 	    cat(y, file=file.name)
 	}
-- 
cgit v1.2.1


From fa719fee177152cc9f5d4632c6aa68b91336f8df Mon Sep 17 00:00:00 2001
From: Lan Hui <lanhui@zjnu.edu.cn>
Date: Mon, 26 Aug 2024 17:50:12 +0800
Subject: JSON file should look like '{key:value}', not '[{key:value}]'

---
 Code/slice_TPM_to_JSON.R  | 20 --------------------
 Code/slice_TPM_to_JSON.py |  2 +-
 2 files changed, 1 insertion(+), 21 deletions(-)
 delete mode 100644 Code/slice_TPM_to_JSON.R

(limited to 'Code')

diff --git a/Code/slice_TPM_to_JSON.R b/Code/slice_TPM_to_JSON.R
deleted file mode 100644
index e7021ef..0000000
--- a/Code/slice_TPM_to_JSON.R
+++ /dev/null
@@ -1,20 +0,0 @@
-
-#library(rjson)
-library(jsonlite)
-dir.name <- '../Data/history/expr/json'
-tpm.file <- '../Data/history/expr/assemble/TPM.6604.txt'
-take.log <- 'YES'
-X <- read.table(tpm.file, header=T, check.names=FALSE, sep="\t")
-gene.id <- as.vector(X[,1])
-X[,1] <- NULL # remove first column
-if (take.log == 'YES') {
-    X <- log(X+1)
-}
-if (!dir.exists(dir.name)) {
-    dir.create(dir.name)
-}
-for (i in 1:dim(X)[1]) {
-            y <- toJSON(X[i,], digits=I(3))
-    file.name = paste(dir.name, paste(gene.id[i], 'json', sep='.'), sep='/')
-    cat(y, file=file.name)
-}
diff --git a/Code/slice_TPM_to_JSON.py b/Code/slice_TPM_to_JSON.py
index 618310f..b2edfa2 100644
--- a/Code/slice_TPM_to_JSON.py
+++ b/Code/slice_TPM_to_JSON.py
@@ -141,7 +141,7 @@ def make_json_file_using_r(dir_name, glb_param_dict): # use r script to make it
 	    dir.create(dir.name)
 	}
 	for (i in 1:dim(X)[1]) {
-	    y <- toJSON(X[i,], digits=I(3))
+	    y <- toJSON(unbox(X[i,]), digits=I(3))
 	    file.name = paste(dir.name, paste(gene.id[i], 'json', sep='.'), sep='/')
 	    cat(y, file=file.name)
 	}
-- 
cgit v1.2.1


From 837a291e6a1816920c7116410dd1e0df9fd3eaf7 Mon Sep 17 00:00:00 2001
From: Lan Hui <lanhui@zjnu.edu.cn>
Date: Wed, 28 Aug 2024 14:23:42 +0800
Subject: Pretty print JSON

---
 Code/slice_TPM_to_JSON.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'Code')

diff --git a/Code/slice_TPM_to_JSON.py b/Code/slice_TPM_to_JSON.py
index b2edfa2..7509f00 100644
--- a/Code/slice_TPM_to_JSON.py
+++ b/Code/slice_TPM_to_JSON.py
@@ -141,7 +141,7 @@ def make_json_file_using_r(dir_name, glb_param_dict): # use r script to make it
 	    dir.create(dir.name)
 	}
 	for (i in 1:dim(X)[1]) {
-	    y <- toJSON(unbox(X[i,]), digits=I(3))
+	    y <- toJSON(unbox(X[i,]), digits=I(3), pretty=TRUE)
 	    file.name = paste(dir.name, paste(gene.id[i], 'json', sep='.'), sep='/')
 	    cat(y, file=file.name)
 	}
-- 
cgit v1.2.1