summary | refs | log | tree | commit | diff
path: root/Code/count_runs.py
diff options
context:
space:
mode:
author: Hui Lan <lanhui@zjnu.edu.cn> 2025-03-09 16:27:16 +0800
committer: Hui Lan <lanhui@zjnu.edu.cn> 2025-03-09 16:27:16 +0800
commit: 92ef45014d95e029f227e9332ac0e2bb80541c83 (patch)
tree: e5fb2cc889c4e12164b67a33ab00cb1d49f2d748 /Code/count_runs.py
parent: d6b28e7a6e11de2622988488badc7a07c662e2c4 (diff)
parent: 837a291e6a1816920c7116410dd1e0df9fd3eaf7 (diff)
Sync changes made on my Ubuntu desktop machine i7-13700H
Diffstat (limited to 'Code/count_runs.py')
-rw-r--r-- Code/count_runs.py 27
1 files changed, 27 insertions, 0 deletions
diff --git a/Code/count_runs.py b/Code/count_runs.py
new file mode 100644
index 0000000..c254c31
--- /dev/null
+++ b/Code/count_runs.py
@@ -0,0 +1,27 @@
+# Purpose: count the total number of unique run IDs in all TPM files
+# Usage: python3 count_runs.py
+# 16 Aug 2024, zjnu, hui
+
+import glob, gzip
+
+runs = set()
+
+# Only the first (header) line of each TPM file is needed, so read just that
+# line instead of loading (or decompressing) the whole file into memory.
+for filename in glob.glob('../Data/history/expr/TPM*'):
+    print(filename)
+    if filename.endswith('txt'):
+        opener = open
+    elif filename.endswith('gz'):
+        opener = gzip.open
+    else:
+        continue  # neither plain text nor gzip: contributes no run IDs
+    with opener(filename, 'rt') as f:
+        header = f.readline().strip()
+    # The first tab-separated field is skipped (presumably the row-label
+    # column -- confirm); the remaining fields are run IDs.  An empty file
+    # gives an empty header and adds nothing instead of raising IndexError.
+    runs.update(header.split('\t')[1:])
+
+print(runs)
+print('Total unique run IDs: %d' % len(runs))