summaryrefslogtreecommitdiff
path: root/Code/count_runs.py
diff options
context:
space:
mode:
Diffstat (limited to 'Code/count_runs.py')
-rw-r--r--Code/count_runs.py27
1 files changed, 27 insertions, 0 deletions
diff --git a/Code/count_runs.py b/Code/count_runs.py
new file mode 100644
index 0000000..c254c31
--- /dev/null
+++ b/Code/count_runs.py
@@ -0,0 +1,27 @@
# Purpose: count the total number of unique run IDs in all TPM files
# Usage: python3 count_runs.py
# 16 Aug 2024, zjnu, hui

import glob
import gzip

# Glob pattern (relative to the working directory) matching the TPM files.
TPM_GLOB = '../Data/history/expr/TPM*'


def read_run_ids(filename):
    """Return the run IDs found in the header line of one TPM file.

    The header is the first line of the file; it is split on tabs and
    every field after the first one is returned as a run ID.

    Args:
        filename: Path of the file. Names ending in 'txt' are read as
            plain text, names ending in 'gz' as gzip-compressed text.

    Returns:
        A list of run ID strings, in header order. The list is empty when
        the file is empty or the name has neither supported ending.
    """
    if filename.endswith('txt'):
        opener = open
    elif filename.endswith('gz'):
        opener = gzip.open
    else:
        # Neither supported ending: contribute nothing.
        return []

    with opener(filename, 'rt') as f:
        # Only the header is needed, so read one line instead of loading
        # (and, for .gz, decompressing) the whole file.  readline()
        # returns '' for an empty file rather than raising.
        header = f.readline()

    # Strip trailing whitespace only: stripping the left side as well
    # would swallow a leading tab (empty first header cell) and shift the
    # first run ID into the column that is skipped below.
    return header.rstrip().split('\t')[1:]


def collect_run_ids(pattern=TPM_GLOB):
    """Return the set of unique run IDs over all files matching *pattern*.

    Each matching filename is printed as it is processed.

    Args:
        pattern: Glob pattern selecting the files to scan.

    Returns:
        A set with the run IDs gathered from every matching file.
    """
    runs = set()
    # Sorted so that the printed file list is reproducible between runs.
    for filename in sorted(glob.glob(pattern)):
        print(filename)
        runs.update(read_run_ids(filename))
    return runs


def main():
    """Print the unique run IDs and how many there are."""
    runs = collect_run_ids()
    print(runs)
    print('Total unique run IDs: %d' % len(runs))


if __name__ == '__main__':
    main()