summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorLan Hui <lanhui@zjnu.edu.cn>2024-08-16 15:59:54 +0800
committerLan Hui <lanhui@zjnu.edu.cn>2024-08-16 15:59:54 +0800
commit2f3485a486100d1731229ac018f24cee33734777 (patch)
tree097bc30bb764ea41c36ad0924314f08c309316a8
parent9421dea6c34d2302df8b1c2a69b4f82afaedf70c (diff)
Count all downloaded RUNs
-rw-r--r--Code/count_runs.py27
1 files changed, 27 insertions, 0 deletions
diff --git a/Code/count_runs.py b/Code/count_runs.py
new file mode 100644
index 0000000..c254c31
--- /dev/null
+++ b/Code/count_runs.py
@@ -0,0 +1,27 @@
# Purpose: count the total number of unique run IDs in all TPM files
# Usage: python3 count_runs.py
# 16 Aug 2024, zjnu, hui

import glob, gzip

# Glob pattern (relative to the current working directory) selecting the
# TPM files to scan.
TPM_PATTERN = '../Data/history/expr/TPM*'


def read_run_ids(filename):
    """Return the run IDs listed in the header line of one TPM file.

    The header is the first line of the file, split on tabs.  The first
    field is skipped and every remaining field is returned as a run ID.

    Files whose name ends in 'txt' are read as plain text, files whose
    name ends in 'gz' are read through gzip in text mode.  Any other
    file name yields an empty list, as does an empty file.

    Only the first line is read, so the rest of the table is never
    loaded into memory.
    """
    if filename.endswith('txt'):
        opener = open
    elif filename.endswith('gz'):
        opener = gzip.open
    else:
        return []

    with opener(filename, 'rt') as f:
        header = f.readline()  # '' for an empty file

    # Strip trailing whitespace only: a leading tab means the first column
    # has an empty name, and removing it would shift every field left so
    # that the first real run ID gets skipped along with column 0.
    fields = header.rstrip().split('\t')
    return fields[1:]


runs = set()

for filename in glob.glob(TPM_PATTERN):
    print(filename)
    runs.update(read_run_ids(filename))

print(runs)
print('Total unique run IDs: %d' % len(runs))