blob: c254c314fa9095a1d1234ecfe9d8b40b3b46718b (
plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
|
# Purpose: count the total number of unique run IDs in all TPM files
# Usage: python3 count_runs.py
# 16 Aug 2024, zjnu, hui
import glob, gzip
def header_run_ids(filename):
    """Return the run IDs named in the header line of one TPM file.

    The header is the first line of the file; it is split on tabs and
    every field after the first one is returned as a run ID.

    Files ending in 'txt' are opened as plain text and files ending in
    'gz' are opened through gzip in text mode.  Any other file name is
    not read and yields an empty list.  An empty file also yields an
    empty list.
    """
    if filename.endswith('txt'):
        opener = open
    elif filename.endswith('gz'):
        opener = gzip.open
    else:
        return []
    with opener(filename, 'rt') as f:
        # Only the header is needed, so read a single line instead of
        # loading the whole expression table with readlines().
        header = f.readline().strip()
    # ''.split('\t')[1:] is [], so an empty file or blank header
    # contributes no run IDs rather than raising IndexError.
    return header.split('\t')[1:]


# Union of the run IDs found in the headers of all matching TPM files.
runs = set()
for filename in glob.glob('../Data/history/expr/TPM*'):
    print(filename)
    runs.update(header_run_ids(filename))
print(runs)
print('Total unique run IDs: %d' % len(runs))
|