blob: c254c314fa9095a1d1234ecfe9d8b40b3b46718b (
plain)
| 1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
 | # Purpose: count the total number of unique run IDs in all TPM files
# Usage: python3 count_runs.py
# 16 Aug 2024, zjnu, hui
import glob, gzip
def read_run_ids(filename):
    """Return the run IDs found in the header line of one TPM file.

    The header is the first line of the file, split on tabs; the first
    field is skipped and the remaining fields are returned as run IDs.

    Files whose name ends with 'txt' are read as plain text and files
    whose name ends with 'gz' are read as gzip-compressed text.  Any other
    file name, and any empty file, yields an empty list.
    """
    if filename.endswith('txt'):
        handle = open(filename)
    elif filename.endswith('gz'):
        handle = gzip.open(filename, 'rt')
    else:
        return []
    with handle as f:
        # Only the header is needed, so read a single line instead of
        # loading the whole file.  readline() returns '' for an empty
        # file, which falls through to an empty result below.
        header = f.readline()
    # NOTE(review): strip() also removes a leading tab, so a header whose
    # first cell is empty would lose its first run ID -- confirm that the
    # first column is always labelled.
    return header.strip().split('\t')[1:]


# Collect the run IDs from every matching file; the set removes duplicates
# that appear in more than one file.
runs = set()
for filename in glob.glob('../Data/history/expr/TPM*'):
    print(filename)
    runs.update(read_run_ids(filename))
print(runs)
print('Total unique run IDs: %d' % len(runs))
 |