summaryrefslogtreecommitdiff
path: root/Code/count_runs.py
blob: c254c314fa9095a1d1234ecfe9d8b40b3b46718b (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
# Purpose: count the total number of unique run IDs in all TPM files
# Usage: python3 count_runs.py
# 16 Aug 2024, zjnu, hui

import glob, gzip

def header_run_ids(filename):
    """Return the run IDs listed in the header line of one TPM file.

    The header is the first line of the file; it is split on tabs and the
    first field (the label of the leading column) is dropped, so the
    remaining fields are returned as run IDs.

    Only names ending in 'txt' (plain text) or 'gz' (gzip-compressed text)
    are read; any other name yields an empty list.  An empty file also
    yields an empty list instead of raising IndexError.
    """
    if filename.endswith('txt'):
        opener = open
    elif filename.endswith('gz'):
        opener = gzip.open
    else:
        return []

    with opener(filename, 'rt') as f:
        # Read just the header line; the rest of the (potentially very
        # large) table is never loaded or decompressed.
        header = f.readline()

    # For an empty file header is '', which splits to [''] and slices to [].
    return header.strip().split('\t')[1:]


# Union of run IDs seen across all TPM files.
runs = set()

for filename in glob.glob('../Data/history/expr/TPM*'):
    print(filename)
    runs.update(header_run_ids(filename))

print(runs)
print('Total unique run IDs: %d' % len(runs))