# Purpose: count the total number of unique run IDs in all TPM files
# Usage: python3 count_runs.py
# 16 Aug 2024, zjnu, hui

import glob
import gzip

# Glob pattern selecting every TPM expression table that is scanned.
TPM_GLOB = '../Data/history/expr/TPM*'


def parse_run_ids(header_line):
    """Return the run IDs found in the header line of a TPM table.

    The header is tab-separated; the first cell labels the row-name column
    and is skipped, every following non-empty cell is taken as a run ID.

    Only the line terminator is removed before splitting. Stripping all
    surrounding whitespace would also eat a leading tab (blank first cell),
    shift the columns left and silently drop the first run ID.
    An empty string (header of an empty file) yields an empty list.
    """
    cells = header_line.rstrip('\r\n').split('\t')
    stripped = (cell.strip() for cell in cells[1:])
    # Blank cells (e.g. produced by a trailing tab) are not run IDs.
    return [cell for cell in stripped if cell]


def read_run_ids(filename):
    """Return the run IDs listed in the header of one TPM file.

    Files whose name ends with 'txt' are read as plain text, files ending
    with 'gz' are read as gzip-compressed text. Any other file is ignored
    and an empty list is returned.
    """
    if filename.endswith('txt'):
        handle = open(filename)
    elif filename.endswith('gz'):
        handle = gzip.open(filename, 'rt')
    else:
        return []
    with handle:
        # Only the first line is needed, so read just that line instead of
        # loading the whole table; readline() returns '' for an empty file
        # rather than raising.
        return parse_run_ids(handle.readline())


def collect_runs(pattern=TPM_GLOB):
    """Return the set of unique run IDs over all files matching *pattern*.

    Each matching file name is printed as it is visited.
    """
    runs = set()
    for filename in glob.glob(pattern):
        print(filename)
        runs.update(read_run_ids(filename))
    return runs


def main():
    """Scan all TPM files, then print the run IDs and their total count."""
    runs = collect_runs()
    print(runs)
    print('Total unique run IDs: %d' % len(runs))


if __name__ == '__main__':
    main()