diff options
Diffstat (limited to 'Code/count_runs.py')
-rw-r--r-- | Code/count_runs.py | 27 |
1 files changed, 27 insertions, 0 deletions
diff --git a/Code/count_runs.py b/Code/count_runs.py new file mode 100644 index 0000000..c254c31 --- /dev/null +++ b/Code/count_runs.py @@ -0,0 +1,27 @@ +# Purpose: count the total number of unique run IDs in all TPM files +# Usage: python3 count_runs.py +# 16 Aug 2024, zjnu, hui + +import glob, gzip + +runs = set() + +for filename in glob.glob('../Data/history/expr/TPM*'): + print(filename) + if filename.endswith('txt'): + with open(filename) as f: + line = f.readlines()[0] + line = line.strip() + lst = line.split('\t') + for runid in lst[1:]: + runs.add(runid) + elif filename.endswith('gz'): + with gzip.open(filename, 'rt') as f: + line = f.readlines()[0] + line = line.strip() + lst = line.split('\t') + for runid in lst[1:]: + runs.add(runid) + +print(runs) +print('Total unique run IDs: %d' % len(runs)) |