diff options
author | Lan Hui <lanhui@zjnu.edu.cn> | 2024-08-16 15:59:54 +0800 |
---|---|---|
committer | Lan Hui <lanhui@zjnu.edu.cn> | 2024-08-16 15:59:54 +0800 |
commit | 2f3485a486100d1731229ac018f24cee33734777 (patch) | |
tree | 097bc30bb764ea41c36ad0924314f08c309316a8 | |
parent | 9421dea6c34d2302df8b1c2a69b4f82afaedf70c (diff) |
Count all downloaded RUNs
-rw-r--r-- | Code/count_runs.py | 27 |
1 files changed, 27 insertions, 0 deletions
# Purpose: count the total number of unique run IDs in all TPM files
# Usage: python3 count_runs.py
# 16 Aug 2024, zjnu, hui

import glob
import gzip

# Glob pattern (relative to the working directory) matching the TPM files.
TPM_GLOB = '../Data/history/expr/TPM*'


def open_tpm(filename):
    """Open a TPM file for reading text, choosing the opener by suffix.

    Returns a text-mode file object for names ending in 'txt' (plain text)
    or 'gz' (gzip-compressed), and None for any other name so that callers
    can skip files of an unrecognised type.
    """
    if filename.endswith('txt'):
        return open(filename)
    if filename.endswith('gz'):
        return gzip.open(filename, 'rt')
    return None


def header_run_ids(header):
    """Return the run IDs found in a tab-separated TPM header line.

    The first column is the row-label column and is skipped; every
    remaining non-empty field is taken as a run ID.
    """
    # Strip only the line terminator.  A bare strip() would also remove a
    # leading tab (empty first cell), shifting the columns so that the first
    # run ID would be mistaken for the label column and dropped.
    fields = header.rstrip('\r\n').split('\t')
    stripped = (field.strip() for field in fields[1:])
    # Empty fields (e.g. from a trailing tab) are not run IDs.
    return [runid for runid in stripped if runid]


def read_run_ids(filename):
    """Return the run IDs listed in the header (first line) of one TPM file.

    Files whose suffix is neither 'txt' nor 'gz' yield an empty list, as
    does an empty file.
    """
    handle = open_tpm(filename)
    if handle is None:
        return []
    with handle as f:
        # Only the header is needed; readline() avoids loading (and, for
        # gzip, decompressing) the whole expression matrix.  It returns ''
        # for an empty file instead of raising.
        return header_run_ids(f.readline())


def count_runs(pattern=TPM_GLOB):
    """Collect the unique run IDs from every file matching *pattern*.

    Each matching file name is printed as it is visited.  Returns the set
    of run IDs.
    """
    runs = set()
    for filename in glob.glob(pattern):
        print(filename)
        runs.update(read_run_ids(filename))
    return runs


def main():
    """Print the unique run IDs and their total count."""
    runs = count_runs()
    print(runs)
    print('Total unique run IDs: %d' % len(runs))


if __name__ == '__main__':
    main()