diff options
author | Hui Lan <lanhui@zjnu.edu.cn> | 2019-12-04 19:03:19 +0800 |
---|---|---|
committer | Hui Lan <lanhui@zjnu.edu.cn> | 2019-12-04 19:03:19 +0800 |
commit | 97fdefab064f63642fa3ece05b807d29b459df31 (patch) | |
tree | a058530023224f3e35b1783996f3530c80c04bc5 /Code/delete_not_used_fastq.py |
brain: add python and R code to local repository.
Diffstat (limited to 'Code/delete_not_used_fastq.py')
-rw-r--r-- | Code/delete_not_used_fastq.py | 43 |
1 files changed, 43 insertions, 0 deletions
# Usage: python delete_not_used_fastq.py
# Edit DIR, the directory containing all fastq.gz files.
# Also generate USED_IDS.
# Purpose: list fastq.gz files that are not used. Move them to the
# to.be.deleted folder.
#
# 20 Apr 2017, slcu, hui

import glob
import os
import shutil

DIR = '/home/hui/network/R/Raw'
destDIR = os.path.join(DIR, 'to.be.deleted')
# USED_IDS is generated by:
#   grep @ /home/hui/network/v03/Data/parameter/parameter_for_buildRmatrix.txt | grep 'SRR\|ERR\|DRR' | perl -pe 'substr($_, 0, 3) = ""; s/X+$//'
USED_IDS = '/home/hui/network/v03/Data/temp/used.sra.ids.txt'

# Float on purpose so that size / BYTES_PER_G is a true division even when
# the script is run with Python 2.
BYTES_PER_G = float(1024 * 1024 * 1024)


def read_ids(fname):
    """Read one ID per line from *fname*.

    Returns a dict whose keys are the whitespace-stripped lines of the file
    and whose values are all 1 (the dict is used purely for membership
    tests).  The file is closed even if reading fails part-way through.
    """
    with open(fname) as f:
        return {line.strip(): 1 for line in f}


def sample_id(fname):
    """Return the run ID encoded at the start of a file name.

    The ID is everything before the first '_' if the name contains one,
    otherwise everything before the first '.'.  A name containing neither
    character is returned unchanged.
    """
    separator = '_' if '_' in fname else '.'
    return fname.partition(separator)[0]


def move_unused(directory, dest_dir, used_ids):
    """Move every '*.gz' file in *directory* whose ID is not in *used_ids*.

    Prints a tab-separated table (path, size in G) of the files being moved
    and returns the total size moved, in G.  *dest_dir* is created if it does
    not exist yet.  Raises OSError / shutil.Error if a file cannot be moved,
    so the reported total never includes files that stayed behind.
    """
    if not os.path.isdir(dest_dir):
        os.makedirs(dest_dir)

    print('file\tsize.in.G')
    total = 0.0
    for path in glob.glob(os.path.join(directory, '*.gz')):
        fname = os.path.basename(path)
        if sample_id(fname) in used_ids:
            continue
        size_in_g = os.path.getsize(path) / BYTES_PER_G
        print('%s\t%4.2f' % (path, size_in_g))
        # Move without going through a shell, so paths containing spaces or
        # shell metacharacters are handled correctly.  Giving the full target
        # path keeps mv's behaviour of replacing an existing file of the same
        # name in dest_dir.
        shutil.move(path, os.path.join(dest_dir, fname))
        total += size_in_g
    return total


def main():
    """List and move the unused fastq.gz files under DIR, then print a total."""
    ids = read_ids(USED_IDS)
    total = move_unused(DIR, destDIR, ids)
    print('Total %4.2f G moved to %s' % (total, destDIR))


if __name__ == '__main__':
    main()