# Usage: python delete_not_used_fastq.py
# Edit DIR, the directory containing all fastq.gz files.
# Also generate USED_IDS (see the command next to its definition below).
# Purpose: list fastq.gz files that are not used.  Move them to the
# to.be.deleted folder.
#
# 20 Apr 2017, slcu, hui

import glob
import os
import shutil
import sys

DIR = '/home/hui/network/R/Raw'
destDIR = os.path.join(DIR, 'to.be.deleted')
USED_IDS = '/home/hui/network/v03/Data/temp/used.sra.ids.txt'
# USED_IDS is generated by:
#   grep @ /home/hui/network/v03/Data/parameter/parameter_for_buildRmatrix.txt | grep 'SRR\|ERR\|DRR' | perl -pe 'substr($_, 0, 3) = ""; s/X+$//'

# Number of bytes in the "G" unit used for the reported sizes.
BYTES_PER_G = 1024.0 * 1024 * 1024


def read_ids(fname):
    """Read one id per line from fname.

    Each line is stripped of surrounding whitespace and used as a key.

    Args:
        fname: path of the text file to read.

    Returns:
        A dict mapping every stripped line to 1 (used as a set of ids).
    """
    d = {}
    # 'with' closes the file even if reading raises.
    with open(fname) as f:
        for line in f:
            d[line.strip()] = 1
    return d


def _file_id(fname):
    """Return the id part of a file name.

    The id is the text before the first '_' if the name has one,
    otherwise the text before the first '.'.
    """
    separator = '_' if '_' in fname else '.'
    return fname.partition(separator)[0]


def move_unused(src_dir, dest_dir, used_ids):
    """List and move the *.gz files of src_dir whose id is not used.

    Prints a header line and then one 'path<TAB>size' line per unused
    file.  dest_dir is created if it does not exist.

    Args:
        src_dir: directory that is searched (non-recursively) for *.gz.
        dest_dir: directory that receives the unused files.
        used_ids: container of ids to keep; tested with 'in'.

    Returns:
        Total size, in G, of the files that were actually moved.
    """
    if not os.path.isdir(dest_dir):
        os.makedirs(dest_dir)

    print('file\tsize.in.G')
    total = 0.0
    for path in glob.glob(os.path.join(src_dir, '*.gz')):
        fname = os.path.basename(path)
        if _file_id(fname) in used_ids:
            continue

        # Size must be read before the move; afterwards path is gone.
        size = os.path.getsize(path) / BYTES_PER_G
        print('%s\t%4.2f' % (path, size))
        try:
            # No shell involved, so paths with spaces or shell
            # metacharacters are safe.  Naming the full destination path
            # replaces an existing file of the same name, as 'mv' does.
            shutil.move(path, os.path.join(dest_dir, fname))
        except (IOError, OSError, shutil.Error) as err:
            # Keep going, but do not count a file that was not moved.
            sys.stderr.write('Warning: could not move %s: %s\n' % (path, err))
            continue
        total += size
    return total


def main():
    """Move the unused files of DIR to destDIR and print the total."""
    ids = read_ids(USED_IDS)
    total = move_unused(DIR, destDIR, ids)
    print('Total %4.2f G moved to %s' % (total, destDIR))


if __name__ == '__main__':
    main()