blob: 67d368b3db3318a0a729f0e6a203409fb623d8c2 (
plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
|
# Usage: python delete_not_used_fastq.py
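# Before running: edit DIR, the directory containing all fastq.gz files,
# and generate the ID list file that USED_IDS points to (one used ID per line).
# Purpose: list the .gz files in DIR whose ID is not in USED_IDS and move them to the to.be.deleted folder inside DIR.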
#
# 20 Apr 2017, slcu, hui
import glob
import os
import shutil
import sys
def read_ids(fname):
    """Read one ID per line from *fname* and return them as dict keys.

    Each line is stripped of surrounding whitespace.  Blank lines are
    skipped so that the empty string is never registered as an ID.

    Returns a dict mapping every ID found to 1.
    Raises IOError/OSError if the file cannot be opened or read.
    """
    d = {}
    # The context manager closes the file even if reading raises.
    with open(fname) as f:
        for line in f:
            line = line.strip()
            if line:
                d[line] = 1
    return d
# Directory holding the .gz files to inspect.
DIR = '/home/hui/network/R/Raw'
# Unused files are moved here instead of being deleted outright.
destDIR = os.path.join(DIR, 'to.be.deleted')
USED_IDS = '/home/hui/network/v03/Data/temp/used.sra.ids.txt' # generated by grep @ /home/hui/network/v03/Data/parameter/parameter_for_buildRmatrix.txt | grep 'SRR\|ERR\|DRR' | perl -pe 'substr($_, 0, 3) = ""; s/X+$//'

if not os.path.isdir(destDIR):
    os.makedirs(destDIR)

ids = read_ids(USED_IDS)
flst = glob.glob(os.path.join(DIR, '*.gz'))

BYTES_PER_G = 1024.0 * 1024 * 1024

print('file\tsize.in.G')
total_g = 0.0  # running total of moved data, in G (was named `sum`, shadowing the builtin)
for path in flst:
    fname = os.path.basename(path)
    # The ID is the file name up to the first '_' if there is one
    # (e.g. 'SRR123_1.fastq.gz' -> 'SRR123'), otherwise up to the first '.'.
    if '_' in fname:
        i = fname[0:fname.find('_')]
    else:
        i = fname[0:fname.find('.')]
    if i in ids:
        continue

    # Measure before moving: the source path no longer exists afterwards.
    size_g = os.path.getsize(path) / BYTES_PER_G
    try:
        # shutil.move instead of a shell 'mv' string: paths containing spaces
        # or shell metacharacters are handled correctly.  An explicit target
        # file name keeps mv's behaviour of replacing an existing file.
        shutil.move(path, os.path.join(destDIR, fname))
    except (OSError, IOError) as e:
        # Keep going with the remaining files, but do not report or count
        # a file that was not actually moved.
        sys.stderr.write('Could not move %s: %s\n' % (path, e))
        continue
    print('%s\t%4.2f' % (path, size_g))
    total_g += size_g
print('Total %4.2f G moved to %s' % (total_g, destDIR))
|