summaryrefslogtreecommitdiff
path: root/Code/delete_not_used_fastq.py
diff options
context:
space:
mode:
authorHui Lan <lanhui@zjnu.edu.cn>2019-12-04 19:03:19 +0800
committerHui Lan <lanhui@zjnu.edu.cn>2019-12-04 19:03:19 +0800
commit97fdefab064f63642fa3ece05b807d29b459df31 (patch)
treea058530023224f3e35b1783996f3530c80c04bc5 /Code/delete_not_used_fastq.py
brain: add python and R code to local repository.
Diffstat (limited to 'Code/delete_not_used_fastq.py')
-rw-r--r--Code/delete_not_used_fastq.py43
1 files changed, 43 insertions, 0 deletions
diff --git a/Code/delete_not_used_fastq.py b/Code/delete_not_used_fastq.py
new file mode 100644
index 0000000..67d368b
--- /dev/null
+++ b/Code/delete_not_used_fastq.py
@@ -0,0 +1,43 @@
+# Usage: python delete_not_used_fastq.py
+# Edit DIR, the directory containing all fastq.gz files.
+# Also generate USED_IDS.
+# Purpose: list fastq.gz files that are not used. Move them to to.be.deleted folder.
+#
+# 20 Apr 2017, slcu, hui
+
+import glob, os
+
def read_ids(fname):
    """Read a list of IDs, one per line, from the text file *fname*.

    Surrounding whitespace is stripped from every line and blank lines are
    skipped, so an empty line never registers the empty string as an ID.

    Returns a dict mapping each ID to 1; callers use it for membership
    tests (``some_id in ids``).  Duplicate lines collapse into one key.
    """
    ids = {}
    # The context manager closes the file even if reading raises.
    with open(fname) as f:
        for line in f:
            line = line.strip()
            if line:
                ids[line] = 1
    return ids
+
# Imported here so this section is self-contained; only the move/report
# logic below needs them.
import shutil
import sys

# Directory containing all fastq.gz files.  Edit before running.
DIR = '/home/hui/network/R/Raw'
# Unused files are moved into this sub-directory instead of being deleted.
destDIR = os.path.join(DIR, 'to.be.deleted')
# Text file with one used run ID per line.  Generated by:
#   grep @ /home/hui/network/v03/Data/parameter/parameter_for_buildRmatrix.txt | grep 'SRR\|ERR\|DRR' | perl -pe 'substr($_, 0, 3) = ""; s/X+$//'
USED_IDS = '/home/hui/network/v03/Data/temp/used.sra.ids.txt'

# Number of bytes per reported "G" (1024**3), as a float so the division
# below is a true division on any Python version.
BYTES_PER_G = 1024.0 * 1024 * 1024


if not os.path.isdir(destDIR):
    os.makedirs(destDIR)

ids = read_ids(USED_IDS)
flst = glob.glob(os.path.join(DIR, '*.gz'))

print('file\tsize.in.G')
total_g = 0.0  # running total of successfully moved data, in G
for path in flst:
    fname = os.path.basename(path)
    # The run ID is the part of the file name before the first '_', or,
    # when the name has no '_', before the first '.'.
    separator = '_' if '_' in fname else '.'
    run_id = fname.partition(separator)[0]
    if run_id in ids:
        continue  # still in use: leave the file where it is

    # Measure before moving; the source path no longer exists afterwards.
    size_g = os.path.getsize(path) / BYTES_PER_G
    print('%s\t%4.2f' % (path, size_g))
    try:
        # Move without going through a shell, so paths containing spaces
        # or shell metacharacters are handled safely.  An explicit target
        # file path keeps mv's behaviour of replacing a same-named file
        # already present in destDIR.
        shutil.move(path, os.path.join(destDIR, fname))
    except (OSError, shutil.Error) as err:
        # Report the failure and carry on; a file that was not moved must
        # not be counted in the total.
        sys.stderr.write('Could not move %s: %s\n' % (path, err))
        continue
    total_g += size_g
print('Total %4.2f G moved to %s' % (total_g, destDIR))