brain: add python and R code to local repository.

author: Hui Lan <lanhui@zjnu.edu.cn> 2019-12-04 19:03:19 +0800
committer: Hui Lan <lanhui@zjnu.edu.cn> 2019-12-04 19:03:19 +0800
commit: 97fdefab064f63642fa3ece05b807d29b459df31 (patch)
tree: a058530023224f3e35b1783996f3530c80c04bc5 /Code/count_word.py
1 files changed, 36 insertions, 0 deletions
diff --git a/Code/count_word.py b/Code/count_word.py
new file mode 100644
index 0000000..cd1d3c6
--- /dev/null
+++ b/Code/count_word.py
@@ -0,0 +1,36 @@
+# Usage: python count_word.py /home/hui/network/v03/Data/information/rnaseq_info_database.txt
+#
+# Purpose: get all words in a file, order them by their frequencies.
+#
+# 20 Apr 2017, slcu, hui
+
+FILE_NAME = '/home/hui/network/v03/Data/information/rnaseq_info_database.txt'
+
+from collections import Counter
+import operator
+import string
+import sys
+
+FILE_NAME = sys.argv[1] if len(sys.argv) > 1 else FILE_NAME  # no argument: keep the default path set above
+
+def remove_punctuation(s):
+    """Return the whitespace-separated words of s, stripped of ASCII punctuation, joined by spaces."""
+    # str.translate(None, chars) exists only on Python 2 str; filter characters to run on 2 and 3.
+    words = [''.join(c for c in w if c not in string.punctuation) for w in s.split()]
+    return ' '.join(words).strip()
+
+with open(FILE_NAME) as f:  # context manager closes the file even on error
+    lines = f.readlines()
+
+# Collect the cleaned, lower-cased text of the 5th and later tab-separated
+# fields; a list joined once avoids quadratic string concatenation.
+pieces = []
+for line in lines[1:]: # don't include header line
+    for x in line.strip().split('\t')[4:]:
+        pieces.append(remove_punctuation(x.lower()))
+yourtext = ' '.join(pieces)
+
+# most_common() lists (word, count) pairs by descending count.
+d = Counter(yourtext.split())
+for k, count in d.most_common():
+    print('%s\t\t%s' % (k, count))
author	Hui Lan <lanhui@zjnu.edu.cn>	2019-12-04 19:03:19 +0800
committer	Hui Lan <lanhui@zjnu.edu.cn>	2019-12-04 19:03:19 +0800
commit	97fdefab064f63642fa3ece05b807d29b459df31 (patch)
tree	a058530023224f3e35b1783996f3530c80c04bc5 /Code/count_word.py