summaryrefslogtreecommitdiff
path: root/app/pickle_idea.py
diff options
context:
space:
mode:
authorHui Lan <lanhui@zjnu.edu.cn>2019-11-01 20:51:19 +0800
committerHui Lan <lanhui@zjnu.edu.cn>2019-11-01 20:51:19 +0800
commita8f6a99bb3d3dba85705ed7df93145c28168d659 (patch)
treeb21a324eb857b0b236ea76f22692e18420258e45 /app/pickle_idea.py
englishpal: first commit
Diffstat (limited to 'app/pickle_idea.py')
-rw-r--r--app/pickle_idea.py65
1 files changed, 65 insertions, 0 deletions
diff --git a/app/pickle_idea.py b/app/pickle_idea.py
new file mode 100644
index 0000000..725aebc
--- /dev/null
+++ b/app/pickle_idea.py
@@ -0,0 +1,65 @@
+# Purpose: dictionary & pickle as a simple means of database.
+# Task: incorporate the functions into wordfreqCMD.py such that it will also show cumulative frequency.
+
+import pickle
+
+
def lst2dict(lst, d):
    """
    Accumulate the (word, frequency) pairs of list lst into dictionary d.

    Each element x of lst is read as x[0] (the word) and x[1] (its
    frequency).  A word that is not yet a key of d is stored with its
    frequency; a word that is already a key has the frequency added to the
    stored value.

    Note: d is modified in place and nothing is returned.
    """
    for x in lst:
        word = x[0]
        freq = x[1]
        if word not in d:
            d[word] = freq
        else:
            d[word] += freq
+
+
def dict2lst(d):
    """Return the contents of dictionary d as a list of (key, value) pairs."""
    return list(d.items())
+
+
def merge_frequency(lst1, lst2):
    """
    Merge two lists of (word, frequency) pairs into one new dictionary.

    Both lists are passed through lst2dict (lst1 first, then lst2), so a word
    that occurs more than once, in either list, ends up with the sum of its
    frequencies.

    Returns the resulting dictionary.
    """
    d = {}
    lst2dict(lst1, d)
    lst2dict(lst2, d)
    return d
+
+
def load_record(pickle_fname):
    """
    Load and return the object stored in the pickle file pickle_fname.

    The file is opened in binary mode and is closed again even when
    unpickling fails.  Errors from open() (for example a missing file) and
    from pickle.load() are propagated to the caller.
    """
    # NOTE(review): pickle.load can execute arbitrary code while unpickling;
    # only call this on files the application wrote itself.
    with open(pickle_fname, 'rb') as f:
        return pickle.load(f)
+
+
def save_frequency_to_pickle(d, pickle_fname):
    """
    Write a filtered copy of frequency dictionary d to pickle_fname.

    A key is kept only if it is not in exclusion_lst, is not numeric
    (str.isnumeric) and is longer than one character.  d itself is not
    modified.  Nothing is returned.

    The filtered dictionary is built before the file is opened, so a failure
    while filtering (for example a key without isnumeric) does not truncate
    an existing file.  The file is closed even if pickling fails.
    """
    # Stop-word filtering is currently disabled (the active list is empty);
    # the original candidate list is kept below for reference.
    #exclusion_lst = ['one', 'no', 'has', 'had', 'do', 'that', 'have', 'by', 'not', 'but', 'we', 'this', 'my', 'him', 'so', 'or', 'as', 'are', 'it', 'from', 'with', 'be', 'can', 'for', 'an', 'if', 'who', 'whom', 'whose', 'which', 'the', 'to', 'a', 'of', 'and', 'you', 'i', 'he', 'she', 'they', 'me', 'was', 'were', 'is', 'in', 'at', 'on', 'their', 'his', 'her', 's', 'said', 'all', 'did', 'been', 'w']
    exclusion_lst = []
    d2 = {
        k: v
        for k, v in d.items()
        if k not in exclusion_lst and not k.isnumeric() and len(k) > 1
    }
    with open(pickle_fname, 'wb') as f:
        pickle.dump(d2, f)
+
+
+
if __name__ == '__main__':
    # Demonstration: store one frequency list in the pickle "database",
    # read it back, and merge it with a second frequency list.

    lst1 = [('apple',2), ('banana',1)]
    d = {}
    lst2dict(lst1, d) # d will change
    save_frequency_to_pickle(d, 'frequency.p') # frequency.p is our database

    lst2 = [('banana',2), ('orange', 4)]
    d = load_record('frequency.p')
    lst1 = dict2lst(d)
    d = merge_frequency(lst2, lst1)
    # The frequencies for 'banana' (2 from lst2, 1 from the file) are summed:
    # prints {'banana': 3, 'orange': 4, 'apple': 2}
    print(d)