diff options
author | Hui Lan <lanhui@zjnu.edu.cn> | 2019-11-01 20:51:19 +0800 |
---|---|---|
committer | Hui Lan <lanhui@zjnu.edu.cn> | 2019-11-01 20:51:19 +0800 |
commit | a8f6a99bb3d3dba85705ed7df93145c28168d659 (patch) | |
tree | b21a324eb857b0b236ea76f22692e18420258e45 /app/pickle_idea.py |
englishpal: first commit
Diffstat (limited to 'app/pickle_idea.py')
-rw-r--r-- | app/pickle_idea.py | 65 |
1 files changed, 65 insertions, 0 deletions
diff --git a/app/pickle_idea.py b/app/pickle_idea.py new file mode 100644 index 0000000..725aebc --- /dev/null +++ b/app/pickle_idea.py @@ -0,0 +1,65 @@ +# Purpose: dictionary & pickle as a simple means of database. +# Task: incorporate the functions into wordfreqCMD.py such that it will also show cumulative frequency. + +import pickle + + +def lst2dict(lst, d): + ''' + Store the information in list lst to dictionary d. + Note: nothing is returned. + + ''' + for x in lst: + word = x[0] + freq = x[1] + if not word in d: + d[word] = freq + else: + d[word] += freq + + +def dict2lst(d): + return list(d.items()) # a list of (key, value) pairs + + +def merge_frequency(lst1, lst2): + d = {} + lst2dict(lst1, d) + lst2dict(lst2, d) + return d + + +def load_record(pickle_fname): + f = open(pickle_fname, 'rb') + d = pickle.load(f) + f.close() + return d + + +def save_frequency_to_pickle(d, pickle_fname): + f = open(pickle_fname, 'wb') + #exclusion_lst = ['one', 'no', 'has', 'had', 'do', 'that', 'have', 'by', 'not', 'but', 'we', 'this', 'my', 'him', 'so', 'or', 'as', 'are', 'it', 'from', 'with', 'be', 'can', 'for', 'an', 'if', 'who', 'whom', 'whose', 'which', 'the', 'to', 'a', 'of', 'and', 'you', 'i', 'he', 'she', 'they', 'me', 'was', 'were', 'is', 'in', 'at', 'on', 'their', 'his', 'her', 's', 'said', 'all', 'did', 'been', 'w'] + exclusion_lst = [] + d2 = {} + for k in d: + if not k in exclusion_lst and not k.isnumeric() and len(k) > 1: + d2[k] = d[k] + pickle.dump(d2, f) + f.close() + + + +if __name__ == '__main__': + + lst1 = [('apple',2), ('banana',1)] + d = {} + lst2dict(lst1, d) # d will change + save_frequency_to_pickle(d, 'frequency.p') # frequency.p is our database + + + lst2 = [('banana',2), ('orange', 4)] + d = load_record('frequency.p') + lst1 = dict2lst(d) + d = merge_frequency(lst2, lst1) + print(d) |