From a8f6a99bb3d3dba85705ed7df93145c28168d659 Mon Sep 17 00:00:00 2001 From: Hui Lan Date: Fri, 1 Nov 2019 20:51:19 +0800 Subject: englishpal: first commit --- app/pickle_idea2.py | 74 +++++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 74 insertions(+) create mode 100644 app/pickle_idea2.py (limited to 'app/pickle_idea2.py') diff --git a/app/pickle_idea2.py b/app/pickle_idea2.py new file mode 100644 index 0000000..a1557ed --- /dev/null +++ b/app/pickle_idea2.py @@ -0,0 +1,74 @@ +# Purpose: dictionary & pickle as a simple means of database. +# Task: incorporate the functions into wordfreqCMD.py such that it will also show cumulative frequency. +# Note: unlike pick_idea.py, now the second item is not frequency, but a list of dates. + +import pickle +from datetime import datetime + +def lst2dict(lst, d): + ''' + Store the information in list lst to dictionary d. + Note: nothing is returned. + + ''' + for x in lst: + word = x[0] + dates = x[1] + if not word in d: + d[word] = dates + else: + d[word] += dates + + +def dict2lst(d): + if len(d) > 0: + keys = list(d.keys()) + if isinstance(d[keys[0]], int): + lst = [] + for k in d: + lst.append((k, [datetime.now().strftime('%Y%m%d%H%M')])) + return lst + elif isinstance(d[keys[0]], list): + return list(d.items()) # a list of (key, value) pairs + + return [] + +def merge_frequency(lst1, lst2): + d = {} + lst2dict(lst1, d) + lst2dict(lst2, d) + return d + + +def load_record(pickle_fname): + f = open(pickle_fname, 'rb') + d = pickle.load(f) + f.close() + return d + + +def save_frequency_to_pickle(d, pickle_fname): + f = open(pickle_fname, 'wb') + exclusion_lst = ['one', 'no', 'has', 'had', 'do', 'that', 'have', 'by', 'not', 'but', 'we', 'this', 'my', 'him', 'so', 'or', 'as', 'are', 'it', 'from', 'with', 'be', 'can', 'for', 'an', 'if', 'who', 'whom', 'whose', 'which', 'the', 'to', 'a', 'of', 'and', 'you', 'i', 'he', 'she', 'they', 'me', 'was', 'were', 'is', 'in', 'at', 'on', 'their', 'his', 'her', 's', 'said', 'all', 'did', 'been', 'w'] + d2 = {} + for k in d: + if not k in exclusion_lst and not k.isnumeric() and not len(k) < 2: + d2[k] = list(sorted(set(d[k]))) + pickle.dump(d2, f) + f.close() + + + +if __name__ == '__main__': + + lst1 = [('apple',['201910251437', '201910251438']), ('banana',['201910251439'])] + d = {} + lst2dict(lst1, d) # d will change + save_frequency_to_pickle(d, 'frequency.p') # frequency.p is our database + + + lst2 = [('banana',['201910251439']), ('orange', ['201910251440', '201910251439'])] + d = load_record('frequency.p') + lst1 = dict2lst(d) + d = merge_frequency(lst2, lst1) + print(d) -- cgit v1.2.1