2021-04-06 16:22:03 +08:00
|
|
|
###########################################################################
|
|
|
|
# Copyright 2019 (C) Hui Lan <hui.lan@cantab.net>
|
|
|
|
# Written permission must be obtained from the author for commercial uses.
|
|
|
|
###########################################################################
|
|
|
|
|
|
|
|
# Purpose: use a dictionary and pickle as a simple database.
|
|
|
|
# Task: incorporate these functions into wordfreqCMD.py so that it also shows cumulative frequency.
|
|
|
|
|
|
|
|
import pickle
|
2021-05-31 08:48:03 +08:00
|
|
|
from datetime import datetime
|
2021-04-06 16:22:03 +08:00
|
|
|
|
|
|
|
|
|
|
|
def lst2dict(lst, d):
    '''
    Fold the entries of list lst into dictionary d.

    For every entry, element 0 is used as the key and element 1 as the
    value.  A key that is not yet in d is stored with that value; a key
    that is already in d has the value added to it with +=.

    Note: d is modified in place and nothing is returned.
    '''
    for entry in lst:
        key, amount = entry[0], entry[1]
        if key in d:
            d[key] += amount
        else:
            d[key] = amount
|
|
|
|
|
|
|
|
|
|
|
|
def dict2lst(d):
    '''Return the contents of dictionary d as a list of (key, value) pairs.'''
    pairs = d.items()
    return list(pairs)
|
2023-06-04 00:35:43 +08:00
|
|
|
|
2021-04-06 16:22:03 +08:00
|
|
|
|
|
|
|
def merge_frequency(lst1, lst2):
    '''
    Combine two lists of (word, frequency) entries into one new dictionary.

    Both lists are folded into the same fresh dictionary with lst2dict,
    lst1 first and lst2 second, so a word that occurs in both lists has
    its values combined with +=.  The dictionary is returned.
    '''
    merged = {}
    for entries in (lst1, lst2):
        lst2dict(entries, merged)
    return merged
|
|
|
|
|
|
|
|
|
|
|
|
def load_record(pickle_fname):
    '''
    Read the pickle file pickle_fname and return the object stored in it.

    Note: unpickling can execute arbitrary code, so only load files that
    come from a trusted source.
    '''
    with open(pickle_fname, 'rb') as pickle_file:
        return pickle.load(pickle_file)
|
|
|
|
|
|
|
|
|
|
|
|
def save_frequency_to_pickle(d, pickle_fname):
    '''
    Save a filtered copy of dictionary d to the pickle file pickle_fname.

    A key is kept only if it is not in the exclusion list, is not purely
    numeric (str.isnumeric) and is longer than one character.  d itself
    is left unchanged and nothing is returned.

    The filtered dictionary is built BEFORE the file is opened: opening
    in 'wb' mode truncates the file, so if filtering raised (for example
    on a key that is not a string) an existing database would otherwise
    be wiped.
    '''
    # Stop-word exclusion is currently disabled; add words here to skip them.
    exclusion_lst = []
    d2 = {
        k: v
        for k, v in d.items()
        if k not in exclusion_lst and not k.isnumeric() and len(k) > 1
    }
    with open(pickle_fname, 'wb') as f:
        pickle.dump(d2, f)
|
2021-04-06 16:22:03 +08:00
|
|
|
|
2021-05-31 08:48:03 +08:00
|
|
|
def unfamiliar(path,word):
    '''
    Add the current time to the record of word in the pickle file at path.

    The object stored in the file is loaded, a one-element list holding
    the current local time (formatted as YYYYmmddHHMM) is added to its
    entry for word with +=, and the result is written back to the same
    file.  Nothing is returned.

    NOTE(review): presumably the file holds a dict mapping each word to a
    list of timestamps, and word is expected to be present already (the
    lookup fails otherwise) -- confirm against the callers.
    '''
    with open(path, "rb") as source:
        records = pickle.load(source)
    stamp = datetime.now().strftime('%Y%m%d%H%M')
    records[word] += [stamp]
    with open(path, "wb") as target:
        pickle.dump(records, target)
|
2021-05-31 08:48:03 +08:00
|
|
|
|
|
|
|
def familiar(path,word):
    '''
    Drop one record of word from the pickle file at path.

    The object stored in the file is loaded.  If its entry for word holds
    more than one item, the first item is deleted; otherwise the whole
    entry for word is removed.  The result is written back to the same
    file.  Nothing is returned.

    Both file accesses use a with-statement so the read handle is closed
    before the file is reopened for writing.

    NOTE(review): presumably the file holds a dict mapping each word to a
    list of timestamps, and word is expected to be present already (the
    lookup fails otherwise) -- confirm against the callers.
    '''
    with open(path, "rb") as f:
        dic = pickle.load(f)
    if len(dic[word]) > 1:
        del dic[word][0]
    else:
        dic.pop(word)
    with open(path, "wb") as f:
        pickle.dump(dic, f)
|
2021-04-06 16:22:03 +08:00
|
|
|
|
|
|
|
if __name__ == '__main__':
    # Small demonstration of the functions above.
    db_fname = 'frequency.p'  # frequency.p is our database

    # Step 1: put an initial word list into a dictionary and save it.
    lst1 = [('apple',2), ('banana',1)]
    d = {}
    lst2dict(lst1, d)  # d will change
    save_frequency_to_pickle(d, db_fname)

    # Step 2: read the database back, turn it into a list again and
    # merge it with a second word list (the new list goes first).
    lst2 = [('banana',2), ('orange', 4)]
    stored = load_record(db_fname)
    lst1 = dict2lst(stored)
    d = merge_frequency(lst2, lst1)
    print(d)
|