2021-04-06 16:22:03 +08:00
|
|
|
###########################################################################
|
|
|
|
# Copyright 2019 (C) Hui Lan <hui.lan@cantab.net>
|
|
|
|
# Written permission must be obtained from the author for commercial uses.
|
|
|
|
###########################################################################
|
|
|
|
|
|
|
|
|
|
|
|
# Purpose: use a dictionary together with pickle as a simple database.
|
|
|
|
# Task: incorporate the functions into wordfreqCMD.py such that it will also show cumulative frequency.
|
|
|
|
# Note: unlike pick_idea.py, the second item of each record is now a list of dates rather than a frequency.
|
|
|
|
|
|
|
|
import pickle
|
|
|
|
from datetime import datetime
|
|
|
|
|
|
|
|
def lst2dict(lst, d):
    '''
    Store the information in list lst to dictionary d.

    Each item of lst is indexed as (word, dates): item[0] is the key and
    item[1] is the list of dates recorded for it.  A word that is not yet in
    d gets its own copy of the dates; for a word already in d the dates are
    appended to the existing entry.

    Note: nothing is returned; d is modified in place.
    '''
    for x in lst:
        word, dates = x[0], x[1]
        if word not in d:
            # Store a copy: keeping the caller's list object here would let a
            # later `+=` on d[word] silently grow the caller's input list.
            d[word] = list(dates)
        else:
            d[word] += dates
|
|
|
|
|
2021-05-30 21:30:18 +08:00
|
|
|
def deleteRecord(path, word):
    """Remove the entry for word from the pickled dictionary stored at path.

    The dictionary is loaded from path, the entry keyed by word is dropped,
    and the dictionary is written back to the same file.  When word is not a
    key, "sorry" is printed and the dictionary is written back unchanged.
    Nothing is returned.
    """
    with open(path, 'rb') as source:
        records = pickle.load(source)

    try:
        del records[word]
    except KeyError:
        print("sorry")

    # The file is rewritten whether or not an entry was removed.
    with open(path, 'wb') as target:
        pickle.dump(records, target)
|
2021-04-06 16:22:03 +08:00
|
|
|
|
|
|
|
def dict2lst(d):
    """Convert dictionary d into a list of (word, dates) pairs.

    The type of the first value in d decides how the whole dictionary is
    treated:

    * int  -- every key is paired with a one-element list holding the
              current time formatted as '%Y%m%d%H%M'; the stored integers
              themselves are not used.
    * list -- the (key, value) pairs of d are returned as they are.

    An empty dictionary, or a first value of any other type, yields [].
    """
    if not d:
        return []

    first_value = d[next(iter(d))]

    if isinstance(first_value, int):
        # The timestamp is taken per entry, one call for every key.
        return [(word, [datetime.now().strftime('%Y%m%d%H%M')]) for word in d]

    if isinstance(first_value, list):
        return list(d.items())  # a list of (key, value) pairs

    return []
|
|
|
|
|
|
|
|
def merge_frequency(lst1, lst2):
    """Combine two lists of (word, dates) pairs into one new dictionary.

    Both lists are passed to lst2dict in order (lst1 first, then lst2) with
    the same, initially empty, dictionary, which is then returned.
    """
    merged = {}
    for records in (lst1, lst2):
        lst2dict(records, merged)
    return merged
|
|
|
|
|
|
|
|
|
|
|
|
def load_record(pickle_fname):
    """Load and return the object pickled in the file pickle_fname.

    The file is opened in binary mode and is closed again even when
    unpickling fails.  Errors from open() or pickle.load() propagate to the
    caller.
    """
    # NOTE(review): pickle.load can run arbitrary code from the file; this
    # should only be pointed at files the application wrote itself.
    with open(pickle_fname, 'rb') as f:
        return pickle.load(f)
|
|
|
|
|
|
|
|
|
|
|
|
def save_frequency_to_pickle(d, pickle_fname):
    """Pickle a filtered, sorted copy of dictionary d into pickle_fname.

    Keys that are numeric strings or shorter than two characters are left
    out.  For every remaining key the value is stored as a sorted list.
    Nothing is returned.

    The data to store is prepared before the file is opened, so an error
    while filtering or sorting does not truncate an existing file.
    """
    d2 = {}
    for k in d:
        if not k.isnumeric() and len(k) >= 2:
            # Duplicate dates are kept; originally this was
            # d2[k] = list(sorted(set(d[k]))).
            d2[k] = sorted(d[k])

    with open(pickle_fname, 'wb') as f:
        pickle.dump(d2, f)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
if __name__ == '__main__':

    # Step 1: build a dictionary from a first batch of records and save it.
    first_batch = [('apple',['201910251437', '201910251438']), ('banana',['201910251439'])]
    database = {}
    lst2dict(first_batch, database)  # database is filled in place
    save_frequency_to_pickle(database, 'frequency.p')  # frequency.p is our database

    # Step 2: read the saved records back and merge a second batch into them.
    second_batch = [('banana',['201910251439']), ('orange', ['201910251440', '201910251439'])]
    stored_records = dict2lst(load_record('frequency.p'))
    merged = merge_frequency(second_batch, stored_records)
    print(merged)
|