###########################################################################
# Copyright 2019 (C) Hui Lan
# Written permission must be obtained from the author for commercial uses.
###########################################################################

# Purpose: dictionary & pickle as a simple means of database.
# Task: incorporate the functions into wordfreqCMD.py such that it will also show cumulative frequency.
# Note: unlike pick_idea.py, now the second item is not frequency, but a list of dates.

import pickle
from datetime import datetime


def lst2dict(lst, d):
    '''
    Accumulate the word counts found in list lst into dictionary d (in place).

    Each element of lst is indexed as (word, value, ...). When value is a
    list (a list of dates), its length is used as the count; otherwise
    value itself is taken as the count. Counts for a word already present
    in d are added to the existing entry.
    '''
    for x in lst:
        word = x[0]
        value = x[1]
        # A list of dates contributes one occurrence per date.
        count = len(value) if isinstance(value, list) else value
        if word in d:
            d[word] += count
        else:
            d[word] = count


def deleteRecord(path, word):
    '''
    Remove word from the pickled dictionary stored at path.

    Prints "sorry" when word is not in the dictionary. The file is only
    rewritten when an entry was actually removed, so a failed lookup never
    touches the stored data.
    '''
    # NOTE: pickle.load can execute arbitrary code; only use files written
    # by this program.
    with open(path, 'rb') as f:
        db = pickle.load(f)
    try:
        db.pop(word)
    except KeyError:
        print("sorry")
        return
    with open(path, 'wb') as ff:
        pickle.dump(db, ff)


def dict2lst(d):
    '''
    Convert dictionary d into a list of (word, frequency) pairs.

    Values are normalised entry by entry: a list (of dates) is replaced by
    its length, any other value is kept as it is. An empty dictionary
    yields an empty list.
    '''
    # Deciding per entry (instead of sniffing only the first value) keeps
    # dictionaries that mix date lists and plain counts consistent.
    return [(k, len(v) if isinstance(v, list) else v) for k, v in d.items()]


def merge_frequency(lst1, lst2):
    '''
    Return a new dictionary holding the summed counts of lst1 and lst2.

    Both arguments are lists in the format accepted by lst2dict.
    '''
    d = {}
    lst2dict(lst1, d)
    lst2dict(lst2, d)
    return d


def load_record(pickle_fname):
    '''
    Load and return the object pickled in the file pickle_fname.
    '''
    # NOTE: pickle.load can execute arbitrary code; only load files that
    # were written by this program.
    with open(pickle_fname, 'rb') as f:
        return pickle.load(f)


def save_frequency_to_pickle(d, pickle_fname):
    '''
    Pickle the word frequencies in d to the file pickle_fname.

    Words listed in exclusion_lst, purely numeric words and words shorter
    than two characters are left out. A value that is a list (of dates) is
    stored as its length; any other value is stored unchanged.
    '''
    excluded = set(exclusion_lst)  # built once so each lookup is O(1)
    d2 = {}
    for k, v in d.items():
        if k in excluded or k.isnumeric() or len(k) < 2:
            continue
        d2[k] = len(v) if isinstance(v, list) else v
    # Open the target only after filtering succeeded, so an error above
    # cannot leave a truncated database file behind.
    with open(pickle_fname, 'wb') as f:
        pickle.dump(d2, f)


# Words that save_frequency_to_pickle never stores.
exclusion_lst = ['one', 'no', 'has', 'had', 'do', 'that', 'have', 'by', 'not', 'but', 'we', 'this', 'my', 'him', 'so', 'or', 'as', 'are', 'it', 'from', 'with',
                 'be', 'can', 'for', 'an', 'if', 'who', 'whom', 'whose', 'which', 'the', 'to', 'a', 'of', 'and', 'you', 'i', 'he', 'she', 'they', 'me', 'was', 'were', 'is', 'in', 'at', 'on', 'their', 'his', 'her', 's', 'said', 'all', 'did', 'been', 'w']


if __name__ == '__main__':
    # Test 1: Convert dates to frequencies
    lst1 = [('apple',['201910251437', '201910251438']), ('banana',['201910251439'])]
    d = {}
    lst2dict(lst1, d)
    print("Test 1 - Convert dates to frequencies:")
    print(d)  # Should show: {'apple': 2, 'banana': 1}

    # Test 2: Save and load frequencies
    save_frequency_to_pickle(d, 'frequency.p')
    loaded_d = load_record('frequency.p')
    print("\nTest 2 - Load saved frequencies:")
    print(loaded_d)  # Should match the previous output

    # Test 3: Merge frequencies
    lst2 = [('banana',['201910251439']), ('orange', ['201910251440', '201910251439'])]
    lst1 = dict2lst(loaded_d)
    merged_d = merge_frequency(lst2, lst1)
    print("\nTest 3 - Merge frequencies:")
    print(merged_d)  # Should show banana with increased frequency