# EnglishPal/app/pickle_idea2.py

###########################################################################
# Copyright 2019 (C) Hui Lan <hui.lan@cantab.net>
# Written permission must be obtained from the author for commercial uses.
###########################################################################


# Purpose: dictionary & pickle as a simple means of database.
# Task: incorporate the functions into wordfreqCMD.py such that it will also show cumulative frequency.
# Note: unlike pick_idea.py, the second item of an input pair may be a list of dates rather than a frequency; such lists are converted to frequency counts before being stored.

import pickle
from datetime import datetime

def lst2dict(lst, d):
    '''
    Accumulate the entries of list lst into dictionary d, in place.

    The first item of each entry is the word and the second item is its
    value. A list value (e.g. a list of dates) contributes its length;
    any other value is used directly as the frequency. If the word is
    already a key of d, the new count is added to the stored one.
    '''
    for entry in lst:
        key = entry[0]
        payload = entry[1]
        # A list payload stands for one occurrence per element.
        freq = len(payload) if isinstance(payload, list) else payload

        if key in d:
            d[key] += freq
        else:
            d[key] = freq

def deleteRecord(path,word):
    '''
    Remove the entry for word from the pickled database stored at path.

    The database is loaded, the key is deleted, and the result is written
    back to the same file. When word is not a key, "sorry" is printed and
    the database is written back unchanged.
    '''
    with open(path, 'rb') as src:
        records = pickle.load(src)

    try:
        del records[word]
    except KeyError:
        print("sorry")

    # The file is rewritten whether or not a key was removed.
    with open(path, 'wb') as dst:
        pickle.dump(records, dst)

def dict2lst(d):
    '''
    Convert dictionary d into a list of (word, frequency) pairs.

    Every value is converted on its own: a list (e.g. a list of dates) is
    replaced by its length, and any other value is passed through as the
    frequency. Deciding per item, rather than from the type of the first
    value only, keeps dictionaries with mixed value types from leaking raw
    lists into the result or from being dropped altogether.

    Returns an empty list when d is empty. Pairs follow the iteration
    order of d.
    '''
    return [
        (word, len(value) if isinstance(value, list) else value)
        for word, value in d.items()
    ]

def merge_frequency(lst1, lst2):
    '''
    Combine two lists of (word, value) entries into one new dictionary.

    Both lists are folded into a fresh dictionary with lst2dict, lst1
    first and then lst2, and that dictionary is returned.
    '''
    merged = {}
    for source in (lst1, lst2):
        lst2dict(source, merged)
    return merged


def load_record(pickle_fname):
    '''
    Load and return the object stored in the pickle file pickle_fname.

    The file is opened in a with-block so the handle is closed even when
    unpickling fails. Errors from open() or pickle.load() propagate to
    the caller.
    '''
    # NOTE: pickle.load can run arbitrary code from the file; only use this
    # on pickle files that come from a trusted source.
    with open(pickle_fname, 'rb') as f:
        return pickle.load(f)


def save_frequency_to_pickle(d, pickle_fname):
    '''
    Filter dictionary d and save the result to the pickle file pickle_fname.

    A key is skipped when it appears in exclusion_lst, is numeric, or is
    shorter than two characters. For the remaining keys, a list value
    (e.g. a list of dates) is stored as its length and any other value is
    stored unchanged.

    The filtered dictionary is built before the file is opened, so an
    error while filtering does not truncate an existing pickle file, and
    the file is written inside a with-block so it is always closed.
    '''
    excluded = set(exclusion_lst)  # build once for fast membership tests
    d2 = {}
    for k, v in d.items():
        if k in excluded or k.isnumeric() or len(k) < 2:
            continue
        # store frequency count instead of dates list
        d2[k] = len(v) if isinstance(v, list) else v

    with open(pickle_fname, 'wb') as f:
        pickle.dump(d2, f)


# Words that save_frequency_to_pickle leaves out of the saved dictionary.
exclusion_lst = [
    'one', 'no', 'has', 'had', 'do', 'that', 'have', 'by', 'not', 'but',
    'we', 'this', 'my', 'him', 'so', 'or', 'as', 'are', 'it', 'from',
    'with', 'be', 'can', 'for', 'an', 'if', 'who', 'whom', 'whose', 'which',
    'the', 'to', 'a', 'of', 'and', 'you', 'i', 'he', 'she', 'they',
    'me', 'was', 'were', 'is', 'in', 'at', 'on', 'their', 'his', 'her',
    's', 'said', 'all', 'did', 'been', 'w',
]

if __name__ == '__main__':
    # Demo 1: turn lists of dates into frequency counts.
    dated_words = [
        ('apple', ['201910251437', '201910251438']),
        ('banana', ['201910251439']),
    ]
    freq = {}
    lst2dict(dated_words, freq)
    print("Test 1 - Convert dates to frequencies:")
    print(freq)  # expected: {'apple': 2, 'banana': 1}

    # Demo 2: round-trip the frequencies through a pickle file.
    pickle_path = 'frequency.p'
    save_frequency_to_pickle(freq, pickle_path)
    restored = load_record(pickle_path)
    print("\nTest 2 - Load saved frequencies:")
    print(restored)  # expected: same content as the Test 1 output

    # Demo 3: merge newly seen words with the stored frequencies.
    new_words = [
        ('banana', ['201910251439']),
        ('orange', ['201910251440', '201910251439']),
    ]
    combined = merge_frequency(new_words, dict2lst(restored))
    print("\nTest 3 - Merge frequencies:")
    print(combined)  # expected: banana's count goes up to 2