EnglishPal/app/pickle_idea2.py

###########################################################################
# Copyright 2019 (C) Hui Lan <hui.lan@cantab.net>
# Written permission must be obtained from the author for commercial uses.
###########################################################################


# Purpose: dictionary & pickle as a simple means of database.
# Task: incorporate the functions into wordfreqCMD.py such that it will also show cumulative frequency.
# Note: unlike pickle_idea.py, the second item is now a list of dates rather than a frequency.

import hashlib
import os
import pickle
import random
from datetime import datetime
from urllib.parse import urlencode

import requests

class BaiduContent:
    """Credentials used to sign requests to the Baidu translation API.

    Each value is read from an environment variable when it is set
    (``BAIDU_APPID`` / ``BAIDU_KEY``) and otherwise falls back to the
    value that was previously hard-coded here, so existing deployments
    keep working unchanged.
    """
    # NOTE(review): the fallback literals below are real-looking credentials
    # committed to source control; prefer supplying them via the environment
    # and rotating the key.
    APPID = os.environ.get('BAIDU_APPID', '20200314000398337')  # application id
    KEY = os.environ.get('BAIDU_KEY', 'uZ6Sdwz_V1zu9q1peowk')   # API secret key

def generate_sign(appid, q, salt, key):
    """Return the request signature for a translation query.

    The signature is the hexadecimal MD5 digest of the UTF-8 encoding of
    ``appid + q + str(salt) + key``.
    """
    raw = ''.join((appid, q, str(salt), key))
    digest = hashlib.md5()
    digest.update(raw.encode('utf-8'))
    return digest.hexdigest()
def is_valid_word(word, timeout=10):
    """Check whether *word* looks like a real English word.

    The word is sent to the Baidu translation endpoint (English -> Chinese).
    It is considered valid when the response contains a translation whose
    text differs from the word itself.

    Args:
        word: the candidate word (a string).
        timeout: seconds to wait for the HTTP request before giving up.

    Returns:
        True if a translation different from *word* was returned; False for
        any other outcome, including a non-200 status, a network error or
        timeout, a body that is not JSON, or a response that lacks a usable
        ``trans_result`` entry.
    """
    url = "https://fanyi-api.baidu.com/api/trans/vip/translate"
    salt = random.randint(32768, 65536)
    sign = generate_sign(BaiduContent.APPID, word, salt, BaiduContent.KEY)

    params = {
        'q': word,
        'from': 'en',
        'to': 'zh',
        'appid': BaiduContent.APPID,
        'salt': salt,
        'sign': sign
    }

    headers = {'Content-Type': "application/x-www-form-urlencoded"}
    payload = urlencode(params).encode('utf-8')

    try:
        # Without a timeout requests may block indefinitely on a stalled server.
        response = requests.post(url, data=payload, headers=headers, timeout=timeout)
        if response.status_code != 200:
            return False
        result = response.json()
    except (requests.RequestException, ValueError):
        # ValueError covers a body that cannot be decoded as JSON.
        return False

    try:
        translation = result['trans_result'][0]['dst']
    except (KeyError, IndexError, TypeError):
        # No translation in the response: treat the word as invalid.
        return False

    # A word that comes back untranslated is treated as not being a real word.
    return translation != word

def lst2dict(lst, d):
    '''
    Store the information in list lst to dictionary d.

    lst is a sequence of (word, dates) pairs, where dates is a list.
    For a word not yet in d, a copy of its dates is stored; for a word
    already in d, the dates are appended to the existing entry.

    Note: nothing is returned; d is modified in place.  The lists inside
    lst are never modified.
    '''
    for x in lst:
        word = x[0]
        dates = x[1]
        if word in d:
            d[word] += dates
        else:
            # Copy so that later "+=" on d[word] cannot grow the caller's list.
            d[word] = list(dates)

def deleteRecord(path,word):
    """Remove *word* from the pickled dictionary stored at *path*.

    The dictionary is loaded, the entry for *word* is dropped, and the
    result is written back to the same file.  If *word* is not present,
    "sorry" is printed and the dictionary is written back unchanged.
    """
    with open(path, 'rb') as source:
        records = pickle.load(source)

    not_found = object()
    if records.pop(word, not_found) is not_found:
        print("sorry")

    with open(path, 'wb') as target:
        pickle.dump(records, target)

def dict2lst(d):
    """Convert a word dictionary into a list of (word, dates) pairs.

    The type of the first value in d decides how d is interpreted:
    - int: every word gets a one-element list holding the current time
      formatted as '%Y%m%d%H%M';
    - list: the (key, value) pairs of d are returned as they are.
    An empty d, or a first value of any other type, gives [].
    """
    if not d:
        return []

    sample = next(iter(d.values()))
    if isinstance(sample, int):
        return [(word, [datetime.now().strftime('%Y%m%d%H%M')]) for word in d]
    if isinstance(sample, list):
        return list(d.items())  # a list of (key, value) pairs
    return []

def merge_frequency(lst1, lst2):
    """Merge two lists of (word, dates) pairs into a new dictionary.

    The pairs of lst1 are stored first, then those of lst2, both through
    lst2dict; the resulting dictionary is returned.
    """
    merged = {}
    for source in (lst1, lst2):
        lst2dict(source, merged)
    return merged


def load_record(pickle_fname):
    """Load and return the object pickled in the file *pickle_fname*.

    The file is opened through a context manager so that it is closed
    even when unpickling raises.

    Raises whatever open() or pickle.load() raise (for example a missing
    file or a corrupt/empty pickle).
    """
    # NOTE(review): pickle.load can execute arbitrary code; only use this on
    # files written by this application.
    with open(pickle_fname, 'rb') as f:
        return pickle.load(f)

    # exclusion_lst = ['one', 'no', 'has', 'had', 'do', 'that', 'have', 'by', 'not', 'but', 'we', 'this', 'my', 'him', 'so', 'or', 'as', 'are', 'it', 'from', 'with', 'be', 'can', 'for', 'an', 'if', 'who', 'whom', 'whose', 'which', 'the', 'to', 'a', 'of', 'and', 'you', 'i', 'he', 'she', 'they', 'me', 'was', 'were', 'is', 'in', 'at', 'on', 'their', 'his', 'her', 's', 'said', 'all', 'did', 'been', 'w']
def save_frequency_to_pickle(d, pickle_fname):
    """Filter the words in d and pickle the accepted ones to *pickle_fname*.

    A key of d is a candidate when it is not in the exclusion list and is
    not purely numeric.  Each candidate is checked with is_valid_word();
    accepted words are saved with their dates sorted, rejected words are
    dropped.  Non-candidates are skipped silently.

    The file is only opened after filtering has finished, so an exception
    raised while filtering leaves an existing file untouched.

    Args:
        d: dictionary mapping word (str) to a list of dates.
        pickle_fname: path of the pickle file to (over)write.

    Returns:
        0 if at least one candidate word was rejected by is_valid_word(),
        1 otherwise (including when d has no candidates at all).
    """
    exclusion_lst = []  # currently empty, so no word is excluded by it
    d2 = {}
    illegal = False  # becomes True once any candidate word is rejected
    for k in d:
        if k in exclusion_lst or k.isnumeric():
            continue
        if is_valid_word(k):
            d2[k] = sorted(d[k])
        else:
            illegal = True

    # Write only after validation so a failure above cannot truncate the file.
    with open(pickle_fname, 'wb') as f:
        pickle.dump(d2, f)

    return 0 if illegal else 1


if __name__ == '__main__':
    # Small demonstration: save a few records, load them back and merge
    # them with a second batch.

    # Step 1: build a dictionary from the first batch and save it.
    first_batch = [('apple',['201910251437', '201910251438']),  ('banana',['201910251439'])]
    stored = {}
    lst2dict(first_batch, stored)  # fills `stored` in place
    save_frequency_to_pickle(stored, 'frequency.p')  # frequency.p is our database

    # Step 2: reload the database and merge it into the second batch.
    second_batch = [('banana',['201910251439']), ('orange', ['201910251440', '201910251439'])]
    reloaded_pairs = dict2lst(load_record('frequency.p'))
    print(merge_frequency(second_batch, reloaded_pairs))
Fix bug 563 2024-07-04 15:37:40 +08:00			`###########################################################################`
			`# Copyright 2019 (C) Hui Lan <hui.lan@cantab.net>`
			`# Written permission must be obtained from the author for commercial uses.`
			`###########################################################################`


			`# Purpose: dictionary & pickle as a simple means of database.`
			`# Task: incorporate the functions into wordfreqCMD.py such that it will also show cumulative frequency.`
			`# Note: unlike pick_idea.py, now the second item is not frequency, but a list of dates.`

			`import pickle`
			`import requests`
			`import hashlib`
			`import random`
			`from urllib.parse import urlencode`

			`from datetime import datetime`

			`class BaiduContent:`
			`APPID = '20200314000398337' # 将'您的APPID'替换为实际的APPID`
			`KEY = 'uZ6Sdwz_V1zu9q1peowk' # 将'您的密钥'替换为实际的API密钥`

			`def generate_sign(appid, q, salt, key):`
			`sign_str = appid + q + str(salt) + key`
			`sign = hashlib.md5(sign_str.encode('utf-8')).hexdigest()`
			`return sign`
			`def is_valid_word(word):`
			`url = "https://fanyi-api.baidu.com/api/trans/vip/translate"`
			`salt = random.randint(32768, 65536)`
			`sign = generate_sign(BaiduContent.APPID, word, salt, BaiduContent.KEY)`

			`params = {`
			`'q': word,`
			`'from': 'en',`
			`'to': 'zh',`
			`'appid': BaiduContent.APPID,`
			`'salt': salt,`
			`'sign': sign`
			`}`

			`headers = {'Content-Type': "application/x-www-form-urlencoded"}`
			`data = urlencode(params).encode('utf-8')`

			`try:`
			`response = requests.post(url, data=data, headers=headers)`
			`if response.status_code == 200:`
			`data = response.json()`
			`# print(data['trans_result'][0]['dst'])`
			`# 检查是否含有翻译结果，并判断翻译后的文本是否与原文不同`
			`if 'trans_result' in data and data['trans_result'][0]['dst'] != word:`
			`return True`
			`else:`
			`return False`
			`else:`
			`return False`
			`except requests.RequestException:`
			`return False`

			`def lst2dict(lst, d):`
			`'''`
			`Store the information in list lst to dictionary d.`
			`Note: nothing is returned.`

			`'''`
			`for x in lst:`
			`word = x[0]`
			`dates = x[1]`
			`if not word in d:`
			`d[word] = dates`
			`else:`
			`d[word] += dates`

			`def deleteRecord(path,word):`
			`with open(path, 'rb') as f:`
			`db = pickle.load(f)`
			`try:`
			`db.pop(word)`
			`except KeyError:`
			`print("sorry")`
			`with open(path, 'wb') as ff:`
			`pickle.dump(db, ff)`

			`def dict2lst(d):`
			`if len(d) > 0:`
			`keys = list(d.keys())`
			`if isinstance(d[keys[0]], int):`
			`lst = []`
			`for k in d:`
			`lst.append((k, [datetime.now().strftime('%Y%m%d%H%M')]))`
			`return lst`
			`elif isinstance(d[keys[0]], list):`
			`return list(d.items()) # a list of (key, value) pairs`

			`return []`

			`def merge_frequency(lst1, lst2):`
			`d = {}`
			`lst2dict(lst1, d)`
			`lst2dict(lst2, d)`
			`return d`


			`def load_record(pickle_fname):`
			`f = open(pickle_fname, 'rb')`
			`d = pickle.load(f)`
			`f.close()`
			`return d`

			`# exclusion_lst = ['one', 'no', 'has', 'had', 'do', 'that', 'have', 'by', 'not', 'but', 'we', 'this', 'my', 'him', 'so', 'or', 'as', 'are', 'it', 'from', 'with', 'be', 'can', 'for', 'an', 'if', 'who', 'whom', 'whose', 'which', 'the', 'to', 'a', 'of', 'and', 'you', 'i', 'he', 'she', 'they', 'me', 'was', 'were', 'is', 'in', 'at', 'on', 'their', 'his', 'her', 's', 'said', 'all', 'did', 'been', 'w']`
			`def save_frequency_to_pickle(d, pickle_fname):`
			`with open(pickle_fname, 'wb') as f: # 使用 with 语句自动处理文件关闭`
			`exclusion_lst = []`
			`d2 = {}`
			`illegal = False # 标记是否合法`
			`added = False`
			`for k in d:`
			`if k not in exclusion_lst and not k.isnumeric():`
			`if is_valid_word(k): # 只有当单词不合法时进行标记`
			`d2[k] = list(sorted(d[k]))`
			`added =True`
			`else:`
			`illegal = True # 标记至少处理了一个有效单词`

			`if illegal:`
			`if not added:`
			`pickle.dump({}, f)`
			`else:`
			`pickle.dump(d2, f)`
			`return 0 # 返回0表示成功处理存在非法单词`
			`else:`
			`pickle.dump(d2, f)`
			`return 1 # 返回1表示成功处理并保存至少一个单词`


			`if __name__ == '__main__':`

			`lst1 = [('apple',['201910251437', '201910251438']), ('banana',['201910251439'])]`
			`d = {}`
			`lst2dict(lst1, d) # d will change`
			`save_frequency_to_pickle(d, 'frequency.p') # frequency.p is our database`


			`lst2 = [('banana',['201910251439']), ('orange', ['201910251440', '201910251439'])]`
			`d = load_record('frequency.p')`
			`lst1 = dict2lst(d)`
			`d = merge_frequency(lst2, lst1)`
			`print(d)`