###########################################################################
# Copyright 2019 (C) Hui Lan
# Written permission must be obtained from the author for commercial uses.
###########################################################################

# Purpose: dictionary & pickle as a simple means of database.
# Task: incorporate the functions into wordfreqCMD.py such that it will also show cumulative frequency.
# Note: unlike pick_idea.py, now the second item is not frequency, but a list of dates.

import hashlib
import pickle
import random
from datetime import datetime
from urllib.parse import urlencode

import requests

# Upper bound (seconds) on one translation request, so that a stalled
# connection cannot block the caller forever.
_REQUEST_TIMEOUT_SECONDS = 10


class BaiduContent:
    '''Credentials used to sign requests to the Baidu translation API.'''

    # NOTE(review): credentials are hard-coded in source; consider loading
    # them from the environment or a config file kept out of version control.
    APPID = '20200314000398337'  # replace with your actual APPID
    KEY = 'uZ6Sdwz_V1zu9q1peowk'  # replace with your actual API key


def generate_sign(appid, q, salt, key):
    '''
    Return the request signature: the hexadecimal MD5 digest of the
    concatenation appid + q + str(salt) + key, encoded as UTF-8.
    '''
    sign_str = appid + q + str(salt) + key
    return hashlib.md5(sign_str.encode('utf-8')).hexdigest()


def is_valid_word(word):
    '''
    Ask the Baidu translation API to translate word from English to Chinese.

    Return True only if the service answers with HTTP 200 and the first
    translation result differs from word.  Return False in every other
    case: non-200 status, network failure or timeout, a body that is not
    valid JSON, or a response without a usable translation result.
    '''
    url = "https://fanyi-api.baidu.com/api/trans/vip/translate"
    salt = random.randint(32768, 65536)
    sign = generate_sign(BaiduContent.APPID, word, salt, BaiduContent.KEY)
    params = {
        'q': word,
        'from': 'en',
        'to': 'zh',
        'appid': BaiduContent.APPID,
        'salt': salt,
        'sign': sign,
    }
    headers = {'Content-Type': "application/x-www-form-urlencoded"}
    payload = urlencode(params).encode('utf-8')

    try:
        response = requests.post(url, data=payload, headers=headers,
                                 timeout=_REQUEST_TIMEOUT_SECONDS)
        if response.status_code != 200:
            return False
        # Depending on the requests version, a malformed body raises either
        # a RequestException subclass or a plain ValueError.
        result = response.json()
    except (requests.RequestException, ValueError):
        return False

    # A missing key, an empty result list or an unexpected JSON shape all
    # mean that no translation is available.
    try:
        return result['trans_result'][0]['dst'] != word
    except (KeyError, IndexError, TypeError):
        return False


def lst2dict(lst, d):
    '''
    Store the information in list lst to dictionary d.
    Each item of lst is a (word, dates) pair.  Dates of a word already
    present in d are appended to the existing list.
    Note: nothing is returned.
    '''
    for x in lst:
        word = x[0]
        dates = x[1]
        if word in d:
            d[word] += dates
        else:
            # Store a copy: keeping the caller's list object here would let a
            # later "+=" on d[word] silently modify the caller's input.
            d[word] = list(dates)


def deleteRecord(path, word):
    '''
    Remove word from the pickled dictionary stored at path.

    If word is not in the dictionary, print "sorry" and leave the file
    untouched.
    '''
    with open(path, 'rb') as f:
        db = pickle.load(f)
    if word not in db:
        print("sorry")
        return  # nothing changed, so there is no need to rewrite the file
    del db[word]
    with open(path, 'wb') as ff:
        pickle.dump(db, ff)


def dict2lst(d):
    '''
    Convert dictionary d to a list of (word, dates) pairs.

    The type of the first value decides how d is read:
    - int: every word gets a one-element date list holding the current
      time formatted as YYYYmmddHHMM;
    - list: the (key, value) pairs of d are returned as they are.
    An empty dictionary, or any other value type, gives an empty list.
    '''
    if not d:
        return []
    first_value = next(iter(d.values()))
    if isinstance(first_value, int):
        # Take the time once so that all entries carry the same stamp.
        now = datetime.now().strftime('%Y%m%d%H%M')
        return [(k, [now]) for k in d]
    if isinstance(first_value, list):
        return list(d.items())  # a list of (key, value) pairs
    return []


def merge_frequency(lst1, lst2):
    '''
    Merge two lists of (word, dates) pairs and return the result as a
    dictionary mapping each word to its dates, those of lst1 first.
    '''
    d = {}
    lst2dict(lst1, d)
    lst2dict(lst2, d)
    return d


def load_record(pickle_fname):
    '''
    Load and return the object pickled in file pickle_fname.

    NOTE(review): pickle can execute arbitrary code while loading; only
    open files that this program wrote itself.
    '''
    with open(pickle_fname, 'rb') as f:
        return pickle.load(f)


# Stop-word list used previously (kept for reference, currently disabled):
# exclusion_lst = ['one', 'no', 'has', 'had', 'do', 'that', 'have', 'by',
#                  'not', 'but', 'we', 'this', 'my', 'him', 'so', 'or', 'as',
#                  'are', 'it', 'from', 'with', 'be', 'can', 'for', 'an',
#                  'if', 'who', 'whom', 'whose', 'which', 'the', 'to', 'a',
#                  'of', 'and', 'you', 'i', 'he', 'she', 'they', 'me', 'was',
#                  'were', 'is', 'in', 'at', 'on', 'their', 'his', 'her', 's',
#                  'said', 'all', 'did', 'been', 'w']
def save_frequency_to_pickle(d, pickle_fname):
    '''
    Save the valid words of dictionary d, with their dates sorted, to
    file pickle_fname.

    Keys that are purely numeric (or listed in exclusion_lst) are skipped.
    Every other key is checked with is_valid_word; only keys that pass
    are saved.

    Return 0 if at least one checked word was rejected, 1 otherwise.
    '''
    exclusion_lst = []  # no word is excluded at the moment
    d2 = {}
    illegal = False  # becomes True once any checked word is rejected

    # Validate first and open the file afterwards: opening in 'wb' mode
    # truncates the file, so a failure during the (slow, networked)
    # validation must not be able to wipe the existing database.
    for k in d:
        if k in exclusion_lst or k.isnumeric():
            continue
        if is_valid_word(k):
            d2[k] = sorted(d[k])
        else:
            illegal = True

    with open(pickle_fname, 'wb') as f:
        pickle.dump(d2, f)

    return 0 if illegal else 1


if __name__ == '__main__':

    lst1 = [('apple', ['201910251437', '201910251438']), ('banana', ['201910251439'])]
    d = {}
    lst2dict(lst1, d)  # d will change
    save_frequency_to_pickle(d, 'frequency.p')  # frequency.p is our database

    lst2 = [('banana', ['201910251439']), ('orange', ['201910251440', '201910251439'])]
    d = load_record('frequency.p')
    lst1 = dict2lst(d)
    d = merge_frequency(lst2, lst1)
    print(d)