# Source file: EnglishPal/app/pickle_idea2.py
# (Repository-viewer snapshot dated 2024-07-04 15:37:40 +08:00.)
###########################################################################
# Copyright 2019 (C) Hui Lan <hui.lan@cantab.net>
# Written permission must be obtained from the author for commercial uses.
###########################################################################
# Purpose: dictionary & pickle as a simple means of database.
# Task: incorporate the functions into wordfreqCMD.py such that it will also show cumulative frequency.
# Note: unlike pickle_idea.py, the second item of each record is now a list of dates rather than a frequency.
import hashlib
import os
import pickle
import random
from datetime import datetime
from urllib.parse import urlencode

import requests
class BaiduContent:
    """Credentials for the Baidu translation API used by ``is_valid_word``.

    Each value is taken from an environment variable when it is set
    (``BAIDU_APPID`` / ``BAIDU_KEY``) and otherwise falls back to the value
    that was originally hard-coded here, so existing deployments keep working.
    """

    # NOTE(review): these fallback secrets are committed to source control;
    # consider rotating them and supplying real values via the environment.
    APPID = os.environ.get('BAIDU_APPID', '20200314000398337')  # replace with your actual APPID
    KEY = os.environ.get('BAIDU_KEY', 'uZ6Sdwz_V1zu9q1peowk')  # replace with your actual API key
def generate_sign(appid, q, salt, key):
    """Return the request signature for a translation query.

    The signature is the hexadecimal MD5 digest of the UTF-8 encoding of
    ``appid + q + str(salt) + key``.

    Args:
        appid: application id string.
        q: the query text being signed.
        salt: any value; it is converted with ``str`` before concatenation.
        key: secret key string.

    Returns:
        A 32-character lowercase hexadecimal string.
    """
    raw = ''.join((appid, q, str(salt), key))
    return hashlib.md5(raw.encode('utf-8')).hexdigest()
def is_valid_word(word, timeout=10):
    """Check whether ``word`` is a real English word via the Baidu translate API.

    The word is sent for English-to-Chinese translation.  It is considered
    valid when the service answers with HTTP 200 and the first translation
    result differs from the input text.

    Args:
        word: the text to validate.
        timeout: seconds to wait for the HTTP request before giving up.
            Without a timeout a stalled connection would block forever.

    Returns:
        True if the word was translated into something different from itself;
        False otherwise, including on any network error, non-200 status,
        non-JSON body, or a response without a usable translation.
    """
    url = "https://fanyi-api.baidu.com/api/trans/vip/translate"
    salt = random.randint(32768, 65536)
    sign = generate_sign(BaiduContent.APPID, word, salt, BaiduContent.KEY)
    params = {
        'q': word,
        'from': 'en',
        'to': 'zh',
        'appid': BaiduContent.APPID,
        'salt': salt,
        'sign': sign
    }
    headers = {'Content-Type': "application/x-www-form-urlencoded"}
    payload = urlencode(params).encode('utf-8')

    try:
        response = requests.post(url, data=payload, headers=headers, timeout=timeout)
        if response.status_code != 200:
            return False
        result = response.json()
    except (requests.RequestException, ValueError):
        # ValueError covers a body that is not valid JSON on requests versions
        # whose JSON error is not a RequestException subclass.
        return False

    # The response carries no 'trans_result' (or an empty one) when the
    # service could not translate the text; treat that as "not a word".
    try:
        translated = result['trans_result'][0]['dst']
    except (KeyError, IndexError, TypeError):
        return False
    return translated != word
def lst2dict(lst, d):
    """Store the information in list ``lst`` into dictionary ``d``.

    Each element of ``lst`` is a pair ``(word, dates)``.  Dates for a word
    that is already in ``d`` are appended to the existing entry; a new word
    gets its own copy of ``dates``.

    Note: nothing is returned; ``d`` is modified in place.
    """
    for x in lst:
        word = x[0]
        dates = x[1]
        if word not in d:
            # Copy on first insert: storing the caller's list object directly
            # would let a later ``+=`` for the same word silently extend the
            # caller's input data.
            d[word] = list(dates)
        else:
            d[word] += dates
def deleteRecord(path, word):
    """Remove ``word`` from the pickled dictionary stored at ``path``.

    The dictionary is loaded, the entry for ``word`` is dropped if present
    (otherwise "sorry" is printed), and the result is written back to the
    same file in either case.
    """
    with open(path, 'rb') as src:
        records = pickle.load(src)

    if word in records:
        del records[word]
    else:
        print("sorry")

    with open(path, 'wb') as dst:
        pickle.dump(records, dst)
def dict2lst(d):
    """Convert a record dictionary into a list of ``(word, dates)`` pairs.

    The type of the first value decides how the whole dictionary is read:

    * ``int`` values (old frequency format): every word is paired with a
      one-element list holding the current time as ``YYYYmmddHHMM``.
    * ``list`` values: the ``(key, value)`` pairs are returned as they are.

    An empty dictionary, or one whose first value is of any other type,
    yields an empty list.
    """
    if not d:
        return []

    sample = next(iter(d.values()))
    if isinstance(sample, int):
        return [(word, [datetime.now().strftime('%Y%m%d%H%M')]) for word in d]
    if isinstance(sample, list):
        return list(d.items())  # a list of (key, value) pairs
    return []
def merge_frequency(lst1, lst2):
    """Combine two lists of ``(word, dates)`` records into one dictionary.

    Both lists are fed to ``lst2dict`` in order (``lst1`` first, then
    ``lst2``) against a fresh dictionary, which is then returned.
    """
    merged = {}
    for records in (lst1, lst2):
        lst2dict(records, merged)
    return merged
def load_record(pickle_fname):
    """Load and return the object pickled in the file ``pickle_fname``.

    The file is opened with a context manager so the handle is closed even
    when unpickling raises.

    NOTE(review): ``pickle.load`` can execute arbitrary code from the file;
    only use this on database files the application wrote itself.
    """
    with open(pickle_fname, 'rb') as f:
        return pickle.load(f)
# exclusion_lst = ['one', 'no', 'has', 'had', 'do', 'that', 'have', 'by', 'not', 'but', 'we', 'this', 'my', 'him', 'so', 'or', 'as', 'are', 'it', 'from', 'with', 'be', 'can', 'for', 'an', 'if', 'who', 'whom', 'whose', 'which', 'the', 'to', 'a', 'of', 'and', 'you', 'i', 'he', 'she', 'they', 'me', 'was', 'were', 'is', 'in', 'at', 'on', 'their', 'his', 'her', 's', 'said', 'all', 'did', 'been', 'w']
def save_frequency_to_pickle(d, pickle_fname):
    """Validate the words in ``d`` and pickle the valid ones to ``pickle_fname``.

    Keys that are purely numeric (or listed in the local exclusion list) are
    skipped without validation.  Every other key is checked with
    ``is_valid_word``; valid words are stored with their dates sorted, and
    invalid words are dropped.

    The output file is opened only after all words have been processed, so
    an exception raised during validation leaves an existing database file
    untouched instead of truncated.

    Args:
        d: dictionary mapping each word to a list of date strings.
        pickle_fname: path of the pickle file to (over)write.

    Returns:
        0 if at least one word failed validation, 1 otherwise.  In both
        cases the dictionary of valid words (possibly empty) is written.
    """
    exclusion_lst = []  # words to skip without validation; currently none
    d2 = {}
    illegal = False  # becomes True once any word fails validation
    for k in d:
        if k in exclusion_lst or k.isnumeric():
            continue
        if is_valid_word(k):
            d2[k] = sorted(d[k])
        else:
            illegal = True

    with open(pickle_fname, 'wb') as f:
        pickle.dump(d2, f)
    return 0 if illegal else 1
if __name__ == '__main__':
    # Demo: save some records to the pickle database, read them back, and
    # merge them with a second batch of records.
    initial_records = [('apple', ['201910251437', '201910251438']), ('banana', ['201910251439'])]
    database = {}
    lst2dict(initial_records, database)  # database is filled in place
    save_frequency_to_pickle(database, 'frequency.p')  # frequency.p is our database

    new_records = [('banana', ['201910251439']), ('orange', ['201910251440', '201910251439'])]
    stored_records = dict2lst(load_record('frequency.p'))
    print(merge_frequency(new_records, stored_records))