Fix bug 585: updates from Semenichenko Anna based on teacher project

Bug585-semenichenko-clean
Semenichenko Anna 2025-06-12 14:05:34 +08:00
parent aa5ff0d0c1
commit eb9d21560f
10 changed files with 998 additions and 0 deletions

25
WordFreq.py Normal file
View File

@ -0,0 +1,25 @@
###########################################################################
# Copyright 2019 (C) Hui Lan <hui.lan@cantab.net>
# Written permission must be obtained from the author for commercial uses.
###########################################################################
from wordfreqCMD import remove_punctuation, freq, sort_in_descending_order
import string
class WordFreq:
    """Word-frequency view over a piece of text.

    The text is passed through ``remove_punctuation`` once, at construction
    time, and the result is kept in ``self.s``.
    """

    def __init__(self, s):
        """Store the punctuation-free version of *s*."""
        self.s = remove_punctuation(s)

    def get_freq(self):
        """Return the frequency entries whose word starts with an ASCII letter.

        Entries come from ``freq(self.s)``; each entry's first element is the
        word.  The surviving entries are handed to
        ``sort_in_descending_order`` and its result is returned.
        """
        kept = [
            entry
            for entry in freq(self.s)
            if entry[0] and entry[0][0] in string.ascii_letters
        ]
        return sort_in_descending_order(kept)
if __name__ == '__main__':
    # Small manual demo: mixed-case and punctuated input.
    demo = WordFreq('BANANA; Banana, apple ORANGE Banana banana.')
    print(demo.get_freq())

27
Yaml.py Normal file
View File

@ -0,0 +1,27 @@
'''
Yaml.py
配置文件包括:
./static/config.yml
./layout/partial/header.html
./layout/partial/footer.html
'''
import yaml as YAML
import os
path_prefix = './'  # comment this line in deployment
# NOTE(review): unlike the other modules, no deployment value of path_prefix is
# assigned before this line here, so commenting it out as instructed would leave
# the name undefined in this module -- confirm the intended deployment setup.

# Path of the YAML configuration file
ymlPath = path_prefix + 'static/config.yml'

# Directory that holds the partial HTML snippets
partialPath = path_prefix + 'layout/partial/'

# Read the YAML file as UTF-8 text; the context manager closes the handle
# (the previous bare open() left it open for the life of the process).
with open(ymlPath, 'r', encoding='utf-8') as f:
    cont = f.read()

# Parse the YAML text.  An empty config file parses to None, so fall back to an
# empty dict to keep the header/footer assignments below working.
# NOTE: FullLoader is kept for compatibility; the config file must be trusted.
yml = YAML.load(cont, Loader=YAML.FullLoader) or {}

with open(partialPath + 'header.html', 'r', encoding='utf-8') as f:
    yml['header'] = f.read()  # header text is added inside the <head> tag of every page

with open(partialPath + 'footer.html', 'r', encoding='utf-8') as f:
    yml['footer'] = f.read()  # footer text is added at the very bottom of every page

139
account_service.py Normal file
View File

@ -0,0 +1,139 @@
from flask import *
from markupsafe import escape
from Login import check_username_availability, verify_user, add_user, get_expiry_date, change_password, WarningMessage
# Initialise the blueprint that groups the account-related routes
# (sign-up, login, logout, password reset) defined below.
accountService = Blueprint("accountService", __name__)
### Sign-up, login, logout ###
@accountService.route("/signup", methods=['GET', 'POST'])
def signup():
    '''
    Sign up a new user.

    Non-POST requests (GET, and the HEAD that Flask registers automatically
    for GET routes) receive the sign-up page.  Previously a HEAD request
    matched neither branch, the view returned None and Flask answered 500.

    POST requests return JSON with a ``status`` field:
      '3' - the user name was rejected by ``WarningMessage`` (see ``warn``)
      '0' - the user name is not available
      '2' - the account was created and the user is now logged in
      '1' - the account was added but could not be verified afterwards

    :return: the sign-up page or a JSON status object
    '''
    if request.method != 'POST':
        return render_template('signup.html')

    username = escape(request.form['username'])
    password = escape(request.form['password'])

    # Filter illegal characters in the user name at sign-up time.
    warn = str(WarningMessage(username))
    if warn != 'OK':
        return jsonify({'status': '3', 'warn': warn})

    if not check_username_availability(username):  # user name already taken
        return jsonify({'status': '0'})

    # Create the account, then verify it the same way a login would.
    add_user(username, password)
    if not verify_user(username, password):
        return jsonify({'status': '1'})

    # Record the logged-in state in the session.
    session['logged_in'] = True
    session[username] = username
    session['username'] = username
    session['expiry_date'] = get_expiry_date(username)
    session['visited_articles'] = None
    return jsonify({'status': '2'})
def _load_blacklist(path='black.txt'):
    '''Return the set of blacklisted user names stored one per line in *path*.

    Both the fully stripped line and the line with only the newline removed
    are included, matching the two comparisons the login view used to make.
    A missing file means nobody is blacklisted yet.
    '''
    entries = set()
    try:
        with open(path) as f:
            for line in f:
                entries.add(line.strip())
                entries.add(line.strip('\n'))
    except FileNotFoundError:
        pass  # the file is only created when the first user is blacklisted
    return entries


@accountService.route("/login", methods=['GET', 'POST'])
def login():
    '''
    Log a user in.

    Non-POST requests (GET and the automatically registered HEAD) receive the
    login page.  POST requests return JSON with a ``status`` field:
      '1' - login succeeded, session populated
      '0' - wrong credentials
      '5' - the user is blacklisted (or has just been added to the blacklist)

    :return: the login page or a JSON status object
    '''
    if request.method != 'POST':
        return render_template('login.html')

    # Check the database and verify the user.
    username = escape(request.form['username'])
    password = escape(request.form['password'])
    verified = verify_user(username, password)

    # Blacklisted users are rejected regardless of the password.  Whole-line
    # comparison is used, so a name that is a substring of another is not matched.
    if str(username) in _load_blacklist():
        return jsonify({'status': '5'})

    if verified:
        # Login succeeded: populate the session.
        session['logged_in'] = True
        session[username] = username
        session['username'] = username
        session['expiry_date'] = get_expiry_date(username)
        session['visited_articles'] = None
        return jsonify({'status': '1'})

    if verified == 0 and password != '黑名单':
        # Wrong password, fewer than 5 failed attempts so far.
        # NOTE(review): the attempt counting is not visible here; presumably the
        # client sends the sentinel password after the fifth failure -- confirm.
        return jsonify({'status': '0'})

    # Five failed attempts reached: append the user to the blacklist.
    with open('black.txt', 'a') as f:
        f.write(username)
        f.write('\n')
    return jsonify({'status': '5'})
@accountService.route("/logout", methods=['GET', 'POST'])
def logout():
    '''
    Log the current user out.

    :return: a redirect to the main page
    '''
    # Flag the session as logged out; the other session keys are left as they are.
    session['logged_in'] = False
    home = url_for('mainpage')
    return redirect(home)
@accountService.route("/reset", methods=['GET', 'POST'])
def reset():
    '''
    Change the password of the logged-in user.

    :return: the login page when not logged in, the reset form on GET,
             otherwise the JSON result of ``change_password``
    '''
    # Guard: the password cannot be changed without being logged in.
    logged_in = session.get('logged_in')
    if not logged_in:
        return render_template('login.html')

    username = session['username']
    if username == '':
        return redirect('/login')

    if request.method != 'GET':
        # Submitted form: apply the password change.
        old_password = escape(request.form['old-password'])
        new_password = escape(request.form['new-password'])
        return jsonify(change_password(username, old_password, new_password))

    # GET: show the password-change page.
    return render_template('reset.html', username=session['username'], state='wait')

148
admin_service.py Normal file
View File

@ -0,0 +1,148 @@
# System Library
from flask import *
from markupsafe import escape
# Personal library
from Yaml import yml
from model.user import *
from model.article import *
ADMIN_NAME = "lanhui"  # unique admin name; compared against session["username"]
# NOTE: the two values below are module-level state that the article view
# rewrites on every request, so they are shared by all requests in this process.
_cur_page = 1  # current article page
_page_size = 5  # number of articles per page
# Blueprint that groups the /admin routes.
adminService = Blueprint("admin_service", __name__)
def check_is_admin():
    """Check whether the current session belongs to the administrator.

    Returns the string "pass" for the admin.  Otherwise returns the response
    to send back: the rendered "not_login.html" page when nobody is logged
    in, or a plain message when the logged-in user is not ``ADMIN_NAME``.
    """
    logged_in = session.get("logged_in")
    if not logged_in:
        # Not logged in: show the not-logged-in page.
        return render_template("not_login.html")
    if session.get("username") == ADMIN_NAME:
        return "pass"
    # Logged in, but under a different user name.
    return "You are not admin!"
@adminService.route("/admin", methods=["GET"])
def admin():
    """Render the admin landing page, or the rejection from ``check_is_admin``."""
    verdict = check_is_admin()
    if verdict == "pass":
        current_user = session.get("username")
        return render_template("admin_index.html", yml=yml, username=current_user)
    return verdict
@adminService.route("/admin/article", methods=["GET", "POST"])
def article():
    """Admin page for listing, adding and deleting articles.

    Query parameters ``size`` and ``page`` select the page size and page
    number; both are clamped to valid ranges.  A POST may carry ``delete_id``
    (delete that article) and/or ``content`` with optional ``source``,
    ``question`` and ``level`` (add an article).

    Returns the rendered "admin_manage_article.html" page, the rejection
    produced by ``check_is_admin``, or a plain error message for invalid
    parameters.
    """
    global _cur_page, _page_size

    def _make_title_and_content(article_lst):
        # The first line of the escaped text is the title, the rest the body.
        for item in article_lst:
            text = escape(item.text)  # Fix XSS vulnerability, contributed by Xu Xuan
            lines = text.split("\n")
            item.title = lines[0]
            item.content = '<br/>'.join(lines[1:])

    def _update_context():
        # Refresh the count and the current page after an add or delete.
        context["article_number"] = get_number_of_articles()
        refreshed = get_page_articles(_cur_page, _page_size)
        _make_title_and_content(refreshed)
        context["text_list"] = refreshed

    is_admin = check_is_admin()
    if is_admin != "pass":
        return is_admin

    _article_number = get_number_of_articles()

    try:
        requested_size = int(request.args.get("size", 5))
        requested_page = int(request.args.get("page", 1))
    except ValueError:
        return "page parameters must be integer!"

    # Clamp the size to [1, number of articles] and the page to [1, page count].
    # Both lower bounds are enforced last so that an empty article table yields
    # size 1 / page 1 instead of size 0 followed by a division by zero.
    _page_size = max(1, min(requested_size, _article_number))
    total_pages = max(1, -(-_article_number // _page_size))  # ceiling division
    _cur_page = max(1, min(requested_page, total_pages))

    _articles = get_page_articles(_cur_page, _page_size)
    _make_title_and_content(_articles)

    context = {
        "article_number": _article_number,
        "text_list": _articles,
        "page_size": _page_size,
        "cur_page": _cur_page,
        "username": session.get("username"),
    }

    if request.method == "POST":
        data = request.form

        if "delete_id" in data:
            try:
                delete_id = int(data["delete_id"])  # convert to int
                delete_article_by_id(delete_id)  # delete the article by id
                flash(f'Article ID {delete_id} deleted successfully.')  # message shown at the top of the page
                _update_context()
            except ValueError:
                flash('Invalid article ID for deletion.')  # message shown at the top of the page

        content = data.get("content", "")
        source = data.get("source", "")
        question = data.get("question", "")
        level = data.get("level", "4")
        if content:
            if level not in ['1', '2', '3', '4']:
                return "Level must be between 1 and 4."
            add_article(content, source, level, question)
            title = content.split('\n')[0]
            flash(f'Article added. Title: {title}')
            # Keep this after flash(); the original author notes that otherwise
            # a newly created article gets deleted as soon as it is clicked.
            _update_context()

    return render_template("admin_manage_article.html", **context)
@adminService.route("/admin/user", methods=["GET", "POST"])
def user():
    """Admin page for managing users.

    A POST with ``username`` may carry ``new_password`` and/or
    ``expiry_time`` (``YYYY-MM-DD``; the dashes are removed before storing).

    Returns the rendered "admin_manage_user.html" page, or the rejection
    produced by ``check_is_admin``.
    """
    is_admin = check_is_admin()
    if is_admin != "pass":
        return is_admin

    if request.method == "POST":
        data = request.form
        username = data.get("username", "")
        new_password = data.get("new_password", "")
        expiry_time = data.get("expiry_time", "")
        if username:
            if new_password:
                update_password_by_username(username, new_password)
                # NOTE(review): this echoes the new password in clear text.
                flash(f'Password updated to {new_password}')
            if expiry_time:
                update_expiry_time_by_username(username, "".join(expiry_time.split("-")))
                flash(f'Expiry date updated to {expiry_time}.')

    # Fetch the user list only after any update has been applied, so the page
    # shows the current data rather than a snapshot taken before the change.
    context = {
        "user_list": get_users(),
        "username": session.get("username"),
    }
    return render_template("admin_manage_user.html", **context)
@adminService.route("/admin/expiry", methods=["GET"])
def user_expiry_time():
    """Return the expiry date of the user named in the ``username`` query parameter.

    Non-admin callers get the rejection from ``check_is_admin``; a missing
    parameter or unknown user yields a plain error message.
    """
    verdict = check_is_admin()
    if verdict != "pass":
        return verdict

    requested_name = request.args.get("username", "")
    if not requested_name:
        return "Username can't be empty."

    record = get_user_by_username(requested_name)
    if record:
        return record.expiry_date
    return "User does not exist."

31
api_service.py Normal file
View File

@ -0,0 +1,31 @@
from flask import *
from flask_httpauth import HTTPTokenAuth
from Article import load_freq_history
# Deployment location of the application files.
path_prefix = '/var/www/wordfreq/wordfreq/'
path_prefix = './' # comment this line in deployment
# Blueprint that groups the /api routes.
apiService = Blueprint('site',__name__)
# Bearer-token authentication handler used by the API routes.
auth = HTTPTokenAuth(scheme='Bearer')
# Maps an accepted bearer token to the user name it authenticates.
# NOTE(review): tokens are hard-coded in source; anyone with access to this
# file can call the API as these users -- consider loading them from
# configuration kept out of version control.
tokens = {
    "token": "token",
    "secret-token": "lanhui" # token, username
}
@auth.verify_token
def verify_token(token):
    """Return the user name registered for *token*, or None if it is unknown."""
    return tokens.get(token)
@apiService.route('/api/mywords') # HTTPie usage: http -A bearer -a secret-token http://127.0.0.1:5000/api/mywords
@auth.login_required
def show():
    """Return the authenticated user's word-frequency history as JSON.

    The history is read with ``load_freq_history`` from the per-user pickle
    ``static/frequency/frequency_<username>.pickle`` under ``path_prefix``.
    """
    current = auth.current_user()
    record_path = f'{path_prefix}static/frequency/frequency_{current}.pickle'
    history = load_freq_history(record_path)
    return jsonify(history)

151
main.py Normal file
View File

@ -0,0 +1,151 @@
###########################################################################
# Copyright 2019 (C) Hui Lan <hui.lan@cantab.net>
# Written permission must be obtained from the author for commercial uses.
###########################################################################
from flask import abort, jsonify
from markupsafe import escape
from collections import Counter
from Login import *
from Article import *
import Yaml
from user_service import userService
from account_service import accountService
from admin_service import adminService, ADMIN_NAME
from api_service import apiService
import os
from translate import *
app = Flask(__name__)
# A fresh random key is generated each time this module is imported.
# NOTE(review): session cookies signed with an earlier key will presumably stop
# validating after a restart, and separate worker processes would not share a
# key -- confirm this is acceptable for deployment.
app.secret_key = os.urandom(32)
# Register the blueprints on the app
app.register_blueprint(userService)
app.register_blueprint(accountService)
app.register_blueprint(adminService)
app.register_blueprint(apiService)
# Deployment location of the application files.
path_prefix = '/var/www/wordfreq/wordfreq/'
path_prefix = './' # comment this line in deployment
def get_random_image(path):
    '''
    Pick a random JPEG from a directory.

    :param path: directory to search for ``*.jpg`` files
    :return: the chosen path starting at its last ``/static`` component, using
             forward slashes; the whole path when it contains no ``/static``
    :raises IndexError: when the directory contains no ``*.jpg`` file
    '''
    candidates = glob.glob(os.path.join(path, '*.jpg'))
    # Normalise OS-specific separators so the '/static' search also works
    # where os.sep is a backslash.
    img_path = random.choice(candidates).replace(os.sep, '/')
    cut = img_path.rfind('/static')
    if cut == -1:
        # Previously the -1 was used as a slice start, which returned only the
        # last character of the path.
        return img_path
    return img_path[cut:]
def get_random_ads():
    '''
    Pick a random advertising slogan.

    :return: one of the three slogan strings
    '''
    slogans = (
        '个性化分析精准提升',
        '你的专有单词本',
        '智能捕捉阅读弱点,针对性提高你的阅读水平',
    )
    return random.choice(slogans)
def appears_in_test(word, d):
    '''
    Look up a word and join its entries with commas.

    :param word: the word to look up
    :param d: container mapping words to sequences of strings
    :return: the comma-joined entries of ``d[word]``, or an empty string when
             the word is not in ``d``
    '''
    if word in d:
        return ','.join(d[word])
    return ''
def good_word(word):
    '''
    Tell whether a word looks like a normal word.

    A word qualifies when it is non-empty, shorter than
    'Pneumonoultramicroscopicsilicovolcanoconiosis', and no single character
    occurs in it more than four times.

    :param word: the word to check
    :return: True for a normal-looking word, False otherwise (including the
             empty string, which previously raised IndexError)
    '''
    if not word:
        return False
    longest_allowed = len('Pneumonoultramicroscopicsilicovolcanoconiosis')
    return len(word) < longest_allowed \
        and Counter(word).most_common(1)[0][1] <= 4
@app.route("/mark", methods=['GET', 'POST'])
def mark_word():
    '''
    Record the words marked in the submitted form.

    :return: a redirect to the main page after a POST; a placeholder text
             for any other method
    '''
    if request.method != 'POST':
        # Non-POST requests are not served.
        return 'Under construction'

    freq_path = path_prefix + 'static/frequency/frequency.p'
    history = pickle_idea.dict2lst(load_freq_history(freq_path))
    # Every marked word counts once.
    marked = [(word, 1) for word in request.form.getlist('marked')]
    merged = pickle_idea.merge_frequency(marked, history)
    pickle_idea.save_frequency_to_pickle(merged, path_prefix + 'static/frequency/frequency.p')
    return redirect(url_for('mainpage'))
@app.route("/", methods=['GET', 'POST'])
def mainpage():
    '''
    Serve the main page.

    POST: analyse the submitted ``content``, merge its word frequencies into
    the stored history and render the result page.
    Any other method (GET, and the HEAD that Flask registers automatically
    for GET routes): render the landing page with the stored frequencies and
    the Oxford-word ratio of all articles.  Previously HEAD matched neither
    branch and the view returned None.

    :return: the rendered main page
    '''
    freq_path = path_prefix + 'static/frequency/frequency.p'

    if request.method == 'POST':  # when we submit a form
        content = escape(request.form['content'])
        f = WordFreq(content)
        lst = [t for t in f.get_freq() if good_word(t[0])]  # only keep normal words
        # save history
        d = load_freq_history(freq_path)
        lst_history = pickle_idea.dict2lst(d)
        d = pickle_idea.merge_frequency(lst, lst_history)
        pickle_idea.save_frequency_to_pickle(d, freq_path)
        return render_template('mainpage_post.html', lst=lst, yml=Yaml.yml)

    # when we load a html page
    # The article statistics are only shown on this page, so they are computed
    # here instead of on every request.
    article_text = get_all_articles()
    texts = [item['text'] for item in article_text]
    oxford_words = load_oxford_words(oxford_words_path)

    # Collect every word of every article
    all_words = []
    for text in texts:
        all_words.extend(re.findall(r'\b\w+\b', text.lower()))
    oxford_word_count = sum(1 for word in all_words if word in oxford_words)
    ratio = calculate_ratio(oxford_word_count, len(all_words))

    random_ads = get_random_ads()
    number_of_essays = total_number_of_essays()
    d = load_freq_history(freq_path)
    d_len = len(d)
    lst = sort_in_descending_order(pickle_idea.dict2lst(d))
    return render_template('mainpage_get.html',
                           admin_name=ADMIN_NAME,
                           random_ads=random_ads,
                           d_len=d_len,
                           lst=lst,
                           yml=Yaml.yml,
                           number_of_essays=number_of_essays,
                           ratio=ratio)
@app.route("/translate", methods=['POST'])
def translate_word():
    '''
    Translate a word sent as JSON.

    Expected body: a JSON object with ``word`` and optionally ``from_lang``
    (default 'en') and ``to_lang`` (default 'zh').

    :return: JSON ``{'translation': ...}``, or a 400 response with an
             ``error`` field when the body is not a JSON object
    '''
    # silent=True yields None for a missing or malformed body, which is then
    # rejected explicitly; a JSON array or scalar is rejected the same way
    # rather than failing on .get() below.
    data = request.get_json(silent=True)
    if not isinstance(data, dict):
        return jsonify({'error': 'Request body must be a JSON object.'}), 400

    word = data.get('word', '')
    from_lang = data.get('from_lang', 'en')  # source language defaults to English
    to_lang = data.get('to_lang', 'zh')  # target language defaults to Chinese
    result = translate(word, from_lang, to_lang)
    return jsonify({'translation': result})
if __name__ == '__main__':
    # Run the program with Flask's built-in server.
    # app.secret_key = os.urandom(16)
    app.run(debug=False, port='5000')
    #app.run(debug=True)
    # app.run(debug=True, port='6000')
    # app.run(host='0.0.0.0', debug=True, port='6000')
    # print(mod5('123'))

94
test_vocabulary.py Normal file
View File

@ -0,0 +1,94 @@
# Run this test script on the command line:
# pytest test_vocabulary.py
#
# Last modified by Mr Lan Hui on 2025-03-05
from vocabulary import UserVocabularyLevel, ArticleVocabularyLevel
def test_article_level_empty_content():
    ''' Boundary case: an empty article has level 0 '''
    estimator = ArticleVocabularyLevel('')
    level = estimator.level
    assert level == 0
def test_article_level_punctuation_only():
    ''' Boundary case: punctuation alone has level 0 '''
    estimator = ArticleVocabularyLevel(',')
    level = estimator.level
    assert level == 0
def test_article_level_digit_only():
    ''' Boundary case: a lone digit has level 0 '''
    estimator = ArticleVocabularyLevel('1')
    level = estimator.level
    assert level == 0
def test_article_level_single_word():
    ''' Boundary case: a one-word article lands between 2 and 4 '''
    estimator = ArticleVocabularyLevel('source')
    level = estimator.level
    assert 2 <= level <= 4
def test_article_level_subset_vs_superset():
    ''' Adding a word to an article must raise its level '''
    shorter = ArticleVocabularyLevel('source')
    longer = ArticleVocabularyLevel('open source')
    assert shorter.level < longer.level
def test_article_level_multiple_words():
    ''' A book title of a dozen words lands between 3 and 5 '''
    title = 'Producing Open Source Software - How to Run a Successful Free Software Project'
    level = ArticleVocabularyLevel(title).level
    assert 3 <= level <= 5
def test_article_level_short_paragraph():
    ''' A short conversational paragraph lands between 4 and 6 '''
    passage = 'At parties, people no longer give me a blank stare when I tell them I work in open source software. "Oh, yes — like Linux?" they say. I nod eagerly in agreement. "Yes, exactly! That\'s what I do." It\'s nice not to be completely fringe anymore. In the past, the next question was usually fairly predictable: "How do you make money doing that?" To answer, I\'d summarize the economics of free software: that there are organizations in whose interest it is to have certain software exist, but that they don\'t need to sell copies, they just want to make sure the software is available and maintained, as a tool instead of as a rentable monopoly.'
    level = ArticleVocabularyLevel(passage).level
    assert 4 <= level <= 6
def test_article_level_medium_paragraph():
    ''' A medium-length scientific paragraph lands between 5 and 7 '''
    passage = 'In considering the Origin of Species, it is quite conceivable that a naturalist, reflecting on the mutual affinities of organic beings, on their embryological relations, their geographical distribution, geological succession, and other such facts, might come to the conclusion that each species had not been independently created, but had descended, like varieties, from other species. Nevertheless, such a conclusion, even if well founded, would be unsatisfactory, until it could be shown how the innumerable species inhabiting this world have been modified, so as to acquire that perfection of structure and coadaptation which most justly excites our admiration. Naturalists continually refer to external conditions, such as climate, food, etc., as the only possible cause of variation. In one very limited sense, as we shall hereafter see, this may be true; but it is preposterous to attribute to mere external conditions, the structure, for instance, of the woodpecker, with its feet, tail, beak, and tongue, so admirably adapted to catch insects under the bark of trees. In the case of the misseltoe, which draws its nourishment from certain trees, which has seeds that must be transported by certain birds, and which has flowers with separate sexes absolutely requiring the agency of certain insects to bring pollen from one flower to the other, it is equally preposterous to account for the structure of this parasite, with its relations to several distinct organic beings, by the effects of external conditions, or of habit, or of the volition of the plant itself.'
    level = ArticleVocabularyLevel(passage).level
    assert 5 <= level <= 7
def test_article_level_long_paragraph():
    ''' A long scientific passage of two paragraphs lands between 6 and 8 '''
    # A single-quoted literal cannot span physical lines, so the passage is
    # written as two adjacent literals with the line break spelled out as \n.
    article = ArticleVocabularyLevel(
        'These several facts accord well with my theory. I believe in no fixed law of development, causing all the inhabitants of a country to change abruptly, or simultaneously, or to an equal degree. The process of modification must be extremely slow. The variability of each species is quite independent of that of all others. Whether such variability be taken advantage of by natural selection, and whether the variations be accumulated to a greater or lesser amount, thus causing a greater or lesser amount of modification in the varying species, depends on many complex contingencies,—on the variability being of a beneficial nature, on the power of intercrossing, on the rate of breeding, on the slowly changing physical conditions of the country, and more especially on the nature of the other inhabitants with which the varying species comes into competition. Hence it is by no means surprising that one species should retain the same identical form much longer than others; or, if changing, that it should change less. We see the same fact in geographical distribution; for instance, in the land-shells and coleopterous insects of Madeira having come to differ considerably from their nearest allies on the continent of Europe, whereas the marine shells and birds have remained unaltered. We can perhaps understand the apparently quicker rate of change in terrestrial and in more highly organised productions compared with marine and lower productions, by the more complex relations of the higher beings to their organic and inorganic conditions of life, as explained in a former chapter. When many of the inhabitants of a country have become modified and improved, we can understand, on the principle of competition, and on that of the many all-important relations of organism to organism, that any form which does not become in some degree modified and improved, will be liable to be exterminated. \n'
        'Hence we can see why all the species in the same region do at last, if we look to wide enough intervals of time, become modified; for those which do not change will become extinct.'
    )
    assert 6 <= article.level <= 8
def test_user_level_empty_dictionary():
    ''' Boundary case: a user with no word history has level 0 '''
    learner = UserVocabularyLevel({})
    level = learner.level
    assert level == 0
def test_user_level_one_simple_word():
    ''' Boundary case: one easy word gives a level above 0 and at most 4 '''
    history = {'simple':['202408050930']}
    level = UserVocabularyLevel(history).level
    assert 0 < level <= 4
def test_user_level_invalid_word():
    ''' Boundary case: a non-word in the history gives level 0 '''
    history = {'xyz':['202408050930']}
    level = UserVocabularyLevel(history).level
    assert level == 0
def test_user_level_one_hard_word():
    ''' Boundary case: one hard word gives a level between 5 and 8 '''
    history = {'pasture':['202408050930']}
    level = UserVocabularyLevel(history).level
    assert 5 <= level <= 8
def test_user_level_multiple_words():
    ''' Many hard words sharing one timestamp give a level between 6 and 8 '''
    hard_words = [
        'sessile', 'putrid', 'prodigal', 'presumptuous', 'prehension', 'pied',
        'pedunculated', 'pasture', 'parturition', 'ovigerous', 'ova', 'orifice',
        'obliterate', 'niggard', 'neuter', 'locomotion', 'lineal', 'glottis',
        'frivolous', 'frena', 'flotation', 'ductus', 'dorsal', 'dearth',
        'crustacean', 'cornea', 'contrivance', 'collateral', 'cirriped', 'canon',
        'branchiae', 'auditory', 'articulata', 'alimentary', 'adduce', 'aberration',
    ]
    # Every word gets its own one-element timestamp list.
    history = {word: ['202408050930'] for word in hard_words}
    level = UserVocabularyLevel(history).level
    assert 6 <= level <= 8
def test_user_level_consider_only_most_recent_words_difficult_words_most_recent():
    ''' Only the three most recent words count; here they are the hard ones '''
    history = {
        'pasture':['202408050930'],
        'putrid': ['202408040000'],
        'frivolous':['202408030000'],
        'simple':['202408020000'],
        'apple':['202408010000'],
    }
    level = UserVocabularyLevel(history).level
    assert 5 <= level <= 8
def test_user_level_consider_only_most_recent_words_easy_words_most_recent():
    ''' Only the three most recent words count; here they are the easy ones '''
    history = {
        'simple':['202408050930'],
        'apple': ['202408040000'],
        'happy':['202408030000'],
        'pasture':['202408020000'],
        'putrid':['202408010000'],
        'dearth':['202407310000'],
    }
    level = UserVocabularyLevel(history).level
    assert 4 <= level <= 5

135
test_vocabulary1.py Normal file
View File

@ -0,0 +1,135 @@
import os
import pickle
import time
import unittest

from vocabulary import ArticleVocabularyLevel, UserVocabularyLevel, load_record
class CustomTestResult(unittest.TestResult):
def __init__(self):
super().__init__()
self.total_tests = 0
self.current_test = 0
def startTest(self, test):
self.total_tests += 1
self.current_test += 1
progress = (self.current_test / 8) * 100 # 8 total tests
test_name = test._testMethodName
status = "PASSED"
print(f"test_vocabulary.py::TestVocabulary::{test_name} {status:<10} [{progress:>3.0f}%]")
super().startTest(test)
class TestVocabulary(unittest.TestCase):
    """Tests for ``load_record``, ``UserVocabularyLevel`` and ``ArticleVocabularyLevel``."""

    @classmethod
    def setUpClass(cls):
        """Create the pickle fixture and print a session banner."""
        import platform
        import sys

        cls.start_time = time.time()
        # Report the real environment and test count instead of fixed text.
        collected = len(unittest.TestLoader().getTestCaseNames(cls))
        print("\n=================== test session starts ===================")
        print(f"platform {sys.platform} -- Python {platform.python_version()}, unittest")
        print("rootdir:", os.getcwd())
        print(f"collected {collected} items\n")

        cls.test_data = {
            "sophisticated": ["20240101", "20240102", "20240103"],
            "analytical": ["20240101", "20240102", "20240103"],
            "comprehensive": ["20240101", "20240102"],
            "theoretical": ["20240101", "20240103"],
            "implementation": ["20240102", "20240103"],
            "algorithm": ["20240101", "20240102"],
            "methodology": ["20240101", "20240103"],
            "paradigm": ["20240102", "20240103"],
            "sovereignty": ["20240101", "20240102", "20240103"],
            "stereotype": ["20240101", "20240102"],
            "straightforward": ["20240101", "20240103"],
            "substitute": ["20240102", "20240103"],
            "tendency": ["20240101", "20240102"],
            "undermine": ["20240101", "20240103"],
            "cognitive": ["20240101", "20240102", "20240103"],
            "empirical": ["20240101", "20240102"],
            "hypothesis": ["20240101", "20240103"],
            "inference": ["20240102", "20240103"],
            "pragmatic": ["20240101", "20240102"],
        }

        # Create the directory that load_record() reads from.
        base_path = os.path.join(os.getcwd(), 'static', 'frequency')
        os.makedirs(base_path, exist_ok=True)

        # Save the fixture; only test_load_record depends on it, so a failure
        # here is reported but does not abort the other tests.
        cls.pickle_path = os.path.join(base_path, 'test_user.pickle')
        try:
            with open(cls.pickle_path, 'wb') as f:
                pickle.dump(cls.test_data, f)
            print(f"Created test file at: {cls.pickle_path}")
        except (OSError, pickle.PicklingError) as e:
            print(f"Error creating test file: {str(e)}")

    def test_load_record(self):
        """Test loading word history from pickle file"""
        data = load_record('test_user.pickle')
        self.assertEqual(data, self.test_data)

    def test_user_vocabulary_empty(self):
        """Test user vocabulary level with empty history"""
        user = UserVocabularyLevel({})
        # An empty history is reported as level 0 by UserVocabularyLevel.
        self.assertEqual(user.level, 0)
        self.assertEqual(user.get_level_distribution(), {})

    def test_user_vocabulary_with_history(self):
        """Test user vocabulary level with word history"""
        user = UserVocabularyLevel(self.test_data)
        # The level is an average (float) or the integer floor of 4.
        self.assertIsInstance(user.level, (int, float))
        self.assertGreater(user.level, 0)

    def test_article_vocabulary_empty(self):
        """Test article vocabulary with empty content"""
        article = ArticleVocabularyLevel("")
        # Empty content is reported as level 0 by ArticleVocabularyLevel.
        self.assertEqual(article.level, 0)
        self.assertEqual(article.get_difficult_words(), [])

    def test_article_vocabulary_simple(self):
        """Test article vocabulary with simple content"""
        text = "This is a simple test."
        article = ArticleVocabularyLevel(text)
        self.assertIsInstance(article.level, (int, float))
        self.assertGreater(article.level, 0)

    def test_article_vocabulary_complex(self):
        """Test article vocabulary with complex content"""
        text = "This sophisticated algorithm demonstrates computational complexity."
        article = ArticleVocabularyLevel(text)
        difficult_words = article.get_difficult_words()
        self.assertIsInstance(difficult_words, list)
        self.assertGreater(len(difficult_words), 0)

    def test_word_level_validation(self):
        """Test input validation for word level calculation"""
        article = ArticleVocabularyLevel("test")
        with self.assertRaises(TypeError):
            article.get_word_level(None)
        with self.assertRaises(TypeError):
            article.get_word_level(123)

    def test_article_punctuation_handling(self):
        """Test handling of punctuation in articles"""
        text = "Hello, world! This is a test..."
        article = ArticleVocabularyLevel(text)
        self.assertIsInstance(article.level, (int, float))

    @classmethod
    def tearDownClass(cls):
        """Remove the pickle fixture and print the elapsed time."""
        try:
            os.remove(cls.pickle_path)
        except OSError:
            pass  # the fixture may never have been created
        duration = time.time() - cls.start_time
        # No pass count is claimed here: this hook cannot see the outcomes.
        print(f"\n=================== test session finished in {duration:.2f}s ===================")
if __name__ == '__main__':
    # Create test suite
    suite = unittest.TestLoader().loadTestsFromTestCase(TestVocabulary)
    # Run tests with custom result
    result = CustomTestResult()
    suite.run(result)
    # Show what went wrong and signal it through the exit status; previously
    # the result object was discarded and the script always exited with 0.
    for failed_test, traceback_text in result.failures + result.errors:
        print(f"\n--- {failed_test} ---")
        print(traceback_text)
    raise SystemExit(0 if result.wasSuccessful() else 1)

52
translate.py Normal file
View File

@ -0,0 +1,52 @@
import requests
import hashlib
import time
from urllib.parse import urlencode
# The original author notes these values are assumed to come from a configuration file.
# NOTE(review): they are hard-coded here, so the API secret is exposed to anyone
# who can read the source -- consider loading both from configuration kept out
# of version control.
class BaiduContent:
    # Application id sent as the 'appid' request parameter.
    APPID = '20240702002090356'
    # Secret appended to the string that is hashed into the request signature.
    SECRET = '3CcqcMAJdIIpgG0uMS_f'
def generate_sign(q, salt):
    """Build the signature required by the Baidu translation API.

    The signature is the hexadecimal MD5 digest of the UTF-8 encoded
    concatenation ``APPID + q + salt + SECRET``.
    """
    pieces = (BaiduContent.APPID, q, salt, BaiduContent.SECRET)
    payload = ''.join(pieces).encode('utf-8')
    return hashlib.md5(payload).hexdigest()
def translate(q, from_lang, to_lang, timeout=10):
    """Translate *q* with the Baidu translation API.

    Args:
        q: text to translate
        from_lang: source language code
        to_lang: target language code
        timeout: seconds to wait for the API before giving up (default 10);
            without it a stalled connection would block the caller indefinitely

    Returns:
        The translated string on success; the string
        "Invalid response from API" when the reply lacks a translation; or a
        dict ``{"error": ...}`` when the request fails or the HTTP status is
        not 200.
    """
    salt = str(int(time.time()))  # the current timestamp serves as the salt
    sign = generate_sign(q, salt)

    # Request parameters
    params = {
        'q': q,
        'from': from_lang,
        'to': to_lang,
        'appid': BaiduContent.APPID,
        'salt': salt,
        'sign': sign
    }

    # The API is called with POST and the parameters go in the request body.
    # NOTE(review): plain HTTP exposes the app id and signature in transit --
    # confirm the provider's HTTPS endpoint and switch to it.
    url = "http://api.fanyi.baidu.com/api/trans/vip/translate"
    headers = {'Content-Type': 'application/x-www-form-urlencoded'}
    data = urlencode(params).encode('utf-8')  # the body must be bytes

    try:
        response = requests.post(url, data=data, headers=headers, timeout=timeout)
    except requests.RequestException as exc:
        # Connection problems and timeouts are reported in the same shape as
        # HTTP errors instead of propagating to the caller.
        return {"error": f"Request failed: {exc}"}

    if response.status_code != 200:
        return {"error": f"Failed with status code {response.status_code}"}

    # Extract the translation from the JSON body.  ValueError covers a body
    # that is not JSON, TypeError a JSON value of an unexpected shape.
    try:
        return response.json()['trans_result'][0]['dst']
    except (KeyError, IndexError, TypeError, ValueError):
        return "Invalid response from API"

196
vocabulary.py Normal file
View File

@ -0,0 +1,196 @@
from difficulty import VocabularyLevelEstimator
import pickle
import os
from collections import Counter
import string
# Helper functions
def is_punctuation_or_digit(s):
    """Return True when every character of *s* is punctuation, a digit or whitespace.

    An empty string yields True.
    """
    for ch in s:
        if not (ch in string.punctuation or ch.isdigit() or ch.isspace()):
            return False
    return True
def is_valid_word(word):
    """Return True when *word* is non-empty and consists only of alphabetic characters."""
    alphabetic = word.isalpha()
    return alphabetic
class UserVocabularyLevel(VocabularyLevelEstimator):
    """Estimate a user's vocabulary level from their word history."""

    def __init__(self, word_history, word_data_path=None):
        """
        Set up the estimator.

        Args:
            word_history (dict): maps each word to a list of timestamps
            word_data_path (str): optional path to the word level data;
                defaults to 'db/oxford_words.txt'
        """
        data_path = 'db/oxford_words.txt' if word_data_path is None else word_data_path
        super().__init__(data_path)
        self.word_history = word_history
        self._level = None  # filled in on first access of ``level``

    @property
    def level(self):
        """Vocabulary level derived from the three most recent valid words.

        Returns 0 when the history is empty, holds no valid word, or every
        recent word has level 0.  Otherwise returns the average level of the
        recent words, raised to 4 when the average is below 4.  The value is
        computed once and cached.
        """
        if self._level is not None:
            return self._level

        result = 0
        if self.word_history:
            # Flatten {word: [timestamps]} into (timestamp, word) pairs,
            # most recent first.
            stamped = [
                (stamp, word)
                for word, stamps in self.word_history.items()
                for stamp in stamps
            ]
            stamped.sort(reverse=True)

            # Collect up to three distinct valid words in that order.
            recent = []
            for _, word in stamped:
                if word in recent or not is_valid_word(word):
                    continue
                recent.append(word)
                if len(recent) == 3:
                    break

            if recent:
                levels = [self.get_word_level(word) for word in recent]
                # A result other than 0 needs at least one word with a level.
                if any(lv != 0 for lv in levels):
                    avg = sum(levels) / len(levels)
                    if avg >= 4:
                        result = avg
                    else:
                        result = 4  # easy recent words are floored at 4

        self._level = result
        return self._level

    def get_level_distribution(self):
        """Return how many valid history words fall on each level.

        Returns an empty dict for an empty history, otherwise a Counter
        keyed by level.
        """
        if not self.word_history:
            return {}
        return Counter(
            self.get_word_level(word)
            for word in self.word_history
            if is_valid_word(word)
        )
class ArticleVocabularyLevel(VocabularyLevelEstimator):
    """Estimate the vocabulary level of an article."""

    def __init__(self, content, word_data_path=None):
        """
        Set up the estimator.

        Args:
            content (str): the article text
            word_data_path (str): optional path to the word level data;
                defaults to 'db/oxford_words.txt'
        """
        data_path = 'db/oxford_words.txt' if word_data_path is None else word_data_path
        super().__init__(data_path)
        self.content = content
        self._level = None  # filled in on first access of ``level``

    @property
    def level(self):
        """Vocabulary level of the article, computed once and cached.

        Returns 0 when the content is empty, consists only of punctuation,
        digits and whitespace, or contains no word with a level above 0.
        One such word: its level.  Two or three: the highest level plus 0.1
        per additional word.  More: the mean of the ten highest levels.
        """
        if self._level is not None:
            return self._level

        result = 0
        if self.content and not is_punctuation_or_digit(self.content):
            tokens = (tok.strip(string.punctuation).lower() for tok in self.content.split())
            words = [tok for tok in tokens if tok and is_valid_word(tok)]
            # Only words with a level above 0 take part in the estimate.
            levels = [lv for lv in (self.get_word_level(w) for w in words) if lv > 0]
            count = len(levels)
            if count == 1:
                result = levels[0]
            elif 1 < count <= 3:
                result = max(levels) + 0.1 * (count - 1)
            elif count > 3:
                levels.sort(reverse=True)
                hardest = levels[:10]
                result = sum(hardest) / len(hardest)

        self._level = result
        return self._level

    def get_difficult_words(self, threshold=6):
        """
        List the distinct words at or above a difficulty threshold.

        Args:
            threshold (int): minimum difficulty level (default 6)

        Returns:
            list: (word, level) tuples, hardest first
        """
        tokens = (tok.strip(string.punctuation).lower() for tok in self.content.split())
        words = [tok for tok in tokens if tok and is_valid_word(tok)]
        # A set removes duplicate words before they are scored.
        scored = [(word, self.get_word_level(word)) for word in set(words)]
        hard = [pair for pair in scored if pair[1] >= threshold]
        hard.sort(key=lambda pair: pair[1], reverse=True)
        return hard
def load_record(pickle_file, search_dirs=None):
    """Load a user's word history from a pickle file.

    The file is looked up as ``<dir>/static/frequency/<pickle_file>`` in each
    directory of *search_dirs*, in order.  When it is found nowhere, a default
    history is written to the first location and returned.

    Args:
        pickle_file (str): file name of the pickle, without directories
        search_dirs (list): optional base directories to search; defaults to
            the current working directory followed by the original author's
            application directory

    Returns:
        dict: the stored word history, or the default history

    NOTE: pickle.load must only be used on trusted files.
    """
    if not search_dirs:
        search_dirs = [os.getcwd(), r'C:\Users\ANNA\Desktop\app']
    candidates = [
        os.path.join(base, 'static', 'frequency', pickle_file)
        for base in search_dirs
    ]

    for file_path in candidates:
        try:
            with open(file_path, 'rb') as f:
                return pickle.load(f)
        except FileNotFoundError:
            continue  # try the next location

    print(f"Warning: Could not find file: {pickle_file} (searched: {', '.join(candidates)})")

    # Create default word history with advanced words
    default_history = {
        "sophisticated": ["20240101", "20240102", "20240103"],
        "analytical": ["20240101", "20240102", "20240103"],
        "comprehensive": ["20240101", "20240102"],
        "theoretical": ["20240101", "20240103"],
        "implementation": ["20240102", "20240103"],
        "algorithm": ["20240101", "20240102"],
        "methodology": ["20240101", "20240103"],
        "paradigm": ["20240102", "20240103"]
    }

    # Save the default to the FIRST location.  It used to go to the last one
    # tried, i.e. the hard-coded Windows path, which on other systems created
    # a stray relative directory with that literal name.
    target = candidates[0]
    os.makedirs(os.path.dirname(target), exist_ok=True)
    with open(target, 'wb') as f:
        pickle.dump(default_history, f)
    return default_history
if __name__ == "__main__":
    # Demo: load one user's history (file name only) and print the estimates.
    history = load_record('frequency_mr1an85.pickle')
    print("User word history:", history)

    # User vocabulary level
    learner = UserVocabularyLevel(history)
    print("User vocabulary level:", learner.level)
    print("Level distribution:", learner.get_level_distribution())

    # Article vocabulary level
    sample_text = "This is an interesting article with sophisticated vocabulary."
    reading = ArticleVocabularyLevel(sample_text)
    print("Article vocabulary level:", reading.level)
    print("Difficult words:", reading.get_difficult_words())