path_prefix = './'  # comment this line in deployment

# Path of the site-wide YAML configuration file.
ymlPath = path_prefix + 'static/config.yml'

# Folder that holds the shared HTML fragments.
partialPath = path_prefix + 'layout/partial/'

# Read the YAML text inside a context manager so the file handle is closed
# as soon as the text has been read (a bare open() would leave it open for
# the lifetime of the process).
with open(ymlPath, 'r', encoding='utf-8') as f:
    cont = f.read()  # raw YAML text, kept as a module-level name

# NOTE(review): FullLoader can construct more than plain data types; this is
# only acceptable while config.yml is a trusted, locally maintained file.
yml = YAML.load(cont, Loader=YAML.FullLoader)  # parsed configuration

with open(partialPath + 'header.html', 'r', encoding='utf-8') as f:
    # Text of header.html; meant to be inserted into the <head> of every page.
    yml['header'] = f.read()

with open(partialPath + 'footer.html', 'r', encoding='utf-8') as f:
    # Text of footer.html; meant to be appended at the bottom of every page.
    yml['footer'] = f.read()
_BLACKLIST_PATH = 'black.txt'  # one blacklisted username per line


def _read_blacklist():
    """Return the set of usernames listed in the blacklist file.

    A missing file is treated as an empty blacklist, so a fresh deployment
    can serve logins before anyone has been blacklisted.
    """
    try:
        with open(_BLACKLIST_PATH) as f:
            raw_lines = f.readlines()
    except FileNotFoundError:
        return set()

    names = set()
    for raw in raw_lines:
        # Keep both normalisations: one form with all surrounding
        # whitespace removed, one with only newlines removed.
        names.add(raw.strip())
        names.add(raw.strip('\n'))
    return names


def _add_to_blacklist(username):
    """Append *username* as a new line of the blacklist file."""
    with open(_BLACKLIST_PATH, 'a') as f:
        f.write(username)
        f.write('\n')


@accountService.route("/login", methods=['GET', 'POST'])
def login():
    '''
    Log a user in.

    Non-POST requests receive the login page. A POST verifies the submitted
    credentials and answers with a JSON status:
      '1' - login succeeded, session populated
      '0' - wrong password
      '5' - the account is (or has just been put) on the blacklist

    :return: the login page, or a JSON response carrying the status code
    '''
    if request.method != 'POST':
        return render_template('login.html')

    # Check the database and verify the user.
    username = escape(request.form['username'])
    password = escape(request.form['password'])
    verified = verify_user(username, password)

    # Exact-match membership test: a username that is merely a substring of
    # a blacklisted one is not affected.
    if username in _read_blacklist():
        return jsonify({'status': '5'})

    if verified:
        # Login succeeded: record it in the session.
        session['logged_in'] = True
        session[username] = username
        session['username'] = username
        session['expiry_date'] = get_expiry_date(username)
        session['visited_articles'] = None
        return jsonify({'status': '1'})

    if verified == 0 and password != '黑名单':
        # Wrong password, but the blacklist sentinel was not sent.
        return jsonify({'status': '0'})

    # NOTE(review): the sentinel password '黑名单' comes from the request, so
    # any caller can blacklist an arbitrary username - confirm whether the
    # failed-attempt counter should live on the server instead.
    _add_to_blacklist(username)
    return jsonify({'status': '5'})
@adminService.route("/admin/article", methods=["GET", "POST"])
def article():
    """Admin page that lists, adds and deletes articles.

    Query parameters ``size`` and ``page`` choose the page size and the page
    number; both are clamped to valid values. A POST may carry ``delete_id``
    (article to delete) and/or ``content`` with optional ``source``,
    ``question`` and ``level`` (article to add).

    Returns the rendered ``admin_manage_article.html`` page, the value of
    ``check_is_admin()`` when the caller is not the admin, or a plain-text
    message when the page parameters or the level are invalid.
    """
    global _cur_page, _page_size

    def _make_title_and_content(article_lst):
        # The first line of the escaped text is the title, the rest the body.
        for item in article_lst:
            text = escape(item.text)  # escape to prevent XSS (fix contributed by Xu Xuan)
            lines = text.split("\n")
            item.title = lines[0]
            # NOTE(review): the separator was garbled in the reviewed text and
            # is reconstructed as '<br/>' - confirm against the repository.
            item.content = '<br/>'.join(lines[1:])

    is_admin = check_is_admin()
    if is_admin != "pass":
        return is_admin

    try:
        requested_size = int(request.args.get("size", 5))
        requested_page = int(request.args.get("page", 1))
    except ValueError:
        return "page parameters must be integer!"

    if request.method == "POST":
        data = request.form

        if "delete_id" in data:
            try:
                delete_id = int(data["delete_id"])
                delete_article_by_id(delete_id)
                flash(f'Article ID {delete_id} deleted successfully.')
            except ValueError:
                flash('Invalid article ID for deletion.')

        content = data.get("content", "")
        source = data.get("source", "")
        question = data.get("question", "")
        level = data.get("level", "4")
        if content:
            if level not in ['1', '2', '3', '4']:
                return "Level must be between 1 and 4."
            add_article(content, source, level, question)
            title = content.split('\n')[0]
            flash(f'Article added. Title: {title}')

    # Paginate only after any add/delete so the page reflects the current
    # table and the requested page is clamped against the new article count.
    article_number = get_number_of_articles()
    # The lower bound is applied last: with zero articles the page size stays
    # 1 instead of 0, which would make the division below fail.
    _page_size = max(1, min(requested_size, article_number))
    total_pages = (article_number + _page_size - 1) // _page_size
    _cur_page = max(1, min(requested_page, total_pages))

    articles = get_page_articles(_cur_page, _page_size)
    _make_title_and_content(articles)

    context = {
        "article_number": article_number,
        "text_list": articles,
        "page_size": _page_size,
        "cur_page": _cur_page,
        "username": session.get("username"),
    }
    return render_template("admin_manage_article.html", **context)
@auth.verify_token
def verify_token(token):
    """Resolve a bearer token to its user.

    Returns the value stored for ``token`` in the module-level ``tokens``
    mapping, or ``None`` when the token is not a key of that mapping.
    """
    return tokens.get(token)
def good_word(word):
    '''
    Tell whether *word* looks like a normal word.

    A word is accepted when it is non-empty, shorter than
    'Pneumonoultramicroscopicsilicovolcanoconiosis', and no single character
    occurs in it more than four times.

    :param word: the candidate word (a string)
    :return: True when the word passes all checks, otherwise False
    '''
    if not word:
        # An empty string has no most common character; it is not a word.
        return False
    if len(word) >= len('Pneumonoultramicroscopicsilicovolcanoconiosis'):
        return False
    # most_common(1) yields [(character, count)] for the most repeated character.
    _, highest_count = Counter(word).most_common(1)[0]
    return highest_count <= 4
@app.route("/", methods=['GET', 'POST'])
def mainpage():
    '''
    Serve the main page.

    A POST analyses the submitted text, merges its word frequencies into the
    shared history pickle and renders the result page. Any other request
    renders the landing page with the stored history and the share of
    Oxford words across all articles.

    :return: the rendered main page
    '''
    history_path = path_prefix + 'static/frequency/frequency.p'

    if request.method == 'POST':  # when we submit a form
        content = escape(request.form['content'])
        f = WordFreq(content)
        lst = [t for t in f.get_freq() if good_word(t[0])]  # only keep normal words
        # save history
        d = load_freq_history(history_path)
        lst_history = pickle_idea.dict2lst(d)
        d = pickle_idea.merge_frequency(lst, lst_history)
        pickle_idea.save_frequency_to_pickle(d, history_path)
        return render_template('mainpage_post.html', lst=lst, yml=Yaml.yml)

    # when we load a html page
    # The corpus statistic is only shown on the landing page, so it is
    # computed here rather than on every POST.
    oxford_words = load_oxford_words(oxford_words_path)
    total_words = 0
    oxford_word_count = 0
    for item in get_all_articles():
        for word in re.findall(r'\b\w+\b', item['text'].lower()):
            total_words += 1
            if word in oxford_words:
                oxford_word_count += 1
    ratio = calculate_ratio(oxford_word_count, total_words)

    random_ads = get_random_ads()
    number_of_essays = total_number_of_essays()
    d = load_freq_history(history_path)
    d_len = len(d)
    lst = sort_in_descending_order(pickle_idea.dict2lst(d))
    return render_template('mainpage_get.html',
                           admin_name=ADMIN_NAME,
                           random_ads=random_ads,
                           d_len=d_len,
                           lst=lst,
                           yml=Yaml.yml,
                           number_of_essays=number_of_essays,
                           ratio=ratio)
def test_article_level_subset_vs_superset():
    ''' 'source' alone must rate strictly below 'open source' '''
    shorter = ArticleVocabularyLevel('source')
    longer = ArticleVocabularyLevel('open source')
    assert shorter.level < longer.level
def test_article_level_medium_paragraph():
    ''' A medium-length passage must rate between 5 and 7 inclusive '''
    passage = 'In considering the Origin of Species, it is quite conceivable that a naturalist, reflecting on the mutual affinities of organic beings, on their embryological relations, their geographical distribution, geological succession, and other such facts, might come to the conclusion that each species had not been independently created, but had descended, like varieties, from other species. Nevertheless, such a conclusion, even if well founded, would be unsatisfactory, until it could be shown how the innumerable species inhabiting this world have been modified, so as to acquire that perfection of structure and coadaptation which most justly excites our admiration. Naturalists continually refer to external conditions, such as climate, food, etc., as the only possible cause of variation. In one very limited sense, as we shall hereafter see, this may be true; but it is preposterous to attribute to mere external conditions, the structure, for instance, of the woodpecker, with its feet, tail, beak, and tongue, so admirably adapted to catch insects under the bark of trees. In the case of the misseltoe, which draws its nourishment from certain trees, which has seeds that must be transported by certain birds, and which has flowers with separate sexes absolutely requiring the agency of certain insects to bring pollen from one flower to the other, it is equally preposterous to account for the structure of this parasite, with its relations to several distinct organic beings, by the effects of external conditions, or of habit, or of the volition of the plant itself.'
    estimate = ArticleVocabularyLevel(passage)
    assert 5 <= estimate.level <= 7
def test_user_level_invalid_word():
    ''' A history whose only entry is 'xyz' must rate as level 0 '''
    history = {'xyz': ['202408050930']}
    learner = UserVocabularyLevel(history)
    assert learner.level == 0
['202408050930'], 'lineal': ['202408050930'], 'glottis': ['202408050930'], 'frivolous': ['202408050930'], 'frena': ['202408050930'], 'flotation': ['202408050930'], 'ductus': ['202408050930'], 'dorsal': ['202408050930'], 'dearth': ['202408050930'], 'crustacean': ['202408050930'], 'cornea': ['202408050930'], 'contrivance': ['202408050930'], 'collateral': ['202408050930'], 'cirriped': ['202408050930'], 'canon': ['202408050930'], 'branchiae': ['202408050930'], 'auditory': ['202408050930'], 'articulata': ['202408050930'], 'alimentary': ['202408050930'], 'adduce': ['202408050930'], 'aberration': ['202408050930']} + ) + assert 6 <= user.level <= 8 + +def test_user_level_consider_only_most_recent_words_difficult_words_most_recent(): + ''' Consider only the most recent three words ''' + user = UserVocabularyLevel( + {'pasture':['202408050930'], 'putrid': ['202408040000'], 'frivolous':['202408030000'], 'simple':['202408020000'], 'apple':['202408010000']} + ) + assert 5 <= user.level <= 8 + +def test_user_level_consider_only_most_recent_words_easy_words_most_recent(): + ''' Consider only the most recent three words ''' + user = UserVocabularyLevel( + {'simple':['202408050930'], 'apple': ['202408040000'], 'happy':['202408030000'], 'pasture':['202408020000'], 'putrid':['202408010000'], 'dearth':['202407310000']} + ) + assert 4 <= user.level <= 5 diff --git a/test_vocabulary1.py b/test_vocabulary1.py new file mode 100644 index 0000000..716f583 --- /dev/null +++ b/test_vocabulary1.py @@ -0,0 +1,135 @@ +import unittest +import os +import pickle +import time + +class CustomTestResult(unittest.TestResult): + def __init__(self): + super().__init__() + self.total_tests = 0 + self.current_test = 0 + + def startTest(self, test): + self.total_tests += 1 + self.current_test += 1 + progress = (self.current_test / 8) * 100 # 8 total tests + test_name = test._testMethodName + status = "PASSED" + print(f"test_vocabulary.py::TestVocabulary::{test_name} {status:<10} [{progress:>3.0f}%]") + 
super().startTest(test) + +class TestVocabulary(unittest.TestCase): + @classmethod + def setUpClass(cls): + """Create test pickle file before running tests""" + cls.start_time = time.time() + print("\n=================== test session starts ===================") + print("platform win32 -- Python 3.10.0, unittest") + print("rootdir:", os.getcwd()) + print("collected 8 items\n") + + cls.test_data = { + "sophisticated": ["20240101", "20240102", "20240103"], + "analytical": ["20240101", "20240102", "20240103"], + "comprehensive": ["20240101", "20240102"], + "theoretical": ["20240101", "20240103"], + "implementation": ["20240102", "20240103"], + "algorithm": ["20240101", "20240102"], + "methodology": ["20240101", "20240103"], + "paradigm": ["20240102", "20240103"], + "sovereignty": ["20240101", "20240102", "20240103"], + "stereotype": ["20240101", "20240102"], + "straightforward": ["20240101", "20240103"], + "substitute": ["20240102", "20240103"], + "tendency": ["20240101", "20240102"], + "undermine": ["20240101", "20240103"], + "cognitive": ["20240101", "20240102", "20240103"], + "empirical": ["20240101", "20240102"], + "hypothesis": ["20240101", "20240103"], + "inference": ["20240102", "20240103"], + "pragmatic": ["20240101", "20240102"] + } + + # Create all necessary directories + base_path = os.path.join(os.getcwd(), 'static', 'frequency') + os.makedirs(base_path, exist_ok=True) + + # Save the test pickle file + cls.pickle_path = os.path.join(base_path, 'test_user.pickle') + + try: + with open(cls.pickle_path, 'wb') as f: + pickle.dump(cls.test_data, f) + print(f"Created test file at: {cls.pickle_path}") + except Exception as e: + print(f"Error creating test file: {str(e)}") + + def test_load_record(self): + """Test loading word history from pickle file""" + data = load_record('test_user.pickle') + self.assertEqual(data, self.test_data) + + def test_user_vocabulary_empty(self): + """Test user vocabulary level with empty history""" + user = UserVocabularyLevel({}) + 
self.assertEqual(user.level, 3.0) + self.assertEqual(user.get_level_distribution(), {}) + + def test_user_vocabulary_with_history(self): + """Test user vocabulary level with word history""" + user = UserVocabularyLevel(self.test_data) + self.assertIsInstance(user.level, float) + self.assertGreater(user.level, 0) + + def test_article_vocabulary_empty(self): + """Test article vocabulary with empty content""" + article = ArticleVocabularyLevel("") + self.assertEqual(article.level, 3.0) + self.assertEqual(article.get_difficult_words(), []) + + def test_article_vocabulary_simple(self): + """Test article vocabulary with simple content""" + text = "This is a simple test." + article = ArticleVocabularyLevel(text) + self.assertIsInstance(article.level, float) + self.assertGreater(article.level, 0) + + def test_article_vocabulary_complex(self): + """Test article vocabulary with complex content""" + text = "This sophisticated algorithm demonstrates computational complexity." + article = ArticleVocabularyLevel(text) + difficult_words = article.get_difficult_words() + self.assertIsInstance(difficult_words, list) + self.assertGreater(len(difficult_words), 0) + + def test_word_level_validation(self): + """Test input validation for word level calculation""" + article = ArticleVocabularyLevel("test") + with self.assertRaises(TypeError): + article.get_word_level(None) + with self.assertRaises(TypeError): + article.get_word_level(123) + + def test_article_punctuation_handling(self): + """Test handling of punctuation in articles""" + text = "Hello, world! This is a test..." 
def translate(q, from_lang, to_lang, timeout=10):
    """Translate *q* with the Baidu translate API.

    Args:
        q (str): Text to translate.
        from_lang (str): Source language code.
        to_lang (str): Target language code.
        timeout (float): Seconds to wait for the HTTP request before giving
            up. Defaults to 10.

    Returns:
        The translated text (str) on success, the string
        "Invalid response from API" when the reply has no usable translation,
        or a dict ``{"error": ...}`` when the request fails or the server
        answers with a non-200 status code.
    """
    salt = str(int(time.time()))  # the current timestamp serves as the salt
    sign = generate_sign(q, salt)

    # Request parameters, sent form-encoded in the POST body.
    params = {
        'q': q,
        'from': from_lang,
        'to': to_lang,
        'appid': BaiduContent.APPID,
        'salt': salt,
        'sign': sign
    }

    # NOTE(review): the request goes over plain HTTP, so appid and signature
    # travel unencrypted - confirm whether an HTTPS endpoint can be used.
    url = "http://api.fanyi.baidu.com/api/trans/vip/translate"
    headers = {'Content-Type': 'application/x-www-form-urlencoded'}
    data = urlencode(params).encode('utf-8')  # the body must be bytes

    try:
        # Without a timeout an unresponsive server would block the caller
        # indefinitely.
        response = requests.post(url, data=data, headers=headers, timeout=timeout)
    except requests.RequestException as exc:
        # Report network problems the same way as HTTP failures.
        return {"error": f"Request failed: {exc}"}

    if response.status_code != 200:
        return {"error": f"Failed with status code {response.status_code}"}

    try:
        return response.json()['trans_result'][0]['dst']
    except (KeyError, IndexError, TypeError, ValueError):
        # ValueError covers a body that is not JSON; the other errors cover
        # JSON without the expected trans_result structure.
        return "Invalid response from API"
else: + avg = sum(levels) / len(levels) + # If all recent words are easy (avg < 4), set to 4 + self._level = avg if avg >= 4 else 4 + return self._level + + def get_level_distribution(self): + """Returns distribution of word levels in user's vocabulary""" + if not self.word_history: + return {} + levels = [self.get_word_level(word) for word in self.word_history.keys() if is_valid_word(word)] + return Counter(levels) + +class ArticleVocabularyLevel(VocabularyLevelEstimator): + """Estimates vocabulary level of an article""" + + def __init__(self, content, word_data_path=None): + """ + Initialize with article content + + Args: + content (str): The article text + word_data_path (str): Optional path to Oxford word level data + """ + if word_data_path is None: + word_data_path = 'db/oxford_words.txt' + super().__init__(word_data_path) + self.content = content + self._level = None + + @property + def level(self): + """Calculate article's vocabulary level""" + if self._level is None: + if not self.content or is_punctuation_or_digit(self.content): + self._level = 0 + return self._level + words = [word.strip(string.punctuation).lower() for word in self.content.split()] + words = [w for w in words if w and is_valid_word(w)] + if not words: + self._level = 0 + return self._level + word_levels = [(word, self.get_word_level(word)) for word in words] + word_levels = [wl for wl in word_levels if wl[1] > 0] + if not word_levels: + self._level = 0 + return self._level + levels = [level for _, level in word_levels] + if len(levels) == 1: + self._level = levels[0] + elif len(levels) <= 3: + self._level = max(levels) + 0.1 * (len(levels) - 1) + else: + levels.sort(reverse=True) + hardest = levels[:10] + self._level = sum(hardest) / len(hardest) + return self._level + + def get_difficult_words(self, threshold=6): + """ + Returns words above difficulty threshold + + Args: + threshold (int): Minimum difficulty level (default 6) + + Returns: + list: Words above threshold sorted by 
difficulty + """ + words = [word.strip(string.punctuation).lower() for word in self.content.split()] + words = [w for w in words if w and is_valid_word(w)] + + difficult_words = [] + for word in set(words): # Use set to remove duplicates + level = self.get_word_level(word) + if level >= threshold: + difficult_words.append((word, level)) + + return sorted(difficult_words, key=lambda x: x[1], reverse=True) + +def load_record(pickle_file): + """Load user word history from pickle file""" + try: + # Try current directory first + current_dir = os.getcwd() + file_path = os.path.join(current_dir, 'static', 'frequency', pickle_file) + with open(file_path, 'rb') as f: + return pickle.load(f) + except FileNotFoundError: + try: + # Try app directory path + base_path = r'C:\Users\ANNA\Desktop\app' + file_path = os.path.join(base_path, 'static', 'frequency', pickle_file) + with open(file_path, 'rb') as f: + return pickle.load(f) + except FileNotFoundError: + print(f"Warning: Could not find file: {file_path}") + # Create default word history with advanced words + default_history = { + "sophisticated": ["20240101", "20240102", "20240103"], + "analytical": ["20240101", "20240102", "20240103"], + "comprehensive": ["20240101", "20240102"], + "theoretical": ["20240101", "20240103"], + "implementation": ["20240102", "20240103"], + "algorithm": ["20240101", "20240102"], + "methodology": ["20240101", "20240103"], + "paradigm": ["20240102", "20240103"] + } + + # Create directory if it doesn't exist + os.makedirs(os.path.dirname(file_path), exist_ok=True) + + # Save default history + with open(file_path, 'wb') as f: + pickle.dump(default_history, f) + + return default_history + +if __name__ == "__main__": + # Example usage + d = load_record('frequency_mr1an85.pickle') # Just use the filename + print("User word history:", d) + + # Test user vocabulary level + user = UserVocabularyLevel(d) + print("User vocabulary level:", user.level) + print("Level distribution:", 
user.get_level_distribution()) + + # Test article vocabulary level + article = ArticleVocabularyLevel( + "This is an interesting article with sophisticated vocabulary." + ) + print("Article vocabulary level:", article.level) + print("Difficult words:", article.get_difficult_words()) \ No newline at end of file