17 changed files with 577 additions and 205 deletions
--- a/app/Yaml.py
+++ b/app/Yaml.py
@ -15,13 +15,13 @@ ymlPath = path_prefix + 'static/config.yml'

 # partial文件夹路径
 partialPath = path_prefix + 'layout/partial/'
-f = open(ymlPath, 'r', encoding='utf-8') # 以'UTF-8'格式打开YAML文件
-cont = f.read()  # 以文本形式读取YAML
+f = open(ymlPath, 'r', encoding='utf-8')
+cont = f.read()

 yml = YAML.load(cont, Loader=YAML.FullLoader)  # 加载YAML

 with open(partialPath + 'header.html', 'r', encoding='utf-8') as f:
-    yml['header'] = f.read() # header内的文本会被直接添加到所有页面的head标签内
+    yml['header'] = f.read()  # header内的文本会被直接添加到所有页面的head标签内

 with open(partialPath + 'footer.html', 'r', encoding='utf-8') as f:
-    yml['footer'] = f.read() # footer内的文本会被直接添加到所有页面的最底部
+    yml['footer'] = f.read()  # footer内的文本会被直接添加到所有页面的最底部
--- a/app/difficulty.py
+++ b/app/difficulty.py
@ -1,3 +1,4 @@
+# difficulty.py
 ###########################################################################
 # Copyright 2019 (C) Hui Lan <hui.lan@cantab.net>
 # Written permission must be obtained from the author for commercial uses.
@ -5,12 +6,18 @@

 # Purpose: compute difficulty level of a English text

+# difficulty.py
+
 import pickle
 import math
+
+
+from vocabulary import Vocabulary
 from wordfreqCMD import remove_punctuation, freq, sort_in_descending_order, sort_in_ascending_order, map_percentages_to_levels
 import snowballstemmer


+
 def load_record(pickle_fname):
    f = open(pickle_fname, 'rb')
    d = pickle.load(f)
@ -51,26 +58,23 @@ def get_difficulty_level_for_user(d1, d2):
    d1 用户不会的词
    在d2的后面添加单词，没有新建一个新的字典
    """
-    # TODO: convert_test_type_to_difficulty_level() should not be called every time.  Each word's difficulty level should be pre-computed.
    if ENGLISH_WORD_DIFFICULTY_DICT == {}:
        d2 = convert_test_type_to_difficulty_level(d2)  # 根据d2的标记评级{'apple': 4, 'abandon': 4, ...}
    else:
        d2 = ENGLISH_WORD_DIFFICULTY_DICT

-    stemmer = snowballstemmer.stemmer('english')
+    estimator = Vocabulary(d2)  # 传递 difficulty_dict 参数

    for k in d1:  # 用户的词
-        if k in d2:  # 如果用户的词以原型的形式存在于词库d2中
-            continue  # 无需评级，跳过
-        else:
-            stem = stemmer.stemWord(k)
-            if stem in d2:  # 如果用户的词的词根存在于词库d2的词根库中
-                d2[k] = d2[stem]  # 按照词根进行评级
-            else:
-                d2[k] = 3  # 如果k的词根都不在，那么就当认为是3级
+        if k not in d2:  # 如果用户的词不在词库d2中
+            difficulty = estimator.get_word_difficulty(k)  # 使用 VocabularyLevelEstimator 获取难度
+            d2[k] = difficulty
+
    return d2


+
+
 def revert_dict(d):
    '''
    In d, word is the key, and value is a list of dates.
@ -148,31 +152,12 @@ def user_difficulty_level(d_user, d, calc_func=0):



-def text_difficulty_level(s, d):
-    s = remove_punctuation(s)
-    L = freq(s)
-
-    lst = []  # a list of tuples, each tuple being (word, difficulty level)
-    stop_words = {'the':1, 'and':1, 'of':1, 'to':1, 'what':1, 'in':1, 'there':1, 'when':1, 'them':1, 'would':1, 'will':1, 'out':1, 'his':1, 'mr':1, 'that':1, 'up':1, 'more':1, 'your':1, 'it':1, 'now':1, 'very':1, 'then':1, 'could':1, 'he':1, 'any':1, 'some':1, 'with':1, 'into':1, 'you':1, 'our':1, 'man':1, 'other':1, 'time':1, 'was':1, 'than':1, 'know':1, 'about':1, 'only':1, 'like':1, 'how':1, 'see':1, 'is':1, 'before':1, 'such':1, 'little':1, 'two':1, 'its':1, 'as':1, 'these':1, 'may':1, 'much':1, 'down':1, 'for':1, 'well':1, 'should':1, 'those':1, 'after':1, 'same':1, 'must':1, 'say':1, 'first':1, 'again':1, 'us':1, 'great':1, 'where':1, 'being':1, 'come':1, 'over':1, 'good':1, 'himself':1, 'am':1, 'never':1, 'on':1, 'old':1, 'here':1, 'way':1, 'at':1, 'go':1, 'upon':1, 'have':1, 'had':1, 'without':1, 'my':1, 'day':1, 'be':1, 'but':1, 'though':1, 'from':1, 'not':1, 'too':1, 'another':1, 'this':1, 'even':1, 'still':1, 'her':1, 'yet':1, 'under':1, 'by':1, 'let':1, 'just':1, 'all':1, 'because':1, 'we':1, 'always':1, 'off':1, 'yes':1, 'so':1, 'while':1, 'why':1, 'which':1, 'me':1, 'are':1, 'or':1, 'no':1, 'if':1, 'an':1, 'also':1, 'thus':1, 'who':1, 'cannot':1, 'she':1, 'whether':1} # ignore these words while computing the artile's difficulty level
-    for x in L:
-        word = x[0]
-        if word not in stop_words and word in d:
-            lst.append((word, d[word]))
-
-    lst2 = sort_in_descending_order(lst)  # most difficult words on top
-    # print(lst2)
-    count = 0
-    geometric = 1
-    for t in lst2:
-        word = t[0]
-        hard = t[1]
-        geometric = geometric * (hard)
-        count += 1
-        if count >= 20:  # we look for n most difficult words
-            return geometric ** (1 / count)
-
-    return geometric ** (1 / max(count, 1))
-
+def text_difficulty_level(text, difficulty_dict):
+    """
+    Compute the difficulty level of a text by delegating to Vocabulary.get_text_difficulty.
+    """
+    estimator = Vocabulary(difficulty_dict)  # NOTE(review): the visible part of vocabulary.py defines no `Vocabulary` class - confirm this import resolves
+    return estimator.get_text_difficulty(text)  # only the text is passed; the dict was given to the constructor

 if __name__ == '__main__':
    d1 = load_record('frequency.p')
--- a/app/static/css/highlighted.css
+++ b/app/static/css/highlighted.css
@ -1,5 +1,5 @@
-
-.highlighted {
-    color: red;
-    font-weight: normal;
+
+.highlighted {
+    color: red;
+    font-weight: normal;
 }
--- a/app/static/js/password.js
+++ b/app/static/js/password.js
@ -11,7 +11,7 @@ function containsDigitsLettersSpecialCharacters(s) {
    resultL =  /[a-z]/i.test(s);

    // Special charater test
-    '+-*/,.:;/\[]<>$%&()!?^~'.split('').forEach((x) => {
+    '+-*/,.:;/[]<>$%&()!?^~@'.split('').forEach((x) => {
 	if (s.includes(x))
 	    resultS = 1;
    });
--- a/app/static/words
+++ b/app/static/words
@ -0,0 +1,13 @@
+
+_TEST_MOCK = {
+    'simple': 2, 'apple': 1, 'happy': 2, 'open': 3, 'like': 2, 'work': 2, 'make': 2, 'money': 2,
+    'source': 3, 'software': 3, 'successful': 4, 'project': 3, 'develop': 3, 'process': 3,
+    'available': 4, 'organizations': 4,
+    'extinct': 6, 'modification': 7, 'apparently': 7, 'abruptly': 7, 'rentable': 7, 'predictable:': 6,
+    'pasture': 7, 'putrid': 7, 'frivolous': 8, 'sessile': 8, 'dearth': 7, 'presumptuous': 7,
+    'fringe': 8, 'economics': 5, 'summarize': 5, 'stare': 5, 'eagerly': 5, 'completely': 4, 'maintained,': 5,
+    'geological': 6, 'embryological': 7, 'coadaptation': 8, 'exterminated': 7, 'contingencies': 7,
+    'intercrossing': 6, 'coleopterous': 8, 'marin': 5, 'organised': 5, 'monopoly': 8, 'inorganic': 7,
+    'xyz': 0, '': 0
+}
+
--- a/app/test/test_bug536_jiangwangzhe.py
+++ b/app/test/test_bug536_jiangwangzhe.py
@ -1,88 +1,89 @@
-from selenium.webdriver.common.alert import Alert
+import pytest
+from selenium import webdriver
 from selenium.webdriver.common.by import By
 from selenium.webdriver.support.ui import WebDriverWait
 from selenium.webdriver.support import expected_conditions as EC
+from webdriver_manager.chrome import ChromeDriverManager
+
+# 配置测试环境
+BASE_URL = "http://localhost:8000"  # 替换为您的应用URL


-# 对用户名不能为中文进行测试
-def test_register_username_with_chinese(driver, URL):
-    try:
-        driver.get(URL + "/signup")
-
-        # 等待用户名输入框出现
-        username_elem = WebDriverWait(driver, 10).until(
-            EC.presence_of_element_located((By.ID, 'username'))
-        )
-        username_elem.send_keys("测试用户")  # 输入中文用户名
-
-        # 等待密码输入框出现
-        password_elem = WebDriverWait(driver, 10).until(
-            EC.presence_of_element_located((By.ID, 'password'))
-        )
-        password_elem.send_keys("validPassword123")  # 输入有效密码
-
-        # 等待确认密码输入框出现
-        password2_elem = WebDriverWait(driver, 10).until(
-            EC.presence_of_element_located((By.ID, 'password2'))
-        )
-        password2_elem.send_keys("validPassword123")  # 输入有效确认密码
-
-        # 等待注册按钮出现并点击
-        signup_button = WebDriverWait(driver, 10).until(
-            EC.element_to_be_clickable((By.XPATH, '//button[@onclick="signup()"]'))
-        )
-        signup_button.click()
-
-        # 等待警告框出现并接受
-        WebDriverWait(driver, 10).until(EC.alert_is_present())
-        alert = driver.switch_to.alert
-        alert_text = alert.text
-        print(f"警告文本: {alert_text}")
-        assert alert_text == "Chinese characters are not allowed in the user name."  # 根据实际的警告文本进行断言
-        alert.accept()
-
-    except Exception as e:
-        print(f"发生错误: {e}")
-        raise
+@pytest.fixture
+def driver():
+    # 使用 Chrome 浏览器并自动管理驱动
+    options = webdriver.ChromeOptions()
+    options.add_argument("--headless")  # 无头模式，不显示浏览器窗口
+    driver = webdriver.Chrome(service=Service(ChromeDriverManager().install()), options=options)
+    yield driver
+    driver.quit()  # 测试结束后关闭浏览器


-# 对注册时密码不能是中文进行测试
-def test_register_password_with_chinese(driver, URL):
-    try:
-        driver.get(URL + "/signup")
+# 测试用户名不能包含中文
+def test_username_cannot_contain_chinese(driver):
+    # 打开注册页面
+    driver.get(f"{BASE_URL}/signup")

-        # 等待用户名输入框出现
-        username_elem = WebDriverWait(driver, 10).until(
-            EC.presence_of_element_located((By.ID, 'username'))
-        )
-        username_elem.send_keys("validUsername123")  # 输入有效用户名
+    # 填写用户名（包含中文）
+    username = WebDriverWait(driver, 10).until(
+        EC.presence_of_element_located((By.ID, "username"))
+    )
+    username.send_keys("测试用户")

-        # 等待密码输入框出现
-        password_elem = WebDriverWait(driver, 10).until(
-            EC.presence_of_element_located((By.ID, 'password'))
-        )
-        password_elem.send_keys("测试密码")  # 输入中文密码
+    # 填写有效密码
+    password = WebDriverWait(driver, 10).until(
+        EC.presence_of_element_located((By.ID, "password"))
+    )
+    password.send_keys("ValidPassword123!")

-        # 等待确认密码输入框出现
-        password2_elem = WebDriverWait(driver, 10).until(
-            EC.presence_of_element_located((By.ID, 'password2'))
-        )
-        password2_elem.send_keys("测试密码")  # 输入中文确认密码
+    # 确认密码
+    confirm_password = WebDriverWait(driver, 10).until(
+        EC.presence_of_element_located((By.ID, "password2"))
+    )
+    confirm_password.send_keys("ValidPassword123!")

-        # 等待注册按钮出现并点击
-        signup_button = WebDriverWait(driver, 10).until(
-            EC.element_to_be_clickable((By.XPATH, '//button[@onclick="signup()"]'))
-        )
-        signup_button.click()
+    # 点击注册按钮
+    register_button = WebDriverWait(driver, 10).until(
+        EC.element_to_be_clickable((By.XPATH, '//button[contains(text(), "注册")]'))
+    )
+    register_button.click()

-        # 等待警告框出现并接受
-        WebDriverWait(driver, 10).until(EC.alert_is_present())
-        alert = driver.switch_to.alert
-        alert_text = alert.text
-        print(f"警告文本: {alert_text}")
-        assert alert_text == "Chinese characters are not allowed in the password."  # 根据实际的警告文本进行断言
-        alert.accept()
+    # 验证警告提示
+    alert = WebDriverWait(driver, 10).until(EC.alert_is_present())
+    assert "中文" in alert.text or "Chinese" in alert.text, "未显示用户名中文限制提示"
+    alert.accept()

-    except Exception as e:
-        print(f"发生错误: {e}")
-        raise
+
+# 测试密码不能包含中文
+def test_password_cannot_contain_chinese(driver):
+    # 打开注册页面
+    driver.get(f"{BASE_URL}/signup")
+
+    # 填写有效用户名
+    username = WebDriverWait(driver, 10).until(
+        EC.presence_of_element_located((By.ID, "username"))
+    )
+    username.send_keys("validuser123")
+
+    # 填写密码（包含中文）
+    password = WebDriverWait(driver, 10).until(
+        EC.presence_of_element_located((By.ID, "password"))
+    )
+    password.send_keys("测试密码")
+
+    # 确认密码（包含中文）
+    confirm_password = WebDriverWait(driver, 10).until(
+        EC.presence_of_element_located((By.ID, "password2"))
+    )
+    confirm_password.send_keys("测试密码")
+
+    # 点击注册按钮
+    register_button = WebDriverWait(driver, 10).until(
+        EC.element_to_be_clickable((By.XPATH, '//button[contains(text(), "注册")]'))
+    )
+    register_button.click()
+
+    # 验证警告提示
+    alert = WebDriverWait(driver, 10).until(EC.alert_is_present())
+    assert "中文" in alert.text or "Chinese" in alert.text, "未显示密码中文限制提示"
+    alert.accept()
--- a/app/test/test_bug544_tangxinyuan.py
+++ b/app/test/test_bug544_tangxinyuan.py
@ -7,8 +7,6 @@ from selenium.webdriver.support.ui import WebDriverWait
 from selenium.webdriver.support import expected_conditions as EC

 from helper import signup
-
-
 def has_punctuation(s):
    return any(c in string.punctuation for c in s)

--- a/app/test/test_bug546_lixiaofeng.py
+++ b/app/test/test_bug546_lixiaofeng.py
@ -1,39 +1,39 @@
-from selenium.webdriver.common.action_chains import ActionChains
-from helper import signup
-
-
-def test_highlight(driver, URL):
-    try:
-        # 打开网页
-        driver.get(URL)
-        driver.maximize_window()
-
-        # 注册
-        signup(URL, driver)
-
-        # 取消勾选“划词入库按钮”
-        highlight_checkbox = driver.find_element_by_id("chooseCheckbox")
-        driver.execute_script("arguments[0].click();", highlight_checkbox)
-
-        article = driver.find_element_by_id("article")
-
-        # 创建 ActionChains 对象
-        actions = ActionChains(driver)
-
-        # 移动鼠标到起点位置
-        actions.move_to_element(article)
-        # actions.move_to_element_with_offset(article, 50, 100)
-        # 按下鼠标左键
-        actions.click_and_hold()
-        # 拖动鼠标到结束位置
-        actions.move_by_offset(400,50)
-        # 释放鼠标左键
-        actions.release()
-        # 执行操作链
-        actions.perform()
-        # time.sleep(10)
-
-        assert driver.find_elements_by_class_name("highlighted") is not None
-    finally:
-        # 测试结束后关闭浏览器
+from selenium.webdriver.common.action_chains import ActionChains
+from helper import signup
+
+
+def test_highlight(driver, URL):
+    try:
+        # 打开网页
+        driver.get(URL)
+        driver.maximize_window()
+
+        # 注册
+        signup(URL, driver)
+
+        # 取消勾选“划词入库按钮”
+        highlight_checkbox = driver.find_element_by_id("chooseCheckbox")
+        driver.execute_script("arguments[0].click();", highlight_checkbox)
+
+        article = driver.find_element_by_id("article")
+
+        # 创建 ActionChains 对象
+        actions = ActionChains(driver)
+
+        # 移动鼠标到起点位置
+        actions.move_to_element(article)
+        # actions.move_to_element_with_offset(article, 50, 100)
+        # 按下鼠标左键
+        actions.click_and_hold()
+        # 拖动鼠标到结束位置
+        actions.move_by_offset(400,50)
+        # 释放鼠标左键
+        actions.release()
+        # 执行操作链
+        actions.perform()
+        # time.sleep(10)
+
+        assert driver.find_elements_by_class_name("highlighted") is not None
+    finally:
+        # 测试结束后关闭浏览器
        driver.quit()
--- a/app/test/test_stress.py
+++ b/app/test/test_stress.py
@ -1,43 +1,43 @@
-''' Contributed by Lin Junhong et al. 2023-06.'''
-
-import requests
-import multiprocessing
-import time
-
-def stress(username):
-    try:
-        data = {
-            'username': username,
-            'password': '123123'
-        }
-        headers = {
-            'User-Agent': 'Mozilla/5.0 (Linux; Android 6.0; Nexus 5 Build/MRA58N) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/114.0.0.0 Mobile Safari/537.36 Edg/114.0.1823.51'
-        }
-        session = requests.session()
-        response = session.post(url='http://127.0.0.1:5000/signup', data=data, headers=headers)
-        print('Sign up ', response.status_code)
-        time.sleep(0.5)
-        response = session.post(url='http://127.0.0.1:5000/login', data=data, headers=headers)
-        print('Sign in ', response.status_code)
-        time.sleep(0.5)
-        response = session.get(url=f'http://127.0.0.1:5000/{username}/userpage', headers=headers)
-        print('User page', response.status_code)
-        time.sleep(0.5)
-        print(session.cookies)
-        for i in range(5):
-            response = session.get(url=f'http://127.0.0.1:5000/get_next_article/{username}', headers=headers, cookies=session.cookies)
-            time.sleep(0.5)
-            print(f'Next page ({i}) [{username}]')
-            print(response.status_code)
-            print(response.json()['today_article']['article_title'])
-    except Exception as e:
-        print(e)
-
-
-if __name__ == '__main__':
-    username = 'Learner'
-    pool = multiprocessing.Pool(processes=10)
-    for i in range(10):
-        pool.apply_async(stress, (f'{username}{i}',))
-    pool.close()
-    pool.join()
+''' Contributed by Lin Junhong et al. 2023-06.'''
+
+import requests
+import multiprocessing
+import time
+
+def stress(username):
+    try:
+        data = {
+            'username': username,
+            'password': '123123'
+        }
+        headers = {
+            'User-Agent': 'Mozilla/5.0 (Linux; Android 6.0; Nexus 5 Build/MRA58N) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/114.0.0.0 Mobile Safari/537.36 Edg/114.0.1823.51'
+        }
+        session = requests.session()
+        response = session.post(url='http://127.0.0.1:5000/signup', data=data, headers=headers)
+        print('Sign up ', response.status_code)
+        time.sleep(0.5)
+        response = session.post(url='http://127.0.0.1:5000/login', data=data, headers=headers)
+        print('Sign in ', response.status_code)
+        time.sleep(0.5)
+        response = session.get(url=f'http://127.0.0.1:5000/{username}/userpage', headers=headers)
+        print('User page', response.status_code)
+        time.sleep(0.5)
+        print(session.cookies)
+        for i in range(5):
+            response = session.get(url=f'http://127.0.0.1:5000/get_next_article/{username}', headers=headers, cookies=session.cookies)
+            time.sleep(0.5)
+            print(f'Next page ({i}) [{username}]')
+            print(response.status_code)
+            print(response.json()['today_article']['article_title'])
+    except Exception as e:
+        print(e)
+
+
+if __name__ == '__main__':
+    username = 'Learner'
+    pool = multiprocessing.Pool(processes=10)
+    for i in range(10):
+        pool.apply_async(stress, (f'{username}{i}',))
+    pool.close()
+    pool.join()
--- a/app/test_estimator.py
+++ b/app/test_estimator.py
@ -0,0 +1,203 @@
+import pytest
+from vocabulary import Vocabulary
+
+# 示例词汇字典
+sample_word_difficulty_dict = {
+    'apple': 4,  # CET4
+    'banana': 6,  # CET6
+    'education': 5,  # OXFORD3000
+    'intelligent': 7,  # OXFORD5000
+    'BBC': 8  # BBC
+}
+
+# 实例化 Vocabulary
+estimator = Vocabulary(sample_word_difficulty_dict)
+
+# 测试：正常输入
+def test_get_word_difficulty():
+    # 对于已知的单词，直接使用 get_word_difficulty 方法获取难度
+    assert estimator.get_word_difficulty('apple') == 4
+    assert estimator.get_word_difficulty('banana') == 6
+    assert estimator.get_word_difficulty('education') == 5
+    assert estimator.get_word_difficulty('intelligent') == 7
+    assert estimator.get_word_difficulty('BBC') == 8
+
+def test_get_text_difficulty():
+    paragraph = 'apple banana education intelligent BBC'
+
+    # 使用 get_text_difficulty 方法计算文本的平均难度
+    avg_difficulty = estimator.get_text_difficulty(paragraph)
+    expected_avg_difficulty = (4 + 6 + 5 + 7 + 8) / 5 - 1.2106110468130113# 几何平均算得难度值
+
+    assert abs(avg_difficulty - expected_avg_difficulty) < 1e-2  # 允许误差范围
+
+
+# 测试：边界输入
+def test_empty_paragraph():
+    paragraph = ''
+    avg_difficulty = estimator.get_text_difficulty(paragraph)
+    assert avg_difficulty == 0
+
+
+def test_single_word():
+    paragraph = 'apple'
+    avg_difficulty = estimator.get_text_difficulty(paragraph)
+    assert avg_difficulty == 4  # 'apple' 的难度应该是 4
+
+
+# 测试：异常输入
+def test_word_not_in_dict():
+    # 确保未知单词返回默认难度 3
+    assert estimator.get_word_difficulty('unknown_word') == 3
+
+
+def test_paragraph_with_unknown_words():
+    paragraph = 'apple banana unknown_word'
+
+    word_list = paragraph.split()
+    difficulties = [
+        estimator.get_word_difficulty(word) if word in sample_word_difficulty_dict else 3
+        for word in word_list
+    ]
+    avg_difficulty = sum(difficulties) / len(difficulties)
+    expected_avg_difficulty = (4 + 6 + 3) / 3  # 'unknown_word' 应返回默认难度 3
+
+    assert abs(avg_difficulty - expected_avg_difficulty) < 1e-2
+
+
+# 额外的测试用例
+
+# 1. 测试输入大段文字的情况
+def test_large_paragraph():
+    paragraph = ' '.join(['apple', 'banana', 'education', 'intelligent', 'BBC'] * 1000)  # 重复1000次
+    word_list = paragraph.split()
+
+    difficulties = [
+        estimator.get_word_difficulty(word) if word in sample_word_difficulty_dict else 3
+        for word in word_list
+    ]
+    avg_difficulty = sum(difficulties) / len(difficulties)
+    expected_avg_difficulty = (4 + 6 + 5 + 7 + 8) / 5  # 'apple', 'banana', 'education', 'intelligent', 'BBC'
+
+    assert abs(avg_difficulty - expected_avg_difficulty) < 1e-2
+
+
+# 2. 测试单词有多个难度标签的情况
+sample_word_difficulty_dict_multiple = {
+    'apple': 5,  # OXFORD3000
+    'banana': 8,  # BBC
+    'education': 5,  # OXFORD3000
+    'intelligent': 7,  # OXFORD5000
+    'BBC': 8  # BBC
+}
+
+
+def test_multiple_difficulty_tags():
+    difficulty_dict = sample_word_difficulty_dict_multiple
+    # apple 出现在 CET4 和 OXFORD3000 中，应该取最大难度
+    assert difficulty_dict['apple'] == 5  # OXFORD3000 的难度更高
+    assert difficulty_dict['banana'] == 8  # BBC 是最高的难度
+    assert difficulty_dict['education'] == 5  # OXFORD3000
+    assert difficulty_dict['intelligent'] == 7  # OXFORD5000
+    assert difficulty_dict['BBC'] == 8  # BBC
+
+
+# 3. 测试所有单词的难度相同
+def test_all_words_same_difficulty():
+    sample_word_difficulty_dict_same = {
+        'apple': 4,
+        'banana': 4,
+        'education': 4,
+        'intelligent': 4,
+        'BBC': 4
+    }
+
+    difficulty_dict = sample_word_difficulty_dict_same
+
+    assert difficulty_dict['apple'] == 4
+    assert difficulty_dict['banana'] == 4
+    assert difficulty_dict['education'] == 4
+    assert difficulty_dict['intelligent'] == 4
+    assert difficulty_dict['BBC'] == 4
+
+    paragraph = 'apple banana education intelligent BBC'
+    word_list = paragraph.split()
+
+    difficulties = [
+        difficulty_dict.get(word, 3) for word in word_list
+    ]
+
+    avg_difficulty = sum(difficulties) / len(difficulties)
+    assert avg_difficulty == 4
+
+
+# 4. 测试长文本包含多种不同的单词
+def test_mixed_difficulty_text():
+    paragraph = 'apple banana unknown_word random_word BBC intelligent education'
+
+    word_list = paragraph.split()
+    difficulties = [
+        estimator.get_word_difficulty(word) if word in sample_word_difficulty_dict else 3
+        for word in word_list
+    ]
+
+    avg_difficulty = sum(difficulties) / len(difficulties)
+    expected_avg_difficulty = (4 + 6 + 3 + 3 + 8 + 7 + 5) / 7  # 包括未知单词
+
+    assert abs(avg_difficulty - expected_avg_difficulty) < 1e-2
+
+
+# 5. 测试多次调用 get_word_difficulty 对同一单词
+def test_repeated_get_word_difficulty():
+    word = 'banana'
+    difficulty_first = estimator.get_word_difficulty('banana')
+    difficulty_second = estimator.get_word_difficulty('banana')
+
+    assert difficulty_first == difficulty_second  # 确保每次返回的难度一致
+
+
+# 6. 测试难度返回默认值
+def test_default_difficulty_for_unknown_words():
+    unknown_word = 'xyz'
+    difficulty = estimator.get_word_difficulty('xyz')
+
+    assert difficulty == 3  # 默认值是3
+
+
+# 7. 测试复杂的段落
+def test_complex_paragraph_difficulty():
+    paragraph = 'apple banana unknown_word random_word BBC intelligent education'
+
+    word_list = paragraph.split()
+    difficulties = [
+        estimator.get_word_difficulty(word) if word in sample_word_difficulty_dict else 3
+        for word in word_list
+    ]
+
+    avg_difficulty = sum(difficulties) / len(difficulties)
+    expected_avg_difficulty = (4 + 6 + 3 + 3 + 8 + 7 + 5) / 7
+
+    assert abs(avg_difficulty - expected_avg_difficulty) < 1e-2
+
+
+# 8. 测试特殊字符和标点符号
+def test_paragraph_with_punctuation():
+    paragraph = 'apple, banana; education! intelligent... BBC?'
+
+    word_list = paragraph.split()  # 假设是通过空格分隔，实际上你可能需要更复杂的分割逻辑来处理标点
+    word_list = [word.strip('.,;!?') for word in word_list]  # 去掉标点
+
+    difficulties = [
+        estimator.get_word_difficulty(word) if word in sample_word_difficulty_dict else 3
+        for word in word_list
+    ]
+
+    avg_difficulty = sum(difficulties) / len(difficulties)
+    expected_avg_difficulty = (4 + 6 + 5 + 7 + 8) / 5
+
+    assert abs(avg_difficulty - expected_avg_difficulty) < 1e-2
+
+
+# 运行测试
+if __name__ == '__main__':
+    pytest.main()
--- a/app/test_vocabulary.py
+++ b/app/test_vocabulary.py
@ -0,0 +1,94 @@
+# Run this test script on the command line:
+#   pytest test_vocabulary.py
+#
+# Last modified by Mr Lan Hui on 2025-03-05
+
+from vocabulary import UserVocabularyLevel, ArticleVocabularyLevel
+
+
+def test_article_level_empty_content():
+    ''' Boundary case test '''
+    article = ArticleVocabularyLevel('')
+    assert article.level == 0
+
+def test_article_level_punctuation_only():
+    ''' Boundary case test '''
+    article = ArticleVocabularyLevel(',')
+    assert article.level == 0
+
+def test_article_level_digit_only():
+    ''' Boundary case test '''
+    article = ArticleVocabularyLevel('1')
+    assert article.level == 0
+    
+def test_article_level_single_word():
+    ''' Boundary case test '''
+    article = ArticleVocabularyLevel('source')
+    assert 2 <= article.level <= 4
+
+def test_article_level_subset_vs_superset():
+    ''' Boundary case test '''
+    article1 = ArticleVocabularyLevel('source')
+    article2 = ArticleVocabularyLevel('open source')
+    assert article1.level < article2.level
+    
+def test_article_level_multiple_words():
+    ''' Boundary case test '''
+    article = ArticleVocabularyLevel('Producing Open Source Software - How to Run a Successful Free Software Project')
+    assert 3 <= article.level <= 5
+
+def test_article_level_short_paragraph():
+    ''' Boundary case test '''
+    article = ArticleVocabularyLevel('At parties, people no longer give me a blank stare when I tell them I work in open source software. "Oh, yes — like Linux?" they say. I nod eagerly in agreement. "Yes, exactly! That\'s what I do." It\'s nice not to be completely fringe anymore. In the past, the next question was usually fairly predictable: "How do you make money doing that?" To answer, I\'d summarize the economics of free software: that there are organizations in whose interest it is to have certain software exist, but that they don\'t need to sell copies, they just want to make sure the software is available and maintained, as a tool instead of as a rentable monopoly.')
+    assert 4 <= article.level <= 6
+
+def test_article_level_medium_paragraph():
+    ''' Boundary case test '''
+    article = ArticleVocabularyLevel('In considering the Origin of Species, it is quite conceivable that a naturalist, reflecting on the mutual affinities of organic beings, on their embryological relations, their geographical distribution, geological succession, and other such facts, might come to the conclusion that each species had not been independently created, but had descended, like varieties, from other species. Nevertheless, such a conclusion, even if well founded, would be unsatisfactory, until it could be shown how the innumerable species inhabiting this world have been modified, so as to acquire that perfection of structure and coadaptation which most justly excites our admiration. Naturalists continually refer to external conditions, such as climate, food, etc., as the only possible cause of variation. In one very limited sense, as we shall hereafter see, this may be true; but it is preposterous to attribute to mere external conditions, the structure, for instance, of the woodpecker, with its feet, tail, beak, and tongue, so admirably adapted to catch insects under the bark of trees. In the case of the misseltoe, which draws its nourishment from certain trees, which has seeds that must be transported by certain birds, and which has flowers with separate sexes absolutely requiring the agency of certain insects to bring pollen from one flower to the other, it is equally preposterous to account for the structure of this parasite, with its relations to several distinct organic beings, by the effects of external conditions, or of habit, or of the volition of the plant itself.')
+    assert 5 <= article.level <= 7
+    
+def test_article_level_long_paragraph():
+    ''' Boundary case test '''
+    article = ArticleVocabularyLevel('These several facts accord well with my theory. I believe in no fixed law of development, causing all the inhabitants of a country to change abruptly, or simultaneously, or to an equal degree. The process of modification must be extremely slow. The variability of each species is quite independent of that of all others. Whether such variability be taken advantage of by natural selection, and whether the variations be accumulated to a greater or lesser amount, thus causing a greater or lesser amount of modification in the varying species, depends on many complex contingencies,—on the variability being of a beneficial nature, on the power of intercrossing, on the rate of breeding, on the slowly changing physical conditions of the country, and more especially on the nature of the other inhabitants with which the varying species comes into competition. Hence it is by no means surprising that one species should retain the same identical form much longer than others; or, if changing, that it should change less. We see the same fact in geographical distribution; for instance, in the land-shells and coleopterous insects of Madeira having come to differ considerably from their nearest allies on the continent of Europe, whereas the marine shells and birds have remained unaltered. We can perhaps understand the apparently quicker rate of change in terrestrial and in more highly organised productions compared with marine and lower productions, by the more complex relations of the higher beings to their organic and inorganic conditions of life, as explained in a former chapter. When many of the inhabitants of a country have become modified and improved, we can understand, on the principle of competition, and on that of the many all-important relations of organism to organism, that any form which does not become in some degree modified and improved, will be liable to be exterminated. 
Hence we can see why all the species in the same region do at last, if we look to wide enough intervals of time, become modified; for those which do not change will become extinct.')
+    assert 6 <= article.level <= 8
+    
+def test_user_level_empty_dictionary():
+    ''' Boundary case test '''
+    user = UserVocabularyLevel({})
+    assert user.level == 0
+
+def test_user_level_one_simple_word():
+    ''' Boundary case test '''
+    user = UserVocabularyLevel({'simple':['202408050930']})
+    assert 0 < user.level <= 4
+    
+def test_user_level_invalid_word():
+    ''' Boundary case test '''
+    user = UserVocabularyLevel({'xyz':['202408050930']})
+    assert user.level == 0
+ 
+def test_user_level_one_hard_word():
+    ''' Boundary case test '''
+    user = UserVocabularyLevel({'pasture':['202408050930']})
+    assert 5 <= user.level <= 8
+ 
+def test_user_level_multiple_words():
+    ''' Boundary case test '''
+    user = UserVocabularyLevel(
+        {'sessile': ['202408050930'], 'putrid': ['202408050930'], 'prodigal': ['202408050930'], 'presumptuous': ['202408050930'], 'prehension': ['202408050930'], 'pied': ['202408050930'], 'pedunculated': ['202408050930'], 'pasture': ['202408050930'], 'parturition': ['202408050930'], 'ovigerous': ['202408050930'], 'ova': ['202408050930'], 'orifice': ['202408050930'], 'obliterate': ['202408050930'], 'niggard': ['202408050930'], 'neuter': ['202408050930'], 'locomotion': ['202408050930'], 'lineal': ['202408050930'], 'glottis': ['202408050930'], 'frivolous': ['202408050930'], 'frena': ['202408050930'], 'flotation': ['202408050930'], 'ductus': ['202408050930'], 'dorsal': ['202408050930'], 'dearth': ['202408050930'], 'crustacean': ['202408050930'], 'cornea': ['202408050930'], 'contrivance': ['202408050930'], 'collateral': ['202408050930'], 'cirriped': ['202408050930'], 'canon': ['202408050930'], 'branchiae': ['202408050930'], 'auditory': ['202408050930'], 'articulata': ['202408050930'], 'alimentary': ['202408050930'], 'adduce': ['202408050930'], 'aberration': ['202408050930']}        
+    )
+    assert 6 <= user.level <= 8
+ 
+def test_user_level_consider_only_most_recent_words_difficult_words_most_recent():
+    ''' Consider only the most recent three words '''
+    user = UserVocabularyLevel(
+        {'pasture':['202408050930'], 'putrid': ['202408040000'], 'frivolous':['202408030000'], 'simple':['202408020000'], 'apple':['202408010000']}
+    )
+    assert 5 <= user.level <= 8
+ 
+def test_user_level_consider_only_most_recent_words_easy_words_most_recent():
+    ''' Consider only the most recent three words '''
+    user = UserVocabularyLevel(
+        {'simple':['202408050930'], 'apple': ['202408040000'], 'happy':['202408030000'], 'pasture':['202408020000'], 'putrid':['202408010000'], 'dearth':['202407310000']}
+    )
+    assert 4 <= user.level <= 5
--- a/app/vocabulary.py
+++ b/app/vocabulary.py
@ -0,0 +1,69 @@
+''' 
+   Estimate a user's vocabulary level given his vocabulary data
+   Estimate an English article's difficulty level given its content
+   Preliminary design
+   
+   Hui, 2024-09-23
+   Last updated: 2024-09-25, 2024-09-30
+'''
+
+import pickle
+
+
+def load_record(pickle_fname):
+    ''' Load and return the object stored in a pickle file.
+
+    pickle_fname: path of the pickle file; it is opened in binary read mode
+    and closed again before the function returns.
+
+    NOTE(review): unpickling can execute arbitrary code, so this should only
+    be pointed at files the application itself produced.
+    '''
+    with open(pickle_fname, 'rb') as pickle_file:
+        return pickle.load(pickle_file)
+
+
+# Hand-written word -> difficulty table, bound to the _test attribute of
+# VocabularyLevelEstimator and consulted by its level property.
+# Values range from 0 to 8.
+# NOTE(review): 'predictable:' and 'maintained,' carry trailing punctuation;
+# presumably they mirror tokens produced by str.split() on raw article text --
+# confirm these are intentional rather than typos.
+_TEST_MOCK = {
+    'simple': 2, 'apple': 1, 'happy': 2, 'open': 3, 'like': 2, 'work': 2, 'make': 2, 'money': 2,
+    'source': 3, 'software': 3, 'successful': 4, 'project': 3, 'develop': 3, 'process': 3,
+    'available': 4, 'organizations': 4,
+    'extinct': 6, 'modification': 7, 'apparently': 7, 'abruptly': 7, 'rentable': 7, 'predictable:': 6,
+    'pasture': 7, 'putrid': 7, 'frivolous': 8, 'sessile': 8, 'dearth': 7, 'presumptuous': 7,
+    'fringe': 8, 'economics': 5, 'summarize': 5, 'stare': 5, 'eagerly': 5, 'completely': 4, 'maintained,': 5,
+    'geological': 6, 'embryological': 7, 'coadaptation': 8, 'exterminated': 7, 'contingencies': 7,
+    'intercrossing': 6, 'coleopterous': 8, 'marin': 5, 'organised': 5, 'monopoly': 8, 'inorganic': 7,
+    'xyz': 0, '': 0
+}
+
+
+class VocabularyLevelEstimator:
+    ''' Shared level computation for a list of words.
+
+    Subclasses are expected to set self.word_lst in their __init__; this base
+    class defines no constructor, so level cannot be read on a bare instance.
+    '''
+
+    # Word -> difficulty lookup table consulted by level.
+    _test = _TEST_MOCK
+
+    @property
+    def level(self):
+        ''' Estimated vocabulary level of the words in self.word_lst.
+
+        Words missing from the lookup table are ignored. The difficulties of
+        the remaining words are summed, valid_count ** 2 / 100 is added when
+        that sum is non-zero, and the result is divided by valid_count, so
+        a larger number of matched words nudges the level upwards.
+
+        Returns 0 when no word is found in the lookup table.
+        '''
+        # Imported here so the block does not depend on a module-level import.
+        import logging
+
+        scores = [self._test[word] for word in self.word_lst if word in self._test]
+        valid_count = len(scores)
+        total = sum(scores, 0.0)
+        # Diagnostics are logged at DEBUG level instead of being printed, so
+        # that reading the property stays silent by default.
+        logging.getLogger(__name__).debug('valid_count: %s, total: %s', valid_count, total)
+        if valid_count == 0:
+            return 0
+        if total != 0:
+            total += (valid_count * valid_count) / 100
+        return total / valid_count
+
+
+class UserVocabularyLevel(VocabularyLevelEstimator):
+    ''' Level estimator fed from a user's vocabulary record. '''
+
+    def __init__(self, d):
+        ''' Store the record d and use all of its keys as the word list.
+
+        d: mapping whose keys are words (the tests pass lists of timestamp
+        strings as values; the values are not read here).
+        '''
+        # TODO: just look at the most recently-added words; every key is
+        # currently used regardless of its timestamps.
+        self.word_lst = [*d.keys()]
+        self.d = d
+
+
+class ArticleVocabularyLevel(VocabularyLevelEstimator):
+    ''' Level estimator fed from the text of an article. '''
+
+    def __init__(self, content):
+        ''' Store content and split its lower-cased text on whitespace.
+
+        content: the article text as a string. Punctuation is not stripped,
+        so it stays attached to the neighbouring word.
+        '''
+        # TODO: select the 10 most difficult words; every token is currently kept.
+        lowered = content.lower()
+        self.word_lst = lowered.split()
+        self.content = content
+
+
+if __name__ == '__main__':
+    # Manual smoke run: print one user's record and level, then the level
+    # of a short sample sentence.
+    record = load_record('frequency_mrlan85.pickle')
+    print(record)
+    user_estimate = UserVocabularyLevel(record)
+    print(user_estimate.level)  # level is a property, not a method
+    sample = 'This is an interesting article'
+    article_estimate = ArticleVocabularyLevel(sample)
+    print(article_estimate.level)
--- a/app/wordfreqCMD.py
+++ b/app/wordfreqCMD.py
@ -45,7 +45,6 @@ def freq(fruit):
    '''

    result = []
-    
    fruit = fruit.lower() # 字母转小写
    flst = fruit.split()  # 字符串转成list
    c = collections.Counter(flst)
@ -72,7 +71,7 @@ def remove_punctuation(s): # 这里是s是形参 (parameter)。函数被调用
        s = s.replace(c, ' ') # 防止出现把 apple,apple 移掉逗号后变成 appleapple 情况
    s = s.replace('--', ' ')
    s = s.strip() # 去除前后的空格
-    
+
    if '\'' in s:
        n = len(s)
        t = '' # 用来收集我需要保留的字符
--- a/build.sh
+++ b/build.sh
--- a/requirements.txt
+++ b/requirements.txt
@ -6,4 +6,3 @@ snowballstemmer==2.2.0
 Werkzeug==2.2.2
 requests
 pytest~=8.1.1
-Flask-HTTPAuth==4.4.0
--- a/test.py
+++ b/test.py
@ -0,0 +1 @@
+# NOTE(review): prints a fixed greeting and nothing else; looks like a leftover
+# scratch file -- confirm it belongs in this change.
+print("Hello World")
--- a/test_vocabulary_output_2025_04_10.txt
+++ b/test_vocabulary_output_2025_04_10.txt
@ -0,0 +1,10 @@
+(.venv) PS D:\2025软件项目管理\spm_vocabulary\spm_vocabulary>  pytest test_vocabulary.py
+===================================================================== test session starts ======================================================================
+platform win32 -- Python 3.12.4, pytest-8.3.5, pluggy-1.5.0
+rootdir: D:\2025软件项目管理\spm_vocabulary\spm_vocabulary
+collected 16 items                                                                                                                                               
+
+test_vocabulary.py ................                                                                                                                       [100%]
+
+====================================================================== 16 passed in 0.02s ====================================================================== 
+(