Bug585-semenichenko-clean #212

Closed
semenichenko wants to merge 3 commits from Bug585-semenichenko-clean into Alpha-snapshot20240618
19 changed files with 2582 additions and 0 deletions

166
Article.py Normal file
View File

@ -0,0 +1,166 @@
from WordFreq import WordFreq
from wordfreqCMD import youdao_link, sort_in_descending_order
import pickle_idea, pickle_idea2
import os
import random, glob
import hashlib
from datetime import datetime
from flask import Flask, request, redirect, render_template, url_for, session, abort, flash, get_flashed_messages
from difficulty import get_difficulty_level_for_user, text_difficulty_level, user_difficulty_level
from model.article import get_all_articles, get_article_by_id, get_number_of_articles
import logging
import re
path_prefix = './'  # root of the application's static/data files
db_path_prefix = './db/' # comment this line in deployment
# NOTE(review): machine-specific absolute path — should likely be
# db_path_prefix + 'oxford_words.txt'; confirm before deployment
oxford_words_path='C:\\Users\\ANNA\\Desktop\\ooad\\app\\db\\oxford_words.txt'
def count_oxford_words(text, oxford_words):
    """Count how many word tokens of *text* appear in *oxford_words*.

    :param text: article text
    :param oxford_words: mapping whose keys are (lower-case) Oxford words
    :return: (number of Oxford tokens, total number of word tokens)
    """
    tokens = re.findall(r'\b\w+\b', text.lower())
    hits = sum(tok in oxford_words for tok in tokens)
    return hits, len(tokens)
def calculate_ratio(oxford_word_count, total_words):
    """Return the fraction of Oxford words in the text; 0 when it has no words."""
    return oxford_word_count / total_words if total_words else 0
def load_oxford_words(file_path):
    """Parse the Oxford word-list file into a dict.

    Each well-formed line is ``word pos level``; the result maps
    word -> {'pos': ..., 'level': ...}.

    Blank or malformed lines (fewer than three fields) are skipped;
    previously they raised IndexError on parts[0]/parts[1]/parts[2].

    :param file_path: path to oxford_words.txt
    :return: dict mapping word to its part of speech and CEFR level
    """
    oxford_words = {}
    with open(file_path, 'r', encoding='utf-8') as file:
        for line in file:
            parts = line.strip().split()
            if len(parts) < 3:  # skip blank/malformed lines
                continue
            oxford_words[parts[0]] = {'pos': parts[1], 'level': parts[2]}
    return oxford_words
def total_number_of_essays():
    """Return how many articles exist in the database."""
    return get_number_of_articles()
def get_article_title(s):
    """Return the first line of *s*, which holds the article title."""
    return s.partition('\n')[0]
def get_article_body(s):
    """Return everything after the first line (the title) of *s*."""
    lines = s.split('\n')
    return '\n'.join(lines[1:])
def get_today_article(user_word_list, visited_articles):
    """Select an article for the user to read today.

    :param user_word_list: path to the user's word-frequency pickle file
    :param visited_articles: dict with keys "index" (cursor) and
        "article_ids" (ids of previously shown articles, newest last),
        or None on first use
    :return: (visited_articles, today_article, result_of_generate_article)
        where today_article is a dict of display fields (or None) and
        result_of_generate_article is one of "found", "not found",
        "had read all articles"
    """
    if visited_articles is None:
        visited_articles = {
            "index" : 0, # cursor into article_ids
            "article_ids": [] # ids of previously shown articles; later entries are newer
        }
    if visited_articles["index"] > len(visited_articles["article_ids"])-1: # past the end: generate a new article, so fetch all candidates
        result = get_all_articles()
    else: # revisiting a previously read article, so query by its article_id
        if visited_articles["article_ids"][visited_articles["index"]] == 'null': # a page refresh can land on 'null'; treat it as a "previous article" action
            visited_articles["index"] -= 1
            visited_articles["article_ids"].pop()
        article_id = visited_articles["article_ids"][visited_articles["index"]]
        result = get_article_by_id(article_id)
    random.shuffle(result)
    # Choose article according to reader's level
    logging.debug('* get_today_article(): start d1 = ... ')
    d1 = load_freq_history(user_word_list)
    d2 = load_freq_history(path_prefix + 'static/words_and_tests.p')
    logging.debug(' ... get_today_article(): get_difficulty_level_for_user() start')
    d3 = get_difficulty_level_for_user(d1, d2)
    logging.debug(' ... get_today_article(): done')
    d = None
    result_of_generate_article = "not found"
    d_user = load_freq_history(user_word_list)
    logging.debug('* get_today_article(): user_difficulty_level() start')
    user_level = user_difficulty_level(d_user, d3) # more consideration as user's behaviour is dynamic. Time factor should be considered.
    logging.debug('* get_today_article(): done')
    text_level = 0
    if visited_articles["index"] > len(visited_articles["article_ids"])-1: # generate a new article
        amount_of_visited_articles = len(visited_articles["article_ids"])
        amount_of_existing_articles = result.__len__()
        if amount_of_visited_articles == amount_of_existing_articles: # the user has already read every existing article
            result_of_generate_article = "had read all articles"
        else:
            for k in range(3): # try at most 3 rounds
                for reading in result:
                    text_level = text_difficulty_level(reading['text'], d3)
                    factor = random.gauss(0.8, 0.1) # a number drawn from a Gaussian distribution with mean 0.8 and standard deviation 0.1
                    if reading['article_id'] not in visited_articles["article_ids"] and within_range(text_level, user_level, (8.0 - user_level) * factor): # unseen article whose difficulty fits the user's level
                        d = reading
                        visited_articles["article_ids"].append(d['article_id']) # record the newly chosen article id
                        result_of_generate_article = "found"
                        break
                if result_of_generate_article == "found": # exit the outer loop as soon as an article is found
                    break
        if result_of_generate_article != "found": # all articles read, or 3 rounds found nothing suitable: push a 'null' placeholder
            visited_articles["article_ids"].append('null')
    else: # show a previously read article
        d = random.choice(result)
        text_level = text_difficulty_level(d['text'], d3)
        result_of_generate_article = "found"
    today_article = None
    if d:
        oxford_words = load_oxford_words(oxford_words_path)
        oxford_word_count, total_words = count_oxford_words(d['text'],oxford_words)
        ratio = calculate_ratio(oxford_word_count,total_words)
        today_article = {
            "user_level": '%4.1f' % user_level,
            "text_level": '%4.1f' % text_level,
            "date": d['date'],
            "article_title": get_article_title(d['text']),
            "article_body": get_article_body(d['text']),
            "source": d["source"],
            "question": get_question_part(d['question']),
            "answer": get_answer_part(d['question']),
            "ratio" : ratio
        }
    return visited_articles, today_article, result_of_generate_article
def load_freq_history(path):
    """Load a pickled word-frequency record from *path*; {} when it doesn't exist."""
    if not os.path.exists(path):
        return {}
    return pickle_idea.load_record(path)
def within_range(x, y, r):
    """Return True when x lies above y by no more than r."""
    if x <= y:
        return False
    return abs(x - y) <= r
def get_question_part(s):
    """Extract the question section of *s*.

    Collects the 'QUESTION' marker line and every stripped line after it,
    stopping (and excluding) the 'ANSWER' marker line.
    """
    collected = []
    in_question = False
    for raw in s.strip().split('\n'):
        stripped = raw.strip()
        if stripped == 'QUESTION':
            collected.append(stripped)
            in_question = True
        elif stripped == 'ANSWER':
            in_question = False
        elif in_question:
            collected.append(stripped)
    return '\n'.join(collected)
def get_answer_part(s):
    """Return the stripped lines that follow the 'ANSWER' marker line."""
    collecting = False
    collected = []
    for raw in s.strip().split('\n'):
        stripped = raw.strip()
        if stripped == 'ANSWER':
            collecting = True
        elif collecting:
            collected.append(stripped)
    return '\n'.join(collected)

128
Login.py Normal file
View File

@ -0,0 +1,128 @@
import hashlib
import string
from datetime import datetime, timedelta
import unicodedata
def md5(s):
    '''
    Return the hex MD5 digest of *s*.
    :param s: the string to hash (encoded as UTF-8 first)
    :return: 32-character hexadecimal digest string
    '''
    digest = hashlib.md5(s.encode(encoding='utf-8'))
    return digest.hexdigest()
path_prefix = '/var/www/wordfreq/wordfreq/'  # deployment location
path_prefix = './' # comment this line in deployment (overrides the line above for local runs)
def verify_user(username, password):
    """Return True when *username* exists and *password* matches its stored hash."""
    from model.user import get_user_by_username
    user = get_user_by_username(username)
    if user is None:
        return False
    # Passwords are stored as md5(username + password); see add_user()
    return user.password == md5(username + password)
def add_user(username, password):
    """Create a new account that is valid for 30 days from today."""
    from model.user import insert_user
    start_date = datetime.now().strftime('%Y%m%d')
    expiry_date = (datetime.now() + timedelta(days=30)).strftime('%Y%m%d')
    # Hash the user name together with the password so identical passwords
    # of different users do not produce identical stored hashes
    hashed = md5(username + password)
    insert_user(username=username, password=hashed,
                start_date=start_date, expiry_date=expiry_date)
def check_username_availability(username):
    """Return True when no account with *username* exists yet."""
    from model.user import get_user_by_username
    return get_user_by_username(username) is None
def change_password(username, old_password, new_password):
    '''
    Change a user's password.
    :param username: user name
    :param old_password: the current password
    :param new_password: the new password
    :return: dict with 'success' on success, otherwise 'error'
    '''
    if not verify_user(username, old_password): # wrong old password
        return {'error':'Old password is wrong.', 'username':username}
    if new_password == old_password: # new password identical to the old one
        return {'error':'New password cannot be the same as the old password.', 'username':username}
    from model.user import update_password_by_username
    # Store md5(username + password), the same scheme used by add_user() and
    # checked by verify_user(). The previous code stored the plaintext
    # new_password, so a changed password could never pass verification.
    update_password_by_username(username, md5(username + new_password))
    return {'success':'Password changed', 'username':username}
def get_expiry_date(username):
    """Return the user's expiry date (YYYYMMDD); unknown users get a past date."""
    from model.user import get_user_by_username
    user = get_user_by_username(username)
    # A long-expired default means unknown accounts are treated as expired
    return '20191024' if user is None else user.expiry_date
class UserName:
    """Validates a prospective user name."""

    def __init__(self, username):
        self.username = username

    def contains_chinese(self):
        """Return True if any character is a CJK unified ideograph."""
        for char in self.username:
            # unicodedata.name(char) raises ValueError for unnamed code points
            # (e.g. control characters), which crashed validation; passing a
            # default of '' makes those characters simply count as non-CJK.
            if unicodedata.name(char, '').startswith('CJK UNIFIED IDEOGRAPH'):
                return True
        return False

    def validate(self):
        """Return 'OK' when the user name is acceptable, else an error message."""
        if len(self.username) > 20:
            return f'{self.username} is too long. The user name cannot exceed 20 characters.'
        if self.username.startswith('.'): # a user name must not start with a dot
            return 'Period (.) is not allowed as the first letter in the user name.'
        if ' ' in self.username: # a user name must not include a whitespace
            return 'Whitespace is not allowed in the user name.'
        for c in self.username: # a user name must not include special characters, except non-leading periods or underscores
            if c in string.punctuation and c != '.' and c != '_':
                return f'{c} is not allowed in the user name.'
        if self.username in ['signup', 'login', 'logout', 'reset', 'mark', 'back', 'unfamiliar', 'familiar', 'del',
                             'admin']:
            return 'You used a restricted word as your user name. Please come up with a better one.'
        if self.contains_chinese():
            return 'Chinese characters are not allowed in the user name.'
        return 'OK'
class Password:
    """Validates a prospective password."""

    def __init__(self, password):
        self.password = password

    def contains_chinese(self):
        """Return True if any character is a CJK unified ideograph."""
        for char in self.password:
            # Default '' prevents ValueError for unnamed code points
            # (control characters etc.), which previously crashed validation.
            if unicodedata.name(char, '').startswith('CJK UNIFIED IDEOGRAPH'):
                return True
        return False

    def validate(self):
        """Return 'OK' when the password is acceptable, else an error message."""
        if len(self.password) < 4:
            return 'Password must be at least 4 characters long.'
        if ' ' in self.password:
            return 'Password cannot contain spaces.'
        if self.contains_chinese():
            return 'Chinese characters are not allowed in the password.'
        return 'OK'
class WarningMessage:
    """Wraps a string to validate; str() yields the validation message."""

    def __init__(self, s, type='username'):
        self.s = s
        self.type = type

    def __str__(self):
        # Dispatch to the matching validator class
        checkers = {'username': UserName, 'password': Password}
        checker = checkers.get(self.type)
        if checker is not None:
            return checker(self.s).validate()

25
WordFreq.py Normal file
View File

@ -0,0 +1,25 @@
###########################################################################
# Copyright 2019 (C) Hui Lan <hui.lan@cantab.net>
# Written permission must be obtained from the author for commercial uses.
###########################################################################
from wordfreqCMD import remove_punctuation, freq, sort_in_descending_order
import string
class WordFreq:
    """Computes word frequencies for a piece of English text.

    Punctuation is stripped on construction; get_freq() keeps only
    tokens that start with an ASCII letter.
    """

    def __init__(self, s):
        self.s = remove_punctuation(s)

    def get_freq(self):
        """Return (word, count) tuples sorted by descending frequency."""
        pairs = [t for t in freq(self.s)
                 if len(t[0]) > 0 and t[0][0] in string.ascii_letters]
        return sort_in_descending_order(pairs)
if __name__ == '__main__':
    # Demo: print word frequencies of a sample sentence
    f = WordFreq('BANANA; Banana, apple ORANGE Banana banana.')
    print(f.get_freq())

27
Yaml.py Normal file
View File

@ -0,0 +1,27 @@
'''
Yaml.py
Loads the site configuration. Files involved:
./static/config.yml
./layout/partial/header.html
./layout/partial/footer.html
'''
import yaml as YAML
import os
path_prefix = './' # comment this line in deployment
# Path of the YAML configuration file
ymlPath = path_prefix + 'static/config.yml'
# Path of the partial-template folder
partialPath = path_prefix + 'layout/partial/'
# Read the YAML config inside a context manager so the file handle is
# closed (the previous code opened config.yml without ever closing it).
with open(ymlPath, 'r', encoding='utf-8') as f:
    cont = f.read() # raw YAML text
yml = YAML.load(cont, Loader=YAML.FullLoader) # parsed configuration dict
# header.html is injected into the head tag of every page
with open(partialPath + 'header.html', 'r', encoding='utf-8') as f:
    yml['header'] = f.read()
# footer.html is appended to the bottom of every page
with open(partialPath + 'footer.html', 'r', encoding='utf-8') as f:
    yml['footer'] = f.read()

139
account_service.py Normal file
View File

@ -0,0 +1,139 @@
from flask import *
from markupsafe import escape
from Login import check_username_availability, verify_user, add_user, get_expiry_date, change_password, WarningMessage
# 初始化蓝图
accountService = Blueprint("accountService", __name__)
### Sign-up, login, logout ###
@accountService.route("/signup", methods=['GET', 'POST'])
def signup():
    '''
    Sign up a new account.
    :return: the signup page for GET; for POST a JSON status:
        '0' name taken, '1' verification failed, '2' success,
        '3' invalid user name
    '''
    if request.method == 'GET':
        # GET simply shows the signup page
        return render_template('signup.html')
    # POST: attempt to create the account and report the outcome
    username = escape(request.form['username'])
    password = escape(request.form['password'])
    # Reject user names containing illegal characters
    warn = WarningMessage(username)
    if str(warn) != 'OK':
        return jsonify({'status': '3', 'warn': str(warn)})
    if not check_username_availability(username): # name already in use
        return jsonify({'status': '0'})
    add_user(username, password)
    if not verify_user(username, password):
        return jsonify({'status': '1'})
    # Account created: initialise the session
    session['logged_in'] = True
    session[username] = username
    session['username'] = username
    session['expiry_date'] = get_expiry_date(username)
    session['visited_articles'] = None
    return jsonify({'status': '2'})
@accountService.route("/login", methods=['GET', 'POST'])
def login():
    '''
    Log a user in.
    :return: the login page for GET; for POST a JSON status:
        '1' success, '0' wrong password, '5' blacklisted
    '''
    if request.method == 'GET':
        # GET request: show the login page
        return render_template('login.html')
    elif request.method == 'POST':
        # POST: check the database and verify the user
        username = escape(request.form['username'])
        password = escape(request.form['password'])
        verified = verify_user(username, password)
        # Read black.txt to check whether the user is blacklisted
        with open('black.txt') as f:
            for line in f:
                line = line.strip()
                if username == line:
                    return jsonify({'status': '5'})
        # Re-read black.txt into the list `line`
        with open('black.txt', 'a+') as f:
            f.seek(0)
            lines = f.readlines()
            line=[]
            for i in lines:
                line.append(i.strip('\n'))
        if verified and username not in line: #TODO: 一个用户名是另外一个用户名的子串怎么办?
            # Successful login: populate the session
            session['logged_in'] = True
            session[username] = username
            session['username'] = username
            user_expiry_date = get_expiry_date(username)
            session['expiry_date'] = user_expiry_date
            session['visited_articles'] = None
            f.close() # NOTE(review): redundant — the with-statement above already closed f
            return jsonify({'status': '1'})
        elif verified==0 and password!='黑名单':
            # Wrong password. NOTE(review): the comments claim a 5-attempt
            # limit, but no attempt counter exists — every failed login falls
            # through here, and the comparison against the literal '黑名单'
            # looks suspicious; confirm the intended blacklist logic.
            return jsonify({'status': '0'})
        else:
            # Blacklist the user (supposedly after 5 wrong attempts)
            with open('black.txt', 'a+') as f:
                f.seek(0)
                lines = f.readlines()
                line = []
                for i in lines:
                    line.append(i.strip('\n'))
                if username in line:
                    return jsonify({'status': '5'})
                else:
                    f.write(username)
                    f.write('\n')
                    return jsonify({'status': '5'})
@accountService.route("/logout", methods=['GET', 'POST'])
def logout():
    '''
    Log the current user out.
    :return: a redirect to the main page
    '''
    session['logged_in'] = False  # mark the session as logged out
    return redirect(url_for('mainpage'))
@accountService.route("/reset", methods=['GET', 'POST'])
def reset():
    '''
    Change the password of the logged-in user.
    :return: the reset page for GET, or a JSON result for POST
    '''
    # Block password changes when nobody is logged in
    if not session.get('logged_in'):
        return render_template('login.html')
    username = session['username']
    if username == '':
        return redirect('/login')
    if request.method == 'GET':
        # Show the reset-password form
        return render_template('reset.html', username=session['username'], state='wait')
    # POST: apply the submitted old/new passwords
    old_password = escape(request.form['old-password'])
    new_password = escape(request.form['new-password'])
    return jsonify(change_password(username, old_password, new_password))

148
admin_service.py Normal file
View File

@ -0,0 +1,148 @@
# System Library
from flask import *
from markupsafe import escape
# Personal library
from Yaml import yml
from model.user import *
from model.article import *
ADMIN_NAME = "lanhui" # unique admin name
_cur_page = 1 # current article page
_page_size = 5 # article sizes per page
adminService = Blueprint("admin_service", __name__)
def check_is_admin():
    """Return "pass" when the current session belongs to the admin,
    otherwise the page/message to show instead."""
    if not session.get("logged_in"):
        # Not logged in at all
        return render_template("not_login.html")
    if session.get("username") == ADMIN_NAME:
        return "pass"
    # Logged in, but not the admin account
    return "You are not admin!"
@adminService.route("/admin", methods=["GET"])
def admin():
    """Render the admin index page (admin only)."""
    gate = check_is_admin()
    if gate != "pass":
        return gate
    return render_template(
        "admin_index.html", yml=yml, username=session.get("username"))
@adminService.route("/admin/article", methods=["GET", "POST"])
def article():
    """Admin page to list, add, and delete articles, with pagination.

    Query params: ``page`` (1-based) and ``size``. A POST form may carry
    ``delete_id`` (delete an article) and/or ``content``/``source``/
    ``question``/``level`` (add an article). Admin only.
    """
    def _make_title_and_content(article_lst):
        # First line of the text is the title; the rest becomes the HTML body
        for article in article_lst:
            text = escape(article.text) # Fix XSS vulnerability, contributed by Xu Xuan
            article.title = text.split("\n")[0]
            article.content = '<br/>'.join(text.split("\n")[1:])
    def _update_context():
        # Refresh the pagination context after an add/delete
        article_len = get_number_of_articles()
        context["article_number"] = article_len
        context["text_list"] = get_page_articles(_cur_page, _page_size)
        _articles = get_page_articles(_cur_page, _page_size)
        _make_title_and_content(_articles)
        context["text_list"] = _articles
    global _cur_page, _page_size
    is_admin = check_is_admin()
    if is_admin != "pass":
        return is_admin
    _article_number = get_number_of_articles()
    try:
        _page_size = min(max(1, int(request.args.get("size", 5))), _article_number) # smallest size is 1
        _cur_page = min(max(1, int(request.args.get("page", 1))), _article_number // _page_size + (_article_number % _page_size > 0)) # smallest page is 1
    except ValueError:
        return "page parameters must be integer!"
    _articles = get_page_articles(_cur_page, _page_size)
    _make_title_and_content(_articles)
    context = {
        "article_number": _article_number,
        "text_list": _articles,
        "page_size": _page_size,
        "cur_page": _cur_page,
        "username": session.get("username"),
    }
    if request.method == "POST":
        data = request.form
        if "delete_id" in data:
            try:
                delete_id = int(data["delete_id"]) # convert to int
                delete_article_by_id(delete_id) # delete the article with this id
                flash(f'Article ID {delete_id} deleted successfully.') # banner message at the top of the page
                _update_context()
            except ValueError:
                flash('Invalid article ID for deletion.') # banner message at the top of the page
        content = data.get("content", "")
        source = data.get("source", "")
        question = data.get("question", "")
        level = data.get("level", "4")
        if content:
            if level not in ['1', '2', '3', '4']:
                return "Level must be between 1 and 4."
            add_article(content, source, level, question)
            title = content.split('\n')[0]
            flash(f'Article added. Title: {title}')
            _update_context() # must run after flash; otherwise a newly added article could be deleted immediately
    return render_template("admin_manage_article.html", **context)
@adminService.route("/admin/user", methods=["GET", "POST"])
def user():
    """Admin page for managing users: reset a password or change an expiry date.

    GET renders the page; POST applies the requested change and flashes the
    result. Admin only.
    """
    is_admin = check_is_admin()
    if is_admin != "pass":
        return is_admin
    context = {
        "user_list": get_users(),
        "username": session.get("username"),
    }
    if request.method == "POST":
        data = request.form
        username = data.get("username","")
        new_password = data.get("new_password", "")
        expiry_time = data.get("expiry_time", "")
        if username:
            if new_password:
                # Store md5(username + password), matching the scheme used by
                # Login.add_user/verify_user; the previous code stored the raw
                # password, which verify_user() could never match.
                from Login import md5
                update_password_by_username(username, md5(username + new_password))
                # Do not echo the plaintext password back into the page.
                flash('Password updated.')
            if expiry_time:
                update_expiry_time_by_username(username, "".join(expiry_time.split("-")))
                flash(f'Expiry date updated to {expiry_time}.')
    return render_template("admin_manage_user.html", **context)
@adminService.route("/admin/expiry", methods=["GET"])
def user_expiry_time():
    """Return the expiry date of the user given by ?username=... (admin only)."""
    gate = check_is_admin()
    if gate != "pass":
        return gate
    username = request.args.get("username", "")
    if not username:
        return "Username can't be empty."
    target = get_user_by_username(username)
    if not target:
        return "User does not exist."
    return target.expiry_date

31
api_service.py Normal file
View File

@ -0,0 +1,31 @@
from flask import *
from flask_httpauth import HTTPTokenAuth
from Article import load_freq_history
path_prefix = '/var/www/wordfreq/wordfreq/'  # deployment location
path_prefix = './' # comment this line in deployment
apiService = Blueprint('site',__name__)  # blueprint for the word-list API
auth = HTTPTokenAuth(scheme='Bearer')  # bearer-token authentication
# NOTE(review): tokens are hard-coded in source; consider loading from config/env
tokens = {
    "token": "token",
    "secret-token": "lanhui" # token, username
}
@auth.verify_token
def verify_token(token):
    """Map a bearer token to its username; returning None rejects the request."""
    return tokens.get(token)
@apiService.route('/api/mywords') # HTTPie usage: http -A bearer -a secret-token http://127.0.0.1:5000/api/mywords
@auth.login_required
def show():
    """Return the authenticated user's word-frequency history as JSON."""
    username = auth.current_user()
    record_path = f'{path_prefix}static/frequency/frequency_{username}.pickle'
    return jsonify(load_freq_history(record_path))

34
create_pickle.py Normal file
View File

@ -0,0 +1,34 @@
"""Create a sample word-history pickle for testing.

Simulates a user's vocabulary record — word -> list of study dates
(YYYYMMDD strings) — and writes it to the static/frequency folder.
"""
import pickle
import os
# Sample vocabulary data - simulating a user's word history
# Format: word -> list of dates when the word was studied
test_data = {
    "hello": ["20240101"],
    "world": ["20240101", "20240102"],
    "computer": ["20240101", "20240103"],
    "programming": ["20240102"],
    "python": ["20240102", "20240103"],
    "algorithm": ["20240103"],
    "database": ["20240103"],
    "interface": ["20240104"],
    "vocabulary": ["20240104"],
    "sophisticated": ["20240104"]
}
# Ensure frequency directory exists
# NOTE(review): machine-specific absolute path — adjust before running elsewhere
base_path = r'C:\Users\ANNA\Desktop\app\static\frequency'
os.makedirs(base_path, exist_ok=True)
# Save the test data
file_path = os.path.join(base_path, 'mr1an85.pickle')
with open(file_path, 'wb') as f:
    pickle.dump(test_data, f)
print(f"Test file created at: {file_path}")
# Verify the file was created and can be read
with open(file_path, 'rb') as f:
    loaded_data = pickle.load(f)
print("\nVerifying data:")
print(loaded_data)

531
difficulty.py Normal file
View File

@ -0,0 +1,531 @@
###########################################################################
# Copyright 2019 (C) Hui Lan <hui.lan@cantab.net>
# Written permission must be obtained from the author for commercial uses.
###########################################################################
# Purpose: compute difficulty level of an English text (Refactored with OO Design)
import pickle
import math
from wordfreqCMD import remove_punctuation, freq, sort_in_descending_order, sort_in_ascending_order, map_percentages_to_levels
import snowballstemmer
import os
import string
class DifficultyEstimator:
    """Estimates the difficulty level of English words from word-test data."""

    # Priority-ordered rules: the first matching test source decides the level.
    _LEVEL_RULES = (
        (('CET4',), 4),
        (('OXFORD3000',), 5),
        (('CET6', 'GRADUATE'), 6),
        (('OXFORD5000', 'IELTS'), 7),
        (('BBC',), 8),
    )

    def __init__(self, pickle_fname=None):
        """
        :param pickle_fname: optional path to a pickle mapping word -> test sources
        """
        self.word_difficulty_dict = {}  # cache of word -> difficulty level
        self.stemmer = snowballstemmer.stemmer('english')
        self.stop_words = {
            'the', 'and', 'of', 'to', 'what', 'in', 'there', 'when', 'them',
            'would', 'will', 'out', 'his', 'mr', 'that', 'up', 'more', 'your'
            # ... add other stop words ...
        }
        if pickle_fname:
            self._initialize_difficulty_levels(pickle_fname)

    def _initialize_difficulty_levels(self, pickle_fname):
        """Load the pickle and pre-compute difficulty levels for its words."""
        try:
            with open(pickle_fname, 'rb') as fh:
                word_data = pickle.load(fh)
            self._compute_difficulty_levels(word_data)
        except FileNotFoundError:
            print(f"Warning: Could not find difficulty data file: {pickle_fname}")

    def _compute_difficulty_levels(self, word_data):
        """Assign a level to every word whose test sources match a rule."""
        for word, tests in word_data.items():
            for sources, level in self._LEVEL_RULES:
                if any(src in tests for src in sources):
                    self.word_difficulty_dict[word] = level
                    break

    def get_word_difficulty(self, word):
        """Return the cached level of *word*, falling back to its stem; 0 if unknown."""
        cache = self.word_difficulty_dict
        if word not in cache:
            stem = self.stemmer.stemWord(word)
            cache[word] = cache.get(stem, 0)  # 0 = unknown word
        return cache[word]
def revert_dict(d):
    '''
    Invert a word-history dict.

    In *d*, each key is a word and each value is either a list of date
    strings or (legacy format) an int count. The returned dict maps a
    date (the first 10 characters of the timestamp, i.e. up to the hour)
    to the list of words picked at that time.

    Values of any other type are skipped; previously such entries either
    raised NameError (first key) or silently reused the previous key's
    date list.
    '''
    d2 = {}
    for k in d:
        if type(d[k]) is list: # d[k] is a list of dates
            lst = d[k]
        elif type(d[k]) is int: # legacy: the value is a frequency count, not dates
            # Why this date? No particular reason — it is the day the bug was fixed.
            lst = d[k] * ['2021082019']
        else:
            continue # unknown value type: ignore this entry
        for time_info in lst:
            date = time_info[:10] # until hour
            if date not in d2:
                d2[date] = [k]
            else:
                d2[date].append(k)
    return d2
def user_difficulty_level(d_user, d, calc_func=0):
    '''
    Estimate the user's vocabulary level from their word history.

    :param d_user: dict word -> list of date strings (or legacy int count)
    :param d: dict word -> difficulty level
    :param calc_func: set != 0 to use the geometric-mean method,
        otherwise a weighted average over difficulty percentages is used
    :return: estimated level, never below 4.5
    '''
    # Safety checks
    if not d_user or not d:
        return 4.5 # default level when either dictionary is empty
    try:
        if calc_func != 0:
            # Method 1: geometric mean of the difficulties of the user's words
            d_user2 = revert_dict(d_user) # key is date, and value is a list of words added on that date
            geometric = 0
            count = 0
            for date in sorted(d_user2.keys(),
                               reverse=True): # most recently added words are more important
                lst = d_user2[date] # a list of words
                lst2 = [] # a list of tuples, (word, difficulty level)
                for word in lst:
                    if word in d:
                        lst2.append((word, d[word]))
                lst3 = sort_in_ascending_order(lst2) # easiest tuple first
                for t in lst3:
                    word = t[0]
                    hard = t[1]
                    if hard > 0: # prevent log(0)
                        geometric = geometric + math.log(hard)
                        count += 1
            return max(4.5, math.exp(geometric / max(count, 1)))
        # Method 2: weighted average over difficulty-level percentages
        d_user2 = revert_dict(d_user) # key is date, and value is a list of words added on that date
        count = {} # number of words per difficulty level
        percentages = {} # share of each difficulty level
        total = 0 # total words
        for date in d_user2.keys():
            lst = d_user2[date] # a list of words
            for word in lst:
                if word in d:
                    if d[word] not in count:
                        count[d[word]] = 0
                    count[d[word]] += 1
                    total += 1
        if total == 0:
            return 4.5 # default level
        for k in count.keys():
            percentages[k] = count[k] / total
        weight = map_percentages_to_levels(percentages)
        sum = 0 # NOTE(review): shadows the builtin `sum`; consider renaming
        for k in weight.keys():
            sum += weight[k] * k
        return max(4.5, sum) # ensure a minimum level of 4.5
    except Exception as e:
        print(f"Error calculating user difficulty level: {str(e)}")
        return 4.5 # default level on error
def text_difficulty_level(s, d):
    """Estimate the difficulty level of text *s*.

    Uses the geometric mean of the difficulty of the (at most) 20 hardest
    non-stop-words of *s*, looked up in the level dict *d*.
    """
    s = remove_punctuation(s)
    L = freq(s)
    lst = [] # a list of tuples, each tuple being (word, difficulty level)
    stop_words = {'the':1, 'and':1, 'of':1, 'to':1, 'what':1, 'in':1, 'there':1, 'when':1, 'them':1, 'would':1, 'will':1, 'out':1, 'his':1, 'mr':1, 'that':1, 'up':1, 'more':1, 'your':1, 'it':1, 'now':1, 'very':1, 'then':1, 'could':1, 'he':1, 'any':1, 'some':1, 'with':1, 'into':1, 'you':1, 'our':1, 'man':1, 'other':1, 'time':1, 'was':1, 'than':1, 'know':1, 'about':1, 'only':1, 'like':1, 'how':1, 'see':1, 'is':1, 'before':1, 'such':1, 'little':1, 'two':1, 'its':1, 'as':1, 'these':1, 'may':1, 'much':1, 'down':1, 'for':1, 'well':1, 'should':1, 'those':1, 'after':1, 'same':1, 'must':1, 'say':1, 'first':1, 'again':1, 'us':1, 'great':1, 'where':1, 'being':1, 'come':1, 'over':1, 'good':1, 'himself':1, 'am':1, 'never':1, 'on':1, 'old':1, 'here':1, 'way':1, 'at':1, 'go':1, 'upon':1, 'have':1, 'had':1, 'without':1, 'my':1, 'day':1, 'be':1, 'but':1, 'though':1, 'from':1, 'not':1, 'too':1, 'another':1, 'this':1, 'even':1, 'still':1, 'her':1, 'yet':1, 'under':1, 'by':1, 'let':1, 'just':1, 'all':1, 'because':1, 'we':1, 'always':1, 'off':1, 'yes':1, 'so':1, 'while':1, 'why':1, 'which':1, 'me':1, 'are':1, 'or':1, 'no':1, 'if':1, 'an':1, 'also':1, 'thus':1, 'who':1, 'cannot':1, 'she':1, 'whether':1} # ignore these words while computing the article's difficulty level
    for x in L:
        word = x[0]
        if word not in stop_words and word in d:
            lst.append((word, d[word]))
    lst2 = sort_in_descending_order(lst) # most difficult words on top
    count = 0
    geometric = 1
    for t in lst2:
        word = t[0]
        hard = t[1]
        geometric = geometric * (hard)
        count += 1
        if count >= 20: # we look only at the n most difficult words
            return geometric ** (1 / count)
    return geometric ** (1 / max(count, 1)) # max() guards against an empty word list
def load_record(fname):
    """Load a pickled word-record dict.

    'frequency.p' is looked up next to this script; any other name is
    resolved under the ./static subfolder. Returns {} (after printing a
    warning) when the file is missing.
    """
    here = os.path.dirname(os.path.abspath(__file__))
    if fname == 'frequency.p':
        target = os.path.join(here, fname)  # same directory as the script
    else:
        target = os.path.join(here, 'static', fname)  # static subfolder
    try:
        with open(target, 'rb') as fh:
            return pickle.load(fh)
    except FileNotFoundError:
        print(f"Warning: Could not find file: {target}")
        return {}
def get_difficulty_level_for_user(frequency_dict, word_test_dict):
    """Convert word-test data into numeric difficulty levels.

    :param frequency_dict: word-frequency data (currently unused; kept
        for interface compatibility)
    :param word_test_dict: dict word -> collection of test-source tags
    :return: dict word -> difficulty level (3 when no tag matches)
    """
    # Priority-ordered rules: the first matching tag decides the level
    rules = (
        (4, ('CET4',)),
        (5, ('OXFORD3000',)),
        (6, ('CET6', 'GRADUATE')),
        (7, ('OXFORD5000', 'IELTS')),
        (8, ('BBC',)),
    )
    difficulty_dict = {}
    for word, tests in word_test_dict.items():
        for level, tags in rules:
            if any(tag in tests for tag in tags):
                difficulty_dict[word] = level
                break
        else:
            difficulty_dict[word] = 3  # default level
    return difficulty_dict
class VocabularyLevelEstimator:
    """Estimates vocabulary levels using the Oxford word list.

    CEFR codes are mapped onto the project's numeric scale (A1=3 ... C1=7);
    unknown words score 0.
    """

    def __init__(self, word_data_path=None):
        if word_data_path is None:
            word_data_path = 'db/oxford_words.txt'
        self.word_levels = {}
        self.level_mapping = {
            'A1': 3,
            'A2': 4,
            'B1': 5,
            'B2': 6,
            'C1': 7
        }
        if word_data_path:
            self._load_word_data(word_data_path)

    def _load_word_data(self, filepath):
        """Populate word_levels from the Oxford word-list file."""
        try:
            with open(filepath, 'r', encoding='utf-8') as fh:
                for raw in fh:
                    fields = raw.strip().split()
                    if len(fields) < 3:
                        continue  # skip malformed lines
                    code = fields[-1].strip()  # CEFR code is the last field
                    if code in self.level_mapping:
                        self.word_levels[fields[0].strip().lower()] = self.level_mapping[code]
        except FileNotFoundError:
            print(f"Warning: Could not find difficulty data file: {filepath}")

    def get_word_level(self, word):
        """Return the difficulty level for a single word (0 when unknown)."""
        if word is None:
            raise TypeError("Word cannot be None")
        if not isinstance(word, str):
            raise TypeError("Word must be a string")
        if not word:
            return 0  # empty string has no level
        return self.word_levels.get(word.lower(), 0)

    def estimate_text_level(self, text):
        """Estimate a text's difficulty as the mean word level (3 when empty)."""
        if text is None:
            raise TypeError("Input text cannot be None")
        if not isinstance(text, str):
            raise TypeError("Input text must be a string")
        if not text:
            return 3
        tokens = text.lower().split()
        if not tokens:
            return 3
        return sum(self.get_word_level(tok) for tok in tokens) / len(tokens)

    def estimate_user_level(self, word_history):
        """Estimate the user's level as the mean level of their history words."""
        if word_history is None:
            raise TypeError("Word history cannot be None")
        if not isinstance(word_history, dict):
            raise TypeError("Word history must be a dictionary")
        # Validate the shape of the history before using it
        for key, value in word_history.items():
            if not isinstance(key, str):
                raise ValueError("Word history keys must be strings")
            if not isinstance(value, (list, int)):
                raise ValueError("Word history values must be lists or integers")
        if not word_history:
            return 3
        scores = [self.get_word_level(w) for w in word_history]
        return sum(scores) / len(scores)
class UserVocabularyLevel(VocabularyLevelEstimator):
    """Lazily computes a user's level from the most recent words in their history."""

    def __init__(self, word_history, word_data_path=None):
        if word_data_path is None:
            word_data_path = 'db/oxford_words.txt'
        super().__init__(word_data_path)
        self.word_history = word_history
        self._level = None  # computed on first access to .level

    @property
    def level(self):
        """Level derived from the 3 most recently studied valid words (0 if none)."""
        if self._level is None:
            self._level = self._compute_level()
        return self._level

    def _compute_level(self):
        # No history at all
        if not self.word_history:
            return 0
        # Flatten to (timestamp, word) pairs
        stamped = [(ts, w) for w, times in self.word_history.items() for ts in times]
        if not stamped:
            return 0
        # Newest first; pick up to 3 distinct, valid, cleaned words
        stamped.sort(reverse=True)
        picked = []
        seen = set()
        for _, w in stamped:
            cleaned = w.strip(string.punctuation).lower()
            if cleaned not in seen and self.is_valid_word(cleaned):
                picked.append(cleaned)
                seen.add(cleaned)
                if len(picked) == 3:
                    break
        if not picked:
            return 0
        scores = [self.get_word_level(word) for word in picked]
        if all(s == 0 for s in scores):
            return 0
        # Hardest recent word, plus a small bonus per extra word considered
        return max(scores) + 0.1 * (len(scores) - 1)

    def is_valid_word(self, word):
        """A valid word is purely alphabetic."""
        return word.isalpha()
class ArticleVocabularyLevel(VocabularyLevelEstimator):
    """Difficulty level of an article, judged from its hardest words."""

    def __init__(self, content, word_data_path=None):
        # Fall back to the bundled Oxford word list when no path is given.
        if word_data_path is None:
            word_data_path = 'db/oxford_words.txt'
        super().__init__(word_data_path)
        self.content = content
        self._level = None  # cached result of the `level` property

    @property
    def level(self):
        # Compute once, then serve the cached value on later accesses.
        if self._level is None:
            self._level = self._compute_level()
        return self._level

    def _compute_level(self):
        if not self.content:
            return 0
        # Tokenise: strip punctuation, lowercase, keep alphabetic tokens.
        tokens = [t.strip(string.punctuation).lower() for t in self.content.split()]
        tokens = [t for t in tokens if t and t.isalpha()]
        if not tokens:
            return 0
        # Score tokens; drop words the estimator does not know (level 0).
        scores = [self.get_word_level(t) for t in tokens]
        scores = [s for s in scores if s > 0]
        if not scores:
            return 0
        if len(scores) == 1:
            return scores[0]
        if len(scores) <= 3:
            mean = sum(scores) / len(scores)
            # Small per-word bonus ensures a superset scores above its subset.
            return max(mean, max(scores) + 0.01 * (len(scores) - 1))
        # Longer text: judge by the ten hardest words.
        scores.sort(reverse=True)
        hardest = scores[:10]
        return max(sum(hardest) / len(hardest),
                   max(hardest) + 0.01 * (len(hardest) - 1))

    def is_valid_word(self, word):
        # Only purely alphabetic tokens count as words.
        return word.isalpha()
if __name__ == '__main__':
    # Manual smoke test: build a per-user difficulty table from the two
    # pickled databases, then score a text and the estimator classes.
    d1 = load_record('frequency.p')
    # print(d1)
    d2 = load_record('words_and_tests.p')
    # print(d2)
    d3 = get_difficulty_level_for_user(d1, d2)
    # NOTE(review): every assignment to `s` below overwrites the previous
    # one — only the value set in the try/except at the end is actually
    # scored.  These are kept as alternative sample texts to try.
    s = '''
South Lawn
11:53 A.M. EDT
THE PRESIDENT: Hi, everybody. Hi. How are you? So, the stock market is doing very well.
The economy is booming. We have a new record in sight. It could happen even today.
But we have a new stock market record. I think it'll be about 118 times that we've broken the record.
Jobs look phenomenal.
'''
    s = '''
By the authority vested in me as President by the Constitution and the laws of the United States, after carefully considering the reports submitted to the Congress by the Energy Information Administration, including the report submitted in October 2019, and other relevant factors, including global economic conditions, increased oil production by certain countries, the global level of spare petroleum production capacity, and the availability of strategic reserves, I determine, pursuant to section 1245(d)(4)(B) and (C) of the National Defense Authorization Act for Fiscal Year 2012, Public Law 112-81, and consistent with prior determinations, that there is a sufficient supply of petroleum and petroleum products from countries other than Iran to permit a significant reduction in the volume of petroleum and petroleum products purchased from Iran by or through foreign financial institutions.
'''
    s = '''
Democrats keep their witnesses locked behind secure doors, then flood the press with carefully sculpted leaks and accusations, driving the Trump-corruption narrative. And so the party goes, galloping toward an impeachment vote that would overturn the will of the American voterson a case built in secret.
Conservative commentators keep noting that Mrs. Pelosi's refusal to hold a vote on the House floor to authorize an official impeachment inquiry helps her caucus's vulnerable members evade accountability. But there's a more practical and uglier reason for Democrats to skip the formalities. Normally an authorization vote would be followed by official rules on how the inquiry would proceed. Under today's process, Mr. Schiff gets to make up the rules as he goes along. Behold the Lord High Impeacher.
Democrats view control over the narrative as essential, having learned from their Russia-collusion escapade the perils of transparency. They banked on special counsel Robert Mueller's investigation proving impeachment fodder, but got truth-bombed. Their subsequent open hearings on the subject—featuring Michael Cohen, Mr. Mueller and Corey Lewandowski —were, for the Democrats, embarrassing spectacles, at which Republicans punched gaping holes in their story line.
Mr. Schiff is making sure that doesn't happen again; he'll present the story, on his terms. His rules mean he can issue that controlling decree about "only one" transcript and Democratic staff supervision of Republican members. It means he can bar the public, the press and even fellow representatives from hearings, even though they're unclassified.
'''
    s = '''
Unemployment today is at a 50-year low. There are more Americans working today than ever before. Median household income in the last two and half years has risen by more than $5,000. And that doesn't even account for the savings from the President's tax cuts or energy reforms for working families.
Because of the President's policies, America has added trillions of dollars of wealth to our economy while China's economy continues to fall behind.
To level the playing field for the American worker against unethical trade practices, President Trump levied tariffs on $250 billion in Chinese goods in 2018. And earlier this year, the President announced we would place tariffs on another $300 billion of Chinese goods if significant issues in our trading relationship were not resolved by December of this year.
'''
    s = '''
Needless to say, we see it very differently. Despite the great power competition that is underway, and America's growing strength, we want better for China. That's why, for the first time in decades, under President Donald Trump's leadership, the United States is treating China's leaders exactly how the leaders of any great world power should be treated with respect, yes, but also with consistency and candor.
'''
    s = '''
Brexit is the scheduled withdrawal of the United Kingdom from the European Union. Following a June 2016 referendum, in which 51.9% voted to leave, the UK government formally announced the country's withdrawal in March 2017, starting a two-year process that was due to conclude with the UK withdrawing on 29 March 2019. As the UK parliament thrice voted against the negotiated withdrawal agreement, that deadline has been extended twice, and is currently 31 October 2019. The Benn Act, passed in September 2019, requires the government to seek a third extension.
'''
    s = '''
The argument for Brexit
According to the BBC, the push to leave the EU was advocated mostly by the UK Independence Party and was not supported by the Prime Minister, David Cameron. Members of the UK Independence Party argued that Britain's participation in the EU was a restrictive element for the country.
As one of the EU's primary initiatives is free movement within the region the party's main arguments centered around regaining border control and reclaiming business rights. In addition, supporters of Brexit cited the high EU membership fees as a negative aspect of participation in the EU. It was argued that if the UK separates itself from the EU, these fees can be used to benefit the UK.
The argument against Brexit
The Conservative Party and the Prime Minister were strongly in favor of remaining with the EU. As a result of the decision to discontinue its participation in the EU, the Prime Minister has made a public statement that he will be relinquishing his position. He believes that the country needs a leader with the same goals as the majority of the country. He has promised a new PM will be in place by early September.
The argument against Brexit pertains mostly to the business benefits. The argument is that the UK receives business benefits by being able to participate in the single market system established by the EU. In response to the criticism against the open borders, proponents believe that the influx of immigrants helps develop an eager workforce and fuels public service projects.
Leaders in favor of staying also worry about the political backlash that could possibly result from other countries who favored staying with the EU. In addition, proponents of remaining with the EU believe that being part of a wider community of nations provides economic and cultural strength, as well as an additional element of security.
What does Brexit mean for the future?
While the decision marked a huge statement for the UK, the referendum vote is not legally binding. There are still many hurdles that must be dealt with before Brexit can become a reality.
The UK is still subject to the laws of the EU until Britain's exit becomes legal. In order for the UK to make its break official, the country needs to invoke Article 50. It is unclear exactly what this process will entail or how long it will take as Britain is the first country to take its leave of the EU. Once Article 50 has been formally invoked, the UK has two years to negotiate its departure with the other member states. But according to the BBC, "Extricating the UK from the EU will be extremely complex, and the process could drag on longer than that."
Amidst the aftermath of this shocking referendum vote, there is great uncertainty as political leaders decide what this means for the UK.
'''
    s = '''
British Prime Minister Boris Johnson walks towards a voting station during the Brexit referendum in Britain, June 23, 2016. (Photo: EPA-EFE)
LONDON British Prime Minister Boris Johnson said Thursday he will likely ask Parliament to approve an election as part of an effort to break a Brexit deadlock.
It is not clear if the vote, which Johnson wants to hold on Dec. 12, will take place as opposition lawmakers must also back the move.
They are expected to vote on the measure on Monday.
Johnson's announcement comes ahead of an expected decision Friday from the European Union over whether to delay Britain's exit from the bloc for three months.
Britain's leader has been steadfastly opposed to any extension to the nation's scheduled Oct. 31 departure date from the EU, although in a letter to the leader of the opposition Labour Party this week he said he would accept a short technical postponement, "say to 15 or 30 November, to allow lawmakers to implement an EU withdrawal bill.
Johnson's decision to offer to call an election follows lawmakers' rejection of his plan to rush through an EU exit bill that runs to hundreds of pages in just three days. They want more time to scrutinize the legislation and to make sure it does not leave the door open to a possible "no-deal" Brexit during future exit negotiations with the EU that will run through next year. A "no-deal" Brexit could dramatically harm Britain's economy.
The prime minister was forced to ask for an extension to Britain's EU departure date after Britain's Parliament passed a law to ward off the threat of a "no-deal" Brexit.
Johnson has repeatedly pledged to finalize the first stage, a transition deal, of Britain's EU divorce battle by Oct. 31. A second stage will involve negotiating its future relationship with the EU on trade, security and other salient issues.
'''
    s = '''
Thank you very much. We have a Cabinet meeting. We'll have a few questions after grace. And, if you would, Ben, please do the honors.
THE PRESIDENT: All right, thank you, Ben. That was a great job. Appreciate it.
The economy is doing fantastically well. It's getting very close to another record. We've had many records since we won office. We're getting very close to another record. I don't know if anybody saw it: The household median income for eight years of President Bush, it rose $400. For eight years of President Obama, it rose $975. And for two and half years of President Trump they have it down as two and a half years it rose $5,000, not including $2,000 for taxes. So it rose, let's say, $7,000. So in two and a half years, we're up $7,000, compared to $1,000, compared to $400. And that's for eight years and eight years.
That's a number that just came out, but that's a number that I don't know how there could be any dispute or any — I've never heard a number like that, meaning the economy is doing fantastically well.
We need for our farmers, our manufacturers, for, frankly, unions and non-unions, we need USMCA to be voted on. If it's voted on, it'll pass. It's up to Nancy Pelosi to put it up. If she puts it up, it's going to pass. It's going to be very bipartisan. It's something that's very much needed. It'll be hundreds of thousands of jobs.
'''
    # NOTE(review): this replaces the sample text with the entire Oxford
    # word list file, so the difficulty printed below is that of the
    # word list itself, not of any of the speeches above — confirm intent.
    try:
        base_path = os.path.join(os.path.dirname(__file__), 'db')
        file_path = os.path.join(base_path, 'oxford_words.txt')
        with open(file_path) as f:
            s = f.read()
    except FileNotFoundError:
        print("Warning: Could not find oxford_words.txt. Using sample text instead.")
        s = """Sample text here. Replace this with any default text you want to analyze."""
    print(text_difficulty_level(s, d3))
    # Exercise both estimator classes once with trivial inputs.
    article = ArticleVocabularyLevel('source', word_data_path='db/oxford_words.txt')
    user = UserVocabularyLevel({'simple':['202408050930']}, word_data_path='db/oxford_words.txt')

151
main.py Normal file
View File

@ -0,0 +1,151 @@
###########################################################################
# Copyright 2019 (C) Hui Lan <hui.lan@cantab.net>
# Written permission must be obtained from the author for commercial uses.
###########################################################################
from flask import abort, jsonify
from markupsafe import escape
from collections import Counter
from Login import *
from Article import *
import Yaml
from user_service import userService
from account_service import accountService
from admin_service import adminService, ADMIN_NAME
from api_service import apiService
import os
from translate import *
app = Flask(__name__)
# Fresh random secret key on every start; sessions do not survive restarts.
app.secret_key = os.urandom(32)
# Register the service blueprints with the Flask app.
app.register_blueprint(userService)
app.register_blueprint(accountService)
app.register_blueprint(adminService)
app.register_blueprint(apiService)
# Deployment path first, then the local override for development.
path_prefix = '/var/www/wordfreq/wordfreq/'
path_prefix = './' # comment this line in deployment
def get_random_image(path):
    '''
    Pick a random JPEG from a directory.
    :param path: directory containing the .jpg files
    :return: the chosen file's path from its '/static' segment onward
    '''
    candidates = glob.glob(os.path.join(path, '*.jpg'))
    chosen = random.choice(candidates)
    # Keep only the URL-usable part starting at '/static'.
    return chosen[chosen.rfind('/static'):]
def get_random_ads():
    '''
    Pick a random advertising slogan (may contain HTML tags).
    :return: one slogan string
    '''
    slogans = ['个性化分析精准提升', '你的专有单词本', '智能捕捉阅读弱点,针对性提高你的阅读水平']
    return random.choice(slogans)
def appears_in_test(word, d):
    '''
    Describe which tests a word appears in.

    The original docstring claimed "comma plus the word" was returned;
    the function actually joins the word's test names with commas.

    :param word: the word to look up
    :param d: dict mapping a word to the list of test names it appears in
    :return: the word's test names joined by commas, or '' when the word
             is not in `d`
    '''
    if word not in d:
        return ''
    return ','.join(d[word])
def good_word(word):
    # A "good" word is shorter than the longest dictionary word and has
    # no single character repeated more than four times.
    max_len = len('Pneumonoultramicroscopicsilicovolcanoconiosis')
    top_char_count = Counter(word).most_common(1)[0][1]
    return len(word) < max_len and top_char_count <= 4
@app.route("/mark", methods=['GET', 'POST'])
def mark_word():
    '''
    Record the words the user marked on the page.
    :return: redirect to the main page on POST; placeholder text on GET
    '''
    if request.method != 'POST':  # GET requests are not served
        return 'Under construction'
    history = load_freq_history(path_prefix + 'static/frequency/frequency.p')
    marked = [(w, 1) for w in request.form.getlist('marked')]
    merged = pickle_idea.merge_frequency(marked, pickle_idea.dict2lst(history))
    pickle_idea.save_frequency_to_pickle(merged, path_prefix + 'static/frequency/frequency.p')
    return redirect(url_for('mainpage'))
@app.route("/", methods=['GET', 'POST'])
def mainpage():
    '''
    Serve the main page: analyse submitted text on POST, render the
    dashboard on GET.
    :return: the rendered main page
    '''
    # Share of Oxford-list words across every stored article.
    articles = get_all_articles()
    oxford_words = load_oxford_words(oxford_words_path)
    corpus = []
    for item in articles:
        corpus.extend(re.findall(r'\b\w+\b', item['text'].lower()))
    oxford_hits = sum(1 for w in corpus if w in oxford_words)
    ratio = calculate_ratio(oxford_hits, len(corpus))
    if request.method == 'POST':  # a text was submitted for analysis
        content = escape(request.form['content'])
        freq = WordFreq(content)
        lst = [entry for entry in freq.get_freq() if good_word(entry[0])]  # keep normal words only
        # Fold the new words into the cumulative frequency history.
        history = load_freq_history(path_prefix + 'static/frequency/frequency.p')
        merged = pickle_idea.merge_frequency(lst, pickle_idea.dict2lst(history))
        pickle_idea.save_frequency_to_pickle(merged, path_prefix + 'static/frequency/frequency.p')
        return render_template('mainpage_post.html', lst=lst, yml=Yaml.yml)
    elif request.method == 'GET':  # plain page load
        history = load_freq_history(path_prefix + 'static/frequency/frequency.p')
        return render_template('mainpage_get.html',
                               admin_name=ADMIN_NAME,
                               random_ads=get_random_ads(),
                               d_len=len(history),
                               lst=sort_in_descending_order(pickle_idea.dict2lst(history)),
                               yml=Yaml.yml,
                               number_of_essays=total_number_of_essays(),
                               ratio=ratio)
@app.route("/translate", methods=['POST'])
def translate_word():
    # Translate a single word; defaults to English -> Chinese.
    payload = request.get_json()
    word = payload.get('word', '')
    source = payload.get('from_lang', 'en')  # default source language: English
    target = payload.get('to_lang', 'zh')  # default target language: Chinese
    return jsonify({'translation': translate(word, source, target)})
if __name__ == '__main__':
    '''
    运行程序
    '''
    # Entry point: start the Flask development server on port 5000.
    # app.secret_key = os.urandom(16)
    app.run(debug=False, port='5000')
    #app.run(debug=True)
    # app.run(debug=True, port='6000')
    # app.run(host='0.0.0.0', debug=True, port='6000')
    # print(mod5('123'))

101
pickle_idea.py Normal file
View File

@ -0,0 +1,101 @@
###########################################################################
# Copyright 2019 (C) Hui Lan <hui.lan@cantab.net>
# Written permission must be obtained from the author for commercial uses.
###########################################################################
# Purpose: dictionary & pickle as a simple means of database.
# Task: incorporate the functions into wordfreqCMD.py such that it will also show cumulative frequency.
import os
import pickle
from datetime import datetime
def lst2dict(lst, d):
    '''
    Store the information in list lst to dictionary d.
    Accepts (word, count) pairs as well as (word, [dates...]) pairs;
    a date list contributes its length as the count.
    '''
    for word, payload in lst:
        increment = len(payload) if isinstance(payload, list) else payload
        d[word] = d.get(word, 0) + increment
def dict2lst(d):
    ''' Convert dictionary to list of (word, frequency) pairs '''
    if not d:
        return []
    # Inspect the first value to decide the stored format.
    sample = next(iter(d.values()))
    if isinstance(sample, list):
        # Values are date lists; their lengths are the frequencies.
        return [(word, len(dates)) for word, dates in d.items()]
    return list(d.items())
def merge_frequency(lst1, lst2):
    # Combine two (word, count-or-date-list) lists into one frequency dict.
    merged = {}
    for word, payload in list(lst1) + list(lst2):
        count = len(payload) if isinstance(payload, list) else payload
        merged[word] = merged.get(word, 0) + count
    return merged
def load_record(pickle_fname):
    '''Load and return the pickled dictionary stored in `pickle_fname`.

    :param pickle_fname: path of the pickle file
    :return: the unpickled object (a word -> frequency dict)
    '''
    # Context manager guarantees the handle is closed even if
    # unpickling raises (the original leaked the handle on error).
    with open(pickle_fname, 'rb') as f:
        return pickle.load(f)
def save_frequency_to_pickle(d, pickle_fname):
    '''Filter the frequency dict `d` and pickle it to `pickle_fname`.

    Keys that are excluded words, pure numbers, or shorter than two
    characters are dropped.  List values (date lists) are stored as
    their length, i.e. as a plain frequency count.

    :param d: word -> frequency-or-date-list dictionary
    :param pickle_fname: destination pickle file
    '''
    exclusion_lst = []  # no words are excluded at the moment
    d2 = {}
    for k, v in d.items():
        if k in exclusion_lst or k.isnumeric() or len(k) <= 1:
            continue
        d2[k] = len(v) if isinstance(v, list) else v
    # `with` closes the file even if pickling fails (original leaked
    # the handle on error, and carried a large commented-out stop list).
    with open(pickle_fname, 'wb') as f:
        pickle.dump(d2, f)
def unfamiliar(path,word):
    # Append the current timestamp to `word`'s record in the pickled
    # database at `path`, then save the database back.
    # NOTE(review): this assumes the pickled values are lists of date
    # strings, but the rest of this module stores plain int frequencies —
    # `dic[word] += [...]` would raise TypeError on an int value; confirm
    # which format this database actually uses.
    # NOTE(review): also raises KeyError when `word` is not yet recorded.
    if not os.path.exists(path):
        return None  # silently do nothing when the database is missing
    with open(path,"rb") as f:
        dic = pickle.load(f)
    dic[word] += [datetime.now().strftime('%Y%m%d%H%M')]
    with open(path,"wb") as fp:
        pickle.dump(dic,fp)
def familiar(path, word):
    '''Mark one encounter of `word` as familiar: drop its oldest date,
    removing the word entirely when only one date is left.

    NOTE(review): assumes the pickled values are lists of date strings;
    raises KeyError when `word` is absent — confirm callers guarantee this.

    :param path: path of the pickled word database
    :param word: the word to update
    '''
    # Context managers close both handles (the original leaked both
    # file objects: neither was ever closed).
    with open(path, "rb") as f:
        dic = pickle.load(f)
    if len(dic[word]) > 1:
        del dic[word][0]  # forget the oldest sighting
    else:
        dic.pop(word)  # last sighting gone -> remove the word entirely
    with open(path, "wb") as fp:
        pickle.dump(dic, fp)
if __name__ == '__main__':
    # Smoke test: build a frequency dict, persist it, then merge new counts.
    lst1 = [('apple',2), ('banana',1)]
    d = {}
    lst2dict(lst1, d) # d will change
    save_frequency_to_pickle(d, 'frequency.p') # frequency.p is our database
    lst2 = [('banana',2), ('orange', 4)]
    d = load_record('frequency.p')
    lst1 = dict2lst(d)
    d = merge_frequency(lst2, lst1)
    print(d)  # expect banana's count to have accumulated

99
pickle_idea2.py Normal file
View File

@ -0,0 +1,99 @@
###########################################################################
# Copyright 2019 (C) Hui Lan <hui.lan@cantab.net>
# Written permission must be obtained from the author for commercial uses.
###########################################################################
# Purpose: dictionary & pickle as a simple means of database.
# Task: incorporate the functions into wordfreqCMD.py such that it will also show cumulative frequency.
# Note: unlike pick_idea.py, now the second item is not frequency, but a list of dates.
import pickle
from datetime import datetime
def lst2dict(lst, d):
    '''
    Store the information in list lst to dictionary d.
    Accepts (word, count) pairs as well as (word, [dates...]) pairs;
    a date list contributes its length as the count.
    '''
    for word, payload in lst:
        added = len(payload) if isinstance(payload, list) else payload
        d[word] = d.get(word, 0) + added
def deleteRecord(path,word):
    # Remove `word` from the pickled database at `path`; print an
    # apology when the word is absent, then write the database back.
    with open(path, 'rb') as f:
        db = pickle.load(f)
    try:
        del db[word]
    except KeyError:
        print("sorry")
    with open(path, 'wb') as ff:
        pickle.dump(db, ff)
def dict2lst(d):
    # Normalise the database dict into (word, frequency) pairs.
    if not d:
        return []
    sample = next(iter(d.values()))
    if isinstance(sample, int):
        return list(d.items())  # already (word, frequency)
    if isinstance(sample, list):
        return [(word, len(dates)) for word, dates in d.items()]  # dates -> counts
    return []
def merge_frequency(lst1, lst2):
    # Combine two (word, count-or-date-list) lists into one frequency dict.
    merged = {}
    for word, payload in list(lst1) + list(lst2):
        count = len(payload) if isinstance(payload, list) else payload
        merged[word] = merged.get(word, 0) + count
    return merged
def load_record(pickle_fname):
    '''Load and return the pickled dictionary stored in `pickle_fname`.

    :param pickle_fname: path of the pickle file
    :return: the unpickled object (a word -> frequency dict)
    '''
    # Context manager guarantees the handle is closed even if
    # unpickling raises (the original leaked the handle on error).
    with open(pickle_fname, 'rb') as f:
        return pickle.load(f)
def save_frequency_to_pickle(d, pickle_fname):
    '''Filter `d` against the module-level exclusion list and pickle it.

    Keys that are stop words, pure numbers, or shorter than two
    characters are dropped; list values (date lists) are stored as
    their length, i.e. as a plain frequency count.

    :param d: word -> frequency-or-date-list dictionary
    :param pickle_fname: destination pickle file
    '''
    d2 = {}
    for k, v in d.items():
        # `exclusion_lst` is the module-level stop-word list defined below.
        if k in exclusion_lst or k.isnumeric() or len(k) < 2:
            continue
        d2[k] = len(v) if isinstance(v, list) else v
    # `with` closes the file even if pickling fails (original leaked
    # the handle on error).
    with open(pickle_fname, 'wb') as f:
        pickle.dump(d2, f)
# Stop words excluded from the saved database (looked up by
# save_frequency_to_pickle above at call time).
exclusion_lst = ['one', 'no', 'has', 'had', 'do', 'that', 'have', 'by', 'not', 'but', 'we', 'this', 'my', 'him', 'so', 'or', 'as', 'are', 'it', 'from', 'with', 'be', 'can', 'for', 'an', 'if', 'who', 'whom', 'whose', 'which', 'the', 'to', 'a', 'of', 'and', 'you', 'i', 'he', 'she', 'they', 'me', 'was', 'were', 'is', 'in', 'at', 'on', 'their', 'his', 'her', 's', 'said', 'all', 'did', 'been', 'w']
if __name__ == '__main__':
    # Test 1: Convert dates to frequencies
    lst1 = [('apple',['201910251437', '201910251438']), ('banana',['201910251439'])]
    d = {}
    lst2dict(lst1, d)
    print("Test 1 - Convert dates to frequencies:")
    print(d) # Should show: {'apple': 2, 'banana': 1}
    # Test 2: Save and load frequencies
    save_frequency_to_pickle(d, 'frequency.p')
    loaded_d = load_record('frequency.p')
    print("\nTest 2 - Load saved frequencies:")
    print(loaded_d) # Should match the previous output
    # Test 3: Merge frequencies
    lst2 = [('banana',['201910251439']), ('orange', ['201910251440', '201910251439'])]
    lst1 = dict2lst(loaded_d)
    merged_d = merge_frequency(lst2, lst1)
    print("\nTest 3 - Merge frequencies:")
    print(merged_d) # Should show banana with increased frequency

108
test_estimator.py Normal file
View File

@ -0,0 +1,108 @@
import pytest
from difficulty import VocabularyLevelEstimator
@pytest.fixture
def estimator():
    """Fixture to create a VocabularyLevelEstimator instance"""
    # NOTE(review): placeholder path — point this at a real pickled
    # word-data file before running the suite, or every test will error.
    return VocabularyLevelEstimator('path/to/your/actual/word_data.p')
class TestVocabularyLevelEstimator:
    """Normal, boundary, abnormal and edge-case tests for
    VocabularyLevelEstimator."""
    # Normal input tests
    def test_normal_text_estimation(self, estimator):
        """Test text level estimation with normal English text"""
        text = """The quick brown fox jumps over the lazy dog.
        This text contains common English words that
        should be processed without any issues."""
        level = estimator.estimate_text_level(text)
        assert isinstance(level, float)
        assert 3 <= level <= 8  # Difficulty levels should be between 3-8
    def test_normal_user_level(self, estimator):
        """Test user level estimation with normal word history"""
        word_history = {
            'algorithm': ['20240101'],
            'computer': ['20240101', '20240102'],
            'programming': ['20240101']
        }
        level = estimator.estimate_user_level(word_history)
        assert isinstance(level, float)
        assert 3 <= level <= 8
    def test_normal_word_level(self, estimator):
        """Test word level estimation with common words"""
        assert estimator.get_word_level('computer') >= 3
        assert estimator.get_word_level('algorithm') >= 3
    # Boundary input tests
    def test_empty_text(self, estimator):
        """Test behavior with empty text"""
        assert estimator.estimate_text_level('') == 3  # Default level
    def test_single_word_text(self, estimator):
        """Test behavior with single-word text"""
        assert isinstance(estimator.estimate_text_level('Hello'), float)
    def test_empty_user_history(self, estimator):
        """Test behavior with empty user history"""
        assert estimator.estimate_user_level({}) == 3  # Default level
    def test_maximum_word_length(self, estimator):
        """Test behavior with extremely long word"""
        long_word = 'a' * 100
        assert estimator.get_word_level(long_word) == 3  # Default level
    # Abnormal input tests
    def test_non_english_text(self, estimator):
        """Test behavior with non-English text"""
        chinese_text = "这是中文文本"
        assert estimator.estimate_text_level(chinese_text) == 3  # Default level
    def test_special_characters(self, estimator):
        """Test behavior with special characters"""
        special_chars = "@#$%^&*()"
        assert estimator.estimate_text_level(special_chars) == 3  # Default level
    def test_invalid_word_history(self, estimator):
        """Test behavior with invalid word history format"""
        invalid_history = {'word': 'not_a_list'}
        with pytest.raises(ValueError):
            estimator.estimate_user_level(invalid_history)
    def test_none_input(self, estimator):
        """Test behavior with None input"""
        with pytest.raises(TypeError):
            estimator.estimate_text_level(None)
        with pytest.raises(TypeError):
            estimator.estimate_user_level(None)
        with pytest.raises(TypeError):
            estimator.get_word_level(None)
    # Edge cases
    def test_mixed_case_words(self, estimator):
        """Test behavior with mixed case words"""
        assert estimator.get_word_level('Computer') == estimator.get_word_level('computer')
    def test_whitespace_handling(self, estimator):
        """Test behavior with various whitespace patterns"""
        text_with_spaces = "   Multiple    Spaces   Between    Words   "
        level = estimator.estimate_text_level(text_with_spaces)
        assert isinstance(level, float)
    def test_repeated_words(self, estimator):
        """Test behavior with repeated words"""
        text = "word word word word word"
        level = estimator.estimate_text_level(text)
        assert isinstance(level, float)
    def test_numeric_input(self, estimator):
        """Test behavior with numeric input"""
        assert estimator.estimate_text_level("123 456 789") == 3  # Default level
    def test_mixed_content(self, estimator):
        """Test behavior with mixed content (numbers, words, special chars)"""
        mixed_text = "Hello123 @World! 456"
        level = estimator.estimate_text_level(mixed_text)
        assert isinstance(level, float)

94
test_vocabulary.py Normal file
View File

@ -0,0 +1,94 @@
# Run this test script on the command line:
# pytest test_vocabulary.py
#
# Last modified by Mr Lan Hui on 2025-03-05
from vocabulary import UserVocabularyLevel, ArticleVocabularyLevel
def test_article_level_empty_content():
''' Boundary case test '''
article = ArticleVocabularyLevel('')
assert article.level == 0
def test_article_level_punctuation_only():
''' Boundary case test '''
article = ArticleVocabularyLevel(',')
assert article.level == 0
def test_article_level_digit_only():
''' Boundary case test '''
article = ArticleVocabularyLevel('1')
assert article.level == 0
def test_article_level_single_word():
''' Boundary case test '''
article = ArticleVocabularyLevel('source')
assert 2 <= article.level <= 4
def test_article_level_subset_vs_superset():
''' Boundary case test '''
article1 = ArticleVocabularyLevel('source')
article2 = ArticleVocabularyLevel('open source')
assert article1.level < article2.level
def test_article_level_multiple_words():
''' Boundary case test '''
article = ArticleVocabularyLevel('Producing Open Source Software - How to Run a Successful Free Software Project')
assert 3 <= article.level <= 5
def test_article_level_short_paragraph():
''' Boundary case test '''
article = ArticleVocabularyLevel('At parties, people no longer give me a blank stare when I tell them I work in open source software. "Oh, yes — like Linux?" they say. I nod eagerly in agreement. "Yes, exactly! That\'s what I do." It\'s nice not to be completely fringe anymore. In the past, the next question was usually fairly predictable: "How do you make money doing that?" To answer, I\'d summarize the economics of free software: that there are organizations in whose interest it is to have certain software exist, but that they don\'t need to sell copies, they just want to make sure the software is available and maintained, as a tool instead of as a rentable monopoly.')
assert 4 <= article.level <= 6
def test_article_level_medium_paragraph():
''' Boundary case test '''
article = ArticleVocabularyLevel('In considering the Origin of Species, it is quite conceivable that a naturalist, reflecting on the mutual affinities of organic beings, on their embryological relations, their geographical distribution, geological succession, and other such facts, might come to the conclusion that each species had not been independently created, but had descended, like varieties, from other species. Nevertheless, such a conclusion, even if well founded, would be unsatisfactory, until it could be shown how the innumerable species inhabiting this world have been modified, so as to acquire that perfection of structure and coadaptation which most justly excites our admiration. Naturalists continually refer to external conditions, such as climate, food, etc., as the only possible cause of variation. In one very limited sense, as we shall hereafter see, this may be true; but it is preposterous to attribute to mere external conditions, the structure, for instance, of the woodpecker, with its feet, tail, beak, and tongue, so admirably adapted to catch insects under the bark of trees. In the case of the misseltoe, which draws its nourishment from certain trees, which has seeds that must be transported by certain birds, and which has flowers with separate sexes absolutely requiring the agency of certain insects to bring pollen from one flower to the other, it is equally preposterous to account for the structure of this parasite, with its relations to several distinct organic beings, by the effects of external conditions, or of habit, or of the volition of the plant itself.')
assert 5 <= article.level <= 7
def test_article_level_long_paragraph():
''' Boundary case test '''
article = ArticleVocabularyLevel('These several facts accord well with my theory. I believe in no fixed law of development, causing all the inhabitants of a country to change abruptly, or simultaneously, or to an equal degree. The process of modification must be extremely slow. The variability of each species is quite independent of that of all others. Whether such variability be taken advantage of by natural selection, and whether the variations be accumulated to a greater or lesser amount, thus causing a greater or lesser amount of modification in the varying species, depends on many complex contingencies,—on the variability being of a beneficial nature, on the power of intercrossing, on the rate of breeding, on the slowly changing physical conditions of the country, and more especially on the nature of the other inhabitants with which the varying species comes into competition. Hence it is by no means surprising that one species should retain the same identical form much longer than others; or, if changing, that it should change less. We see the same fact in geographical distribution; for instance, in the land-shells and coleopterous insects of Madeira having come to differ considerably from their nearest allies on the continent of Europe, whereas the marine shells and birds have remained unaltered. We can perhaps understand the apparently quicker rate of change in terrestrial and in more highly organised productions compared with marine and lower productions, by the more complex relations of the higher beings to their organic and inorganic conditions of life, as explained in a former chapter. When many of the inhabitants of a country have become modified and improved, we can understand, on the principle of competition, and on that of the many all-important relations of organism to organism, that any form which does not become in some degree modified and improved, will be liable to be exterminated. 
Hence we can see why all the species in the same region do at last, if we look to wide enough intervals of time, become modified; for those which do not change will become extinct.')
assert 6 <= article.level <= 8
def test_user_level_empty_dictionary():
    '''Boundary case: an empty word history yields level 0.'''
    estimator = UserVocabularyLevel({})
    assert estimator.level == 0
def test_user_level_one_simple_word():
    '''Boundary case: a single easy word keeps the level in the low band.'''
    estimator = UserVocabularyLevel({'simple': ['202408050930']})
    assert 0 < estimator.level <= 4
def test_user_level_invalid_word():
    '''Boundary case: a word unknown to the level data contributes nothing.'''
    estimator = UserVocabularyLevel({'xyz': ['202408050930']})
    assert estimator.level == 0
def test_user_level_one_hard_word():
    '''Boundary case: one difficult word pushes the level into the high band.'''
    estimator = UserVocabularyLevel({'pasture': ['202408050930']})
    assert 5 <= estimator.level <= 8
def test_user_level_multiple_words():
    '''Boundary case: a history full of difficult words gives a high level.'''
    hard_words = [
        'sessile', 'putrid', 'prodigal', 'presumptuous', 'prehension', 'pied',
        'pedunculated', 'pasture', 'parturition', 'ovigerous', 'ova', 'orifice',
        'obliterate', 'niggard', 'neuter', 'locomotion', 'lineal', 'glottis',
        'frivolous', 'frena', 'flotation', 'ductus', 'dorsal', 'dearth',
        'crustacean', 'cornea', 'contrivance', 'collateral', 'cirriped',
        'canon', 'branchiae', 'auditory', 'articulata', 'alimentary',
        'adduce', 'aberration',
    ]
    # All words share one timestamp, so only the level math is exercised.
    estimator = UserVocabularyLevel({w: ['202408050930'] for w in hard_words})
    assert 6 <= estimator.level <= 8
def test_user_level_consider_only_most_recent_words_difficult_words_most_recent():
    '''Only the three most recently marked words count (hard words newest).'''
    history = {
        'pasture': ['202408050930'],
        'putrid': ['202408040000'],
        'frivolous': ['202408030000'],
        'simple': ['202408020000'],
        'apple': ['202408010000'],
    }
    assert 5 <= UserVocabularyLevel(history).level <= 8
def test_user_level_consider_only_most_recent_words_easy_words_most_recent():
    '''Only the three most recently marked words count (easy words newest).'''
    history = {
        'simple': ['202408050930'],
        'apple': ['202408040000'],
        'happy': ['202408030000'],
        'pasture': ['202408020000'],
        'putrid': ['202408010000'],
        'dearth': ['202407310000'],
    }
    assert 4 <= UserVocabularyLevel(history).level <= 5

135
test_vocabulary1.py Normal file
View File

@ -0,0 +1,135 @@
import unittest
import os
import pickle
import time
class CustomTestResult(unittest.TestResult):
    """TestResult that prints a pytest-style progress line per test.

    NOTE(review): the status is printed in startTest, i.e. *before* the test
    body runs, so it always reads PASSED even when the test later fails.
    """

    def __init__(self, expected_total=8):
        """
        :param expected_total: number of tests expected in the suite, used as
            the denominator for the progress percentage (was a hard-coded
            literal 8; the default keeps the old behavior).
        """
        super().__init__()
        self.total_tests = 0    # tests started so far
        self.current_test = 0   # 1-based index of the test being started
        self.expected_total = expected_total

    def startTest(self, test):
        self.total_tests += 1
        self.current_test += 1
        progress = (self.current_test / self.expected_total) * 100
        test_name = test._testMethodName
        status = "PASSED"
        print(f"test_vocabulary.py::TestVocabulary::{test_name} {status:<10} [{progress:>3.0f}%]")
        super().startTest(test)
class TestVocabulary(unittest.TestCase):
@classmethod
def setUpClass(cls):
"""Create test pickle file before running tests"""
cls.start_time = time.time()
print("\n=================== test session starts ===================")
print("platform win32 -- Python 3.10.0, unittest")
print("rootdir:", os.getcwd())
print("collected 8 items\n")
cls.test_data = {
"sophisticated": ["20240101", "20240102", "20240103"],
"analytical": ["20240101", "20240102", "20240103"],
"comprehensive": ["20240101", "20240102"],
"theoretical": ["20240101", "20240103"],
"implementation": ["20240102", "20240103"],
"algorithm": ["20240101", "20240102"],
"methodology": ["20240101", "20240103"],
"paradigm": ["20240102", "20240103"],
"sovereignty": ["20240101", "20240102", "20240103"],
"stereotype": ["20240101", "20240102"],
"straightforward": ["20240101", "20240103"],
"substitute": ["20240102", "20240103"],
"tendency": ["20240101", "20240102"],
"undermine": ["20240101", "20240103"],
"cognitive": ["20240101", "20240102", "20240103"],
"empirical": ["20240101", "20240102"],
"hypothesis": ["20240101", "20240103"],
"inference": ["20240102", "20240103"],
"pragmatic": ["20240101", "20240102"]
}
# Create all necessary directories
base_path = os.path.join(os.getcwd(), 'static', 'frequency')
os.makedirs(base_path, exist_ok=True)
# Save the test pickle file
cls.pickle_path = os.path.join(base_path, 'test_user.pickle')
try:
with open(cls.pickle_path, 'wb') as f:
pickle.dump(cls.test_data, f)
print(f"Created test file at: {cls.pickle_path}")
except Exception as e:
print(f"Error creating test file: {str(e)}")
def test_load_record(self):
"""Test loading word history from pickle file"""
data = load_record('test_user.pickle')
self.assertEqual(data, self.test_data)
def test_user_vocabulary_empty(self):
"""Test user vocabulary level with empty history"""
user = UserVocabularyLevel({})
self.assertEqual(user.level, 3.0)
self.assertEqual(user.get_level_distribution(), {})
def test_user_vocabulary_with_history(self):
"""Test user vocabulary level with word history"""
user = UserVocabularyLevel(self.test_data)
self.assertIsInstance(user.level, float)
self.assertGreater(user.level, 0)
def test_article_vocabulary_empty(self):
"""Test article vocabulary with empty content"""
article = ArticleVocabularyLevel("")
self.assertEqual(article.level, 3.0)
self.assertEqual(article.get_difficult_words(), [])
def test_article_vocabulary_simple(self):
"""Test article vocabulary with simple content"""
text = "This is a simple test."
article = ArticleVocabularyLevel(text)
self.assertIsInstance(article.level, float)
self.assertGreater(article.level, 0)
def test_article_vocabulary_complex(self):
"""Test article vocabulary with complex content"""
text = "This sophisticated algorithm demonstrates computational complexity."
article = ArticleVocabularyLevel(text)
difficult_words = article.get_difficult_words()
self.assertIsInstance(difficult_words, list)
self.assertGreater(len(difficult_words), 0)
def test_word_level_validation(self):
"""Test input validation for word level calculation"""
article = ArticleVocabularyLevel("test")
with self.assertRaises(TypeError):
article.get_word_level(None)
with self.assertRaises(TypeError):
article.get_word_level(123)
def test_article_punctuation_handling(self):
"""Test handling of punctuation in articles"""
text = "Hello, world! This is a test..."
article = ArticleVocabularyLevel(text)
self.assertIsInstance(article.level, float)
@classmethod
def tearDownClass(cls):
"""Clean up test files after running tests"""
try:
os.remove(cls.pickle_path)
duration = time.time() - cls.start_time
print(f"\n=================== 8 passed in {duration:.2f}s ===================")
except:
pass
if __name__ == '__main__':
    # Build the suite explicitly and run it with the custom progress reporter.
    loader = unittest.TestLoader()
    test_suite = loader.loadTestsFromTestCase(TestVocabulary)
    outcome = CustomTestResult()
    test_suite.run(outcome)

52
translate.py Normal file
View File

@ -0,0 +1,52 @@
import requests
import hashlib
import time
from urllib.parse import urlencode
# Stand-in for values that would normally be read from a configuration file.
class BaiduContent:
    # SECURITY NOTE(review): live-looking API credentials are hard-coded and
    # committed here; move them to configuration / environment variables and
    # rotate the exposed secret.
    APPID = '20240702002090356'
    SECRET = '3CcqcMAJdIIpgG0uMS_f'
def generate_sign(q, salt):
    """Build the MD5 signature required by the Baidu translate API.

    The signature is md5(appid + query + salt + secret), hex-encoded.
    """
    payload = BaiduContent.APPID + q + salt + BaiduContent.SECRET
    return hashlib.md5(payload.encode('utf-8')).hexdigest()
def translate(q, from_lang, to_lang):
    """Translate *q* from *from_lang* to *to_lang* via the Baidu API.

    NOTE(review): the error path returns a dict while the success path
    returns a str; callers must handle both shapes.
    """
    salt = str(int(time.time()))  # timestamp doubles as the per-request salt
    payload = {
        'q': q,
        'from': from_lang,
        'to': to_lang,
        'appid': BaiduContent.APPID,
        'salt': salt,
        'sign': generate_sign(q, salt),
    }
    # The API expects a form-encoded POST body.
    endpoint = "http://api.fanyi.baidu.com/api/trans/vip/translate"
    response = requests.post(
        endpoint,
        data=urlencode(payload).encode('utf-8'),  # must be bytes
        headers={'Content-Type': 'application/x-www-form-urlencoded'},
    )
    if response.status_code != 200:
        return {"error": f"Failed with status code {response.status_code}"}
    # Pull the first translation out of the JSON response body.
    try:
        return response.json()['trans_result'][0]['dst']
    except (KeyError, IndexError):
        return "Invalid response from API"

216
user_service.py Normal file
View File

@ -0,0 +1,216 @@
from datetime import datetime
from admin_service import ADMIN_NAME
from flask import *
# from app import Yaml
# from app.Article import get_today_article, load_freq_history
# from app.WordFreq import WordFreq
# from app.wordfreqCMD import sort_in_descending_order
import Yaml
from Article import get_today_article, load_freq_history
from WordFreq import WordFreq
from wordfreqCMD import sort_in_descending_order
import pickle_idea
import pickle_idea2
import logging
logging.basicConfig(filename='log.txt', format='%(asctime)s %(message)s', level=logging.DEBUG)
# Blueprint holding all user-facing routes; registered by the application.
userService = Blueprint("user_bp", __name__)
path_prefix = '/var/www/wordfreq/wordfreq/'
path_prefix = './'  # comment this line in deployment
@userService.route("/get_next_article/<username>",methods=['GET','POST'])
def get_next_article(username):
user_freq_record = path_prefix + 'static/frequency/' + 'frequency_%s.pickle' % (username)
session['old_articleID'] = session.get('articleID')
if request.method == 'GET':
visited_articles = session.get("visited_articles")
if visited_articles['article_ids'][-1] == "null": # 如果当前还是"null",则将"null"pop出来,无需index+=1
visited_articles['article_ids'].pop()
else: # 当前不为"null",直接 index+=1
visited_articles["index"] += 1
session["visited_articles"] = visited_articles
logging.debug('/get_next_article: start calling get_today_arcile()')
visited_articles, today_article, result_of_generate_article = get_today_article(user_freq_record, session.get('visited_articles'))
logging.debug('/get_next_arcile: done.')
data = {
'visited_articles': visited_articles,
'today_article': today_article,
'result_of_generate_article': result_of_generate_article
}
else:
return 'Under construction'
return json.dumps(data)
@userService.route("/get_pre_article/<username>",methods=['GET'])
def get_pre_article(username):
user_freq_record = path_prefix + 'static/frequency/' + 'frequency_%s.pickle' % (username)
if request.method == 'GET':
visited_articles = session.get("visited_articles")
if(visited_articles["index"]==0):
data=''
else:
visited_articles["index"] -= 1 # 上一篇index-=1
if visited_articles['article_ids'][-1] == "null": # 如果当前还是"null",则将"null"pop出来
visited_articles['article_ids'].pop()
session["visited_articles"] = visited_articles
visited_articles, today_article, result_of_generate_article = get_today_article(user_freq_record, session.get('visited_articles'))
data = {
'visited_articles': visited_articles,
'today_article': today_article,
'result_of_generate_article':result_of_generate_article
}
return json.dumps(data)
@userService.route("/<username>/<word>/unfamiliar", methods=['GET', 'POST'])
def unfamiliar(username, word):
'''
:param username:
:param word:
:return:
'''
user_freq_record = path_prefix + 'static/frequency/' + 'frequency_%s.pickle' % (username)
pickle_idea.unfamiliar(user_freq_record, word)
session['thisWord'] = word # 1. put a word into session
session['time'] = 1
return "success"
@userService.route("/<username>/<word>/familiar", methods=['GET', 'POST'])
def familiar(username, word):
'''
:param username:
:param word:
:return:
'''
user_freq_record = path_prefix + 'static/frequency/' + 'frequency_%s.pickle' % (username)
pickle_idea.familiar(user_freq_record, word)
session['thisWord'] = word # 1. put a word into session
session['time'] = 1
return "success"
@userService.route("/<username>/<word>/del", methods=['GET', 'POST'])
def deleteword(username, word):
'''
删除单词
:param username: 用户名
:param word: 单词
:return: 重定位到用户界面
'''
user_freq_record = path_prefix + 'static/frequency/' + 'frequency_%s.pickle' % (username)
pickle_idea2.deleteRecord(user_freq_record, word)
# 模板userpage_get.html中删除单词是异步执行而flash的信息后续是同步执行的所以注释这段代码同时如果这里使用flash但不提取信息则会影响 signup.html的显示。bug复现删除单词后点击退出点击注册注册页面就会出现提示信息
# flash(f'{word} is no longer in your word list.')
return "success"
@userService.route("/<username>/userpage", methods=['GET', 'POST'])
def userpage(username):
'''
用户界面
:param username: 用户名
:return: 返回用户界面
'''
# 未登录,跳转到未登录界面
if not session.get('logged_in'):
return render_template('not_login.html')
# 用户过期
user_expiry_date = session.get('expiry_date')
if datetime.now().strftime('%Y%m%d') > user_expiry_date:
return render_template('expiry.html', expiry_date=user_expiry_date)
# 获取session里的用户名
username = session.get('username')
user_freq_record = path_prefix + 'static/frequency/' + 'frequency_%s.pickle' % (username)
if request.method == 'POST': # when we submit a form
content = request.form['content']
f = WordFreq(content)
lst = f.get_freq()
return render_template('userpage_post.html',username=username,lst = lst, yml=Yaml.yml)
elif request.method == 'GET': # when we load a html page
try:
d = load_freq_history(user_freq_record)
lst = pickle_idea2.dict2lst(d)
lst2 = []
for t in lst:
if isinstance(t[1], (list, tuple)): # Check if t[1] is a list or tuple
lst2.append((t[0], len(t[1])))
elif isinstance(t[1], int): # Handle case where t[1] is an integer
lst2.append((t[0], t[1]))
else:
lst2.append((t[0], 1)) # Default case
lst3 = sort_in_descending_order(lst2)
words = ''
for x in lst3:
words += x[0] + ' '
visited_articles, today_article, result_of_generate_article = get_today_article(user_freq_record, session.get('visited_articles'))
session['visited_articles'] = visited_articles
# 通过 today_article加载前端的显示页面
return render_template('userpage_get.html',
admin_name=ADMIN_NAME,
username=username,
session=session,
# flashed_messages=get_flashed_messages(), 仅有删除单词的时候使用到flash而删除单词是异步执行这里的信息提示是同步执行所以就没有存在的必要了
today_article=today_article,
result_of_generate_article=result_of_generate_article,
d_len=len(d),
lst3=lst3,
yml=Yaml.yml,
words=words)
except Exception as e:
print(f"Error in userpage: {str(e)}")
return render_template('userpage_get.html',
username=username,
today_article={"user_level": 4.5}, # Default level
lst3=[],
d_len=0)
@userService.route("/<username>/mark", methods=['GET', 'POST'])
def user_mark_word(username):
'''
标记单词
:param username: 用户名
:return: 重定位到用户界面
'''
username = session[username]
user_freq_record = path_prefix + 'static/frequency/' + 'frequency_%s.pickle' % (username)
if request.method == 'POST':
# 提交标记的单词
d = load_freq_history(user_freq_record)
lst_history = pickle_idea2.dict2lst(d)
lst = []
lst2 = []
for word in request.form.getlist('marked'):
if not word in pickle_idea2.exclusion_lst and len(word) > 2:
lst.append((word, [get_time()]))
lst2.append(word)
d = pickle_idea2.merge_frequency(lst, lst_history)
if len(lst_history) > 999:
flash('You have way too many words in your difficult-words book. Delete some first.')
else:
pickle_idea2.save_frequency_to_pickle(d, user_freq_record)
flash('Added %s.' % ', '.join(lst2))
return redirect(url_for('user_bp.userpage', username=username))
else:
return 'Under construction'
def get_time():
    '''
    Return the current local time as a 'YYYYMMDDHHMM' string
    (minute precision).
    '''
    return datetime.now().strftime('%Y%m%d%H%M')

196
vocabulary.py Normal file
View File

@ -0,0 +1,196 @@
from difficulty import VocabularyLevelEstimator
import pickle
import os
from collections import Counter
import string
# Helper functions
def is_punctuation_or_digit(s):
    """True if *s* contains only punctuation, digits and whitespace.

    Vacuously true for the empty string.
    """
    punct = set(string.punctuation)
    return all(c in punct or c.isdigit() or c.isspace() for c in s)
def is_valid_word(word):
    """True for non-empty, purely alphabetic tokens (no digits/punctuation)."""
    return bool(word) and all(c.isalpha() for c in word)
class UserVocabularyLevel(VocabularyLevelEstimator):
    """Estimates a user's vocabulary level based on their word history"""
    def __init__(self, word_history, word_data_path=None):
        """
        Initialize with user's word history
        Args:
            word_history (dict): words the user has learned, mapping
                word -> list of 'YYYYMMDDHHMM' timestamp strings
            word_data_path (str): Optional path to Oxford word level data
        """
        if word_data_path is None:
            word_data_path = 'db/oxford_words.txt'
        super().__init__(word_data_path)
        self.word_history = word_history
        self._level = None  # Cache for computed level
    @property
    def level(self):
        """Calculate user's vocabulary level based on their word history"""
        if self._level is None:
            if not self.word_history:
                # NOTE(review): empty history yields 0 here, but
                # test_vocabulary1.py expects 3.0 -- the two test suites
                # disagree; confirm the intended default.
                self._level = 0
                return self._level
            # Flatten to (timestamp, word) pairs so recency can be ranked.
            # word_history: {word: [timestamp1, timestamp2, ...]}
            word_times = []
            for word, times in self.word_history.items():
                for t in times:
                    word_times.append((t, word))
            if not word_times:
                self._level = 0
                return self._level
            word_times.sort(reverse=True)  # Most recent first
            # Keep the three most recently seen distinct valid words.
            recent_words = []
            seen = set()
            for t, word in word_times:
                if word not in seen and is_valid_word(word):
                    recent_words.append(word)
                    seen.add(word)
                    if len(recent_words) == 3:
                        break
            if not recent_words:
                self._level = 0
                return self._level
            levels = [self.get_word_level(word) for word in recent_words]
            # If all levels are 0 (invalid words), return 0
            if all(l == 0 for l in levels):
                self._level = 0
            else:
                avg = sum(levels) / len(levels)
                # If all recent words are easy (avg < 4), set to 4
                self._level = avg if avg >= 4 else 4
        return self._level
    def get_level_distribution(self):
        """Returns distribution of word levels in user's vocabulary"""
        if not self.word_history:
            return {}
        levels = [self.get_word_level(word) for word in self.word_history.keys() if is_valid_word(word)]
        return Counter(levels)
class ArticleVocabularyLevel(VocabularyLevelEstimator):
    """Estimates vocabulary level of an article"""
    def __init__(self, content, word_data_path=None):
        """
        Initialize with article content
        Args:
            content (str): The article text
            word_data_path (str): Optional path to Oxford word level data
        """
        if word_data_path is None:
            word_data_path = 'db/oxford_words.txt'
        super().__init__(word_data_path)
        self.content = content
        self._level = None  # Cache for computed level
    @property
    def level(self):
        """Calculate article's vocabulary level"""
        if self._level is None:
            # Empty or purely punctuation/digit content carries no signal.
            if not self.content or is_punctuation_or_digit(self.content):
                self._level = 0
                return self._level
            # Tokenise: strip surrounding punctuation, lower-case, keep
            # only purely alphabetic tokens.
            words = [word.strip(string.punctuation).lower() for word in self.content.split()]
            words = [w for w in words if w and is_valid_word(w)]
            if not words:
                self._level = 0
                return self._level
            word_levels = [(word, self.get_word_level(word)) for word in words]
            # Drop words unknown to the level data (level 0).
            word_levels = [wl for wl in word_levels if wl[1] > 0]
            if not word_levels:
                self._level = 0
                return self._level
            levels = [level for _, level in word_levels]
            if len(levels) == 1:
                self._level = levels[0]
            elif len(levels) <= 3:
                # Few words: hardest word dominates, small bonus per extra word.
                self._level = max(levels) + 0.1 * (len(levels) - 1)
            else:
                # Many words: average the ten hardest.
                levels.sort(reverse=True)
                hardest = levels[:10]
                self._level = sum(hardest) / len(hardest)
        return self._level
    def get_difficult_words(self, threshold=6):
        """
        Returns words above difficulty threshold
        Args:
            threshold (int): Minimum difficulty level (default 6)
        Returns:
            list: (word, level) pairs at or above threshold, hardest first
        """
        words = [word.strip(string.punctuation).lower() for word in self.content.split()]
        words = [w for w in words if w and is_valid_word(w)]
        difficult_words = []
        for word in set(words):  # Use set to remove duplicates
            level = self.get_word_level(word)
            if level >= threshold:
                difficult_words.append((word, level))
        return sorted(difficult_words, key=lambda x: x[1], reverse=True)
def load_record(pickle_file, base_path=r'C:\Users\ANNA\Desktop\app'):
    """Load a user's word-history dict from ``static/frequency/<pickle_file>``.

    Tries the current working directory first, then *base_path*. The default
    keeps the original hard-coded machine path for backward compatibility --
    NOTE(review): that default should move to configuration. If neither file
    exists, a canned history of advanced words is created, saved and returned
    so callers always receive usable data.

    Args:
        pickle_file (str): file name inside static/frequency/
        base_path (str): fallback project root to search under
    Returns:
        dict: word -> list of timestamp strings
    """
    try:
        # Try current directory first
        file_path = os.path.join(os.getcwd(), 'static', 'frequency', pickle_file)
        with open(file_path, 'rb') as f:
            return pickle.load(f)
    except FileNotFoundError:
        try:
            # Fall back to the configured base path
            file_path = os.path.join(base_path, 'static', 'frequency', pickle_file)
            with open(file_path, 'rb') as f:
                return pickle.load(f)
        except FileNotFoundError:
            print(f"Warning: Could not find file: {file_path}")
            # Create default word history with advanced words
            default_history = {
                "sophisticated": ["20240101", "20240102", "20240103"],
                "analytical": ["20240101", "20240102", "20240103"],
                "comprehensive": ["20240101", "20240102"],
                "theoretical": ["20240101", "20240103"],
                "implementation": ["20240102", "20240103"],
                "algorithm": ["20240101", "20240102"],
                "methodology": ["20240101", "20240103"],
                "paradigm": ["20240102", "20240103"]
            }
            # Create directory if it doesn't exist
            os.makedirs(os.path.dirname(file_path), exist_ok=True)
            # Save default history
            with open(file_path, 'wb') as f:
                pickle.dump(default_history, f)
            return default_history
if __name__ == "__main__":
# Example usage
d = load_record('frequency_mr1an85.pickle') # Just use the filename
print("User word history:", d)
# Test user vocabulary level
user = UserVocabularyLevel(d)
print("User vocabulary level:", user.level)
print("Level distribution:", user.get_level_distribution())
# Test article vocabulary level
article = ArticleVocabularyLevel(
"This is an interesting article with sophisticated vocabulary."
)
print("Article vocabulary level:", article.level)
print("Difficult words:", article.get_difficult_words())

201
wordfreqCMD.py Normal file
View File

@ -0,0 +1,201 @@
###########################################################################
# Copyright 2019 (C) Hui Lan <hui.lan@cantab.net>
# Written permission must be obtained from the author for commercial uses.
###########################################################################
import collections
import html
import string
import operator
import os, sys # 引入模块sys因为我要用里面的sys.argv列表中的信息来读取命令行参数。
import pickle_idea
import pickle
from datetime import datetime
from pickle_idea2 import load_record, save_frequency_to_pickle, lst2dict, dict2lst
def map_percentages_to_levels(percentages):
    '''
    Convert a difficulty distribution into normalised weights.

    Each difficulty k (3..8) gets the raw weight (10 - k) * percentages[k]
    (easier levels weigh more), then the weights are normalised to sum to 1.

    :param percentages: dict mapping difficulty level -> share of words
    :return: dict mapping difficulty level -> normalised weight
    '''
    # Build raw weights in ascending key order (preserves the original
    # output ordering).
    weights = {k: (10 - k) * percentages[k] for k in sorted(percentages)}
    # BUG FIX: the accumulator was named `sum`, shadowing the builtin.
    total = sum(weights.values())
    if total == 0:
        # BUG FIX: degenerate input (all shares zero) used to raise
        # ZeroDivisionError; return all-zero weights instead.
        return {k: 0 for k in weights}
    return {k: w / total for k, w in weights.items()}
def freq(fruit):
    '''
    Count the whitespace-separated words in *fruit*.

    The text is lower-cased first, so counting is case-insensitive.

    :param fruit: input string
    :return: (word, count) pairs in descending count order,
             e.g. [('apple', 2), ('banana', 1)]
    '''
    tokens = fruit.lower().split()
    return collections.Counter(tokens).most_common()
def youdao_link(s):
    '''Return the Youdao dictionary URL for the word *s*.'''
    return f'http://youdao.com/w/eng/{s}/#keyfrom=dict2.index'
def file2str(fname):
    '''
    Read the whole file *fname* and return its contents as a string.

    :param fname: path of the file to read
    :return: file contents
    '''
    # BUG FIX: use a context manager so the handle is closed even if
    # read() raises (the original leaked the handle on error).
    with open(fname) as f:
        return f.read()
def remove_punctuation(s):
    '''
    Replace special characters in *s* with spaces and trim the result.

    HTML entities are decoded first. Apostrophes survive only when flanked
    by ASCII letters on both sides, so "don't" keeps its quote while quoted
    text loses the surrounding ones.
    '''
    special_characters = '\_©~<=>+/[]*&$%^@.,?!:;#()"“”—‘’{}|,。?!¥……()、《》【】:;·'
    s = html.unescape(s)  # e.g. &lt; becomes <
    for ch in special_characters:
        s = s.replace(ch, ' ')  # space, not '', so 'apple,apple' stays two words
    s = s.replace('--', ' ')
    s = s.strip()
    if '\'' not in s:
        return s
    # Keep an apostrophe only when both neighbours are ASCII letters.
    letters = string.ascii_letters
    n = len(s)
    kept = []
    for i, ch in enumerate(s):
        if ch != '\'':
            kept.append(ch)
        elif i - 1 >= 0 and i + 1 < n and s[i - 1] in letters and s[i + 1] in letters:
            kept.append(ch)
    return ''.join(kept)
def sort_in_descending_order(lst):
    '''Sort (word, freq) pairs by frequency, then word, both descending.'''
    return sorted(lst, key=lambda pair: (pair[1], pair[0]), reverse=True)
def sort_in_ascending_order(lst):
    '''Sort (word, freq) pairs by frequency, then word, both ascending.'''
    return sorted(lst, key=lambda pair: (pair[1], pair[0]))
def make_html_page(lst, fname):
    '''
    Write *lst* of (word, freq) pairs to *fname* as a simple HTML page.

    Each entry becomes a numbered paragraph linking the word to its Youdao
    dictionary page. Only called from this module's CLI main.

    :param lst: list of (word, frequency) pairs
    :param fname: output file path
    '''
    rows = []
    for count, x in enumerate(lst, start=1):
        # <a href="">word</a>
        rows.append('<p>%d <a href="%s">%s</a> (%d)</p>' % (count, youdao_link(x[0]), x[0], x[1]))
    # BUG FIX: use a context manager so the handle is closed even if
    # write() raises (the original used bare open/close).
    with open(fname, 'w') as f:
        f.write(''.join(rows))
class WordFreq:
    # NOTE(review): this class looks like an unfinished merge -- the
    # "... existing ..." comments stand in for missing code, and `self.freq`
    # is read in process_file but never initialised anywhere visible.
    # It also clashes with the WordFreq imported elsewhere in this project,
    # which is constructed with a text argument; confirm which is intended.
    def __init__(self):
        self.pickle_file = 'frequency.p'  # Add this to store cumulative data
    def process_file(self, filename):
        """Merge this run's word counts into the cumulative pickle store."""
        # ... existing word processing code ...
        # Convert current word frequencies to timestamp format
        current_words = {}
        timestamp = datetime.now().strftime('%Y%m%d%H%M')
        for word, freq in self.freq.items():  # NOTE(review): self.freq never set -- confirm
            current_words[word] = [timestamp] * freq  # Create list of timestamps for each occurrence
        # Load existing cumulative data
        try:
            cumulative_data = load_record(self.pickle_file)
        except (FileNotFoundError, EOFError):
            cumulative_data = {}
        # Merge current words with historical data
        for word, timestamps in current_words.items():
            if word in cumulative_data:
                cumulative_data[word].extend(timestamps)
            else:
                cumulative_data[word] = timestamps
        # Save updated data
        save_frequency_to_pickle(cumulative_data, self.pickle_file)
    def show_results(self):
        """Print the top cumulative word frequencies from the pickle store."""
        # ... existing code ...
        # Add cumulative frequency display
        print("\nCumulative Frequencies (all-time):")
        try:
            cumulative_data = load_record(self.pickle_file)
            # Sort by cumulative frequency (length of timestamp list)
            sorted_words = sorted(cumulative_data.items(),
                                  key=lambda x: len(x[1]),
                                  reverse=True)
            for word, timestamps in sorted_words[:20]:  # Show top 20
                print(f"{word}: {len(timestamps)} times")
        except (FileNotFoundError, EOFError):
            print("No cumulative data available yet")
## CLI entry point
if __name__ == '__main__':
    argc = len(sys.argv)
    if argc == 1:
        # No file given: read the text from standard input.
        s = input()
    elif argc == 2:
        # One argument: treat it as the input file name.
        s = file2str(sys.argv[1])
    else:
        print('I can accept at most 2 arguments.')
        sys.exit()  # nothing below runs
    s = remove_punctuation(s)
    L = freq(s)
    for word, count in sort_in_descending_order(L):
        print('%s\t%d\t%s' % (word, count, youdao_link(word)))
    # Dump the frequencies to result.html.
    make_html_page(sort_in_descending_order(L), 'result.html')
    print('\nHistory:\n')
    if os.path.exists('frequency.p'):
        d = pickle_idea.load_record('frequency.p')
    else:
        d = {}
    lst_history = pickle_idea.dict2lst(d)
    print(sort_in_descending_order(lst_history))
    # Merge today's counts into the cumulative history and persist it.
    d = pickle_idea.merge_frequency(L, lst_history)
    pickle_idea.save_frequency_to_pickle(d, 'frequency.p')