删除 app/test/test_bug551_DingZeYu.py

上传文件至 app/test
Merge pull request 'WIP：Bug529-GuHan' (#88 ) from Bug529-GuHan into master
2024-05-06 11:42:32 +08:00 · 2024-05-06 11:36:36 +08:00 · 2023-06-04 12:39:34 +08:00 · 2023-05-28 16:31:12 +08:00 · 2023-05-25 22:30:06 +08:00 · 2023-05-25 21:23:25 +08:00
17 changed files with 296 additions and 234 deletions
--- a/app/Article.py
+++ b/app/Article.py
@ -7,7 +7,7 @@ import random, glob
 import hashlib
 from datetime import datetime
 from flask import Flask, request, redirect, render_template, url_for, session, abort, flash, get_flashed_messages
-from difficulty import get_difficulty_level, text_difficulty_level, user_difficulty_level
+from difficulty import get_difficulty_level_for_user, text_difficulty_level, user_difficulty_level


 path_prefix = '/var/www/wordfreq/wordfreq/'
@ -32,20 +32,20 @@ def get_article_body(s):
    return '\n'.join(lst)


-def get_today_article(user_word_list, had_read_articles):
+def get_today_article(user_word_list, visited_articles):
    rq = RecordQuery(path_prefix + 'static/wordfreqapp.db')
-    if had_read_articles is None:
-        had_read_articles = {
+    if visited_articles is None:
+        visited_articles = {
            "index" : 0,  # 为 article_ids 的索引
            "article_ids": []  # 之前显示文章的id列表，越后越新
        }
-    if had_read_articles["index"] > len(had_read_articles["article_ids"])-1:  # 生成新的文章，因此查找所有的文章
+    if visited_articles["index"] > len(visited_articles["article_ids"])-1:  # 生成新的文章，因此查找所有的文章
        rq.instructions("SELECT * FROM article")
    else:  # 生成阅读过的文章，因此查询指定 article_id 的文章
-        if had_read_articles["article_ids"][had_read_articles["index"]] == 'null':  # 可能因为直接刷新页面导致直接去查询了'null'，因此当刷新的页面的时候，需要直接进行“上一篇”操作
-            had_read_articles["index"] -= 1
-            had_read_articles["article_ids"].pop()
-        rq.instructions('SELECT * FROM article WHERE article_id=%d' % (had_read_articles["article_ids"][had_read_articles["index"]]))
+        if visited_articles["article_ids"][visited_articles["index"]] == 'null':  # 可能因为直接刷新页面导致直接去查询了'null'，因此当刷新的页面的时候，需要直接进行“上一篇”操作
+            visited_articles["index"] -= 1
+            visited_articles["article_ids"].pop()
+        rq.instructions('SELECT * FROM article WHERE article_id=%d' % (visited_articles["article_ids"][visited_articles["index"]]))
    rq.do()
    result = rq.get_results()
    random.shuffle(result)
@ -53,32 +53,32 @@ def get_today_article(user_word_list, had_read_articles):
    # Choose article according to reader's level
    d1 = load_freq_history(path_prefix + 'static/frequency/frequency.p')
    d2 = load_freq_history(path_prefix + 'static/words_and_tests.p')
-    d3 = get_difficulty_level(d1, d2)
+    d3 = get_difficulty_level_for_user(d1, d2)

    d = None
    result_of_generate_article = "not found"
    d_user = load_freq_history(user_word_list)
    user_level = user_difficulty_level(d_user, d3)  # more consideration as user's behaviour is dynamic. Time factor should be considered.
    text_level = 0
-    if had_read_articles["index"] > len(had_read_articles["article_ids"])-1:  # 生成新的文章
-        amount_of_had_read_articles = len(had_read_articles["article_ids"])
+    if visited_articles["index"] > len(visited_articles["article_ids"])-1:  # 生成新的文章
+        amount_of_visited_articles = len(visited_articles["article_ids"])
        amount_of_existing_articles = result.__len__()
-        if amount_of_had_read_articles == amount_of_existing_articles:  # 如果当前阅读过的文章的数量 == 存在的文章的数量，即所有的书本都阅读过了
+        if amount_of_visited_articles == amount_of_existing_articles:  # 如果当前阅读过的文章的数量 == 存在的文章的数量，即所有的书本都阅读过了
            result_of_generate_article = "had read all articles"
        else:
            for k in range(3):  # 最多尝试3次
                for reading in result:
                    text_level = text_difficulty_level(reading['text'], d3)
                    factor = random.gauss(0.8, 0.1)  # a number drawn from a Gaussian distribution with a mean of 0.8 and a standard deviation of 0.1
-                    if reading['article_id'] not in had_read_articles["article_ids"] and within_range(text_level, user_level, (8.0 - user_level) * factor):  # 新的文章之前没有出现过且符合一定范围的水平
+                    if reading['article_id'] not in visited_articles["article_ids"] and within_range(text_level, user_level, (8.0 - user_level) * factor):  # 新的文章之前没有出现过且符合一定范围的水平
                        d = reading
-                        had_read_articles["article_ids"].append(d['article_id'])  # 列表添加新的文章id；下面进行
+                        visited_articles["article_ids"].append(d['article_id'])  # 列表添加新的文章id；下面进行
                        result_of_generate_article = "found"
                        break
                if result_of_generate_article == "found":  # 用于成功找到文章后及时退出外层循环
                    break
        if result_of_generate_article != "found":  # 阅读完所有文章，或者循环3次没有找到适合的文章，则放入空（“null”）
-            had_read_articles["article_ids"].append('null')
+            visited_articles["article_ids"].append('null')
    else:  # 生成已经阅读过的文章
        d = random.choice(result)
        text_level = text_difficulty_level(d['text'], d3)
@ -97,7 +97,7 @@ def get_today_article(user_word_list, had_read_articles):
            "answer": get_answer_part(d['question'])
        }

-    return had_read_articles, today_article, result_of_generate_article
+    return visited_articles, today_article, result_of_generate_article


 def load_freq_history(path):
--- a/app/Login.py
+++ b/app/Login.py
@ -3,6 +3,18 @@ import string
 from datetime import datetime, timedelta
 from UseSqlite import InsertQuery, RecordQuery

+def md5(s):
+    '''
+    MD5摘要
+    :param str: 字符串
+    :return: 经MD5以后的字符串
+    '''
+    h = hashlib.md5(s.encode(encoding='utf-8'))
+    return h.hexdigest()
+
+# import model.user after the definition of md5(s) to avoid a circular import
+from model.user import get_user_by_username, insert_user, update_password_by_username
+
 path_prefix = '/var/www/wordfreq/wordfreq/'
 path_prefix = './'  # comment this line in deployment

@ -12,13 +24,9 @@ def verify_pass(newpass,oldpass):


 def verify_user(username, password):
-    rq = RecordQuery(path_prefix + 'static/wordfreqapp.db')
-    password = md5(username + password)
-    rq.instructions_with_parameters("SELECT * FROM user WHERE name=:username AND password=:password", dict(
-        username=username, password=password))  # the named style https://docs.python.org/3/library/sqlite3.html
-    rq.do_with_parameters()
-    result = rq.get_results()
-    return result != []
+    user = get_user_by_username(username)
+    encoded_password = md5(username + password)
+    return user is not None and user.password == encoded_password


 def add_user(username, password):
@ -26,19 +34,12 @@ def add_user(username, password):
    expiry_date = (datetime.now() + timedelta(days=30)).strftime('%Y%m%d') # will expire after 30 days
    # 将用户名和密码一起加密，以免暴露不同用户的相同密码
    password = md5(username + password)
-    rq = InsertQuery(path_prefix + 'static/wordfreqapp.db')
-    rq.instructions_with_parameters("INSERT INTO user VALUES (:username, :password, :start_date, :expiry_date)", dict(
-        username=username, password=password, start_date=start_date, expiry_date=expiry_date))
-    rq.do_with_parameters()
+    insert_user(username=username, password=password, start_date=start_date, expiry_date=expiry_date)


 def check_username_availability(username):
-    rq = RecordQuery(path_prefix + 'static/wordfreqapp.db')
-    rq.instructions_with_parameters(
-        "SELECT * FROM user WHERE name=:username", dict(username=username))
-    rq.do_with_parameters()
-    result = rq.get_results()
-    return result == []
+    existed_user = get_user_by_username(username)
+    return existed_user is None


 def change_password(username, old_password, new_password):
@ -54,35 +55,16 @@ def change_password(username, old_password, new_password):
    # 将用户名和密码一起加密，以免暴露不同用户的相同密码
    if verify_pass(new_password,old_password): #新旧密码一致
        return False
-    password = md5(username + new_password)
-    rq = InsertQuery(path_prefix + 'static/wordfreqapp.db')
-    rq.instructions_with_parameters("UPDATE user SET password=:password WHERE name=:username", dict(
-        password=password, username=username))
-    rq.do_with_parameters()
+    update_password_by_username(username, new_password)
    return True


 def get_expiry_date(username):
-    rq = RecordQuery(path_prefix + 'static/wordfreqapp.db')
-    rq.instructions_with_parameters(
-        "SELECT expiry_date FROM user WHERE name=:username", dict(username=username))
-    rq.do_with_parameters()
-    result = rq.get_results()
-    if len(result) > 0:
-        return result[0]['expiry_date']
-    else:
+    user = get_user_by_username(username)
+    if user is None:
        return '20191024'
-
-
-def md5(s):
-    '''
-    MD5摘要
-    :param str: 字符串
-    :return: 经MD5以后的字符串
-    '''
-    h = hashlib.md5(s.encode(encoding='utf-8'))
-    return h.hexdigest()
-
+    else:
+        return user.expiry_date

 class UserName:
    def __init__(self, username):
--- a/app/account_service.py
+++ b/app/account_service.py
@ -37,7 +37,7 @@ def signup():
                session[username] = username
                session['username'] = username
                session['expiry_date'] = get_expiry_date(username)
-                session['had_read_articles'] = None
+                session['visited_articles'] = None
                return jsonify({'status': '2'})
            else:
                return jsonify({'status': '1'})
@ -66,7 +66,7 @@ def login():
            session['username'] = username
            user_expiry_date = get_expiry_date(username)
            session['expiry_date'] = user_expiry_date
-            session['had_read_articles'] = None
+            session['visited_articles'] = None
            return jsonify({'status': '1'})
        else:
            return jsonify({'status': '0'})
--- a/app/admin_service.py
+++ b/app/admin_service.py
@ -91,10 +91,7 @@ def article():
        question = data.get("question", "")
        level = data.get("level", "4")
        if content:
-            try:  # check level
-                if level not in ['1', '2', '3', '4']:
-                    raise ValueError
-            except ValueError:
+            if level not in ['1', '2', '3', '4']:
                return "Level must be between 1 and 4."
            add_article(content, source, level, question)
            _update_context()
--- a/app/difficulty.py
+++ b/app/difficulty.py
@ -8,6 +8,7 @@
 import pickle
 import math
 from wordfreqCMD import remove_punctuation, freq, sort_in_descending_order, sort_in_ascending_order
+import snowballstemmer


 def load_record(pickle_fname):
@ -17,40 +18,50 @@ def load_record(pickle_fname):
    return d


-def difficulty_level_from_frequency(word, d):
-    level = 1
-    if not word in d:
-        return level
+def convert_test_type_to_difficulty_level(d):
+    """
+    对原本的单词库中的单词进行难度评级
+    :param d: 存储了单词库pickle文件中的单词的字典
+    :return:
+    """
+    result = {}
+    L = list(d.keys())  # in d, we have test types (e.g., CET4,CET6,BBC) for each word

-    if 'what' in d:
-        ratio = (d['what']+1)/(d[word]+1) # what is a frequent word
-        level = math.log( max(ratio, 1), 2)
+    for k in L:
+        if 'CET4' in d[k]:
+            result[k] = 4  # CET4 word has level 4
+        elif 'OXFORD3000' in d[k]:
+            result[k] = 5
+        elif 'CET6' in d[k] or 'GRADUATE' in d[k]:
+            result[k] = 6
+        elif 'OXFORD5000' in d[k] or 'IELTS' in d[k]:
+            result[k] = 7
+        elif 'BBC' in d[k]:
+            result[k] = 8

-    level = min(level, 8) 
-    return level
+    return result  # {'apple': 4, ...}


-def get_difficulty_level(d1, d2):
-    d = {}
-    L = list(d1.keys())  # in d1, we have freuqence for each word
-    L2 = list(d2.keys()) # in d2, we have test types (e.g., CET4,CET6,BBC) for each word
-    L.extend(L2)
-    L3 = list(set(L)) # L3 contains all words
-    for k in L3:
-        if k in d2:
-            if 'CET4' in d2[k]:
-                d[k] = 4 # CET4 word has level 4
-            elif 'CET6' in d2[k]:
-                d[k] = 6
-            elif 'BBC' in d2[k]:
-                d[k] = 8
-                if k in d1: # BBC could contain easy words that are not in CET4 or CET6.  So 4 is not reasonable.  Recompute difficulty level.
-                    d[k] = min(difficulty_level_from_frequency(k, d1), d[k])
-        elif k in d1:
-            d[k] = difficulty_level_from_frequency(k, d1)
-
-    return d
+def get_difficulty_level_for_user(d1, d2):
+    """
+    d2 来自于词库的35511个已标记单词
+    d1 用户不会的词
+    在d2的后面添加单词，没有新建一个新的字典
+    """
+    # TODO: convert_test_type_to_difficulty_level() should not be called every time.  Each word's difficulty level should be pre-computed.
+    d2 = convert_test_type_to_difficulty_level(d2)  # 根据d2的标记评级{'apple': 4, 'abandon': 4, ...}
+    stemmer = snowballstemmer.stemmer('english')

+    for k in d1:  # 用户的词
+        if k in d2:  # 如果用户的词以原型的形式存在于词库d2中
+            continue  # 无需评级，跳过
+        else:
+            stem = stemmer.stemWord(k)
+            if stem in d2:  # 如果用户的词的词根存在于词库d2的词根库中
+                d2[k] = d2[stem]  # 按照词根进行评级
+            else:
+                d2[k] = 3  # 如果k的词根都不在，那么就当认为是3级
+    return d2


 def revert_dict(d):
@ -62,12 +73,13 @@ def revert_dict(d):
    for k in d:
        if type(d[k]) is list:  # d[k] is a list of dates.
            lst = d[k]
-        elif type(d[k]) is int: # for backward compatibility.  d was sth like {'word':1}.  The value d[k] is not a list of dates, but a number representing how frequent this word had been added to the new word book. 
+        elif type(d[
+                      k]) is int:  # for backward compatibility.  d was sth like {'word':1}.  The value d[k] is not a list of dates, but a number representing how frequent this word had been added to the new word book.
            freq = d[k]
-            lst = freq*['2021082019'] # why choose this date?  No particular reasons.  I fix the bug in this date.
+            lst = freq * ['2021082019']  # why choose this date?  No particular reason.  I fixed the bug on this date.

        for time_info in lst:
-            date = time_info[:10] # until hour
+            date = time_info[:10]  # until hour
            if not date in d2:
                d2[date] = [k]
            else:
@ -76,42 +88,44 @@ def revert_dict(d):


 def user_difficulty_level(d_user, d):
-    d_user2 = revert_dict(d_user) # key is date, and value is a list of words added in that date
+    d_user2 = revert_dict(d_user)  # key is date, and value is a list of words added in that date
    count = 0
    geometric = 1
-    for date in sorted(d_user2.keys(), reverse=True): # most recently added words are more important while determining user's level
-        lst = d_user2[date] # a list of words
-        lst2 = [] # a list of tuples, (word, difficulty level)
-        for  word in lst:
+    for date in sorted(d_user2.keys(),
+                       reverse=True):  # most recently added words are more important while determining user's level
+        lst = d_user2[date]  # a list of words
+        lst2 = []  # a list of tuples, (word, difficulty level)
+        for word in lst:
            if word in d:
                lst2.append((word, d[word]))

-        lst3 = sort_in_ascending_order(lst2) # easiest tuple first
-        #print(lst3)
+        lst3 = sort_in_ascending_order(lst2)  # easiest tuple first
+        # print(lst3)
        for t in lst3:
            word = t[0]
            hard = t[1]
-            #print('WORD %s HARD %4.2f' % (word, hard))
+            # print('WORD %s HARD %4.2f' % (word, hard))
            geometric = geometric * (hard)
            count += 1
            if count >= 10:
-                return geometric**(1/count)
+                return geometric ** (1 / count)

-    return geometric**(1/max(count,1))
+    return geometric ** (1 / max(count, 1))


 def text_difficulty_level(s, d):
    s = remove_punctuation(s)
    L = freq(s)

-    lst = [] # a list of tuples, each tuple being (word, difficulty level)
+    lst = []  # a list of tuples, each tuple being (word, difficulty level)
+    stop_words = {'the':1, 'and':1, 'of':1, 'to':1, 'what':1, 'in':1, 'there':1, 'when':1, 'them':1, 'would':1, 'will':1, 'out':1, 'his':1, 'mr':1, 'that':1, 'up':1, 'more':1, 'your':1, 'it':1, 'now':1, 'very':1, 'then':1, 'could':1, 'he':1, 'any':1, 'some':1, 'with':1, 'into':1, 'you':1, 'our':1, 'man':1, 'other':1, 'time':1, 'was':1, 'than':1, 'know':1, 'about':1, 'only':1, 'like':1, 'how':1, 'see':1, 'is':1, 'before':1, 'such':1, 'little':1, 'two':1, 'its':1, 'as':1, 'these':1, 'may':1, 'much':1, 'down':1, 'for':1, 'well':1, 'should':1, 'those':1, 'after':1, 'same':1, 'must':1, 'say':1, 'first':1, 'again':1, 'us':1, 'great':1, 'where':1, 'being':1, 'come':1, 'over':1, 'good':1, 'himself':1, 'am':1, 'never':1, 'on':1, 'old':1, 'here':1, 'way':1, 'at':1, 'go':1, 'upon':1, 'have':1, 'had':1, 'without':1, 'my':1, 'day':1, 'be':1, 'but':1, 'though':1, 'from':1, 'not':1, 'too':1, 'another':1, 'this':1, 'even':1, 'still':1, 'her':1, 'yet':1, 'under':1, 'by':1, 'let':1, 'just':1, 'all':1, 'because':1, 'we':1, 'always':1, 'off':1, 'yes':1, 'so':1, 'while':1, 'why':1, 'which':1, 'me':1, 'are':1, 'or':1, 'no':1, 'if':1, 'an':1, 'also':1, 'thus':1, 'who':1, 'cannot':1, 'she':1, 'whether':1} # ignore these words while computing the artile's difficulty level
    for x in L:
        word = x[0]
-        if word in d:
+        if word not in stop_words and word in d:
            lst.append((word, d[word]))

-    lst2 = sort_in_descending_order(lst) # most difficult words on top
-    #print(lst2)
+    lst2 = sort_in_descending_order(lst)  # most difficult words on top
+    # print(lst2)
    count = 0
    geometric = 1
    for t in lst2:
@ -119,24 +133,20 @@ def text_difficulty_level(s, d):
        hard = t[1]
        geometric = geometric * (hard)
        count += 1
-        if count >= 20: # we look for n most difficult words
-            return geometric**(1/count)
-        
-    return geometric**(1/max(count,1))
+        if count >= 20:  # we look for n most difficult words
+            return geometric ** (1 / count)

+    return geometric ** (1 / max(count, 1))


 if __name__ == '__main__':
-
-
    d1 = load_record('frequency.p')
-    #print(d1)
+    # print(d1)

    d2 = load_record('words_and_tests.p')
-    #print(d2)
+    # print(d2)

-
-    d3 = get_difficulty_level(d1, d2)
+    d3 = get_difficulty_level_for_user(d1, d2)

    s = '''
 South Lawn
@ -197,7 +207,6 @@ Amidst the aftermath of this shocking referendum vote, there is great uncertaint

 '''

-
    s = '''
 British Prime Minister Boris Johnson walks towards a voting station during the Brexit referendum in Britain, June 23, 2016. (Photo: EPA-EFE)

@ -218,7 +227,6 @@ The prime minister was forced to ask for an extension to Britain's EU departure
 Johnson has repeatedly pledged to finalize the first stage, a transition deal, of Britain's EU divorce battle by Oct. 31. A second stage will involve negotiating its future relationship with the EU on trade, security and other salient issues.
 '''

-
    s = '''
 Thank you very much. We have a Cabinet meeting. We’ll have a few questions after grace. And, if you would, Ben, please do the honors.

@ -233,17 +241,11 @@ We need — for our farmers, our manufacturers, for, frankly, unions and non-uni

 '''

-
-
-
-    #f = open('bbc-fulltext/bbc/entertainment/001.txt')
+    # f = open('bbc-fulltext/bbc/entertainment/001.txt')
    f = open('wordlist.txt')
    s = f.read()
    f.close()

-
-
-    
    print(text_difficulty_level(s, d3))


--- a/app/model/user.py
+++ b/app/model/user.py
@ -1,5 +1,6 @@
 from model import *
 from Login import md5
+from pony import orm

 def get_users():
    with db_session:
@ -11,6 +12,11 @@ def get_user_by_username(username):
        if user:
            return user.first()

+def insert_user(username, password, start_date, expiry_date):
+    with db_session:
+        user = User(name=username, password=password, start_date=start_date, expiry_date=expiry_date)
+        orm.commit()
+
 def update_password_by_username(username, password="123456"):
    with db_session:
        user = User.select(name=username)
--- a/app/static/config.yml
+++ b/app/static/config.yml
@ -7,6 +7,7 @@ css:
 js:
  head: # 在页面加载之前加载
    - ../static/js/jquery.js
+    - ../static/js/read.js
    - ../static/js/word_operation.js
  bottom: # 在页面加载完之后加载
    - ../static/js/fillword.js
--- a/app/static/js/fillword.js
+++ b/app/static/js/fillword.js
@ -1,9 +1,5 @@
 let isRead = true;
 let isChoose = true;
-let reader = window.speechSynthesis; // 全局定义朗读者，以便朗读和暂停
-let current_position = 0; // 朗读文本的当前位置
-let original_position = 0; // 朗读文本的初始位置
-let to_speak = ""; // 朗读的初始内容

 function getWord() {
    return window.getSelection ? window.getSelection() : document.selection.createRange().text;
@ -11,7 +7,7 @@ function getWord() {

 function fillInWord() {
    let word = getWord();
-    if (isRead) read(word);
+    if (isRead) Reader.read(word, inputSlider.value);
    if (!isChoose) return;
    const element = document.getElementById("selected-words");
    element.value = element.value + " " + word;
@ -19,50 +15,17 @@ function fillInWord() {

 document.getElementById("text-content").addEventListener("click", fillInWord, false);

-function makeUtterance(str, rate) {
-    let msg = new SpeechSynthesisUtterance(str);
-    msg.rate = rate;
-    msg.lang = "en-US"; // TODO: add language options menu
-    msg.onboundary = ev => {
-        if (ev.name == "word") {
-            current_position = ev.charIndex;
-        }
-    }
-    return msg;
-}
-
-const sliderValue = document.getElementById("rangeValue"); // 显示值
-const inputSlider = document.getElementById("rangeComponent"); // 滑块元素
+const sliderValue = document.getElementById("rangeValue");
+const inputSlider = document.getElementById("rangeComponent");
 inputSlider.oninput = () => {
-    let value = inputSlider.value; // 获取滑块的值
+    let value = inputSlider.value;
    sliderValue.textContent = value + '×';
-    if (!reader.speaking) return;
-    reader.cancel();
-    let msg = makeUtterance(to_speak.substring(original_position + current_position), value);
-    original_position = original_position + current_position;
-    current_position = 0;
-    reader.speak(msg);
 };

-function read(s) {
-    to_speak = s.toString();
-    original_position = 0;
-    current_position = 0;
-    let msg = makeUtterance(to_speak, inputSlider.value);
-    reader.speak(msg);
-}
-
 function onReadClick() {
    isRead = !isRead;
-    if (!isRead) {
-        reader.cancel();
-    }
 }

 function onChooseClick() {
    isChoose = !isChoose;
 }
-
-function stopRead() {
-    reader.cancel();
-}
--- a/app/static/js/highlight.js
+++ b/app/static/js/highlight.js
@ -38,8 +38,18 @@ function highLight() {
        list[i] = list[i].replace('|', "");
        list[i] = list[i].replace('?', "");
        if (list[i] !== "" && "<mark>".indexOf(list[i]) === -1 && "</mark>".indexOf(list[i]) === -1) {
-	    //将文章中所有出现该单词word的地方改为：" <mark>" + word + "<mark> "。 正则表达式RegExp()中，"\\s"代表单词前后必须要有空格，以防止只对单词中的部分字符高亮的情况出现。
-            articleContent = articleContent.replace(new RegExp("\\s"+list[i]+"\\s", "g"), " <mark>" + list[i] + "</mark> ");
+           // Replace every occurrence of the word in the article with "<mark>" + word + "</mark>". In the RegExp() pattern, "\\b" matches a word boundary.
+
+            //修改代码
+            let articleContent_fb = articleContent;  //文章副本
+            while(articleContent_fb.toLowerCase().indexOf(list[i].toLowerCase()) !== -1 && list[i]!=""){
+                //找到副本中和list[i]匹配的第一个单词(第一种匹配情况),并赋值给list[i]。
+                const index = articleContent_fb.toLowerCase().indexOf(list[i].toLowerCase());
+                list[i] = articleContent_fb.substring(index, index + list[i].length);
+
+                articleContent_fb = articleContent_fb.substring(index + list[i].length);    // 使用副本中list[i]之后的子串替换掉副本
+                articleContent = articleContent.replace(new RegExp("\\b"+list[i]+"\\b","g"),"<mark>" + list[i] + "</mark>");
+            }
        }
    }
    document.getElementById("article").innerHTML = articleContent;
--- a/app/static/js/read.js
+++ b/app/static/js/read.js
@ -0,0 +1,35 @@
+var Reader = (function() {
+    let reader = window.speechSynthesis;
+    let current_position = 0;
+    let original_position = 0;
+    let to_speak = "";
+
+    function makeUtterance(str, rate) {
+        let msg = new SpeechSynthesisUtterance(str);
+        msg.rate = rate;
+        msg.lang = "en-US";
+        msg.onboundary = ev => {
+            if (ev.name == "word") {
+                current_position = ev.charIndex;
+            }
+        }
+        return msg;
+    }
+
+    function read(s, rate) {
+        to_speak = s.toString();
+        original_position = 0;
+        current_position = 0;
+        let msg = makeUtterance(to_speak, rate);
+        reader.speak(msg);
+    }
+
+    function stopRead() {
+        reader.cancel();
+    }
+
+    return {
+        read: read,
+        stopRead: stopRead
+    };
+})();
--- a/app/static/js/word_operation.js
+++ b/app/static/js/word_operation.js
@ -62,6 +62,13 @@ function delete_word(theWord) {
    });
 }

+function read_word(theWord) {
+    let to_speak = $("#word_" + theWord).text();
+    original_position = 0;
+    current_position = 0;
+    Reader.read(to_speak, inputSlider.value);
+}
+
 /* 
 * interface Word {
 *   word: string,
@ -95,6 +102,7 @@ function wordTemplate(word) {
        <a class="btn btn-success" onclick="familiar('${word.word}')" role="button">熟悉</a>
        <a class="btn btn-warning" onclick="unfamiliar('${word.word}')" role="button">不熟悉</a>
        <a class="btn btn-danger" onclick="delete_word('${word.word}')" role="button">删除</a>
+        <a class="btn btn-info" onclick="read_word('${word.word}')" role="button">朗读</a>
    </p>`;
 }

--- a/app/static/words_and_tests.p
+++ b/app/static/words_and_tests.p
--- a/app/templates/admin_manage_user.html
+++ b/app/templates/admin_manage_user.html
@ -68,9 +68,9 @@
 <script>
    // 密码生成器
    function generatePassword(length) {
-        var charset = "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789!@#$%^&*()_+~`|}{[]\:;?><,./-=";
-        var password = "";
-        for (var i = 0; i < length; i++) {
+        const charset = "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789!@#$%^*()_+~`|}{[]\:;?,./-=";
+        let password = "";
+        for (let i = 0; i < length; i++) {
            password += charset.charAt(Math.floor(Math.random() * charset.length));
        }
        return password;
--- a/app/templates/userpage_get.html
+++ b/app/templates/userpage_get.html
@ -49,23 +49,19 @@
 {#        <div class="alert alert-warning" role="alert">Congratulations! {{ message }}</div>#}
 {#    {% endfor %}#}

-    {% if result_of_generate_article != "had read all articles" %}
-        <a id="next_btn" class="btn btn-success" href="/{{ username }}/reset" role="button"> 下一篇 Next Article </a>
-    {% endif %}
-    {% if session.get('had_read_articles') and session.get('had_read_articles')['index']>0 %}
-        <a id="pre_btn" class="btn btn-success" href="/{{ username }}/back" role="button"> 上一篇 Previous Article </a>
-    {% endif %}
+        <button class="btn btn-success" id="load_next_article" onclick="load_next_article()"> 下一篇 Next Article </button>
+        <button class="btn btn-success" id="load_pre_article" onclick="load_pre_article()" > 上一篇 Previous Article </button>

    <p><b>阅读文章并回答问题</b></p>
    <div id="text-content">
-        {% if result_of_generate_article == 'found' %}
-            <div class="alert alert-success" role="alert">According to your word list, your level is <span class="badge bg-success">{{ today_article["user_level"] }}</span>  and we have chosen an article with a difficulty level of <span class="badge bg-success">{{ today_article["text_level"] }}</span> for you.</div>
-                <p class="text-muted">Article added on: {{ today_article["date"] }}</p><br/>
+        <div id="found">
+            <div class="alert alert-success" role="alert">According to your word list, your level is <span class="badge bg-success" id="user-level">{{ today_article["user_level"] }}</span>  and we have chosen an article with a difficulty level of <span class="badge bg-success" id="text_level">{{ today_article["text_level"] }}</span> for you.</div>
+                <p class="text-muted" id="date">Article added on: {{ today_article["date"] }}</p><br/>
            <div class="p-3 mb-2 bg-light text-dark"><br/>
-            <p class="display-5">{{ today_article["article_title"] }}</p><br/>
+            <p class="display-5" id="article_title">{{ today_article["article_title"] }}</p><br/>
            <p class="lead"><font id="article" size=2>{{ today_article["article_body"] }}</font></p><br/>
-            <p><small class="text-muted">{{ today_article['source'] }}</small></p><br/>
-            <p><b>{{ today_article['question'] }}</b></p><br/>
+            <p><small class="text-muted" id="source">{{ today_article['source'] }}</small></p><br/>
+            <p><b id="question">{{ today_article['question'] }}</b></p><br/>
                <script type="text/javascript">
                    function toggle_visibility(id) { {# https://css-tricks.com/snippets/javascript/showhide-element/#}
                        const e = document.getElementById(id);
@ -78,15 +74,13 @@
                <button onclick="toggle_visibility('answer');">ANSWER</button>
                <div id="answer" style="display:none;">{{ today_article['answer'] }}</div><br/>
            </div>
-        {% elif result_of_generate_article == "not found" %}
-            <div class="alert alert-success" role="alert">
-                <p class="text-muted"><span class="badge bg-success">Notes:</span><br>No article is currently available for you. You can try again a few times or mark new words in the passage to improve your level.</p>
-            </div>
-        {% elif result_of_generate_article == "had read all articles" %}
-            <div class="alert alert-success" role="alert">
-                <p class="text-muted"><span class="badge bg-success">Notes:</span><br>You've read all the articles.</p>
-            </div>
-        {% endif %}
+        </div>
+        <div class="alert alert-success" role="alert" id="not_found" style="display:none;">
+            <p class="text-muted"><span class="badge bg-success">Notes:</span><br>No article is currently available for you. You can try again a few times or mark new words in the passage to improve your level.</p>
+        </div>
+        <div class="alert alert-success" role="alert" id="read_all" style="display:none;">
+            <p class="text-muted"><span class="badge bg-success">Notes:</span><br>You've read all the articles.</p>
+        </div>
    </div>

    <input type="checkbox" onclick="toggleHighlighting()" checked/>生词高亮
@ -139,6 +133,7 @@
                    <a class="btn btn-success" onclick="familiar('{{ word }}')" role="button">熟悉</a>
                    <a class="btn btn-warning" onclick="unfamiliar('{{ word }}')" role="button">不熟悉</a>
                    <a class="btn btn-danger" onclick="delete_word('{{ word }}')" role="button">删除</a>
+                    <a class="btn btn-info" onclick="read_word('{{ word }}')" role="button">朗读</a>
                </p>
            {% endfor %}
        </div>
@@ -151,6 +146,67 @@
        <script src="{{ js }}"></script>
    {% endfor %}
 {% endif %}
+<script type="text/javascript">
+    function load_next_article(){
+        $.ajax({
+            url: '/get_next_article/{{username}}',
+            dataType: 'json',
+            success: function(data) {
+                // 更新页面内容
+                if(data['today_article']){
+                    update(data['today_article']);
+                    check_pre(data['visited_articles']);
+                    check_next(data['result_of_generate_article']);
+                }
+            }
+        });
+    }
+    function load_pre_article(){
+        $.ajax({
+            url: '/get_pre_article/{{username}}',
+            dataType: 'json',
+            success: function(data) {
+                // 更新页面内容
+                if(data['today_article']){
+                    update(data['today_article']);
+                    check_pre(data['visited_articles']);
+                }
+            }
+        });
+    }
+    function update(today_article){
+        $('#user-level').html(today_article['user_level']);
+        $('#text_level').html(today_article["text_level"]);
+        $('#date').html('Article added on: '+today_article["date"]);
+        $('#article_title').html(today_article["article_title"]);
+        $('#article').html(today_article["article_body"]);
+        $('#source').html(today_article['source']);
+        $('#question').html(today_article["question"]);
+        $('#answer').html(today_article["answer"]);
+    }
+    // Check whether a previous / next article exists; hide the corresponding button when it does not.
+    function check_pre(visited_articles){
+        if((visited_articles=='')||(visited_articles['index']<=0)){
+            $('#load_pre_article').hide();
+        }else{
+            $('#load_pre_article').show();
+        }
+    }
+    function check_next(result_of_generate_article){
+        if(result_of_generate_article == "found"){
+            $('#found').show();$('#not_found').hide();
+            $('#read_all').hide();
+        }else if(result_of_generate_article == "not found"){
+            $('#found').hide();
+            $('#not_found').show();
+            $('#read_all').hide();
+        }else{
+            $('#found').hide();
+            $('#not_found').hide();
+            $('#read_all').show();
+        }
+    }
+</script>
 </body>
 <style>
    mark {
--- a/app/user_service.py
+++ b/app/user_service.py
@@ -21,41 +21,46 @@ userService = Blueprint("user_bp", __name__)
 path_prefix = '/var/www/wordfreq/wordfreq/'
 path_prefix = './'  # comment this line in deployment

-
-@userService.route("/<username>/reset", methods=['GET', 'POST'])
-def user_reset(username):
-    '''
-    用户界面
-    :param username: 用户名
-    :return: 返回页面内容
-    '''
+@userService.route("/get_next_article/<username>",methods=['GET','POST'])
+def get_next_article(username):
+    user_freq_record = path_prefix + 'static/frequency/' + 'frequency_%s.pickle' % (username)
+    session['old_articleID'] = session.get('articleID')
    if request.method == 'GET':
-        had_read_articles = session.get("had_read_articles")
-        if had_read_articles['article_ids'][-1] == "null":  # 如果当前还是“null”，则将“null”pop出来,无需index+=1
-            had_read_articles['article_ids'].pop()
+        visited_articles = session.get("visited_articles")
+        if visited_articles['article_ids'][-1] == "null":  # 如果当前还是“null”，则将“null”pop出来,无需index+=1
+            visited_articles['article_ids'].pop()
        else:  # 当前不为“null”，直接 index+=1
-            had_read_articles["index"] += 1
-        session["had_read_articles"] = had_read_articles
-        return redirect(url_for('user_bp.userpage', username=username))
+            visited_articles["index"] += 1
+        session["visited_articles"] = visited_articles
+        visited_articles, today_article, result_of_generate_article = get_today_article(user_freq_record, session.get('visited_articles'))
+        data = {
+            'visited_articles': visited_articles,
+            'today_article': today_article,
+            'result_of_generate_article': result_of_generate_article
+        }
    else:
        return 'Under construction'
+    return json.dumps(data)

-@userService.route("/<username>/back", methods=['GET'])
-def user_back(username):
-    '''
-    用户界面
-    :param username: 用户名
-    :return: 返回页面内容
-    '''
+@userService.route("/get_pre_article/<username>",methods=['GET'])
+def get_pre_article(username):
+    user_freq_record = path_prefix + 'static/frequency/' + 'frequency_%s.pickle' % (username)
    if request.method == 'GET':
-        had_read_articles = session.get("had_read_articles")
-        had_read_articles["index"] -= 1  # 上一篇，index-=1
-        if had_read_articles['article_ids'][-1] == "null":  # 如果当前还是“null”，则将“null”pop出来
-            had_read_articles['article_ids'].pop()
-        session["had_read_articles"] = had_read_articles
-        return redirect(url_for('user_bp.userpage', username=username))
-
-
+        visited_articles = session.get("visited_articles")
+        if(visited_articles["index"]==0):
+            data=''
+        else:
+            visited_articles["index"] -= 1  # 上一篇，index-=1
+            if visited_articles['article_ids'][-1] == "null":  # 如果当前还是“null”，则将“null”pop出来
+                visited_articles['article_ids'].pop()
+            session["visited_articles"] = visited_articles
+            visited_articles, today_article, result_of_generate_article = get_today_article(user_freq_record, session.get('visited_articles'))
+            data = {
+                'visited_articles': visited_articles,
+                'today_article': today_article,
+                'result_of_generate_article':result_of_generate_article
+            }
+        return json.dumps(data)

 @userService.route("/<username>/<word>/unfamiliar", methods=['GET', 'POST'])
 def unfamiliar(username, word):
@@ -139,8 +144,8 @@ def userpage(username):
        words = ''
        for x in lst3:
            words += x[0] + ' '
-        had_read_articles, today_article, result_of_generate_article = get_today_article(user_freq_record, session.get('had_read_articles'))
-        session['had_read_articles'] = had_read_articles
+        visited_articles, today_article, result_of_generate_article = get_today_article(user_freq_record, session.get('visited_articles'))
+        session['visited_articles'] = visited_articles
        # 通过 today_article，加载前端的显示页面
        return render_template('userpage_get.html',
                               admin_name=ADMIN_NAME,
@@ -154,10 +159,6 @@ def userpage(username):
                               yml=Yaml.yml,
                               words=words)

-
-
-
-
 @userService.route("/<username>/mark", methods=['GET', 'POST'])
 def user_mark_word(username):
    '''
--- a/app/wordfreqCMD.py
+++ b/app/wordfreqCMD.py
@@ -39,7 +39,7 @@ def file2str(fname):#文件转字符


 def remove_punctuation(s): # 这里是s是形参 (parameter)。函数被调用时才给s赋值。
-    special_characters = '\_©~<=>+-/[]*&$%^@.,?!:;#()"“”—‘’{}|' # 把里面的字符都去掉
+    special_characters = '\_©~<=>+/[]*&$%^@.,?!:;#()"“”—‘’{}|' # 把里面的字符都去掉
    for c in special_characters:
        s = s.replace(c, ' ') # 防止出现把 apple,apple 移掉逗号后变成 appleapple 情况
    s = s.replace('--', ' ')
--- a/requirements.txt
+++ b/requirements.txt
@@ -2,3 +2,4 @@ Flask==1.1.2
 selenium==3.141.0
 PyYAML~=6.0
 pony==0.7.16
+snowballstemmer==2.2.0
Author	SHA1	Message	Date
丁晟晔	ff6286cf01	删除 app/test/test_bug551_DingZeYu.py	2024-05-06 11:42:32 +08:00
丁晟晔	1d7e61d751	上传文件至 app/test	2024-05-06 11:36:36 +08:00
顾涵	708a6a2821	Merge pull request 'WIP：Bug529-GuHan' (#88 ) from Bug529-GuHan into master Reviewed-on: http://121.4.94.30:3000/mrlan/EnglishPal/pulls/88	2023-06-04 12:39:34 +08:00
顾涵	688a198768	已经与Alpha-snapshot20230525 分支同步，重新提交	2023-05-28 16:31:12 +08:00
寻宇灿	1543b3095d	Merge remote-tracking branch 'origin/Alpha-snapshot20230519' into Refactor-XunYucan	2023-05-25 22:30:06 +08:00
寻宇灿	c6bf323c60	修改格式	2023-05-25 21:23:25 +08:00
寻宇灿	03ccb3527a	重构前端阅读js，新增阅读器全局对象，新增生词朗读按钮	2023-05-25 17:35:31 +08:00
Hui Lan	b41e1044bc	difficulty.py: add some stop words, hoping that getting the next article can be faster.	2023-05-24 10:12:44 +08:00
Hui Lan	67e921ba60	difficulty.py: todo.	2023-05-23 22:25:40 +08:00
Hui Lan	a5c3564f15	difficulty.py: do not stem a word twice.	2023-05-23 22:22:57 +08:00
Hui Lan	1295616d5b	Merge branch 'Bug476-YuHuangtao' of http://121.4.94.30:3000/mrlan/EnglishPal into Alpha-snapshot20230519	2023-05-23 19:50:30 +08:00
俞黄焘	c151a0efaa	去掉了get_difficulty_level_for_user的多出的break	2023-05-23 19:40:33 +08:00
顾涵	030b89706e	special_characters = '\_©~<=>+/[]*&$%^@.,?!:;#()"“”—‘’{}\|' 用于过滤字符，我将其中的“-”删去，使连字符没有被过滤，实现录入例如fifty-six等组合词的功能。另外对于删除过滤是否会引发字符bug，答案是肯定的，但是这段代码中的过滤字符虽然多，但是并没有完全过滤掉所有字符，（过滤的只是键盘上能打出的字符，不包括输入法中能打出的特殊字符），所以字符bug本身就一直存在，我认为减少一个“-”字符对程序的过滤过程不会造成问题。	2023-05-20 15:29:12 +08:00
Hui Lan	349488167b	requirements.txt: install snowballstemmer for better computing a word's difficulty level.	2023-05-19 09:03:20 +08:00
俞黄焘	39d96014d9	pull最新的snapshot-20230511，后更新了difficulty.py和Article.py的部分代码，提交了新的pickle文件	2023-05-18 23:29:38 +08:00
顾涵	acd8db6e3e	special_characters = '\_©~<=>+/[]*&$%^@.,?!:;#()"“”—‘’{}\|' 用于过滤字符，我将其中的“-”删去，使连字符没有被过滤，实现录入例如fifty-six等组合词的功能。另外对于删除过滤是否会引发字符bug，答案是肯定的，但是这段代码中的过滤字符虽然多，但是并没有完全过滤掉所有字符，（过滤的只是键盘上能打出的字符，不包括输入法中能打出的特殊字符），所以字符bug本身就一直存在，我认为减少一个对“1-”字符的过滤不会造成问题。	2023-05-15 19:24:43 +08:00
顾涵	9f3f5b43e1	special_characters = '\_©~<=>+/[]*&$%^@.,?!:;#()"“”—‘’{}\|' 用于过滤字符，我将其中的“-”删去，使连字符没有被过滤，实现录入例如fifty-six等组合词的功能。另外对于删除过滤是否会引发字符bug，答案是肯定的，但是这段代码中的过滤字符虽然多，但是并没有完全过滤掉所有字符，（过滤的只是键盘上能打出的字符，不包括输入法中能打出的特殊字符），所以字符bug本身就一直存在，我认为减少一个对“-”字符的过滤不会造成问题。	2023-05-15 19:15:30 +08:00
huangdan	d9f6df7fbe	AJAX载入文章数据	2023-05-11 15:51:10 +08:00
huangdan	5039f5710e	AJAX载入文章数据	2023-05-08 14:33:48 +08:00
Hui Lan	becef7e343	Merge branch 'Bug502-YuGaoXiang' of http://121.4.94.30:3000/mrlan/EnglishPal into Alpha-snapshot20230506	2023-05-07 15:59:35 +08:00
吴宇涵	01ecc83768	refactor: refactor the way to check article level	2023-05-06 17:42:04 +08:00
吴宇涵	f64d06fbbf	fix: fix Bug 531 and use ES6 grammar	2023-05-06 17:24:51 +08:00
Hui Lan	a4cc4fd011	Merge branch 'Bug522-HuangZirui' of http://121.4.94.30:3000/mrlan/EnglishPal into Alpha-snapshot20230506	2023-05-06 17:16:08 +08:00
ZhuZhihao	18ca48b422	Merge branch 'Bug522-HuangZirui' of http://121.4.94.30:3000/mrlan/EnglishPal into Bug522-HuangZirui	2023-05-05 17:21:49 +08:00
ZhuZhihao	a80b062b87	refactor: remove variable 'count'	2023-05-05 17:20:58 +08:00
Hui Lan	779dafefe8	Merge branch 'Bug509-XieQiuHan-WangZiming' of http://121.4.94.30:3000/mrlan/EnglishPal into Alpha-snapshot20230427	2023-04-27 07:21:15 +08:00
Hui Lan	e118d92659	Merge branch 'Alpha-snapshot20230425' of http://121.4.94.30:3000/mrlan/EnglishPal into Alpha-snapshot20230427	2023-04-27 07:20:21 +08:00
王梓铭	d30a434b2a	修改变量名had_read_articles->visited_articles	2023-04-25 17:47:51 +08:00
zzhaofisher	ce2e1f2978	Merge branch 'DevLocal' into Bug522-HuangZirui	2023-04-18 21:52:28 +08:00
zzhaofisher	11ae093fd7	Merge branch 'Alpha' into Bug522-HuangZirui	2023-04-18 21:52:01 +08:00
zzhaofisher	cc8ca47f8c	refactor: remove sql sentences	2023-04-18 21:50:54 +08:00
zzhaofisher	5d20e92061	Merge branch 'Bug522-HuangZirui' of http://121.4.94.30:3000/mrlan/EnglishPal into DevLocal	2023-04-18 21:50:18 +08:00
Hui Lan	3bce450620	黄子睿: 修复 'Otherwise,' 这种情况无法高亮的问题，即 Otherwise 后面跟了个逗号	2022-12-15 10:50:04 +08:00
Hui Lan	417dbc22f8	highlight.js: fix Bug 522.	2022-12-09 13:19:36 +08:00