为系统添加一个全局变量记录数据库的单词及其等级，使得数据库单词等级只需在登录时进行一次评级，大致能将点击下一篇的时间缩减为原来的10^-15次以下，感谢章翊、赵煜涵、唐伟、宋江涛同学的建议，没有他们我懒得改的

添加测试convert_test_type_to_difficulty_level和get_difficulty_level_for_user函数运行所需时间的几行代码
Merge branch 'Alpha-snapshot20230605' into Bug527-ZhouZhifang
2023-06-18 21:43:00 +08:00 · 2023-06-18 13:21:49 +08:00 · 2023-06-15 15:59:29 +08:00 · 2023-06-15 15:54:46 +08:00 · 2023-06-02 21:34:27 +08:00 · 2023-06-02 21:33:21 +08:00
6 changed files with 81 additions and 78 deletions
--- a/app/difficulty.py
+++ b/app/difficulty.py
@ -6,10 +6,14 @@
 # Purpose: compute difficulty level of a English text
 import pickle
-import math
+import time
-from wordfreqCMD import remove_punctuation, freq, sort_in_descending_order, sort_in_ascending_order
+
 import snowballstemmer
 from wordfreqCMD import remove_punctuation, freq, sort_in_descending_order, sort_in_ascending_order
 # 定义一个全局的res_d， 记录数据库单词评级之后的单词及其等级
 res_d = {}
 def load_record(pickle_fname):
    f = open(pickle_fname, 'rb')
@ -24,6 +28,7 @@ def convert_test_type_to_difficulty_level(d):
    :param d: 存储了单词库pickle文件中的单词的字典
    :return:
    """
    time_start = time.time()
    result = {}
    L = list(d.keys())  # in d, we have test types (e.g., CET4,CET6,BBC) for each word
@ -38,7 +43,10 @@ def convert_test_type_to_difficulty_level(d):
            result[k] = 7
        elif 'BBC' in d[k]:
            result[k] = 8
-
+    time_end = time.time()
    print('convert_test_type_to_difficulty_level totally cost', time_end - time_start)
    global res_d
    res_d = result
    return result  # {'apple': 4, ...}
@ -48,8 +56,12 @@ def get_difficulty_level_for_user(d1, d2):
    d1 用户不会的词
    在d2的后面添加单词，没有新建一个新的字典
    """
    time_start = time.time()
    # TODO: convert_test_type_to_difficulty_level() should not be called every time.  Each word's difficulty level should be pre-computed.
    if res_d == {}:
        d2 = convert_test_type_to_difficulty_level(d2)  # 根据d2的标记评级{'apple': 4, 'abandon': 4, ...}
    else:
        d2 = res_d
    stemmer = snowballstemmer.stemmer('english')
    for k in d1:  # 用户的词
@ -61,6 +73,8 @@ def get_difficulty_level_for_user(d1, d2):
                d2[k] = d2[stem]  # 按照词根进行评级
            else:
                d2[k] = 3  # 如果k的词根都不在，那么就当认为是3级
    time_end = time.time()
    print('get_difficulty_level_for_user totally cost', time_end - time_start)
    return d2
--- a/app/static/js/fillword.js
+++ b/app/static/js/fillword.js
@ -29,3 +29,8 @@ function onReadClick() {
 function onChooseClick() {
    isChoose = !isChoose;
 }
 // 如果网页刷新，停止播放声音
 if (performance.getEntriesByType("navigation")[0].type == "reload") {
    Reader.stopRead();
 }
--- a/app/static/js/highlight.js
+++ b/app/static/js/highlight.js
@ -22,62 +22,38 @@ function getWord() {
 function highLight() {
    if (!isHighlight) return;
-    let articleContent = document.getElementById("article").innerText; //将原来的.innerText改为.innerHtml，使用innerText会把原文章中所包含的<br>标签去除，导致处理后的文章内容失去了原来的格式
+    let articleContent = document.getElementById("article").innerHTML; // innerHTML保留HTML标签来保持部分格式，且适配不同的浏览器
    let pickedWords = document.getElementById("selected-words");  // words picked to the text area
    let dictionaryWords = document.getElementById("selected-words2"); // words appearing in the user's new words list
-    let allWords = "";  //初始化allWords的值，避免进入判断后编译器认为allWords未初始化的问题
+    let allWords = dictionaryWords === null ? pickedWords.value + " " : pickedWords.value + " " + dictionaryWords.value;
-    if(dictionaryWords != null){//增加一个判断，检查生词本里面是否为空，如果为空，allWords只添加选中的单词
+    const list = allWords.split(" "); // 将所有的生词放入一个list中
-        allWords = pickedWords.value + " " + dictionaryWords.value;
+    let totalSet = new Set();
    }
    else{
        allWords = pickedWords.value + " ";
    }
    const list = allWords.split(" ");//将所有的生词放入一个list中，用于后续处理
    for (let i = 0; i < list.length; ++i) {
-        list[i] = list[i].replace(/(^\s*)|(\s*$)/g, ""); //消除单词两边的空字符
+        list[i] = list[i].replace(/(^\W*)|(\W*$)/g, ""); // 消除单词两边的非单词字符
-        list[i] = list[i].replace('|', "");
+        if (list[i] != "" && !totalSet.has(list[i])) {
-        list[i] = list[i].replace('?', "");
+            // 返回所有匹配单词的集合, 正则表达式RegExp()中, "\b"匹配一个单词的边界, g 表示全局匹配, i 表示对大小写不敏感。
-        if (list[i] !== "" && "<mark>".indexOf(list[i]) === -1 && "</mark>".indexOf(list[i]) === -1) {
+            let matches = new Set(articleContent.match(new RegExp("\\b" + list[i] + "\\b", "gi")));
-           //将文章中所有出现该单词word的地方改为："<mark>" + word + "<mark>"。 正则表达式RegExp()中，"\\b"代表单词边界匹配。
+            if (matches.has("mark")) {
-
+                // 优先处理单词为 "mark" 的情况
-            //修改代码
+                totalSet = new Set(["mark", ...totalSet]);
-            let articleContent_fb = articleContent;  //文章副本
+            }
-            while(articleContent_fb.toLowerCase().indexOf(list[i].toLowerCase()) !== -1 && list[i]!=""){
+            totalSet = new Set([...totalSet, ...matches]);
                //找到副本中和list[i]匹配的第一个单词(第一种匹配情况),并赋值给list[i]。
                const index = articleContent_fb.toLowerCase().indexOf(list[i].toLowerCase());
                list[i] = articleContent_fb.substring(index, index + list[i].length);
                articleContent_fb = articleContent_fb.substring(index + list[i].length);    // 使用副本中list[i]之后的子串替换掉副本
                articleContent = articleContent.replace(new RegExp("\\b"+list[i]+"\\b","g"),"<mark>" + list[i] + "</mark>");
        }
    } 
    // 删除所有的mark标签,防止标签发生嵌套
    articleContent = articleContent.replace(/<(mark)[^>]*>/gi, "");
    articleContent = articleContent.replace(/<(\/mark)[^>]*>/gi, "");
    // 将文章中所有出现该单词word的地方改为："<mark>" + word + "</mark>"。
    for (let word of totalSet) {
        articleContent = articleContent.replace(new RegExp("\\b" + word + "\\b", "g"), "<mark>" + word + "</mark>");
    }
    document.getElementById("article").innerHTML = articleContent;
 }
 function cancelHighlighting() {
-    let articleContent = document.getElementById("article").innerText;//将原来的.innerText改为.innerHtml，原因同上
+    let articleContent = document.getElementById("article").innerHTML;
-    let pickedWords = document.getElementById("selected-words");
+    articleContent = articleContent.replace(/<(mark)[^>]*>/gi, "");
-    const dictionaryWords = document.getElementById("selected-words2");    
+    articleContent = articleContent.replace(/<(\/mark)[^>]*>/gi, "");
    const list = pickedWords.value.split(" ");    
    if (pickedWords != null) {
        for (let i = 0; i < list.length; ++i) {
            list[i] = list[i].replace(/(^\s*)|(\s*$)/g, "");
            if (list[i] !== "") { //原来判断的代码中，替换的内容为“list[i]”这个字符串，这明显是错误的，我们需要替换的是list[i]里的内容
                articleContent = articleContent.replace(new RegExp("<mark>"+list[i]+"</mark>", "g"), list[i]);
            }
        }
    }
    if (dictionaryWords != null) {
        let list2 = pickedWords.value.split(" ");
        for (let i = 0; i < list2.length; ++i) {
            list2 = dictionaryWords.value.split(" ");
            list2[i] = list2[i].replace(/(^\s*)|(\s*$)/g, "");
            if (list2[i] !== "") { //原来代码中，替换的内容为“list[i]”这个字符串，这明显是错误的，我们需要替换的是list[i]里的内容
                articleContent = articleContent.replace(new RegExp("<mark>"+list2[i]+"</mark>", "g"), list2[i]);
            }
        }
    }
    document.getElementById("article").innerHTML = articleContent;
 }
--- a/app/static/js/read.js
+++ b/app/static/js/read.js
@ -9,7 +9,7 @@ var Reader = (function() {
        msg.rate = rate;
        msg.lang = "en-US";
        msg.onboundary = ev => {
-            if (ev.name == "word") {
+            if (ev.name === "word") {
                current_position = ev.charIndex;
            }
        }
--- a/app/static/js/word_operation.js
+++ b/app/static/js/word_operation.js
@ -9,7 +9,6 @@ function familiar(theWord) {
            let new_freq = freq - 1;
            const allow_move = document.getElementById("move_dynamiclly").checked;
            if (allow_move) {
                if (new_freq <= 0) {
                    removeWord(theWord);
                } else {
@ -114,7 +113,7 @@ function removeWord(word) {
    // 根据词频信息删除元素
    word = word.replace('&amp;', '&');
    const element_to_remove = document.getElementById(`p_${word}`);
-    if (element_to_remove != null) {
+    if (element_to_remove !== null) {
        element_to_remove.remove();
    }
 }
@ -129,7 +128,7 @@ function renderWord(word) {
    for (const current of container.children) {
        const cur_word = parseWord(current);
        // 找到第一个词频比它小的元素，插入到这个元素前面
-        if (compareWord(cur_word, word) == -1) {
+        if (compareWord(cur_word, word) === -1) {
            container.insertBefore(new_element, current);
            inserted = true;
            break;
@ -165,17 +164,11 @@ function elementFromString(string) {
 *  当first大于second时返回1
 */
 function compareWord(first, second) {
-    if (first.freq < second.freq) {
+    if (first.freq !== second.freq) {
-        return -1;
+        return first.freq < second.freq ? -1 : 1;
    }
-    if (first.freq > second.freq) {
+    if (first.word !== second.word) {
-        return 1;
+        return first.word < second.word ? -1 : 1;
    }
    if (first.word < second.word) {
        return -1;
    }
    if (first.word > second.word) {
        return 1;
    }
    return 0;
 }
--- a/app/templates/userpage_get.html
+++ b/app/templates/userpage_get.html
@ -32,6 +32,17 @@
            40%, 60% { transform: translate3d(+4px, 0, 0); }
            50% { transform: translate3d(-4px, 0, 0); }
        }
        .lead{
            font-size: 17px;
            font-family: Arial, Helvetica, sans-serif;
        }
        .display-5{
            font-size: 30px;
        }
    </style>
 </head>
 <body>
@ -49,18 +60,22 @@
 {#        <div class="alert alert-warning" role="alert">Congratulations! {{ message }}</div>#}
 {#    {% endfor %}#}
-        <button class="btn btn-success" id="load_next_article" onclick="load_next_article()"> 下一篇 Next Article </button>
+        <button class="btn btn-success" id="load_next_article" onclick="load_next_article();Reader.stopRead()"> 下一篇 Next Article </button>
-        <button class="btn btn-success" id="load_pre_article" onclick="load_pre_article()" > 上一篇 Previous Article </button>
+        <button class="btn btn-success" id="load_pre_article" onclick="load_pre_article();Reader.stopRead()" > 上一篇 Previous Article </button>
    <p><b>阅读文章并回答问题</b></p>
    <div id="text-content">
        <div id="found">
            <div class="alert alert-success" role="alert">According to your word list, your level is <span class="badge bg-success" id="user-level">{{ today_article["user_level"] }}</span>  and we have chosen an article with a difficulty level of <span class="badge bg-success" id="text_level">{{ today_article["text_level"] }}</span> for you.</div>
                <p class="text-muted" id="date">Article added on: {{ today_article["date"] }}</p><br/>
-            <div class="p-3 mb-2 bg-light text-dark"><br/>
+            <div class="p-3 mb-2 bg-light text-dark" style="margin: 0 0.5%;"><br/>
            <p class="display-5" id="article_title">{{ today_article["article_title"] }}</p><br/>
-            <p class="lead"><font id="article" size=2>{{ today_article["article_body"] }}</font></p><br/>
+            <p class="lead"><font id="article">{{ today_article["article_body"] }}</font></p><br/>
            <div>
                <h6>source: </h6>
                <p><small class="text-muted" id="source">{{ today_article['source'] }}</small></p><br/>
            </div>
            <p><b id="question">{{ today_article['question'] }}</b></p><br/>
                <script type="text/javascript">
                    function toggle_visibility(id) { {# https://css-tricks.com/snippets/javascript/showhide-element/#}
@ -97,7 +112,7 @@
    <p><b>收集生词吧</b> （可以在正文中划词，也可以复制黏贴）</p>
    <form method="post" action="/{{ username }}/userpage">
        <textarea name="content" id="selected-words" rows="10" cols="120"></textarea><br/>
-        <input type="submit" value="把生词加入我的生词库"/>
+        <input type="submit" value="把生词加入我的生词库" onclick="Reader.stopRead()"/>
        <input type="reset" value="清除"/>
    </form>
    {% if session.get['thisWord'] %}
Author	SHA1	Message	Date
俞黄焘	4e1e19d71d	为系统添加一个全局变量记录数据库的单词及其等级，使得数据库单词等级只需在登录时进行一次评级，大致能将点击下一篇的时间缩减为原来的10^-15次以下，感谢章翊、赵煜涵、唐伟、宋江涛同学的建议，没有他们我懒得改的	2023-06-18 21:43:00 +08:00
俞黄焘	3361e4ba79	添加测试convert_test_type_to_difficulty_level和get_difficulty_level_for_user函数运行所需时间的几行代码	2023-06-18 13:21:49 +08:00
Laugh	4d2535a6e8	Merge branch 'Alpha-snapshot20230605' into Bug527-ZhouZhifang	2023-06-15 15:59:29 +08:00
Laugh	bb2d0363e4	Feat: update fontsize and margin of some elements	2023-06-15 15:54:46 +08:00
黄子睿	9816596cf8	删除 'app/static/js/tanyanmei-fillword.js'	2023-06-02 21:34:27 +08:00
黄子睿	682247bff1	refactor partial function and code writing specifications	2023-06-02 21:33:21 +08:00
Hui Lan	b22c654f0f	Merge branch 'Bug529-GuHan' of http://121.4.94.30:3000/mrlan/EnglishPal into Alpha-snapshot20230601	2023-06-01 07:40:27 +08:00
倪玲丽	d402bb45cb	刷新屏幕，点击上下篇，加入生词库，停止阅读(更改)	2023-05-31 18:37:05 +08:00
倪玲丽	cdf6180901	刷新屏幕，点击上下篇，加入生词库，停止阅读	2023-05-30 18:48:37 +08:00
倪玲丽	38837c9c2f	合并最新的Alpha-snapshot20230529	2023-05-30 18:45:23 +08:00
黄子睿	a0ddf4bdad	上传文件至 'app/static/js' 修复了 Bug492 选中问号出现多个问号的问题。解决了选中紧跟标点符号的单词，单词能正常显示。优化了选中较长的文章时页面容易出现卡顿的问题。	2023-05-27 17:33:37 +08:00
倪玲丽	df64065dcc	点击上下篇，停止阅读	2023-05-06 18:19:24 +08:00
倪玲丽	ce28b91bd5	屏幕刷新，停止阅读	2023-05-06 18:18:12 +08:00
倪玲丽	d6bd24ee1c	Merge branch 'Alpha' of http://121.4.94.30:3000/mrlan/EnglishPal into Bug393-TanYanMei merge alpha	2023-04-24 12:01:08 +08:00
Hui Lan	e9e2bd3d23	Remove static\js	2023-04-24 11:43:43 +08:00
Hui Lan	320a99d479	Move Tan Yanmei's fillword.js to app/static/js/tanyanmei-fillword.js. Delete the strange folder static js.	2023-04-24 11:38:16 +08:00
Hui Lan	3eca9234a9	Merge branch 'Bug393-TanYanMei' of http://121.4.94.30:3000/mrlan/EnglishPal into Bug393-TanYanMei	2023-04-24 11:32:15 +08:00
覃艳美	8924166975	上传文件至 'static\js'	2022-06-12 21:23:34 +08:00