Merge pull request 'WIP：Bug529-GuHan' (#88 ) from Bug529-GuHan into master

Reviewed-on: http://121.4.94.30:3000/mrlan/EnglishPal/pulls/88
已经与Alpha-snapshot20230525 分支同步，重新提交
2023-06-04 12:39:34 +08:00 · 2023-05-28 16:31:12 +08:00 · 2023-05-20 15:29:12 +08:00 · 2023-05-15 19:24:43 +08:00 · 2023-05-15 19:15:30 +08:00
2 changed files with 25 additions and 11 deletions
--- a/app/static/js/highlight.js
+++ b/app/static/js/highlight.js
@@ -22,19 +22,33 @@ function getWord() {
 function highLight() {
    if (!isHighlight) return;
-    let articleContent = document.getElementById("article").innerText;
+    let articleContent = document.getElementById("article").innerText; //将原来的.innerText改为.innerHtml，使用innerText会把原文章中所包含的<br>标签去除，导致处理后的文章内容失去了原来的格式
    let pickedWords = document.getElementById("selected-words");  // words picked to the text area
    let dictionaryWords = document.getElementById("selected-words2"); // words appearing in the user's new words list
-    let allWords = dictionaryWords === null ? pickedWords.value + " " : pickedWords.value + " " + dictionaryWords.value;
+    let allWords = "";  //初始化allWords的值，避免进入判断后编译器认为allWords未初始化的问题
    if(dictionaryWords != null){//增加一个判断，检查生词本里面是否为空，如果为空，allWords只添加选中的单词
        allWords = pickedWords.value + " " + dictionaryWords.value;
    }
    else{
        allWords = pickedWords.value + " ";
    }
    const list = allWords.split(" ");//将所有的生词放入一个list中，用于后续处理
    for (let i = 0; i < list.length; ++i) {
-        list[i] = list[i].replace(/(^\W*)|(\W*$)/g, ""); // 消除单词两边的非单词字符
+        list[i] = list[i].replace(/(^\s*)|(\s*$)/g, ""); //消除单词两边的空字符
-        if (list[i] != "" && "<mark>".indexOf(list[i]) === -1 && "</mark>".indexOf(list[i]) === -1) {
+        list[i] = list[i].replace('|', "");
-            // 返回所有匹配单词的集合, 正则表达式RegExp()中, "\b"匹配一个单词的边界, g 表示全局匹配, i 表示对大小写不敏感。
+        list[i] = list[i].replace('?', "");
-            let matches = new Set(articleContent.match(new RegExp("\\b" + list[i] + "\\b", "gi")));
+        if (list[i] !== "" && "<mark>".indexOf(list[i]) === -1 && "</mark>".indexOf(list[i]) === -1) {
-            for (let word of matches) {
+           //将文章中所有出现该单词word的地方改为："<mark>" + word + "<mark>"。 正则表达式RegExp()中，"\\b"代表单词边界匹配。
-                // 将文章中所有出现该单词word的地方改为："<mark>" + word + "<mark>"。 
+
-                articleContent = articleContent.replace(new RegExp("\\b" + word + "\\b", "g"), "<mark>" + word + "</mark>");
+            //修改代码
            let articleContent_fb = articleContent;  //文章副本
            while(articleContent_fb.toLowerCase().indexOf(list[i].toLowerCase()) !== -1 && list[i]!=""){
                //找到副本中和list[i]匹配的第一个单词(第一种匹配情况),并赋值给list[i]。
                const index = articleContent_fb.toLowerCase().indexOf(list[i].toLowerCase());
                list[i] = articleContent_fb.substring(index, index + list[i].length);
                articleContent_fb = articleContent_fb.substring(index + list[i].length);    // 使用副本中list[i]之后的子串替换掉副本
                articleContent = articleContent.replace(new RegExp("\\b"+list[i]+"\\b","g"),"<mark>" + list[i] + "</mark>");
            }
        }
    }
--- a/app/wordfreqCMD.py
+++ b/app/wordfreqCMD.py
@@ -39,7 +39,7 @@ def file2str(fname):#文件转字符
 def remove_punctuation(s): # 这里是s是形参 (parameter)。函数被调用时才给s赋值。
-    special_characters = '\_©~<=>+-/[]*&$%^@.,?!:;#()"“”—‘’{}|' # 把里面的字符都去掉
+    special_characters = '\_©~<=>+/[]*&$%^@.,?!:;#()"“”—‘’{}|' # 把里面的字符都去掉
    for c in special_characters:
        s = s.replace(c, ' ') # 防止出现把 apple,apple 移掉逗号后变成 appleapple 情况
    s = s.replace('--', ' ')
Author	SHA1	Message	Date
顾涵	708a6a2821	Merge pull request 'WIP：Bug529-GuHan' (#88 ) from Bug529-GuHan into master Reviewed-on: http://121.4.94.30:3000/mrlan/EnglishPal/pulls/88	2023-06-04 12:39:34 +08:00
顾涵	688a198768	已经与Alpha-snapshot20230525 分支同步，重新提交	2023-05-28 16:31:12 +08:00
顾涵	030b89706e	special_characters = '\_©~<=>+/[]*&$%^@.,?!:;#()"“”—‘’{}\|' 用于过滤字符，我将其中的“-”删去，使连字符没有被过滤，实现录入例如fifty-six等组合词的功能。另外对于删除过滤是否会引发字符bug，答案是肯定的，但是这段代码中的过滤字符虽然多，但是并没有完全过滤掉所有字符，（过滤的只是键盘上能打出的字符，不包括输入法中能打出的特殊字符），所以字符bug本身就一直存在，我认为减少一个“-”字符对程序的过滤过程不会造成问题。	2023-05-20 15:29:12 +08:00
顾涵	acd8db6e3e	special_characters = '\_©~<=>+/[]*&$%^@.,?!:;#()"“”—‘’{}\|' 用于过滤字符，我将其中的“-”删去，使连字符没有被过滤，实现录入例如fifty-six等组合词的功能。另外对于删除过滤是否会引发字符bug，答案是肯定的，但是这段代码中的过滤字符虽然多，但是并没有完全过滤掉所有字符，（过滤的只是键盘上能打出的字符，不包括输入法中能打出的特殊字符），所以字符bug本身就一直存在，我认为减少一个对“-”字符的过滤不会造成问题。	2023-05-15 19:24:43 +08:00
顾涵	9f3f5b43e1	special_characters = '\_©~<=>+/[]*&$%^@.,?!:;#()"“”—‘’{}\|' 用于过滤字符，我将其中的“-”删去，使连字符没有被过滤，实现录入例如fifty-six等组合词的功能。另外对于删除过滤是否会引发字符bug，答案是肯定的，但是这段代码中的过滤字符虽然多，但是并没有完全过滤掉所有字符，（过滤的只是键盘上能打出的字符，不包括输入法中能打出的特殊字符），所以字符bug本身就一直存在，我认为减少一个对“-”字符的过滤不会造成问题。	2023-05-15 19:15:30 +08:00