forked from mrlan/EnglishPal
Compare commits
1 Commit
master
...
Alpha-snap
Author | SHA1 | Date |
---|---|---|
黄子睿 | a0ddf4bdad |
|
@@ -22,33 +22,19 @@ function getWord() {
|
||||||
|
|
||||||
function highLight() {
|
function highLight() {
|
||||||
if (!isHighlight) return;
|
if (!isHighlight) return;
|
||||||
let articleContent = document.getElementById("article").innerText; //将原来的.innerText改为.innerHtml,使用innerText会把原文章中所包含的<br>标签去除,导致处理后的文章内容失去了原来的格式
|
let articleContent = document.getElementById("article").innerText;
|
||||||
let pickedWords = document.getElementById("selected-words"); // words picked to the text area
|
let pickedWords = document.getElementById("selected-words"); // words picked to the text area
|
||||||
let dictionaryWords = document.getElementById("selected-words2"); // words appearing in the user's new words list
|
let dictionaryWords = document.getElementById("selected-words2"); // words appearing in the user's new words list
|
||||||
let allWords = ""; //初始化allWords的值,避免进入判断后编译器认为allWords未初始化的问题
|
let allWords = dictionaryWords === null ? pickedWords.value + " " : pickedWords.value + " " + dictionaryWords.value;
|
||||||
if(dictionaryWords != null){//增加一个判断,检查生词本里面是否为空,如果为空,allWords只添加选中的单词
|
const list = allWords.split(" "); // 将所有的生词放入一个list中,用于后续处理
|
||||||
allWords = pickedWords.value + " " + dictionaryWords.value;
|
|
||||||
}
|
|
||||||
else{
|
|
||||||
allWords = pickedWords.value + " ";
|
|
||||||
}
|
|
||||||
const list = allWords.split(" ");//将所有的生词放入一个list中,用于后续处理
|
|
||||||
for (let i = 0; i < list.length; ++i) {
|
for (let i = 0; i < list.length; ++i) {
|
||||||
list[i] = list[i].replace(/(^\s*)|(\s*$)/g, ""); //消除单词两边的空字符
|
list[i] = list[i].replace(/(^\W*)|(\W*$)/g, ""); // 消除单词两边的非单词字符
|
||||||
list[i] = list[i].replace('|', "");
|
if (list[i] != "" && "<mark>".indexOf(list[i]) === -1 && "</mark>".indexOf(list[i]) === -1) {
|
||||||
list[i] = list[i].replace('?', "");
|
// 返回所有匹配单词的集合, 正则表达式RegExp()中, "\b"匹配一个单词的边界, g 表示全局匹配, i 表示对大小写不敏感。
|
||||||
if (list[i] !== "" && "<mark>".indexOf(list[i]) === -1 && "</mark>".indexOf(list[i]) === -1) {
|
let matches = new Set(articleContent.match(new RegExp("\\b" + list[i] + "\\b", "gi")));
|
||||||
//将文章中所有出现该单词word的地方改为:"<mark>" + word + "<mark>"。 正则表达式RegExp()中,"\\b"代表单词边界匹配。
|
for (let word of matches) {
|
||||||
|
// 将文章中所有出现该单词word的地方改为:"<mark>" + word + "<mark>"。
|
||||||
//修改代码
|
articleContent = articleContent.replace(new RegExp("\\b" + word + "\\b", "g"), "<mark>" + word + "</mark>");
|
||||||
let articleContent_fb = articleContent; //文章副本
|
|
||||||
while(articleContent_fb.toLowerCase().indexOf(list[i].toLowerCase()) !== -1 && list[i]!=""){
|
|
||||||
//找到副本中和list[i]匹配的第一个单词(第一种匹配情况),并赋值给list[i]。
|
|
||||||
const index = articleContent_fb.toLowerCase().indexOf(list[i].toLowerCase());
|
|
||||||
list[i] = articleContent_fb.substring(index, index + list[i].length);
|
|
||||||
|
|
||||||
articleContent_fb = articleContent_fb.substring(index + list[i].length); // 使用副本中list[i]之后的子串替换掉副本
|
|
||||||
articleContent = articleContent.replace(new RegExp("\\b"+list[i]+"\\b","g"),"<mark>" + list[i] + "</mark>");
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
|
@@ -39,7 +39,7 @@ def file2str(fname):#文件转字符
|
||||||
|
|
||||||
|
|
||||||
def remove_punctuation(s): # 这里是s是形参 (parameter)。函数被调用时才给s赋值。
|
def remove_punctuation(s): # 这里是s是形参 (parameter)。函数被调用时才给s赋值。
|
||||||
special_characters = '\_©~<=>+/[]*&$%^@.,?!:;#()"“”—‘’{}|' # 把里面的字符都去掉
|
special_characters = '\_©~<=>+-/[]*&$%^@.,?!:;#()"“”—‘’{}|' # 把里面的字符都去掉
|
||||||
for c in special_characters:
|
for c in special_characters:
|
||||||
s = s.replace(c, ' ') # 防止出现把 apple,apple 移掉逗号后变成 appleapple 情况
|
s = s.replace(c, ' ') # 防止出现把 apple,apple 移掉逗号后变成 appleapple 情况
|
||||||
s = s.replace('--', ' ')
|
s = s.replace('--', ' ')
|
||||||
|
|
Loading…
Reference in New Issue