forked from mrlan/EnglishPal
Compare commits
7 Commits
Alpha-snap
...
master
Author | SHA1 | Date |
---|---|---|
丁晟晔 | ff6286cf01 | |
丁晟晔 | 1d7e61d751 | |
顾涵 | 708a6a2821 | |
顾涵 | 688a198768 | |
顾涵 | 030b89706e | |
顾涵 | acd8db6e3e | |
顾涵 | 9f3f5b43e1 |
|
@ -22,19 +22,33 @@ function getWord() {
|
|||
|
||||
function highLight() {
|
||||
if (!isHighlight) return;
|
||||
let articleContent = document.getElementById("article").innerText;
|
||||
let articleContent = document.getElementById("article").innerText; //将原来的.innerText改为.innerHtml,使用innerText会把原文章中所包含的<br>标签去除,导致处理后的文章内容失去了原来的格式
|
||||
let pickedWords = document.getElementById("selected-words"); // words picked to the text area
|
||||
let dictionaryWords = document.getElementById("selected-words2"); // words appearing in the user's new words list
|
||||
let allWords = dictionaryWords === null ? pickedWords.value + " " : pickedWords.value + " " + dictionaryWords.value;
|
||||
const list = allWords.split(" "); // 将所有的生词放入一个list中,用于后续处理
|
||||
let allWords = ""; //初始化allWords的值,避免进入判断后编译器认为allWords未初始化的问题
|
||||
if(dictionaryWords != null){//增加一个判断,检查生词本里面是否为空,如果为空,allWords只添加选中的单词
|
||||
allWords = pickedWords.value + " " + dictionaryWords.value;
|
||||
}
|
||||
else{
|
||||
allWords = pickedWords.value + " ";
|
||||
}
|
||||
const list = allWords.split(" ");//将所有的生词放入一个list中,用于后续处理
|
||||
for (let i = 0; i < list.length; ++i) {
|
||||
list[i] = list[i].replace(/(^\W*)|(\W*$)/g, ""); // 消除单词两边的非单词字符
|
||||
if (list[i] != "" && "<mark>".indexOf(list[i]) === -1 && "</mark>".indexOf(list[i]) === -1) {
|
||||
// 返回所有匹配单词的集合, 正则表达式RegExp()中, "\b"匹配一个单词的边界, g 表示全局匹配, i 表示对大小写不敏感。
|
||||
let matches = new Set(articleContent.match(new RegExp("\\b" + list[i] + "\\b", "gi")));
|
||||
for (let word of matches) {
|
||||
// 将文章中所有出现该单词word的地方改为:"<mark>" + word + "<mark>"。
|
||||
articleContent = articleContent.replace(new RegExp("\\b" + word + "\\b", "g"), "<mark>" + word + "</mark>");
|
||||
list[i] = list[i].replace(/(^\s*)|(\s*$)/g, ""); //消除单词两边的空字符
|
||||
list[i] = list[i].replace('|', "");
|
||||
list[i] = list[i].replace('?', "");
|
||||
if (list[i] !== "" && "<mark>".indexOf(list[i]) === -1 && "</mark>".indexOf(list[i]) === -1) {
|
||||
//将文章中所有出现该单词word的地方改为:"<mark>" + word + "<mark>"。 正则表达式RegExp()中,"\\b"代表单词边界匹配。
|
||||
|
||||
//修改代码
|
||||
let articleContent_fb = articleContent; //文章副本
|
||||
while(articleContent_fb.toLowerCase().indexOf(list[i].toLowerCase()) !== -1 && list[i]!=""){
|
||||
//找到副本中和list[i]匹配的第一个单词(第一种匹配情况),并赋值给list[i]。
|
||||
const index = articleContent_fb.toLowerCase().indexOf(list[i].toLowerCase());
|
||||
list[i] = articleContent_fb.substring(index, index + list[i].length);
|
||||
|
||||
articleContent_fb = articleContent_fb.substring(index + list[i].length); // 使用副本中list[i]之后的子串替换掉副本
|
||||
articleContent = articleContent.replace(new RegExp("\\b"+list[i]+"\\b","g"),"<mark>" + list[i] + "</mark>");
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
|
@ -39,7 +39,7 @@ def file2str(fname):#文件转字符
|
|||
|
||||
|
||||
def remove_punctuation(s): # 这里是s是形参 (parameter)。函数被调用时才给s赋值。
|
||||
special_characters = '\_©~<=>+-/[]*&$%^@.,?!:;#()"“”—‘’{}|' # 把里面的字符都去掉
|
||||
special_characters = '\_©~<=>+/[]*&$%^@.,?!:;#()"“”—‘’{}|' # 把里面的字符都去掉
|
||||
for c in special_characters:
|
||||
s = s.replace(c, ' ') # 防止出现把 apple,apple 移掉逗号后变成 appleapple 情况
|
||||
s = s.replace('--', ' ')
|
||||
|
|
Loading…
Reference in New Issue