forked from mrlan/EnglishPal
Compare commits
No commits in common. "Bug476-Yuhuangtao" and "master" have entirely different histories.
Bug476-Yuhuangtao
...
master
|
@ -6,14 +6,10 @@
|
||||||
# Purpose: compute difficulty level of an English text
|
# Purpose: compute difficulty level of an English text
|
||||||
|
|
||||||
import pickle
|
import pickle
|
||||||
import time
|
import math
|
||||||
|
from wordfreqCMD import remove_punctuation, freq, sort_in_descending_order, sort_in_ascending_order
|
||||||
import snowballstemmer
|
import snowballstemmer
|
||||||
|
|
||||||
from wordfreqCMD import remove_punctuation, freq, sort_in_descending_order, sort_in_ascending_order
|
|
||||||
|
|
||||||
# 定义一个全局的res_d, 记录数据库单词评级之后的单词及其等级
|
|
||||||
res_d = {}
|
|
||||||
|
|
||||||
def load_record(pickle_fname):
|
def load_record(pickle_fname):
|
||||||
f = open(pickle_fname, 'rb')
|
f = open(pickle_fname, 'rb')
|
||||||
|
@ -28,7 +24,6 @@ def convert_test_type_to_difficulty_level(d):
|
||||||
:param d: 存储了单词库pickle文件中的单词的字典
|
:param d: 存储了单词库pickle文件中的单词的字典
|
||||||
:return:
|
:return:
|
||||||
"""
|
"""
|
||||||
time_start = time.time()
|
|
||||||
result = {}
|
result = {}
|
||||||
L = list(d.keys()) # in d, we have test types (e.g., CET4,CET6,BBC) for each word
|
L = list(d.keys()) # in d, we have test types (e.g., CET4,CET6,BBC) for each word
|
||||||
|
|
||||||
|
@ -43,10 +38,7 @@ def convert_test_type_to_difficulty_level(d):
|
||||||
result[k] = 7
|
result[k] = 7
|
||||||
elif 'BBC' in d[k]:
|
elif 'BBC' in d[k]:
|
||||||
result[k] = 8
|
result[k] = 8
|
||||||
time_end = time.time()
|
|
||||||
print('convert_test_type_to_difficulty_level totally cost', time_end - time_start)
|
|
||||||
global res_d
|
|
||||||
res_d = result
|
|
||||||
return result # {'apple': 4, ...}
|
return result # {'apple': 4, ...}
|
||||||
|
|
||||||
|
|
||||||
|
@ -56,12 +48,8 @@ def get_difficulty_level_for_user(d1, d2):
|
||||||
d1 用户不会的词
|
d1 用户不会的词
|
||||||
在d2的后面添加单词,没有新建一个新的字典
|
在d2的后面添加单词,没有新建一个新的字典
|
||||||
"""
|
"""
|
||||||
time_start = time.time()
|
|
||||||
# TODO: convert_test_type_to_difficulty_level() should not be called every time. Each word's difficulty level should be pre-computed.
|
# TODO: convert_test_type_to_difficulty_level() should not be called every time. Each word's difficulty level should be pre-computed.
|
||||||
if res_d == {}:
|
|
||||||
d2 = convert_test_type_to_difficulty_level(d2) # 根据d2的标记评级{'apple': 4, 'abandon': 4, ...}
|
d2 = convert_test_type_to_difficulty_level(d2) # 根据d2的标记评级{'apple': 4, 'abandon': 4, ...}
|
||||||
else:
|
|
||||||
d2 = res_d
|
|
||||||
stemmer = snowballstemmer.stemmer('english')
|
stemmer = snowballstemmer.stemmer('english')
|
||||||
|
|
||||||
for k in d1: # 用户的词
|
for k in d1: # 用户的词
|
||||||
|
@ -73,8 +61,6 @@ def get_difficulty_level_for_user(d1, d2):
|
||||||
d2[k] = d2[stem] # 按照词根进行评级
|
d2[k] = d2[stem] # 按照词根进行评级
|
||||||
else:
|
else:
|
||||||
d2[k] = 3 # 如果k的词根都不在,那么就当认为是3级
|
d2[k] = 3 # 如果k的词根都不在,那么就当认为是3级
|
||||||
time_end = time.time()
|
|
||||||
print('get_difficulty_level_for_user totally cost', time_end - time_start)
|
|
||||||
return d2
|
return d2
|
||||||
|
|
||||||
|
|
||||||
|
|
|
@ -29,8 +29,3 @@ function onReadClick() {
|
||||||
function onChooseClick() {
|
function onChooseClick() {
|
||||||
isChoose = !isChoose;
|
isChoose = !isChoose;
|
||||||
}
|
}
|
||||||
|
|
||||||
// 如果网页刷新,停止播放声音
|
|
||||||
if (performance.getEntriesByType("navigation")[0].type == "reload") {
|
|
||||||
Reader.stopRead();
|
|
||||||
}
|
|
|
@ -22,38 +22,62 @@ function getWord() {
|
||||||
|
|
||||||
function highLight() {
|
function highLight() {
|
||||||
if (!isHighlight) return;
|
if (!isHighlight) return;
|
||||||
let articleContent = document.getElementById("article").innerHTML; // innerHTML保留HTML标签来保持部分格式,且适配不同的浏览器
|
let articleContent = document.getElementById("article").innerText; //将原来的.innerText改为.innerHtml,使用innerText会把原文章中所包含的<br>标签去除,导致处理后的文章内容失去了原来的格式
|
||||||
let pickedWords = document.getElementById("selected-words"); // words picked to the text area
|
let pickedWords = document.getElementById("selected-words"); // words picked to the text area
|
||||||
let dictionaryWords = document.getElementById("selected-words2"); // words appearing in the user's new words list
|
let dictionaryWords = document.getElementById("selected-words2"); // words appearing in the user's new words list
|
||||||
let allWords = dictionaryWords === null ? pickedWords.value + " " : pickedWords.value + " " + dictionaryWords.value;
|
let allWords = ""; //初始化allWords的值,避免进入判断后编译器认为allWords未初始化的问题
|
||||||
const list = allWords.split(" "); // 将所有的生词放入一个list中
|
if(dictionaryWords != null){//增加一个判断,检查生词本里面是否为空,如果为空,allWords只添加选中的单词
|
||||||
let totalSet = new Set();
|
allWords = pickedWords.value + " " + dictionaryWords.value;
|
||||||
|
}
|
||||||
|
else{
|
||||||
|
allWords = pickedWords.value + " ";
|
||||||
|
}
|
||||||
|
const list = allWords.split(" ");//将所有的生词放入一个list中,用于后续处理
|
||||||
for (let i = 0; i < list.length; ++i) {
|
for (let i = 0; i < list.length; ++i) {
|
||||||
list[i] = list[i].replace(/(^\W*)|(\W*$)/g, ""); // 消除单词两边的非单词字符
|
list[i] = list[i].replace(/(^\s*)|(\s*$)/g, ""); //消除单词两边的空字符
|
||||||
if (list[i] != "" && !totalSet.has(list[i])) {
|
list[i] = list[i].replace('|', "");
|
||||||
// 返回所有匹配单词的集合, 正则表达式RegExp()中, "\b"匹配一个单词的边界, g 表示全局匹配, i 表示对大小写不敏感。
|
list[i] = list[i].replace('?', "");
|
||||||
let matches = new Set(articleContent.match(new RegExp("\\b" + list[i] + "\\b", "gi")));
|
if (list[i] !== "" && "<mark>".indexOf(list[i]) === -1 && "</mark>".indexOf(list[i]) === -1) {
|
||||||
if (matches.has("mark")) {
|
//将文章中所有出现该单词word的地方改为:"<mark>" + word + "</mark>"。 正则表达式RegExp()中,"\\b"代表单词边界匹配。
|
||||||
// 优先处理单词为 "mark" 的情况
|
|
||||||
totalSet = new Set(["mark", ...totalSet]);
|
//修改代码
|
||||||
}
|
let articleContent_fb = articleContent; //文章副本
|
||||||
totalSet = new Set([...totalSet, ...matches]);
|
while(articleContent_fb.toLowerCase().indexOf(list[i].toLowerCase()) !== -1 && list[i]!=""){
|
||||||
|
//找到副本中和list[i]匹配的第一个单词(第一种匹配情况),并赋值给list[i]。
|
||||||
|
const index = articleContent_fb.toLowerCase().indexOf(list[i].toLowerCase());
|
||||||
|
list[i] = articleContent_fb.substring(index, index + list[i].length);
|
||||||
|
|
||||||
|
articleContent_fb = articleContent_fb.substring(index + list[i].length); // 使用副本中list[i]之后的子串替换掉副本
|
||||||
|
articleContent = articleContent.replace(new RegExp("\\b"+list[i]+"\\b","g"),"<mark>" + list[i] + "</mark>");
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
// 删除所有的mark标签,防止标签发生嵌套
|
|
||||||
articleContent = articleContent.replace(/<(mark)[^>]*>/gi, "");
|
|
||||||
articleContent = articleContent.replace(/<(\/mark)[^>]*>/gi, "");
|
|
||||||
// 将文章中所有出现该单词word的地方改为:"<mark>" + word + "</mark>"。
|
|
||||||
for (let word of totalSet) {
|
|
||||||
articleContent = articleContent.replace(new RegExp("\\b" + word + "\\b", "g"), "<mark>" + word + "</mark>");
|
|
||||||
}
|
}
|
||||||
document.getElementById("article").innerHTML = articleContent;
|
document.getElementById("article").innerHTML = articleContent;
|
||||||
}
|
}
|
||||||
|
|
||||||
function cancelHighlighting() {
|
function cancelHighlighting() {
|
||||||
let articleContent = document.getElementById("article").innerHTML;
|
let articleContent = document.getElementById("article").innerText;//将原来的.innerText改为.innerHtml,原因同上
|
||||||
articleContent = articleContent.replace(/<(mark)[^>]*>/gi, "");
|
let pickedWords = document.getElementById("selected-words");
|
||||||
articleContent = articleContent.replace(/<(\/mark)[^>]*>/gi, "");
|
const dictionaryWords = document.getElementById("selected-words2");
|
||||||
|
const list = pickedWords.value.split(" ");
|
||||||
|
if (pickedWords != null) {
|
||||||
|
for (let i = 0; i < list.length; ++i) {
|
||||||
|
list[i] = list[i].replace(/(^\s*)|(\s*$)/g, "");
|
||||||
|
if (list[i] !== "") { //原来判断的代码中,替换的内容为“list[i]”这个字符串,这明显是错误的,我们需要替换的是list[i]里的内容
|
||||||
|
articleContent = articleContent.replace(new RegExp("<mark>"+list[i]+"</mark>", "g"), list[i]);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
if (dictionaryWords != null) {
|
||||||
|
let list2 = pickedWords.value.split(" ");
|
||||||
|
for (let i = 0; i < list2.length; ++i) {
|
||||||
|
list2 = dictionaryWords.value.split(" ");
|
||||||
|
list2[i] = list2[i].replace(/(^\s*)|(\s*$)/g, "");
|
||||||
|
if (list2[i] !== "") { //原来代码中,替换的内容为“list[i]”这个字符串,这明显是错误的,我们需要替换的是list[i]里的内容
|
||||||
|
articleContent = articleContent.replace(new RegExp("<mark>"+list2[i]+"</mark>", "g"), list2[i]);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
document.getElementById("article").innerHTML = articleContent;
|
document.getElementById("article").innerHTML = articleContent;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -9,7 +9,7 @@ var Reader = (function() {
|
||||||
msg.rate = rate;
|
msg.rate = rate;
|
||||||
msg.lang = "en-US";
|
msg.lang = "en-US";
|
||||||
msg.onboundary = ev => {
|
msg.onboundary = ev => {
|
||||||
if (ev.name === "word") {
|
if (ev.name == "word") {
|
||||||
current_position = ev.charIndex;
|
current_position = ev.charIndex;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
|
@ -9,6 +9,7 @@ function familiar(theWord) {
|
||||||
let new_freq = freq - 1;
|
let new_freq = freq - 1;
|
||||||
const allow_move = document.getElementById("move_dynamiclly").checked;
|
const allow_move = document.getElementById("move_dynamiclly").checked;
|
||||||
if (allow_move) {
|
if (allow_move) {
|
||||||
|
|
||||||
if (new_freq <= 0) {
|
if (new_freq <= 0) {
|
||||||
removeWord(theWord);
|
removeWord(theWord);
|
||||||
} else {
|
} else {
|
||||||
|
@ -113,7 +114,7 @@ function removeWord(word) {
|
||||||
// 根据词频信息删除元素
|
// 根据词频信息删除元素
|
||||||
word = word.replace('&', '&');
|
word = word.replace('&', '&');
|
||||||
const element_to_remove = document.getElementById(`p_${word}`);
|
const element_to_remove = document.getElementById(`p_${word}`);
|
||||||
if (element_to_remove !== null) {
|
if (element_to_remove != null) {
|
||||||
element_to_remove.remove();
|
element_to_remove.remove();
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -128,7 +129,7 @@ function renderWord(word) {
|
||||||
for (const current of container.children) {
|
for (const current of container.children) {
|
||||||
const cur_word = parseWord(current);
|
const cur_word = parseWord(current);
|
||||||
// 找到第一个词频比它小的元素,插入到这个元素前面
|
// 找到第一个词频比它小的元素,插入到这个元素前面
|
||||||
if (compareWord(cur_word, word) === -1) {
|
if (compareWord(cur_word, word) == -1) {
|
||||||
container.insertBefore(new_element, current);
|
container.insertBefore(new_element, current);
|
||||||
inserted = true;
|
inserted = true;
|
||||||
break;
|
break;
|
||||||
|
@ -164,11 +165,17 @@ function elementFromString(string) {
|
||||||
* 当first大于second时返回1
|
* 当first大于second时返回1
|
||||||
*/
|
*/
|
||||||
function compareWord(first, second) {
|
function compareWord(first, second) {
|
||||||
if (first.freq !== second.freq) {
|
if (first.freq < second.freq) {
|
||||||
return first.freq < second.freq ? -1 : 1;
|
return -1;
|
||||||
}
|
}
|
||||||
if (first.word !== second.word) {
|
if (first.freq > second.freq) {
|
||||||
return first.word < second.word ? -1 : 1;
|
return 1;
|
||||||
|
}
|
||||||
|
if (first.word < second.word) {
|
||||||
|
return -1;
|
||||||
|
}
|
||||||
|
if (first.word > second.word) {
|
||||||
|
return 1;
|
||||||
}
|
}
|
||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
|
@ -32,17 +32,6 @@
|
||||||
40%, 60% { transform: translate3d(+4px, 0, 0); }
|
40%, 60% { transform: translate3d(+4px, 0, 0); }
|
||||||
50% { transform: translate3d(-4px, 0, 0); }
|
50% { transform: translate3d(-4px, 0, 0); }
|
||||||
}
|
}
|
||||||
|
|
||||||
.lead{
|
|
||||||
font-size: 17px;
|
|
||||||
font-family: Arial, Helvetica, sans-serif;
|
|
||||||
}
|
|
||||||
|
|
||||||
.display-5{
|
|
||||||
font-size: 30px;
|
|
||||||
}
|
|
||||||
|
|
||||||
|
|
||||||
</style>
|
</style>
|
||||||
</head>
|
</head>
|
||||||
<body>
|
<body>
|
||||||
|
@ -60,22 +49,18 @@
|
||||||
{# <div class="alert alert-warning" role="alert">Congratulations! {{ message }}</div>#}
|
{# <div class="alert alert-warning" role="alert">Congratulations! {{ message }}</div>#}
|
||||||
{# {% endfor %}#}
|
{# {% endfor %}#}
|
||||||
|
|
||||||
<button class="btn btn-success" id="load_next_article" onclick="load_next_article();Reader.stopRead()"> 下一篇 Next Article </button>
|
<button class="btn btn-success" id="load_next_article" onclick="load_next_article()"> 下一篇 Next Article </button>
|
||||||
<button class="btn btn-success" id="load_pre_article" onclick="load_pre_article();Reader.stopRead()" > 上一篇 Previous Article </button>
|
<button class="btn btn-success" id="load_pre_article" onclick="load_pre_article()" > 上一篇 Previous Article </button>
|
||||||
|
|
||||||
<p><b>阅读文章并回答问题</b></p>
|
<p><b>阅读文章并回答问题</b></p>
|
||||||
<div id="text-content">
|
<div id="text-content">
|
||||||
<div id="found">
|
<div id="found">
|
||||||
<div class="alert alert-success" role="alert">According to your word list, your level is <span class="badge bg-success" id="user-level">{{ today_article["user_level"] }}</span> and we have chosen an article with a difficulty level of <span class="badge bg-success" id="text_level">{{ today_article["text_level"] }}</span> for you.</div>
|
<div class="alert alert-success" role="alert">According to your word list, your level is <span class="badge bg-success" id="user-level">{{ today_article["user_level"] }}</span> and we have chosen an article with a difficulty level of <span class="badge bg-success" id="text_level">{{ today_article["text_level"] }}</span> for you.</div>
|
||||||
<p class="text-muted" id="date">Article added on: {{ today_article["date"] }}</p><br/>
|
<p class="text-muted" id="date">Article added on: {{ today_article["date"] }}</p><br/>
|
||||||
<div class="p-3 mb-2 bg-light text-dark" style="margin: 0 0.5%;"><br/>
|
<div class="p-3 mb-2 bg-light text-dark"><br/>
|
||||||
<p class="display-5" id="article_title">{{ today_article["article_title"] }}</p><br/>
|
<p class="display-5" id="article_title">{{ today_article["article_title"] }}</p><br/>
|
||||||
<p class="lead"><font id="article">{{ today_article["article_body"] }}</font></p><br/>
|
<p class="lead"><font id="article" size=2>{{ today_article["article_body"] }}</font></p><br/>
|
||||||
<div>
|
|
||||||
<h6>source: </h6>
|
|
||||||
<p><small class="text-muted" id="source">{{ today_article['source'] }}</small></p><br/>
|
<p><small class="text-muted" id="source">{{ today_article['source'] }}</small></p><br/>
|
||||||
</div>
|
|
||||||
|
|
||||||
<p><b id="question">{{ today_article['question'] }}</b></p><br/>
|
<p><b id="question">{{ today_article['question'] }}</b></p><br/>
|
||||||
<script type="text/javascript">
|
<script type="text/javascript">
|
||||||
function toggle_visibility(id) { {# https://css-tricks.com/snippets/javascript/showhide-element/#}
|
function toggle_visibility(id) { {# https://css-tricks.com/snippets/javascript/showhide-element/#}
|
||||||
|
@ -112,7 +97,7 @@
|
||||||
<p><b>收集生词吧</b> (可以在正文中划词,也可以复制黏贴)</p>
|
<p><b>收集生词吧</b> (可以在正文中划词,也可以复制黏贴)</p>
|
||||||
<form method="post" action="/{{ username }}/userpage">
|
<form method="post" action="/{{ username }}/userpage">
|
||||||
<textarea name="content" id="selected-words" rows="10" cols="120"></textarea><br/>
|
<textarea name="content" id="selected-words" rows="10" cols="120"></textarea><br/>
|
||||||
<input type="submit" value="把生词加入我的生词库" onclick="Reader.stopRead()"/>
|
<input type="submit" value="把生词加入我的生词库"/>
|
||||||
<input type="reset" value="清除"/>
|
<input type="reset" value="清除"/>
|
||||||
</form>
|
</form>
|
||||||
{% if session.get['thisWord'] %}
|
{% if session.get['thisWord'] %}
|
||||||
|
|
Loading…
Reference in New Issue