forked from mrlan/EnglishPal
				
			Compare commits
	
		
			2 Commits 
		
	
	
		
			Bug476-Yuh
			...
			master
		
	
	| Author | SHA1 | Date | 
|---|---|---|
|  | ff6286cf01 | |
|  | 1d7e61d751 | |
|  | @ -6,14 +6,10 @@ | ||||||
| # Purpose: compute difficulty level of a English text | # Purpose: compute difficulty level of a English text | ||||||
| 
 | 
 | ||||||
| import pickle | import pickle | ||||||
| import time | import math | ||||||
| 
 | from wordfreqCMD import remove_punctuation, freq, sort_in_descending_order, sort_in_ascending_order | ||||||
| import snowballstemmer | import snowballstemmer | ||||||
| 
 | 
 | ||||||
| from wordfreqCMD import remove_punctuation, freq, sort_in_descending_order, sort_in_ascending_order |  | ||||||
| 
 |  | ||||||
| # 定义一个全局的res_d, 记录数据库单词评级之后的单词及其等级 |  | ||||||
| res_d = {} |  | ||||||
| 
 | 
 | ||||||
| def load_record(pickle_fname): | def load_record(pickle_fname): | ||||||
|     f = open(pickle_fname, 'rb') |     f = open(pickle_fname, 'rb') | ||||||
|  | @ -28,7 +24,6 @@ def convert_test_type_to_difficulty_level(d): | ||||||
|     :param d: 存储了单词库pickle文件中的单词的字典 |     :param d: 存储了单词库pickle文件中的单词的字典 | ||||||
|     :return: |     :return: | ||||||
|     """ |     """ | ||||||
|     time_start = time.time() |  | ||||||
|     result = {} |     result = {} | ||||||
|     L = list(d.keys())  # in d, we have test types (e.g., CET4,CET6,BBC) for each word |     L = list(d.keys())  # in d, we have test types (e.g., CET4,CET6,BBC) for each word | ||||||
| 
 | 
 | ||||||
|  | @ -43,10 +38,7 @@ def convert_test_type_to_difficulty_level(d): | ||||||
|             result[k] = 7 |             result[k] = 7 | ||||||
|         elif 'BBC' in d[k]: |         elif 'BBC' in d[k]: | ||||||
|             result[k] = 8 |             result[k] = 8 | ||||||
|     time_end = time.time() | 
 | ||||||
|     print('convert_test_type_to_difficulty_level totally cost', time_end - time_start) |  | ||||||
|     global res_d |  | ||||||
|     res_d = result |  | ||||||
|     return result  # {'apple': 4, ...} |     return result  # {'apple': 4, ...} | ||||||
| 
 | 
 | ||||||
| 
 | 
 | ||||||
|  | @ -56,12 +48,8 @@ def get_difficulty_level_for_user(d1, d2): | ||||||
|     d1 用户不会的词 |     d1 用户不会的词 | ||||||
|     在d2的后面添加单词,没有新建一个新的字典 |     在d2的后面添加单词,没有新建一个新的字典 | ||||||
|     """ |     """ | ||||||
|     time_start = time.time() |  | ||||||
|     # TODO: convert_test_type_to_difficulty_level() should not be called every time.  Each word's difficulty level should be pre-computed. |     # TODO: convert_test_type_to_difficulty_level() should not be called every time.  Each word's difficulty level should be pre-computed. | ||||||
|     if res_d == {}: |  | ||||||
|     d2 = convert_test_type_to_difficulty_level(d2)  # 根据d2的标记评级{'apple': 4, 'abandon': 4, ...} |     d2 = convert_test_type_to_difficulty_level(d2)  # 根据d2的标记评级{'apple': 4, 'abandon': 4, ...} | ||||||
|     else: |  | ||||||
|         d2 = res_d |  | ||||||
|     stemmer = snowballstemmer.stemmer('english') |     stemmer = snowballstemmer.stemmer('english') | ||||||
| 
 | 
 | ||||||
|     for k in d1:  # 用户的词 |     for k in d1:  # 用户的词 | ||||||
|  | @ -73,8 +61,6 @@ def get_difficulty_level_for_user(d1, d2): | ||||||
|                 d2[k] = d2[stem]  # 按照词根进行评级 |                 d2[k] = d2[stem]  # 按照词根进行评级 | ||||||
|             else: |             else: | ||||||
|                 d2[k] = 3  # 如果k的词根都不在,那么就当认为是3级 |                 d2[k] = 3  # 如果k的词根都不在,那么就当认为是3级 | ||||||
|     time_end = time.time() |  | ||||||
|     print('get_difficulty_level_for_user totally cost', time_end - time_start) |  | ||||||
|     return d2 |     return d2 | ||||||
| 
 | 
 | ||||||
| 
 | 
 | ||||||
|  |  | ||||||
|  | @ -29,8 +29,3 @@ function onReadClick() { | ||||||
| function onChooseClick() { | function onChooseClick() { | ||||||
|     isChoose = !isChoose; |     isChoose = !isChoose; | ||||||
| } | } | ||||||
| 
 |  | ||||||
| // 如果网页刷新,停止播放声音
 |  | ||||||
| if (performance.getEntriesByType("navigation")[0].type == "reload") { |  | ||||||
|     Reader.stopRead(); |  | ||||||
| } |  | ||||||
|  | @ -22,38 +22,62 @@ function getWord() { | ||||||
| 
 | 
 | ||||||
| function highLight() { | function highLight() { | ||||||
|     if (!isHighlight) return; |     if (!isHighlight) return; | ||||||
|     let articleContent = document.getElementById("article").innerHTML; // innerHTML保留HTML标签来保持部分格式,且适配不同的浏览器
 |     let articleContent = document.getElementById("article").innerText; //将原来的.innerText改为.innerHtml,使用innerText会把原文章中所包含的<br>标签去除,导致处理后的文章内容失去了原来的格式
 | ||||||
|     let pickedWords = document.getElementById("selected-words");  // words picked to the text area
 |     let pickedWords = document.getElementById("selected-words");  // words picked to the text area
 | ||||||
|     let dictionaryWords = document.getElementById("selected-words2"); // words appearing in the user's new words list
 |     let dictionaryWords = document.getElementById("selected-words2"); // words appearing in the user's new words list
 | ||||||
|     let allWords = dictionaryWords === null ? pickedWords.value + " " : pickedWords.value + " " + dictionaryWords.value; |     let allWords = "";  //初始化allWords的值,避免进入判断后编译器认为allWords未初始化的问题
 | ||||||
|     const list = allWords.split(" "); // 将所有的生词放入一个list中
 |     if(dictionaryWords != null){//增加一个判断,检查生词本里面是否为空,如果为空,allWords只添加选中的单词
 | ||||||
|     let totalSet = new Set(); |         allWords = pickedWords.value + " " + dictionaryWords.value; | ||||||
|  |     } | ||||||
|  |     else{ | ||||||
|  |         allWords = pickedWords.value + " "; | ||||||
|  |     } | ||||||
|  |     const list = allWords.split(" ");//将所有的生词放入一个list中,用于后续处理
 | ||||||
|     for (let i = 0; i < list.length; ++i) { |     for (let i = 0; i < list.length; ++i) { | ||||||
|         list[i] = list[i].replace(/(^\W*)|(\W*$)/g, ""); // 消除单词两边的非单词字符
 |         list[i] = list[i].replace(/(^\s*)|(\s*$)/g, ""); //消除单词两边的空字符
 | ||||||
|         if (list[i] != "" && !totalSet.has(list[i])) { |         list[i] = list[i].replace('|', ""); | ||||||
|             // 返回所有匹配单词的集合, 正则表达式RegExp()中, "\b"匹配一个单词的边界, g 表示全局匹配, i 表示对大小写不敏感。
 |         list[i] = list[i].replace('?', ""); | ||||||
|             let matches = new Set(articleContent.match(new RegExp("\\b" + list[i] + "\\b", "gi"))); |         if (list[i] !== "" && "<mark>".indexOf(list[i]) === -1 && "</mark>".indexOf(list[i]) === -1) { | ||||||
|             if (matches.has("mark")) { |            //将文章中所有出现该单词word的地方改为:"<mark>" + word + "<mark>"。 正则表达式RegExp()中,"\\b"代表单词边界匹配。
 | ||||||
|                 // 优先处理单词为 "mark" 的情况
 | 
 | ||||||
|                 totalSet = new Set(["mark", ...totalSet]); |             //修改代码
 | ||||||
|             } |             let articleContent_fb = articleContent;  //文章副本
 | ||||||
|             totalSet = new Set([...totalSet, ...matches]); |             while(articleContent_fb.toLowerCase().indexOf(list[i].toLowerCase()) !== -1 && list[i]!=""){ | ||||||
|  |                 //找到副本中和list[i]匹配的第一个单词(第一种匹配情况),并赋值给list[i]。
 | ||||||
|  |                 const index = articleContent_fb.toLowerCase().indexOf(list[i].toLowerCase()); | ||||||
|  |                 list[i] = articleContent_fb.substring(index, index + list[i].length); | ||||||
|  | 
 | ||||||
|  |                 articleContent_fb = articleContent_fb.substring(index + list[i].length);    // 使用副本中list[i]之后的子串替换掉副本
 | ||||||
|  |                 articleContent = articleContent.replace(new RegExp("\\b"+list[i]+"\\b","g"),"<mark>" + list[i] + "</mark>"); | ||||||
|             } |             } | ||||||
|         } |         } | ||||||
|     // 删除所有的mark标签,防止标签发生嵌套
 |  | ||||||
|     articleContent = articleContent.replace(/<(mark)[^>]*>/gi, ""); |  | ||||||
|     articleContent = articleContent.replace(/<(\/mark)[^>]*>/gi, ""); |  | ||||||
|     // 将文章中所有出现该单词word的地方改为:"<mark>" + word + "<mark>"。
 |  | ||||||
|     for (let word of totalSet) { |  | ||||||
|         articleContent = articleContent.replace(new RegExp("\\b" + word + "\\b", "g"), "<mark>" + word + "</mark>"); |  | ||||||
|     } |     } | ||||||
|     document.getElementById("article").innerHTML = articleContent; |     document.getElementById("article").innerHTML = articleContent; | ||||||
| } | } | ||||||
| 
 | 
 | ||||||
| function cancelHighlighting() { | function cancelHighlighting() { | ||||||
|     let articleContent = document.getElementById("article").innerHTML; |     let articleContent = document.getElementById("article").innerText;//将原来的.innerText改为.innerHtml,原因同上
 | ||||||
|     articleContent = articleContent.replace(/<(mark)[^>]*>/gi, ""); |     let pickedWords = document.getElementById("selected-words"); | ||||||
|     articleContent = articleContent.replace(/<(\/mark)[^>]*>/gi, ""); |     const dictionaryWords = document.getElementById("selected-words2");     | ||||||
|  |     const list = pickedWords.value.split(" ");     | ||||||
|  |     if (pickedWords != null) { | ||||||
|  |         for (let i = 0; i < list.length; ++i) { | ||||||
|  |             list[i] = list[i].replace(/(^\s*)|(\s*$)/g, ""); | ||||||
|  |             if (list[i] !== "") { //原来判断的代码中,替换的内容为“list[i]”这个字符串,这明显是错误的,我们需要替换的是list[i]里的内容
 | ||||||
|  |                 articleContent = articleContent.replace(new RegExp("<mark>"+list[i]+"</mark>", "g"), list[i]); | ||||||
|  |             } | ||||||
|  |         } | ||||||
|  |     } | ||||||
|  |     if (dictionaryWords != null) { | ||||||
|  |         let list2 = pickedWords.value.split(" "); | ||||||
|  |         for (let i = 0; i < list2.length; ++i) { | ||||||
|  |             list2 = dictionaryWords.value.split(" "); | ||||||
|  |             list2[i] = list2[i].replace(/(^\s*)|(\s*$)/g, ""); | ||||||
|  |             if (list2[i] !== "") { //原来代码中,替换的内容为“list[i]”这个字符串,这明显是错误的,我们需要替换的是list[i]里的内容
 | ||||||
|  |                 articleContent = articleContent.replace(new RegExp("<mark>"+list2[i]+"</mark>", "g"), list2[i]); | ||||||
|  |             } | ||||||
|  |         } | ||||||
|  |     } | ||||||
|     document.getElementById("article").innerHTML = articleContent; |     document.getElementById("article").innerHTML = articleContent; | ||||||
| } | } | ||||||
| 
 | 
 | ||||||
|  |  | ||||||
|  | @ -9,7 +9,7 @@ var Reader = (function() { | ||||||
|         msg.rate = rate; |         msg.rate = rate; | ||||||
|         msg.lang = "en-US"; |         msg.lang = "en-US"; | ||||||
|         msg.onboundary = ev => { |         msg.onboundary = ev => { | ||||||
|             if (ev.name === "word") { |             if (ev.name == "word") { | ||||||
|                 current_position = ev.charIndex; |                 current_position = ev.charIndex; | ||||||
|             } |             } | ||||||
|         } |         } | ||||||
|  |  | ||||||
|  | @ -9,6 +9,7 @@ function familiar(theWord) { | ||||||
|             let new_freq = freq - 1; |             let new_freq = freq - 1; | ||||||
|             const allow_move = document.getElementById("move_dynamiclly").checked; |             const allow_move = document.getElementById("move_dynamiclly").checked; | ||||||
|             if (allow_move) { |             if (allow_move) { | ||||||
|  | 
 | ||||||
|                 if (new_freq <= 0) { |                 if (new_freq <= 0) { | ||||||
|                     removeWord(theWord); |                     removeWord(theWord); | ||||||
|                 } else { |                 } else { | ||||||
|  | @ -113,7 +114,7 @@ function removeWord(word) { | ||||||
|     // 根据词频信息删除元素
 |     // 根据词频信息删除元素
 | ||||||
|     word = word.replace('&amp;', '&'); |     word = word.replace('&amp;', '&'); | ||||||
|     const element_to_remove = document.getElementById(`p_${word}`); |     const element_to_remove = document.getElementById(`p_${word}`); | ||||||
|     if (element_to_remove !== null) { |     if (element_to_remove != null) { | ||||||
|         element_to_remove.remove(); |         element_to_remove.remove(); | ||||||
|     } |     } | ||||||
| } | } | ||||||
|  | @ -128,7 +129,7 @@ function renderWord(word) { | ||||||
|     for (const current of container.children) { |     for (const current of container.children) { | ||||||
|         const cur_word = parseWord(current); |         const cur_word = parseWord(current); | ||||||
|         // 找到第一个词频比它小的元素,插入到这个元素前面
 |         // 找到第一个词频比它小的元素,插入到这个元素前面
 | ||||||
|         if (compareWord(cur_word, word) === -1) { |         if (compareWord(cur_word, word) == -1) { | ||||||
|             container.insertBefore(new_element, current); |             container.insertBefore(new_element, current); | ||||||
|             inserted = true; |             inserted = true; | ||||||
|             break; |             break; | ||||||
|  | @ -164,11 +165,17 @@ function elementFromString(string) { | ||||||
|  *  当first大于second时返回1 |  *  当first大于second时返回1 | ||||||
|  */ |  */ | ||||||
| function compareWord(first, second) { | function compareWord(first, second) { | ||||||
|     if (first.freq !== second.freq) { |     if (first.freq < second.freq) { | ||||||
|         return first.freq < second.freq ? -1 : 1; |         return -1; | ||||||
|     } |     } | ||||||
|     if (first.word !== second.word) { |     if (first.freq > second.freq) { | ||||||
|         return first.word < second.word ? -1 : 1; |         return 1; | ||||||
|  |     } | ||||||
|  |     if (first.word < second.word) { | ||||||
|  |         return -1; | ||||||
|  |     } | ||||||
|  |     if (first.word > second.word) { | ||||||
|  |         return 1; | ||||||
|     } |     } | ||||||
|     return 0; |     return 0; | ||||||
| } | } | ||||||
|  | @ -32,17 +32,6 @@ | ||||||
|             40%, 60% { transform: translate3d(+4px, 0, 0); } |             40%, 60% { transform: translate3d(+4px, 0, 0); } | ||||||
|             50% { transform: translate3d(-4px, 0, 0); } |             50% { transform: translate3d(-4px, 0, 0); } | ||||||
|         } |         } | ||||||
| 
 |  | ||||||
|         .lead{ |  | ||||||
|             font-size: 17px; |  | ||||||
|             font-family: Arial, Helvetica, sans-serif; |  | ||||||
|         } |  | ||||||
| 
 |  | ||||||
|         .display-5{ |  | ||||||
|             font-size: 30px; |  | ||||||
|         } |  | ||||||
| 
 |  | ||||||
| 
 |  | ||||||
|     </style> |     </style> | ||||||
| </head> | </head> | ||||||
| <body> | <body> | ||||||
|  | @ -60,22 +49,18 @@ | ||||||
| {#        <div class="alert alert-warning" role="alert">Congratulations! {{ message }}</div>#} | {#        <div class="alert alert-warning" role="alert">Congratulations! {{ message }}</div>#} | ||||||
| {#    {% endfor %}#} | {#    {% endfor %}#} | ||||||
| 
 | 
 | ||||||
|         <button class="btn btn-success" id="load_next_article" onclick="load_next_article();Reader.stopRead()"> 下一篇 Next Article </button> |         <button class="btn btn-success" id="load_next_article" onclick="load_next_article()"> 下一篇 Next Article </button> | ||||||
|         <button class="btn btn-success" id="load_pre_article" onclick="load_pre_article();Reader.stopRead()" > 上一篇 Previous Article </button> |         <button class="btn btn-success" id="load_pre_article" onclick="load_pre_article()" > 上一篇 Previous Article </button> | ||||||
| 
 | 
 | ||||||
|     <p><b>阅读文章并回答问题</b></p> |     <p><b>阅读文章并回答问题</b></p> | ||||||
|     <div id="text-content"> |     <div id="text-content"> | ||||||
|         <div id="found"> |         <div id="found"> | ||||||
|             <div class="alert alert-success" role="alert">According to your word list, your level is <span class="badge bg-success" id="user-level">{{ today_article["user_level"] }}</span>  and we have chosen an article with a difficulty level of <span class="badge bg-success" id="text_level">{{ today_article["text_level"] }}</span> for you.</div> |             <div class="alert alert-success" role="alert">According to your word list, your level is <span class="badge bg-success" id="user-level">{{ today_article["user_level"] }}</span>  and we have chosen an article with a difficulty level of <span class="badge bg-success" id="text_level">{{ today_article["text_level"] }}</span> for you.</div> | ||||||
|                 <p class="text-muted" id="date">Article added on: {{ today_article["date"] }}</p><br/> |                 <p class="text-muted" id="date">Article added on: {{ today_article["date"] }}</p><br/> | ||||||
|             <div class="p-3 mb-2 bg-light text-dark" style="margin: 0 0.5%;"><br/> |             <div class="p-3 mb-2 bg-light text-dark"><br/> | ||||||
|             <p class="display-5" id="article_title">{{ today_article["article_title"] }}</p><br/> |             <p class="display-5" id="article_title">{{ today_article["article_title"] }}</p><br/> | ||||||
|             <p class="lead"><font id="article">{{ today_article["article_body"] }}</font></p><br/> |             <p class="lead"><font id="article" size=2>{{ today_article["article_body"] }}</font></p><br/> | ||||||
|             <div> |  | ||||||
|                 <h6>source: </h6> |  | ||||||
|             <p><small class="text-muted" id="source">{{ today_article['source'] }}</small></p><br/> |             <p><small class="text-muted" id="source">{{ today_article['source'] }}</small></p><br/> | ||||||
|             </div> |  | ||||||
|              |  | ||||||
|             <p><b id="question">{{ today_article['question'] }}</b></p><br/> |             <p><b id="question">{{ today_article['question'] }}</b></p><br/> | ||||||
|                 <script type="text/javascript"> |                 <script type="text/javascript"> | ||||||
|                     function toggle_visibility(id) { {# https://css-tricks.com/snippets/javascript/showhide-element/#} |                     function toggle_visibility(id) { {# https://css-tricks.com/snippets/javascript/showhide-element/#} | ||||||
|  | @ -112,7 +97,7 @@ | ||||||
|     <p><b>收集生词吧</b> (可以在正文中划词,也可以复制黏贴)</p> |     <p><b>收集生词吧</b> (可以在正文中划词,也可以复制黏贴)</p> | ||||||
|     <form method="post" action="/{{ username }}/userpage"> |     <form method="post" action="/{{ username }}/userpage"> | ||||||
|         <textarea name="content" id="selected-words" rows="10" cols="120"></textarea><br/> |         <textarea name="content" id="selected-words" rows="10" cols="120"></textarea><br/> | ||||||
|         <input type="submit" value="把生词加入我的生词库" onclick="Reader.stopRead()"/> |         <input type="submit" value="把生词加入我的生词库"/> | ||||||
|         <input type="reset" value="清除"/> |         <input type="reset" value="清除"/> | ||||||
|     </form> |     </form> | ||||||
|     {% if session.get['thisWord'] %} |     {% if session.get['thisWord'] %} | ||||||
|  |  | ||||||
		Loading…
	
		Reference in New Issue