1
0
Fork 0

Compare commits

..

6 Commits

Author SHA1 Message Date
汪瑜 b486b6b9db 新增可被分隔的中文符号 2023-05-12 23:22:52 +08:00
汪瑜 0fedf590e8 更新 'app/wordfreqCMD.py' 2023-05-04 17:40:44 +08:00
汪瑜 96dfadcde6 更新 'app/wordfreqCMD.py' 2023-05-04 17:39:48 +08:00
杨昱晨 7b55fc1859 first commit 2023-05-04 17:18:04 +08:00
杨昱晨 ac2046ac2e first commit 2023-05-04 17:02:28 +08:00
whiost baa1c45782 [Bugfix] 部分中文符号不会被分隔 2022-12-08 16:03:38 +08:00
12 changed files with 163 additions and 469 deletions

View File

@ -7,7 +7,7 @@ import random, glob
import hashlib
from datetime import datetime
from flask import Flask, request, redirect, render_template, url_for, session, abort, flash, get_flashed_messages
from difficulty import get_difficulty_level_for_user, text_difficulty_level, user_difficulty_level
from difficulty import get_difficulty_level, text_difficulty_level, user_difficulty_level
path_prefix = '/var/www/wordfreq/wordfreq/'
@ -53,7 +53,7 @@ def get_today_article(user_word_list, visited_articles):
# Choose article according to reader's level
d1 = load_freq_history(path_prefix + 'static/frequency/frequency.p')
d2 = load_freq_history(path_prefix + 'static/words_and_tests.p')
d3 = get_difficulty_level_for_user(d1, d2)
d3 = get_difficulty_level(d1, d2)
d = None
result_of_generate_article = "not found"

View File

@ -8,7 +8,6 @@
import pickle
import math
from wordfreqCMD import remove_punctuation, freq, sort_in_descending_order, sort_in_ascending_order
import snowballstemmer
def load_record(pickle_fname):
@ -18,51 +17,41 @@ def load_record(pickle_fname):
return d
def convert_test_type_to_difficulty_level(d):
"""
对原本的单词库中的单词进行难度评级
:param d: 存储了单词库pickle文件中的单词的字典
:return:
"""
result = {}
L = list(d.keys()) # in d, we have test types (e.g., CET4,CET6,BBC) for each word
def difficulty_level_from_frequency(word, d):
level = 1
if not word in d:
return level
if 'what' in d:
ratio = (d['what']+1)/(d[word]+1) # what is a frequent word
level = math.log( max(ratio, 1), 2)
for k in L:
if 'CET4' in d[k]:
result[k] = 4 # CET4 word has level 4
elif 'OXFORD3000' in d[k]:
result[k] = 5
elif 'CET6' in d[k] or 'GRADUATE' in d[k]:
result[k] = 6
elif 'OXFORD5000' in d[k] or 'IELTS' in d[k]:
result[k] = 7
elif 'BBC' in d[k]:
result[k] = 8
return result # {'apple': 4, ...}
level = min(level, 8)
return level
def get_difficulty_level_for_user(d1, d2):
"""
d2 来自于词库的35511个已标记单词
d1 用户不会的词
在d2的后面添加单词没有新建一个新的字典
"""
# TODO: convert_test_type_to_difficulty_level() should not be called every time. Each word's difficulty level should be pre-computed.
d2 = convert_test_type_to_difficulty_level(d2) # 根据d2的标记评级{'apple': 4, 'abandon': 4, ...}
stemmer = snowballstemmer.stemmer('english')
def get_difficulty_level(d1, d2):
d = {}
L = list(d1.keys()) # in d1, we have freuqence for each word
L2 = list(d2.keys()) # in d2, we have test types (e.g., CET4,CET6,BBC) for each word
L.extend(L2)
L3 = list(set(L)) # L3 contains all words
for k in L3:
if k in d2:
if 'CET4' in d2[k]:
d[k] = 4 # CET4 word has level 4
elif 'CET6' in d2[k]:
d[k] = 6
elif 'BBC' in d2[k]:
d[k] = 8
if k in d1: # BBC could contain easy words that are not in CET4 or CET6. So 4 is not reasonable. Recompute difficulty level.
d[k] = min(difficulty_level_from_frequency(k, d1), d[k])
elif k in d1:
d[k] = difficulty_level_from_frequency(k, d1)
for k in d1: # 用户的词
if k in d2: # 如果用户的词以原型的形式存在于词库d2中
continue # 无需评级,跳过
else:
stem = stemmer.stemWord(k)
if stem in d2: # 如果用户的词的词根存在于词库d2的词根库中
d2[k] = d2[stem] # 按照词根进行评级
else:
d2[k] = 3 # 如果k的词根都不在那么就当认为是3级
return d2
return d
def revert_dict(d):
'''
@ -73,13 +62,12 @@ def revert_dict(d):
for k in d:
if type(d[k]) is list: # d[k] is a list of dates.
lst = d[k]
elif type(d[
k]) is int: # for backward compatibility. d was sth like {'word':1}. The value d[k] is not a list of dates, but a number representing how frequent this word had been added to the new word book.
elif type(d[k]) is int: # for backward compatibility. d was sth like {'word':1}. The value d[k] is not a list of dates, but a number representing how frequent this word had been added to the new word book.
freq = d[k]
lst = freq * ['2021082019'] # why choose this date? No particular reasons. I fix the bug in this date.
lst = freq*['2021082019'] # why choose this date? No particular reasons. I fix the bug in this date.
for time_info in lst:
date = time_info[:10] # until hour
date = time_info[:10] # until hour
if not date in d2:
d2[date] = [k]
else:
@ -88,44 +76,42 @@ def revert_dict(d):
def user_difficulty_level(d_user, d):
d_user2 = revert_dict(d_user) # key is date, and value is a list of words added in that date
d_user2 = revert_dict(d_user) # key is date, and value is a list of words added in that date
count = 0
geometric = 1
for date in sorted(d_user2.keys(),
reverse=True): # most recently added words are more important while determining user's level
lst = d_user2[date] # a list of words
lst2 = [] # a list of tuples, (word, difficulty level)
for word in lst:
for date in sorted(d_user2.keys(), reverse=True): # most recently added words are more important while determining user's level
lst = d_user2[date] # a list of words
lst2 = [] # a list of tuples, (word, difficulty level)
for word in lst:
if word in d:
lst2.append((word, d[word]))
lst3 = sort_in_ascending_order(lst2) # easiest tuple first
# print(lst3)
lst3 = sort_in_ascending_order(lst2) # easiest tuple first
#print(lst3)
for t in lst3:
word = t[0]
hard = t[1]
# print('WORD %s HARD %4.2f' % (word, hard))
#print('WORD %s HARD %4.2f' % (word, hard))
geometric = geometric * (hard)
count += 1
if count >= 10:
return geometric ** (1 / count)
return geometric**(1/count)
return geometric ** (1 / max(count, 1))
return geometric**(1/max(count,1))
def text_difficulty_level(s, d):
s = remove_punctuation(s)
L = freq(s)
lst = [] # a list of tuples, each tuple being (word, difficulty level)
stop_words = {'the':1, 'and':1, 'of':1, 'to':1, 'what':1, 'in':1, 'there':1, 'when':1, 'them':1, 'would':1, 'will':1, 'out':1, 'his':1, 'mr':1, 'that':1, 'up':1, 'more':1, 'your':1, 'it':1, 'now':1, 'very':1, 'then':1, 'could':1, 'he':1, 'any':1, 'some':1, 'with':1, 'into':1, 'you':1, 'our':1, 'man':1, 'other':1, 'time':1, 'was':1, 'than':1, 'know':1, 'about':1, 'only':1, 'like':1, 'how':1, 'see':1, 'is':1, 'before':1, 'such':1, 'little':1, 'two':1, 'its':1, 'as':1, 'these':1, 'may':1, 'much':1, 'down':1, 'for':1, 'well':1, 'should':1, 'those':1, 'after':1, 'same':1, 'must':1, 'say':1, 'first':1, 'again':1, 'us':1, 'great':1, 'where':1, 'being':1, 'come':1, 'over':1, 'good':1, 'himself':1, 'am':1, 'never':1, 'on':1, 'old':1, 'here':1, 'way':1, 'at':1, 'go':1, 'upon':1, 'have':1, 'had':1, 'without':1, 'my':1, 'day':1, 'be':1, 'but':1, 'though':1, 'from':1, 'not':1, 'too':1, 'another':1, 'this':1, 'even':1, 'still':1, 'her':1, 'yet':1, 'under':1, 'by':1, 'let':1, 'just':1, 'all':1, 'because':1, 'we':1, 'always':1, 'off':1, 'yes':1, 'so':1, 'while':1, 'why':1, 'which':1, 'me':1, 'are':1, 'or':1, 'no':1, 'if':1, 'an':1, 'also':1, 'thus':1, 'who':1, 'cannot':1, 'she':1, 'whether':1} # ignore these words while computing the artile's difficulty level
lst = [] # a list of tuples, each tuple being (word, difficulty level)
for x in L:
word = x[0]
if word not in stop_words and word in d:
if word in d:
lst.append((word, d[word]))
lst2 = sort_in_descending_order(lst) # most difficult words on top
# print(lst2)
lst2 = sort_in_descending_order(lst) # most difficult words on top
#print(lst2)
count = 0
geometric = 1
for t in lst2:
@ -133,20 +119,24 @@ def text_difficulty_level(s, d):
hard = t[1]
geometric = geometric * (hard)
count += 1
if count >= 20: # we look for n most difficult words
return geometric ** (1 / count)
if count >= 20: # we look for n most difficult words
return geometric**(1/count)
return geometric**(1/max(count,1))
return geometric ** (1 / max(count, 1))
if __name__ == '__main__':
d1 = load_record('frequency.p')
# print(d1)
#print(d1)
d2 = load_record('words_and_tests.p')
# print(d2)
#print(d2)
d3 = get_difficulty_level_for_user(d1, d2)
d3 = get_difficulty_level(d1, d2)
s = '''
South Lawn
@ -207,6 +197,7 @@ Amidst the aftermath of this shocking referendum vote, there is great uncertaint
'''
s = '''
British Prime Minister Boris Johnson walks towards a voting station during the Brexit referendum in Britain, June 23, 2016. (Photo: EPA-EFE)
@ -227,6 +218,7 @@ The prime minister was forced to ask for an extension to Britain's EU departure
Johnson has repeatedly pledged to finalize the first stage, a transition deal, of Britain's EU divorce battle by Oct. 31. A second stage will involve negotiating its future relationship with the EU on trade, security and other salient issues.
'''
s = '''
Thank you very much. We have a Cabinet meeting. Well have a few questions after grace. And, if you would, Ben, please do the honors.
@ -241,11 +233,17 @@ We need — for our farmers, our manufacturers, for, frankly, unions and non-uni
'''
# f = open('bbc-fulltext/bbc/entertainment/001.txt')
#f = open('bbc-fulltext/bbc/entertainment/001.txt')
f = open('wordlist.txt')
s = f.read()
f.close()
print(text_difficulty_level(s, d3))

View File

@ -7,7 +7,6 @@ css:
js:
head: # 在页面加载之前加载
- ../static/js/jquery.js
- ../static/js/read.js
- ../static/js/word_operation.js
bottom: # 在页面加载完之后加载
- ../static/js/fillword.js

View File

@ -1,5 +1,9 @@
let isRead = true;
let isChoose = true;
let reader = window.speechSynthesis; // 全局定义朗读者,以便朗读和暂停
let current_position = 0; // 朗读文本的当前位置
let original_position = 0; // 朗读文本的初始位置
let to_speak = ""; // 朗读的初始内容
function getWord() {
return window.getSelection ? window.getSelection() : document.selection.createRange().text;
@ -7,7 +11,7 @@ function getWord() {
function fillInWord() {
let word = getWord();
if (isRead) Reader.read(word, inputSlider.value);
if (isRead) read(word);
if (!isChoose) return;
const element = document.getElementById("selected-words");
element.value = element.value + " " + word;
@ -15,17 +19,50 @@ function fillInWord() {
document.getElementById("text-content").addEventListener("click", fillInWord, false);
const sliderValue = document.getElementById("rangeValue");
const inputSlider = document.getElementById("rangeComponent");
function makeUtterance(str, rate) {
let msg = new SpeechSynthesisUtterance(str);
msg.rate = rate;
msg.lang = "en-US"; // TODO: add language options menu
msg.onboundary = ev => {
if (ev.name == "word") {
current_position = ev.charIndex;
}
}
return msg;
}
const sliderValue = document.getElementById("rangeValue"); // 显示值
const inputSlider = document.getElementById("rangeComponent"); // 滑块元素
inputSlider.oninput = () => {
let value = inputSlider.value;
let value = inputSlider.value; // 获取滑块的值
sliderValue.textContent = value + '×';
if (!reader.speaking) return;
reader.cancel();
let msg = makeUtterance(to_speak.substring(original_position + current_position), value);
original_position = original_position + current_position;
current_position = 0;
reader.speak(msg);
};
function read(s) {
to_speak = s.toString();
original_position = 0;
current_position = 0;
let msg = makeUtterance(to_speak, inputSlider.value);
reader.speak(msg);
}
function onReadClick() {
isRead = !isRead;
if (!isRead) {
reader.cancel();
}
}
function onChooseClick() {
isChoose = !isChoose;
}
function stopRead() {
reader.cancel();
}

View File

@ -1,35 +0,0 @@
var Reader = (function() {
let reader = window.speechSynthesis;
let current_position = 0;
let original_position = 0;
let to_speak = "";
function makeUtterance(str, rate) {
let msg = new SpeechSynthesisUtterance(str);
msg.rate = rate;
msg.lang = "en-US";
msg.onboundary = ev => {
if (ev.name == "word") {
current_position = ev.charIndex;
}
}
return msg;
}
function read(s, rate) {
to_speak = s.toString();
original_position = 0;
current_position = 0;
let msg = makeUtterance(to_speak, rate);
reader.speak(msg);
}
function stopRead() {
reader.cancel();
}
return {
read: read,
stopRead: stopRead
};
})();

View File

@ -62,13 +62,6 @@ function delete_word(theWord) {
});
}
function read_word(theWord) {
let to_speak = $("#word_" + theWord).text();
original_position = 0;
current_position = 0;
Reader.read(to_speak, inputSlider.value);
}
/*
* interface Word {
* word: string,
@ -102,7 +95,6 @@ function wordTemplate(word) {
<a class="btn btn-success" onclick="familiar('${word.word}')" role="button">熟悉</a>
<a class="btn btn-warning" onclick="unfamiliar('${word.word}')" role="button">不熟悉</a>
<a class="btn btn-danger" onclick="delete_word('${word.word}')" role="button">删除</a>
<a class="btn btn-info" onclick="read_word('${word.word}')" role="button">朗读</a>
</p>`;
}

Binary file not shown.

View File

@ -5,8 +5,6 @@
<meta name="viewport"
content="width=device-width, initial-scale=1.0, minimum-scale=0.5, maximum-scale=3.0, user-scalable=yes"/>
<meta name="format-detection" content="telephone=no"/>
<link href="https://cdn.jsdelivr.net/npm/bootstrap@5.3.1/dist/css/bootstrap.min.css" rel="stylesheet">
<script src="https://cdn.jsdelivr.net/npm/bootstrap@5.3.1/dist/js/bootstrap.bundle.min.js"></script>
{{ yml['header'] | safe }}
{% if yml['css']['item'] %}
@ -28,40 +26,12 @@
}
@keyframes shakes {
10%, 90% {
transform: translate3d(-1px, 0, 0);
}
20%, 50% {
transform: translate3d(+2px, 0, 0);
}
30%, 70% {
transform: translate3d(-4px, 0, 0);
}
40%, 60% {
transform: translate3d(+4px, 0, 0);
}
50% {
transform: translate3d(-4px, 0, 0);
}
10%, 90% { transform: translate3d(-1px, 0, 0); }
20%, 50% { transform: translate3d(+2px, 0, 0); }
30%, 70% { transform: translate3d(-4px, 0, 0); }
40%, 60% { transform: translate3d(+4px, 0, 0); }
50% { transform: translate3d(-4px, 0, 0); }
}
.lead {
font-size: 22px;
font-family: Helvetica, sans-serif;
white-space: pre-wrap;
}
.arrow {
padding: 0;
font-size: 20px;
line-height: 21px;
display: inline-block;
}
.arrow:hover {
cursor: pointer;
}
</style>
</head>
<body>
@ -69,70 +39,53 @@
<p><b>English Pal for <font id="username" color="red">{{ username }}</font></b>
{% if username == admin_name %}
<a class="btn btn-secondary" href="/admin" role="button" onclick="stopRead()">管理</a>
<a class="btn btn-secondary" href="/admin" role="button" onclick="stopRead()">管理</a>
{% endif %}
<a id="quit" class="btn btn-secondary" href="/logout" role="button" onclick="stopRead()">退出</a>
<a class="btn btn-secondary" href="/reset" role="button" onclick="stopRead()">重设密码</a>
</p>
{% for message in get_flashed_messages() %}
<div class="alert alert-warning alert-dismissible fade show" role="alert">
{{ message }}
<button type="button" class="btn-close" data-bs-dismiss="alert" aria-label="Close"></button>
</div>
{% endfor %}
{# {% for message in flashed_messages %}#} {# 根据user_service.userpage,取消了参数flashed_messages因此注释了这段代码 #}
{# <div class="alert alert-warning" role="alert">Congratulations! {{ message }}</div>#}
{# {% endfor %}#}
<button class="arrow" id="load_next_article" onclick="load_next_article();Reader.stopRead()"
title="下一篇 Next Article">⇨
</button>
<button class="arrow" id="load_pre_article" onclick="load_pre_article();Reader.stopRead()" style="display: none"
title="上一篇 Previous Article">⇦
</button>
<button class="btn btn-success" id="load_next_article" onclick="load_next_article()"> 下一篇 Next Article </button>
<button class="btn btn-success" id="load_pre_article" onclick="load_pre_article()" > 上一篇 Previous Article </button>
<p><b>阅读文章并回答问题</b></p>
<div id="text-content">
<div id="found">
<div class="alert alert-success" role="alert">According to your word list, your level is <span
class="text-decoration-underline" id="user_level">{{ today_article["user_level"] }}</span> and we
have chosen an article with a difficulty level of <span class="text-decoration-underline"
id="text_level">{{ today_article["text_level"] }}</span>
for you.
</div>
<p class="text-muted" id="date">Article added on: {{ today_article["date"] }}</p><br/>
<div class="p-3 mb-2 bg-light text-dark" style="margin: 0 0.5%;"><br/>
<p class="display-6" id="article_title">{{ today_article["article_title"] }}</p><br/>
<p class="lead"><font id="article">{{ today_article["article_body"] }}</font></p><br/>
<div>
<p><small class="text-muted" id="source">{{ today_article['source'] }}</small></p><br/>
</div>
<p><b id="question">{{ today_article['question'] }}</b></p><br/>
<div class="alert alert-success" role="alert">According to your word list, your level is <span class="badge bg-success" id="user-level">{{ today_article["user_level"] }}</span> and we have chosen an article with a difficulty level of <span class="badge bg-success" id="text_level">{{ today_article["text_level"] }}</span> for you.</div>
<p class="text-muted" id="date">Article added on: {{ today_article["date"] }}</p><br/>
<div class="p-3 mb-2 bg-light text-dark"><br/>
<p class="display-5" id="article_title">{{ today_article["article_title"] }}</p><br/>
<p class="lead"><font id="article" size=2>{{ today_article["article_body"] }}</font></p><br/>
<p><small class="text-muted" id="source">{{ today_article['source'] }}</small></p><br/>
<p><b id="question">{{ today_article['question'] }}</b></p><br/>
<script type="text/javascript">
function toggle_visibility(id) { {# https://css-tricks.com/snippets/javascript/showhide-element/#}
const e = document.getElementById(id);
if (e.style.display === 'block')
if(e.style.display === 'block')
e.style.display = 'none';
else
e.style.display = 'block';
}
</script>
<button onclick="toggle_visibility('answer');">ANSWER</button>
<div id="answer" style="display:none;">{{ today_article['answer'] }}</div>
<br/>
<div id="answer" style="display:none;">{{ today_article['answer'] }}</div><br/>
</div>
</div>
<div class="alert alert-success" role="alert" id="not_found" style="display:none;">
<p class="text-muted"><span class="badge bg-success">Notes:</span><br>No article is currently available for
you. You can try again a few times or mark new words in the passage to improve your level.</p>
<p class="text-muted"><span class="badge bg-success">Notes:</span><br>No article is currently available for you. You can try again a few times or mark new words in the passage to improve your level.</p>
</div>
<div class="alert alert-success" role="alert" id="read_all" style="display:none;">
<p class="text-muted"><span class="badge bg-success">Notes:</span><br>You've read all the articles.</p>
</div>
</div>
<input type="checkbox" id="highlightCheckbox" onclick="toggleHighlighting()"/>生词高亮
<input type="checkbox" id="readCheckbox" onclick="onReadClick()"/>大声朗读
<input type="checkbox" id="chooseCheckbox" onclick="onChooseClick()"/>划词入库
<input type="checkbox" onclick="toggleHighlighting()" checked/>生词高亮
<input type="checkbox" onclick="onReadClick()" checked/>大声朗读
<input type="checkbox" onclick="onChooseClick()" checked/>划词入库
<div class="range">
<div class="field">
<div class="sliderValue">
@ -144,8 +97,8 @@
<p><b>收集生词吧</b> (可以在正文中划词,也可以复制黏贴)</p>
<form method="post" action="/{{ username }}/userpage">
<textarea name="content" id="selected-words" rows="10" cols="120"></textarea><br/>
<button class="btn btn-primary btn-lg" type="submit" onclick="Reader.stopRead()">把生词加入我的生词库</button>
<button class="btn btn-primary btn-lg" type="reset" onclick="clearSelectedWords()">清除</button>
<input type="submit" value="把生词加入我的生词库"/>
<input type="reset" value="清除"/>
</form>
{% if session.get['thisWord'] %}
<script type="text/javascript">
@ -160,7 +113,7 @@
{% if d_len > 0 %}
<p>
<b>我的生词簿</b>
<b>我的生词簿</b>
<label for="move_dynamiclly">
<input type="checkbox" name="move_dynamiclly" id="move_dynamiclly" checked>
允许动态调整顺序
@ -173,15 +126,13 @@
{% set freq = x[1] %}
{% if session.get('thisWord') == x[0] and session.get('time') == 1 %}
{% endif %}
<p id='p_{{ word }}' class="new-word">
<a id="word_{{ word }}" class="btn btn-light"
href='http://youdao.com/w/eng/{{ word }}/#keyfrom=dict2.index'
role="button">{{ word }}</a>
<p id='p_{{ word }}' class="new-word" >
<a id="word_{{ word }}" class="btn btn-light" href='http://youdao.com/w/eng/{{ word }}/#keyfrom=dict2.index'
role="button">{{ word }}</a>
( <a id="freq_{{ word }}" title="{{ word }}">{{ freq }}</a> )
<a class="btn btn-success" onclick="familiar('{{ word }}')" role="button">熟悉</a>
<a class="btn btn-warning" onclick="unfamiliar('{{ word }}')" role="button">不熟悉</a>
<a class="btn btn-danger" onclick="delete_word('{{ word }}')" role="button">删除</a>
<a class="btn btn-info" onclick="read_word('{{ word }}')" role="button">朗读</a>
</p>
{% endfor %}
</div>
@ -195,127 +146,60 @@
{% endfor %}
{% endif %}
<script type="text/javascript">
window.onload = function () { // 页面加载时执行
const settings = {
// initialize settings from localStorage
highlightChecked: localStorage.getItem('highlightChecked') !== 'false', // localStorage stores strings, default to true. same below
readChecked: localStorage.getItem('readChecked') !== 'false',
chooseChecked: localStorage.getItem('chooseChecked') !== 'false',
rangeValue: localStorage.getItem('rangeValue') || '1',
selectedWords: localStorage.getItem('selectedWords') || ''
};
const elements = {
highlightCheckbox: document.querySelector('#highlightCheckbox'),
readCheckbox: document.querySelector('#readCheckbox'),
chooseCheckbox: document.querySelector('#chooseCheckbox'),
rangeComponent: document.querySelector('#rangeComponent'),
rangeValueDisplay: document.querySelector('#rangeValue'),
selectedWordsInput: document.querySelector('#selected-words')
};
// 应用设置到页面元素
elements.highlightCheckbox.checked = settings.highlightChecked;
elements.readCheckbox.checked = settings.readChecked;
elements.chooseCheckbox.checked = settings.chooseChecked;
elements.rangeComponent.value = settings.rangeValue;
elements.rangeValueDisplay.textContent = `${settings.rangeValue}x`;
elements.selectedWordsInput.value = settings.selectedWords;
// 刷新页面或进入页面时判断,若不是首篇文章,则上一篇按钮可见
if (sessionStorage.getItem('pre_page_button') !== 'display' && sessionStorage.getItem('pre_page_button')) {
$('#load_pre_article').show();
}
// 事件监听器
elements.selectedWordsInput.addEventListener('input', () => {
localStorage.setItem('selectedWords', elements.selectedWordsInput.value);
});
elements.rangeComponent.addEventListener('input', () => {
const rangeValue = elements.rangeComponent.value;
elements.rangeValueDisplay.textContent = `${rangeValue}x`;
localStorage.setItem('rangeValue', rangeValue);
});
};
function clearSelectedWords() {
localStorage.removeItem('selectedWords');
document.querySelector('#selected-words').value = '';
}
function load_next_article() {
$("#load_next_article").prop("disabled", true)
function load_next_article(){
$.ajax({
url: '/get_next_article/{{username}}',
dataType: 'json',
success: function (data) {
success: function(data) {
// 更新页面内容
if (data['today_article']) {
if(data['today_article']){
update(data['today_article']);
check_pre(data['visited_articles']);
check_next(data['result_of_generate_article']);
}
}, complete: function (xhr, status) {
$("#load_next_article").prop("disabled", false)
}
});
}
function load_pre_article() {
function load_pre_article(){
$.ajax({
url: '/get_pre_article/{{username}}',
dataType: 'json',
success: function (data) {
success: function(data) {
// 更新页面内容
if (data['today_article']) {
if(data['today_article']){
update(data['today_article']);
check_pre(data['visited_articles']);
}
}
});
}
function update(today_article) {
$('#user_level').html(today_article['user_level']);
function update(today_article){
$('#user-level').html(today_article['user_level']);
$('#text_level').html(today_article["text_level"]);
$('#date').html('Article added on: ' + today_article["date"]);
$('#date').html('Article added on: '+today_article["date"]);
$('#article_title').html(today_article["article_title"]);
$('#article').html(today_article["article_body"]);
$('#source').html(today_article['source']);
$('#question').html(today_article["question"]);
$('#answer').html(today_article["answer"]);
document.querySelector('#text_level').classList.add('mark'); // highlight text difficult level for 2 seconds
setTimeout(() => {
document.querySelector('#text_level').classList.remove('mark');
}, 2000);
document.querySelector('#user_level').classList.add('mark'); // do the same thing for user difficulty level
setTimeout(() => {
document.querySelector('#user_level').classList.remove('mark');
}, 2000);
}
<!-- 检查是否存在上一篇或下一篇,不存在则对应按钮隐藏-->
function check_pre(visited_articles) {
if ((visited_articles == '') || (visited_articles['index'] <= 0)) {
<!-- 检查是否存在上一篇或下一篇,不存在则对应按钮隐藏-->
function check_pre(visited_articles){
if((visited_articles=='')||(visited_articles['index']<=0)){
$('#load_pre_article').hide();
sessionStorage.setItem('pre_page_button', 'display')
} else {
}else{
$('#load_pre_article').show();
sessionStorage.setItem('pre_page_button', 'show')
}
}
function check_next(result_of_generate_article) {
if (result_of_generate_article == "found") {
$('#found').show();
$('#not_found').hide();
function check_next(result_of_generate_article){
if(result_of_generate_article == "found"){
$('#found').show();$('#not_found').hide();
$('#read_all').hide();
} else if (result_of_generate_article == "not found") {
}else if(result_of_generate_article == "not found"){
$('#found').hide();
$('#not_found').show();
$('#read_all').hide();
} else {
}else{
$('#found').hide();
$('#not_found').hide();
$('#read_all').show();

View File

@ -1,85 +0,0 @@
''' Contributed by Lin Junhong et al. 2023-06.'''
from selenium import webdriver
from selenium.webdriver.common.desired_capabilities import DesiredCapabilities
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from selenium.common.exceptions import UnexpectedAlertPresentException, NoAlertPresentException
import random, time
import string
# 初始化webdriver
# driver = webdriver.Remote('http://localhost:4444/wd/hub', DesiredCapabilities.CHROME)
# driver.implicitly_wait(10)
driver = webdriver.Chrome("C:\\Users\\12993\AppData\Local\Programs\Python\Python38\\chromedriver.exe")
def test_next_article():
try:
driver.get("http://118.25.96.118:90")
assert 'English Pal -' in driver.page_source
# login
elem = driver.find_element_by_link_text('登录')
elem.click()
uname = 'abcdefg'
password = 'abcdefg'
elem = driver.find_element_by_id('username')
elem.send_keys(uname)
elem = driver.find_element_by_id('password')
elem.send_keys(password)
elem = driver.find_element_by_xpath('/html/body/div/button') # 找到登录按钮
elem.click()
time.sleep(0.5)
assert 'EnglishPal Study Room for ' + uname in driver.title
for i in range(50):
time.sleep(0.1)
# 找到固定按钮
elem = driver.find_element_by_xpath('//*[@id="load_next_article"]')
elem.click()
except Exception as e:
print(e)
def test_local_next_article():
try:
driver.get("http://127.0.0.1:5000")
assert 'English Pal -' in driver.page_source
# login
elem = driver.find_element_by_link_text('注册')
elem.click()
uname = 'abcdefg'
password = 'abcdefg'
elem = driver.find_element_by_id('username')
elem.send_keys(uname)
elem = driver.find_element_by_id('password')
elem.send_keys(password)
elem = driver.find_element_by_id('password2')
elem.send_keys(password)
time.sleep(0.5)
elem = driver.find_element_by_class_name('btn') # 找到提交按钮
elem.click()
time.sleep(0.5)
try:
WebDriverWait(driver, 1).until(EC.alert_is_present())
driver.switch_to.alert.accept()
except (UnexpectedAlertPresentException, NoAlertPresentException):
pass
time.sleep(0.5)
assert 'EnglishPal Study Room for ' + uname in driver.title
for i in range(50):
time.sleep(0.1)
# 找到固定按钮
elem = driver.find_element_by_xpath('//*[@id="load_next_article"]')
elem.click()
except Exception as e:
print(e)

View File

@ -39,7 +39,8 @@ def file2str(fname):#文件转字符
def remove_punctuation(s): # 这里是s是形参 (parameter)。函数被调用时才给s赋值。
special_characters = '\_©~<=>+/[]*&$%^@.,?!:;#()"“”—‘’{}|' # 把里面的字符都去掉
special_characters = '\_©~<=>+-/[]*&$%^@.,?!:;#()"“”—‘’{}|《》【】、!¥();:?。,' # 把里面的字符都去掉
for c in special_characters:
s = s.replace(c, ' ') # 防止出现把 apple,apple 移掉逗号后变成 appleapple 情况
s = s.replace('--', ' ')
@ -103,6 +104,7 @@ if __name__ == '__main__':
for x in sort_in_descending_order(L):
print('%s\t%d\t%s' % (x[0], x[1], youdao_link(x[0])))#函数导出
# 把频率的结果放result.html中
make_html_page(sort_in_descending_order(L), 'result.html')
@ -117,6 +119,7 @@ if __name__ == '__main__':
# 合并频率
lst_history = pickle_idea.dict2lst(d)
d = pickle_idea.merge_frequency(L, lst_history)
pickle_idea.save_frequency_to_pickle(d, 'frequency.p')

View File

@ -1,8 +1,3 @@
Flask==2.0.3
Flask==1.1.2
selenium==3.141.0
PyYAML~=6.0
pony==0.7.16
snowballstemmer==2.2.0
Werkzeug==2.2.2
pytest~=8.1.1

View File

@ -1,94 +0,0 @@
# Run this test script on the command line:
# pytest test_vocabulary.py
#
# Last modified by Mr Lan Hui on 2025-03-05
from vocabulary import UserVocabularyLevel, ArticleVocabularyLevel
def test_article_level_empty_content():
''' Boundary case test '''
article = ArticleVocabularyLevel('')
assert article.level == 0
def test_article_level_punctuation_only():
''' Boundary case test '''
article = ArticleVocabularyLevel(',')
assert article.level == 0
def test_article_level_digit_only():
''' Boundary case test '''
article = ArticleVocabularyLevel('1')
assert article.level == 0
def test_article_level_single_word():
''' Boundary case test '''
article = ArticleVocabularyLevel('source')
assert 2 <= article.level <= 4
def test_article_level_subset_vs_superset():
''' Boundary case test '''
article1 = ArticleVocabularyLevel('source')
article2 = ArticleVocabularyLevel('open source')
assert article1.level < article2.level
def test_article_level_multiple_words():
''' Boundary case test '''
article = ArticleVocabularyLevel('Producing Open Source Software - How to Run a Successful Free Software Project')
assert 3 <= article.level <= 5
def test_article_level_short_paragraph():
''' Boundary case test '''
article = ArticleVocabularyLevel('At parties, people no longer give me a blank stare when I tell them I work in open source software. "Oh, yes — like Linux?" they say. I nod eagerly in agreement. "Yes, exactly! That\'s what I do." It\'s nice not to be completely fringe anymore. In the past, the next question was usually fairly predictable: "How do you make money doing that?" To answer, I\'d summarize the economics of free software: that there are organizations in whose interest it is to have certain software exist, but that they don\'t need to sell copies, they just want to make sure the software is available and maintained, as a tool instead of as a rentable monopoly.')
assert 4 <= article.level <= 6
def test_article_level_medium_paragraph():
''' Boundary case test '''
article = ArticleVocabularyLevel('In considering the Origin of Species, it is quite conceivable that a naturalist, reflecting on the mutual affinities of organic beings, on their embryological relations, their geographical distribution, geological succession, and other such facts, might come to the conclusion that each species had not been independently created, but had descended, like varieties, from other species. Nevertheless, such a conclusion, even if well founded, would be unsatisfactory, until it could be shown how the innumerable species inhabiting this world have been modified, so as to acquire that perfection of structure and coadaptation which most justly excites our admiration. Naturalists continually refer to external conditions, such as climate, food, etc., as the only possible cause of variation. In one very limited sense, as we shall hereafter see, this may be true; but it is preposterous to attribute to mere external conditions, the structure, for instance, of the woodpecker, with its feet, tail, beak, and tongue, so admirably adapted to catch insects under the bark of trees. In the case of the misseltoe, which draws its nourishment from certain trees, which has seeds that must be transported by certain birds, and which has flowers with separate sexes absolutely requiring the agency of certain insects to bring pollen from one flower to the other, it is equally preposterous to account for the structure of this parasite, with its relations to several distinct organic beings, by the effects of external conditions, or of habit, or of the volition of the plant itself.')
assert 5 <= article.level <= 7
def test_article_level_long_paragraph():
''' Boundary case test '''
article = ArticleVocabularyLevel('These several facts accord well with my theory. I believe in no fixed law of development, causing all the inhabitants of a country to change abruptly, or simultaneously, or to an equal degree. The process of modification must be extremely slow. The variability of each species is quite independent of that of all others. Whether such variability be taken advantage of by natural selection, and whether the variations be accumulated to a greater or lesser amount, thus causing a greater or lesser amount of modification in the varying species, depends on many complex contingencies,—on the variability being of a beneficial nature, on the power of intercrossing, on the rate of breeding, on the slowly changing physical conditions of the country, and more especially on the nature of the other inhabitants with which the varying species comes into competition. Hence it is by no means surprising that one species should retain the same identical form much longer than others; or, if changing, that it should change less. We see the same fact in geographical distribution; for instance, in the land-shells and coleopterous insects of Madeira having come to differ considerably from their nearest allies on the continent of Europe, whereas the marine shells and birds have remained unaltered. We can perhaps understand the apparently quicker rate of change in terrestrial and in more highly organised productions compared with marine and lower productions, by the more complex relations of the higher beings to their organic and inorganic conditions of life, as explained in a former chapter. When many of the inhabitants of a country have become modified and improved, we can understand, on the principle of competition, and on that of the many all-important relations of organism to organism, that any form which does not become in some degree modified and improved, will be liable to be exterminated. Hence we can see why all the species in the same region do at last, if we look to wide enough intervals of time, become modified; for those which do not change will become extinct.')
assert 6 <= article.level <= 8
def test_user_level_empty_dictionary():
''' Boundary case test '''
user = UserVocabularyLevel({})
assert user.level == 0
def test_user_level_one_simple_word():
''' Boundary case test '''
user = UserVocabularyLevel({'simple':['202408050930']})
assert 0 < user.level <= 4
def test_user_level_invalid_word():
''' Boundary case test '''
user = UserVocabularyLevel({'xyz':['202408050930']})
assert user.level == 0
def test_user_level_one_hard_word():
''' Boundary case test '''
user = UserVocabularyLevel({'pasture':['202408050930']})
assert 5 <= user.level <= 8
def test_user_level_multiple_words():
''' Boundary case test '''
user = UserVocabularyLevel(
{'sessile': ['202408050930'], 'putrid': ['202408050930'], 'prodigal': ['202408050930'], 'presumptuous': ['202408050930'], 'prehension': ['202408050930'], 'pied': ['202408050930'], 'pedunculated': ['202408050930'], 'pasture': ['202408050930'], 'parturition': ['202408050930'], 'ovigerous': ['202408050930'], 'ova': ['202408050930'], 'orifice': ['202408050930'], 'obliterate': ['202408050930'], 'niggard': ['202408050930'], 'neuter': ['202408050930'], 'locomotion': ['202408050930'], 'lineal': ['202408050930'], 'glottis': ['202408050930'], 'frivolous': ['202408050930'], 'frena': ['202408050930'], 'flotation': ['202408050930'], 'ductus': ['202408050930'], 'dorsal': ['202408050930'], 'dearth': ['202408050930'], 'crustacean': ['202408050930'], 'cornea': ['202408050930'], 'contrivance': ['202408050930'], 'collateral': ['202408050930'], 'cirriped': ['202408050930'], 'canon': ['202408050930'], 'branchiae': ['202408050930'], 'auditory': ['202408050930'], 'articulata': ['202408050930'], 'alimentary': ['202408050930'], 'adduce': ['202408050930'], 'aberration': ['202408050930']}
)
assert 6 <= user.level <= 8
def test_user_level_consider_only_most_recent_words_difficult_words_most_recent():
''' Consider only the most recent three words '''
user = UserVocabularyLevel(
{'pasture':['202408050930'], 'putrid': ['202408040000'], 'frivolous':['202408030000'], 'simple':['202408020000'], 'apple':['202408010000']}
)
assert 5 <= user.level <= 8
def test_user_level_consider_only_most_recent_words_easy_words_most_recent():
''' Consider only the most recent three words '''
user = UserVocabularyLevel(
{'simple':['202408050930'], 'apple': ['202408040000'], 'happy':['202408030000'], 'pasture':['202408020000'], 'putrid':['202408010000'], 'dearth':['202407310000']}
)
assert 4 <= user.level <= 5