Bug511-Bosh #144

Closed
bosh wants to merge 10 commits from Bug511-Bosh into Alpha-snapshot20230621OK
7 changed files with 235 additions and 51 deletions

12
app/WordFreq.py Normal file → Executable file
View File

@ -2,24 +2,24 @@
# Copyright 2019 (C) Hui Lan <hui.lan@cantab.net>
# Written permission must be obtained from the author for commercial uses.
###########################################################################
from wordfreqCMD import remove_punctuation, freq, sort_in_descending_order
import string
class WordFreq:
    """Word-frequency counter for a piece of text.

    The text has its punctuation stripped once, at construction time, by
    ``remove_punctuation``.
    """

    def __init__(self, s, max_word_length=30):
        """Store the cleaned text and the word-length limit.

        s: the raw text to analyse.
        max_word_length: forwarded to ``freq`` as its length limit
            (default 30).
        """
        self.s = remove_punctuation(s)
        self.max_word_length = max_word_length

    def get_freq(self):
        """Return the (word, count) tuples whose word starts with an ASCII letter.

        The tuples come from ``freq`` and are ordered by
        ``sort_in_descending_order``.
        """
        lst = [
            t
            for t in freq(self.s, self.max_word_length)
            # Skip empty tokens and tokens that start with a digit or symbol.
            if len(t[0]) > 0 and t[0][0] in string.ascii_letters
        ]
        return sort_in_descending_order(lst)
if __name__ == '__main__':
    # Small demo: count the words of a sample sentence and print the result.
    f = WordFreq('BANANA; Banana, apple ORANGE Banana banana.', max_word_length=30)
    print(f.get_freq())

63
app/templates/userpage_get.html Normal file → Executable file
View File

@ -86,7 +86,7 @@
<div>
<p><small class="text-muted" id="source">{{ today_article['source'] }}</small></p><br/>
</div>
<p><b id="question">{{ today_article['question'] }}</b></p><br/>
<script type="text/javascript">
function toggle_visibility(id) { {# https://css-tricks.com/snippets/javascript/showhide-element/#}
@ -109,22 +109,22 @@
</div>
</div>
<input type="checkbox" onclick="toggleHighlighting()" checked/>生词高亮
<input type="checkbox" onclick="onReadClick()" checked/>大声朗读
<input type="checkbox" onclick="onChooseClick()" checked/>划词入库
<input type="checkbox" id="highlightCheckbox" onclick="toggleHighlighting()" />生词高亮
<input type="checkbox" id="readCheckbox" onclick="onReadClick()" />大声朗读
<input type="checkbox" id="chooseCheckbox" onclick="onChooseClick()" />划词入库
<div class="range">
<div class="field">
<div class="sliderValue">
<span id="rangeValue">1×</span>
</div>
<input type="range" id="rangeComponent" min="0.5" max="2" value="1" step="0.25"/>
<input type="range" id="rangeComponent" min="0.5" max="2" value="1" step="0.25" />
</div>
</div>
<p><b>收集生词吧</b> (可以在正文中划词,也可以复制黏贴)</p>
<form method="post" action="/{{ username }}/userpage">
<textarea name="content" id="selected-words" rows="10" cols="120"></textarea><br/>
<button class="btn btn-primary btn-lg" type="submit" onclick="Reader.stopRead()">把生词加入我的生词库</button>
<button class="btn btn-primary btn-lg" type="reset">清除</button>
<button class="btn btn-primary btn-lg" type="reset" onclick="clearSelectedWords()">清除</button>
</form>
{# session.get is a method and must be called; subscripting it is always undefined, so the branch never ran. #}
{% if session.get('thisWord') %}
<script type="text/javascript">
@ -139,7 +139,7 @@
{% if d_len > 0 %}
<p>
<b>我的生词簿</b>
<b>我的生词簿</b>
<label for="move_dynamiclly">
<input type="checkbox" name="move_dynamiclly" id="move_dynamiclly" checked>
允许动态调整顺序
@ -174,11 +174,54 @@
{% endif %}
<script type="text/javascript">
window.onload = function () { // runs once the page has finished loading
    // Restore the saved settings. localStorage only stores strings, so a
    // checkbox counts as checked unless the string 'false' was saved.
    const settings = {
        highlightChecked: localStorage.getItem('highlightChecked') !== 'false',
        readChecked: localStorage.getItem('readChecked') !== 'false',
        chooseChecked: localStorage.getItem('chooseChecked') !== 'false',
        rangeValue: localStorage.getItem('rangeValue') || '1',
        selectedWords: localStorage.getItem('selectedWords') || ''
    };

    const elements = {
        highlightCheckbox: document.querySelector('#highlightCheckbox'),
        readCheckbox: document.querySelector('#readCheckbox'),
        chooseCheckbox: document.querySelector('#chooseCheckbox'),
        rangeComponent: document.querySelector('#rangeComponent'),
        rangeValueDisplay: document.querySelector('#rangeValue'),
        selectedWordsInput: document.querySelector('#selected-words')
    };

    // Apply the settings to the page elements.
    elements.highlightCheckbox.checked = settings.highlightChecked;
    elements.readCheckbox.checked = settings.readChecked;
    elements.chooseCheckbox.checked = settings.chooseChecked;
    elements.rangeComponent.value = settings.rangeValue;
    // Use the multiplication sign, like the initial text of #rangeValue ("1×").
    elements.rangeValueDisplay.textContent = `${settings.rangeValue}×`;
    elements.selectedWordsInput.value = settings.selectedWords;

    // On load or refresh: unless this is the first article, show the
    // "previous article" button.
    const prePageButton = sessionStorage.getItem('pre_page_button');
    if (prePageButton && prePageButton !== 'display') {
        $('#load_pre_article').show();
    }

    // Event listeners: persist the word box and the slider on every change.
    elements.selectedWordsInput.addEventListener('input', () => {
        localStorage.setItem('selectedWords', elements.selectedWordsInput.value);
    });

    elements.rangeComponent.addEventListener('input', () => {
        const rangeValue = elements.rangeComponent.value;
        elements.rangeValueDisplay.textContent = `${rangeValue}×`;
        localStorage.setItem('rangeValue', rangeValue);
    });
};
// Forget the collected words: drop the saved draft and empty the text box.
function clearSelectedWords() {
    const wordsBox = document.querySelector('#selected-words');
    localStorage.removeItem('selectedWords');
    wordsBox.value = '';
}
function load_next_article(){
$.ajax({
url: '/get_next_article/{{username}}',
@ -248,7 +291,7 @@
</body>
<style>
mark {
color: #{{ yml['highlight']['color'] }};
color: red;
background-color: rgba(0, 0, 0, 0);
}
</style>

View File

@ -0,0 +1,85 @@
''' Contributed by Lin Junhong et al. 2023-06.'''
from selenium import webdriver
from selenium.webdriver.common.desired_capabilities import DesiredCapabilities
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from selenium.common.exceptions import UnexpectedAlertPresentException, NoAlertPresentException
import random, time
import string
# Initialise the webdriver.
# Alternative kept for reference: use a remote Selenium hub instead.
# driver = webdriver.Remote('http://localhost:4444/wd/hub', DesiredCapabilities.CHROME)
# driver.implicitly_wait(10)
# Raw string: the backslashes of the Windows path must not be read as escapes.
driver = webdriver.Chrome(r"C:\Users\12993\AppData\Local\Programs\Python\Python38\chromedriver.exe")
def test_next_article():
    """Log in on the deployed site and press the next-article button 50 times.

    Errors (failed assertions, missing elements) are allowed to propagate so
    that the test runner reports them; catching and printing them would make
    the test pass no matter what happened.
    """
    driver.get("http://118.25.96.118:90")
    assert 'English Pal -' in driver.page_source

    # login
    elem = driver.find_element_by_link_text('登录')
    elem.click()

    uname = 'abcdefg'
    password = 'abcdefg'

    elem = driver.find_element_by_id('username')
    elem.send_keys(uname)

    elem = driver.find_element_by_id('password')
    elem.send_keys(password)

    elem = driver.find_element_by_xpath('/html/body/div/button')  # the login button
    elem.click()

    time.sleep(0.5)
    assert 'EnglishPal Study Room for ' + uname in driver.title

    for _ in range(50):
        time.sleep(0.1)
        # Find the fixed "next article" button and press it.
        elem = driver.find_element_by_xpath('//*[@id="load_next_article"]')
        elem.click()
def test_local_next_article():
    """Register on the local server and press the next-article button 50 times.

    Errors (failed assertions, missing elements) are allowed to propagate so
    that the test runner reports them; catching and printing them would make
    the test pass no matter what happened.
    """
    # Local import: WebDriverWait.until() raises this when no alert appears.
    from selenium.common.exceptions import TimeoutException

    driver.get("http://127.0.0.1:5000")
    assert 'English Pal -' in driver.page_source

    # register
    elem = driver.find_element_by_link_text('注册')
    elem.click()

    uname = 'abcdefg'
    password = 'abcdefg'

    elem = driver.find_element_by_id('username')
    elem.send_keys(uname)

    elem = driver.find_element_by_id('password')
    elem.send_keys(password)

    elem = driver.find_element_by_id('password2')
    elem.send_keys(password)

    time.sleep(0.5)
    elem = driver.find_element_by_class_name('btn')  # the submit button
    elem.click()

    time.sleep(0.5)
    # Accept the alert if one shows up. NOTE(review): a missing alert is
    # treated as acceptable, as the original handler did - confirm.
    try:
        WebDriverWait(driver, 1).until(EC.alert_is_present())
        driver.switch_to.alert.accept()
    except (UnexpectedAlertPresentException, NoAlertPresentException, TimeoutException):
        pass

    time.sleep(0.5)
    assert 'EnglishPal Study Room for ' + uname in driver.title

    for _ in range(50):
        time.sleep(0.1)
        # Find the fixed "next article" button and press it.
        elem = driver.find_element_by_xpath('//*[@id="load_next_article"]')
        elem.click()

56
app/test_bug511_Bosh.py Executable file
View File

@ -0,0 +1,56 @@
###########################################################################
# Copyright 2019 (C) Hui Lan <hui.lan@cantab.net>
# Written permission must be obtained from the author for commercial uses.
###########################################################################
import unittest
from wordfreqCMD import remove_punctuation, freq, sort_in_descending_order
from WordFreq import WordFreq
class TestWordFrequency(unittest.TestCase):
    """Unit tests for WordFreq.get_freq, including the max_word_length filter."""

    def test_word_frequency_normal_case(self):
        """Words are counted case-insensitively; ties are in reverse alphabetical order."""
        text = "BANANA; Banana, apple ORANGE Banana banana."
        wf = WordFreq(text)
        result = wf.get_freq()
        expected = [('banana', 4), ('orange', 1), ('apple', 1)]
        self.assertEqual(result, expected)

    def test_word_frequency_with_long_word(self):
        """A 31-letter word is dropped when the limit is 30."""
        text = "apple banana " + "a" * 31 + " orange banana apple"
        wf = WordFreq(text, max_word_length=30)
        result = wf.get_freq()
        expected = [('banana', 2), ('apple', 2), ('orange', 1)]
        self.assertEqual(result, expected)

    def test_word_frequency_all_long_words(self):
        """When every word exceeds the limit the result is empty."""
        text = "a" * 31 + " " + "b" * 32 + " " + "c" * 33
        wf = WordFreq(text, max_word_length=30)
        result = wf.get_freq()
        expected = []
        self.assertEqual(result, expected)

    def test_word_frequency_with_punctuation(self):
        """Punctuation around words does not affect the counts."""
        text = "Hello, world! Hello... hello; 'hello' --world--"
        wf = WordFreq(text)
        result = wf.get_freq()
        expected = [('hello', 4), ('world', 2)]
        self.assertEqual(result, expected)

    def test_word_frequency_empty_string(self):
        """An empty text gives an empty result."""
        text = ""
        wf = WordFreq(text)
        result = wf.get_freq()
        expected = []
        self.assertEqual(result, expected)

    def test_word_frequency_with_max_length_parameter(self):
        """With a limit of 5, 'banana' (6 letters) is dropped and 'apple' is kept."""
        text = "apple banana apple"
        wf = WordFreq(text, max_word_length=5)
        result = wf.get_freq()
        expected = [('apple', 2)]
        self.assertEqual(result, expected)
if __name__ == '__main__':
    # Run the tests when this file is executed directly.
    unittest.main()

68
app/wordfreqCMD.py Normal file → Executable file
View File

@ -2,56 +2,56 @@
# Copyright 2019 (C) Hui Lan <hui.lan@cantab.net>
# Written permission must be obtained from the author for commercial uses.
###########################################################################
import collections
import string
import operator
import os, sys # 引入模块sys因为我要用里面的sys.argv列表中的信息来读取命令行参数。
import os
import sys
import pickle_idea
def freq(fruit, max_word_length=30):
    """Count how often each word occurs in a string.

    The string is lower-cased first, so 'Apple' and 'apple' count as the
    same word, and is then split on whitespace.

    Args:
        fruit: the text to analyse.
        max_word_length: words longer than this many characters are left
            out of the result.

    Returns:
        A list of (word, count) tuples, most frequent first, for example
        [('apple', 2), ('banana', 1)].
    """
    counts = collections.Counter(fruit.lower().split())
    return [
        (word, count)
        for word, count in counts.most_common()
        if len(word) <= max_word_length
    ]
def youdao_link(s):
    """Return the Youdao link for the word s."""
    return 'http://youdao.com/w/eng/' + s + '/#keyfrom=dict2.index'
def file2str(fname):
    """Read the file named fname and return its whole content as one string."""
    # The with-statement closes the file even if reading fails.
    with open(fname) as f:
        return f.read()
def remove_punctuation(s): # 这里是s是形参 (parameter)。函数被调用时才给s赋值。
special_characters = '\_©~<=>+/[]*&$%^@.,?!:;#()"“”—‘’{}|' # 把里面的字符都去掉
def remove_punctuation(s): # 这里是s是形参(parameter)。函数被调用时才给s赋值。
special_characters = r'\_©~<=>+/[]*&$%^@.,?!:;#()"“”—‘’{}|' # 把里面的字符都去掉
for c in special_characters:
s = s.replace(c, ' ') # 防止出现把 apple,apple 移掉逗号后变成 appleapple 情况
s = s.replace(c, ' ') # 防止出现把 apple,apple 移掉逗号后变成 appleapple 情况
s = s.replace('--', ' ')
s = s.strip() # 去除前后的空格
s = s.strip() # 去除前后的空格
if '\'' in s:
n = len(s)
t = '' # 用来收集我需要保留的字符
for i in range(n): # 只有单引号前后都有英文字符,才保留
t = '' # 用来收集我需要保留的字符
for i in range(n): # 只有单引号前后都有英文字符,才保留
if s[i] == '\'':
i_is_ok = i - 1 >= 0 and i + 1 < n
if i_is_ok and s[i-1] in string.ascii_letters and s[i+1] in string.ascii_letters:
if i_is_ok and s[i - 1] in string.ascii_letters and s[i + 1] in string.ascii_letters:
t += s[i]
else:
t += s[i]
@ -60,12 +60,12 @@ def remove_punctuation(s): # 这里是s是形参 (parameter)。函数被调用
return s
def sort_in_descending_order(lst):
    """Return a new list of (word, count) tuples sorted by count, highest first.

    Tuples with equal counts are ordered by word in reverse alphabetical
    order. The input list is not modified.
    """
    return sorted(lst, key=lambda x: (x[1], x[0]), reverse=True)
def sort_in_ascending_order(lst):
    """Return a new list of (word, count) tuples sorted by count, lowest first.

    Tuples with equal counts are ordered alphabetically by word. The input
    list is not modified.
    """
    return sorted(lst, key=lambda x: (x[1], x[0]))
@ -80,31 +80,30 @@ def make_html_page(lst, fname): # 只是在wordfreqCMD.py中的main函数中调
# <a href="">word</a>
s += '<p>%d <a href="%s">%s</a> (%d)</p>' % (count, youdao_link(x[0]), x[0], x[1])
count += 1
f = open(fname, 'w')
f.write(s)
f.close()
with open(fname, 'w') as f:
f.write(s)
## main程序入口
if __name__ == '__main__':
num = len(sys.argv)
if num == 1: # 从键盘读入字符串
if num == 1: # 从键盘读入字符串
s = input()
elif num == 2: # 从文件读入字符串
elif num == 2: # 从文件读入字符串
fname = sys.argv[1]
s = file2str(fname)
else:
print('I can accept at most 2 arguments.')
sys.exit()# 结束程序运行, 下面的代码不会被执行了。
sys.exit() # 结束程序运行,下面的代码不会被执行了。
s = remove_punctuation(s) # 这里是s是实参(argument),里面有值
s = remove_punctuation(s) # 这里是s是实参(argument),里面有值
L = freq(s)
for x in sort_in_descending_order(L):
print('%s\t%d\t%s' % (x[0], x[1], youdao_link(x[0])))#函数导出
print('%s\t%d\t%s' % (x[0], x[1], youdao_link(x[0]))) # 函数导出
# 把频率的结果放result.html中
make_html_page(sort_in_descending_order(L), 'result.html')
make_html_page(sort_in_descending_order(L), 'result.html')
print('\nHistory:\n')
if os.path.exists('frequency.p'):
@ -120,4 +119,3 @@ if __name__ == '__main__':
pickle_idea.save_frequency_to_pickle(d, 'frequency.p')

View File

@ -4,3 +4,5 @@ PyYAML~=6.0
pony==0.7.16
snowballstemmer==2.2.0
Werkzeug==2.2.2
pytest~=8.1.1