BugFix358 + Improve + Refactor

Bug358-TengJiaQian
Stela 2022-06-06 19:42:26 +08:00
parent b9cf94da74
commit 3654776aec
6 changed files with 136 additions and 28 deletions

View File

@ -0,0 +1,27 @@
<!DOCTYPE html>
<html lang="en">
<head>
    <meta charset="utf-8">
    <meta name="viewport" content="width=device-width, initial-scale=1.0, minimum-scale=0.5, maximum-scale=3.0, user-scalable=yes" />
    <title>EnglishPal - Analyse Result</title>
</head>
<body>
{# Statistics page for a single word.
   Context variables read below: word, resultlen, totcount, totcnt,
   maxcount, maxrate, mincount, minrate, username. #}
<div>
    <br/>
    <h3>单词 {{ word }} 的统计信息</h3>
    <hr/>
    <ul>
        <li>在总 {{ resultlen }} 篇文章中,总共出现了 {{ totcount }} 次</li>
        {# totcnt is the divisor here; fall back to 0 when it is zero so the
           page renders instead of failing with a division-by-zero error. #}
        <li>总占比为 {{ '%.5f' % ((100 * totcount / totcnt) if totcnt else 0) }}%</li>
    </ul>
    <ul>
        <li>在一篇文章中:</li>
        <li>最多出现了 {{ maxcount }} 次</li>
        <li>最高占比为 {{ '%.5f' % maxrate }}%</li>
        <li>最少出现了 {{ mincount }} 次</li>
        <li>最低占比为 {{ '%.5f' % minrate }}%</li>
    </ul>
    {# Root-relative target so the button does not depend on the current URL depth. #}
    <input type="button" value="返回" onclick="location.href='/{{ username }}'"/>
</div>
</body>
</html>

View File

@ -38,10 +38,17 @@
<p><b>收集生词吧</b> (可以在正文中划词,也可以复制黏贴)</p> <p><b>收集生词吧</b> (可以在正文中划词,也可以复制黏贴)</p>
<form method="post" action="/{{ username }}"> <form method="post" action="/{{ username }}">
<input type="hidden" name="methodtype" value="multiple"/>
<textarea name="content" id="selected-words" rows="10" cols="120"></textarea><br/> <textarea name="content" id="selected-words" rows="10" cols="120"></textarea><br/>
<input type="submit" value="把生词加入我的生词库"/> <input type="submit" value="把生词加入我的生词库"/>
<input type="reset" value="清除"/> <input type="reset" value="清除"/>
</form> </form><br/>
<form method="post" action="/{{ username }}">
<input type="hidden" name="methodtype" value="single"/>
<input type="text" name="content"/><br/>
<input type="submit" value="分析此单词" style="margin-top:5px"/>
<input type="reset" value="清除"/>
</form><br/>
{% if session.get['thisWord'] %} {% if session.get['thisWord'] %}
<script type="text/javascript"> <script type="text/javascript">
//point to the anchor in the page whose id is aaa if it exists //point to the anchor in the page whose id is aaa if it exists

View File

@ -31,6 +31,9 @@
<a href='http://youdao.com/w/eng/{{word}}/#keyfrom=dict2.index' title={{word}}>{{word}}</a> <a href='http://youdao.com/w/eng/{{word}}/#keyfrom=dict2.index' title={{word}}>{{word}}</a>
({{x[1]}}) ({{x[1]}})
<input type="checkbox" name="marked" value={{word}}> <input type="checkbox" name="marked" value={{word}}>
{% if x[0] in userwordlist %}
&nbsp; <font color="red"><b>已在生词簿内</b></font>
{% endif %}
</p> </p>
{% endfor %} {% endfor %}

View File

@ -11,6 +11,7 @@ import Yaml
from Article import get_today_article, load_freq_history from Article import get_today_article, load_freq_history
from WordFreq import WordFreq from WordFreq import WordFreq
from wordfreqCMD import sort_in_descending_order from wordfreqCMD import sort_in_descending_order
from UseSqlite import RecordQuery
import pickle_idea import pickle_idea
import pickle_idea2 import pickle_idea2
@ -102,10 +103,36 @@ def userpage(username):
user_freq_record = path_prefix + 'static/frequency/' + 'frequency_%s.pickle' % (username) user_freq_record = path_prefix + 'static/frequency/' + 'frequency_%s.pickle' % (username)
if request.method == 'POST': # when we submit a form if request.method == 'POST': # when we submit a form
if request.form['methodtype'] == 'multiple':
content = request.form['content'] content = request.form['content']
f = WordFreq(content) f = WordFreq(content)
lst = f.get_freq() lst = f.get_freq()
return render_template('userpage_post.html',username=username,lst = lst, yml=Yaml.yml) userwordlist = pickle_idea2.dict2lst(load_freq_history(user_freq_record))
userwordlist2 = []
for t in userwordlist:
userwordlist2.append(t[0])
return render_template('userpage_post.html', username=username, lst = lst, yml=Yaml.yml, userwordlist=userwordlist2)
else:
word = request.form['content'].lower() # 指定单词
rq = RecordQuery(path_prefix + 'static/wordfreqapp.db')
rq.instructions("SELECT * FROM article") # 获取所有文章
rq.do()
result = rq.get_results()
mincount, maxcount, totcount, totcnt = 1e9, 0, 0, 0
maxrate, minrate = 0.0, 100.0
for d in result: # 查找数据库内的所有文章
str = d['text'].lower().split()
cnt = str.count(word)
tot = len(str)
totcnt += tot
mincount = min(mincount, cnt)
maxcount = max(maxcount, cnt)
totcount += cnt
maxrate = max(maxrate, 100 * cnt / tot)
minrate = min(minrate, 100 * cnt / tot)
return render_template('analyse_word.html', mincount=mincount, maxcount=maxcount, totcount=totcount, totcnt=totcnt,
maxrate=maxrate, minrate=minrate, resultlen=len(result),
word=word, username=session.get('username'))
elif request.method == 'GET': # when we load a html page elif request.method == 'GET': # when we load a html page
d = load_freq_history(user_freq_record) d = load_freq_history(user_freq_record)
@ -127,10 +154,6 @@ def userpage(username):
yml=Yaml.yml, yml=Yaml.yml,
words=words) words=words)
@userService.route("/<username>/mark", methods=['GET', 'POST']) @userService.route("/<username>/mark", methods=['GET', 'POST'])
def user_mark_word(username): def user_mark_word(username):
''' '''

View File

@ -18,7 +18,6 @@ def freq(fruit):
''' '''
result = [] result = []
fruit = fruit.lower() # 字母转小写 fruit = fruit.lower() # 字母转小写
flst = fruit.split() # 字符串转成list flst = fruit.split() # 字符串转成list
c = collections.Counter(flst) c = collections.Counter(flst)
@ -27,24 +26,63 @@ def freq(fruit):
def youdao_link(s):
    """Return the Youdao dictionary URL for the word *s*.

    The word is percent-encoded before being placed in the URL path, so
    characters such as apostrophes, slashes or non-ASCII letters cannot
    corrupt the link. Plain ASCII words produce the same URL as before.
    """
    from urllib.parse import quote  # local import keeps the block self-contained

    return 'http://youdao.com/w/eng/' + quote(s, safe='') + '/#keyfrom=dict2.index'
def file2str(fname):
    """Return the whole content of the text file *fname* as one string.

    The file is opened with the platform default encoding, and the
    ``with`` block guarantees the handle is closed even if reading fails.
    """
    with open(fname) as f:
        return f.read()
def remove_punctuation(s):
    """Replace punctuation in *s* with spaces and normalise single quotes.

    Every character of ``special_characters`` and every ``--`` is replaced
    by a space (not deleted, so ``apple,apple`` does not collapse into
    ``appleapple``); the result is then stripped of surrounding whitespace.

    Single quotes -- the ASCII apostrophe and the curly quotes -- are
    handled separately:

    1. A quote at the start or end of the text, or next to whitespace, is
       treated as a quotation mark and dropped.
    2. A quote followed by a word-final ``s`` is treated as a possessive
       and dropped together with that ``s``.
    3. Any other quote sits inside a word (a contraction such as "n't")
       and is kept, rewritten as a plain ASCII apostrophe.

    Rule 2 also swallows contractions that end in an apostrophe plus ``s``;
    that ambiguity is accepted on purpose.

    Returns the cleaned string.
    """
    special_characters = '_©~=+[]*&$%^@.,?!:;#()"“”—'  # each becomes a space
    for c in special_characters:
        s = s.replace(c, ' ')
    s = s.replace('--', ' ')
    s = s.strip()  # afterwards neither end of s is whitespace

    single_quotes = '\'‘’'  # every quote form that is handled by the rules above
    n = len(s)
    kept = []  # characters to keep, joined once at the end
    i = 0
    while i < n:
        ch = s[i]
        if ch not in single_quotes:
            kept.append(ch)
            i += 1
            continue
        # Rule 1: quote on a word boundary. Any whitespace counts as a
        # boundary, so quotes next to a newline or tab are dropped as well.
        if i == 0 or i == n - 1 or s[i - 1].isspace() or s[i + 1].isspace():
            i += 1
            continue
        # Rule 2: possessive suffix; skip the quote and the following 's'.
        if s[i + 1] == 's' and (i + 2 == n or s[i + 2].isspace()):
            i += 2
            continue
        # Rule 3: in-word quote, standardised to the ASCII apostrophe.
        kept.append('\'')
        i += 1
    return ''.join(kept)
'''
单引号出现在文章中的情况
1某些情况下作为双引号使用引用段落
这种情况一般出现在词首或词尾
处理方式直接去除
2表示名词所有格
对于单数名词以's为后缀对于复数名词以s''为后缀
处理方式'其后的部分去除
3单词元音位置的缩写
最常见的有not->n't/is->'s/have->'ve这类
处理方式保留
上述处理方式2/3两点可能产生一种冲突
某些单词元音缩写后恰好以's结尾
但考虑到用于学习英语的文章一般不会出现过于口语化的缩写单词
因此要么还是表所有格要么就是is的缩写
故不考虑这种冲突情况
'''
'''
以下是原本的代码
if '\'' in s: if '\'' in s:
n = len(s) n = len(s)
t = '' # 用来收集我需要保留的字符 t = '' # 用来收集我需要保留的字符
@ -58,6 +96,7 @@ def remove_punctuation(s): # 这里是s是形参 (parameter)。函数被调用
return t return t
else: else:
return s return s
'''
def sort_in_descending_order(lst):# 单词按频率降序排列 def sort_in_descending_order(lst):# 单词按频率降序排列
@ -74,16 +113,25 @@ def make_html_page(lst, fname):
''' '''
功能把lst的信息存到fname中以html格式 功能把lst的信息存到fname中以html格式
''' '''
s = '' result = ''
count = 1 id = 1
for x in lst:
# <a href="">word</a> for word in lst:
s += '<p>%d <a href="%s">%s</a> (%d)</p>' % (count, youdao_link(x[0]), x[0], x[1]) result += '<p>'
count += 1 result += '%d ' % id
result += getHyperlinkHTML(word[0])
result += ' (%d)' % word[1]
result += '</p>'
# result += '<p>%d <a href="%s">%s</a> (%d)</p>' % (count, youdao_link(x[0]), x[0], x[1])
id += 1
f = open(fname, 'w') f = open(fname, 'w')
f.write(s) f.write(result)
f.close() f.close()
def getHyperlinkHTML(word):
    """Return an HTML ``<a>`` element linking *word* to its Youdao page.

    Both the URL (inside the ``href`` attribute) and the visible word are
    HTML-escaped, so characters such as ``<``, ``>``, ``&`` or quotes in
    the input cannot break or inject markup. For plain words the output is
    unchanged.
    """
    from html import escape  # local import keeps the block self-contained

    return '<a href="%s">%s</a>' % (escape(youdao_link(word)), escape(word))
## main程序入口 ## main程序入口
if __name__ == '__main__': if __name__ == '__main__':
@ -96,12 +144,12 @@ if __name__ == '__main__':
s = file2str(fname) s = file2str(fname)
else: else:
print('I can accept at most 2 arguments.') print('I can accept at most 2 arguments.')
sys.exit()# 结束程序运行, 下面的代码不会被执行了。 sys.exit() # 结束程序运行, 下面的代码不会被执行了。
s = remove_punctuation(s) # 这里是s是实参(argument),里面有值 s = remove_punctuation(s) # 这里是s是实参(argument),里面有值
L = freq(s) L = freq(s)
for x in sort_in_descending_order(L): for x in sort_in_descending_order(L):
print('%s\t%d\t%s' % (x[0], x[1], youdao_link(x[0])))#函数导出 print('%s\t%d\t%s' % (x[0], x[1], youdao_link(x[0]))) # 函数导出
# 把频率的结果放result.html中 # 把频率的结果放result.html中
make_html_page(sort_in_descending_order(L), 'result.html') make_html_page(sort_in_descending_order(L), 'result.html')

View File

@ -1,3 +1,3 @@
Flask==1.1.2 Flask==2.1.2
selenium==3.141.0 selenium==4.2.0
PyYAML~=6.0 PyYAML~=6.0