forked from mrlan/EnglishPal
Compare commits
4 Commits
Bug358-Ten
...
master
Author | SHA1 | Date |
---|---|---|
缪宸硕 | e48008550a | |
李凯 | fde3be4c23 | |
李凯 | 5d5f4cf8f2 | |
miaochenshuo | 260f62967b |
|
@ -5,6 +5,10 @@ from UseSqlite import InsertQuery, RecordQuery
|
||||||
path_prefix = '/var/www/wordfreq/wordfreq/'
|
path_prefix = '/var/www/wordfreq/wordfreq/'
|
||||||
path_prefix = './' # comment this line in deployment
|
path_prefix = './' # comment this line in deployment
|
||||||
|
|
||||||
|
def verify_pass(newpass,oldpass):
|
||||||
|
if(newpass==oldpass):
|
||||||
|
return True
|
||||||
|
|
||||||
|
|
||||||
def verify_user(username, password):
|
def verify_user(username, password):
|
||||||
rq = RecordQuery(path_prefix + 'static/wordfreqapp.db')
|
rq = RecordQuery(path_prefix + 'static/wordfreqapp.db')
|
||||||
|
@ -47,6 +51,8 @@ def change_password(username, old_password, new_password):
|
||||||
if not verify_user(username, old_password): # 旧密码错误
|
if not verify_user(username, old_password): # 旧密码错误
|
||||||
return False
|
return False
|
||||||
# 将用户名和密码一起加密,以免暴露不同用户的相同密码
|
# 将用户名和密码一起加密,以免暴露不同用户的相同密码
|
||||||
|
if verify_pass(new_password,old_password): #新旧密码一致
|
||||||
|
return False
|
||||||
password = md5(username + new_password)
|
password = md5(username + new_password)
|
||||||
rq = InsertQuery(path_prefix + 'static/wordfreqapp.db')
|
rq = InsertQuery(path_prefix + 'static/wordfreqapp.db')
|
||||||
rq.instructions_with_parameters("UPDATE user SET password=:password WHERE name=:username", dict(
|
rq.instructions_with_parameters("UPDATE user SET password=:password WHERE name=:username", dict(
|
||||||
|
|
|
@ -1,27 +0,0 @@
|
||||||
<!DOCTYPE html>
|
|
||||||
<html lang="en">
|
|
||||||
<head>
|
|
||||||
<meta charset="utf-8">
|
|
||||||
<meta name="viewport" content="width=device-width, initial-scale=1.0, minimum-scale=0.5, maximum-scale=3.0, user-scalable=yes" />
|
|
||||||
<title>EnglishPal - Analyse Result</title>
|
|
||||||
</head>
|
|
||||||
<body>
|
|
||||||
<div>
|
|
||||||
<br/>
|
|
||||||
<h3>单词 {{ word }} 的统计信息</h3>
|
|
||||||
<hr/>
|
|
||||||
<ul>
|
|
||||||
<li>在总 {{ resultlen }} 篇文章中,总共出现了 {{ totcount }} 次</li>
|
|
||||||
<li>总占比为 {{ '%.5f' % (100 * totcount / totcnt) }}%</li>
|
|
||||||
</ul>
|
|
||||||
<ul>
|
|
||||||
<li>在一篇文章中:</li>
|
|
||||||
<li>最多出现了 {{ maxcount }} 次</li>
|
|
||||||
<li>最高占比为 {{ '%.5f' % maxrate }}%</li>
|
|
||||||
<li>最少出现了 {{ mincount }} 次</li>
|
|
||||||
<li>最低占比为 {{ '%.5f' % minrate }}%</li>
|
|
||||||
</ul>
|
|
||||||
<input type="button" value="返回" onclick="location.href='{{ username }}'"/>
|
|
||||||
</div>
|
|
||||||
</body>
|
|
||||||
</html>
|
|
|
@ -38,17 +38,10 @@
|
||||||
|
|
||||||
<p><b>收集生词吧</b> (可以在正文中划词,也可以复制黏贴)</p>
|
<p><b>收集生词吧</b> (可以在正文中划词,也可以复制黏贴)</p>
|
||||||
<form method="post" action="/{{ username }}">
|
<form method="post" action="/{{ username }}">
|
||||||
<input type="hidden" name="methodtype" value="multiple"/>
|
|
||||||
<textarea name="content" id="selected-words" rows="10" cols="120"></textarea><br/>
|
<textarea name="content" id="selected-words" rows="10" cols="120"></textarea><br/>
|
||||||
<input type="submit" value="把生词加入我的生词库"/>
|
<input type="submit" value="把生词加入我的生词库"/>
|
||||||
<input type="reset" value="清除"/>
|
<input type="reset" value="清除"/>
|
||||||
</form><br/>
|
</form>
|
||||||
<form method="post" action="/{{ username }}">
|
|
||||||
<input type="hidden" name="methodtype" value="single"/>
|
|
||||||
<input type="text" name="content"/><br/>
|
|
||||||
<input type="submit" value="分析此单词" style="margin-top:5px"/>
|
|
||||||
<input type="reset" value="清除"/>
|
|
||||||
</form><br/>
|
|
||||||
{% if session.get['thisWord'] %}
|
{% if session.get['thisWord'] %}
|
||||||
<script type="text/javascript">
|
<script type="text/javascript">
|
||||||
//point to the anchor in the page whose id is aaa if it exists
|
//point to the anchor in the page whose id is aaa if it exists
|
||||||
|
|
|
@ -31,9 +31,6 @@
|
||||||
<a href='http://youdao.com/w/eng/{{word}}/#keyfrom=dict2.index' title={{word}}>{{word}}</a>
|
<a href='http://youdao.com/w/eng/{{word}}/#keyfrom=dict2.index' title={{word}}>{{word}}</a>
|
||||||
({{x[1]}})
|
({{x[1]}})
|
||||||
<input type="checkbox" name="marked" value={{word}}>
|
<input type="checkbox" name="marked" value={{word}}>
|
||||||
{% if x[0] in userwordlist %}
|
|
||||||
<font color="red"><b>已在生词簿内</b></font>
|
|
||||||
{% endif %}
|
|
||||||
</p>
|
</p>
|
||||||
|
|
||||||
{% endfor %}
|
{% endfor %}
|
||||||
|
|
|
@ -11,7 +11,6 @@ import Yaml
|
||||||
from Article import get_today_article, load_freq_history
|
from Article import get_today_article, load_freq_history
|
||||||
from WordFreq import WordFreq
|
from WordFreq import WordFreq
|
||||||
from wordfreqCMD import sort_in_descending_order
|
from wordfreqCMD import sort_in_descending_order
|
||||||
from UseSqlite import RecordQuery
|
|
||||||
|
|
||||||
import pickle_idea
|
import pickle_idea
|
||||||
import pickle_idea2
|
import pickle_idea2
|
||||||
|
@ -103,36 +102,10 @@ def userpage(username):
|
||||||
user_freq_record = path_prefix + 'static/frequency/' + 'frequency_%s.pickle' % (username)
|
user_freq_record = path_prefix + 'static/frequency/' + 'frequency_%s.pickle' % (username)
|
||||||
|
|
||||||
if request.method == 'POST': # when we submit a form
|
if request.method == 'POST': # when we submit a form
|
||||||
if request.form['methodtype'] == 'multiple':
|
content = request.form['content']
|
||||||
content = request.form['content']
|
f = WordFreq(content)
|
||||||
f = WordFreq(content)
|
lst = f.get_freq()
|
||||||
lst = f.get_freq()
|
return render_template('userpage_post.html',username=username,lst = lst, yml=Yaml.yml)
|
||||||
userwordlist = pickle_idea2.dict2lst(load_freq_history(user_freq_record))
|
|
||||||
userwordlist2 = []
|
|
||||||
for t in userwordlist:
|
|
||||||
userwordlist2.append(t[0])
|
|
||||||
return render_template('userpage_post.html', username=username, lst = lst, yml=Yaml.yml, userwordlist=userwordlist2)
|
|
||||||
else:
|
|
||||||
word = request.form['content'].lower() # 指定单词
|
|
||||||
rq = RecordQuery(path_prefix + 'static/wordfreqapp.db')
|
|
||||||
rq.instructions("SELECT * FROM article") # 获取所有文章
|
|
||||||
rq.do()
|
|
||||||
result = rq.get_results()
|
|
||||||
mincount, maxcount, totcount, totcnt = 1e9, 0, 0, 0
|
|
||||||
maxrate, minrate = 0.0, 100.0
|
|
||||||
for d in result: # 查找数据库内的所有文章
|
|
||||||
str = d['text'].lower().split()
|
|
||||||
cnt = str.count(word)
|
|
||||||
tot = len(str)
|
|
||||||
totcnt += tot
|
|
||||||
mincount = min(mincount, cnt)
|
|
||||||
maxcount = max(maxcount, cnt)
|
|
||||||
totcount += cnt
|
|
||||||
maxrate = max(maxrate, 100 * cnt / tot)
|
|
||||||
minrate = min(minrate, 100 * cnt / tot)
|
|
||||||
return render_template('analyse_word.html', mincount=mincount, maxcount=maxcount, totcount=totcount, totcnt=totcnt,
|
|
||||||
maxrate=maxrate, minrate=minrate, resultlen=len(result),
|
|
||||||
word=word, username=session.get('username'))
|
|
||||||
|
|
||||||
elif request.method == 'GET': # when we load a html page
|
elif request.method == 'GET': # when we load a html page
|
||||||
d = load_freq_history(user_freq_record)
|
d = load_freq_history(user_freq_record)
|
||||||
|
@ -154,6 +127,10 @@ def userpage(username):
|
||||||
yml=Yaml.yml,
|
yml=Yaml.yml,
|
||||||
words=words)
|
words=words)
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
@userService.route("/<username>/mark", methods=['GET', 'POST'])
|
@userService.route("/<username>/mark", methods=['GET', 'POST'])
|
||||||
def user_mark_word(username):
|
def user_mark_word(username):
|
||||||
'''
|
'''
|
||||||
|
|
|
@ -18,6 +18,7 @@ def freq(fruit):
|
||||||
'''
|
'''
|
||||||
|
|
||||||
result = []
|
result = []
|
||||||
|
|
||||||
fruit = fruit.lower() # 字母转小写
|
fruit = fruit.lower() # 字母转小写
|
||||||
flst = fruit.split() # 字符串转成list
|
flst = fruit.split() # 字符串转成list
|
||||||
c = collections.Counter(flst)
|
c = collections.Counter(flst)
|
||||||
|
@ -26,63 +27,24 @@ def freq(fruit):
|
||||||
|
|
||||||
|
|
||||||
def youdao_link(s): # 有道链接
|
def youdao_link(s): # 有道链接
|
||||||
link = 'http://youdao.com/w/eng/' + s + '/#keyfrom=dict2.index' # 网址
|
link = 'http://youdao.com/w/eng/' + s + '/#keyfrom=dict2.index'# 网址
|
||||||
return link
|
return link
|
||||||
|
|
||||||
|
|
||||||
def file2str(fname): # 文件转字符
|
def file2str(fname):#文件转字符
|
||||||
f = open(fname) # 打开
|
f = open(fname) #打开
|
||||||
s = f.read() # 读取
|
s = f.read() #读取
|
||||||
f.close() # 关闭
|
f.close() #关闭
|
||||||
return s
|
return s
|
||||||
|
|
||||||
|
|
||||||
def remove_punctuation(s): # 这里是s是形参 (parameter)。函数被调用时才给s赋值。
|
def remove_punctuation(s): # 这里是s是形参 (parameter)。函数被调用时才给s赋值。
|
||||||
special_characters = '_©~=+[]*&$%^@.,?!:;#()"“”—' # 把里面的字符都去掉
|
special_characters = '_©~=+[]*&$%^@.,?!:;#()"“”—‘’' # 把里面的字符都去掉
|
||||||
|
|
||||||
for c in special_characters:
|
for c in special_characters:
|
||||||
s = s.replace(c, ' ') # 把所有符号都替换成空格,防止出现把 apple,apple 移掉逗号后变成 appleapple 情况
|
s = s.replace(c, ' ') # 防止出现把 apple,apple 移掉逗号后变成 appleapple 情况
|
||||||
s = s.replace('--', ' ')
|
s = s.replace('--', ' ')
|
||||||
s = s.strip() # 去除前后的空格
|
s = s.strip() # 去除前后的空格
|
||||||
|
|
||||||
single_quote = '‘’\'' # 各种单引号单独处理
|
|
||||||
n, i = len(s), 0
|
|
||||||
t = '' # 用来收集我需要保留的字符
|
|
||||||
while i < n: # 只有单引号前后都有英文字符,才保留
|
|
||||||
if s[i] in single_quote:
|
|
||||||
if i == 0 or i == n - 1 or s[i - 1] == ' ' or s[i + 1] == ' ':
|
|
||||||
i = i + 1
|
|
||||||
continue # condition 1+2
|
|
||||||
if s[i + 1] == 's' and (i + 2 == n or s[i + 2] == ' '):
|
|
||||||
i = i + 2
|
|
||||||
continue # condition 2
|
|
||||||
t += '\'' # condition 3, standardize quote
|
|
||||||
else:
|
|
||||||
t += s[i]
|
|
||||||
i = i + 1
|
|
||||||
return t
|
|
||||||
|
|
||||||
'''
|
|
||||||
单引号出现在文章中的情况:
|
|
||||||
1、某些情况下作为双引号使用,引用段落
|
|
||||||
这种情况一般出现在词首或词尾
|
|
||||||
处理方式:直接去除
|
|
||||||
2、表示名词所有格
|
|
||||||
对于单数名词以's为后缀,对于复数名词以s'或'为后缀
|
|
||||||
处理方式:将'其后的部分去除
|
|
||||||
3、单词元音位置的缩写
|
|
||||||
最常见的有not->n't/is->'s/have->'ve这类
|
|
||||||
处理方式:保留
|
|
||||||
|
|
||||||
上述处理方式2/3两点可能产生一种冲突:
|
|
||||||
某些单词元音缩写后恰好以's结尾
|
|
||||||
但考虑到用于学习英语的文章一般不会出现过于口语化的缩写单词
|
|
||||||
因此要么还是表所有格,要么就是is的缩写
|
|
||||||
故不考虑这种冲突情况
|
|
||||||
'''
|
|
||||||
|
|
||||||
'''
|
|
||||||
以下是原本的代码
|
|
||||||
if '\'' in s:
|
if '\'' in s:
|
||||||
n = len(s)
|
n = len(s)
|
||||||
t = '' # 用来收集我需要保留的字符
|
t = '' # 用来收集我需要保留的字符
|
||||||
|
@ -96,7 +58,6 @@ def remove_punctuation(s): # 这里是s是形参 (parameter)。函数被调用
|
||||||
return t
|
return t
|
||||||
else:
|
else:
|
||||||
return s
|
return s
|
||||||
'''
|
|
||||||
|
|
||||||
|
|
||||||
def sort_in_descending_order(lst):# 单词按频率降序排列
|
def sort_in_descending_order(lst):# 单词按频率降序排列
|
||||||
|
@ -113,25 +74,16 @@ def make_html_page(lst, fname):
|
||||||
'''
|
'''
|
||||||
功能:把lst的信息存到fname中,以html格式。
|
功能:把lst的信息存到fname中,以html格式。
|
||||||
'''
|
'''
|
||||||
result = ''
|
s = ''
|
||||||
id = 1
|
count = 1
|
||||||
|
for x in lst:
|
||||||
for word in lst:
|
# <a href="">word</a>
|
||||||
result += '<p>'
|
s += '<p>%d <a href="%s">%s</a> (%d)</p>' % (count, youdao_link(x[0]), x[0], x[1])
|
||||||
result += '%d ' % id
|
count += 1
|
||||||
result += get_html_hyperlink(word[0])
|
|
||||||
result += ' (%d)' % word[1]
|
|
||||||
result += '</p>'
|
|
||||||
# result += '<p>%d <a href="%s">%s</a> (%d)</p>' % (count, youdao_link(x[0]), x[0], x[1])
|
|
||||||
id += 1
|
|
||||||
|
|
||||||
f = open(fname, 'w')
|
f = open(fname, 'w')
|
||||||
f.write(result)
|
f.write(s)
|
||||||
f.close()
|
f.close()
|
||||||
|
|
||||||
def get_html_hyperlink(word):
|
|
||||||
s = '<a href="' + youdao_link(word) + '">' + word + '</a>'
|
|
||||||
return s
|
|
||||||
|
|
||||||
## main(程序入口)
|
## main(程序入口)
|
||||||
if __name__ == '__main__':
|
if __name__ == '__main__':
|
||||||
|
@ -144,12 +96,12 @@ if __name__ == '__main__':
|
||||||
s = file2str(fname)
|
s = file2str(fname)
|
||||||
else:
|
else:
|
||||||
print('I can accept at most 2 arguments.')
|
print('I can accept at most 2 arguments.')
|
||||||
sys.exit() # 结束程序运行, 下面的代码不会被执行了。
|
sys.exit()# 结束程序运行, 下面的代码不会被执行了。
|
||||||
|
|
||||||
s = remove_punctuation(s) # 这里是s是实参(argument),里面有值
|
s = remove_punctuation(s) # 这里是s是实参(argument),里面有值
|
||||||
L = freq(s)
|
L = freq(s)
|
||||||
for x in sort_in_descending_order(L):
|
for x in sort_in_descending_order(L):
|
||||||
print('%s\t%d\t%s' % (x[0], x[1], youdao_link(x[0]))) # 函数导出
|
print('%s\t%d\t%s' % (x[0], x[1], youdao_link(x[0])))#函数导出
|
||||||
|
|
||||||
# 把频率的结果放result.html中
|
# 把频率的结果放result.html中
|
||||||
make_html_page(sort_in_descending_order(L), 'result.html')
|
make_html_page(sort_in_descending_order(L), 'result.html')
|
||||||
|
|
|
@ -1,3 +1,3 @@
|
||||||
Flask==2.1.2
|
Flask==1.1.2
|
||||||
selenium==4.2.0
|
selenium==3.141.0
|
||||||
PyYAML~=6.0
|
PyYAML~=6.0
|
||||||
|
|
Loading…
Reference in New Issue