Compare commits
	
		
			3 Commits 
		
	
	
		
			master
			...
			Bug358-Ten
		
	
	| Author | SHA1 | Date | 
|---|---|---|
|  | 83a11244ac | |
|  | d0dfa605a9 | |
|  | 3654776aec | |
|  | @@ -0,0 +1,27 @@ | ||||||
|  | <!DOCTYPE html> | ||||||
|  | <html lang="en"> | ||||||
|  | <head> | ||||||
|  |     <meta charset="utf-8"> | ||||||
|  |     <meta name="viewport" content="width=device-width, initial-scale=1.0, minimum-scale=0.5, maximum-scale=3.0, user-scalable=yes" /> | ||||||
|  |     <title>EnglishPal - Analyse Result</title> | ||||||
|  | </head> | ||||||
|  | <body> | ||||||
|  |     <div> | ||||||
|  |         <br/> | ||||||
|  |         <h3>单词 {{ word }} 的统计信息</h3> | ||||||
|  |         <hr/> | ||||||
|  |         <ul> | ||||||
|  |             <li>在总 {{ resultlen }} 篇文章中,总共出现了 {{ totcount }} 次</li> | ||||||
|  |             <li>总占比为 {{ '%.5f' % (100 * totcount / totcnt) }}%</li> | ||||||
|  |         </ul> | ||||||
|  |         <ul> | ||||||
|  |             <li>在一篇文章中:</li> | ||||||
|  |             <li>最多出现了 {{ maxcount }} 次</li> | ||||||
|  |             <li>最高占比为 {{ '%.5f' % maxrate }}%</li> | ||||||
|  |             <li>最少出现了 {{ mincount }} 次</li> | ||||||
|  |             <li>最低占比为 {{ '%.5f' % minrate }}%</li> | ||||||
|  |         </ul> | ||||||
|  |         <input type="button" value="返回" onclick="location.href='{{ username }}'"/> | ||||||
|  |     </div> | ||||||
|  | </body> | ||||||
|  | </html> | ||||||
|  | @@ -38,10 +38,17 @@ | ||||||
| 
 | 
 | ||||||
|     <p><b>收集生词吧</b> (可以在正文中划词,也可以复制黏贴)</p> |     <p><b>收集生词吧</b> (可以在正文中划词,也可以复制黏贴)</p> | ||||||
|     <form method="post" action="/{{ username }}"> |     <form method="post" action="/{{ username }}"> | ||||||
|  |         <input type="hidden" name="methodtype" value="multiple"/> | ||||||
|         <textarea name="content" id="selected-words" rows="10" cols="120"></textarea><br/> |         <textarea name="content" id="selected-words" rows="10" cols="120"></textarea><br/> | ||||||
|         <input type="submit" value="把生词加入我的生词库"/> |         <input type="submit" value="把生词加入我的生词库"/> | ||||||
|         <input type="reset" value="清除"/> |         <input type="reset" value="清除"/> | ||||||
|     </form> |     </form><br/> | ||||||
|  |     <form method="post" action="/{{ username }}"> | ||||||
|  |         <input type="hidden" name="methodtype" value="single"/> | ||||||
|  |         <input type="text" name="content"/><br/> | ||||||
|  |         <input type="submit" value="分析此单词" style="margin-top:5px"/> | ||||||
|  |         <input type="reset" value="清除"/> | ||||||
|  |     </form><br/> | ||||||
|     {% if session.get['thisWord'] %} |     {% if session.get['thisWord'] %} | ||||||
|         <script type="text/javascript"> |         <script type="text/javascript"> | ||||||
|             //point to the anchor in the page whose id is aaa if it exists |             //point to the anchor in the page whose id is aaa if it exists | ||||||
|  |  | ||||||
|  | @@ -31,6 +31,9 @@ | ||||||
|             <a href='http://youdao.com/w/eng/{{word}}/#keyfrom=dict2.index' title={{word}}>{{word}}</a> |             <a href='http://youdao.com/w/eng/{{word}}/#keyfrom=dict2.index' title={{word}}>{{word}}</a> | ||||||
|             ({{x[1]}}) |             ({{x[1]}}) | ||||||
|             <input type="checkbox" name="marked" value={{word}}> |             <input type="checkbox" name="marked" value={{word}}> | ||||||
|  |             {% if x[0] in userwordlist %} | ||||||
|  |                   <font color="red"><b>已在生词簿内</b></font> | ||||||
|  |             {% endif %} | ||||||
|         </p> |         </p> | ||||||
| 
 | 
 | ||||||
|        {% endfor %} |        {% endfor %} | ||||||
|  |  | ||||||
|  | @@ -11,6 +11,7 @@ import Yaml | ||||||
| from Article import get_today_article, load_freq_history | from Article import get_today_article, load_freq_history | ||||||
| from WordFreq import WordFreq | from WordFreq import WordFreq | ||||||
| from wordfreqCMD import sort_in_descending_order | from wordfreqCMD import sort_in_descending_order | ||||||
|  | from UseSqlite import RecordQuery | ||||||
| 
 | 
 | ||||||
| import pickle_idea | import pickle_idea | ||||||
| import pickle_idea2 | import pickle_idea2 | ||||||
|  | @@ -102,10 +103,36 @@ def userpage(username): | ||||||
|     user_freq_record = path_prefix + 'static/frequency/' + 'frequency_%s.pickle' % (username) |     user_freq_record = path_prefix + 'static/frequency/' + 'frequency_%s.pickle' % (username) | ||||||
| 
 | 
 | ||||||
|     if request.method == 'POST':  # when we submit a form |     if request.method == 'POST':  # when we submit a form | ||||||
|  |         if request.form['methodtype'] == 'multiple': | ||||||
|             content = request.form['content'] |             content = request.form['content'] | ||||||
|             f = WordFreq(content) |             f = WordFreq(content) | ||||||
|             lst = f.get_freq() |             lst = f.get_freq() | ||||||
|         return render_template('userpage_post.html',username=username,lst = lst, yml=Yaml.yml) |             userwordlist = pickle_idea2.dict2lst(load_freq_history(user_freq_record)) | ||||||
|  |             userwordlist2 = [] | ||||||
|  |             for t in userwordlist: | ||||||
|  |                 userwordlist2.append(t[0]) | ||||||
|  |             return render_template('userpage_post.html', username=username, lst = lst, yml=Yaml.yml, userwordlist=userwordlist2) | ||||||
|  |         else: | ||||||
|  |             word = request.form['content'].lower()  # 指定单词 | ||||||
|  |             rq = RecordQuery(path_prefix + 'static/wordfreqapp.db') | ||||||
|  |             rq.instructions("SELECT * FROM article")  # 获取所有文章 | ||||||
|  |             rq.do() | ||||||
|  |             result = rq.get_results() | ||||||
|  |             mincount, maxcount, totcount, totcnt = 1e9, 0, 0, 0 | ||||||
|  |             maxrate, minrate = 0.0, 100.0 | ||||||
|  |             for d in result:  # 查找数据库内的所有文章 | ||||||
|  |                 str = d['text'].lower().split() | ||||||
|  |                 cnt = str.count(word) | ||||||
|  |                 tot = len(str) | ||||||
|  |                 totcnt += tot | ||||||
|  |                 mincount = min(mincount, cnt) | ||||||
|  |                 maxcount = max(maxcount, cnt) | ||||||
|  |                 totcount += cnt | ||||||
|  |                 maxrate = max(maxrate, 100 * cnt / tot) | ||||||
|  |                 minrate = min(minrate, 100 * cnt / tot) | ||||||
|  |             return render_template('analyse_word.html', mincount=mincount, maxcount=maxcount, totcount=totcount, totcnt=totcnt, | ||||||
|  |                                    maxrate=maxrate, minrate=minrate, resultlen=len(result), | ||||||
|  |                                    word=word, username=session.get('username')) | ||||||
| 
 | 
 | ||||||
|     elif request.method == 'GET':  # when we load a html page |     elif request.method == 'GET':  # when we load a html page | ||||||
|         d = load_freq_history(user_freq_record) |         d = load_freq_history(user_freq_record) | ||||||
|  | @@ -127,10 +154,6 @@ def userpage(username): | ||||||
|                                yml=Yaml.yml, |                                yml=Yaml.yml, | ||||||
|                                words=words) |                                words=words) | ||||||
| 
 | 
 | ||||||
| 
 |  | ||||||
| 
 |  | ||||||
| 
 |  | ||||||
| 
 |  | ||||||
| @userService.route("/<username>/mark", methods=['GET', 'POST']) | @userService.route("/<username>/mark", methods=['GET', 'POST']) | ||||||
| def user_mark_word(username): | def user_mark_word(username): | ||||||
|     ''' |     ''' | ||||||
|  |  | ||||||
|  | @@ -18,7 +18,6 @@ def freq(fruit): | ||||||
|     ''' |     ''' | ||||||
| 
 | 
 | ||||||
|     result = [] |     result = [] | ||||||
|      |  | ||||||
|     fruit = fruit.lower() # 字母转小写 |     fruit = fruit.lower() # 字母转小写 | ||||||
|     flst = fruit.split()  # 字符串转成list |     flst = fruit.split()  # 字符串转成list | ||||||
|     c = collections.Counter(flst) |     c = collections.Counter(flst) | ||||||
|  | @@ -27,24 +26,63 @@ def freq(fruit): | ||||||
| 
 | 
 | ||||||
| 
 | 
 | ||||||
| def youdao_link(s): # 有道链接 | def youdao_link(s): # 有道链接 | ||||||
|     link = 'http://youdao.com/w/eng/' + s + '/#keyfrom=dict2.index'# 网址 |     link = 'http://youdao.com/w/eng/' + s + '/#keyfrom=dict2.index' # 网址 | ||||||
|     return link |     return link | ||||||
| 
 | 
 | ||||||
| 
 | 
 | ||||||
| def file2str(fname):#文件转字符 | def file2str(fname): # 文件转字符 | ||||||
|     f = open(fname) #打开 |     f = open(fname)  # 打开 | ||||||
|     s = f.read()    #读取 |     s = f.read()     # 读取 | ||||||
|     f.close()       #关闭 |     f.close()        # 关闭 | ||||||
|     return s |     return s | ||||||
| 
 | 
 | ||||||
| 
 | 
 | ||||||
| def remove_punctuation(s): # 这里是s是形参 (parameter)。函数被调用时才给s赋值。 | def remove_punctuation(s): # 这里是s是形参 (parameter)。函数被调用时才给s赋值。 | ||||||
|     special_characters = '_©~=+[]*&$%^@.,?!:;#()"“”—‘’' # 把里面的字符都去掉 |     special_characters = '_©~=+[]*&$%^@.,?!:;#()"“”—' # 把里面的字符都去掉 | ||||||
|  | 
 | ||||||
|     for c in special_characters: |     for c in special_characters: | ||||||
|         s = s.replace(c, ' ') # 防止出现把 apple,apple 移掉逗号后变成 appleapple 情况 |         s = s.replace(c, ' ') # 把所有符号都替换成空格,防止出现把 apple,apple 移掉逗号后变成 appleapple 情况 | ||||||
|     s = s.replace('--', ' ') |     s = s.replace('--', ' ') | ||||||
|     s = s.strip() # 去除前后的空格 |     s = s.strip() # 去除前后的空格 | ||||||
| 
 | 
 | ||||||
|  |     single_quote = '‘’\'' # 各种单引号单独处理 | ||||||
|  |     n, i = len(s), 0 | ||||||
|  |     t = ''  # 用来收集我需要保留的字符 | ||||||
|  |     while i < n:  # 只有单引号前后都有英文字符,才保留 | ||||||
|  |         if s[i] in single_quote: | ||||||
|  |             if i == 0 or i == n - 1 or s[i - 1] == ' ' or s[i + 1] == ' ': | ||||||
|  |                 i = i + 1 | ||||||
|  |                 continue # condition 1+2 | ||||||
|  |             if s[i + 1] == 's' and (i + 2 == n or s[i + 2] == ' '): | ||||||
|  |                 i = i + 2 | ||||||
|  |                 continue # condition 2 | ||||||
|  |             t += '\'' # condition 3, standardize quote | ||||||
|  |         else: | ||||||
|  |             t += s[i] | ||||||
|  |         i = i + 1 | ||||||
|  |     return t | ||||||
|  | 
 | ||||||
|  |     ''' | ||||||
|  |     单引号出现在文章中的情况: | ||||||
|  |     1、某些情况下作为双引号使用,引用段落 | ||||||
|  |         这种情况一般出现在词首或词尾 | ||||||
|  |         处理方式:直接去除 | ||||||
|  |     2、表示名词所有格 | ||||||
|  |         对于单数名词以's为后缀,对于复数名词以s'或'为后缀 | ||||||
|  |         处理方式:将'其后的部分去除 | ||||||
|  |     3、单词元音位置的缩写 | ||||||
|  |         最常见的有not->n't/is->'s/have->'ve这类 | ||||||
|  |         处理方式:保留 | ||||||
|  |      | ||||||
|  |     上述处理方式2/3两点可能产生一种冲突: | ||||||
|  |         某些单词元音缩写后恰好以's结尾 | ||||||
|  |         但考虑到用于学习英语的文章一般不会出现过于口语化的缩写单词 | ||||||
|  |         因此要么还是表所有格,要么就是is的缩写 | ||||||
|  |         故不考虑这种冲突情况 | ||||||
|  |     ''' | ||||||
|  | 
 | ||||||
|  |     ''' | ||||||
|  |     以下是原本的代码 | ||||||
|     if '\'' in s: |     if '\'' in s: | ||||||
|         n = len(s) |         n = len(s) | ||||||
|         t = '' # 用来收集我需要保留的字符 |         t = '' # 用来收集我需要保留的字符 | ||||||
|  | @@ -58,6 +96,7 @@ def remove_punctuation(s): # 这里是s是形参 (parameter)。函数被调用 | ||||||
|         return t |         return t | ||||||
|     else: |     else: | ||||||
|         return s |         return s | ||||||
|  |     ''' | ||||||
| 
 | 
 | ||||||
| 
 | 
 | ||||||
| def sort_in_descending_order(lst):# 单词按频率降序排列 | def sort_in_descending_order(lst):# 单词按频率降序排列 | ||||||
|  | @@ -74,16 +113,25 @@ def make_html_page(lst, fname): | ||||||
|     ''' |     ''' | ||||||
|     功能:把lst的信息存到fname中,以html格式。 |     功能:把lst的信息存到fname中,以html格式。 | ||||||
|     ''' |     ''' | ||||||
|     s = '' |     result = '' | ||||||
|     count = 1 |     id = 1 | ||||||
|     for x in lst: | 
 | ||||||
|         # <a href="">word</a> |     for word in lst: | ||||||
|         s += '<p>%d <a href="%s">%s</a> (%d)</p>' % (count, youdao_link(x[0]), x[0], x[1]) |         result += '<p>' | ||||||
|         count += 1 |         result += '%d ' % id | ||||||
|  |         result += get_html_hyperlink(word[0]) | ||||||
|  |         result += ' (%d)' % word[1] | ||||||
|  |         result += '</p>' | ||||||
|  |         # result += '<p>%d <a href="%s">%s</a> (%d)</p>' % (count, youdao_link(x[0]), x[0], x[1]) | ||||||
|  |         id += 1 | ||||||
|  | 
 | ||||||
|     f = open(fname, 'w') |     f = open(fname, 'w') | ||||||
|     f.write(s) |     f.write(result) | ||||||
|     f.close() |     f.close() | ||||||
| 
 | 
 | ||||||
|  | def get_html_hyperlink(word): | ||||||
|  |     s = '<a href="' + youdao_link(word) + '">' + word + '</a>' | ||||||
|  |     return s | ||||||
| 
 | 
 | ||||||
| ## main(程序入口) | ## main(程序入口) | ||||||
| if __name__ == '__main__': | if __name__ == '__main__': | ||||||
|  | @@ -96,12 +144,12 @@ if __name__ == '__main__': | ||||||
|         s = file2str(fname) |         s = file2str(fname) | ||||||
|     else: |     else: | ||||||
|         print('I can accept at most 2 arguments.') |         print('I can accept at most 2 arguments.') | ||||||
|         sys.exit()# 结束程序运行, 下面的代码不会被执行了。 |         sys.exit() # 结束程序运行, 下面的代码不会被执行了。 | ||||||
| 
 | 
 | ||||||
|     s = remove_punctuation(s) # 这里是s是实参(argument),里面有值 |     s = remove_punctuation(s) # 这里是s是实参(argument),里面有值 | ||||||
|     L = freq(s) |     L = freq(s) | ||||||
|     for x in sort_in_descending_order(L): |     for x in sort_in_descending_order(L): | ||||||
|         print('%s\t%d\t%s' % (x[0], x[1], youdao_link(x[0])))#函数导出 |         print('%s\t%d\t%s' % (x[0], x[1], youdao_link(x[0]))) # 函数导出 | ||||||
| 
 | 
 | ||||||
|     # 把频率的结果放result.html中 |     # 把频率的结果放result.html中 | ||||||
|     make_html_page(sort_in_descending_order(L), 'result.html')  |     make_html_page(sort_in_descending_order(L), 'result.html')  | ||||||
|  |  | ||||||
|  | @@ -1,3 +1,3 @@ | ||||||
| Flask==1.1.2 | Flask==2.1.2 | ||||||
| selenium==3.141.0 | selenium==4.2.0 | ||||||
| PyYAML~=6.0 | PyYAML~=6.0 | ||||||
|  |  | ||||||
		Loading…
	
		Reference in New Issue