forked from mrlan/EnglishPal
				
			BugFix358 + Improve + Refactor
							parent
							
								
									b9cf94da74
								
							
						
					
					
						commit
						3654776aec
					
				| 
						 | 
				
			
			@ -0,0 +1,27 @@
 | 
			
		|||
<!DOCTYPE html>
<html lang="en">
<head>
    <meta charset="utf-8">
    <meta name="viewport" content="width=device-width, initial-scale=1.0, minimum-scale=0.5, maximum-scale=3.0, user-scalable=yes" />
    <title>EnglishPal - Analyse Result</title>
</head>
<body>
    {# Statistics page for a single word.
       Variables read by this template: word, resultlen, totcount, totcnt,
       maxcount, maxrate, mincount, minrate, username. #}
    <div>
        <br/>
        <h3>单词 {{ word }} 的统计信息</h3>
        <hr/>
        <ul>
            <li>在总 {{ resultlen }} 篇文章中,总共出现了 {{ totcount }} 次</li>
            {# Guard the division: totcnt is 0 when no tokens were counted,
               and dividing by it would abort rendering with ZeroDivisionError. #}
            <li>总占比为 {{ '%.5f' % ((100 * totcount / totcnt) if totcnt else 0) }}%</li>
        </ul>
        <ul>
            <li>在一篇文章中:</li>
            <li>最多出现了 {{ maxcount }} 次</li>
            <li>最高占比为 {{ '%.5f' % maxrate }}%</li>
            <li>最少出现了 {{ mincount }} 次</li>
            <li>最低占比为 {{ '%.5f' % minrate }}%</li>
        </ul>
        {# "Back" button: navigates to the URL given by username (relative link). #}
        <input type="button" value="返回" onclick="location.href='{{ username }}'"/>
    </div>
</body>
</html>
 | 
			
		||||
| 
						 | 
				
			
			@ -38,10 +38,17 @@
 | 
			
		|||
 | 
			
		||||
    <p><b>收集生词吧</b> (可以在正文中划词,也可以复制黏贴)</p>
 | 
			
		||||
    <form method="post" action="/{{ username }}">
 | 
			
		||||
        <input type="hidden" name="methodtype" value="multiple"/>
 | 
			
		||||
        <textarea name="content" id="selected-words" rows="10" cols="120"></textarea><br/>
 | 
			
		||||
        <input type="submit" value="把生词加入我的生词库"/>
 | 
			
		||||
        <input type="reset" value="清除"/>
 | 
			
		||||
    </form>
 | 
			
		||||
    </form><br/>
 | 
			
		||||
    <form method="post" action="/{{ username }}">
 | 
			
		||||
        <input type="hidden" name="methodtype" value="single"/>
 | 
			
		||||
        <input type="text" name="content"/><br/>
 | 
			
		||||
        <input type="submit" value="分析此单词" style="margin-top:5px"/>
 | 
			
		||||
        <input type="reset" value="清除"/>
 | 
			
		||||
    </form><br/>
 | 
			
		||||
    {% if session.get['thisWord'] %}
 | 
			
		||||
        <script type="text/javascript">
 | 
			
		||||
            //point to the anchor in the page whose id is aaa if it exists
 | 
			
		||||
| 
						 | 
				
			
			
 | 
			
		|||
| 
						 | 
				
			
			@ -31,6 +31,9 @@
 | 
			
		|||
            <a href='http://youdao.com/w/eng/{{word}}/#keyfrom=dict2.index' title={{word}}>{{word}}</a>
 | 
			
		||||
            ({{x[1]}})
 | 
			
		||||
            <input type="checkbox" name="marked" value={{word}}>
 | 
			
		||||
            {% if x[0] in userwordlist %}
 | 
			
		||||
                  <font color="red"><b>已在生词簿内</b></font>
 | 
			
		||||
            {% endif %}
 | 
			
		||||
        </p>
 | 
			
		||||
 | 
			
		||||
       {% endfor %}
 | 
			
		||||
| 
						 | 
				
			
			
 | 
			
		|||
| 
						 | 
				
			
			@ -11,6 +11,7 @@ import Yaml
 | 
			
		|||
from Article import get_today_article, load_freq_history
 | 
			
		||||
from WordFreq import WordFreq
 | 
			
		||||
from wordfreqCMD import sort_in_descending_order
 | 
			
		||||
from UseSqlite import RecordQuery
 | 
			
		||||
 | 
			
		||||
import pickle_idea
 | 
			
		||||
import pickle_idea2
 | 
			
		||||
| 
						 | 
				
			
			@ -102,10 +103,36 @@ def userpage(username):
 | 
			
		|||
    user_freq_record = path_prefix + 'static/frequency/' + 'frequency_%s.pickle' % (username)
 | 
			
		||||
 | 
			
		||||
    if request.method == 'POST':  # when we submit a form
 | 
			
		||||
        content = request.form['content']
 | 
			
		||||
        f = WordFreq(content)
 | 
			
		||||
        lst = f.get_freq()
 | 
			
		||||
        return render_template('userpage_post.html',username=username,lst = lst, yml=Yaml.yml)
 | 
			
		||||
        if request.form['methodtype'] == 'multiple':
 | 
			
		||||
            content = request.form['content']
 | 
			
		||||
            f = WordFreq(content)
 | 
			
		||||
            lst = f.get_freq()
 | 
			
		||||
            userwordlist = pickle_idea2.dict2lst(load_freq_history(user_freq_record))
 | 
			
		||||
            userwordlist2 = []
 | 
			
		||||
            for t in userwordlist:
 | 
			
		||||
                userwordlist2.append(t[0])
 | 
			
		||||
            return render_template('userpage_post.html', username=username, lst = lst, yml=Yaml.yml, userwordlist=userwordlist2)
 | 
			
		||||
        else:
 | 
			
		||||
            word = request.form['content'].lower()  # 指定单词
 | 
			
		||||
            rq = RecordQuery(path_prefix + 'static/wordfreqapp.db')
 | 
			
		||||
            rq.instructions("SELECT * FROM article")  # 获取所有文章
 | 
			
		||||
            rq.do()
 | 
			
		||||
            result = rq.get_results()
 | 
			
		||||
            mincount, maxcount, totcount, totcnt = 1e9, 0, 0, 0
 | 
			
		||||
            maxrate, minrate = 0.0, 100.0
 | 
			
		||||
            for d in result:  # 查找数据库内的所有文章
 | 
			
		||||
                str = d['text'].lower().split()
 | 
			
		||||
                cnt = str.count(word)
 | 
			
		||||
                tot = len(str)
 | 
			
		||||
                totcnt += tot
 | 
			
		||||
                mincount = min(mincount, cnt)
 | 
			
		||||
                maxcount = max(maxcount, cnt)
 | 
			
		||||
                totcount += cnt
 | 
			
		||||
                maxrate = max(maxrate, 100 * cnt / tot)
 | 
			
		||||
                minrate = min(minrate, 100 * cnt / tot)
 | 
			
		||||
            return render_template('analyse_word.html', mincount=mincount, maxcount=maxcount, totcount=totcount, totcnt=totcnt,
 | 
			
		||||
                                   maxrate=maxrate, minrate=minrate, resultlen=len(result),
 | 
			
		||||
                                   word=word, username=session.get('username'))
 | 
			
		||||
 | 
			
		||||
    elif request.method == 'GET':  # when we load a html page
 | 
			
		||||
        d = load_freq_history(user_freq_record)
 | 
			
		||||
| 
						 | 
				
			
			@ -127,10 +154,6 @@ def userpage(username):
 | 
			
		|||
                               yml=Yaml.yml,
 | 
			
		||||
                               words=words)
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
@userService.route("/<username>/mark", methods=['GET', 'POST'])
 | 
			
		||||
def user_mark_word(username):
 | 
			
		||||
    '''
 | 
			
		||||
| 
						 | 
				
			
			
 | 
			
		|||
| 
						 | 
				
			
			@ -18,7 +18,6 @@ def freq(fruit):
 | 
			
		|||
    '''
 | 
			
		||||
 | 
			
		||||
    result = []
 | 
			
		||||
    
 | 
			
		||||
    fruit = fruit.lower() # 字母转小写
 | 
			
		||||
    flst = fruit.split()  # 字符串转成list
 | 
			
		||||
    c = collections.Counter(flst)
 | 
			
		||||
| 
						 | 
				
			
			@ -27,24 +26,63 @@ def freq(fruit):
 | 
			
		|||
 | 
			
		||||
 | 
			
		||||
def youdao_link(s):
    """Return the Youdao dictionary lookup URL for the word ``s``.

    The word is percent-encoded before it is placed in the URL path, so
    characters such as spaces, ``/``, ``?`` or ``#`` cannot break the link.
    Words made only of letters, digits, ``-``, ``_``, ``.`` and ``~``
    produce exactly the same URL as the unencoded concatenation.

    Args:
        s: The word to look up.

    Returns:
        The lookup URL as a string.
    """
    # Function-scope import so the block does not depend on file-level imports.
    from urllib.parse import quote

    # safe='' also encodes '/', which would otherwise add an extra path segment.
    return 'http://youdao.com/w/eng/' + quote(s, safe='') + '/#keyfrom=dict2.index'
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
def file2str(fname):
    """Read the file ``fname`` and return its entire content as a string.

    The file is opened in text mode with the platform default encoding.
    A context manager closes the handle even when reading raises.

    Args:
        fname: Path of the file to read.

    Returns:
        The full text content of the file.

    Raises:
        OSError: If the file cannot be opened or read.
    """
    with open(fname) as f:
        return f.read()
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
def remove_punctuation(s): # 这里是s是形参 (parameter)。函数被调用时才给s赋值。
 | 
			
		||||
    special_characters = '_©~=+[]*&$%^@.,?!:;#()"“”—‘’' # 把里面的字符都去掉
 | 
			
		||||
    special_characters = '_©~=+[]*&$%^@.,?!:;#()"“”—' # 把里面的字符都去掉
 | 
			
		||||
 | 
			
		||||
    for c in special_characters:
 | 
			
		||||
        s = s.replace(c, ' ') # 防止出现把 apple,apple 移掉逗号后变成 appleapple 情况
 | 
			
		||||
        s = s.replace(c, ' ') # 把所有符号都替换成空格,防止出现把 apple,apple 移掉逗号后变成 appleapple 情况
 | 
			
		||||
    s = s.replace('--', ' ')
 | 
			
		||||
    s = s.strip() # 去除前后的空格
 | 
			
		||||
 | 
			
		||||
    single_quote = '‘’\'' # 各种单引号单独处理
 | 
			
		||||
    n, i = len(s), 0
 | 
			
		||||
    t = ''  # 用来收集我需要保留的字符
 | 
			
		||||
    while i < n:  # 只有单引号前后都有英文字符,才保留
 | 
			
		||||
        if s[i] in single_quote:
 | 
			
		||||
            if i == 0 or i == n - 1 or s[i - 1] == ' ' or s[i + 1] == ' ':
 | 
			
		||||
                i = i + 1
 | 
			
		||||
                continue # condition 1+2
 | 
			
		||||
            if s[i + 1] == 's' and (i + 2 == n or s[i + 2] == ' '):
 | 
			
		||||
                i = i + 2
 | 
			
		||||
                continue # condition 2
 | 
			
		||||
            t += '\'' # condition 3, standardize quote
 | 
			
		||||
        else:
 | 
			
		||||
            t += s[i]
 | 
			
		||||
        i = i + 1
 | 
			
		||||
    return t
 | 
			
		||||
 | 
			
		||||
    '''
 | 
			
		||||
    单引号出现在文章中的情况:
 | 
			
		||||
    1、某些情况下作为双引号使用,引用段落
 | 
			
		||||
        这种情况一般出现在词首或词尾
 | 
			
		||||
        处理方式:直接去除
 | 
			
		||||
    2、表示名词所有格
 | 
			
		||||
        对于单数名词以's为后缀,对于复数名词以s'或'为后缀
 | 
			
		||||
        处理方式:将'其后的部分去除
 | 
			
		||||
    3、单词元音位置的缩写
 | 
			
		||||
        最常见的有not->n't/is->'s/have->'ve这类
 | 
			
		||||
        处理方式:保留
 | 
			
		||||
    
 | 
			
		||||
    上述处理方式2/3两点可能产生一种冲突:
 | 
			
		||||
        某些单词元音缩写后恰好以's结尾
 | 
			
		||||
        但考虑到用于学习英语的文章一般不会出现过于口语化的缩写单词
 | 
			
		||||
        因此要么还是表所有格,要么就是is的缩写
 | 
			
		||||
        故不考虑这种冲突情况
 | 
			
		||||
    '''
 | 
			
		||||
 | 
			
		||||
    '''
 | 
			
		||||
    以下是原本的代码
 | 
			
		||||
    if '\'' in s:
 | 
			
		||||
        n = len(s)
 | 
			
		||||
        t = '' # 用来收集我需要保留的字符
 | 
			
		||||
| 
						 | 
				
			
			@ -58,6 +96,7 @@ def remove_punctuation(s): # 这里是s是形参 (parameter)。函数被调用
 | 
			
		|||
        return t
 | 
			
		||||
    else:
 | 
			
		||||
        return s
 | 
			
		||||
    '''
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
def sort_in_descending_order(lst):# 单词按频率降序排列
 | 
			
		||||
| 
						 | 
				
			
			@ -74,16 +113,25 @@ def make_html_page(lst, fname):
 | 
			
		|||
    '''
 | 
			
		||||
    功能:把lst的信息存到fname中,以html格式。
 | 
			
		||||
    '''
 | 
			
		||||
    s = ''
 | 
			
		||||
    count = 1
 | 
			
		||||
    for x in lst:
 | 
			
		||||
        # <a href="">word</a>
 | 
			
		||||
        s += '<p>%d <a href="%s">%s</a> (%d)</p>' % (count, youdao_link(x[0]), x[0], x[1])
 | 
			
		||||
        count += 1
 | 
			
		||||
    result = ''
 | 
			
		||||
    id = 1
 | 
			
		||||
 | 
			
		||||
    for word in lst:
 | 
			
		||||
        result += '<p>'
 | 
			
		||||
        result += '%d ' % id
 | 
			
		||||
        result += getHyperlinkHTML(word[0])
 | 
			
		||||
        result += ' (%d)' % word[1]
 | 
			
		||||
        result += '</p>'
 | 
			
		||||
        # result += '<p>%d <a href="%s">%s</a> (%d)</p>' % (count, youdao_link(x[0]), x[0], x[1])
 | 
			
		||||
        id += 1
 | 
			
		||||
 | 
			
		||||
    f = open(fname, 'w')
 | 
			
		||||
    f.write(s)
 | 
			
		||||
    f.write(result)
 | 
			
		||||
    f.close()
 | 
			
		||||
 | 
			
		||||
def getHyperlinkHTML(word):
    """Return an HTML ``<a>`` element linking ``word`` to its Youdao page.

    Both the link target and the visible text are HTML-escaped so that a
    word containing ``<``, ``>``, ``&`` or ``"`` cannot break the markup.

    Args:
        word: The word to render as a hyperlink.

    Returns:
        A string of the form ``<a href="URL">word</a>``.
    """
    # Function-scope import so the block does not depend on file-level imports.
    from html import escape

    # The href sits inside double quotes, so quotes must be escaped there.
    href = escape(youdao_link(word), quote=True)
    # In element text only &, < and > need escaping; apostrophes stay as-is.
    text = escape(word, quote=False)
    return '<a href="' + href + '">' + text + '</a>'
 | 
			
		||||
 | 
			
		||||
## main(程序入口)
 | 
			
		||||
if __name__ == '__main__':
 | 
			
		||||
| 
						 | 
				
			
			@ -96,12 +144,12 @@ if __name__ == '__main__':
 | 
			
		|||
        s = file2str(fname)
 | 
			
		||||
    else:
 | 
			
		||||
        print('I can accept at most 2 arguments.')
 | 
			
		||||
        sys.exit()# 结束程序运行, 下面的代码不会被执行了。
 | 
			
		||||
        sys.exit() # 结束程序运行, 下面的代码不会被执行了。
 | 
			
		||||
 | 
			
		||||
    s = remove_punctuation(s) # 这里是s是实参(argument),里面有值
 | 
			
		||||
    L = freq(s)
 | 
			
		||||
    for x in sort_in_descending_order(L):
 | 
			
		||||
        print('%s\t%d\t%s' % (x[0], x[1], youdao_link(x[0])))#函数导出
 | 
			
		||||
        print('%s\t%d\t%s' % (x[0], x[1], youdao_link(x[0]))) # 函数导出
 | 
			
		||||
 | 
			
		||||
    # 把频率的结果放result.html中
 | 
			
		||||
    make_html_page(sort_in_descending_order(L), 'result.html') 
 | 
			
		||||
| 
						 | 
				
			
			
 | 
			
		|||
| 
						 | 
				
			
			@ -1,3 +1,3 @@
 | 
			
		|||
Flask==1.1.2
 | 
			
		||||
selenium==3.141.0
 | 
			
		||||
Flask==2.1.2
 | 
			
		||||
selenium==4.2.0
 | 
			
		||||
PyYAML~=6.0
 | 
			
		||||
| 
						 | 
				
			
			
 | 
			
		|||
		Loading…
	
		Reference in New Issue