Bug412-JiangLetian

Merge branch 'Bug394-MiaoChenShuo' into master
更新 'app/Article.py'
2022-06-13 11:40:20 +08:00 · 2022-06-12 21:50:36 +08:00 · 2022-06-11 23:21:17 +08:00 · 2022-06-11 23:13:08 +08:00 · 2022-06-05 23:36:55 +08:00
8 changed files with 38 additions and 139 deletions
--- a/app/Login.py
+++ b/app/Login.py
@@ -5,6 +5,10 @@ from UseSqlite import InsertQuery, RecordQuery
 path_prefix = '/var/www/wordfreq/wordfreq/'
 path_prefix = './'  # comment this line in deployment
 def verify_pass(newpass, oldpass):
    '''
    Tell whether the proposed new password equals the old one.

    newpass: the password the user wants to switch to.
    oldpass: the password currently in use.
    Returns True when the two values compare equal, False otherwise.
    '''
    # Return the comparison itself so a mismatch yields an explicit False
    # instead of falling off the end of the function and returning None.
    return newpass == oldpass
 def verify_user(username, password):
    rq = RecordQuery(path_prefix + 'static/wordfreqapp.db')
@@ -47,6 +51,8 @@ def change_password(username, old_password, new_password):
    if not verify_user(username, old_password):  # 旧密码错误
        return False
    # 将用户名和密码一起加密，以免暴露不同用户的相同密码
    if verify_pass(new_password,old_password): #新旧密码一致
        return False
    password = md5(username + new_password)
    rq = InsertQuery(path_prefix + 'static/wordfreqapp.db')
    rq.instructions_with_parameters("UPDATE user SET password=:password WHERE name=:username", dict(
--- a/app/static/js/highlight.js
+++ b/app/static/js/highlight.js
@@ -29,9 +29,10 @@ function highLight() {
    if (sel_word1 != null) {
        var list = sel_word1.value.split(" ");
        for (var i = 0; i < list.length; ++i) {
-            list[i] = list[i].replace(/(^\s*)|(\s*$)/g, "");
+            list[i] = list[i].replace(/(^\s*)|(\s*$)/g, "");//消除字符串两边空字符
            if (list[i] != "" && "<mark>".indexOf(list[i]) == -1 && "</mark>".indexOf(list[i]) == -1) {
-                txt = txt.replace(new RegExp(list[i], "g"), "<mark>" + list[i] + "</mark>");
+
                txt = txt.replace(new RegExp("\\s"+list[i]+"\\s", "g"), " <mark>" + list[i] + "</mark> ");
            }
        }
    }
@@ -40,7 +41,7 @@ function highLight() {
        for (var i = 0; i < list2.length; ++i) {
            list2[i] = list2[i].replace(/(^\s*)|(\s*$)/g, "");
            if (list2[i] != "" && "<mark>".indexOf(list2[i]) == -1 && "</mark>".indexOf(list2[i]) == -1) {
-                txt = txt.replace(new RegExp(list2[i], "g"), "<mark>" + list2[i] + "</mark>");
+                txt = txt.replace(new RegExp("\\s"+list2[i]+"\\s", "g"), " <mark>" + list2[i] + "</mark> ");
            }
        }
    }
--- a/app/templates/analyse_word.html
+++ b/app/templates/analyse_word.html
@@ -1,27 +0,0 @@
 <!DOCTYPE html>
 <html lang="en">
 <head>
    <meta charset="utf-8">
    <meta name="viewport" content="width=device-width, initial-scale=1.0, minimum-scale=0.5, maximum-scale=3.0, user-scalable=yes" />
    <title>EnglishPal - Analyse Result</title>
 </head>
 <body>
    <div>
        <br/>
        <h3>单词 {{ word }} 的统计信息</h3>
        <hr/>
        <ul>
            <li>在总 {{ resultlen }} 篇文章中，总共出现了 {{ totcount }} 次</li>
            <li>总占比为 {{ '%.5f' % (100 * totcount / totcnt) }}%</li>
        </ul>
        <ul>
            <li>在一篇文章中：</li>
            <li>最多出现了 {{ maxcount }} 次</li>
            <li>最高占比为 {{ '%.5f' % maxrate }}%</li>
            <li>最少出现了 {{ mincount }} 次</li>
            <li>最低占比为 {{ '%.5f' % minrate }}%</li>
        </ul>
        <input type="button" value="返回" onclick="location.href='{{ username }}'"/>
    </div>
 </body>
 </html>
--- a/app/templates/userpage_get.html
+++ b/app/templates/userpage_get.html
@@ -38,17 +38,10 @@
    <p><b>收集生词吧</b> （可以在正文中划词，也可以复制黏贴）</p>
    <form method="post" action="/{{ username }}">
        <input type="hidden" name="methodtype" value="multiple"/>
        <textarea name="content" id="selected-words" rows="10" cols="120"></textarea><br/>
        <input type="submit" value="把生词加入我的生词库"/>
        <input type="reset" value="清除"/>
-    </form><br/>
+    </form>
    <form method="post" action="/{{ username }}">
        <input type="hidden" name="methodtype" value="single"/>
        <input type="text" name="content"/><br/>
        <input type="submit" value="分析此单词" style="margin-top:5px"/>
        <input type="reset" value="清除"/>
    </form><br/>
    {% if session.get['thisWord'] %}
        <script type="text/javascript">
            //point to the anchor in the page whose id is aaa if it exists
--- a/app/templates/userpage_post.html
+++ b/app/templates/userpage_post.html
@@ -31,9 +31,6 @@
            <a href='http://youdao.com/w/eng/{{word}}/#keyfrom=dict2.index' title={{word}}>{{word}}</a>
            ({{x[1]}})
            <input type="checkbox" name="marked" value={{word}}>
            {% if x[0] in userwordlist %}
                &nbsp; <font color="red"><b>已在生词簿内</b></font>
            {% endif %}
        </p>
       {% endfor %}
--- a/app/user_service.py
+++ b/app/user_service.py
@@ -11,7 +11,6 @@ import Yaml
 from Article import get_today_article, load_freq_history
 from WordFreq import WordFreq
 from wordfreqCMD import sort_in_descending_order
 from UseSqlite import RecordQuery
 import pickle_idea
 import pickle_idea2
@@ -103,36 +102,10 @@ def userpage(username):
    user_freq_record = path_prefix + 'static/frequency/' + 'frequency_%s.pickle' % (username)
    if request.method == 'POST':  # when we submit a form
        if request.form['methodtype'] == 'multiple':
        content = request.form['content']
        f = WordFreq(content)
        lst = f.get_freq()
-            userwordlist = pickle_idea2.dict2lst(load_freq_history(user_freq_record))
+        return render_template('userpage_post.html',username=username,lst = lst, yml=Yaml.yml)
            userwordlist2 = []
            for t in userwordlist:
                userwordlist2.append(t[0])
            return render_template('userpage_post.html', username=username, lst = lst, yml=Yaml.yml, userwordlist=userwordlist2)
        else:
            word = request.form['content'].lower()  # 指定单词
            rq = RecordQuery(path_prefix + 'static/wordfreqapp.db')
            rq.instructions("SELECT * FROM article")  # 获取所有文章
            rq.do()
            result = rq.get_results()
            mincount, maxcount, totcount, totcnt = 1e9, 0, 0, 0
            maxrate, minrate = 0.0, 100.0
            for d in result:  # 查找数据库内的所有文章
                str = d['text'].lower().split()
                cnt = str.count(word)
                tot = len(str)
                totcnt += tot
                mincount = min(mincount, cnt)
                maxcount = max(maxcount, cnt)
                totcount += cnt
                maxrate = max(maxrate, 100 * cnt / tot)
                minrate = min(minrate, 100 * cnt / tot)
            return render_template('analyse_word.html', mincount=mincount, maxcount=maxcount, totcount=totcount, totcnt=totcnt,
                                   maxrate=maxrate, minrate=minrate, resultlen=len(result),
                                   word=word, username=session.get('username'))
    elif request.method == 'GET':  # when we load a html page
        d = load_freq_history(user_freq_record)
@@ -154,6 +127,10 @@ def userpage(username):
                               yml=Yaml.yml,
                               words=words)
@userService.route("/<username>/mark", methods=['GET', 'POST'])
 def user_mark_word(username):
    '''
--- a/app/wordfreqCMD.py
+++ b/app/wordfreqCMD.py
@@ -18,6 +18,7 @@ def freq(fruit):
    '''
    result = []
    fruit = fruit.lower() # 字母转小写
    flst = fruit.split()  # 字符串转成list
    c = collections.Counter(flst)
@@ -26,63 +27,24 @@ def freq(fruit):
 def youdao_link(s): # 有道链接
-    link = 'http://youdao.com/w/eng/' + s + '/#keyfrom=dict2.index' # 网址
+    link = 'http://youdao.com/w/eng/' + s + '/#keyfrom=dict2.index'# 网址
    return link
-def file2str(fname): # 文件转字符
+def file2str(fname):#文件转字符
-    f = open(fname)  # 打开
+    f = open(fname) #打开
-    s = f.read()     # 读取
+    s = f.read()    #读取
-    f.close()        # 关闭
+    f.close()       #关闭
    return s
 def remove_punctuation(s): # 这里是s是形参 (parameter)。函数被调用时才给s赋值。
-    special_characters = '_©~=+[]*&$%^@.,?!:;#()"“”—' # 把里面的字符都去掉
+    special_characters = '_©~=+[]*&$%^@.,?!:;#()"“”—‘’' # 把里面的字符都去掉
    for c in special_characters:
-        s = s.replace(c, ' ') # 把所有符号都替换成空格，防止出现把 apple,apple 移掉逗号后变成 appleapple 情况
+        s = s.replace(c, ' ') # 防止出现把 apple,apple 移掉逗号后变成 appleapple 情况
    s = s.replace('--', ' ')
    s = s.strip() # 去除前后的空格
    single_quote = '‘’\'' # 各种单引号单独处理
    n, i = len(s), 0
    t = ''  # 用来收集我需要保留的字符
    while i < n:  # 只有单引号前后都有英文字符，才保留
        if s[i] in single_quote:
            if i == 0 or i == n - 1 or s[i - 1] == ' ' or s[i + 1] == ' ':
                i = i + 1
                continue # condition 1+2
            if s[i + 1] == 's' and (i + 2 == n or s[i + 2] == ' '):
                i = i + 2
                continue # condition 2
            t += '\'' # condition 3, standardize quote
        else:
            t += s[i]
        i = i + 1
    return t
    '''
    单引号出现在文章中的情况：
    1、某些情况下作为双引号使用，引用段落
        这种情况一般出现在词首或词尾
        处理方式：直接去除
    2、表示名词所有格
        对于单数名词以's为后缀，对于复数名词以s'或'为后缀
        处理方式：将'其后的部分去除
    3、单词元音位置的缩写
        最常见的有not->n't/is->'s/have->'ve这类
        处理方式：保留
    上述处理方式2/3两点可能产生一种冲突：
        某些单词元音缩写后恰好以's结尾
        但考虑到用于学习英语的文章一般不会出现过于口语化的缩写单词
        因此要么还是表所有格，要么就是is的缩写
        故不考虑这种冲突情况
    '''
    '''
    以下是原本的代码
    if '\'' in s:
        n = len(s)
        t = '' # 用来收集我需要保留的字符
@@ -96,7 +58,6 @@ def remove_punctuation(s): # 这里是s是形参 (parameter)。函数被调用
        return t
    else:
        return s
    '''
 def sort_in_descending_order(lst):# 单词按频率降序排列
@@ -113,25 +74,16 @@ def make_html_page(lst, fname):
    '''
    功能：把lst的信息存到fname中，以html格式。
    '''
-    result = ''
+    s = ''
-    id = 1
+    count = 1
-
+    for x in lst:
-    for word in lst:
+        # <a href="">word</a>
-        result += '<p>'
+        s += '<p>%d <a href="%s">%s</a> (%d)</p>' % (count, youdao_link(x[0]), x[0], x[1])
-        result += '%d ' % id
+        count += 1
        result += getHyperlinkHTML(word[0])
        result += ' (%d)' % word[1]
        result += '</p>'
        # result += '<p>%d <a href="%s">%s</a> (%d)</p>' % (count, youdao_link(x[0]), x[0], x[1])
        id += 1
    f = open(fname, 'w')
-    f.write(result)
+    f.write(s)
    f.close()
 def get_html_hyperlink(word):
    '''
    Build the HTML anchor element for a word.

    The href is whatever youdao_link(word) returns and the visible
    link text is the word itself. The assembled markup is returned
    as a single string.
    '''
    pieces = ['<a href="', youdao_link(word), '">', word, '</a>']
    return ''.join(pieces)
 ## main（程序入口）
 if __name__ == '__main__':
@@ -144,12 +96,12 @@ if __name__ == '__main__':
        s = file2str(fname)
    else:
        print('I can accept at most 2 arguments.')
-        sys.exit() # 结束程序运行， 下面的代码不会被执行了。
+        sys.exit()# 结束程序运行， 下面的代码不会被执行了。
    s = remove_punctuation(s) # 这里是s是实参(argument)，里面有值
    L = freq(s)
    for x in sort_in_descending_order(L):
-        print('%s\t%d\t%s' % (x[0], x[1], youdao_link(x[0]))) # 函数导出
+        print('%s\t%d\t%s' % (x[0], x[1], youdao_link(x[0])))#函数导出
    # 把频率的结果放result.html中
    make_html_page(sort_in_descending_order(L), 'result.html') 
--- a/requirements.txt
+++ b/requirements.txt
@@ -1,3 +1,3 @@
-Flask==2.1.2
+Flask==1.1.2
-selenium==4.2.0
+selenium==3.141.0
 PyYAML~=6.0
Author	SHA1	Message	Date
lin	b53e7031e5	Bug412-JiangLetian	2022-06-13 11:40:20 +08:00
缪宸硕	e48008550a	Merge branch 'Bug394-MiaoChenShuo' into master	2022-06-12 21:50:36 +08:00
李凯	fde3be4c23	更新 'app/Article.py'	2022-06-11 23:21:17 +08:00
李凯	5d5f4cf8f2	更新 'app/Article.py'	2022-06-11 23:13:08 +08:00
miaochenshuo	260f62967b	修复 Bug394	2022-06-05 23:36:55 +08:00