Fix bug 511

2024-06-14 20:14:28 +08:00 · 2024-06-14 20:14:28 +08:00 · 5447d570e0
parent fb7adc3f22
commit 5447d570e0
5 changed files with 99 additions and 42 deletions
--- a/app/WordFreq.py
+++ b/app/WordFreq.py
@ -2,17 +2,18 @@
 # Copyright 2019 (C) Hui Lan <hui.lan@cantab.net>
 # Written permission must be obtained from the author for commercial uses.
 ###########################################################################
 from wordfreqCMD import remove_punctuation, freq, sort_in_descending_order
 import string
 class WordFreq:
-    def __init__(self, s):
+    def __init__(self, s, max_word_length=30):
        self.s = remove_punctuation(s)
        self.max_word_length = max_word_length
    def get_freq(self):
        lst = []
-        for t in freq(self.s):
+        for t in freq(self.s, self.max_word_length):
            word = t[0]
            if len(word) > 0 and word[0] in string.ascii_letters:
                lst.append(t)
@ -20,6 +21,5 @@ class WordFreq:
 if __name__ == '__main__':
-    f = WordFreq('BANANA; Banana, apple ORANGE Banana banana.')
+    f = WordFreq('BANANA; Banana, apple ORANGE Banana banana.', max_word_length=30)
    print(f.get_freq())
--- a/app/test_bug511_Bosh.py
+++ b/app/test_bug511_Bosh.py
@ -0,0 +1,56 @@
 ###########################################################################
 # Copyright 2019 (C) Hui Lan <hui.lan@cantab.net>
 # Written permission must be obtained from the author for commercial uses.
 ###########################################################################
 import unittest
 from wordfreqCMD import remove_punctuation, freq, sort_in_descending_order
 from WordFreq import WordFreq
 class TestWordFrequency(unittest.TestCase):
    """Checks of WordFreq.get_freq() output for several input texts.

    Each test builds a WordFreq from a literal string and compares the
    list returned by get_freq() against a fixed list of (word, count)
    tuples.
    """

    def test_word_frequency_normal_case(self):
        # Same word in different letter cases, separated by punctuation.
        sentence = "BANANA; Banana, apple ORANGE Banana banana."
        counts = WordFreq(sentence).get_freq()
        self.assertEqual(counts, [('banana', 4), ('orange', 1), ('apple', 1)])

    def test_word_frequency_with_long_word(self):
        # One 31-character word sits among ordinary words; limit is 30.
        sentence = "apple banana " + "a" * 31 + " orange banana apple"
        counter = WordFreq(sentence, max_word_length=30)
        self.assertEqual(
            counter.get_freq(),
            [('banana', 2), ('apple', 2), ('orange', 1)],
        )

    def test_word_frequency_all_long_words(self):
        # Every word (31, 32 and 33 characters) exceeds the limit of 30.
        sentence = "a" * 31 + " " + "b" * 32 + " " + "c" * 33
        counter = WordFreq(sentence, max_word_length=30)
        self.assertEqual(counter.get_freq(), [])

    def test_word_frequency_with_punctuation(self):
        # Commas, dots, quotes and double dashes surround the words.
        sentence = "Hello, world! Hello... hello; 'hello' --world--"
        counts = WordFreq(sentence).get_freq()
        self.assertEqual(counts, [('hello', 4), ('world', 2)])

    def test_word_frequency_empty_string(self):
        # An empty text is expected to give an empty frequency list.
        counts = WordFreq("").get_freq()
        self.assertEqual(counts, [])

    def test_word_frequency_with_max_length_parameter(self):
        # With a limit of 5, only 'apple' is expected in the result.
        counter = WordFreq("apple banana apple", max_word_length=5)
        self.assertEqual(counter.get_freq(), [('apple', 2)])
 if __name__ == '__main__':
    unittest.main()
--- a/app/wordfreqCMD.py
+++ b/app/wordfreqCMD.py
@ -2,19 +2,19 @@
 # Copyright 2019 (C) Hui Lan <hui.lan@cantab.net>
 # Written permission must be obtained from the author for commercial uses.
 ###########################################################################
 import collections
 import string
-import operator
+import os
-import os, sys # 引入模块sys，因为我要用里面的sys.argv列表中的信息来读取命令行参数。
+import sys
 import pickle_idea
-def freq(fruit):
+
 def freq(fruit, max_word_length=30):
    '''
    功能： 把字符串转成列表。 目的是得到每个单词的频率。
    输入： 字符串
    输出： 列表， 列表里包含一组元组，每个元组包含单词与单词的频率。 比如 [('apple', 2), ('banana', 1)]
-    注意事项： 首先要把字符串转成小写。原因是。。。
+    注意事项： 首先要把字符串转小写。
    '''
    result = []
@ -22,24 +22,24 @@ def freq(fruit):
    fruit = fruit.lower()  # 字母转小写
    flst = fruit.split()  # 字符串转成list
    c = collections.Counter(flst)
-    result = c.most_common()
+    for word, count in c.most_common():
        if len(word) <= max_word_length:
            result.append((word, count))
    return result
 def youdao_link(s):  # 有道链接
-    link = 'http://youdao.com/w/eng/' + s + '/#keyfrom=dict2.index'# 网址
+    link = 'http://youdao.com/w/eng/' + s + '/#keyfrom=dict2.index'  # 网址
    return link
-def file2str(fname):#文件转字符
+def file2str(fname):  # 文件转字符
-    f = open(fname) #打开
+    with open(fname) as f:  # 使用with打开文件
-    s = f.read()    #读取
+        s = f.read()  # 读取
    f.close()       #关闭
    return s
-def remove_punctuation(s): # 这里是s是形参 (parameter)。函数被调用时才给s赋值。
+def remove_punctuation(s):  # 这里是s是形参(parameter)。函数被调用时才给s赋值。
-    special_characters = '\_©~<=>+/[]*&$%^@.,?!:;#()"“”—‘’{}|' # 把里面的字符都去掉
+    special_characters = r'\_©~<=>+/[]*&$%^@.,?!:;#()"“”—‘’{}|'  # 把里面的字符都去掉
    for c in special_characters:
        s = s.replace(c, ' ')  # 防止出现把 apple,apple 移掉逗号后变成 appleapple 情况
    s = s.replace('--', ' ')
@ -51,7 +51,7 @@ def remove_punctuation(s): # 这里是s是形参 (parameter)。函数被调用
        for i in range(n):  # 只有单引号前后都有英文字符，才保留
            if s[i] == '\'':
                i_is_ok = i - 1 >= 0 and i + 1 < n
-                if i_is_ok and s[i-1] in string.ascii_letters and s[i+1] in string.ascii_letters:
+                if i_is_ok and s[i - 1] in string.ascii_letters and s[i + 1] in string.ascii_letters:
                    t += s[i]
            else:
                t += s[i]
@ -60,12 +60,12 @@ def remove_punctuation(s): # 这里是s是形参 (parameter)。函数被调用
        return s
-def sort_in_descending_order(lst):# 单词按频率降序排列
+def sort_in_descending_order(lst):  # 单词按频率降序排列
    lst2 = sorted(lst, reverse=True, key=lambda x: (x[1], x[0]))
    return lst2
-def sort_in_ascending_order(lst):# 单词按频率降序排列
+def sort_in_ascending_order(lst):  # sort words by frequency in ascending order
    lst2 = sorted(lst, reverse=False, key=lambda x: (x[1], x[0]))
    return lst2
@ -80,9 +80,8 @@ def make_html_page(lst, fname):  # 只是在wordfreqCMD.py中的main函数中调
        # <a href="">word</a>
        s += '<p>%d <a href="%s">%s</a> (%d)</p>' % (count, youdao_link(x[0]), x[0], x[1])
        count += 1
-    f = open(fname, 'w')
+    with open(fname, 'w') as f:
        f.write(s)
    f.close()
 ## main（程序入口）
@ -96,12 +95,12 @@ if __name__ == '__main__':
        s = file2str(fname)
    else:
        print('I can accept at most 2 arguments.')
-        sys.exit()# 结束程序运行， 下面的代码不会被执行了。
+        sys.exit()  # 结束程序运行，下面的代码不会被执行了。
    s = remove_punctuation(s)  # 这里是s是实参(argument)，里面有值
    L = freq(s)
    for x in sort_in_descending_order(L):
-        print('%s\t%d\t%s' % (x[0], x[1], youdao_link(x[0])))#函数导出
+        print('%s\t%d\t%s' % (x[0], x[1], youdao_link(x[0])))  # 函数导出
    # 把频率的结果放result.html中
    make_html_page(sort_in_descending_order(L), 'result.html')
@ -120,4 +119,3 @@ if __name__ == '__main__':
    pickle_idea.save_frequency_to_pickle(d, 'frequency.p')
--- a/build.sh
+++ b/build.sh
@ -1,6 +1,6 @@
 #!/bin/sh
-DEPLOYMENT_DIR=/home/lanhui/englishpal2/EnglishPal
+DEPLOYMENT_DIR=/home/main/EnglishPal
 cd $DEPLOYMENT_DIR
 pwd
--- a/requirements.txt
+++ b/requirements.txt
@ -8,4 +8,7 @@ Werkzeug==2.2.2
 =======
 pytest~=8.1.1
 <<<<<<< HEAD
 >>>>>>> 8cbc7c9 (修复快速点击下一页按钮点击频率过快时页面跳转到未知名页面)
 =======
 >>>>>>> fa65055 (Fix bug 511)