diff --git a/app/WordFreq.py b/app/WordFreq.py old mode 100644 new mode 100755 index 3620a41..eaf48c8 --- a/app/WordFreq.py +++ b/app/WordFreq.py @@ -2,24 +2,24 @@ # Copyright 2019 (C) Hui Lan # Written permission must be obtained from the author for commercial uses. ########################################################################### - from wordfreqCMD import remove_punctuation, freq, sort_in_descending_order import string + class WordFreq: - def __init__(self, s): + def __init__(self, s, max_word_length=30): self.s = remove_punctuation(s) + self.max_word_length = max_word_length def get_freq(self): lst = [] - for t in freq(self.s): + for t in freq(self.s, self.max_word_length): word = t[0] if len(word) > 0 and word[0] in string.ascii_letters: lst.append(t) return sort_in_descending_order(lst) - + if __name__ == '__main__': - f = WordFreq('BANANA; Banana, apple ORANGE Banana banana.') + f = WordFreq('BANANA; Banana, apple ORANGE Banana banana.', max_word_length=30) print(f.get_freq()) - diff --git a/app/test_bug511_Bosh.py b/app/test_bug511_Bosh.py new file mode 100755 index 0000000..a4a80e8 --- /dev/null +++ b/app/test_bug511_Bosh.py @@ -0,0 +1,56 @@ +########################################################################### +# Copyright 2019 (C) Hui Lan +# Written permission must be obtained from the author for commercial uses. +########################################################################### +import unittest +from wordfreqCMD import remove_punctuation, freq, sort_in_descending_order +from WordFreq import WordFreq + + +class TestWordFrequency(unittest.TestCase): + + def test_word_frequency_normal_case(self): + text = "BANANA; Banana, apple ORANGE Banana banana." + wf = WordFreq(text) + result = wf.get_freq() + expected = [('banana', 4), ('orange', 1), ('apple', 1)] + self.assertEqual(result, expected) + + def test_word_frequency_with_long_word(self): + text = "apple banana " + "a" * 31 + " orange banana apple" + wf = WordFreq(text, max_word_length=30) + result = wf.get_freq() + expected = [('banana', 2), ('apple', 2), ('orange', 1)] + self.assertEqual(result, expected) + + def test_word_frequency_all_long_words(self): + text = "a" * 31 + " " + "b" * 32 + " " + "c" * 33 + wf = WordFreq(text, max_word_length=30) + result = wf.get_freq() + expected = [] + self.assertEqual(result, expected) + + def test_word_frequency_with_punctuation(self): + text = "Hello, world! Hello... hello; 'hello' --world--" + wf = WordFreq(text) + result = wf.get_freq() + expected = [('hello', 4), ('world', 2)] + self.assertEqual(result, expected) + + def test_word_frequency_empty_string(self): + text = "" + wf = WordFreq(text) + result = wf.get_freq() + expected = [] + self.assertEqual(result, expected) + + def test_word_frequency_with_max_length_parameter(self): + text = "apple banana apple" + wf = WordFreq(text, max_word_length=5) + result = wf.get_freq() + expected = [('apple', 2)] + self.assertEqual(result, expected) + + +if __name__ == '__main__': + unittest.main() diff --git a/app/wordfreqCMD.py b/app/wordfreqCMD.py old mode 100644 new mode 100755 index dcee74e..34359d1 --- a/app/wordfreqCMD.py +++ b/app/wordfreqCMD.py @@ -2,56 +2,56 @@ # Copyright 2019 (C) Hui Lan # Written permission must be obtained from the author for commercial uses. ########################################################################### - import collections import string -import operator -import os, sys # 引入模块sys,因为我要用里面的sys.argv列表中的信息来读取命令行参数。 +import os +import sys import pickle_idea -def freq(fruit): + +def freq(fruit, max_word_length=30): ''' 功能: 把字符串转成列表。 目的是得到每个单词的频率。 输入: 字符串 输出: 列表, 列表里包含一组元组,每个元组包含单词与单词的频率。 比如 [('apple', 2), ('banana', 1)] - 注意事项: 首先要把字符串转成小写。原因是。。。 + 注意事项: 首先要把字符串转小写。 ''' result = [] - - fruit = fruit.lower() # 字母转小写 + + fruit = fruit.lower() # 字母转小写 flst = fruit.split() # 字符串转成list c = collections.Counter(flst) - result = c.most_common() + for word, count in c.most_common(): + if len(word) <= max_word_length: + result.append((word, count)) return result - -def youdao_link(s): # 有道链接 - link = 'http://youdao.com/w/eng/' + s + '/#keyfrom=dict2.index'# 网址 +def youdao_link(s): # 有道链接 + link = 'http://youdao.com/w/eng/' + s + '/#keyfrom=dict2.index' # 网址 return link -def file2str(fname):#文件转字符 - f = open(fname) #打开 - s = f.read() #读取 - f.close() #关闭 +def file2str(fname): # 文件转字符 + with open(fname) as f: # 使用with打开文件 + s = f.read() # 读取 return s -def remove_punctuation(s): # 这里是s是形参 (parameter)。函数被调用时才给s赋值。 - special_characters = '\_©~<=>+/[]*&$%^@.,?!:;#()"“”—‘’{}|' # 把里面的字符都去掉 +def remove_punctuation(s): # 这里是s是形参(parameter)。函数被调用时才给s赋值。 + special_characters = r'\_©~<=>+/[]*&$%^@.,?!:;#()"“”—‘’{}|' # 把里面的字符都去掉 for c in special_characters: - s = s.replace(c, ' ') # 防止出现把 apple,apple 移掉逗号后变成 appleapple 情况 + s = s.replace(c, ' ') # 防止出现把 apple,apple 移掉逗号后变成 appleapple 情况 s = s.replace('--', ' ') - s = s.strip() # 去除前后的空格 - + s = s.strip() # 去除前后的空格 + if '\'' in s: n = len(s) - t = '' # 用来收集我需要保留的字符 - for i in range(n): # 只有单引号前后都有英文字符,才保留 + t = '' # 用来收集我需要保留的字符 + for i in range(n): # 只有单引号前后都有英文字符,才保留 if s[i] == '\'': i_is_ok = i - 1 >= 0 and i + 1 < n - if i_is_ok and s[i-1] in string.ascii_letters and s[i+1] in string.ascii_letters: + if i_is_ok and s[i - 1] in string.ascii_letters and s[i + 1] in string.ascii_letters: t += s[i] else: t += s[i] @@ -60,12 +60,12 @@ def remove_punctuation(s): # 这里是s是形参 (parameter)。函数被调用 return s -def sort_in_descending_order(lst):# 单词按频率降序排列 +def sort_in_descending_order(lst): # 单词按频率降序排列 lst2 = sorted(lst, reverse=True, key=lambda x: (x[1], x[0])) return lst2 -def sort_in_ascending_order(lst):# 单词按频率降序排列 +def sort_in_ascending_order(lst): # 单词按频率降序排列 lst2 = sorted(lst, reverse=False, key=lambda x: (x[1], x[0])) return lst2 @@ -80,31 +80,30 @@ def make_html_page(lst, fname): # 只是在wordfreqCMD.py中的main函数中调 # word s += '

%d %s (%d)

' % (count, youdao_link(x[0]), x[0], x[1]) count += 1 - f = open(fname, 'w') - f.write(s) - f.close() + with open(fname, 'w') as f: + f.write(s) ## main(程序入口) if __name__ == '__main__': num = len(sys.argv) - if num == 1: # 从键盘读入字符串 + if num == 1: # 从键盘读入字符串 s = input() - elif num == 2: # 从文件读入字符串 + elif num == 2: # 从文件读入字符串 fname = sys.argv[1] s = file2str(fname) else: print('I can accept at most 2 arguments.') - sys.exit()# 结束程序运行, 下面的代码不会被执行了。 + sys.exit() # 结束程序运行,下面的代码不会被执行了。 - s = remove_punctuation(s) # 这里是s是实参(argument),里面有值 + s = remove_punctuation(s) # 这里是s是实参(argument),里面有值 L = freq(s) for x in sort_in_descending_order(L): - print('%s\t%d\t%s' % (x[0], x[1], youdao_link(x[0])))#函数导出 + print('%s\t%d\t%s' % (x[0], x[1], youdao_link(x[0]))) # 函数导出 # 把频率的结果放result.html中 - make_html_page(sort_in_descending_order(L), 'result.html') + make_html_page(sort_in_descending_order(L), 'result.html') print('\nHistory:\n') if os.path.exists('frequency.p'): @@ -120,4 +119,3 @@ if __name__ == '__main__': pickle_idea.save_frequency_to_pickle(d, 'frequency.p') - diff --git a/build.sh b/build.sh index 158a86f..c42d77f 100755 --- a/build.sh +++ b/build.sh @@ -1,6 +1,6 @@ #!/bin/sh -DEPLOYMENT_DIR=/home/lanhui/englishpal2/EnglishPal +DEPLOYMENT_DIR=/home/main/EnglishPal cd $DEPLOYMENT_DIR pwd diff --git a/requirements.txt b/requirements.txt index ee6e128..7e0c4a2 100644 --- a/requirements.txt +++ b/requirements.txt @@ -8,4 +8,7 @@ Werkzeug==2.2.2 ======= pytest~=8.1.1 +<<<<<<< HEAD >>>>>>> 8cbc7c9 (修复快速点击下一页按钮点击频率过快时页面跳转到未知名页面) +======= +>>>>>>> fa65055 (Fix bug 511)