0
0
Fork 0

Fix bug 511

Bug511-Bosh
BOSHKHUDZHAEV MAZHNUN 2024-06-14 20:14:28 +08:00
parent fb7adc3f22
commit 5447d570e0
5 changed files with 99 additions and 42 deletions

10
app/WordFreq.py Normal file → Executable file
View File

@ -2,17 +2,18 @@
# Copyright 2019 (C) Hui Lan <hui.lan@cantab.net> # Copyright 2019 (C) Hui Lan <hui.lan@cantab.net>
# Written permission must be obtained from the author for commercial uses. # Written permission must be obtained from the author for commercial uses.
########################################################################### ###########################################################################
from wordfreqCMD import remove_punctuation, freq, sort_in_descending_order from wordfreqCMD import remove_punctuation, freq, sort_in_descending_order
import string import string
class WordFreq: class WordFreq:
def __init__(self, s): def __init__(self, s, max_word_length=30):
self.s = remove_punctuation(s) self.s = remove_punctuation(s)
self.max_word_length = max_word_length
def get_freq(self): def get_freq(self):
lst = [] lst = []
for t in freq(self.s): for t in freq(self.s, self.max_word_length):
word = t[0] word = t[0]
if len(word) > 0 and word[0] in string.ascii_letters: if len(word) > 0 and word[0] in string.ascii_letters:
lst.append(t) lst.append(t)
@ -20,6 +21,5 @@ class WordFreq:
if __name__ == '__main__': if __name__ == '__main__':
f = WordFreq('BANANA; Banana, apple ORANGE Banana banana.') f = WordFreq('BANANA; Banana, apple ORANGE Banana banana.', max_word_length=30)
print(f.get_freq()) print(f.get_freq())

56
app/test_bug511_Bosh.py Executable file
View File

@ -0,0 +1,56 @@
###########################################################################
# Copyright 2019 (C) Hui Lan <hui.lan@cantab.net>
# Written permission must be obtained from the author for commercial uses.
###########################################################################
import unittest
from wordfreqCMD import remove_punctuation, freq, sort_in_descending_order
from WordFreq import WordFreq
class TestWordFrequency(unittest.TestCase):
    """Checks for WordFreq.get_freq, including the max_word_length filter."""

    def test_word_frequency_normal_case(self):
        """Differently cased, punctuated spellings are tallied as one word."""
        sample = "BANANA; Banana, apple ORANGE Banana banana."
        counted = WordFreq(sample).get_freq()
        self.assertEqual(counted, [('banana', 4), ('orange', 1), ('apple', 1)])

    def test_word_frequency_with_long_word(self):
        """A 31-character word is left out when the limit is 30."""
        overlong = "a" * 31
        sample = f"apple banana {overlong} orange banana apple"
        counted = WordFreq(sample, max_word_length=30).get_freq()
        self.assertEqual(counted, [('banana', 2), ('apple', 2), ('orange', 1)])

    def test_word_frequency_all_long_words(self):
        """Nothing is reported when every word exceeds the limit."""
        sample = " ".join(
            letter * length
            for letter, length in (("a", 31), ("b", 32), ("c", 33))
        )
        counted = WordFreq(sample, max_word_length=30).get_freq()
        self.assertEqual(counted, [])

    def test_word_frequency_with_punctuation(self):
        """Surrounding punctuation does not split the tallies."""
        sample = "Hello, world! Hello... hello; 'hello' --world--"
        counted = WordFreq(sample).get_freq()
        self.assertEqual(counted, [('hello', 4), ('world', 2)])

    def test_word_frequency_empty_string(self):
        """An empty input produces an empty frequency list."""
        counted = WordFreq("").get_freq()
        self.assertEqual(counted, [])

    def test_word_frequency_with_max_length_parameter(self):
        """With a limit of 5, the six-letter 'banana' is dropped."""
        sample = "apple banana apple"
        counted = WordFreq(sample, max_word_length=5).get_freq()
        self.assertEqual(counted, [('apple', 2)])
# Run the tests in this file when it is executed directly as a script.
if __name__ == "__main__":
    unittest.main()

42
app/wordfreqCMD.py Normal file → Executable file
View File

@ -2,19 +2,19 @@
# Copyright 2019 (C) Hui Lan <hui.lan@cantab.net> # Copyright 2019 (C) Hui Lan <hui.lan@cantab.net>
# Written permission must be obtained from the author for commercial uses. # Written permission must be obtained from the author for commercial uses.
########################################################################### ###########################################################################
import collections import collections
import string import string
import operator import os
import os, sys # 引入模块sys因为我要用里面的sys.argv列表中的信息来读取命令行参数。 import sys
import pickle_idea import pickle_idea
def freq(fruit):
def freq(fruit, max_word_length=30):
''' '''
功能 把字符串转成列表 目的是得到每个单词的频率 功能 把字符串转成列表 目的是得到每个单词的频率
输入 字符串 输入 字符串
输出 列表 列表里包含一组元组每个元组包含单词与单词的频率 比如 [('apple', 2), ('banana', 1)] 输出 列表 列表里包含一组元组每个元组包含单词与单词的频率 比如 [('apple', 2), ('banana', 1)]
注意事项 首先要把字符串转小写原因是 注意事项 首先要把字符串转小写
''' '''
result = [] result = []
@ -22,24 +22,24 @@ def freq(fruit):
fruit = fruit.lower() # 字母转小写 fruit = fruit.lower() # 字母转小写
flst = fruit.split() # 字符串转成list flst = fruit.split() # 字符串转成list
c = collections.Counter(flst) c = collections.Counter(flst)
result = c.most_common() for word, count in c.most_common():
if len(word) <= max_word_length:
result.append((word, count))
return result return result
def youdao_link(s): # 有道链接 def youdao_link(s): # 有道链接
link = 'http://youdao.com/w/eng/' + s + '/#keyfrom=dict2.index'# 网址 link = 'http://youdao.com/w/eng/' + s + '/#keyfrom=dict2.index' # 网址
return link return link
def file2str(fname):#文件转字符 def file2str(fname): # 文件转字符
f = open(fname) #打开 with open(fname) as f: # 使用with打开文件
s = f.read() #读取 s = f.read() # 读取
f.close() #关闭
return s return s
def remove_punctuation(s): # 这里是s是形参 (parameter)。函数被调用时才给s赋值。 def remove_punctuation(s): # 这里是s是形参(parameter)。函数被调用时才给s赋值。
special_characters = '\_©~<=>+/[]*&$%^@.,?!:;#()"“”—‘’{}|' # 把里面的字符都去掉 special_characters = r'\_©~<=>+/[]*&$%^@.,?!:;#()"“”—‘’{}|' # 把里面的字符都去掉
for c in special_characters: for c in special_characters:
s = s.replace(c, ' ') # 防止出现把 apple,apple 移掉逗号后变成 appleapple 情况 s = s.replace(c, ' ') # 防止出现把 apple,apple 移掉逗号后变成 appleapple 情况
s = s.replace('--', ' ') s = s.replace('--', ' ')
@ -51,7 +51,7 @@ def remove_punctuation(s): # 这里是s是形参 (parameter)。函数被调用
for i in range(n): # 只有单引号前后都有英文字符,才保留 for i in range(n): # 只有单引号前后都有英文字符,才保留
if s[i] == '\'': if s[i] == '\'':
i_is_ok = i - 1 >= 0 and i + 1 < n i_is_ok = i - 1 >= 0 and i + 1 < n
if i_is_ok and s[i-1] in string.ascii_letters and s[i+1] in string.ascii_letters: if i_is_ok and s[i - 1] in string.ascii_letters and s[i + 1] in string.ascii_letters:
t += s[i] t += s[i]
else: else:
t += s[i] t += s[i]
@ -60,12 +60,12 @@ def remove_punctuation(s): # 这里是s是形参 (parameter)。函数被调用
return s return s
def sort_in_descending_order(lst):# 单词按频率降序排列 def sort_in_descending_order(lst): # 单词按频率降序排列
lst2 = sorted(lst, reverse=True, key=lambda x: (x[1], x[0])) lst2 = sorted(lst, reverse=True, key=lambda x: (x[1], x[0]))
return lst2 return lst2
def sort_in_ascending_order(lst):# 单词按频率升序排列 def sort_in_ascending_order(lst): # 单词按频率升序排列
lst2 = sorted(lst, reverse=False, key=lambda x: (x[1], x[0])) lst2 = sorted(lst, reverse=False, key=lambda x: (x[1], x[0]))
return lst2 return lst2
@ -80,9 +80,8 @@ def make_html_page(lst, fname): # 只是在wordfreqCMD.py中的main函数中调
# <a href="">word</a> # <a href="">word</a>
s += '<p>%d <a href="%s">%s</a> (%d)</p>' % (count, youdao_link(x[0]), x[0], x[1]) s += '<p>%d <a href="%s">%s</a> (%d)</p>' % (count, youdao_link(x[0]), x[0], x[1])
count += 1 count += 1
f = open(fname, 'w') with open(fname, 'w') as f:
f.write(s) f.write(s)
f.close()
## main程序入口 ## main程序入口
@ -96,12 +95,12 @@ if __name__ == '__main__':
s = file2str(fname) s = file2str(fname)
else: else:
print('I can accept at most 2 arguments.') print('I can accept at most 2 arguments.')
sys.exit()# 结束程序运行, 下面的代码不会被执行了。 sys.exit() # 结束程序运行,下面的代码不会被执行了。
s = remove_punctuation(s) # 这里是s是实参(argument),里面有值 s = remove_punctuation(s) # 这里是s是实参(argument),里面有值
L = freq(s) L = freq(s)
for x in sort_in_descending_order(L): for x in sort_in_descending_order(L):
print('%s\t%d\t%s' % (x[0], x[1], youdao_link(x[0])))#函数导出 print('%s\t%d\t%s' % (x[0], x[1], youdao_link(x[0]))) # 函数导出
# 把频率的结果放result.html中 # 把频率的结果放result.html中
make_html_page(sort_in_descending_order(L), 'result.html') make_html_page(sort_in_descending_order(L), 'result.html')
@ -120,4 +119,3 @@ if __name__ == '__main__':
pickle_idea.save_frequency_to_pickle(d, 'frequency.p') pickle_idea.save_frequency_to_pickle(d, 'frequency.p')

View File

@ -1,6 +1,6 @@
#!/bin/sh #!/bin/sh
DEPLOYMENT_DIR=/home/lanhui/englishpal2/EnglishPal DEPLOYMENT_DIR=/home/main/EnglishPal
cd $DEPLOYMENT_DIR cd $DEPLOYMENT_DIR
pwd pwd

View File

@ -8,4 +8,7 @@ Werkzeug==2.2.2
======= =======
pytest~=8.1.1 pytest~=8.1.1
<<<<<<< HEAD
>>>>>>> 8cbc7c9 (修复快速点击下一页按钮点击频率过快时页面跳转到未知名页面) >>>>>>> 8cbc7c9 (修复快速点击下一页按钮点击频率过快时页面跳转到未知名页面)
=======
>>>>>>> fa65055 (Fix bug 511)