forked from mrlan/EnglishPal
Fix bug 511
parent
fb7adc3f22
commit
5447d570e0
|
@ -2,17 +2,18 @@
|
||||||
# Copyright 2019 (C) Hui Lan <hui.lan@cantab.net>
|
# Copyright 2019 (C) Hui Lan <hui.lan@cantab.net>
|
||||||
# Written permission must be obtained from the author for commercial uses.
|
# Written permission must be obtained from the author for commercial uses.
|
||||||
###########################################################################
|
###########################################################################
|
||||||
|
|
||||||
from wordfreqCMD import remove_punctuation, freq, sort_in_descending_order
|
from wordfreqCMD import remove_punctuation, freq, sort_in_descending_order
|
||||||
import string
|
import string
|
||||||
|
|
||||||
|
|
||||||
class WordFreq:
|
class WordFreq:
|
||||||
def __init__(self, s):
|
def __init__(self, s, max_word_length=30):
|
||||||
self.s = remove_punctuation(s)
|
self.s = remove_punctuation(s)
|
||||||
|
self.max_word_length = max_word_length
|
||||||
|
|
||||||
def get_freq(self):
|
def get_freq(self):
|
||||||
lst = []
|
lst = []
|
||||||
for t in freq(self.s):
|
for t in freq(self.s, self.max_word_length):
|
||||||
word = t[0]
|
word = t[0]
|
||||||
if len(word) > 0 and word[0] in string.ascii_letters:
|
if len(word) > 0 and word[0] in string.ascii_letters:
|
||||||
lst.append(t)
|
lst.append(t)
|
||||||
|
@ -20,6 +21,5 @@ class WordFreq:
|
||||||
|
|
||||||
|
|
||||||
if __name__ == '__main__':
|
if __name__ == '__main__':
|
||||||
f = WordFreq('BANANA; Banana, apple ORANGE Banana banana.')
|
f = WordFreq('BANANA; Banana, apple ORANGE Banana banana.', max_word_length=30)
|
||||||
print(f.get_freq())
|
print(f.get_freq())
|
||||||
|
|
||||||
|
|
|
@ -0,0 +1,56 @@
|
||||||
|
###########################################################################
|
||||||
|
# Copyright 2019 (C) Hui Lan <hui.lan@cantab.net>
|
||||||
|
# Written permission must be obtained from the author for commercial uses.
|
||||||
|
###########################################################################
|
||||||
|
import unittest
|
||||||
|
from wordfreqCMD import remove_punctuation, freq, sort_in_descending_order
|
||||||
|
from WordFreq import WordFreq
|
||||||
|
|
||||||
|
|
||||||
|
class TestWordFrequency(unittest.TestCase):
|
||||||
|
|
||||||
|
def test_word_frequency_normal_case(self):
|
||||||
|
text = "BANANA; Banana, apple ORANGE Banana banana."
|
||||||
|
wf = WordFreq(text)
|
||||||
|
result = wf.get_freq()
|
||||||
|
expected = [('banana', 4), ('orange', 1), ('apple', 1)]
|
||||||
|
self.assertEqual(result, expected)
|
||||||
|
|
||||||
|
def test_word_frequency_with_long_word(self):
|
||||||
|
text = "apple banana " + "a" * 31 + " orange banana apple"
|
||||||
|
wf = WordFreq(text, max_word_length=30)
|
||||||
|
result = wf.get_freq()
|
||||||
|
expected = [('banana', 2), ('apple', 2), ('orange', 1)]
|
||||||
|
self.assertEqual(result, expected)
|
||||||
|
|
||||||
|
def test_word_frequency_all_long_words(self):
|
||||||
|
text = "a" * 31 + " " + "b" * 32 + " " + "c" * 33
|
||||||
|
wf = WordFreq(text, max_word_length=30)
|
||||||
|
result = wf.get_freq()
|
||||||
|
expected = []
|
||||||
|
self.assertEqual(result, expected)
|
||||||
|
|
||||||
|
def test_word_frequency_with_punctuation(self):
|
||||||
|
text = "Hello, world! Hello... hello; 'hello' --world--"
|
||||||
|
wf = WordFreq(text)
|
||||||
|
result = wf.get_freq()
|
||||||
|
expected = [('hello', 4), ('world', 2)]
|
||||||
|
self.assertEqual(result, expected)
|
||||||
|
|
||||||
|
def test_word_frequency_empty_string(self):
|
||||||
|
text = ""
|
||||||
|
wf = WordFreq(text)
|
||||||
|
result = wf.get_freq()
|
||||||
|
expected = []
|
||||||
|
self.assertEqual(result, expected)
|
||||||
|
|
||||||
|
def test_word_frequency_with_max_length_parameter(self):
|
||||||
|
text = "apple banana apple"
|
||||||
|
wf = WordFreq(text, max_word_length=5)
|
||||||
|
result = wf.get_freq()
|
||||||
|
expected = [('apple', 2)]
|
||||||
|
self.assertEqual(result, expected)
|
||||||
|
|
||||||
|
|
||||||
|
if __name__ == '__main__':
|
||||||
|
unittest.main()
|
|
@ -2,19 +2,19 @@
|
||||||
# Copyright 2019 (C) Hui Lan <hui.lan@cantab.net>
|
# Copyright 2019 (C) Hui Lan <hui.lan@cantab.net>
|
||||||
# Written permission must be obtained from the author for commercial uses.
|
# Written permission must be obtained from the author for commercial uses.
|
||||||
###########################################################################
|
###########################################################################
|
||||||
|
|
||||||
import collections
|
import collections
|
||||||
import string
|
import string
|
||||||
import operator
|
import os
|
||||||
import os, sys # 引入模块sys,因为我要用里面的sys.argv列表中的信息来读取命令行参数。
|
import sys
|
||||||
import pickle_idea
|
import pickle_idea
|
||||||
|
|
||||||
def freq(fruit):
|
|
||||||
|
def freq(fruit, max_word_length=30):
|
||||||
'''
|
'''
|
||||||
功能: 把字符串转成列表。 目的是得到每个单词的频率。
|
功能: 把字符串转成列表。 目的是得到每个单词的频率。
|
||||||
输入: 字符串
|
输入: 字符串
|
||||||
输出: 列表, 列表里包含一组元组,每个元组包含单词与单词的频率。 比如 [('apple', 2), ('banana', 1)]
|
输出: 列表, 列表里包含一组元组,每个元组包含单词与单词的频率。 比如 [('apple', 2), ('banana', 1)]
|
||||||
注意事项: 首先要把字符串转成小写。原因是。。。
|
注意事项: 首先要把字符串转小写。
|
||||||
'''
|
'''
|
||||||
|
|
||||||
result = []
|
result = []
|
||||||
|
@ -22,24 +22,24 @@ def freq(fruit):
|
||||||
fruit = fruit.lower() # 字母转小写
|
fruit = fruit.lower() # 字母转小写
|
||||||
flst = fruit.split() # 字符串转成list
|
flst = fruit.split() # 字符串转成list
|
||||||
c = collections.Counter(flst)
|
c = collections.Counter(flst)
|
||||||
result = c.most_common()
|
for word, count in c.most_common():
|
||||||
|
if len(word) <= max_word_length:
|
||||||
|
result.append((word, count))
|
||||||
return result
|
return result
|
||||||
|
|
||||||
|
|
||||||
def youdao_link(s): # 有道链接
|
def youdao_link(s): # 有道链接
|
||||||
link = 'http://youdao.com/w/eng/' + s + '/#keyfrom=dict2.index' # 网址
|
link = 'http://youdao.com/w/eng/' + s + '/#keyfrom=dict2.index' # 网址
|
||||||
return link
|
return link
|
||||||
|
|
||||||
|
|
||||||
def file2str(fname): # 文件转字符
|
def file2str(fname): # 文件转字符
|
||||||
f = open(fname) #打开
|
with open(fname) as f: # 使用with打开文件
|
||||||
s = f.read() # 读取
|
s = f.read() # 读取
|
||||||
f.close() #关闭
|
|
||||||
return s
|
return s
|
||||||
|
|
||||||
|
|
||||||
def remove_punctuation(s): # 这里是s是形参(parameter)。函数被调用时才给s赋值。
|
def remove_punctuation(s): # 这里是s是形参(parameter)。函数被调用时才给s赋值。
|
||||||
special_characters = '\_©~<=>+/[]*&$%^@.,?!:;#()"“”—‘’{}|' # 把里面的字符都去掉
|
special_characters = r'\_©~<=>+/[]*&$%^@.,?!:;#()"“”—‘’{}|' # 把里面的字符都去掉
|
||||||
for c in special_characters:
|
for c in special_characters:
|
||||||
s = s.replace(c, ' ') # 防止出现把 apple,apple 移掉逗号后变成 appleapple 情况
|
s = s.replace(c, ' ') # 防止出现把 apple,apple 移掉逗号后变成 appleapple 情况
|
||||||
s = s.replace('--', ' ')
|
s = s.replace('--', ' ')
|
||||||
|
@ -80,9 +80,8 @@ def make_html_page(lst, fname): # 只是在wordfreqCMD.py中的main函数中调
|
||||||
# <a href="">word</a>
|
# <a href="">word</a>
|
||||||
s += '<p>%d <a href="%s">%s</a> (%d)</p>' % (count, youdao_link(x[0]), x[0], x[1])
|
s += '<p>%d <a href="%s">%s</a> (%d)</p>' % (count, youdao_link(x[0]), x[0], x[1])
|
||||||
count += 1
|
count += 1
|
||||||
f = open(fname, 'w')
|
with open(fname, 'w') as f:
|
||||||
f.write(s)
|
f.write(s)
|
||||||
f.close()
|
|
||||||
|
|
||||||
|
|
||||||
## main(程序入口)
|
## main(程序入口)
|
||||||
|
@ -120,4 +119,3 @@ if __name__ == '__main__':
|
||||||
pickle_idea.save_frequency_to_pickle(d, 'frequency.p')
|
pickle_idea.save_frequency_to_pickle(d, 'frequency.p')
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
2
build.sh
2
build.sh
|
@ -1,6 +1,6 @@
|
||||||
#!/bin/sh
|
#!/bin/sh
|
||||||
|
|
||||||
DEPLOYMENT_DIR=/home/lanhui/englishpal2/EnglishPal
|
DEPLOYMENT_DIR=/home/main/EnglishPal
|
||||||
cd $DEPLOYMENT_DIR
|
cd $DEPLOYMENT_DIR
|
||||||
pwd
|
pwd
|
||||||
|
|
||||||
|
|
|
@ -8,4 +8,7 @@ Werkzeug==2.2.2
|
||||||
=======
|
=======
|
||||||
|
|
||||||
pytest~=8.1.1
|
pytest~=8.1.1
|
||||||
|
<<<<<<< HEAD
|
||||||
>>>>>>> 8cbc7c9 (修复快速点击下一页按钮点击频率过快时页面跳转到未知名页面)
|
>>>>>>> 8cbc7c9 (修复快速点击下一页按钮点击频率过快时页面跳转到未知名页面)
|
||||||
|
=======
|
||||||
|
>>>>>>> fa65055 (Fix bug 511)
|
||||||
|
|
Loading…
Reference in New Issue