Fix bug 511
							parent
							
								
									fb7adc3f22
								
							
						
					
					
						commit
						5447d570e0
					
				|  | @ -2,24 +2,24 @@ | |||
| # Copyright 2019 (C) Hui Lan <hui.lan@cantab.net> | ||||
| # Written permission must be obtained from the author for commercial uses. | ||||
| ########################################################################### | ||||
| 
 | ||||
| from wordfreqCMD import remove_punctuation, freq, sort_in_descending_order | ||||
| import string | ||||
| 
 | ||||
| 
 | ||||
| class WordFreq: | ||||
|     def __init__(self, s): | ||||
|     def __init__(self, s, max_word_length=30): | ||||
|         self.s = remove_punctuation(s) | ||||
|         self.max_word_length = max_word_length | ||||
| 
 | ||||
|     def get_freq(self): | ||||
|         lst = [] | ||||
|         for t in freq(self.s): | ||||
|         for t in freq(self.s, self.max_word_length): | ||||
|             word = t[0] | ||||
|             if len(word) > 0 and word[0] in string.ascii_letters: | ||||
|                 lst.append(t) | ||||
|         return sort_in_descending_order(lst) | ||||
|      | ||||
| 
 | ||||
| 
 | ||||
| if __name__ == '__main__': | ||||
|     f = WordFreq('BANANA; Banana, apple ORANGE Banana banana.') | ||||
|     f = WordFreq('BANANA; Banana, apple ORANGE Banana banana.', max_word_length=30) | ||||
|     print(f.get_freq()) | ||||
| 
 | ||||
|  |  | |||
|  | @ -0,0 +1,56 @@ | |||
| ########################################################################### | ||||
| # Copyright 2019 (C) Hui Lan <hui.lan@cantab.net> | ||||
| # Written permission must be obtained from the author for commercial uses. | ||||
| ########################################################################### | ||||
| import unittest | ||||
| from wordfreqCMD import remove_punctuation, freq, sort_in_descending_order | ||||
| from WordFreq import WordFreq | ||||
| 
 | ||||
| 
 | ||||
| class TestWordFrequency(unittest.TestCase): | ||||
| 
 | ||||
|     def test_word_frequency_normal_case(self): | ||||
|         text = "BANANA; Banana, apple ORANGE Banana banana." | ||||
|         wf = WordFreq(text) | ||||
|         result = wf.get_freq() | ||||
|         expected = [('banana', 4), ('orange', 1), ('apple', 1)] | ||||
|         self.assertEqual(result, expected) | ||||
| 
 | ||||
|     def test_word_frequency_with_long_word(self): | ||||
|         text = "apple banana " + "a" * 31 + " orange banana apple" | ||||
|         wf = WordFreq(text, max_word_length=30) | ||||
|         result = wf.get_freq() | ||||
|         expected = [('banana', 2), ('apple', 2), ('orange', 1)] | ||||
|         self.assertEqual(result, expected) | ||||
| 
 | ||||
|     def test_word_frequency_all_long_words(self): | ||||
|         text = "a" * 31 + " " + "b" * 32 + " " + "c" * 33 | ||||
|         wf = WordFreq(text, max_word_length=30) | ||||
|         result = wf.get_freq() | ||||
|         expected = [] | ||||
|         self.assertEqual(result, expected) | ||||
| 
 | ||||
|     def test_word_frequency_with_punctuation(self): | ||||
|         text = "Hello, world! Hello... hello; 'hello' --world--" | ||||
|         wf = WordFreq(text) | ||||
|         result = wf.get_freq() | ||||
|         expected = [('hello', 4), ('world', 2)] | ||||
|         self.assertEqual(result, expected) | ||||
| 
 | ||||
|     def test_word_frequency_empty_string(self): | ||||
|         text = "" | ||||
|         wf = WordFreq(text) | ||||
|         result = wf.get_freq() | ||||
|         expected = [] | ||||
|         self.assertEqual(result, expected) | ||||
| 
 | ||||
|     def test_word_frequency_with_max_length_parameter(self): | ||||
|         text = "apple banana apple" | ||||
|         wf = WordFreq(text, max_word_length=5) | ||||
|         result = wf.get_freq() | ||||
|         expected = [('apple', 2)] | ||||
|         self.assertEqual(result, expected) | ||||
| 
 | ||||
| 
 | ||||
| if __name__ == '__main__': | ||||
|     unittest.main() | ||||
|  | @ -2,56 +2,56 @@ | |||
| # Copyright 2019 (C) Hui Lan <hui.lan@cantab.net> | ||||
| # Written permission must be obtained from the author for commercial uses. | ||||
| ########################################################################### | ||||
| 
 | ||||
| import collections | ||||
| import string | ||||
| import operator | ||||
| import os, sys # 引入模块sys,因为我要用里面的sys.argv列表中的信息来读取命令行参数。 | ||||
| import os | ||||
| import sys | ||||
| import pickle_idea | ||||
| 
 | ||||
| def freq(fruit): | ||||
| 
 | ||||
| def freq(fruit, max_word_length=30): | ||||
|     ''' | ||||
|     功能: 把字符串转成列表。 目的是得到每个单词的频率。 | ||||
|     输入: 字符串 | ||||
|     输出: 列表, 列表里包含一组元组,每个元组包含单词与单词的频率。 比如 [('apple', 2), ('banana', 1)] | ||||
|     注意事项: 首先要把字符串转成小写。原因是。。。 | ||||
|     注意事项: 首先要把字符串转小写。 | ||||
|     ''' | ||||
| 
 | ||||
|     result = [] | ||||
|      | ||||
|     fruit = fruit.lower() # 字母转小写 | ||||
| 
 | ||||
|     fruit = fruit.lower()  # 字母转小写 | ||||
|     flst = fruit.split()  # 字符串转成list | ||||
|     c = collections.Counter(flst) | ||||
|     result = c.most_common() | ||||
|     for word, count in c.most_common(): | ||||
|         if len(word) <= max_word_length: | ||||
|             result.append((word, count)) | ||||
|     return result | ||||
| 
 | ||||
| 
 | ||||
| def youdao_link(s): # 有道链接 | ||||
|     link = 'http://youdao.com/w/eng/' + s + '/#keyfrom=dict2.index'# 网址 | ||||
| def youdao_link(s):  # 有道链接 | ||||
|     link = 'http://youdao.com/w/eng/' + s + '/#keyfrom=dict2.index'  # 网址 | ||||
|     return link | ||||
| 
 | ||||
| 
 | ||||
| def file2str(fname):#文件转字符 | ||||
|     f = open(fname) #打开 | ||||
|     s = f.read()    #读取 | ||||
|     f.close()       #关闭 | ||||
| def file2str(fname):  # 文件转字符 | ||||
|     with open(fname) as f:  # 使用with打开文件 | ||||
|         s = f.read()  # 读取 | ||||
|     return s | ||||
| 
 | ||||
| 
 | ||||
| def remove_punctuation(s): # 这里是s是形参 (parameter)。函数被调用时才给s赋值。 | ||||
|     special_characters = '\_©~<=>+/[]*&$%^@.,?!:;#()"“”—‘’{}|' # 把里面的字符都去掉 | ||||
| def remove_punctuation(s):  # 这里是s是形参(parameter)。函数被调用时才给s赋值。 | ||||
|     special_characters = r'\_©~<=>+/[]*&$%^@.,?!:;#()"“”—‘’{}|'  # 把里面的字符都去掉 | ||||
|     for c in special_characters: | ||||
|         s = s.replace(c, ' ') # 防止出现把 apple,apple 移掉逗号后变成 appleapple 情况 | ||||
|         s = s.replace(c, ' ')  # 防止出现把 apple,apple 移掉逗号后变成 appleapple 情况 | ||||
|     s = s.replace('--', ' ') | ||||
|     s = s.strip() # 去除前后的空格 | ||||
|      | ||||
|     s = s.strip()  # 去除前后的空格 | ||||
| 
 | ||||
|     if '\'' in s: | ||||
|         n = len(s) | ||||
|         t = '' # 用来收集我需要保留的字符 | ||||
|         for i in range(n): # 只有单引号前后都有英文字符,才保留 | ||||
|         t = ''  # 用来收集我需要保留的字符 | ||||
|         for i in range(n):  # 只有单引号前后都有英文字符,才保留 | ||||
|             if s[i] == '\'': | ||||
|                 i_is_ok = i - 1 >= 0 and i + 1 < n | ||||
|                 if i_is_ok and s[i-1] in string.ascii_letters and s[i+1] in string.ascii_letters: | ||||
|                 if i_is_ok and s[i - 1] in string.ascii_letters and s[i + 1] in string.ascii_letters: | ||||
|                     t += s[i] | ||||
|             else: | ||||
|                 t += s[i] | ||||
|  | @ -60,12 +60,12 @@ def remove_punctuation(s): # 这里是s是形参 (parameter)。函数被调用 | |||
|         return s | ||||
| 
 | ||||
| 
 | ||||
| def sort_in_descending_order(lst):# 单词按频率降序排列 | ||||
| def sort_in_descending_order(lst):  # 单词按频率降序排列 | ||||
|     lst2 = sorted(lst, reverse=True, key=lambda x: (x[1], x[0])) | ||||
|     return lst2 | ||||
| 
 | ||||
| 
 | ||||
| def sort_in_ascending_order(lst):# 单词按频率降序排列 | ||||
| def sort_in_ascending_order(lst):  # 单词按频率降序排列 | ||||
|     lst2 = sorted(lst, reverse=False, key=lambda x: (x[1], x[0])) | ||||
|     return lst2 | ||||
| 
 | ||||
|  | @ -80,31 +80,30 @@ def make_html_page(lst, fname):  # 只是在wordfreqCMD.py中的main函数中调 | |||
|         # <a href="">word</a> | ||||
|         s += '<p>%d <a href="%s">%s</a> (%d)</p>' % (count, youdao_link(x[0]), x[0], x[1]) | ||||
|         count += 1 | ||||
|     f = open(fname, 'w') | ||||
|     f.write(s) | ||||
|     f.close() | ||||
|     with open(fname, 'w') as f: | ||||
|         f.write(s) | ||||
| 
 | ||||
| 
 | ||||
| ## main(程序入口) | ||||
| if __name__ == '__main__': | ||||
|     num = len(sys.argv) | ||||
| 
 | ||||
|     if num == 1: # 从键盘读入字符串 | ||||
|     if num == 1:  # 从键盘读入字符串 | ||||
|         s = input() | ||||
|     elif num == 2: # 从文件读入字符串 | ||||
|     elif num == 2:  # 从文件读入字符串 | ||||
|         fname = sys.argv[1] | ||||
|         s = file2str(fname) | ||||
|     else: | ||||
|         print('I can accept at most 2 arguments.') | ||||
|         sys.exit()# 结束程序运行, 下面的代码不会被执行了。 | ||||
|         sys.exit()  # 结束程序运行,下面的代码不会被执行了。 | ||||
| 
 | ||||
|     s = remove_punctuation(s) # 这里是s是实参(argument),里面有值 | ||||
|     s = remove_punctuation(s)  # 这里是s是实参(argument),里面有值 | ||||
|     L = freq(s) | ||||
|     for x in sort_in_descending_order(L): | ||||
|         print('%s\t%d\t%s' % (x[0], x[1], youdao_link(x[0])))#函数导出 | ||||
|         print('%s\t%d\t%s' % (x[0], x[1], youdao_link(x[0])))  # 函数导出 | ||||
| 
 | ||||
|     # 把频率的结果放result.html中 | ||||
|     make_html_page(sort_in_descending_order(L), 'result.html')  | ||||
|     make_html_page(sort_in_descending_order(L), 'result.html') | ||||
| 
 | ||||
|     print('\nHistory:\n') | ||||
|     if os.path.exists('frequency.p'): | ||||
|  | @ -120,4 +119,3 @@ if __name__ == '__main__': | |||
|     pickle_idea.save_frequency_to_pickle(d, 'frequency.p') | ||||
| 
 | ||||
| 
 | ||||
| 
 | ||||
|  |  | |||
							
								
								
									
										2
									
								
								build.sh
								
								
								
								
							
							
						
						
									
										2
									
								
								build.sh
								
								
								
								
							|  | @ -1,6 +1,6 @@ | |||
| #!/bin/sh | ||||
| 
 | ||||
| DEPLOYMENT_DIR=/home/lanhui/englishpal2/EnglishPal | ||||
| DEPLOYMENT_DIR=/home/main/EnglishPal | ||||
| cd $DEPLOYMENT_DIR | ||||
| pwd | ||||
| 
 | ||||
|  |  | |||
|  | @ -8,4 +8,7 @@ Werkzeug==2.2.2 | |||
| ======= | ||||
| 
 | ||||
| pytest~=8.1.1 | ||||
| <<<<<<< HEAD | ||||
| >>>>>>> 8cbc7c9 (修复快速点击下一页按钮点击频率过快时页面跳转到未知名页面) | ||||
| ======= | ||||
| >>>>>>> fa65055 (Fix bug 511) | ||||
|  |  | |||
		Loading…
	
		Reference in New Issue