forked from mrlan/EnglishPal
				
			Fix bug 511
							parent
							
								
									fb7adc3f22
								
							
						
					
					
						commit
						5447d570e0
					
				| 
						 | 
					@ -2,17 +2,18 @@
 | 
				
			||||||
# Copyright 2019 (C) Hui Lan <hui.lan@cantab.net>
 | 
					# Copyright 2019 (C) Hui Lan <hui.lan@cantab.net>
 | 
				
			||||||
# Written permission must be obtained from the author for commercial uses.
 | 
					# Written permission must be obtained from the author for commercial uses.
 | 
				
			||||||
###########################################################################
 | 
					###########################################################################
 | 
				
			||||||
 | 
					 | 
				
			||||||
from wordfreqCMD import remove_punctuation, freq, sort_in_descending_order
 | 
					from wordfreqCMD import remove_punctuation, freq, sort_in_descending_order
 | 
				
			||||||
import string
 | 
					import string
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					
 | 
				
			||||||
class WordFreq:
 | 
					class WordFreq:
 | 
				
			||||||
    def __init__(self, s):
 | 
					    def __init__(self, s, max_word_length=30):
 | 
				
			||||||
        self.s = remove_punctuation(s)
 | 
					        self.s = remove_punctuation(s)
 | 
				
			||||||
 | 
					        self.max_word_length = max_word_length
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    def get_freq(self):
 | 
					    def get_freq(self):
 | 
				
			||||||
        lst = []
 | 
					        lst = []
 | 
				
			||||||
        for t in freq(self.s):
 | 
					        for t in freq(self.s, self.max_word_length):
 | 
				
			||||||
            word = t[0]
 | 
					            word = t[0]
 | 
				
			||||||
            if len(word) > 0 and word[0] in string.ascii_letters:
 | 
					            if len(word) > 0 and word[0] in string.ascii_letters:
 | 
				
			||||||
                lst.append(t)
 | 
					                lst.append(t)
 | 
				
			||||||
| 
						 | 
					@ -20,6 +21,5 @@ class WordFreq:
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					
 | 
				
			||||||
if __name__ == '__main__':
 | 
					if __name__ == '__main__':
 | 
				
			||||||
    f = WordFreq('BANANA; Banana, apple ORANGE Banana banana.')
 | 
					    f = WordFreq('BANANA; Banana, apple ORANGE Banana banana.', max_word_length=30)
 | 
				
			||||||
    print(f.get_freq())
 | 
					    print(f.get_freq())
 | 
				
			||||||
 | 
					 | 
				
			||||||
| 
						 | 
					
 | 
				
			||||||
| 
						 | 
					@ -0,0 +1,56 @@
 | 
				
			||||||
 | 
					###########################################################################
 | 
				
			||||||
 | 
					# Copyright 2019 (C) Hui Lan <hui.lan@cantab.net>
 | 
				
			||||||
 | 
					# Written permission must be obtained from the author for commercial uses.
 | 
				
			||||||
 | 
					###########################################################################
 | 
				
			||||||
 | 
					import unittest
 | 
				
			||||||
 | 
					from wordfreqCMD import remove_punctuation, freq, sort_in_descending_order
 | 
				
			||||||
 | 
					from WordFreq import WordFreq
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					class TestWordFrequency(unittest.TestCase):
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    def test_word_frequency_normal_case(self):
 | 
				
			||||||
 | 
					        text = "BANANA; Banana, apple ORANGE Banana banana."
 | 
				
			||||||
 | 
					        wf = WordFreq(text)
 | 
				
			||||||
 | 
					        result = wf.get_freq()
 | 
				
			||||||
 | 
					        expected = [('banana', 4), ('orange', 1), ('apple', 1)]
 | 
				
			||||||
 | 
					        self.assertEqual(result, expected)
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    def test_word_frequency_with_long_word(self):
 | 
				
			||||||
 | 
					        text = "apple banana " + "a" * 31 + " orange banana apple"
 | 
				
			||||||
 | 
					        wf = WordFreq(text, max_word_length=30)
 | 
				
			||||||
 | 
					        result = wf.get_freq()
 | 
				
			||||||
 | 
					        expected = [('banana', 2), ('apple', 2), ('orange', 1)]
 | 
				
			||||||
 | 
					        self.assertEqual(result, expected)
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    def test_word_frequency_all_long_words(self):
 | 
				
			||||||
 | 
					        text = "a" * 31 + " " + "b" * 32 + " " + "c" * 33
 | 
				
			||||||
 | 
					        wf = WordFreq(text, max_word_length=30)
 | 
				
			||||||
 | 
					        result = wf.get_freq()
 | 
				
			||||||
 | 
					        expected = []
 | 
				
			||||||
 | 
					        self.assertEqual(result, expected)
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    def test_word_frequency_with_punctuation(self):
 | 
				
			||||||
 | 
					        text = "Hello, world! Hello... hello; 'hello' --world--"
 | 
				
			||||||
 | 
					        wf = WordFreq(text)
 | 
				
			||||||
 | 
					        result = wf.get_freq()
 | 
				
			||||||
 | 
					        expected = [('hello', 4), ('world', 2)]
 | 
				
			||||||
 | 
					        self.assertEqual(result, expected)
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    def test_word_frequency_empty_string(self):
 | 
				
			||||||
 | 
					        text = ""
 | 
				
			||||||
 | 
					        wf = WordFreq(text)
 | 
				
			||||||
 | 
					        result = wf.get_freq()
 | 
				
			||||||
 | 
					        expected = []
 | 
				
			||||||
 | 
					        self.assertEqual(result, expected)
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    def test_word_frequency_with_max_length_parameter(self):
 | 
				
			||||||
 | 
					        text = "apple banana apple"
 | 
				
			||||||
 | 
					        wf = WordFreq(text, max_word_length=5)
 | 
				
			||||||
 | 
					        result = wf.get_freq()
 | 
				
			||||||
 | 
					        expected = [('apple', 2)]
 | 
				
			||||||
 | 
					        self.assertEqual(result, expected)
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					if __name__ == '__main__':
 | 
				
			||||||
 | 
					    unittest.main()
 | 
				
			||||||
| 
						 | 
					@ -2,19 +2,19 @@
 | 
				
			||||||
# Copyright 2019 (C) Hui Lan <hui.lan@cantab.net>
 | 
					# Copyright 2019 (C) Hui Lan <hui.lan@cantab.net>
 | 
				
			||||||
# Written permission must be obtained from the author for commercial uses.
 | 
					# Written permission must be obtained from the author for commercial uses.
 | 
				
			||||||
###########################################################################
 | 
					###########################################################################
 | 
				
			||||||
 | 
					 | 
				
			||||||
import collections
 | 
					import collections
 | 
				
			||||||
import string
 | 
					import string
 | 
				
			||||||
import operator
 | 
					import os
 | 
				
			||||||
import os, sys # 引入模块sys,因为我要用里面的sys.argv列表中的信息来读取命令行参数。
 | 
					import sys
 | 
				
			||||||
import pickle_idea
 | 
					import pickle_idea
 | 
				
			||||||
 | 
					
 | 
				
			||||||
def freq(fruit):
 | 
					
 | 
				
			||||||
 | 
					def freq(fruit, max_word_length=30):
 | 
				
			||||||
    '''
 | 
					    '''
 | 
				
			||||||
    功能: 把字符串转成列表。 目的是得到每个单词的频率。
 | 
					    功能: 把字符串转成列表。 目的是得到每个单词的频率。
 | 
				
			||||||
    输入: 字符串
 | 
					    输入: 字符串
 | 
				
			||||||
    输出: 列表, 列表里包含一组元组,每个元组包含单词与单词的频率。 比如 [('apple', 2), ('banana', 1)]
 | 
					    输出: 列表, 列表里包含一组元组,每个元组包含单词与单词的频率。 比如 [('apple', 2), ('banana', 1)]
 | 
				
			||||||
    注意事项: 首先要把字符串转成小写。原因是。。。
 | 
					    注意事项: 首先要把字符串转小写。
 | 
				
			||||||
    '''
 | 
					    '''
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    result = []
 | 
					    result = []
 | 
				
			||||||
| 
						 | 
					@ -22,24 +22,24 @@ def freq(fruit):
 | 
				
			||||||
    fruit = fruit.lower()  # 字母转小写
 | 
					    fruit = fruit.lower()  # 字母转小写
 | 
				
			||||||
    flst = fruit.split()  # 字符串转成list
 | 
					    flst = fruit.split()  # 字符串转成list
 | 
				
			||||||
    c = collections.Counter(flst)
 | 
					    c = collections.Counter(flst)
 | 
				
			||||||
    result = c.most_common()
 | 
					    for word, count in c.most_common():
 | 
				
			||||||
 | 
					        if len(word) <= max_word_length:
 | 
				
			||||||
 | 
					            result.append((word, count))
 | 
				
			||||||
    return result
 | 
					    return result
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					 | 
				
			||||||
def youdao_link(s):  # 有道链接
 | 
					def youdao_link(s):  # 有道链接
 | 
				
			||||||
    link = 'http://youdao.com/w/eng/' + s + '/#keyfrom=dict2.index'  # 网址
 | 
					    link = 'http://youdao.com/w/eng/' + s + '/#keyfrom=dict2.index'  # 网址
 | 
				
			||||||
    return link
 | 
					    return link
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					
 | 
				
			||||||
def file2str(fname):  # 文件转字符
 | 
					def file2str(fname):  # 文件转字符
 | 
				
			||||||
    f = open(fname) #打开
 | 
					    with open(fname) as f:  # 使用with打开文件
 | 
				
			||||||
        s = f.read()  # 读取
 | 
					        s = f.read()  # 读取
 | 
				
			||||||
    f.close()       #关闭
 | 
					 | 
				
			||||||
    return s
 | 
					    return s
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					
 | 
				
			||||||
def remove_punctuation(s):  # 这里是s是形参(parameter)。函数被调用时才给s赋值。
 | 
					def remove_punctuation(s):  # 这里是s是形参(parameter)。函数被调用时才给s赋值。
 | 
				
			||||||
    special_characters = '\_©~<=>+/[]*&$%^@.,?!:;#()"“”—‘’{}|' # 把里面的字符都去掉
 | 
					    special_characters = r'\_©~<=>+/[]*&$%^@.,?!:;#()"“”—‘’{}|'  # 把里面的字符都去掉
 | 
				
			||||||
    for c in special_characters:
 | 
					    for c in special_characters:
 | 
				
			||||||
        s = s.replace(c, ' ')  # 防止出现把 apple,apple 移掉逗号后变成 appleapple 情况
 | 
					        s = s.replace(c, ' ')  # 防止出现把 apple,apple 移掉逗号后变成 appleapple 情况
 | 
				
			||||||
    s = s.replace('--', ' ')
 | 
					    s = s.replace('--', ' ')
 | 
				
			||||||
| 
						 | 
					@ -80,9 +80,8 @@ def make_html_page(lst, fname):  # 只是在wordfreqCMD.py中的main函数中调
 | 
				
			||||||
        # <a href="">word</a>
 | 
					        # <a href="">word</a>
 | 
				
			||||||
        s += '<p>%d <a href="%s">%s</a> (%d)</p>' % (count, youdao_link(x[0]), x[0], x[1])
 | 
					        s += '<p>%d <a href="%s">%s</a> (%d)</p>' % (count, youdao_link(x[0]), x[0], x[1])
 | 
				
			||||||
        count += 1
 | 
					        count += 1
 | 
				
			||||||
    f = open(fname, 'w')
 | 
					    with open(fname, 'w') as f:
 | 
				
			||||||
        f.write(s)
 | 
					        f.write(s)
 | 
				
			||||||
    f.close()
 | 
					 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					
 | 
				
			||||||
## main(程序入口)
 | 
					## main(程序入口)
 | 
				
			||||||
| 
						 | 
					@ -120,4 +119,3 @@ if __name__ == '__main__':
 | 
				
			||||||
    pickle_idea.save_frequency_to_pickle(d, 'frequency.p')
 | 
					    pickle_idea.save_frequency_to_pickle(d, 'frequency.p')
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					 | 
				
			||||||
| 
						 | 
					
 | 
				
			||||||
							
								
								
									
										2
									
								
								build.sh
								
								
								
								
							
							
						
						
									
										2
									
								
								build.sh
								
								
								
								
							| 
						 | 
					@ -1,6 +1,6 @@
 | 
				
			||||||
#!/bin/sh
 | 
					#!/bin/sh
 | 
				
			||||||
 | 
					
 | 
				
			||||||
DEPLOYMENT_DIR=/home/lanhui/englishpal2/EnglishPal
 | 
					DEPLOYMENT_DIR=/home/main/EnglishPal
 | 
				
			||||||
cd $DEPLOYMENT_DIR
 | 
					cd $DEPLOYMENT_DIR
 | 
				
			||||||
pwd
 | 
					pwd
 | 
				
			||||||
 | 
					
 | 
				
			||||||
| 
						 | 
					
 | 
				
			||||||
| 
						 | 
					@ -8,4 +8,7 @@ Werkzeug==2.2.2
 | 
				
			||||||
=======
 | 
					=======
 | 
				
			||||||
 | 
					
 | 
				
			||||||
pytest~=8.1.1
 | 
					pytest~=8.1.1
 | 
				
			||||||
 | 
					<<<<<<< HEAD
 | 
				
			||||||
>>>>>>> 8cbc7c9 (修复快速点击下一页按钮点击频率过快时页面跳转到未知名页面)
 | 
					>>>>>>> 8cbc7c9 (修复快速点击下一页按钮点击频率过快时页面跳转到未知名页面)
 | 
				
			||||||
 | 
					=======
 | 
				
			||||||
 | 
					>>>>>>> fa65055 (Fix bug 511)
 | 
				
			||||||
| 
						 | 
					
 | 
				
			||||||
		Loading…
	
		Reference in New Issue