2021-04-06 16:22:03 +08:00
|
|
|
###########################################################################
|
|
|
|
# Copyright 2019 (C) Hui Lan <hui.lan@cantab.net>
|
|
|
|
# Written permission must be obtained from the author for commercial uses.
|
|
|
|
###########################################################################
|
|
|
|
from wordfreqCMD import remove_punctuation, freq, sort_in_descending_order
|
|
|
|
import string
|
|
|
|
|
2024-06-14 20:14:28 +08:00
|
|
|
|
2021-04-06 16:22:03 +08:00
|
|
|
class WordFreq:
    """Compute filtered word frequencies for a piece of text.

    The input text is passed through ``remove_punctuation`` once, at
    construction time; the result is what ``freq`` later operates on.
    """

    def __init__(self, s, max_word_length=30):
        """Store the cleaned text and the word-length limit.

        Args:
            s: Raw input text; it is handed to ``remove_punctuation``
                and the result is kept in ``self.s``.
            max_word_length: Forwarded unchanged to ``freq`` as its
                second argument when ``get_freq`` is called.
        """
        self.max_word_length = max_word_length
        self.s = remove_punctuation(s)

    def get_freq(self):
        """Return the frequency entries whose word starts with an ASCII letter.

        Every item yielded by ``freq(self.s, self.max_word_length)`` is
        treated as a sequence whose first element is the word.  Items with
        an empty word, or a word whose first character is not in
        ``string.ascii_letters``, are dropped.  The surviving items are
        returned via ``sort_in_descending_order``.
        """
        entries = freq(self.s, self.max_word_length)
        # Keep the whole entry (not just the word) so the sorter receives
        # exactly the items that ``freq`` produced.
        kept = [
            entry
            for entry in entries
            if len(entry[0]) > 0 and entry[0][0] in string.ascii_letters
        ]
        return sort_in_descending_order(kept)
|
2024-06-14 20:14:28 +08:00
|
|
|
|
2021-04-06 16:22:03 +08:00
|
|
|
|
|
|
|
if __name__ == '__main__':
    # Small manual demo: mixed-case duplicates with punctuation attached.
    sample_text = 'BANANA; Banana, apple ORANGE Banana banana.'
    word_counter = WordFreq(sample_text, max_word_length=30)
    print(word_counter.get_freq())
|