# EnglishPal/app/vocabulary.py
#
# 61 lines
# 2.3 KiB
# Python

import pickle
import re
# Mock test data; in real use the data should be loaded from a file.
#
# Maps a lowercase word to an integer score (0-8 in this table). load_record()
# returns this dictionary when the requested pickle file does not exist, and
# VocabularyLevelEstimator.level averages these scores over the words it knows.
# 'xyz' and the empty string are present with a score of 0.
_TEST_MOCK = {
'simple': 2, 'apple': 1, 'happy': 2, 'open': 3, 'like': 2, 'work': 2, 'make': 2, 'money': 2,
'source': 3, 'software': 3, 'successful': 4, 'project': 3, 'develop': 3, 'process': 3,
'available': 4, 'organizations': 4,
'extinct': 6, 'modification': 7, 'apparently': 7, 'abruptly': 7, 'rentable': 7, 'predictable': 6,
'pasture': 7, 'putrid': 7, 'frivolous': 8, 'sessile': 8, 'dearth': 7, 'presumptuous': 7,
'fringe': 8, 'economics': 5, 'summarize': 5, 'stare': 5, 'eagerly': 5, 'completely': 4, 'maintained': 5,
'geological': 6, 'embryological': 7, 'coadaptation': 8, 'exterminated': 7, 'contingencies': 7,
'intercrossing': 6, 'coleopterous': 8, 'marin': 5, 'organised': 5, 'monopoly': 8, 'inorganic': 7,
'xyz': 0, '': 0
}
def load_record(pickle_fname):
    """Load and return the object stored in a pickle file.

    Args:
        pickle_fname: Path of the pickle file to read.

    Returns:
        The unpickled object. If the file does not exist, the module-level
        ``_TEST_MOCK`` dictionary is returned instead (the same object, not
        a copy).

    Raises:
        Any exception other than ``FileNotFoundError`` raised while opening
        or unpickling the file is propagated to the caller.
    """
    # SECURITY: pickle.load() can execute arbitrary code during
    # deserialization. Only call this on files from a trusted source.
    try:
        with open(pickle_fname, 'rb') as f:
            return pickle.load(f)
    except FileNotFoundError:
        # Deliberate best-effort fallback: a missing data file yields the
        # built-in mock table rather than an error.
        return _TEST_MOCK
class VocabularyLevelEstimator:
    """Estimate a vocabulary level from the words in ``self.word_lst``.

    This base class does not set ``word_lst`` itself; the subclasses in this
    module assign it in their ``__init__``.
    """

    # Word -> score lookup table, loaded once when the class body executes.
    # NOTE(review): the original comment here read "map a word to the sources
    # where it appears", but ``level`` adds the looked-up values to a float,
    # so they must be numeric -- confirm the schema of 'words_and_tests.p'.
    _test = load_record('words_and_tests.p')

    @property
    def level(self):
        """Return the estimated level for the words in ``self.word_lst``.

        Words that are not keys of ``_test`` are ignored; repeated words are
        counted once per occurrence.

        Returns:
            ``0`` when no word is known. Otherwise the mean score of the
            known words, increased by ``known_count / 100`` whenever the
            summed score is non-zero.
        """
        total = 0.0
        valid_count = 0
        for word in self.word_lst:
            if word in self._test:
                total += self._test[word]
                valid_count += 1

        if valid_count == 0:
            return 0

        if total != 0:
            # Adding count**2 / 100 before dividing by count raises the mean
            # by count / 100, so more known words give a slightly higher level.
            total += (valid_count * valid_count) / 100
        return total / valid_count
class UserVocabularyLevel(VocabularyLevelEstimator):
    """Vocabulary level computed from the keys of a user's word dictionary."""

    def __init__(self, d):
        """Store the dictionary and use its keys as the word list.

        Args:
            d: Dictionary whose keys are the user's words.

        Raises:
            TypeError: If ``d`` is not a ``dict``.
        """
        if not isinstance(d, dict):
            raise TypeError("Input must be a dictionary")
        self.d = d
        # NOTE(review): an earlier comment at this point said "just look at
        # the most recently-added words", but every key of ``d`` is used.
        # Confirm whether restricting to recent words is still intended.
        self.word_lst = list(d)
class ArticleVocabularyLevel(VocabularyLevelEstimator):
    """Vocabulary level computed from the words found in an article's text."""

    def __init__(self, content):
        """Store the text and split it into a list of lowercase words.

        Args:
            content: The article text.

        Raises:
            TypeError: If ``content`` is not a ``str``.
        """
        if not isinstance(content, str):
            raise TypeError("Content must be a string")
        self.content = content
        # The text is lower-cased first, so word_lst holds lowercase words
        # only. A word is a run of ASCII letters with a word boundary on both
        # sides; letters glued to digits or underscores are therefore skipped.
        self.word_lst = re.findall(r'\b[a-zA-Z]+\b', content.lower())
if __name__ == '__main__':
    # Manual smoke test: print a loaded frequency record and the levels
    # computed for it and for a short sample sentence.
    d = load_record('frequency_mrlan85.pickle')
    print(d)
    user = UserVocabularyLevel(d)
    print(user.level)  # level is a property
    article = ArticleVocabularyLevel('This is an interesting article')
    print(article.level)