"""Estimate the vocabulary level of a user's word list or of an article.

A level is derived from a word -> numeric difficulty table.  The table is
loaded from a pickle file when available; otherwise a small built-in mock
table is used so the module still works without the data files.
"""

import pickle
import re

# Mock test data; in real use the table should be loaded from a file.
_TEST_MOCK = {
    'simple': 2, 'apple': 1, 'happy': 2, 'open': 3, 'like': 2, 'work': 2,
    'make': 2, 'money': 2, 'source': 3, 'software': 3, 'successful': 4,
    'project': 3, 'develop': 3, 'process': 3, 'available': 4,
    'organizations': 4, 'extinct': 6, 'modification': 7, 'apparently': 7,
    'abruptly': 7, 'rentable': 7, 'predictable': 6, 'pasture': 7,
    'putrid': 7, 'frivolous': 8, 'sessile': 8, 'dearth': 7,
    'presumptuous': 7, 'fringe': 8, 'economics': 5, 'summarize': 5,
    'stare': 5, 'eagerly': 5, 'completely': 4, 'maintained': 5,
    'geological': 6, 'embryological': 7, 'coadaptation': 8,
    'exterminated': 7, 'contingencies': 7, 'intercrossing': 6,
    'coleopterous': 8, 'marin': 5, 'organised': 5, 'monopoly': 8,
    'inorganic': 7, 'xyz': 0, '': 0,
}


def load_record(pickle_fname):
    """Load and return the object stored in the pickle file *pickle_fname*.

    Args:
        pickle_fname: Path of the pickle file to read.

    Returns:
        The unpickled object, or the built-in ``_TEST_MOCK`` table when the
        file does not exist.

    Raises:
        Any error other than ``FileNotFoundError`` raised while opening or
        unpickling the file is propagated to the caller.
    """
    try:
        with open(pickle_fname, 'rb') as f:
            # SECURITY: pickle.load can execute arbitrary code embedded in
            # the file.  Only call this on trusted, locally produced files.
            return pickle.load(f)
    except FileNotFoundError:
        # Fall back to the mock table so the module works without data files.
        return _TEST_MOCK


class VocabularyLevelEstimator:
    """Base class that computes a vocabulary level for ``self.word_lst``.

    Subclasses are responsible for setting ``self.word_lst`` (a list of
    words) in their ``__init__``; this base class does not define it.
    """

    # Maps a word to the numeric value that `level` sums up.  Loaded once,
    # when the class body is executed (i.e. at import time).
    # NOTE(review): the contents of 'words_and_tests.p' are not visible
    # here -- `level` assumes its values are numbers, like _TEST_MOCK's.
    _test = load_record('words_and_tests.p')

    @property
    def level(self):
        """Return the vocabulary level of ``self.word_lst``.

        Words that are not in ``_test`` are ignored.  The level is the
        average value of the known words; when that sum is non-zero, a
        bonus of ``known_count ** 2 / 100`` is added to the sum before
        averaging, so longer lists of known words score slightly higher.

        Returns:
            The level as a float, or ``0`` when no word of the list is
            known.
        """
        table = self._test
        total = 0.0
        valid_count = 0
        for word in self.word_lst:
            if word in table:
                total += table[word]
                valid_count += 1

        if valid_count == 0:
            return 0

        if total != 0:
            # Size bonus; skipped when every known word has value 0 so that
            # such a list still yields exactly 0.
            total += (valid_count * valid_count) / 100
        return total / valid_count


class UserVocabularyLevel(VocabularyLevelEstimator):
    """Vocabulary level of a user, computed from the keys of a dictionary."""

    def __init__(self, d):
        """Store *d* and use all of its keys as the word list.

        Args:
            d: Dictionary whose keys are the user's words.  The values are
                not read by this class.

        Raises:
            TypeError: If *d* is not a dictionary.
        """
        if not isinstance(d, dict):
            raise TypeError("Input must be a dictionary")
        self.d = d
        # Every key is used, in the dictionary's iteration order; no
        # "most recently added" filtering is applied.
        self.word_lst = list(d)


class ArticleVocabularyLevel(VocabularyLevelEstimator):
    """Vocabulary level of an article given as plain text."""

    def __init__(self, content):
        """Store *content* and split it into lower-cased alphabetic words.

        Args:
            content: The article text.

        Raises:
            TypeError: If *content* is not a string.
        """
        if not isinstance(content, str):
            raise TypeError("Content must be a string")
        self.content = content
        # Lower-case first so lookups match the lower-case table keys; only
        # runs of ASCII letters count as words (duplicates are kept).
        self.word_lst = re.findall(r'\b[a-zA-Z]+\b', content.lower())


if __name__ == '__main__':
    d = load_record('frequency_mrlan85.pickle')
    print(d)
    user = UserVocabularyLevel(d)
    print(user.level)  # level is a property
    article = ArticleVocabularyLevel('This is an interesting article')
    print(article.level)