"""Estimate vocabulary difficulty levels for users and articles."""

import pickle
import re
from collections import defaultdict


def load_record(pickle_fname):
    """Return the object stored in the pickle file *pickle_fname*.

    The file is opened in binary mode and closed before returning.
    """
    # NOTE: unpickling can execute arbitrary code; only pass files that come
    # from a trusted source.
    with open(pickle_fname, 'rb') as handle:
        return pickle.load(handle)


class VocabularyLevelEstimator:
    """Base estimator: the average difficulty of the words in ``word_lst``."""

    # Maps a word to the sources where it appears. Loaded once, when the
    # class body executes (i.e. when this module is imported).
    _test = load_record('words_and_tests.p')

    def __init__(self):
        """Start with an empty word list; subclasses fill it in."""
        self.word_lst = []

    def calculate_level(self, word):
        """Return the difficulty level (0-6) of a single word.

        A word missing from ``_test`` scores 0. A known word scores by the
        first source tag found in its entry, checked in this order: IELTS=6,
        BBC=5, CET6=4, CET4=3, OXFORD3000=2. A known word carrying none of
        these tags scores 1.
        """
        if word not in self._test:
            return 0

        # Ordered from hardest to easiest: the first tag present wins.
        tag_levels = (
            ('IELTS', 6),
            ('BBC', 5),
            ('CET6', 4),
            ('CET4', 3),
            ('OXFORD3000', 2),
        )
        sources = self._test[word]
        for tag, score in tag_levels:
            if tag in sources:
                return score
        return 1

    @property
    def level(self):
        """Mean difficulty over ``word_lst``; 0.0 when the list is empty."""
        words = self.word_lst
        if not words:
            return 0.0
        return sum(map(self.calculate_level, words)) / len(words)


class UserVocabularyLevel(VocabularyLevelEstimator):
    """Vocabulary level of a user, derived from the keys of a word dict."""

    def __init__(self, d):
        """Keep *d* and use its keys, in iteration order, as the word list."""
        super().__init__()
        self.d = d
        self.word_lst = list(d.keys())

    @property
    def level(self):
        """Mean difficulty of the first three words; 0.0 when there are none."""
        if not self.word_lst:
            return 0.0

        # Only the first three entries of word_lst are considered.
        # NOTE(review): these are meant to be the user's most recent words,
        # which holds only if d's keys are ordered newest first -- confirm
        # against the caller.
        sample = self.word_lst[:3]
        scores = [self.calculate_level(word) for word in sample]
        return sum(scores) / len(sample)


class ArticleVocabularyLevel(VocabularyLevelEstimator):
    """Vocabulary level of an article, based on its hardest distinct words."""

    def __init__(self, content):
        """Tokenize *content* and keep its ten most difficult distinct words.

        The text is lower-cased and split into runs of ASCII letters.
        Duplicates are dropped, keeping first occurrences. Words are then
        ordered by descending difficulty; the sort is stable, so equally
        difficult words keep their order of appearance.
        """
        super().__init__()
        self.content = content

        tokens = re.findall(r'\b[a-zA-Z]+\b', content.lower())
        # dict.fromkeys de-duplicates while preserving first-seen order.
        hardest_first = sorted(
            dict.fromkeys(tokens),
            key=self.calculate_level,
            reverse=True,
        )
        self.word_lst = hardest_first[:10]


if __name__ == '__main__':
    # Example usage:
    # d = load_record('frequency_mrlan85.pickle')
    # print(d)
    # user = UserVocabularyLevel(d)
    # print(user.level)  # level is a property
    # article = ArticleVocabularyLevel('This is an interesting article')
    # print(article.level)
    pass