import pickle
import re


def load_record(pickle_fname):
    """Load and return the object stored in a pickle file.

    Args:
        pickle_fname: Path of the pickle file to read.

    Returns:
        Whatever object was pickled into the file.

    Raises:
        OSError: If the file cannot be opened (e.g. it does not exist).
    """
    # NOTE(security): pickle.load can execute arbitrary code while
    # deserializing, so this must only be pointed at trusted files.
    with open(pickle_fname, 'rb') as f:
        return pickle.load(f)


# Difficulty rules in priority order: the first rule with at least one
# matching test-type name decides the level of a word.
_LEVEL_RULES = (
    (4, ('CET4',)),
    (5, ('OXFORD3000',)),
    (6, ('CET6', 'GRADUATE')),
    (7, ('IELTS', 'OXFORD5000')),
    (8, ('BBC',)),
)


def convert_test_type_to_difficulty_level(d):
    """Map every word in ``d`` to a numeric difficulty level.

    Args:
        d: Mapping of word -> collection of test-type names. Membership is
            checked with ``in`` (presumably a list of strings -- confirm
            against the data file).

    Returns:
        A new dict of word -> level. The level is taken from the first
        matching rule in ``_LEVEL_RULES``; words matching no rule get 0.
    """
    result = {}
    for word, test_types in d.items():
        result[word] = next(
            (
                level
                for level, names in _LEVEL_RULES
                if any(name in test_types for name in names)
            ),
            0,
        )
    return result


class VocabularyLevelEstimator:
    """Base class that looks up the difficulty level of a word.

    The word table is read from ``_record_path`` the first time a level is
    requested rather than when the class is defined, so importing this
    module performs no file I/O and does not fail if the data file is
    missing at import time.
    """

    # Path of the pickled word -> test-types mapping (relative to the CWD).
    _record_path = 'words_and_tests.p'
    # Both caches stay None until the first lookup populates them.
    _test_raw = None
    _difficulty_dict = None

    @classmethod
    def _get_difficulty_dict(cls):
        """Return the word -> level table, loading it on first use."""
        table = cls._difficulty_dict
        if table is None:
            raw = load_record(cls._record_path)
            table = convert_test_type_to_difficulty_level(raw)
            # Cache on the base class so all subclasses share one table.
            VocabularyLevelEstimator._test_raw = raw
            VocabularyLevelEstimator._difficulty_dict = table
        return table

    @classmethod
    def get_word_level(cls, word):
        """Return the difficulty level of ``word``, or 0 if it is unknown.

        Raises:
            OSError: On the first call, if the data file cannot be opened.
        """
        return cls._get_difficulty_dict().get(word, 0)


class UserVocabularyLevel(VocabularyLevelEstimator):
    """Estimate a user's level from their most recently recorded words."""

    # How many of the most recent words contribute to the level.
    _RECENT_WORD_COUNT = 3

    def __init__(self, d):
        """Pick the most recent words out of ``d``.

        Args:
            d: Mapping of word -> collection of comparable values whose
                maximum ranks the word (presumably timestamps -- confirm
                against the caller). Words with an empty collection are
                ignored.
        """
        self.d = d
        word_time = [(word, max(times)) for word, times in d.items() if times]
        # Largest value first; ties keep their original relative order.
        sorted_words = sorted(word_time, key=lambda x: x[1], reverse=True)
        self.recent_words = [
            word for word, _ in sorted_words[:self._RECENT_WORD_COUNT]
        ]

    @property
    def level(self):
        """Mean level of the recent words that have a level above 0.

        Returns 0 when none of the recent words has a known level.
        """
        levels = [self.get_word_level(word) for word in self.recent_words]
        valid_levels = [lvl for lvl in levels if lvl > 0]
        return sum(valid_levels) / len(valid_levels) if valid_levels else 0


class ArticleVocabularyLevel(VocabularyLevelEstimator):
    """Estimate an article's level from its hardest words."""

    # How many of the highest word levels contribute to the level.
    _TOP_WORD_COUNT = 5
    _WORD_RE = re.compile(r'\b[\w-]+\b')

    def __init__(self, content):
        """Tokenize ``content`` and keep its highest word levels.

        Args:
            content: Article text. It is lower-cased before tokenizing.
                Every occurrence of a word counts, so a repeated word can
                contribute more than once.
        """
        self.content = content
        words = self._WORD_RE.findall(content.lower())
        word_levels = [self.get_word_level(word) for word in words]
        valid_levels = sorted(
            [lvl for lvl in word_levels if lvl > 0], reverse=True
        )
        # Keep the levels of the five most difficult words.
        self.top_levels = valid_levels[:self._TOP_WORD_COUNT]

    @property
    def level(self):
        """Mean of the stored top levels, or 0 if no word had a level."""
        if not self.top_levels:
            return 0
        return sum(self.top_levels) / len(self.top_levels)