vocabulary
							parent
							
								
									d9512c929b
								
							
						
					
					
						commit
						6462ef87cc
					
				
										
											Binary file not shown.
										
									
								
							|  | @ -0,0 +1,61 @@ | |||
| import re | ||||
| import pickle | ||||
| 
 | ||||
def load_record(pickle_fname):
    """Return the object stored in the pickle file *pickle_fname*.

    The file is opened in binary mode and closed again before returning.

    NOTE(review): unpickling can execute arbitrary code, so this should
    only ever be pointed at files the project itself produced.
    """
    with open(pickle_fname, 'rb') as source:
        return pickle.load(source)
| 
 | ||||
def convert_test_type_to_difficulty_level(d):
    """Map every word in *d* to a numeric difficulty level.

    *d* maps a word to the test types it is tagged with. The tags are
    checked in a fixed priority order and the first one found decides the
    level, so a word tagged with several tests gets the level of the
    earliest entry in the table below. Words matching no known tag get 0.

    Returns a new dict ``{word: level}`` with the same keys as *d*.
    """
    # Highest priority first; the first group with a tag present wins.
    priority = (
        (('CET4',), 4),
        (('OXFORD3000',), 5),
        (('CET6', 'GRADUATE'), 6),
        (('IELTS', 'OXFORD5000'), 7),
        (('BBC',), 8),
    )

    def grade(tags):
        for names, level in priority:
            if any(name in tags for name in names):
                return level
        return 0  # no recognised test type

    return {word: grade(tags) for word, tags in d.items()}
| 
 | ||||
class VocabularyLevelEstimator:
    """Base class that looks up a word's difficulty level in a shared table."""

    # Both attributes are built once, when this class body is executed:
    # the raw record is read from 'words_and_tests.p' (path is resolved
    # against the current working directory) and then converted into a
    # word -> difficulty-level mapping shared by all subclasses.
    _test_raw = load_record('words_and_tests.p')
    _difficulty_dict = convert_test_type_to_difficulty_level(_test_raw)

    @classmethod
    def get_word_level(cls, word):
        """Return the difficulty level stored for *word*, or 0 if it is absent."""
        table = cls._difficulty_dict
        return table[word] if word in table else 0
| 
 | ||||
class UserVocabularyLevel(VocabularyLevelEstimator):
    """Estimate a user's vocabulary level from their most recent words."""

    def __init__(self, d):
        """Record *d* and pick the three words with the largest latest value.

        *d* maps each word to a collection of comparable values
        (presumably timestamps of when the word was seen -- confirm with
        the caller). Words whose collection is empty are ignored.
        """
        self.d = d

        latest_seen = []
        for word, times in d.items():
            if times:
                latest_seen.append((word, max(times)))

        # Stable descending sort: ties keep the iteration order of *d*.
        latest_seen.sort(key=lambda pair: pair[1], reverse=True)
        self.recent_words = [pair[0] for pair in latest_seen[:3]]

    @property
    def level(self):
        """Mean difficulty of the recent words that have a level above 0.

        Returns 0 when none of the recent words has a positive level.
        """
        total = 0
        counted = 0
        for word in self.recent_words:
            word_level = self.get_word_level(word)
            if word_level > 0:
                total += word_level
                counted += 1

        if counted == 0:
            return 0
        return total / counted
| 
 | ||||
class ArticleVocabularyLevel(VocabularyLevelEstimator):
    """Estimate an article's vocabulary level from its hardest words."""

    def __init__(self, content):
        """Tokenise *content* and keep the five highest word levels.

        The text is lower-cased and split into runs of word characters
        and hyphens. Every token occurrence is looked up, so a repeated
        word contributes its level once per occurrence.
        """
        self.content = content

        tokens = re.findall(r'\b[\w-]+\b', content.lower())

        scored = []
        for token in tokens:
            token_level = self.get_word_level(token)
            if token_level > 0:
                scored.append(token_level)

        scored.sort(reverse=True)
        # Keep the five highest difficulty levels found in the article.
        self.top_levels = scored[:5]

    @property
    def level(self):
        """Mean of the stored top levels, or 0 when there are none."""
        top = self.top_levels
        return sum(top) / len(top) if top else 0
		Loading…
	
		Reference in New Issue