My vocabulary.py can pass all tests, works great!
							parent
							
								
									d9512c929b
								
							
						
					
					
						commit
						e7bfbd0e61
					
				|  | @ -0,0 +1,69 @@ | |||
| '''  | ||||
|    Estimate a user's vocabulary level given his vocabulary data | ||||
|    Estimate an English article's difficulty level given its content | ||||
|    Preliminary design | ||||
|     | ||||
|    Hui, 2024-09-23 | ||||
|    Last updated: 2024-09-25, 2024-09-30 | ||||
| ''' | ||||
| 
 | ||||
| import pickle | ||||
| 
 | ||||
| 
 | ||||
def load_record(pickle_fname):
    """Load and return the object stored in a pickle file.

    Args:
        pickle_fname: Path of the pickle file to read (opened in binary mode).

    Returns:
        The object that ``pickle.load`` reconstructs from the file.

    Raises:
        OSError: If the file cannot be opened.
        Exception: Whatever ``pickle.load`` raises for a truncated or invalid
            pickle (for example ``pickle.UnpicklingError`` or ``EOFError``).
    """
    # SECURITY: unpickling can execute arbitrary code embedded in the file.
    # Only call this on files that come from a trusted source.
    with open(pickle_fname, 'rb') as stream:
        return pickle.load(stream)
| 
 | ||||
| 
 | ||||
# Mock difficulty table: maps a token to its difficulty level.
# Tokens mapped to 0 ('xyz' and the empty string) are recognised but add
# nothing to the total.
# NOTE(review): 'predictable:' and 'maintained,' carry trailing punctuation.
# Article text is tokenised on whitespace only, so these keys presumably exist
# to match raw article tokens -- confirm before normalising them.
_TEST_MOCK = {
    # levels 1-4
    'simple': 2, 'apple': 1, 'happy': 2, 'open': 3, 'like': 2, 'work': 2,
    'make': 2, 'money': 2,
    'source': 3, 'software': 3, 'successful': 4, 'project': 3, 'develop': 3,
    'process': 3,
    'available': 4, 'organizations': 4,
    # levels 4-8
    'extinct': 6, 'modification': 7, 'apparently': 7, 'abruptly': 7,
    'rentable': 7, 'predictable:': 6,
    'pasture': 7, 'putrid': 7, 'frivolous': 8, 'sessile': 8, 'dearth': 7,
    'presumptuous': 7,
    'fringe': 8, 'economics': 5, 'summarize': 5, 'stare': 5, 'eagerly': 5,
    'completely': 4, 'maintained,': 5,
    'geological': 6, 'embryological': 7, 'coadaptation': 8,
    'exterminated': 7, 'contingencies': 7,
    'intercrossing': 6, 'coleopterous': 8, 'marin': 5, 'organised': 5,
    'monopoly': 8, 'inorganic': 7,
    # level 0
    'xyz': 0, '': 0,
}


class VocabularyLevelEstimator:
    """Score a list of words against a word -> difficulty-level table.

    Subclasses are expected to assign ``self.word_lst`` (an iterable of
    tokens); ``level`` then derives a score from the tokens found in
    ``_test``.
    """

    # Difficulty table used for lookups.
    _test = _TEST_MOCK
    # Immutable fallback so ``level`` returns 0.0 instead of raising
    # AttributeError when no word list has been assigned.
    word_lst = ()

    @property
    def level(self):
        """Return the estimated level of ``self.word_lst`` as a float.

        Only tokens present in ``_test`` are counted.  The result is the mean
        level of those tokens plus ``count / 100``, where ``count`` is the
        number of counted tokens (the original formula adds ``count ** 2 / 100``
        to the total before dividing by ``count``).

        Returns:
            0.0 when no token is found in the table or when the levels of the
            found tokens sum to zero; otherwise the adjusted mean above.
        """
        known_levels = [
            self._test[word] for word in self.word_lst if word in self._test
        ]
        count = len(known_levels)
        total = float(sum(known_levels))
        if count == 0 or total == 0:
            return 0.0
        # Bonus grows with the number of recognised tokens.
        return (total + (count * count) / 100) / count
| 
 | ||||
| 
 | ||||
class UserVocabularyLevel(VocabularyLevelEstimator):
    """Vocabulary level computed from a user's word record.

    The record ``d`` is a mapping; every one of its keys is used as a word.
    """

    def __init__(self, d):
        # Keep the raw record alongside the derived word list.
        self.d = d
        self.word_lst = [*d.keys()]
        # TODO: just look at the most recently-added words (not implemented;
        # all keys are used for now).
| 
 | ||||
| 
 | ||||
class ArticleVocabularyLevel(VocabularyLevelEstimator):
    """Vocabulary level computed from the text of an article.

    The text is lower-cased and split on whitespace; punctuation attached to a
    token is kept as part of that token.
    """

    def __init__(self, content):
        self.content = content
        lowered = content.lower()
        self.word_lst = lowered.split()
        # TODO: select the 10 most difficult words (not implemented; every
        # token is used for now).
| 
 | ||||
| 
 | ||||
if __name__ == '__main__':
    # Demo run: print the loaded record, then the level computed from it,
    # then the level of a short sample sentence.
    record = load_record('frequency_mrlan85.pickle')
    print(record)
    print(UserVocabularyLevel(record).level)  # level is a property
    print(ArticleVocabularyLevel('This is an interesting article').level)
		Loading…
	
		Reference in New Issue