'''
Estimate a user's vocabulary level given his vocabulary data
Estimate an English article's difficulty level given its content
Preliminary design

Hui, 2024-09-23
Last updated: 2024-09-25, 2024-09-30
'''

import logging
import pickle
import string

logger = logging.getLogger(__name__)


def load_record(pickle_fname):
    '''Return the object stored in the pickle file *pickle_fname*.

    SECURITY NOTE: ``pickle.load`` can execute arbitrary code while
    unpickling. Only call this on files that come from a trusted source.

    Raises whatever ``open`` / ``pickle.load`` raise (e.g.
    ``FileNotFoundError``, ``pickle.UnpicklingError``).
    '''
    with open(pickle_fname, 'rb') as f:
        return pickle.load(f)


# Mock table mapping a word to its difficulty level (0 means "no level").
# A few keys ('predictable:', 'maintained,') carry punctuation; presumably
# they mirror raw whitespace-split tokens -- they are kept exactly as-is.
_TEST_MOCK = {
    'simple': 2, 'apple': 1, 'happy': 2, 'open': 3, 'like': 2, 'work': 2,
    'make': 2, 'money': 2, 'source': 3, 'software': 3, 'successful': 4,
    'project': 3, 'develop': 3, 'process': 3, 'available': 4,
    'organizations': 4,
    'extinct': 6, 'modification': 7, 'apparently': 7, 'abruptly': 7,
    'rentable': 7, 'predictable:': 6,
    'pasture': 7, 'putrid': 7, 'frivolous': 8, 'sessile': 8, 'dearth': 7,
    'presumptuous': 7, 'fringe': 8, 'economics': 5, 'summarize': 5,
    'stare': 5, 'eagerly': 5, 'completely': 4, 'maintained,': 5,
    'geological': 6, 'embryological': 7, 'coadaptation': 8,
    'exterminated': 7, 'contingencies': 7, 'intercrossing': 6,
    'coleopterous': 8, 'marin': 5, 'organised': 5, 'monopoly': 8,
    'inorganic': 7, 'xyz': 0, '': 0
}


class VocabularyLevelEstimator:
    '''Base class that turns a list of words into a single level score.

    Subclasses are expected to set ``self.word_lst`` to the words to score.
    '''

    # Word -> difficulty lookup table used by ``level``.
    _test = _TEST_MOCK

    # Default so that ``level`` works (and returns 0) even when a subclass
    # or caller never assigned ``word_lst``.
    word_lst = ()

    def _word_level(self, word):
        '''Return the table level for *word*, or ``None`` if it is unknown.

        The raw word is tried first so that table keys which themselves
        contain punctuation still match. Otherwise surrounding punctuation
        is stripped ('apple.' -> 'apple') and the lookup is retried.
        '''
        table = self._test
        if word in table:
            return table[word]
        if isinstance(word, str):
            stripped = word.strip(string.punctuation)
            # An all-punctuation token strips to '' -- do not let it hit
            # the '' entry of the table and count as a valid word.
            if stripped and stripped in table:
                return table[stripped]
        return None

    @property
    def level(self):
        '''Estimated level of ``self.word_lst``.

        The score is the mean table level of the recognised words, plus a
        bonus of ``valid_count ** 2 / 100`` added to the sum before
        averaging (only when the sum is non-zero). Returns 0 when no word
        is recognised.
        '''
        total = 0.0
        valid_count = 0
        for word in self.word_lst:
            word_level = self._word_level(word)
            if word_level is not None:
                total += word_level
                valid_count += 1

        # Diagnostics go to the logger; reading a property must not print.
        logger.debug('valid_count: %s, total: %s', valid_count, total)

        if valid_count == 0:
            return 0
        if total != 0:
            total += (valid_count * valid_count) / 100
        return total / valid_count


class UserVocabularyLevel(VocabularyLevelEstimator):
    '''Level estimate for a user, scored over the keys of *d*.'''

    def __init__(self, d):
        self.d = d
        # TODO (original note): just look at the most recently-added words
        self.word_lst = list(d.keys())


class ArticleVocabularyLevel(VocabularyLevelEstimator):
    '''Level estimate for an article, scored over its lower-cased tokens.'''

    def __init__(self, content):
        self.content = content
        # TODO (original note): select the 10 most difficult words
        self.word_lst = content.lower().split()


def main():
    '''Demo: print a stored user's level and a sample article's level.'''
    d = load_record('frequency_mrlan85.pickle')
    print(d)
    user = UserVocabularyLevel(d)
    print(user.level)  # level is a property
    article = ArticleVocabularyLevel('This is an interesting article')
    print(article.level)


if __name__ == '__main__':
    main()