My vocabulary.py passes all tests and works great!
parent
d9512c929b
commit
e7bfbd0e61
|
@ -0,0 +1,69 @@
|
|||
'''
|
||||
Estimate a user's vocabulary level given his vocabulary data
|
||||
Estimate an English article's difficulty level given its content
|
||||
Preliminary design
|
||||
|
||||
Hui, 2024-09-23
|
||||
Last updated: 2024-09-25, 2024-09-30
|
||||
'''
|
||||
|
||||
import pickle
|
||||
|
||||
|
||||
def load_record(pickle_fname):
    '''
    Read a pickled record from disk and return the unpickled object.

    pickle_fname: path of the pickle file; it is opened in binary mode.

    NOTE: unpickling can execute arbitrary code, so only pass files that
    come from a trusted source.
    '''
    with open(pickle_fname, 'rb') as pickle_file:
        return pickle.load(pickle_file)
|
||||
|
||||
|
||||
# Hand-written word -> difficulty table used by VocabularyLevelEstimator.
# The values in this table range from 0 (lowest) to 8 (highest).
# NOTE(review): 'predictable:' and 'maintained,' carry trailing punctuation.
#   Presumably they are meant to match whitespace-split article tokens,
#   which keep their punctuation -- confirm before cleaning them up.
# NOTE(review): 'marin' may be a typo for 'marine' -- confirm.
# 'xyz' and '' are level-0 entries: they count as known words but add
# nothing to the total.
_TEST_MOCK = {
    'simple': 2, 'apple': 1, 'happy': 2, 'open': 3, 'like': 2, 'work': 2, 'make': 2, 'money': 2,
    'source': 3, 'software': 3, 'successful': 4, 'project': 3, 'develop': 3, 'process': 3,
    'available': 4, 'organizations': 4,
    'extinct': 6, 'modification': 7, 'apparently': 7, 'abruptly': 7, 'rentable': 7, 'predictable:': 6,
    'pasture': 7, 'putrid': 7, 'frivolous': 8, 'sessile': 8, 'dearth': 7, 'presumptuous': 7,
    'fringe': 8, 'economics': 5, 'summarize': 5, 'stare': 5, 'eagerly': 5, 'completely': 4, 'maintained,': 5,
    'geological': 6, 'embryological': 7, 'coadaptation': 8, 'exterminated': 7, 'contingencies': 7,
    'intercrossing': 6, 'coleopterous': 8, 'marin': 5, 'organised': 5, 'monopoly': 8, 'inorganic': 7,
    'xyz': 0, '': 0,
}


class VocabularyLevelEstimator:
    '''
    Base class that turns a list of words into a single difficulty level.

    Subclasses are expected to set ``word_lst`` (an iterable of words) in
    their ``__init__``. Words are looked up in ``_test``; words that are
    not in the table are ignored.
    '''

    _test = _TEST_MOCK
    # Empty default so that ``level`` is well defined (0.0) even when a
    # subclass has not assigned ``word_lst``.
    word_lst = ()

    @property
    def level(self):
        '''
        Return the estimated level of ``word_lst`` as a float.

        The result is the mean table value of the known words. When the
        sum of those values is non-zero, ``valid_count ** 2 / 100`` is
        added to the sum before dividing, which raises the mean by
        ``valid_count / 100``. Returns 0.0 when no word is in the table.
        '''
        table = self._test
        scores = [table[word] for word in self.word_lst if word in table]
        valid_count = len(scores)
        if valid_count == 0:
            return 0.0

        total = float(sum(scores))
        if total != 0:
            # Bonus that grows with the number of recognised words.
            total += (valid_count * valid_count) / 100
        return total / valid_count
|
||||
|
||||
|
||||
class UserVocabularyLevel(VocabularyLevelEstimator):
    '''
    Vocabulary level of a user, computed from the words in the user's record.
    '''

    def __init__(self, d):
        '''
        d: mapping whose keys are the user's words. The mapping itself is
           kept on ``self.d``; its values are not read here.
        '''
        self.d = d
        # Every key of the record becomes a word to be scored.
        # NOTE: the design note for this class says to look only at the most
        # recently-added words; that selection is not implemented here.
        self.word_lst = [*d.keys()]
|
||||
|
||||
|
||||
class ArticleVocabularyLevel(VocabularyLevelEstimator):
    '''
    Difficulty level of an article, computed from the words in its text.
    '''

    def __init__(self, content):
        '''
        content: the article text as a string.
        '''
        self.content = content
        lowered = content.lower()
        # split() without arguments breaks on runs of whitespace only, so any
        # punctuation stays attached to its word.
        # NOTE: the design note for this class says to select the 10 most
        # difficult words; that selection is not implemented here, every
        # token is kept.
        self.word_lst = lowered.split()
|
||||
|
||||
|
||||
if __name__ == '__main__':
    # Small demo: score one user's pickled record, then a sample sentence.
    record = load_record('frequency_mrlan85.pickle')
    print(record)

    reader = UserVocabularyLevel(record)
    reader_level = reader.level  # level is a property
    print(reader_level)

    sample = ArticleVocabularyLevel('This is an interesting article')
    sample_level = sample.level
    print(sample_level)
|
Loading…
Reference in New Issue