添加了vocabulary和test_vocabulary

Bug585-chengchuhang
谢荣进 2025-05-30 12:46:16 +08:00
parent 3cffb14039
commit 8be875111d
2 changed files with 75 additions and 0 deletions

12
app/test_vocabulary.py Normal file
View File

@ -0,0 +1,12 @@
from vocabulary import UserVocabularyLevel, ArticleVocabularyLevel
def test_article_level():
    '''Boundary case: an article built from an empty string has level 0.'''
    empty_article = ArticleVocabularyLevel('')
    expected_level = 0
    assert empty_article.level == expected_level
def test_user_level():
    '''Boundary case: a user built from an empty record has level 0.'''
    empty_user = UserVocabularyLevel({})
    expected_level = 0
    assert empty_user.level == expected_level

63
app/vocabulary.py Normal file
View File

@ -0,0 +1,63 @@
'''
Estimate a user's vocabulary level given his vocabulary data
Estimate an English article's difficulty level given its content
Preliminary design
Hui, 2024-09-23
Last updated: 2024-09-25, 2024-09-30
'''
import pickle
def load_record(pickle_fname):
    '''
    Read one pickled object from the file named pickle_fname and return it.

    NOTE: unpickling can execute arbitrary code, so only pass files that
    come from a trusted source.
    '''
    with open(pickle_fname, mode='rb') as stream:
        return pickle.load(stream)
class VocabularyLevelEstimator:
    '''
    Shared level computation for the vocabulary estimators.

    This class never assigns self.word_lst itself; the level property
    reads it, so subclasses are expected to set it before level is used.
    '''

    # Loaded once, when the class body is executed.
    # Maps a word to the sources where it appears.
    _test = load_record('words_and_tests.p')

    @property
    def level(self):
        '''
        Print every word in self.word_lst, followed by its _test entry when
        the word has one, then return len(_test) divided by the number of
        words.

        Returns 0 when self.word_lst is empty.

        NOTE(review): the ratio looks like a placeholder for the
        "preliminary design" - confirm the intended formula.
        '''
        known_words = self._test
        table_size = len(known_words)
        word_count = 0
        for word_count, word in enumerate(self.word_lst, start=1):
            if word in known_words:
                line = f'{word} : {known_words[word]}'
            else:
                line = f'{word}'
            print(line)
        # An empty word list would otherwise divide by zero.
        return table_size / word_count if word_count else 0
class UserVocabularyLevel(VocabularyLevelEstimator):
    '''Estimator whose word list is the keys of a user's vocabulary record.'''

    def __init__(self, d):
        '''
        Keep the record d and use its keys as the words to examine.

        d must provide keys(); its values are not read here.
        '''
        self.d = d
        # TODO (original note): just look at the most recently-added words.
        # For now every key of d is used.
        self.word_lst = [word for word in d.keys()]
class ArticleVocabularyLevel(VocabularyLevelEstimator):
    '''Estimator whose word list is the tokens of an article's text.'''

    def __init__(self, content):
        '''
        Keep the raw text and split its lower-cased form on whitespace.

        Punctuation is not stripped, so it stays attached to the tokens.
        '''
        self.content = content
        lowered = content.lower()
        # TODO (original note): select the 10 most difficult words.
        # For now every token is used.
        self.word_lst = lowered.split()
if __name__ == '__main__':
    # Manual demo: print a stored user record, then the level computed for
    # that user and for a short sample article.
    separator = '------------'

    record = load_record('frequency_zhangsan.pickle')
    print(record)
    # visual line break between sections
    print(separator)

    user_estimator = UserVocabularyLevel(record)
    print(user_estimator.level)  # level is a property, not a method call
    print(separator)

    article_estimator = ArticleVocabularyLevel('This is an interesting article')
    print(article_estimator.level)