From 8be875111d00e78413c73e4d6e1e43d658d622b6 Mon Sep 17 00:00:00 2001
From: xrj <2023438860@qq.com>
Date: Fri, 30 May 2025 12:46:16 +0800
Subject: [PATCH] =?UTF-8?q?=E6=B7=BB=E5=8A=A0=E4=BA=86vocabulary=E5=92=8Ct?=
 =?UTF-8?q?est=5Fvocabulary?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 app/test_vocabulary.py | 12 ++++++++
 app/vocabulary.py      | 63 ++++++++++++++++++++++++++++++++++++++++++
 2 files changed, 75 insertions(+)
 create mode 100644 app/test_vocabulary.py
 create mode 100644 app/vocabulary.py

diff --git a/app/test_vocabulary.py b/app/test_vocabulary.py
new file mode 100644
index 0000000..b3ed0f8
--- /dev/null
+++ b/app/test_vocabulary.py
@@ -0,0 +1,12 @@
+from vocabulary import UserVocabularyLevel, ArticleVocabularyLevel
+
+
+def test_article_level():
+    ''' Boundary case: an article with empty content is expected to have level 0 '''
+    article = ArticleVocabularyLevel('')
+    assert article.level == 0
+
+def test_user_level():
+    ''' Boundary case: a user with an empty vocabulary dict is expected to have level 0 '''
+    user = UserVocabularyLevel({})
+    assert user.level == 0
\ No newline at end of file
diff --git a/app/vocabulary.py b/app/vocabulary.py
new file mode 100644
index 0000000..b6c2a08
--- /dev/null
+++ b/app/vocabulary.py
@@ -0,0 +1,63 @@
+'''
+   Estimate a user's vocabulary level given their vocabulary data.
+   Estimate an English article's difficulty level given its content.
+   Preliminary design.
+
+   Hui, 2024-09-23
+   Last updated: 2024-09-25, 2024-09-30
+'''
+
+import pickle
+
+
+def load_record(pickle_fname):  # unpickle and return the object stored in the file pickle_fname
+    with open(pickle_fname, 'rb') as f:
+        d = pickle.load(f)  # NOTE: unpickling can execute arbitrary code; only load trusted files
+    return d
+
+
+class VocabularyLevelEstimator:  # base class; level reads self.word_lst, which subclasses set in __init__
+    _test = load_record('words_and_tests.p') # class attribute mapping a word to the sources where it appears; loaded when the class body runs (i.e. at import), path resolved against the current working directory
+
+    @property
+    def level(self):  # returns 0 when word_lst is empty, otherwise len(_test) / number of words
+        total = len(self._test)  # number of entries in the word -> sources map
+        num = 0
+        for word in self.word_lst:
+            num += 1  # count every word, whether or not it is in the map
+            if word in self._test:
+                print(f'{word} : {self._test[word]}')  # NOTE(review): reading the property prints to stdout
+            else:
+                print(f'{word}')
+        if num == 0:  # no words at all: return 0 rather than divide by zero
+            return 0
+        return total/num
+
+
+class UserVocabularyLevel(VocabularyLevelEstimator):
+    def __init__(self, d):  # d: mapping whose keys are used as the word list
+        self.d = d
+        self.word_lst = list(d.keys())
+        # TODO: just look at the most recently-added words (currently every key of d is used)
+
+
+class ArticleVocabularyLevel(VocabularyLevelEstimator):
+    def __init__(self, content):  # content: article text as a str
+        self.content = content
+        self.word_lst = content.lower().split()  # lower-cased, whitespace-separated tokens
+        # TODO: select the 10 most difficult words (currently every token is kept)
+
+
+if __name__ == '__main__':
+    d = load_record('frequency_zhangsan.pickle')
+    print(d)
+    # line break: print a separator between outputs
+    print('------------')
+    user = UserVocabularyLevel(d)
+    print(user.level) # level is a property
+    print('------------')
+    article = ArticleVocabularyLevel('This is an interesting article')
+    print(article.level)
+
+
+