From 8be875111d00e78413c73e4d6e1e43d658d622b6 Mon Sep 17 00:00:00 2001
From: xrj <2023438860@qq.com>
Date: Fri, 30 May 2025 12:46:16 +0800
Subject: [PATCH] =?UTF-8?q?=E6=B7=BB=E5=8A=A0=E4=BA=86vocabulary=E5=92=8Ct?=
 =?UTF-8?q?est=5Fvocabulary?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 app/test_vocabulary.py | 12 ++++++++
 app/vocabulary.py      | 63 ++++++++++++++++++++++++++++++++++++++++++
 2 files changed, 75 insertions(+)
 create mode 100644 app/test_vocabulary.py
 create mode 100644 app/vocabulary.py

diff --git a/app/test_vocabulary.py b/app/test_vocabulary.py
new file mode 100644
index 0000000..b3ed0f8
--- /dev/null
+++ b/app/test_vocabulary.py
@@ -0,0 +1,12 @@
+from vocabulary import UserVocabularyLevel, ArticleVocabularyLevel
+
+
+def test_article_level():  # boundary check for ArticleVocabularyLevel
+    ''' Boundary case test '''
+    article = ArticleVocabularyLevel('')  # empty content -> ''.split() gives no words
+    assert article.level == 0  # level returns 0 when the word list is empty
+    
+def test_user_level():  # boundary check for UserVocabularyLevel
+    ''' Boundary case test '''
+    user = UserVocabularyLevel({})  # empty dict -> no keys, so no words
+    assert user.level == 0  # level returns 0 when the word list is empty
\ No newline at end of file
diff --git a/app/vocabulary.py b/app/vocabulary.py
new file mode 100644
index 0000000..b6c2a08
--- /dev/null
+++ b/app/vocabulary.py
@@ -0,0 +1,63 @@
+''' 
+   Estimate a user's vocabulary level given his vocabulary data
+   Estimate an English article's difficulty level given its content
+   Preliminary design
+   
+   Hui, 2024-09-23
+   Last updated: 2024-09-25, 2024-09-30
+'''
+
+import pickle
+
+
+def load_record(pickle_fname):  # return the object unpickled from the file at pickle_fname
+    with open(pickle_fname, 'rb') as f:  # binary mode is required by pickle
+        d = pickle.load(f)  # NOTE(review): unpickling can run arbitrary code; only load trusted files
+    return d
+
+
+class VocabularyLevelEstimator:  # base class; subclasses set self.word_lst in __init__
+    _test = load_record('words_and_tests.p') # map a word to the sources where it appears; loaded at class definition, path relative to the CWD
+
+    @property
+    def level(self):  # prints every word in self.word_lst, then returns a number (0 for no words)
+        total = len(self._test)  # number of entries in the word -> sources table
+        num = 0  # counts the words in self.word_lst
+        for word in self.word_lst:
+            num += 1
+            if word in self._test:
+                print(f'{word} : {self._test[word]}')  # known word: also show its table entry
+            else:
+                print(f'{word}')  # word not present in the table
+        if  num == 0:
+            return 0  # no words: avoid dividing by zero
+        return total/num  # NOTE(review): table size / word count ignores which words matched -- placeholder? confirm
+
+
+class UserVocabularyLevel(VocabularyLevelEstimator):  # level estimator fed by a user's vocabulary dict
+    def __init__(self, d):  # d: dict whose keys are the user's words (values are not read here)
+        self.d = d  # keep the raw record
+        self.word_lst = list(d.keys())  # every key counts as one word
+        # TODO: just look at the most recently-added words (not implemented; all keys are used)
+
+
+class ArticleVocabularyLevel(VocabularyLevelEstimator):  # level estimator fed by an article's text
+    def __init__(self, content):  # content: the article text as a str
+        self.content = content  # keep the raw text
+        self.word_lst = content.lower().split()  # lowercase, split on whitespace; punctuation stays attached
+        # TODO: select the 10 most difficult words (not implemented; every token is used)
+        
+
+if __name__ == '__main__':  # manual demo; runs only when the module is executed as a script
+    d = load_record('frequency_zhangsan.pickle')  # sample user record -- presumably a dict keyed by word; verify
+    print(d)
+    # print a separator line
+    print('------------')
+    user = UserVocabularyLevel(d)
+    print(user.level) # level is a property
+    print('------------')
+    article = ArticleVocabularyLevel('This is an interesting article')
+    print(article.level)
+    
+    
+