From 219fdbc4eafe821dc26de9fb75d92692b9b8834b Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?=E9=92=9F=E7=82=BD=E7=8E=AE?=
 <14937682+zhong-chiwei@user.noreply.gitee.com>
Date: Sat, 14 Jun 2025 18:45:36 +0800
Subject: [PATCH] Add test_vocabulary_output_2025_06_05.txt

---
 test_vocabulary_output_2025_06_05.txt | 84 +++++++++++++++++++++++++++
 1 file changed, 84 insertions(+)
 create mode 100644 test_vocabulary_output_2025_06_05.txt

diff --git a/test_vocabulary_output_2025_06_05.txt b/test_vocabulary_output_2025_06_05.txt
new file mode 100644
index 0000000..36ad09f
--- /dev/null
+++ b/test_vocabulary_output_2025_06_05.txt
@@ -0,0 +1,84 @@
+import pickle
+import re
+from collections import defaultdict
+
+def load_record(pickle_fname):
+    """Unpickle and return the object stored in the file `pickle_fname`."""
+    with open(pickle_fname, 'rb') as handle:
+        return pickle.load(handle)
+
+class VocabularyLevelEstimator:
+    """Estimate a vocabulary level as the mean level of `word_lst`."""
+
+    # Maps a word to the sources where it appears (loaded at class creation).
+    _test = load_record('words_and_tests.p')
+
+    # Source tags in precedence order; the first tag found fixes the level.
+    _LEVEL_BY_SOURCE = (
+        ('IELTS', 6), ('BBC', 5), ('CET6', 4), ('CET4', 3), ('OXFORD3000', 2),
+    )
+
+    def __init__(self):
+        # Words whose levels get averaged; subclasses fill this in.
+        self.word_lst = []
+
+    def calculate_level(self, word):
+        """Return 0 if `word` is unknown, else 2-6 by source tag (1 if no tag)."""
+        if word not in self._test:
+            return 0
+        sources = self._test[word]
+        for tag, score in self._LEVEL_BY_SOURCE:
+            if tag in sources:
+                return score
+        return 1
+
+    @property
+    def level(self):
+        """Mean level of the words in `word_lst`, or 0.0 if it is empty."""
+        if not self.word_lst:
+            return 0.0
+        scores = [self.calculate_level(w) for w in self.word_lst]
+        return sum(scores) / len(scores)
+
+class UserVocabularyLevel(VocabularyLevelEstimator):
+    """Vocabulary level of a user, computed from the keys of a word dict."""
+
+    def __init__(self, d):
+        super().__init__()
+        self.d = d
+        self.word_lst = [*d.keys()]  # keys of d, in the dict's iteration order
+
+    @property
+    def level(self):
+        """Mean level of the first three words in `word_lst` (0.0 if none)."""
+        # The first three entries are treated as the user's recent words;
+        # NOTE(review): this relies on the key order of d -- confirm.
+        sample = self.word_lst[:3]
+        if not sample:
+            return 0.0
+        return sum(map(self.calculate_level, sample)) / len(sample)
+
+class ArticleVocabularyLevel(VocabularyLevelEstimator):
+    """Vocabulary level of an article, based on its ten highest-level words."""
+
+    def __init__(self, content):
+        super().__init__()
+        self.content = content
+
+        # Lower-case the text and keep only runs of ASCII letters.
+        tokens = re.findall(r'\b[a-zA-Z]+\b', content.lower())
+
+        # De-duplicate in first-seen order, then rank highest level first; the
+        # sort is stable, so equal-level words keep their order of appearance.
+        ranked = sorted(dict.fromkeys(tokens), key=self.calculate_level, reverse=True)
+        self.word_lst = ranked[:10]
+
+if __name__ == '__main__':
+    # Example usage
+    # d = load_record('frequency_mrlan85.pickle')
+    # print(d)
+    # user = UserVocabularyLevel(d)
+    # print(user.level)  # level is a property
+    # article = ArticleVocabularyLevel('This is an interesting article')
+    # print(article.level)
+    pass
\ No newline at end of file