From a1255b2f3d5a1a8db7a41d1d31cde977a968c8f4 Mon Sep 17 00:00:00 2001
From: wanglulu <3409274047@qq.com>
Date: Mon, 9 Jun 2025 17:32:14 +0800
Subject: [PATCH] =?UTF-8?q?vocabulary=E6=9C=80=E7=BB=88=E7=89=88?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 app/test/article_test.p | Bin 0 -> 385 bytes
 app/vocabulary.py       |  33 +++++++++++++--------------------
 2 files changed, 13 insertions(+), 20 deletions(-)
 create mode 100644 app/test/article_test.p

diff --git a/app/test/article_test.p b/app/test/article_test.p
new file mode 100644
index 0000000000000000000000000000000000000000..4069c82bd6e9a4edc4c2bfe7fea1bbe5ad37f229
GIT binary patch
literal 385
zcmXAlO-=(b423~r#VOtas<{CR7D!ZLfz(A2J2!DBRx|OCpJ~gAL%@A;re4K)FYj~w
z^YFLa->vnbc6dpgN3P=Vgh1a6e@!&4?INRQZN(HyGErrZU2_3a@TLo6gZD_15aq<!
zo1PzGHEXU2GNCF;rR34(0lUB`PNTP!E*>=_sTpaJG~!GrP6(V=^Q(DDJZd0wFr2pN
zF;SwC2Rx&2mOj~6qntU~>jFAxtDSjhxm5IqMjLNfhuXqXjK;2k4W4zvXJ~<B({FHK
zYE_Oc!b!bCkFZ#}P!&z~dk?;t&<9<F&t<^Yn5l2XfSzY@D|!tNdd8f&S`OXfZY0~=
H@H_kieawRO

literal 0
HcmV?d00001

diff --git a/app/vocabulary.py b/app/vocabulary.py
index 4468d11..7e65d5b 100644
--- a/app/vocabulary.py
+++ b/app/vocabulary.py
@@ -22,14 +22,6 @@ def is_english_word(word):
     return bool(pattern.match(word))
 
 
-# 判断日期格式是否有效
-def is_valid_datetime_string(date_string, format='%Y%m%d%H%M'):
-    try:
-        datetime.strptime(date_string, format)
-        return True
-    except ValueError:
-        return False
-
 
 # 去除非单词字符
 def remove_non_words(input_string):
@@ -41,19 +33,17 @@ def remove_non_words(input_string):
 # 主类：词汇水平估算器
 class VocabularyLevelEstimator:
     # 词汇表（单词：【"雅思","高考"...】）
-    _test = load_record('static\words_and_tests.p')  # 词汇到测试来源的映射
+    _test = load_record('static/words_and_tests.p')  # maps each word to the tests it is sourced from
 
     @property
     def level(self):
-        total = 0.0  # 总评分
-        num = 0  # 计算的单词数
+        total = 0.0  # running sum of per-word scores
+        num = 0  # number of words visited
         for word in self.word_lst:
-            num += 1
             if word in self._test:
-                print(f'{word} : {self._test[word]}')  # 输出单词及其来源
-            else:
-                print(f'{word}')  # 输出没有评分的单词
-        return total / num if num else 0.0  # 返回平均值
+                total += self._test[word]  # NOTE(review): the comment above _test shows list values (test names); float += list raises TypeError -- confirm a numeric score is stored
+            num += 1  # counted for every word, whether or not it is in _test
+        return total / num if num > 0 else 0.0  # mean over all words; 0.0 when word_lst is empty
 
     def get_word_level(self, word):
         # 常见高频词汇列表
@@ -91,15 +81,13 @@ class UserVocabularyLevel(VocabularyLevelEstimator):
             self.filter_user_frequency()
 
     def filter_user_frequency(self):
-        # 过滤出最近一周的生词，用于计算用户词汇水平
         stemmer = snowballstemmer.stemmer('english')
-        range_datetime = (datetime.now() - timedelta(days=7)).strftime('%Y%m%d%H%M')
 
         self.filtered_frequency = []
 
         for word in self.d:
-            if is_english_word(word) and is_valid_datetime_string(self.d[word][0]):
-                if self.d[word][0] > range_datetime and word not in self.filtered_frequency:
+            if is_english_word(word):  # no date filter any more: every English key of self.d is considered
+                if stemmer.stemWord(word) not in self.filtered_frequency:  # compare the stem: the list stores stems, not raw words
                     self.filtered_frequency.append(stemmer.stemWord(word))
 
     @property
@@ -169,3 +157,8 @@ if __name__ == '__main__':
     with open('test/article_test.p', 'rb') as file:
         loaded_data = pickle.load(file)
         print(loaded_data)
+
+        article1 = ArticleVocabularyLevel('source')
+        article2 = ArticleVocabularyLevel('open source')
+
+