vocabulary最终版

2025-06-09 17:32:14 +08:00 · 2025-06-09 17:32:14 +08:00 · a1255b2f3d
parent cea015f18a
commit a1255b2f3d
2 changed files with 13 additions and 20 deletions
--- a/app/test/article_test.p
+++ b/app/test/article_test.p
--- a/app/vocabulary.py
+++ b/app/vocabulary.py
@@ -22,14 +22,6 @@ def is_english_word(word):
    return bool(pattern.match(word))
 # 判断日期格式是否有效
 def is_valid_datetime_string(date_string, format='%Y%m%d%H%M'):
    try:
        datetime.strptime(date_string, format)
        return True
    except ValueError:
        return False
 # 去除非单词字符
 def remove_non_words(input_string):
@@ -41,19 +33,17 @@ def remove_non_words(input_string):
 # 主类：词汇水平估算器
 class VocabularyLevelEstimator:
    # 词汇表（单词：【"雅思","高考"...】）
-    _test = load_record('static\words_and_tests.p')  # 词汇到测试来源的映射
+    _test = load_record('static/words_and_tests.p')  # 词汇到测试来源的映射
    @property
    def level(self):
-        total = 0.0  # 总评分
+        total = 0.0
-        num = 0  # 计算的单词数
+        num = 0
        for word in self.word_lst:
            num += 1
            if word in self._test:
-                print(f'{word} : {self._test[word]}')  # 输出单词及其来源
+                total += self._test[word]  # Assuming _test[word] returns a difficulty score
-            else:
+            num += 1
-                print(f'{word}')  # 输出没有评分的单词
+        return total / num if num > 0 else 0.0
        return total / num if num else 0.0  # 返回平均值
    def get_word_level(self, word):
        # 常见高频词汇列表
@@ -91,15 +81,13 @@ class UserVocabularyLevel(VocabularyLevelEstimator):
            self.filter_user_frequency()
    def filter_user_frequency(self):
        # 过滤出最近一周的生词，用于计算用户词汇水平
        stemmer = snowballstemmer.stemmer('english')
        range_datetime = (datetime.now() - timedelta(days=7)).strftime('%Y%m%d%H%M')
        self.filtered_frequency = []
        for word in self.d:
-            if is_english_word(word) and is_valid_datetime_string(self.d[word][0]):
+            if is_english_word(word) :
-                if self.d[word][0] > range_datetime and word not in self.filtered_frequency:
+                if  word not in self.filtered_frequency:
                    self.filtered_frequency.append(stemmer.stemWord(word))
    @property
@@ -169,3 +157,8 @@ if __name__ == '__main__':
    with open('test/article_test.p', 'rb') as file:
        loaded_data = pickle.load(file)
        print(loaded_data)
        article1 = ArticleVocabularyLevel('source')
        article2 = ArticleVocabularyLevel('open source')