Patch notes (free text ahead of the first "diff --git" header; patch tools skip it).

Contents: adds the binary fixture app/test/article_test.p and edits app/vocabulary.py:
  * removes is_valid_datetime_string and the date-window filter in filter_user_frequency;
  * switches the words_and_tests.p path separator from a backslash to a forward slash;
  * rewrites VocabularyLevelEstimator.level to accumulate a total instead of printing;
  * creates two ArticleVocabularyLevel objects in the __main__ block.

Reconstruction caveat: this patch had been flattened onto one line. Line breaks were
restored so that every hunk matches its "@@" line counts, but the positions of blank
context lines and the Python indentation are inferred (notably the depth of the added
"num += 1" and of "print(loaded_data)"). TODO: verify against the repository before applying.

NOTE(review), to be confirmed against the full file:
  * level adds self._test[word] to a float; the class comment describes _test values as
    lists of test names, so this presumably raises TypeError.
  * filter_user_frequency checks the unstemmed word against a list of stems, so duplicate
    stems can presumably still be appended.
  * article1 / article2 are assigned but not used within this patch.

diff --git a/app/test/article_test.p b/app/test/article_test.p
new file mode 100644
index 0000000..4069c82
Binary files /dev/null and b/app/test/article_test.p differ
diff --git a/app/vocabulary.py b/app/vocabulary.py
index 4468d11..7e65d5b 100644
--- a/app/vocabulary.py
+++ b/app/vocabulary.py
@@ -22,14 +22,6 @@ def is_english_word(word):
     return bool(pattern.match(word))
 
 
-# 判断日期格式是否有效
-def is_valid_datetime_string(date_string, format='%Y%m%d%H%M'):
-    try:
-        datetime.strptime(date_string, format)
-        return True
-    except ValueError:
-        return False
-
 
 # 去除非单词字符
 def remove_non_words(input_string):
@@ -41,19 +33,17 @@ def remove_non_words(input_string):
 # 主类:词汇水平估算器
 class VocabularyLevelEstimator:
     # 词汇表(单词:【"雅思","高考"...】)
-    _test = load_record('static\words_and_tests.p') # 词汇到测试来源的映射
+    _test = load_record('static/words_and_tests.p') # 词汇到测试来源的映射
 
     @property
     def level(self):
-        total = 0.0 # 总评分
-        num = 0 # 计算的单词数
+        total = 0.0
+        num = 0
         for word in self.word_lst:
-            num += 1
             if word in self._test:
-                print(f'{word} : {self._test[word]}') # 输出单词及其来源
-            else:
-                print(f'{word}') # 输出没有评分的单词
-        return total / num if num else 0.0 # 返回平均值
+                total += self._test[word] # Assuming _test[word] returns a difficulty score
+                num += 1
+        return total / num if num > 0 else 0.0
 
     def get_word_level(self, word):
         # 常见高频词汇列表
@@ -91,15 +81,13 @@ class UserVocabularyLevel(VocabularyLevelEstimator):
         self.filter_user_frequency()
 
     def filter_user_frequency(self):
-        # 过滤出最近一周的生词,用于计算用户词汇水平
         stemmer = snowballstemmer.stemmer('english')
-        range_datetime = (datetime.now() - timedelta(days=7)).strftime('%Y%m%d%H%M')
 
         self.filtered_frequency = []
 
         for word in self.d:
-            if is_english_word(word) and is_valid_datetime_string(self.d[word][0]):
-                if self.d[word][0] > range_datetime and word not in self.filtered_frequency:
+            if is_english_word(word) :
+                if word not in self.filtered_frequency:
                     self.filtered_frequency.append(stemmer.stemWord(word))
 
     @property
@@ -169,3 +157,8 @@ if __name__ == '__main__':
     with open('test/article_test.p', 'rb') as file:
         loaded_data = pickle.load(file)
         print(loaded_data)
+
+    article1 = ArticleVocabularyLevel('source')
+    article2 = ArticleVocabularyLevel('open source')
+
+