From a1255b2f3d5a1a8db7a41d1d31cde977a968c8f4 Mon Sep 17 00:00:00 2001 From: wanglulu <3409274047@qq.com> Date: Mon, 9 Jun 2025 17:32:14 +0800 Subject: [PATCH] =?UTF-8?q?vocabulary=E6=9C=80=E7=BB=88=E7=89=88?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- app/test/article_test.p | Bin 0 -> 385 bytes app/vocabulary.py | 33 +++++++++++++-------------------- 2 files changed, 13 insertions(+), 20 deletions(-) create mode 100644 app/test/article_test.p diff --git a/app/test/article_test.p b/app/test/article_test.p new file mode 100644 index 0000000000000000000000000000000000000000..4069c82bd6e9a4edc4c2bfe7fea1bbe5ad37f229 GIT binary patch literal 385 zcmXAlO-=(b423~r#VOtas<{CR7D!ZLfz(A2J2!DBRx|OCpJ~gAL%@A;re4K)FYj~w z^YFLa->vnbc6dpgN3P=Vgh1a6e@!&4?INRQZN(HyGErrZU2_3a@TLo6gZD_15aq=_sTpaJG~!GrP6(V=^Q(DDJZd0wFr2pN zF;SwC2Rx&2mOj~6qntU~>jFAxtDSjhxm5IqMjLNfhuXqXjK;2k4W4zvXJ~ 0 else 0.0 def get_word_level(self, word): # 常见高频词汇列表 @@ -91,15 +81,13 @@ class UserVocabularyLevel(VocabularyLevelEstimator): self.filter_user_frequency() def filter_user_frequency(self): - # 过滤出最近一周的生词,用于计算用户词汇水平 stemmer = snowballstemmer.stemmer('english') - range_datetime = (datetime.now() - timedelta(days=7)).strftime('%Y%m%d%H%M') self.filtered_frequency = [] for word in self.d: - if is_english_word(word) and is_valid_datetime_string(self.d[word][0]): - if self.d[word][0] > range_datetime and word not in self.filtered_frequency: + if is_english_word(word) : + if word not in self.filtered_frequency: self.filtered_frequency.append(stemmer.stemWord(word)) @property @@ -169,3 +157,8 @@ if __name__ == '__main__': with open('test/article_test.p', 'rb') as file: loaded_data = pickle.load(file) print(loaded_data) + + article1 = ArticleVocabularyLevel('source') + article2 = ArticleVocabularyLevel('open source') + +