From f8723ed4d21f3276cb659dbaac164ee4d6a2e53a Mon Sep 17 00:00:00 2001 From: sinksank <1007286821@qq.com> Date: Wed, 28 May 2025 12:28:08 +0800 Subject: [PATCH] =?UTF-8?q?=E7=AC=AC=E4=BA=8C=E6=AC=A1=E4=BF=AE=E6=94=B9?= =?UTF-8?q?=20vocabulary.py?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- app/vocabulary.py | 41 +++++++++++++++++++---------------------- 1 file changed, 19 insertions(+), 22 deletions(-) diff --git a/app/vocabulary.py b/app/vocabulary.py index 17e4386..0ef490c 100644 --- a/app/vocabulary.py +++ b/app/vocabulary.py @@ -6,25 +6,24 @@ Hui, 2024-09-23 Last upated: 2024-09-25, 2024-09-30 ''' +import os import pickle +import random import re - +from collections import defaultdict +from datetime import datetime, timedelta +import heapq +import snowballstemmer +from flask import session +import enchant # word_lst = 词汇表 def load_record(pickle_fname): - try: - with open(pickle_fname, 'rb') as f: - d = pickle.load(f) - return d - except FileNotFoundError: - print("未发现文件") - except pickle.UnpicklingError: - print("无法打开文件") - except Exception as e: - print("文件处理异常") - return None + with open(pickle_fname, 'rb') as f: + d = pickle.load(f) + return d @@ -34,7 +33,7 @@ def is_english_word(word): if word == 'xyz': return False return bool(pattern.match(word)) - +t def is_valid_datetime_string(date_string, format='%Y%m%d%H%M'): try: datetime.strptime(date_string, format) @@ -44,7 +43,10 @@ def is_valid_datetime_string(date_string, format='%Y%m%d%H%M'): def remove_non_words(input_string): cleaned_string = re.sub(r'[^a-zA-Z\s]', '', input_string) - return cleaned_string.strip() + words = cleaned_string.split() + result = ' '.join(words) + return result + class VocabularyLevelEstimator: @@ -104,11 +106,8 @@ class UserVocabularyLevel(VocabularyLevelEstimator): return # 如果词库为空,直接返回 stemmer = snowballstemmer.stemmer('english') - #硬编码情况 # date_str = "20240805" - # 实际时间 - now = datetime.now() - date_str = now.strftime('%Y%m%d') + date_str = datetime.now().strftime('%Y%m%d') dt = datetime.strptime(date_str, "%Y%m%d") range_datetime = dt.strftime("%Y%m%d%H%M") # 设定筛选时间范围 @@ -216,10 +215,8 @@ class ArticleVocabularyLevel(VocabularyLevelEstimator): if __name__ == '__main__': - now = datetime.now() - print(now.strftime("%Y%m%d")) -# user = UserVocabularyLevel({}) -# print(user.level) + user = UserVocabularyLevel({}) + print(user.level) # _test = load_record('static/words_and_tests.p') # print(_test) #