第二次修改 vocabulary.py

Bug585-WangZixiang
王子翔 2025-05-28 12:28:08 +08:00
parent 40e537af9f
commit f8723ed4d2
1 changed file with 19 additions and 22 deletions

View File

@ -6,25 +6,24 @@
Hui, 2024-09-23 Hui, 2024-09-23
Last updated: 2024-09-25, 2024-09-30 Last updated: 2024-09-25, 2024-09-30
''' '''
import os
import pickle import pickle
import random
import re import re
from collections import defaultdict
from datetime import datetime, timedelta
import heapq
import snowballstemmer
from flask import session
import enchant
# word_lst = 词汇表 # word_lst = 词汇表
def load_record(pickle_fname): def load_record(pickle_fname):
try:
with open(pickle_fname, 'rb') as f: with open(pickle_fname, 'rb') as f:
d = pickle.load(f) d = pickle.load(f)
return d return d
except FileNotFoundError:
print("未发现文件")
except pickle.UnpicklingError:
print("无法打开文件")
except Exception as e:
print("文件处理异常")
return None
@ -34,7 +33,7 @@ def is_english_word(word):
if word == 'xyz': if word == 'xyz':
return False return False
return bool(pattern.match(word)) return bool(pattern.match(word))
t
def is_valid_datetime_string(date_string, format='%Y%m%d%H%M'): def is_valid_datetime_string(date_string, format='%Y%m%d%H%M'):
try: try:
datetime.strptime(date_string, format) datetime.strptime(date_string, format)
@ -44,7 +43,10 @@ def is_valid_datetime_string(date_string, format='%Y%m%d%H%M'):
def remove_non_words(input_string): def remove_non_words(input_string):
cleaned_string = re.sub(r'[^a-zA-Z\s]', '', input_string) cleaned_string = re.sub(r'[^a-zA-Z\s]', '', input_string)
return cleaned_string.strip() words = cleaned_string.split()
result = ' '.join(words)
return result
class VocabularyLevelEstimator: class VocabularyLevelEstimator:
@ -104,11 +106,8 @@ class UserVocabularyLevel(VocabularyLevelEstimator):
return # 如果词库为空,直接返回 return # 如果词库为空,直接返回
stemmer = snowballstemmer.stemmer('english') stemmer = snowballstemmer.stemmer('english')
#硬编码情况
# date_str = "20240805" # date_str = "20240805"
# 实际时间 date_str = datetime.now().strftime('%Y%m%d')
now = datetime.now()
date_str = now.strftime('%Y%m%d')
dt = datetime.strptime(date_str, "%Y%m%d") dt = datetime.strptime(date_str, "%Y%m%d")
range_datetime = dt.strftime("%Y%m%d%H%M") # 设定筛选时间范围 range_datetime = dt.strftime("%Y%m%d%H%M") # 设定筛选时间范围
@ -216,10 +215,8 @@ class ArticleVocabularyLevel(VocabularyLevelEstimator):
if __name__ == '__main__': if __name__ == '__main__':
now = datetime.now() user = UserVocabularyLevel({})
print(now.strftime("%Y%m%d")) print(user.level)
# user = UserVocabularyLevel({})
# print(user.level)
# _test = load_record('static/words_and_tests.p') # _test = load_record('static/words_and_tests.p')
# print(_test) # print(_test)
# #