请老师查阅我们组更新的vocabulary.py

Bug585-ChenJia
陈佳 2025-05-29 15:01:16 +08:00
parent d9512c929b
commit d8e5fa4f54
1 changed files with 117 additions and 0 deletions

117
app/vocabulary(2).py Normal file
View File

@ -0,0 +1,117 @@
import pickle
import string
import os
def read_pickle(file_name):
    """Load and return the object stored in the pickle file *file_name*.

    When *file_name* is not an existing regular file, a notice is printed
    and an empty dict is returned instead of raising.

    NOTE(review): ``pickle.load`` can execute arbitrary code while
    unpickling, so this must only be pointed at trusted files.
    """
    if os.path.isfile(file_name):
        with open(file_name, 'rb') as handle:
            return pickle.load(handle)

    # Missing path (or a directory): report it and fall back to an empty dict.
    print(f"File {file_name} does not exist.")
    return {}
class VocabularyLevelEstimator:
    """Base class holding the shared word database and word-validity check.

    The subclasses in this file each expose their result through a ``level``
    property; this base class does not define ``level`` itself.
    """

    # Word database, loaded once when the class body is executed.
    # ``read_pickle`` returns an empty dict when the file is missing, in which
    # case no word is ever considered known.
    # NOTE(review): the path is relative, so it is resolved against the
    # current working directory of the process.
    _word_db = read_pickle('words_and_tests.p')

    def evaluate_word_difficulty(self, word):
        """Return 5 if *word* is found in the word database, otherwise 0.

        Empty strings, strings made only of punctuation, and purely numeric
        strings are rejected up front and always score 0.
        """
        if not word or all(ch in string.punctuation for ch in word) or word.isdigit():
            return 0
        return 5 if word in self._word_db else 0

    @property
    def difficulty_level(self):
        """Return the estimator's difficulty level.

        The previous implementation unconditionally called
        ``self.calculate_longest_words_level()``, a method that no class in
        this file defines, so reading this property always raised
        ``AttributeError``. That hook is still honoured when a subclass
        provides it; otherwise the value of the subclass's ``level`` property
        is returned.

        Raises:
            AttributeError: if the instance provides neither
                ``calculate_longest_words_level`` nor ``level``.
        """
        calculate = getattr(self, 'calculate_longest_words_level', None)
        if callable(calculate):
            return calculate()
        # NOTE(review): falling back to ``level`` assumes that is what this
        # property was meant to expose -- confirm with the authors.
        return self.level
class UserVocabularyLevel(VocabularyLevelEstimator):
    """Estimate a user's vocabulary level from their most recent words."""

    def __init__(self, vocab_dict):
        """Store *vocab_dict* and remember its last three keys.

        "Last" means last in the dictionary's key order.
        """
        self.vocab_dict = vocab_dict
        all_keys = list(vocab_dict.keys())
        self.recent_words = all_keys[-3:]

    @property
    def level(self):
        """Return the user's level based on the valid recent words.

        A word is valid when ``evaluate_word_difficulty`` scores it above 0.
        No valid words gives 0, exactly one is scored by
        ``score_single_word``, and two or more by ``score_multiple_words``.
        """
        known = [
            candidate
            for candidate in self.recent_words
            if self.evaluate_word_difficulty(candidate) > 0
        ]
        if not known:
            return 0
        if len(known) == 1:
            return self.score_single_word(known[0])
        return self.score_multiple_words(known)

    def score_single_word(self, word):
        """Score one word by length: under 7 -> 2, exactly 7 -> 5, longer -> 6."""
        size = len(word)
        if size >= 8:
            return 6
        if size == 7:
            return 5
        return 2

    def score_multiple_words(self, valid_words):
        """Score several words from their average per-word points.

        Each word earns 1 point (length under 5), 3 points (length 5 to 7)
        or 5 points (length 8 or more). The average is multiplied by 1.6,
        truncated to an int, and capped at 8.
        """
        points = sum(
            1 if len(item) < 5 else 3 if len(item) < 8 else 5
            for item in valid_words
        )
        mean_points = points / len(valid_words)
        scaled = int(mean_points * 1.6)
        return min(scaled, 8)
class ArticleVocabularyLevel(VocabularyLevelEstimator):
    """Estimate the difficulty of an article from its longest known words."""

    # (exclusive upper length bound, rating) pairs, checked in order; a word
    # whose length reaches the last bound gets _TOP_RATING instead.
    _LENGTH_RATINGS = ((5, 0.1), (8, 0.2), (11, 0.3))
    _TOP_RATING = 0.5

    def __init__(self, article_text):
        """Store *article_text* and split it into cleaned words.

        The text is lower-cased and split on whitespace; each token has its
        leading and trailing punctuation stripped, and tokens that end up
        empty are dropped.
        """
        self.article_text = article_text
        stripped_tokens = (
            token.strip(string.punctuation)
            for token in article_text.lower().split()
        )
        self.words = [token for token in stripped_tokens if token]

    @classmethod
    def _rating_for_length(cls, length):
        """Return the rating contributed by a word of the given length."""
        for upper_bound, rating in cls._LENGTH_RATINGS:
            if length < upper_bound:
                return rating
        return cls._TOP_RATING

    @property
    def level(self):
        """Return the article level from its ten longest valid words.

        A word is valid when ``evaluate_word_difficulty`` scores it above 0.
        With no valid words the result is 0; otherwise it is 2 plus the sum
        of the length-based ratings of up to ten longest valid words.
        """
        candidates = [
            token for token in self.words
            if self.evaluate_word_difficulty(token) > 0
        ]
        if not candidates:
            return 0
        # Stable sort, longest first; ties keep their order of appearance.
        candidates.sort(key=len, reverse=True)
        top_ten = candidates[:10]
        return sum(self._rating_for_length(len(token)) for token in top_ten) + 2
if __name__ == '__main__':
    # Manual smoke run: dump the stored frequency dictionary, then print the
    # user level derived from it and the level of a short sample sentence.
    vocab_dict = read_pickle('frequency_mrlan85.pickle')
    print(vocab_dict)
    print(UserVocabularyLevel(vocab_dict).level)
    print(ArticleVocabularyLevel('This is an interesting article.').level)