请老师查阅我们组更新的vocabulary.py
parent
d9512c929b
commit
d8e5fa4f54
|
@ -0,0 +1,117 @@
|
||||||
|
import pickle
|
||||||
|
import string
|
||||||
|
import os
|
||||||
|
|
||||||
|
|
||||||
|
def read_pickle(file_name):
    """Load and return the object stored in a pickle file.

    Args:
        file_name: Path of the pickle file to read.

    Returns:
        The unpickled object, or an empty dict when ``file_name`` is not an
        existing regular file (a notice is printed in that case).
    """
    if os.path.isfile(file_name):
        # NOTE: unpickling can execute arbitrary code; only point this at
        # files the project itself produced.
        with open(file_name, 'rb') as pickle_file:
            return pickle.load(pickle_file)

    print(f"File {file_name} does not exist.")
    return {}
|
||||||
|
|
||||||
|
|
||||||
|
class VocabularyLevelEstimator:
    """Base class for estimators that rate vocabulary difficulty.

    Concrete estimators provide a ``level`` property; ``difficulty_level`` is
    an alias that delegates to it.
    """

    # Word database, loaded once when the class is defined. The path is
    # relative to the current working directory; read_pickle returns an empty
    # dict when the file is missing, in which case every word is rated 0.
    _word_db = read_pickle('words_and_tests.p')

    def evaluate_word_difficulty(self, word):
        """Rate a single word as known to the database (5) or not (0).

        Args:
            word: The string to look up.

        Returns:
            0 for an empty string, a string made only of punctuation, or a
            string of digits; otherwise 5 if ``word`` is present in the word
            database and 0 if it is not.
        """
        # Reject inputs that cannot be real words before touching the database.
        if not word or all(ch in string.punctuation for ch in word) or word.isdigit():
            return 0
        return 5 if word in self._word_db else 0

    @property
    def level(self):
        """Vocabulary level computed by a concrete estimator.

        Raises:
            NotImplementedError: Always, on the base class; subclasses
                override this property.
        """
        raise NotImplementedError(
            f"{type(self).__name__} must define a 'level' property."
        )

    @property
    def difficulty_level(self):
        """Alias for ``level``.

        The previous implementation called ``calculate_longest_words_level``,
        a method that is not defined anywhere in this module, so every access
        raised ``AttributeError``. Delegating to ``level`` makes the property
        usable on every subclass.
        """
        return self.level
|
||||||
|
|
||||||
|
|
||||||
|
class UserVocabularyLevel(VocabularyLevelEstimator):
    """Estimate a user's vocabulary level from their most recent words."""

    def __init__(self, vocab_dict):
        """Keep the vocabulary mapping and remember its last three keys.

        Args:
            vocab_dict: Mapping whose keys are the user's words; only the
                final three keys (in iteration order) are scored.
        """
        self.vocab_dict = vocab_dict
        every_word = list(vocab_dict.keys())
        self.recent_words = every_word[-3:]

    @property
    def level(self):
        """Return the user's level based on the valid recent words.

        A word is valid when ``evaluate_word_difficulty`` rates it above 0.
        No valid words gives 0, exactly one is scored by
        ``score_single_word``, and two or more by ``score_multiple_words``.
        """
        known = []
        for candidate in self.recent_words:
            if self.evaluate_word_difficulty(candidate) > 0:
                known.append(candidate)

        if not known:
            return 0
        if len(known) == 1:
            return self.score_single_word(known[0])
        return self.score_multiple_words(known)

    def score_single_word(self, word):
        """Score one word by length: 2 below 7 characters, 5 at 7, else 6."""
        size = len(word)
        if size < 7:
            return 2
        if size == 7:
            return 5
        return 6

    def score_multiple_words(self, valid_words):
        """Score several words from their average per-word points.

        Each word earns 1 point (shorter than 5 characters), 3 points
        (5 to 7 characters) or 5 points (8 or more). The average is scaled
        by 1.6, truncated to an int and capped at 8.
        """
        points = sum(
            1 if len(entry) < 5 else (3 if len(entry) < 8 else 5)
            for entry in valid_words
        )
        mean_points = points / len(valid_words)
        scaled = int(mean_points * 1.6)
        return scaled if scaled < 8 else 8
|
||||||
|
|
||||||
|
|
||||||
|
class ArticleVocabularyLevel(VocabularyLevelEstimator):
    """Estimate the difficulty of an article from its longest known words."""

    def __init__(self, article_text):
        """Store the text and split it into lower-cased, stripped words.

        Args:
            article_text: The article as a single string.
        """
        self.article_text = article_text
        # Tokens that are empty once surrounding punctuation is stripped are dropped.
        cleaned = []
        for token in article_text.lower().split():
            stripped = token.strip(string.punctuation)
            if stripped:
                cleaned.append(stripped)
        self.words = cleaned

    @property
    def level(self):
        """Return the article level from its ten longest valid words.

        A word is valid when ``evaluate_word_difficulty`` rates it above 0.
        Returns 0 when there are none; otherwise 2 plus the sum of a
        per-word rating of 0.1, 0.2, 0.3 or 0.5 chosen by word length.
        """

        def rating_for(size):
            # Length bands: <5, 5-7, 8-10, 11 and above.
            if size < 5:
                return 0.1
            if size < 8:
                return 0.2
            if size < 11:
                return 0.3
            return 0.5

        known = [w for w in self.words if self.evaluate_word_difficulty(w) > 0]
        known.sort(key=len, reverse=True)
        top_ten = known[:10]
        if not top_ten:
            return 0

        # Summed in longest-first order so the float result is unchanged.
        ratings = [rating_for(len(w)) for w in top_ten]
        return sum(ratings) + 2
|
||||||
|
|
||||||
|
|
||||||
|
if __name__ == '__main__':
    # Manual smoke test: load a stored user vocabulary, show it, then print
    # the computed user level and the level of a short sample article.
    vocab_dict = read_pickle('frequency_mrlan85.pickle')
    print(vocab_dict)

    user = UserVocabularyLevel(vocab_dict)
    print(user.level)

    article = ArticleVocabularyLevel('This is an interesting article.')
    print(article.level)
|
Loading…
Reference in New Issue