请老师查阅我们组更新的vocabulary.py
parent
d9512c929b
commit
d8e5fa4f54
|
@ -0,0 +1,117 @@
|
|||
import pickle
|
||||
import string
|
||||
import os
|
||||
|
||||
|
||||
def read_pickle(file_name):
    """Load and return the object stored in the pickle file *file_name*.

    If *file_name* is not an existing regular file, a notice is printed
    and an empty dict is returned instead of raising.

    Note: unpickling can execute arbitrary code, so only point this at
    files from a trusted source.
    """
    if os.path.isfile(file_name):
        with open(file_name, 'rb') as handle:
            return pickle.load(handle)

    # Missing path (or not a regular file): report it and fall back to {}.
    print(f"File {file_name} does not exist.")
    return {}
|
||||
|
||||
|
||||
class VocabularyLevelEstimator:
    """Common base for the vocabulary level estimators in this module.

    Provides the shared word database and the per-word validity check.
    Subclasses are expected to expose their result through a ``level``
    property; ``difficulty_level`` is an alias for it.
    """

    # Word database, loaded once when the class is defined and shared by
    # all instances. read_pickle returns {} when the file is missing, in
    # which case every word is rated 0.
    _word_db = read_pickle('words_and_tests.p')

    def evaluate_word_difficulty(self, word):
        """Return 5 if *word* is in the word database, otherwise 0.

        Empty values, punctuation-only strings and purely numeric strings
        are rejected up front and always rate 0.
        """
        if not word or all(ch in string.punctuation for ch in word) or word.isdigit():
            return 0
        return 5 if word in self._word_db else 0

    @property
    def difficulty_level(self):
        """Return the estimator's level (alias for ``self.level``).

        The previous implementation called
        ``self.calculate_longest_words_level()``, which is not defined
        anywhere in this module, so the property always raised
        AttributeError. It now delegates to the ``level`` property that
        the concrete subclasses implement. On a class that does not
        define ``level`` this still raises AttributeError.
        """
        return self.level
|
||||
|
||||
|
||||
class UserVocabularyLevel(VocabularyLevelEstimator):
    """Estimate a user's vocabulary level from their most recent words."""

    def __init__(self, vocab_dict):
        """Store *vocab_dict* and remember its last three keys."""
        self.vocab_dict = vocab_dict
        # Only the last three keys of the dictionary are scored.
        all_keys = list(vocab_dict.keys())
        self.recent_words = all_keys[-3:]

    @property
    def level(self):
        """Return the user's level based on the valid recent words.

        Words rated 0 by ``evaluate_word_difficulty`` are ignored. With
        no valid word the level is 0; with exactly one it comes from
        ``score_single_word``; otherwise from ``score_multiple_words``.
        """
        usable = []
        for candidate in self.recent_words:
            if self.evaluate_word_difficulty(candidate) > 0:
                usable.append(candidate)

        if not usable:
            return 0
        if len(usable) == 1:
            return self.score_single_word(usable[0])
        return self.score_multiple_words(usable)

    def score_single_word(self, word):
        """Score one word by length: under 7 -> 2, exactly 7 -> 5, longer -> 6."""
        size = len(word)
        if size >= 8:
            return 6
        if size == 7:
            return 5
        return 2

    def score_multiple_words(self, valid_words):
        """Score several words from the average of their length points.

        Each word earns 1 point (length under 5), 3 points (length 5-7)
        or 5 points (length 8 and above). The average is scaled by 1.6,
        truncated to an int and capped at 8.
        """
        points = sum(
            1 if len(entry) < 5 else 3 if len(entry) < 8 else 5
            for entry in valid_words
        )
        scaled = points / len(valid_words) * 1.6
        return min(int(scaled), 8)
|
||||
|
||||
|
||||
class ArticleVocabularyLevel(VocabularyLevelEstimator):
    """Estimate the vocabulary level of an article's text."""

    def __init__(self, article_text):
        """Store *article_text* and split it into lowercase, stripped words."""
        self.article_text = article_text
        # Lowercase, split on whitespace, trim surrounding punctuation and
        # drop tokens that end up empty.
        trimmed = (
            token.strip(string.punctuation)
            for token in article_text.lower().split()
        )
        self.words = [token for token in trimmed if token]

    @property
    def level(self):
        """Return the article level from its ten longest valid words.

        Words rated 0 by ``evaluate_word_difficulty`` are ignored. If no
        valid word remains the level is 0. Otherwise each of the (up to)
        ten longest words contributes a rating by length and the result
        is the sum of those ratings plus 2.
        """
        candidates = [
            token for token in self.words
            if self.evaluate_word_difficulty(token) > 0
        ]
        candidates.sort(key=len, reverse=True)
        top_ten = candidates[:10]
        if not top_ten:
            return 0

        # (exclusive upper length bound, rating); 0.5 for anything longer.
        bands = ((5, 0.1), (8, 0.2), (11, 0.3))
        ratings = [
            next((rating for bound, rating in bands if len(token) < bound), 0.5)
            for token in top_ten
        ]
        return sum(ratings) + 2
|
||||
|
||||
|
||||
if __name__ == '__main__':
    # Manual demo: print a stored user vocabulary, that user's level,
    # and the level of a short sample article.
    user_vocab = read_pickle('frequency_mrlan85.pickle')
    print(user_vocab)
    print(UserVocabularyLevel(user_vocab).level)
    print(ArticleVocabularyLevel('This is an interesting article.').level)
|
Loading…
Reference in New Issue