import pickle
import string
import os


def read_pickle(file_name):
    """Load and return the object stored in a pickle file.

    Args:
        file_name: Path of the pickle file to read.

    Returns:
        The unpickled object, or an empty dict when ``file_name`` is not an
        existing regular file (a notice is printed in that case).
    """
    if not os.path.isfile(file_name):
        print(f"File {file_name} does not exist.")
        return {}
    # SECURITY: pickle.load can execute arbitrary code while deserializing.
    # Only point this function at files that come from a trusted source.
    with open(file_name, 'rb') as f:
        return pickle.load(f)


class VocabularyLevelEstimator:
    """Base class providing the shared word database and validity check.

    Subclasses expose their computed score through a ``level`` property.
    """

    # Loaded once, when the class body is executed, and shared by every
    # instance. It is an empty dict when 'words_and_tests.p' is missing.
    _word_db = read_pickle('words_and_tests.p')

    def evaluate_word_difficulty(self, word):
        """Return 5 when ``word`` is in the word database, otherwise 0.

        Empty strings, punctuation-only strings and digit-only strings
        always score 0, regardless of the database contents.
        """
        if not word:
            return 0
        if all(ch in string.punctuation for ch in word) or word.isdigit():
            return 0
        return 5 if word in self._word_db else 0

    @property
    def difficulty_level(self):
        """Return the difficulty level computed by the concrete subclass.

        A ``calculate_longest_words_level`` method is used when the instance
        provides one; otherwise the value of the ``level`` property is
        returned. This base class defines neither, so accessing the property
        on a bare ``VocabularyLevelEstimator`` raises ``AttributeError``.
        """
        # NOTE(review): falling back to ``level`` is the assumed intent;
        # confirm against callers of ``difficulty_level``.
        legacy_hook = getattr(self, 'calculate_longest_words_level', None)
        if callable(legacy_hook):
            return legacy_hook()
        return self.level


class UserVocabularyLevel(VocabularyLevelEstimator):
    """Estimate a user's level from the last words of a vocabulary dict."""

    def __init__(self, vocab_dict):
        """Store ``vocab_dict`` and remember its last three keys.

        Args:
            vocab_dict: Mapping whose keys are words; only the final three
                keys, in iteration order, are scored.
        """
        self.vocab_dict = vocab_dict
        self.recent_words = list(vocab_dict.keys())[-3:]

    @property
    def level(self):
        """Return the user's score based on the valid recent words.

        Words that ``evaluate_word_difficulty`` rates 0 are ignored. The
        result is 0 when nothing is left, the single-word score when exactly
        one word is left, and the multi-word score otherwise.
        """
        valid_words = [
            word for word in self.recent_words
            if self.evaluate_word_difficulty(word) > 0
        ]
        if not valid_words:
            return 0
        if len(valid_words) == 1:
            return self.score_single_word(valid_words[0])
        return self.score_multiple_words(valid_words)

    def score_single_word(self, word):
        """Score one word by length: under 7 -> 2, exactly 7 -> 5, else 6."""
        length = len(word)
        if length < 7:
            return 2
        if length == 7:
            return 5
        return 6

    def score_multiple_words(self, valid_words):
        """Return the averaged length score of ``valid_words``, capped at 8.

        Each word contributes 1 (length under 5), 3 (length 5 to 7) or
        5 (length 8 or more). The mean is scaled by 1.6 and truncated to an
        int. An empty sequence scores 0 instead of dividing by zero.
        """
        if not valid_words:
            return 0
        total_score = 0
        for word in valid_words:
            length = len(word)
            if length < 5:
                total_score += 1
            elif length < 8:
                total_score += 3
            else:
                total_score += 5
        average_score = total_score / len(valid_words)
        return min(int(average_score * 1.6), 8)


class ArticleVocabularyLevel(VocabularyLevelEstimator):
    """Estimate an article's difficulty from its longest valid words."""

    def __init__(self, article_text):
        """Store the text and split it into lower-cased, stripped words.

        Args:
            article_text: The article as a single string. It is split on
                whitespace; leading and trailing punctuation is removed from
                each token and tokens that end up empty are dropped.
        """
        self.article_text = article_text
        # Strip each token once, then drop the ones that were pure punctuation.
        stripped_tokens = (
            token.strip(string.punctuation)
            for token in article_text.lower().split()
        )
        self.words = [word for word in stripped_tokens if word]

    @staticmethod
    def _length_rating(length):
        """Map a word length to its contribution to the article level."""
        if length < 5:
            return 0.1
        if length < 8:
            return 0.2
        if length < 11:
            return 0.3
        return 0.5

    @property
    def level(self):
        """Return the article score based on its ten longest valid words.

        Returns 0 when the article holds no valid word; otherwise 2 plus the
        summed length ratings of up to ten of the longest valid words.
        """
        valid_words = [
            word for word in self.words
            if self.evaluate_word_difficulty(word) > 0
        ]
        longest_words = sorted(valid_words, key=len, reverse=True)[:10]
        if not longest_words:
            return 0
        return sum(self._length_rating(len(word)) for word in longest_words) + 2


if __name__ == '__main__':
    vocab_dict = read_pickle('frequency_mrlan85.pickle')
    print(vocab_dict)
    user = UserVocabularyLevel(vocab_dict)
    print(user.level)
    article = ArticleVocabularyLevel('This is an interesting article.')
    print(article.level)