"""Estimate vocabulary difficulty levels for users and for article texts."""

import os
import pickle
import string


def read_pickle(file_name):
    """Read data from a pickle file if it exists.

    Args:
        file_name: Path of the pickle file to load.

    Returns:
        The unpickled object, or an empty dict when ``file_name`` is not an
        existing regular file (a notice is printed in that case).
    """
    if not os.path.isfile(file_name):
        print(f"File {file_name} does not exist.")
        return {}
    # SECURITY: pickle.load can execute arbitrary code embedded in the file.
    # Only call this on files that come from a trusted source.
    with open(file_name, 'rb') as pickle_file:
        return pickle.load(pickle_file)


class VocabularyLevelEstimator:
    """Base class providing the word lookup shared by the level estimators.

    Subclasses are expected to expose a ``level`` property; the
    ``difficulty_level`` alias defined here simply forwards to it.
    """

    # Word database, loaded once when the class is defined.  It is an empty
    # dict when 'words_and_tests.p' is missing from the working directory.
    _word_db = read_pickle('words_and_tests.p')

    def evaluate_word_difficulty(self, word):
        """Evaluate the difficulty level of a word.

        Args:
            word: Candidate word.

        Returns:
            5 when ``word`` is found in the word database, otherwise 0.
            Non-string values, empty strings, punctuation-only strings and
            all-digit strings are always rated 0.
        """
        # Reject anything that cannot be a dictionary word before the lookup;
        # the isinstance guard keeps non-string input from raising.
        if not isinstance(word, str) or not word:
            return 0
        if word.isdigit() or all(ch in string.punctuation for ch in word):
            return 0
        return 5 if word in self._word_db else 0

    @property
    def difficulty_level(self):
        """Alias for the ``level`` property defined by the concrete subclass.

        The original implementation called ``calculate_longest_words_level``,
        a method that is not defined anywhere in this module, so every access
        raised AttributeError.
        """
        return self.level


class UserVocabularyLevel(VocabularyLevelEstimator):
    """Estimate a user's level from the last words of their vocabulary dict."""

    def __init__(self, vocab_dict):
        """Store the vocabulary and pick the words used for scoring.

        Args:
            vocab_dict: Mapping whose keys are the user's words.  Only the
                keys are read here.
        """
        self.vocab_dict = vocab_dict
        # Use the last three words (in the mapping's iteration order).
        self.recent_words = list(vocab_dict)[-3:]

    @property
    def level(self):
        """Calculate the user's vocabulary level considering word validity.

        Returns:
            0 when none of the recent words is in the word database, the
            single-word score when exactly one is, otherwise the
            multiple-word score.
        """
        valid_words = [
            word for word in self.recent_words
            if self.evaluate_word_difficulty(word) > 0
        ]
        if not valid_words:
            return 0
        if len(valid_words) == 1:
            return self.score_single_word(valid_words[0])
        return self.score_multiple_words(valid_words)

    def score_single_word(self, word):
        """Evaluate the score of a single word from its length.

        Returns:
            2 for fewer than 7 characters, 5 for exactly 7, 6 for 8 or more.
        """
        length = len(word)
        if length < 7:
            return 2
        if length < 8:
            return 5
        return 6

    def score_multiple_words(self, valid_words):
        """Calculate the score for multiple valid words.

        Each word contributes 1 (fewer than 5 characters), 3 (5 to 7) or
        5 (8 or more) points.  The mean is scaled by 1.6, truncated to an
        int and capped at 8.

        Args:
            valid_words: Non-empty list of words.
        """
        total_score = 0
        for word in valid_words:
            length = len(word)
            if length < 5:
                total_score += 1
            elif length < 8:
                total_score += 3
            else:
                total_score += 5

        average_score = total_score / len(valid_words)
        return min(int(average_score * 1.6), 8)


class ArticleVocabularyLevel(VocabularyLevelEstimator):
    """Estimate an article's level from its longest known words."""

    def __init__(self, article_text):
        """Tokenise the article into lower-case words without punctuation.

        Args:
            article_text: Article content as a string.
        """
        self.article_text = article_text
        # Strip surrounding punctuation once per token, then drop tokens that
        # consisted of punctuation only.
        stripped_tokens = (
            token.strip(string.punctuation)
            for token in article_text.lower().split()
        )
        self.words = [word for word in stripped_tokens if word]

    @property
    def level(self):
        """Evaluate the article difficulty from the longest ten valid words.

        Returns:
            0 when no word of the article is in the word database; otherwise
            2 plus the sum of the per-word ratings, where a word rates 0.1
            (fewer than 5 characters), 0.2 (5 to 7), 0.3 (8 to 10) or
            0.5 (11 or more).
        """
        valid_words = [
            word for word in self.words
            if self.evaluate_word_difficulty(word) > 0
        ]
        longest_words = sorted(valid_words, key=len, reverse=True)[:10]
        if not longest_words:
            return 0

        difficulty_ratings = []
        for word in longest_words:
            length = len(word)
            if length < 5:
                difficulty_ratings.append(0.1)
            elif length < 8:
                difficulty_ratings.append(0.2)
            elif length < 11:
                difficulty_ratings.append(0.3)
            else:
                difficulty_ratings.append(0.5)

        return sum(difficulty_ratings) + 2


if __name__ == '__main__':
    vocab_dict = read_pickle('frequency_mrlan85.pickle')
    print(vocab_dict)
    user = UserVocabularyLevel(vocab_dict)
    print(user.level)
    article = ArticleVocabularyLevel('This is an interesting article.')
    print(article.level)