import os
import pickle
import string
from collections import Counter

from difficulty import VocabularyLevelEstimator

# Default location of the Oxford word-level data (relative to the working
# directory) used by both estimators when no explicit path is supplied.
_DEFAULT_WORD_DATA_PATH = 'db/oxford_words.txt'

# Historical fallback application directory used by load_record().  It is kept
# as the default so existing deployments behave exactly as before; pass
# ``base_path`` to load_record() to use a different location.
_LEGACY_APP_DIR = r'C:\Users\ANNA\Desktop\app'

# Number of most recently seen distinct words a user's level is based on.
_RECENT_WORD_COUNT = 3

# Lowest non-zero level ever reported for a user.
_MIN_USER_LEVEL = 4

# Number of hardest words averaged for articles with more than three
# recognised words.
_HARDEST_WORD_COUNT = 10


# Helper functions
def is_punctuation_or_digit(s):
    """Return True if every character of *s* is punctuation, a digit or whitespace.

    An empty string yields True, because ``all()`` over nothing is True.
    """
    return all((c in string.punctuation or c.isdigit() or c.isspace()) for c in s)


def is_valid_word(word):
    """Return True if *word* is non-empty and consists only of alphabetic characters."""
    return word.isalpha()


def _extract_words(text):
    """Split *text* on whitespace and return its lower-cased alphabetic tokens.

    Leading/trailing punctuation is stripped from each token; tokens that are
    empty or still contain non-alphabetic characters afterwards are dropped.
    """
    tokens = (token.strip(string.punctuation).lower() for token in text.split())
    # ''.isalpha() is False, so empty tokens are rejected by is_valid_word too.
    return [token for token in tokens if is_valid_word(token)]


def _recent_valid_words(word_history, limit):
    """Return up to *limit* distinct valid words, most recently seen first.

    Args:
        word_history (dict): Mapping ``{word: [timestamp, ...]}``.
        limit (int): Maximum number of words to return.

    Returns:
        list: Words ordered by their latest timestamp (descending).  Empty if
        the history is empty or contains no valid words.
    """
    if not word_history:
        return []

    # Flatten to (timestamp, word) pairs so one sort orders every sighting.
    sightings = [
        (stamp, word)
        for word, stamps in word_history.items()
        for stamp in stamps
    ]
    sightings.sort(reverse=True)  # Most recent first

    recent = []
    seen = set()
    for _, word in sightings:
        if word in seen or not is_valid_word(word):
            continue
        recent.append(word)
        seen.add(word)
        if len(recent) == limit:
            break
    return recent


def _combine_article_levels(levels):
    """Fold the non-zero word levels of an article into a single score.

    * no levels      -> 0
    * one level      -> that level
    * two or three   -> the maximum plus 0.1 per additional word
    * more than that -> the mean of the hardest ``_HARDEST_WORD_COUNT`` levels
    """
    if not levels:
        return 0
    if len(levels) == 1:
        return levels[0]
    if len(levels) <= 3:
        return max(levels) + 0.1 * (len(levels) - 1)
    hardest = sorted(levels, reverse=True)[:_HARDEST_WORD_COUNT]
    return sum(hardest) / len(hardest)


class UserVocabularyLevel(VocabularyLevelEstimator):
    """Estimates a user's vocabulary level based on their word history.

    Word levels come from ``get_word_level``, which is inherited from
    ``VocabularyLevelEstimator`` (defined in the ``difficulty`` module, not in
    this file).  This class treats a level of 0 as "word not recognised".
    """

    def __init__(self, word_history, word_data_path=None):
        """
        Initialize with user's word history

        Args:
            word_history (dict): Words the user has learned, as
                ``{word: [timestamp, ...]}``
            word_data_path (str): Optional path to Oxford word level data;
                defaults to ``'db/oxford_words.txt'``
        """
        if word_data_path is None:
            word_data_path = _DEFAULT_WORD_DATA_PATH
        super().__init__(word_data_path)
        self.word_history = word_history
        self._level = None  # Cache for computed level

    @property
    def level(self):
        """Calculate user's vocabulary level based on their word history.

        The level is the mean level of the three most recently seen distinct
        valid words, but never lower than 4.  It is 0 when there is no usable
        history or when none of the recent words has a non-zero level.  The
        result is computed once and cached.
        """
        if self._level is None:
            recent_words = _recent_valid_words(self.word_history, _RECENT_WORD_COUNT)
            levels = [self.get_word_level(word) for word in recent_words]

            # Also true for an empty list, i.e. when there are no recent words.
            if all(lvl == 0 for lvl in levels):
                self._level = 0
            else:
                avg = sum(levels) / len(levels)
                # If all recent words are easy (avg < 4), set to 4
                self._level = avg if avg >= _MIN_USER_LEVEL else _MIN_USER_LEVEL
        return self._level

    def get_level_distribution(self):
        """Returns distribution of word levels in user's vocabulary.

        Returns:
            dict: ``{level: number_of_words}`` over all valid words in the
            history (a ``Counter``), or an empty dict if there is no history.
        """
        if not self.word_history:
            return {}
        return Counter(
            self.get_word_level(word)
            for word in self.word_history
            if is_valid_word(word)
        )


class ArticleVocabularyLevel(VocabularyLevelEstimator):
    """Estimates vocabulary level of an article.

    Word levels come from ``get_word_level``, which is inherited from
    ``VocabularyLevelEstimator`` (defined in the ``difficulty`` module, not in
    this file).  Words with a level of 0 or below are ignored when scoring.
    """

    def __init__(self, content, word_data_path=None):
        """
        Initialize with article content

        Args:
            content (str): The article text
            word_data_path (str): Optional path to Oxford word level data;
                defaults to ``'db/oxford_words.txt'``
        """
        if word_data_path is None:
            word_data_path = _DEFAULT_WORD_DATA_PATH
        super().__init__(word_data_path)
        self.content = content
        self._level = None  # Cache for computed level

    @property
    def level(self):
        """Calculate article's vocabulary level.

        Returns 0 for empty content, content made only of punctuation, digits
        and whitespace, or content without any word of positive level.
        Otherwise the positive word levels are combined as follows: a single
        level is returned as is; two or three levels give the maximum plus 0.1
        per additional word; more give the mean of the ten hardest.  The
        result is computed once and cached.
        """
        if self._level is None:
            if not self.content or is_punctuation_or_digit(self.content):
                self._level = 0
            else:
                word_levels = (
                    self.get_word_level(word)
                    for word in _extract_words(self.content)
                )
                self._level = _combine_article_levels(
                    [lvl for lvl in word_levels if lvl > 0]
                )
        return self._level

    def get_difficult_words(self, threshold=6):
        """
        Returns words at or above difficulty threshold

        Args:
            threshold (int): Minimum difficulty level (default 6)

        Returns:
            list: ``(word, level)`` tuples for each distinct word whose level
            is at least ``threshold``, sorted by level, hardest first.  Empty
            when the article has no content.
        """
        # Mirror the guard in ``level``: None/empty content has no words.
        if not self.content:
            return []

        difficult_words = []
        for word in set(_extract_words(self.content)):  # Use set to remove duplicates
            word_level = self.get_word_level(word)
            if word_level >= threshold:
                difficult_words.append((word, word_level))
        return sorted(difficult_words, key=lambda pair: pair[1], reverse=True)


def load_record(pickle_file, base_path=None):
    """Load user word history from pickle file.

    The file is looked up as ``static/frequency/<pickle_file>`` first under the
    current working directory and then under ``base_path``.  If neither
    exists, a warning is printed, a default word history is written to the
    ``base_path`` location (creating directories as needed) and returned.

    Args:
        pickle_file (str): File name of the pickle, without directories.
        base_path (str): Optional fallback application directory.  Defaults to
            the historical hard-coded location for backward compatibility.

    Returns:
        The unpickled object, or the default ``{word: [date, ...]}`` history.

    Note:
        ``pickle.load`` can execute arbitrary code; only load files that this
        application wrote itself.
    """
    if base_path is None:
        base_path = _LEGACY_APP_DIR

    candidate_dirs = (os.getcwd(), base_path)  # Current directory first
    for directory in candidate_dirs:
        file_path = os.path.join(directory, 'static', 'frequency', pickle_file)
        try:
            with open(file_path, 'rb') as f:
                # SECURITY: unpickling is only safe for trusted, local files.
                return pickle.load(f)
        except FileNotFoundError:
            continue

    # file_path now refers to the last candidate, i.e. the base_path location.
    print(f"Warning: Could not find file: {file_path}")

    # Create default word history with advanced words
    default_history = {
        "sophisticated": ["20240101", "20240102", "20240103"],
        "analytical": ["20240101", "20240102", "20240103"],
        "comprehensive": ["20240101", "20240102"],
        "theoretical": ["20240101", "20240103"],
        "implementation": ["20240102", "20240103"],
        "algorithm": ["20240101", "20240102"],
        "methodology": ["20240101", "20240103"],
        "paradigm": ["20240102", "20240103"],
    }

    # Create directory if it doesn't exist
    os.makedirs(os.path.dirname(file_path), exist_ok=True)

    # Save default history
    with open(file_path, 'wb') as f:
        pickle.dump(default_history, f)

    return default_history


def main():
    """Example usage: print the level of a stored user history and of a sample article."""
    d = load_record('frequency_mr1an85.pickle')  # Just use the filename
    print("User word history:", d)

    # Test user vocabulary level
    user = UserVocabularyLevel(d)
    print("User vocabulary level:", user.level)
    print("Level distribution:", user.get_level_distribution())

    # Test article vocabulary level
    article = ArticleVocabularyLevel(
        "This is an interesting article with sophisticated vocabulary."
    )
    print("Article vocabulary level:", article.level)
    print("Difficult words:", article.get_difficult_words())


if __name__ == "__main__":
    main()