import pickle
import re
from collections import defaultdict

def load_record(pickle_fname):
    """Return the object stored in the pickle file ``pickle_fname``.

    The file is opened in binary mode and closed again before returning.

    NOTE: unpickling can execute arbitrary code, so this should only be
    called on files from a trusted source.
    """
    with open(pickle_fname, 'rb') as source:
        return pickle.load(source)

class VocabularyLevelEstimator:
    """Estimate a vocabulary level as the mean difficulty of a word list.

    Subclasses are expected to fill ``word_lst``; ``level`` then averages
    the per-word scores produced by ``calculate_level``.
    """

    # Map a word to the sources where it appears; loaded once, when the
    # class body is executed.
    _test = load_record('words_and_tests.p')

    def __init__(self):
        self.word_lst = []

    def calculate_level(self, word):
        """Return the difficulty score (0-6) of a single word.

        A word missing from the lookup table scores 0.  Otherwise the score
        is that of the first matching source tag, checked in the order
        below; a known word carrying none of these tags scores 1.
        """
        if word not in self._test:
            return 0

        sources = self._test[word]
        # Ordered from hardest to easiest: the first hit wins.
        ranked_tags = (
            ('IELTS', 6),
            ('BBC', 5),
            ('CET6', 4),
            ('CET4', 3),
            ('OXFORD3000', 2),
        )
        for tag, score in ranked_tags:
            if tag in sources:
                return score
        return 1

    @property
    def level(self):
        """Mean difficulty over ``word_lst``; 0.0 when the list is empty."""
        words = self.word_lst
        if not words:
            return 0.0

        score_sum = sum(map(self.calculate_level, words))
        return score_sum / len(words)

class UserVocabularyLevel(VocabularyLevelEstimator):
    """Vocabulary level of a user, derived from the keys of a mapping ``d``."""

    def __init__(self, d):
        """Store ``d`` and use its keys, in iteration order, as the word list."""
        super().__init__()
        self.d = d
        self.word_lst = [*d.keys()]

    @property
    def level(self):
        """Mean difficulty of the first three words; 0.0 when there are none.

        NOTE(review): the sample is the first three entries of ``word_lst``
        (i.e. the first three keys of ``d`` in iteration order).  This is
        presumably meant to be the user's most recent words -- confirm that
        the caller orders ``d`` accordingly.
        """
        if not self.word_lst:
            return 0.0

        sample = self.word_lst[:3]
        scores = [self.calculate_level(entry) for entry in sample]
        return sum(scores) / len(sample)

class ArticleVocabularyLevel(VocabularyLevelEstimator):
    """Vocabulary level of an article, based on its ten hardest distinct words."""

    def __init__(self, content):
        """Tokenise ``content`` and keep its ten most difficult unique words.

        The text is lower-cased and split into purely alphabetic tokens.
        Duplicates are dropped (first occurrence kept), the remaining words
        are ordered by descending difficulty, and the first ten become
        ``word_lst``.
        """
        super().__init__()
        self.content = content

        tokens = re.findall(r'\b[a-zA-Z]+\b', content.lower())

        # dict keys keep first-seen order, so de-duplication is order-preserving
        # and the stable sort leaves equally difficult words in text order.
        distinct = dict.fromkeys(tokens)
        hardest_first = sorted(distinct, key=self.calculate_level, reverse=True)

        self.word_lst = hardest_first[:10]

if __name__ == '__main__':
    # Example usage (kept commented out; running this module as a script
    # currently does nothing).
    # d = load_record('frequency_mrlan85.pickle')
    # print(d)
    # user = UserVocabularyLevel(d)
    # print(user.level)  # level is a property
    # article = ArticleVocabularyLevel('This is an interesting article')
    # print(article.level)
    pass