import pickle
import re

# Mock test data; in real use this should be loaded from a file.
# load_record() returns this dict when the requested pickle file is missing.
# Each key is a lowercase word and each value is a small integer (0-8 in this
# table) -- presumably a difficulty level; TODO confirm against the real data.
# 'xyz' and the empty string are included with a value of 0.
_TEST_MOCK = {
    'simple': 2, 'apple': 1, 'happy': 2, 'open': 3, 'like': 2, 'work': 2, 'make': 2, 'money': 2,
    'source': 3, 'software': 3, 'successful': 4, 'project': 3, 'develop': 3, 'process': 3,
    'available': 4, 'organizations': 4,
    'extinct': 6, 'modification': 7, 'apparently': 7, 'abruptly': 7, 'rentable': 7, 'predictable': 6,
    'pasture': 7, 'putrid': 7, 'frivolous': 8, 'sessile': 8, 'dearth': 7, 'presumptuous': 7,
    'fringe': 8, 'economics': 5, 'summarize': 5, 'stare': 5, 'eagerly': 5, 'completely': 4, 'maintained': 5,
    'geological': 6, 'embryological': 7, 'coadaptation': 8, 'exterminated': 7, 'contingencies': 7,
    'intercrossing': 6, 'coleopterous': 8, 'marin': 5, 'organised': 5, 'monopoly': 8, 'inorganic': 7,
    'xyz': 0, '': 0
}

def load_record(pickle_fname):
    """Return the object pickled in *pickle_fname*, or the mock table if absent.

    Args:
        pickle_fname: Path of the pickle file to read.

    Returns:
        The unpickled object. If ``FileNotFoundError`` is raised while the
        file is being opened or loaded, the module-level ``_TEST_MOCK`` dict
        itself (not a copy) is returned instead. Any other exception
        propagates to the caller.
    """
    # NOTE: unpickling can execute arbitrary code, so only point this at
    # files from a trusted source.
    try:
        with open(pickle_fname, 'rb') as handle:
            return pickle.load(handle)
    except FileNotFoundError:
        return _TEST_MOCK

class VocabularyLevelEstimator:
    """Base class that scores a collection of words against a lookup table.

    This class does not set ``self.word_lst`` itself; subclasses must assign
    it (an iterable of words) before ``level`` is read.
    """

    # Lookup table, loaded once when the class body is executed.
    # NOTE(review): this was previously described as mapping a word to the
    # sources where it appears, but ``level`` adds the looked-up values
    # numerically -- confirm the real schema of 'words_and_tests.p'.
    _test = load_record('words_and_tests.p')

    @property
    def level(self):
        """Return the score for the words in ``self.word_lst``.

        Words missing from ``_test`` are ignored. With ``n`` matched words
        whose table values sum to ``s``:

        * ``n == 0``  -> the integer ``0``;
        * ``s == 0``  -> ``s / n`` (no bonus applied);
        * otherwise   -> ``(s + n * n / 100) / n``.
        """
        table = self._test
        # Lazily yields the table value of each known word, so the lookup and
        # the accumulation below stay interleaved word by word.
        known_values = (table[word] for word in self.word_lst if word in table)

        running_sum = 0.0
        matched = 0
        for value in known_values:
            running_sum += value
            matched += 1

        if matched == 0:
            return 0
        if running_sum != 0:
            # Bonus that grows with the number of matched words.
            running_sum += (matched * matched) / 100
        return running_sum / matched

class UserVocabularyLevel(VocabularyLevelEstimator):
    """Estimator whose word list is the set of keys of a user's dictionary."""

    def __init__(self, d):
        """Keep a reference to *d* and use its keys as the words to score.

        Args:
            d: Dictionary whose keys are the words.

        Raises:
            TypeError: If *d* is not a ``dict``.
        """
        if not isinstance(d, dict):
            raise TypeError("Input must be a dictionary")
        self.d = d
        # NOTE(review): every key is used. An earlier comment here said only
        # the most recently-added words should be looked at, but no such
        # filtering is implemented -- confirm the intended behaviour.
        self.word_lst = [*d.keys()]

class ArticleVocabularyLevel(VocabularyLevelEstimator):
    """Estimator whose word list is extracted from a piece of text."""

    def __init__(self, content):
        """Store *content* and tokenize its lowercased form into words.

        Args:
            content: The article text.

        Raises:
            TypeError: If *content* is not a ``str``.
        """
        if not isinstance(content, str):
            raise TypeError("Content must be a string")
        self.content = content
        lowered = content.lower()
        # Runs of ASCII letters delimited by word boundaries; repeated words
        # are kept, in order of appearance.
        self.word_lst = re.findall(r'\b[a-zA-Z]+\b', lowered)

if __name__ == '__main__':
    # Demo run: load a frequency record (load_record substitutes the mock
    # table when the file is missing), then print the record, the level
    # computed from its keys, and the level of a sample sentence.
    frequency_record = load_record('frequency_mrlan85.pickle')
    print(frequency_record)
    print(UserVocabularyLevel(frequency_record).level)  # level is a property
    print(ArticleVocabularyLevel('This is an interesting article').level)