# Provenance: this module is app/vocabulary.py, added as a new file by a
# change set that also adds the binary file app/static/wordfreqapp.db.
"""Estimate vocabulary difficulty levels for users and for articles.

A pickled record maps each word to the tests/word lists it appears in.
Those tags are converted to a numeric difficulty level (0 means "unknown"),
and the classes below average the levels of selected words.
"""

import pickle
import re

# Ordered (tag, level) pairs: the FIRST tag found in a word's test types
# decides its level, so the order of this table is significant.
_TEST_TYPE_LEVELS = (
    ('CET4', 4),
    ('OXFORD3000', 5),
    ('CET6', 6),
    ('GRADUATE', 6),
    ('IELTS', 7),
    ('OXFORD5000', 7),
    ('BBC', 8),
)

# A "word" is a run of word characters and hyphens (e.g. "open-minded").
_WORD_RE = re.compile(r'\b[\w-]+\b')


def load_record(pickle_fname):
    """Return the object stored in the pickle file *pickle_fname*.

    Args:
        pickle_fname: Path of the pickle file to read.

    Returns:
        Whatever object was pickled into the file.

    Raises:
        OSError: If the file cannot be opened (e.g. FileNotFoundError).
        pickle.UnpicklingError: If the file content is not a valid pickle.
    """
    with open(pickle_fname, 'rb') as f:
        # SECURITY: unpickling can execute arbitrary code. Only ever call
        # this on files that ship with the application, never on uploads.
        return pickle.load(f)


def convert_test_type_to_difficulty_level(d):
    """Map every word in *d* to a numeric difficulty level.

    Args:
        d: Mapping of word -> container of test-type tags. Each tag is
            checked with the ``in`` operator.

    Returns:
        A new dict of word -> level. The level is that of the first tag in
        the priority order CET4 (4), OXFORD3000 (5), CET6/GRADUATE (6),
        IELTS/OXFORD5000 (7), BBC (8) that the word carries, or 0 when it
        carries none of them.
    """
    return {
        word: next(
            (level for tag, level in _TEST_TYPE_LEVELS if tag in test_types),
            0,
        )
        for word, test_types in d.items()
    }


def _mean_or_zero(levels):
    """Return the arithmetic mean of *levels*, or 0 for an empty list."""
    return sum(levels) / len(levels) if levels else 0


class VocabularyLevelEstimator:
    """Base class giving access to the shared word -> difficulty table.

    The table is read from ``words_and_tests.p`` (resolved against the
    current working directory) the first time a level is requested, not at
    import time, so importing this module performs no file I/O.
    """

    _record_fname = 'words_and_tests.p'
    # Both are filled in by the first lookup and then shared by subclasses.
    _test_raw = None
    _difficulty_dict = None

    @classmethod
    def _load_difficulty_dict(cls):
        """Return the difficulty table, loading and caching it if needed."""
        table = cls._difficulty_dict
        if table is None:
            raw = load_record(cls._record_fname)
            table = convert_test_type_to_difficulty_level(raw)
            # Cache on the base class so every subclass reuses one copy.
            VocabularyLevelEstimator._test_raw = raw
            VocabularyLevelEstimator._difficulty_dict = table
        return table

    @classmethod
    def get_word_level(cls, word):
        """Return the difficulty level of *word*, or 0 if it is unknown.

        The lookup is an exact, case-sensitive dictionary match.

        Raises:
            OSError: On the first call, if the record file cannot be read.
        """
        return cls._load_difficulty_dict().get(word, 0)


class UserVocabularyLevel(VocabularyLevelEstimator):
    """Vocabulary level of a user, based on their most recent words."""

    # How many of the most recent words contribute to the level.
    RECENT_WORD_COUNT = 3

    def __init__(self, d):
        """Select the user's most recent words.

        Args:
            d: Mapping of word -> sequence of comparable values (presumably
                timestamps of when the word was recorded -- confirm with the
                caller). Words with an empty sequence are ignored. Words are
                looked up as given, without lower-casing.
        """
        self.d = d
        latest_seen = [(word, max(times)) for word, times in d.items() if times]
        # Newest first; the sort is stable, so ties keep dict order.
        latest_seen.sort(key=lambda pair: pair[1], reverse=True)
        self.recent_words = [
            word for word, _ in latest_seen[:self.RECENT_WORD_COUNT]
        ]

    @property
    def level(self):
        """Mean level of the recent words that have a known level.

        Returns 0 when none of the recent words is in the difficulty table.
        """
        levels = (self.get_word_level(word) for word in self.recent_words)
        return _mean_or_zero([lvl for lvl in levels if lvl > 0])


class ArticleVocabularyLevel(VocabularyLevelEstimator):
    """Vocabulary level of an article, based on its hardest words."""

    # How many of the highest-level words contribute to the level.
    TOP_WORD_COUNT = 5

    def __init__(self, content):
        """Collect the highest difficulty levels found in *content*.

        Args:
            content: Article text. It is lower-cased before tokenising.

        NOTE(review): every occurrence of a word is counted, so one hard
        word repeated several times can fill several of the top slots.
        """
        self.content = content
        words = _WORD_RE.findall(content.lower())
        known_levels = [
            lvl for lvl in map(self.get_word_level, words) if lvl > 0
        ]
        known_levels.sort(reverse=True)
        # Keep the levels of the 5 most difficult words.
        self.top_levels = known_levels[:self.TOP_WORD_COUNT]

    @property
    def level(self):
        """Mean of the top levels, or 0 when no word has a known level."""
        return _mean_or_zero(self.top_levels)