EnglishPal/test_vocabulary_output_2025...

84 lines
2.5 KiB
Plaintext

import pickle
import re
from collections import defaultdict
def load_record(pickle_fname):
    """Load and return the object stored in a pickle file.

    Args:
        pickle_fname: Path of the pickle file to read.

    Returns:
        Whatever object was pickled into the file.
    """
    # NOTE: unpickling can execute arbitrary code; only use this on files
    # that come from a trusted source.
    with open(pickle_fname, 'rb') as stream:
        return pickle.load(stream)
class VocabularyLevelEstimator:
    """Estimate a vocabulary difficulty level from a list of words.

    Every word is scored from 0 to 6 by ``calculate_level``; ``level`` is the
    mean score over ``word_lst``.
    """

    # Maps a word to the sources where it appears.  Loaded when the class
    # body is executed, from a path relative to the working directory.
    _test = load_record('words_and_tests.p')

    def __init__(self):
        """Start with an empty word list."""
        self.word_lst = []

    def calculate_level(self, word):
        """Return the difficulty score of a single word.

        A word that is not a key of ``_test`` scores 0.  Otherwise the score
        is that of the first source found for the word, checked from the
        highest score down; a known word with none of these sources scores 1.
        """
        if word not in self._test:
            return 0

        sources = self._test[word]
        # Order matters: the first matching source decides the score.
        ranking = (
            ('IELTS', 6),
            ('BBC', 5),
            ('CET6', 4),
            ('CET4', 3),
            ('OXFORD3000', 2),
        )
        for tag, score in ranking:
            if tag in sources:
                return score
        return 1

    @property
    def level(self):
        """Mean difficulty score of ``word_lst``; 0.0 when it is empty."""
        words = self.word_lst
        if words:
            scores = [self.calculate_level(w) for w in words]
            return sum(scores) / len(words)
        return 0.0
class UserVocabularyLevel(VocabularyLevelEstimator):
    """Vocabulary level estimated from a user's word record."""

    def __init__(self, d):
        """Keep the record ``d`` and use its keys as the word list.

        Args:
            d: Mapping whose keys are the user's words.
        """
        super().__init__()
        self.d = d
        self.word_lst = list(d.keys())

    @property
    def level(self):
        """Mean difficulty score of the first three words; 0.0 if none."""
        words = self.word_lst
        if not words:
            return 0.0

        # NOTE(review): these are meant to be the user's 3 most recent words,
        # but the slice simply takes the first three keys of ``d`` in
        # iteration order - confirm that this order reflects recency.
        sample = words[:3]
        return sum(map(self.calculate_level, sample)) / len(sample)
class ArticleVocabularyLevel(VocabularyLevelEstimator):
    """Vocabulary level estimated from the hardest words of an article."""

    def __init__(self, content):
        """Build the word list from the article text.

        Args:
            content: The article text.
        """
        super().__init__()
        self.content = content

        # Lower-case the text and keep only runs of ASCII letters.
        tokens = re.findall(r'\b[a-zA-Z]+\b', content.lower())

        # dict.fromkeys drops repeats while keeping first-seen order; the
        # stable sort then keeps that order among words of equal difficulty.
        ranked = sorted(
            dict.fromkeys(tokens),
            key=self.calculate_level,
            reverse=True,
        )

        # Only the 10 most difficult distinct words count towards the level.
        self.word_lst = ranked[:10]
if __name__ == '__main__':
    # Script entry point.  It currently does nothing; the commented-out lines
    # below show how the classes in this module are meant to be used.
    # Example usage
    # d = load_record('frequency_mrlan85.pickle')
    # print(d)
    # user = UserVocabularyLevel(d)
    # print(user.level) # level is a property
    # article = ArticleVocabularyLevel('This is an interesting article')
    # print(article.level)
    pass