vocabulary
parent
d9512c929b
commit
6462ef87cc
Binary file not shown.
|
@ -0,0 +1,61 @@
|
|||
import re
|
||||
import pickle
|
||||
|
||||
def load_record(pickle_fname):
    """Read a pickled object from disk and return it.

    Args:
        pickle_fname: Path of the pickle file; it is opened in binary mode.

    Returns:
        The object that ``pickle.load`` reconstructs from the file.

    NOTE(review): unpickling can run arbitrary code, so this should only be
    pointed at files from a trusted source.
    """
    with open(pickle_fname, 'rb') as source:
        return pickle.load(source)
|
||||
|
||||
# Ordered (tag, level) pairs: the first tag found in a word's test types
# decides its level, so earlier rows take priority over later ones.
_TEST_TYPE_LEVELS = (
    ('CET4', 4),
    ('OXFORD3000', 5),
    ('CET6', 6),
    ('GRADUATE', 6),
    ('IELTS', 7),
    ('OXFORD5000', 7),
    ('BBC', 8),
)


def convert_test_type_to_difficulty_level(d):
    """Map every word to a numeric difficulty level based on its test tags.

    Args:
        d: Mapping of word -> container of test-type tags. Each tag is
            checked with ``in``, so any container supporting membership
            tests works.

    Returns:
        A new dict of word -> level. The level belongs to the first matching
        tag in priority order (CET4, OXFORD3000, CET6/GRADUATE, IELTS,
        OXFORD5000, BBC); words matching none of the tags get 0.
    """
    result = {}
    for word, test_types in d.items():
        result[word] = next(
            (level for tag, level in _TEST_TYPE_LEVELS if tag in test_types),
            0,  # no recognised tag
        )
    return result
|
||||
|
||||
class VocabularyLevelEstimator:
    """Base class giving access to a shared word -> difficulty lookup table.

    The table is built once, when this class body executes: the pickle
    'words_and_tests.p' is loaded (the path is relative, so it is resolved
    against the current working directory) and converted to numeric levels.
    """

    # Raw content of the pickle, as returned by load_record.
    _test_raw = load_record('words_and_tests.p')
    # word -> numeric difficulty level derived from the raw record.
    _difficulty_dict = convert_test_type_to_difficulty_level(_test_raw)

    @classmethod
    def get_word_level(cls, word):
        """Return the difficulty level stored for ``word``, or 0 if unknown.

        The lookup is an exact dictionary match; ``word`` is not normalised
        here.
        """
        known_levels = cls._difficulty_dict
        return known_levels.get(word, 0)
|
||||
|
||||
class UserVocabularyLevel(VocabularyLevelEstimator):
    """Estimate a user's vocabulary level from their most recent words."""

    def __init__(self, d):
        """Record the three most recently seen words.

        Args:
            d: Mapping of word -> collection of comparable time values.
                Words with an empty collection are ignored.
        """
        self.d = d

        # Pair every word with its latest time value.
        latest_seen = []
        for word, times in d.items():
            if times:
                latest_seen.append((word, max(times)))

        # Newest first; the sort is stable, so words with equal times keep
        # the order in which they appear in ``d``.
        latest_seen.sort(key=lambda pair: pair[1], reverse=True)
        self.recent_words = [word for word, _ in latest_seen[:3]]

    @property
    def level(self):
        """Mean level of the recent words that have a positive level.

        Returns 0 when none of the recent words has a positive level.
        """
        total = 0
        counted = 0
        for word in self.recent_words:
            lvl = self.get_word_level(word)
            if lvl > 0:
                total += lvl
                counted += 1
        return total / counted if counted else 0
|
||||
|
||||
class ArticleVocabularyLevel(VocabularyLevelEstimator):
    """Estimate an article's difficulty from its hardest known words."""

    def __init__(self, content):
        """Tokenise ``content`` and keep its five highest word levels.

        Args:
            content: Article text. It is lower-cased before being split
                into runs of word characters and hyphens.
        """
        self.content = content

        tokens = re.findall(r'\b[\w-]+\b', content.lower())
        # Only words with a positive level count. Repeated words contribute
        # one entry per occurrence.
        scored = [lvl for lvl in map(self.get_word_level, tokens) if lvl > 0]
        scored.sort(reverse=True)
        self.top_levels = scored[:5]  # keep the five highest difficulty levels

    @property
    def level(self):
        """Mean of the stored top levels, or 0 when there are none."""
        top = self.top_levels
        return sum(top) / len(top) if top else 0
|
Loading…
Reference in New Issue