Bug585-Wangxitao #197

Open
wangxitao wants to merge 2 commits from wangxitao/EnglishPal:Bug585-Wangxitao into Alpha-snapshot20240618
2 changed files with 61 additions and 0 deletions

BIN
app/static/wordfreqapp.db Normal file

Binary file not shown.

61
app/vocabulary.py Normal file
View File

@@ -0,0 +1,61 @@
import re
import pickle
def load_record(pickle_fname):
    """Load and return the object stored in a pickle file.

    Args:
        pickle_fname: Path of the pickle file to read.

    Returns:
        The object produced by ``pickle.load`` for that file.

    Raises:
        OSError: If the file cannot be opened.
        pickle.UnpicklingError: If the content is not a valid pickle
            (``pickle.load`` may raise other exceptions as well).
    """
    # SECURITY: unpickling can execute arbitrary code embedded in the file.
    # Only pass paths to files that this project ships or generated itself;
    # never a user-uploaded or downloaded file.
    with open(pickle_fname, 'rb') as f:
        return pickle.load(f)
# Priority-ordered rules: the first entry with a matching tag decides the
# level, so a word tagged both 'CET4' and 'BBC' is rated 4, not 8.
_LEVEL_RULES = (
    (('CET4',), 4),
    (('OXFORD3000',), 5),
    (('CET6', 'GRADUATE'), 6),
    (('IELTS', 'OXFORD5000'), 7),
    (('BBC',), 8),
)


def convert_test_type_to_difficulty_level(d):
    """Map every word to a numeric difficulty level derived from its test tags.

    Args:
        d: Mapping of word -> container of test-name tags (anything that
            supports the ``in`` operator, e.g. a list of strings).

    Returns:
        A new dict mapping each word of ``d`` to 4, 5, 6, 7 or 8 according
        to the first matching rule, or to 0 when no known tag is present.
    """
    result = {}
    for word, test_types in d.items():
        level = 0
        # None or an empty container carries no tags: leave the word at 0
        # instead of failing on the membership test.
        if test_types:
            for tags, tag_level in _LEVEL_RULES:
                if any(tag in test_types for tag in tags):
                    level = tag_level
                    break
        result[word] = level
    return result
class VocabularyLevelEstimator:
    """Base class that owns the shared word -> difficulty-level table.

    The table is built once, while the class body executes, from the pickle
    file ``words_and_tests.p``.
    """

    # NOTE(review): this relative path is resolved against the process's
    # current working directory -- confirm the application is always started
    # from the directory that contains the data file.
    _test_raw = load_record('words_and_tests.p')
    _difficulty_dict = convert_test_type_to_difficulty_level(_test_raw)

    @classmethod
    def get_word_level(cls, word):
        """Return the level stored for ``word``, or 0 when it is not in the table."""
        try:
            return cls._difficulty_dict[word]
        except KeyError:
            return 0
class UserVocabularyLevel(VocabularyLevelEstimator):
    """Estimate a level from the three words with the greatest recorded time."""

    def __init__(self, d):
        """Pick the three words whose largest time value is greatest.

        Args:
            d: Mapping of word -> collection of comparable values (called
                ``times`` here; presumably the moments the word was recorded
                -- verify against the caller). Words with an empty
                collection are ignored.
        """
        self.d = d

        latest_seen = []
        for word, times in d.items():
            if not times:
                continue
            latest_seen.append((word, max(times)))

        # Stable descending sort: words with equal times keep the order in
        # which they appear in ``d``.
        latest_seen.sort(key=lambda pair: pair[1], reverse=True)
        self.recent_words = [pair[0] for pair in latest_seen[:3]]

    @property
    def level(self):
        """Mean level of the selected words that have a level above 0.

        Returns 0 when none of the selected words has a positive level.
        """
        total = 0
        counted = 0
        for word in self.recent_words:
            lvl = self.get_word_level(word)
            if lvl > 0:
                total += lvl
                counted += 1
        if counted == 0:
            return 0
        return total / counted
class ArticleVocabularyLevel(VocabularyLevelEstimator):
    """Estimate an article's level from its five highest-rated word tokens."""

    def __init__(self, content):
        """Tokenise ``content`` and keep the five highest positive levels.

        Args:
            content: Article text. It is lower-cased before tokenising.
        """
        self.content = content

        # Every occurrence is scored on its own, so a word that is repeated
        # in the text can occupy several of the five slots.
        known_levels = []
        for match in re.finditer(r'\b[\w-]+\b', content.lower()):
            lvl = self.get_word_level(match.group())
            if lvl > 0:
                known_levels.append(lvl)

        known_levels.sort(reverse=True)
        self.top_levels = known_levels[:5]  # keep the five highest difficulty levels

    @property
    def level(self):
        """Mean of the retained levels, or 0 when no token had a positive level."""
        hardest = self.top_levels
        return sum(hardest) / len(hardest) if hardest else 0