EnglishPal/app/vocabulary.py

113 lines
4.8 KiB
Python
Raw Blame History

This file contains ambiguous Unicode characters!

This file contains ambiguous Unicode characters that may be confused with others in your current locale. If your use case is intentional and legitimate, you can safely ignore this warning. Use the Escape button to highlight these characters.

import re
import math
class VocabularyLevelEstimator:
    """Base class mapping English words to a hand-assigned difficulty level.

    The levels stored in the table range from 1 (easiest) to 9 (hardest).
    Words that are not in the table are reported as level 0 by
    ``get_word_level``.
    """

    # Lower-cased word -> difficulty level. Class attribute, shared by all
    # instances and subclasses.
    _word_levels = {
        # Simple words (levels 1-4)
        "source": 3, "open": 3, "like": 2, "work": 2, "do": 1, "how": 2,
        "make": 2, "money": 2, "software": 4, "free": 3, "project": 4, "run": 3,
        "successful": 4, "producing": 4, "interesting": 4, "article": 4,
        "simple": 3, "apple": 2, "happy": 2,
        # Intermediate words (levels 4-6)
        "parties": 5, "blank": 4, "stare": 5, "fringe": 5, "summarize": 6,
        "economics": 6, "organizations": 6, "maintained": 6, "tool": 4,
        "considering": 5, "origin": 5, "species": 5, "naturalist": 6,
        "conclusion": 6, "modified": 6, "external": 5, "conditions": 5,
        "structure": 6, "adapted": 6, "nourishment": 6, "pollen": 6,
        "parasite": 6, "volition": 6, "process": 5, "competition": 6,
        "exterminated": 6, "extinct": 6, "distribution": 6,
        # Advanced words (levels 6-8)
        "affinities": 7, "embryological": 8, "geographical": 7, "geological": 7,
        "succession": 7, "independently": 7, "descended": 7, "unsatisfactory": 7,
        "innumerable": 8, "perfection": 7, "coadaptation": 8, "preposterous": 8,
        "attribute": 7, "woodpecker": 8, "misseltoe": 8,
        "variability": 7, "contingencies": 8, "intercrossing": 8, "terrestrial": 7,
        "coleopterous": 8, "inorganic": 8, "improved": 7,
        # User test words
        "pasture": 6, "putrid": 7, "dearth": 7, "sessile": 8, "prodigal": 7,
        "presumptuous": 8, "prehension": 9, "pied": 6, "pedunculated": 9, "parturition": 8,
        "ovigerous": 9, "ova": 5, "orifice": 6, "obliterate": 7, "niggard": 7, "neuter": 6,
        "locomotion": 6, "lineal": 5, "glottis": 8, "frivolous": 6, "frena": 8, "flotation": 5,
        # NOTE: "contrivance" used to be listed twice (7 in the advanced group
        # and 6 here). A dict literal keeps the last value, so 6 is the level
        # that was always in effect; the shadowed entry has been removed.
        "ductus": 7, "dorsal": 6, "crustacean": 7, "cornea": 6, "contrivance": 6, "collateral": 7,
        "cirriped": 8, "canon": 5, "branchiae": 8, "auditory": 5, "articulata": 8, "alimentary": 7,
        "adduce": 6, "aberration": 7,
        # Words added for the test article
        "these": 2, "several": 3, "facts": 3, "accord": 4, "well": 2,
        "my": 1, "theory": 5, "believe": 3, "in": 1, "no": 1, "fixed": 3,
        "law": 3, "development": 5, "causing": 4, "all": 1, "inhabitants": 6,
        "country": 3, "change": 3, "abruptly": 6, "simultaneously": 7, "equal": 3,
        "degree": 4, "with": 2, "the": 1, "to": 1, "of": 1, "i": 1,
    }

    def get_word_level(self, word: str) -> int:
        """Return the difficulty level of *word*, or 0 if it is not in the table.

        The lookup is case-insensitive: *word* is lower-cased before it is
        looked up.
        """
        return self._word_levels.get(word.lower(), 0)
class ArticleVocabularyLevel(VocabularyLevelEstimator):
    """Estimate the vocabulary difficulty of a piece of text.

    Only words found in the inherited level table ("valid words") take part
    in the estimate:

    * no valid word      -> level 0
    * one valid word     -> that word's own level
    * two or more        -> weighted blend of the hardest level (60%), the
      mean level (30%) and the share of distinct words (10%, scaled to the
      8-point range), clamped to the range 1..8
    """

    def __init__(self, content):
        """Tokenise *content* and compute the raw (unrounded) level."""
        self.content = content

        # Runs of ASCII letters only; digits and punctuation act as separators.
        tokens = re.findall(r'\b[a-zA-Z]+\b', content.lower())

        # Keep only the tokens that exist in the level table.
        known = [tok for tok in tokens if self.get_word_level(tok) > 0]
        self.valid_words = known

        if not known:
            self._level = 0
            return

        scores = [self.get_word_level(tok) for tok in known]
        count = len(known)

        if count == 1:
            # A single valid word: its level is the article level.
            self._level = scores[0]
            return

        hardest = max(scores)
        mean_score = sum(scores) / count
        # Share of distinct words among the valid words (repeats count).
        distinct_share = len(set(known)) / count

        # Hardest word 60%, mean 30%, distinct-word share 10% (scaled by 8).
        blended = hardest * 0.6 + mean_score * 0.3 + distinct_share * 0.1 * 8
        capped = min(8, blended)
        # Text that contains valid words never rates below level 1.
        self._level = max(1, capped)

    @property
    def level(self):
        """Raw level rounded to an integer with the built-in ``round``."""
        raw = self._level
        return round(raw)
class UserVocabularyLevel(VocabularyLevelEstimator):
    """Estimate a user's vocabulary level from their most recent words.

    The estimate uses at most the three words with the newest timestamps and
    is based on the hardest of those that appear in the inherited level table.
    """

    def __init__(self, d):
        """Build the estimate from *d*, a mapping of word -> timestamps.

        Each value is a sequence of timestamps that are either strings or
        objects offering ``strftime`` (presumably ``datetime`` instances, and
        strings presumably in ``%Y%m%d%H%M`` form - TODO confirm with callers).
        """
        self.recent_words = self._get_recent_words(d)
        self._level = self._calculate_level()

    def _get_recent_words(self, d):
        """Return up to three words from *d*, newest timestamp first."""
        latest_seen = []
        for word, timestamps in d.items():
            if not timestamps:
                # Words without any timestamp cannot be ranked; skip them.
                continue
            if isinstance(timestamps[0], str):
                newest = max(timestamps)
            else:
                # Normalise non-string timestamps to a sortable string.
                newest = max(timestamps).strftime('%Y%m%d%H%M')
            latest_seen.append((word, newest))

        # Newest first; string comparison of the normalised timestamps.
        ranked = sorted(latest_seen, key=lambda pair: pair[1], reverse=True)
        return [word for word, _ in ranked[:3]]

    def _calculate_level(self):
        """Return the raw level: hardest known recent word plus a small bonus.

        The bonus is ``0.5 * log2(n + 1)`` capped at 1.0, where ``n`` is the
        number of recent words found in the level table. The result is
        clamped to the range 0..8, and is 0 when no recent word is known.
        """
        known = [
            lvl for lvl in map(self.get_word_level, self.recent_words) if lvl > 0
        ]
        if not known:
            return 0

        bonus = min(1.0, 0.5 * math.log2(len(known) + 1))
        boosted = max(known) + bonus
        return min(8, max(0, boosted))

    @property
    def level(self):
        """Raw level rounded to an integer with the built-in ``round``.

        NOTE(review): ``round`` rounds halves to the nearest even integer, so
        with a single known word (bonus 0.5) level 4 stays 4 while level 5
        becomes 6 - confirm this is intended.
        """
        raw = self._level
        return round(raw)