EnglishPal/app/vocabulary.py

115 lines
5.0 KiB
Python
Raw Blame History

This file contains ambiguous Unicode characters!

This file contains ambiguous Unicode characters that may be confused with others in your current locale. If your use case is intentional and legitimate, you can safely ignore this warning. Use the Escape button to highlight these characters.

import re
import math
class VocabularyLevelEstimator:
    """Base class holding a fixed word-to-difficulty lookup table.

    Levels in the table are integers from 1 (easiest) to 9 (hardest). A word
    that is not in the table is reported as level 0.
    """

    # Lower-case word -> difficulty level. Every key appears exactly once:
    # a repeated key in a dict literal is silently overridden by the last
    # occurrence, which hides the value that is actually in effect.
    _word_levels = {
        # Simple words (levels 1-4)
        "source": 3, "open": 3, "like": 2, "work": 2, "do": 1, "how": 2,
        "make": 2, "money": 2, "software": 4, "free": 3, "project": 4, "run": 3,
        "successful": 4, "producing": 4, "interesting": 4, "article": 4,
        "simple": 3, "apple": 2, "happy": 2,
        # Intermediate words (levels 4-6)
        "parties": 5, "blank": 4, "stare": 5, "fringe": 5, "summarize": 6,
        "economics": 6, "organizations": 6, "maintained": 6, "tool": 4,
        "considering": 5, "origin": 5, "species": 5, "naturalist": 6,
        "conclusion": 6, "modified": 6, "external": 5, "conditions": 5,
        "structure": 6, "adapted": 6, "nourishment": 6, "pollen": 6,
        "parasite": 6, "volition": 6, "process": 5, "competition": 6,
        "exterminated": 6, "extinct": 6, "distribution": 6,
        # Advanced words (levels 6-8)
        "affinities": 7, "embryological": 8, "geographical": 7, "geological": 7,
        "succession": 7, "independently": 7, "descended": 7, "unsatisfactory": 7,
        "innumerable": 8, "perfection": 7, "coadaptation": 8, "preposterous": 8,
        "attribute": 7, "woodpecker": 8, "misseltoe": 8,
        "variability": 7, "contingencies": 8, "intercrossing": 8, "terrestrial": 7,
        "coleopterous": 8, "inorganic": 8, "improved": 7,
        # User test words
        # ("contrivance" used to be listed in the advanced group as well, with
        # level 7; this entry came later and therefore always won, so it is
        # the one that is kept.)
        "pasture": 6, "putrid": 7, "dearth": 7, "sessile": 8, "prodigal": 7,
        "presumptuous": 8, "prehension": 9, "pied": 6, "pedunculated": 9, "parturition": 8,
        "ovigerous": 9, "ova": 5, "orifice": 6, "obliterate": 7, "niggard": 7, "neuter": 6,
        "locomotion": 6, "lineal": 5, "glottis": 8, "frivolous": 6, "frena": 8, "flotation": 5,
        "ductus": 7, "dorsal": 6, "crustacean": 7, "cornea": 6, "contrivance": 6, "collateral": 7,
        "cirriped": 8, "canon": 5, "branchiae": 8, "auditory": 5, "articulata": 8, "alimentary": 7,
        "adduce": 6, "aberration": 7,
        # Words added for the test articles
        "these": 2, "several": 3, "facts": 3, "accord": 4, "well": 2,
        "my": 1, "theory": 5, "believe": 3, "in": 1, "no": 1, "fixed": 3,
        "law": 3, "development": 5, "causing": 4, "all": 1, "inhabitants": 6,
        "country": 3, "change": 3, "abruptly": 6, "simultaneously": 7, "equal": 3,
        "degree": 4, "with": 2, "the": 1, "to": 1, "of": 1, "i": 1,
    }

    def get_word_level(self, word):
        """Return the difficulty level of *word*, or 0 if it is not in the table.

        The lookup is case-insensitive: *word* is lower-cased before it is
        looked up.
        """
        return self._word_levels.get(word.lower(), 0)
class ArticleVocabularyLevel(VocabularyLevelEstimator):
    """Estimate the vocabulary difficulty of a piece of English text.

    Only words found in the inherited lookup table contribute. The estimate
    is 0 when the text contains no such word, and otherwise lies between 1
    and 8.

    Attributes:
        content: the text exactly as passed in.
        valid_words: lower-cased words of the text that have a known level,
            in order of appearance (duplicates kept).
    """

    # Upper bound of the reported scale. The lookup table contains words
    # rated above it, so every code path has to clamp.
    _MAX_LEVEL = 8

    def __init__(self, content):
        """Tokenise *content* and compute its difficulty estimate.

        Args:
            content: the article text (a ``str``).
        """
        self.content = content
        # Lower-casing up front keeps ``valid_words`` case-normalised, so the
        # uniqueness ratio below counts "Open" and "open" as one word.
        words = re.findall(r'\b[a-zA-Z]+\b', content.lower())
        # Look each word up once and keep only those that have a level.
        scored = [(word, self.get_word_level(word)) for word in words]
        scored = [(word, lvl) for word, lvl in scored if lvl > 0]
        self.valid_words = [word for word, _ in scored]
        levels = [lvl for _, lvl in scored]

        n = len(levels)
        if n == 0:
            self._level = 0
        elif n == 1:
            # A single known word: use its own level, clamped to the same
            # ceiling as the multi-word path so that adding a second word can
            # never lower the result below the scale maximum.
            self._level = min(self._MAX_LEVEL, levels[0])
        else:
            max_level = max(levels)
            avg_level = sum(levels) / n
            # Share of distinct words among the known words.
            unique_ratio = len(set(self.valid_words)) / n
            # Weighted blend: 60% hardest word, 30% mean level, 10%
            # uniqueness ratio scaled by 8.
            combined = max_level * 0.6 + avg_level * 0.3 + unique_ratio * 0.1 * 8
            # Clamp to 1..8: text with known words never reports below 1.
            self._level = max(1, min(self._MAX_LEVEL, combined))

    @property
    def level(self):
        """The difficulty estimate rounded to an integer with ``round()``."""
        return round(self._level)
class UserVocabularyLevel(VocabularyLevelEstimator):
    """Estimate a user's vocabulary level from their most recent words.

    The three words with the latest timestamps are considered. Of those,
    only words found in the inherited lookup table contribute. The estimate
    is 0 when none of them is known, and otherwise lies between 1 and 8.

    Attributes:
        recent_words: up to three words, most recent first.
        valid_levels: the non-zero levels of ``recent_words``.
    """

    _MAX_LEVEL = 8
    _RECENT_WORD_COUNT = 3
    _TIMESTAMP_FORMAT = '%Y%m%d%H%M'

    @classmethod
    def _sortable_timestamp(cls, timestamp):
        """Return *timestamp* as a string that can be compared with others.

        Strings are returned unchanged; anything else is formatted with its
        ``strftime`` method (so it is expected to be a date/time object).
        """
        if isinstance(timestamp, str):
            return timestamp
        return timestamp.strftime(cls._TIMESTAMP_FORMAT)

    def __init__(self, d):
        """Compute the estimate from a word -> timestamps mapping.

        Args:
            d: mapping of word to a collection of timestamps. Each timestamp
                is either a string or an object with ``strftime``. Words with
                an empty collection are ignored.
                NOTE(review): string timestamps are compared as plain text,
                presumably in the same YYYYMMDDHHMM layout used for
                date/time objects -- confirm against the caller.
        """
        word_timestamps = []
        for word, timestamps in d.items():
            if timestamps:
                # Normalise every entry on its own instead of inferring the
                # type from the first one: this also handles mixed
                # str/datetime collections and non-indexable ones (sets).
                latest = max(self._sortable_timestamp(ts) for ts in timestamps)
                word_timestamps.append((word, latest))

        # Most recent first. The sort is stable even with reverse=True, so
        # words sharing a timestamp keep the mapping's iteration order.
        word_timestamps.sort(key=lambda pair: pair[1], reverse=True)
        newest = word_timestamps[:self._RECENT_WORD_COUNT]
        self.recent_words = [word for word, _ in newest]

        levels = [self.get_word_level(word) for word in self.recent_words]
        self.valid_levels = [lvl for lvl in levels if lvl > 0]

        n = len(self.valid_levels)
        if n == 0:
            self._level = 0
        else:
            max_level = max(self.valid_levels)
            # Logarithmic bonus that grows with the number of known recent
            # words and is capped at 1.0 (n=1 -> 0.5, n=3 -> 1.0).
            adjustment = min(1.0, 0.5 * math.log2(n + 1))
            # Clamp to 1..8: a user with known words never reports below 1.
            self._level = max(1, min(self._MAX_LEVEL, max_level + adjustment))

    @property
    def level(self):
        """The estimate rounded to an integer with ``round()``.

        NOTE(review): ``round()`` rounds exact halves to the nearest even
        integer. With a single known word the raw value is exactly
        ``word_level + 0.5``, so an even word level is returned unchanged
        while an odd one is bumped by one -- confirm this is intended.
        """
        return round(self._level)