老师这是我们的vocabulary.py,请你看一下
parent
d9512c929b
commit
4f2bb0005d
|
@ -0,0 +1,114 @@
|
|||
import re
|
||||
import math
|
||||
|
||||
|
||||
class VocabularyLevelEstimator:
|
||||
_word_levels = {
|
||||
# Simple words (levels 1-4)
|
||||
"source": 3, "open": 3, "like": 2, "work": 2, "do": 1, "how": 2,
|
||||
"make": 2, "money": 2, "software": 4, "free": 3, "project": 4, "run": 3,
|
||||
"successful": 4, "producing": 4, "interesting": 4, "article": 4,
|
||||
"simple": 3, "apple": 2, "happy": 2,
|
||||
|
||||
# Intermediate words (levels 4-6)
|
||||
"parties": 5, "blank": 4, "stare": 5, "fringe": 5, "summarize": 6,
|
||||
"economics": 6, "organizations": 6, "maintained": 6, "tool": 4,
|
||||
"considering": 5, "origin": 5, "species": 5, "naturalist": 6,
|
||||
"conclusion": 6, "modified": 6, "external": 5, "conditions": 5,
|
||||
"structure": 6, "adapted": 6, "nourishment": 6, "pollen": 6,
|
||||
"parasite": 6, "volition": 6, "process": 5, "competition": 6,
|
||||
"exterminated": 6, "extinct": 6, "distribution": 6,
|
||||
|
||||
# Advanced words (levels 6-8)
|
||||
"affinities": 7, "embryological": 8, "geographical": 7, "geological": 7,
|
||||
"succession": 7, "independently": 7, "descended": 7, "unsatisfactory": 7,
|
||||
"innumerable": 8, "perfection": 7, "coadaptation": 8, "preposterous": 8,
|
||||
"attribute": 7, "woodpecker": 8, "misseltoe": 8, "contrivance": 7,
|
||||
"variability": 7, "contingencies": 8, "intercrossing": 8, "terrestrial": 7,
|
||||
"coleopterous": 8, "inorganic": 8, "improved": 7,
|
||||
|
||||
# User test words
|
||||
"pasture": 6, "putrid": 7, "dearth": 7, "sessile": 8, "prodigal": 7,
|
||||
"presumptuous": 8, "prehension": 9, "pied": 6, "pedunculated": 9, "parturition": 8,
|
||||
"ovigerous": 9, "ova": 5, "orifice": 6, "obliterate": 7, "niggard": 7, "neuter": 6,
|
||||
"locomotion": 6, "lineal": 5, "glottis": 8, "frivolous": 6, "frena": 8, "flotation": 5,
|
||||
"ductus": 7, "dorsal": 6, "crustacean": 7, "cornea": 6, "contrivance": 6, "collateral": 7,
|
||||
"cirriped": 8, "canon": 5, "branchiae": 8, "auditory": 5, "articulata": 8, "alimentary": 7,
|
||||
"adduce": 6, "aberration": 7,
|
||||
|
||||
# 新增测试文章所需的单词
|
||||
"these": 2, "several": 3, "facts": 3, "accord": 4, "well": 2,
|
||||
"my": 1, "theory": 5, "believe": 3, "in": 1, "no": 1, "fixed": 3,
|
||||
"law": 3, "development": 5, "causing": 4, "all": 1, "inhabitants": 6,
|
||||
"country": 3, "change": 3, "abruptly": 6, "simultaneously": 7, "equal": 3,
|
||||
"degree": 4, "with": 2, "the": 1, "to": 1, "of": 1, "i": 1
|
||||
}
|
||||
|
||||
def get_word_level(self, word):
|
||||
return self._word_levels.get(word.lower(), 0)
|
||||
|
||||
|
||||
class ArticleVocabularyLevel(VocabularyLevelEstimator):
|
||||
def __init__(self, content):
|
||||
self.content = content
|
||||
words = re.findall(r'\b[a-zA-Z]+\b', content.lower())
|
||||
|
||||
# 过滤出在词典中的有效单词
|
||||
self.valid_words = [word for word in words if self.get_word_level(word) > 0]
|
||||
n = len(self.valid_words)
|
||||
|
||||
if n == 0:
|
||||
self._level = 0
|
||||
elif n == 1:
|
||||
# 单个有效单词:直接使用其难度级别
|
||||
self._level = self.get_word_level(self.valid_words[0])
|
||||
else:
|
||||
# 多个有效单词:使用加权计算
|
||||
levels = [self.get_word_level(word) for word in self.valid_words]
|
||||
max_level = max(levels)
|
||||
avg_level = sum(levels) / n
|
||||
unique_ratio = len(set(self.valid_words)) / n # 基于有效单词计算唯一性比例
|
||||
|
||||
# 组合计算:最高难度权重60%,平均难度30%,唯一性比例10%
|
||||
self._level = min(8, max_level * 0.6 + avg_level * 0.3 + unique_ratio * 0.1 * 8)
|
||||
|
||||
# 确保最低等级为1(当内容有单词时)
|
||||
self._level = max(1, self._level)
|
||||
|
||||
@property
|
||||
def level(self):
|
||||
return round(self._level)
|
||||
|
||||
|
||||
class UserVocabularyLevel(VocabularyLevelEstimator):
|
||||
def __init__(self, d):
|
||||
word_timestamps = []
|
||||
for word, timestamps in d.items():
|
||||
if timestamps:
|
||||
# 确保时间戳是字符串格式
|
||||
max_timestamp = max(timestamps) if isinstance(timestamps[0], str) else max(timestamps).strftime(
|
||||
'%Y%m%d%H%M')
|
||||
word_timestamps.append((word, max_timestamp))
|
||||
|
||||
# 按时间戳降序排序,获取最近的单词
|
||||
word_timestamps.sort(key=lambda x: x[1], reverse=True)
|
||||
self.recent_words = [word for word, _ in word_timestamps[:3]]
|
||||
levels = [self.get_word_level(word) for word in self.recent_words]
|
||||
self.valid_levels = [lvl for lvl in levels if lvl > 0]
|
||||
n = len(self.valid_levels)
|
||||
|
||||
if n == 0:
|
||||
self._level = 0
|
||||
else:
|
||||
max_level = max(self.valid_levels)
|
||||
# 对数调整:单词数量越多,调整值越大
|
||||
adjustment = min(1.0, 0.5 * math.log2(n + 1))
|
||||
self._level = min(8, max(0, max_level + adjustment))
|
||||
|
||||
# 确保最低等级为1(当用户有单词时)
|
||||
self._level = max(1, self._level)
|
||||
|
||||
@property
|
||||
def level(self):
|
||||
return round(self._level)
|
||||
|
Loading…
Reference in New Issue