From 6462ef87ccf8c02bc4c58693ca6de385efb2bc01 Mon Sep 17 00:00:00 2001 From: AsWhitale <1838528479@qq.com> Date: Thu, 29 May 2025 14:16:44 +0800 Subject: [PATCH] vocabulary --- app/static/wordfreqapp.db | Bin 0 -> 20480 bytes app/vocabulary.py | 61 ++++++++++++++++++++++++++++++++++++++ 2 files changed, 61 insertions(+) create mode 100644 app/static/wordfreqapp.db create mode 100644 app/vocabulary.py diff --git a/app/static/wordfreqapp.db b/app/static/wordfreqapp.db new file mode 100644 index 0000000000000000000000000000000000000000..943d6432c08d6911012a9fb19d0d8ab83a62ca8c GIT binary patch literal 20480 zcmeI&L2KJE6u@yQ&Ds)3)=S8s7tNO?vEHdgq>xrjBJJ@e-soH zMla1uz;uyKSK|E(HPM$kS(VCC 0] + return sum(valid_levels) / len(valid_levels) if valid_levels else 0 + +class ArticleVocabularyLevel(VocabularyLevelEstimator): + def __init__(self, content): + self.content = content + words = re.findall(r'\b[\w-]+\b', content.lower()) + word_levels = [self.get_word_level(word) for word in words] + valid_levels = sorted([lvl for lvl in word_levels if lvl > 0], reverse=True) + self.top_levels = valid_levels[:5] # 取前5个最高难度单词 + + @property + def level(self): + if not self.top_levels: + return 0 + return sum(self.top_levels) / len(self.top_levels) \ No newline at end of file