请老师看我们的vocabulary.py文件

2025-05-29 14:46:06 +08:00
6 changed files with 98 additions and 7 deletions
--- a/README.md
+++ b/README.md
@ -18,7 +18,7 @@ picked from articles selected for him to read according his vocabulary level.  E
 `python3 main.py`
-Make sure you have put the SQLite database file in the path `app/db` (see below).
+Make sure you have put the SQLite database file in the path `app/static` (see below).
 ## Run it as a Docker container
@ -214,5 +214,5 @@ Bug report: http://118.25.96.118/bugzilla/show_bug.cgi?id=215
 Bug report: http://118.25.96.118/bugzilla/show_bug.cgi?id=489
-*Last modified on 2026-03-12*
+*Last modified on 2023-01-30*
--- a/app/Article.py
+++ b/app/Article.py
@ -106,7 +106,7 @@ def get_today_article(user_word_list, visited_articles):
        text_level = text_difficulty_level(d['text'], d3)
        result_of_generate_article = "found"
-    today_article = {}
+    today_article = None
    if d:
        oxford_words = load_oxford_words(oxford_words_path)
        oxford_word_count, total_words = count_oxford_words(d['text'],oxford_words)
--- a/app/main.py
+++ b/app/main.py
@ -144,8 +144,8 @@ if __name__ == '__main__':
    运行程序
    '''
    # app.secret_key = os.urandom(16)
-    app.run(debug=True, port=5000)
+    # app.run(debug=False, port='6000')
-    # app.run(debug=True)
+    app.run(debug=True)
    # app.run(debug=True, port='6000')
    # app.run(host='0.0.0.0', debug=True, port='6000')
    # print(mod5('123'))
--- a/app/templates/mainpage_get.html
+++ b/app/templates/mainpage_get.html
@ -31,7 +31,7 @@
            <p><a href="/login">登录</a>  <a href="/signup">注册</a> <a href="/static/usr/instructions.html">使用说明</a></p >
            <p><b> {{ random_ads }}。 <a href="/signup">试试</a>吧！</b></p>
        {% endif %}
-        <div class="alert alert-success" role="alert">共有文章 <span class="badge bg-success"> {{ number_of_essays }} </span> 篇，Oxford 5000 单词占比 <span class="badge bg-success"> {{ (ratio * 100) | int }}%  </span> </div>
+        <div class="alert alert-success" role="alert">共有文章 <span class="badge bg-success"> {{ number_of_essays }} </span> 篇，覆盖 <span class="badge bg-success"> {{ (ratio * 100) | int }}%  </span> 的 Oxford5000 单词</div>
        <p>粘贴1篇文章 (English only)</p>
        <form method="post" action="/">
            <textarea name="content" id="article" rows="10" cols="120"></textarea><br/>
--- a/app/templates/userpage_get.html
+++ b/app/templates/userpage_get.html
@ -87,7 +87,7 @@
    <div id="text-content">
        <div id="found">
-          <div class="alert alert-success" role="alert">According to your word list, your level is <span class="text-decoration-underline" id="user_level">{{ today_article["user_level"] }}</span>  and we have chosen an article with a difficulty level of <span class="text-decoration-underline" id="text_level">{{ today_article["text_level"] }}</span> for you. <span class="text-decoration-underline" id="ratio">{{ (today_article["ratio"] * 100) | int }}%</span> of the words in this article are in Oxford Word 5000.</div>
+          <div class="alert alert-success" role="alert">According to your word list, your level is <span class="text-decoration-underline" id="user_level">{{ today_article["user_level"] }}</span>  and we have chosen an article with a difficulty level of <span class="text-decoration-underline" id="text_level">{{ today_article["text_level"] }}</span> for you. The Oxford word coverage is <span class="text-decoration-underline" id="ratio">{{ (today_article["ratio"] * 100) | int }}%.</span></div>
            <p class="text-muted" id="date">Article added on: {{ today_article["date"] }}</p><br/>
 	    <button onclick="saveArticle()" >标记文章</button>
--- a/app/vocabulary.py
+++ b/app/vocabulary.py
@ -0,0 +1,91 @@
 '''
   Estimate a user's vocabulary level given his vocabulary data
   Estimate an English article's difficulty level given its content
   Preliminary design
   Hui, 2024-09-23
   Last updated: 2024-09-25, 2024-09-30
 '''
 import pickle
 import nltk
 # Difficulty score given to each word-list source name.  The lookups in this
 # module use DIFFICULTY_MAPPING.get(src, 0), so a source that is not listed
 # here contributes a score of 0.
 DIFFICULTY_MAPPING = {
    # basic vocabulary
    'BBC': 2,
    # CET-4 (College English Test, band 4)
    'CET4': 3,
    # CET-6 (College English Test, band 6)
    'CET6': 4,
    # postgraduate entrance exam vocabulary
    'GRADUATE': 5,
    # IELTS
    'IELTS': 6,
    # Oxford 3000 core words
    'OXFORD3000': 4,
    # Oxford 5000 words
    'OXFORD5000': 7,
 }
 def load_record(pickle_fname):
    '''
    Open the file named pickle_fname in binary mode and return the object
    unpickled from it.  Errors from open() or pickle.load() propagate to
    the caller.
    NOTE(review): unpickling can run arbitrary code, so this should only be
    given files the application itself produced -- confirm at call sites.
    '''
    with open(pickle_fname, 'rb') as stream:
        return pickle.load(stream)
 class VocabularyLevelEstimator:
    '''
    Base class that turns a list of words into one difficulty level.
    The level property reads self.word_lst, which this class does not set
    itself; whoever uses the property has to assign it first.
    '''
    # Loaded once, when the class body runs (i.e. when the module is imported).
    # The file name is relative, so it is resolved against the current
    # working directory.
    _test = load_record('words_and_tests.p') # map a word to the sources where it appears
    @property
    def level(self):
        '''
        Average difficulty of the words in self.word_lst that are known to _test.
        A word's difficulty is the highest DIFFICULTY_MAPPING score among its
        sources; sources missing from DIFFICULTY_MAPPING count as 0.  Words
        that are not in _test are ignored.  Returns 0.0 when word_lst is
        empty or none of its words is in _test.
        '''
        if not self.word_lst:  # no words at all
            return 0.0
        # default=0 keeps a word with an empty source collection from
        # raising ValueError in max(); such a word scores 0.
        scores = [
            max((DIFFICULTY_MAPPING.get(src, 0) for src in self._test[word]), default=0)
            for word in self.word_lst
            if word in self._test
        ]
        return sum(scores) / len(scores) if scores else 0.0
 class UserVocabularyLevel(VocabularyLevelEstimator):
    '''
    Vocabulary level of a user, based on a few words picked from the
    user's word record.
    '''
    def __init__(self, d, recent_n=3):
        '''
        d maps each word to an indexable sequence; its last element is the
        sort key.  NOTE(review): presumably that element is the latest
        timestamp at which the user met the word -- confirm against the
        code that writes the record.
        recent_n is how many words to keep after sorting.
        '''
        self.d = d
        self.recent_n = recent_n
        # Order the words by their last entry, largest first, then keep
        # the leading recent_n of them.
        ordered = list(d.keys())
        ordered.sort(key=lambda w: d[w][-1], reverse=True)
        self.word_lst = ordered[:recent_n]
 class ArticleVocabularyLevel(VocabularyLevelEstimator):
    '''
    Difficulty level of an article, based on the 10 highest-scoring words
    found in its text.
    '''
    def __init__(self, content):
        '''
        content is the article text.  It is lower-cased and split into
        \\w+ tokens, English stop words are removed, and the 10 tokens with
        the highest difficulty are stored in self.word_lst.  Repeated
        tokens are kept, so one word can fill several of the 10 slots.
        '''
        self.content = content
        import re
        from nltk.corpus import stopwords
        # Fetch the stop-word corpus only when it is missing, rather than
        # calling nltk.download() on every instantiation.
        try:
            stop_words = set(stopwords.words('english'))
        except LookupError:
            nltk.download('stopwords')
            stop_words = set(stopwords.words('english'))
        words = re.findall(r'\b\w+\b', content.lower())
        candidates = [word for word in words if word not in stop_words]
        # Keep the 10 hardest tokens; sorted() is stable, so equally
        # difficult tokens stay in their order of appearance.
        self.word_lst = sorted(
            candidates,
            key=self._get_difficulty,
            reverse=True
        )[:10]
    def _get_difficulty(self, word):
        '''
        Return the highest DIFFICULTY_MAPPING score among the sources of
        word, or 0 when word is not in _test, has no sources, or none of
        its sources is listed in DIFFICULTY_MAPPING.
        '''
        if word in self._test:
            # default=0 avoids ValueError from max() on an empty source collection.
            return max((DIFFICULTY_MAPPING.get(src, 0) for src in self._test[word]), default=0)
        return 0
 if __name__ == '__main__':
    # Manual smoke test: print a saved user record, the user's level and the
    # level of a short sample sentence.  Both data files are looked up
    # relative to the current working directory.
    separator = "======================================================"
    record = load_record('frequency_mrlan85.pickle')
    print(record)
    print(separator)
    user_estimator = UserVocabularyLevel(record)
    print(user_estimator.level) # level is a property
    print(separator)
    article_estimator = ArticleVocabularyLevel('This is an interesting article')
    print(article_estimator.level)