请老师查阅我们组更新的vocabulary.py

2025-05-29 15:01:16 +08:00
6 changed files with 124 additions and 7 deletions
--- a/README.md
+++ b/README.md
@ -18,7 +18,7 @@ picked from articles selected for him to read according his vocabulary level.  E
 `python3 main.py`
-Make sure you have put the SQLite database file in the path `app/db` (see below).
+Make sure you have put the SQLite database file in the path `app/static` (see below).
 ## Run it as a Docker container
@ -214,5 +214,5 @@ Bug report: http://118.25.96.118/bugzilla/show_bug.cgi?id=215
 Bug report: http://118.25.96.118/bugzilla/show_bug.cgi?id=489
-*Last modified on 2026-03-12*
+*Last modified on 2023-01-30*
--- a/app/Article.py
+++ b/app/Article.py
@ -106,7 +106,7 @@ def get_today_article(user_word_list, visited_articles):
        text_level = text_difficulty_level(d['text'], d3)
        result_of_generate_article = "found"
-    today_article = {}
+    today_article = None
    if d:
        oxford_words = load_oxford_words(oxford_words_path)
        oxford_word_count, total_words = count_oxford_words(d['text'],oxford_words)
--- a/app/main.py
+++ b/app/main.py
@ -144,8 +144,8 @@ if __name__ == '__main__':
    运行程序
    '''
    # app.secret_key = os.urandom(16)
-    app.run(debug=True, port=5000)
+    # app.run(debug=False, port='6000')
-    # app.run(debug=True)
+    app.run(debug=True)
    # app.run(debug=True, port='6000')
    # app.run(host='0.0.0.0', debug=True, port='6000')
    # print(mod5('123'))
--- a/app/templates/mainpage_get.html
+++ b/app/templates/mainpage_get.html
@ -31,7 +31,7 @@
            <p><a href="/login">登录</a>  <a href="/signup">注册</a> <a href="/static/usr/instructions.html">使用说明</a></p >
            <p><b> {{ random_ads }}。 <a href="/signup">试试</a>吧！</b></p>
        {% endif %}
-        <div class="alert alert-success" role="alert">共有文章 <span class="badge bg-success"> {{ number_of_essays }} </span> 篇，Oxford 5000 单词占比 <span class="badge bg-success"> {{ (ratio * 100) | int }}%  </span> </div>
+        <div class="alert alert-success" role="alert">共有文章 <span class="badge bg-success"> {{ number_of_essays }} </span> 篇，覆盖 <span class="badge bg-success"> {{ (ratio * 100) | int }}%  </span> 的 Oxford5000 单词</div>
        <p>粘贴1篇文章 (English only)</p>
        <form method="post" action="/">
            <textarea name="content" id="article" rows="10" cols="120"></textarea><br/>
--- a/app/templates/userpage_get.html
+++ b/app/templates/userpage_get.html
@ -87,7 +87,7 @@
    <div id="text-content">
        <div id="found">
-          <div class="alert alert-success" role="alert">According to your word list, your level is <span class="text-decoration-underline" id="user_level">{{ today_article["user_level"] }}</span>  and we have chosen an article with a difficulty level of <span class="text-decoration-underline" id="text_level">{{ today_article["text_level"] }}</span> for you. <span class="text-decoration-underline" id="ratio">{{ (today_article["ratio"] * 100) | int }}%</span> of the words in this article are in Oxford Word 5000.</div>
+          <div class="alert alert-success" role="alert">According to your word list, your level is <span class="text-decoration-underline" id="user_level">{{ today_article["user_level"] }}</span>  and we have chosen an article with a difficulty level of <span class="text-decoration-underline" id="text_level">{{ today_article["text_level"] }}</span> for you. The Oxford word coverage is <span class="text-decoration-underline" id="ratio">{{ (today_article["ratio"] * 100) | int }}%.</span></div>
            <p class="text-muted" id="date">Article added on: {{ today_article["date"] }}</p><br/>
 	    <button onclick="saveArticle()" >标记文章</button>
--- a/app/vocabulary(2).py
+++ b/app/vocabulary(2).py
@ -0,0 +1,117 @@
 import pickle
 import string
 import os
 def read_pickle(file_name):
    """Load and return the object stored in the pickle file ``file_name``.

    If ``file_name`` is not an existing regular file, a notice is printed
    and an empty dict is returned instead of raising.
    """
    if os.path.isfile(file_name):
        with open(file_name, 'rb') as pickle_file:
            payload = pickle.load(pickle_file)
        return payload
    # Missing file (or a directory): report it and fall back to an empty dict.
    print(f"File {file_name} does not exist.")
    return {}
 class VocabularyLevelEstimator:
    """Base class providing the shared word database and a per-word check.

    Subclasses are expected to expose their computed result through a
    ``level`` property; ``difficulty_level`` is an alias for that property.
    """
    # Word database, loaded once when the class body is executed. The path is
    # relative to the current working directory; read_pickle returns an empty
    # dict when the file is missing, in which case every word evaluates to 0.
    _word_db = read_pickle('words_and_tests.p')
    def evaluate_word_difficulty(self, word):
        """Return 5 if ``word`` is found in the word database, otherwise 0.

        Empty strings, strings made only of punctuation, and strings made
        only of digits are rejected with 0 before the database lookup.
        The lookup is an exact (case-sensitive) membership test.
        """
        if not word or all(ch in string.punctuation for ch in word) or word.isdigit():
            return 0
        return 5 if word in self._word_db else 0
    @property
    def difficulty_level(self):
        """Return the estimator's level by delegating to ``self.level``.

        The previous implementation called ``calculate_longest_words_level``,
        a method that is not defined on this class or its subclasses, so the
        property always raised ``AttributeError``. An instance without a
        ``level`` attribute (such as a bare base-class instance) still raises
        ``AttributeError``.
        """
        return self.level
 class UserVocabularyLevel(VocabularyLevelEstimator):
    """Estimate a user's vocabulary level from the tail of a vocabulary dict."""
    def __init__(self, vocab_dict):
        """Store ``vocab_dict`` and remember its last three keys."""
        self.vocab_dict = vocab_dict
        # Only the final three keys (in the dict's iteration order) are scored.
        every_word = list(vocab_dict.keys())
        self.recent_words = every_word[-3:]
    @property
    def level(self):
        """Return the user's level computed from the recent valid words.

        A word is valid when ``evaluate_word_difficulty`` rates it above 0.
        No valid word gives 0, exactly one valid word is scored with
        ``score_single_word``, and two or more with ``score_multiple_words``.
        """
        recognised = []
        for candidate in self.recent_words:
            if self.evaluate_word_difficulty(candidate) > 0:
                recognised.append(candidate)
        if not recognised:
            return 0
        if len(recognised) == 1:
            return self.score_single_word(recognised[0])
        return self.score_multiple_words(recognised)
    def score_single_word(self, word):
        """Score one word by length: under 7 -> 2, exactly 7 -> 5, longer -> 6."""
        size = len(word)
        if size >= 8:
            return 6
        return 5 if size == 7 else 2
    def score_multiple_words(self, valid_words):
        """Return the truncated, capped score for several valid words.

        Each word earns 1 point (length under 5), 3 points (length 5 to 7)
        or 5 points (length 8 or more). The mean of those points is
        multiplied by 1.6, truncated to an int, and capped at 8.
        """
        def points_for(size):
            # Length buckets: 8+ -> 5, 5..7 -> 3, below 5 -> 1.
            if size >= 8:
                return 5
            if size >= 5:
                return 3
            return 1
        points_total = sum(points_for(len(entry)) for entry in valid_words)
        mean_points = points_total / len(valid_words)
        return min(int(mean_points * 1.6), 8)
 class ArticleVocabularyLevel(VocabularyLevelEstimator):
    """Estimate an article's difficulty from its longest valid words."""
    def __init__(self, article_text):
        """Store the text and split it into lower-cased, punctuation-trimmed words."""
        self.article_text = article_text
        tokens = article_text.lower().split()
        # Trim punctuation from both ends of each token; drop tokens left empty.
        trimmed = (token.strip(string.punctuation) for token in tokens)
        self.words = [token for token in trimmed if token]
    @property
    def level(self):
        """Return the article difficulty based on its ten longest valid words.

        A word is valid when ``evaluate_word_difficulty`` rates it above 0.
        Returns 0 when there is no valid word. Otherwise each selected word
        contributes 0.1 (length under 5), 0.2 (5 to 7), 0.3 (8 to 10) or
        0.5 (11 or more), and the result is the sum of contributions plus 2.
        """
        known = [entry for entry in self.words if self.evaluate_word_difficulty(entry) > 0]
        # Stable sort, longest first; ties keep their order of appearance.
        known.sort(key=len, reverse=True)
        top_ten = known[:10]
        if not top_ten:
            return 0
        weights = []
        for entry in top_ten:
            size = len(entry)
            if size >= 11:
                weight = 0.5
            elif size >= 8:
                weight = 0.3
            elif size >= 5:
                weight = 0.2
            else:
                weight = 0.1
            weights.append(weight)
        return sum(weights) + 2
 if __name__ == '__main__':
    # Manual smoke test: print the loaded frequency data, the user level
    # derived from it, and the level of a short sample sentence.
    frequency_data = read_pickle('frequency_mrlan85.pickle')
    print(frequency_data)
    print(UserVocabularyLevel(frequency_data).level)
    sample_text = 'This is an interesting article.'
    print(ArticleVocabularyLevel(sample_text).level)