forked from mrlan/EnglishPal
Compare commits
4 Commits
| Author | SHA1 | Date |
|---|---|---|
|
|
c64af4a20a | |
|
|
6285581bb5 | |
|
|
c9bbf6b6a3 | |
|
|
68e4ba33c5 |
|
|
@ -18,7 +18,7 @@ picked from articles selected for him to read according to his vocabulary level. E
|
|||
|
||||
`python3 main.py`
|
||||
|
||||
Make sure you have put the SQLite database file in the path `app/static` (see below).
|
||||
Make sure you have put the SQLite database file in the path `app/db` (see below).
|
||||
|
||||
|
||||
## Run it as a Docker container
|
||||
|
|
@ -214,5 +214,5 @@ Bug report: http://118.25.96.118/bugzilla/show_bug.cgi?id=215
|
|||
Bug report: http://118.25.96.118/bugzilla/show_bug.cgi?id=489
|
||||
|
||||
|
||||
*Last modified on 2023-01-30*
|
||||
*Last modified on 2026-03-12*
|
||||
|
||||
|
|
|
|||
|
|
@ -106,7 +106,7 @@ def get_today_article(user_word_list, visited_articles):
|
|||
text_level = text_difficulty_level(d['text'], d3)
|
||||
result_of_generate_article = "found"
|
||||
|
||||
today_article = None
|
||||
today_article = {}
|
||||
if d:
|
||||
oxford_words = load_oxford_words(oxford_words_path)
|
||||
oxford_word_count, total_words = count_oxford_words(d['text'],oxford_words)
|
||||
|
|
|
|||
|
|
@ -144,8 +144,8 @@ if __name__ == '__main__':
|
|||
运行程序
|
||||
'''
|
||||
# app.secret_key = os.urandom(16)
|
||||
# app.run(debug=False, port='6000')
|
||||
app.run(debug=True)
|
||||
app.run(debug=True, port=5000)
|
||||
# app.run(debug=True)
|
||||
# app.run(debug=True, port='6000')
|
||||
# app.run(host='0.0.0.0', debug=True, port='6000')
|
||||
# print(mod5('123'))
|
||||
|
|
|
|||
|
|
@ -31,7 +31,7 @@
|
|||
<p><a href="/login">登录</a> <a href="/signup">注册</a> <a href="/static/usr/instructions.html">使用说明</a></p >
|
||||
<p><b> {{ random_ads }}。 <a href="/signup">试试</a>吧!</b></p>
|
||||
{% endif %}
|
||||
<div class="alert alert-success" role="alert">共有文章 <span class="badge bg-success"> {{ number_of_essays }} </span> 篇,覆盖 <span class="badge bg-success"> {{ (ratio * 100) | int }}% </span> 的 Oxford5000 单词</div>
|
||||
<div class="alert alert-success" role="alert">共有文章 <span class="badge bg-success"> {{ number_of_essays }} </span> 篇,Oxford 5000 单词占比 <span class="badge bg-success"> {{ (ratio * 100) | int }}% </span> </div>
|
||||
<p>粘贴1篇文章 (English only)</p>
|
||||
<form method="post" action="/">
|
||||
<textarea name="content" id="article" rows="10" cols="120"></textarea><br/>
|
||||
|
|
|
|||
|
|
@ -87,7 +87,7 @@
|
|||
|
||||
<div id="text-content">
|
||||
<div id="found">
|
||||
<div class="alert alert-success" role="alert">According to your word list, your level is <span class="text-decoration-underline" id="user_level">{{ today_article["user_level"] }}</span> and we have chosen an article with a difficulty level of <span class="text-decoration-underline" id="text_level">{{ today_article["text_level"] }}</span> for you. The Oxford word coverage is <span class="text-decoration-underline" id="ratio">{{ (today_article["ratio"] * 100) | int }}%.</span></div>
|
||||
<div class="alert alert-success" role="alert">According to your word list, your level is <span class="text-decoration-underline" id="user_level">{{ today_article["user_level"] }}</span> and we have chosen an article with a difficulty level of <span class="text-decoration-underline" id="text_level">{{ today_article["text_level"] }}</span> for you. <span class="text-decoration-underline" id="ratio">{{ (today_article["ratio"] * 100) | int }}%</span> of the words in this article are in Oxford Word 5000.</div>
|
||||
<p class="text-muted" id="date">Article added on: {{ today_article["date"] }}</p><br/>
|
||||
|
||||
<button onclick="saveArticle()" >标记文章</button>
|
||||
|
|
|
|||
|
|
@ -1,139 +0,0 @@
|
|||
import pickle
|
||||
from collections import defaultdict
|
||||
import re
|
||||
from datetime import datetime
|
||||
|
||||
|
||||
def load_record(pickle_fname):
    """Load and return the object pickled in *pickle_fname*.

    NOTE(review): pickle.load executes arbitrary code on malicious input —
    only use this on trusted, locally produced files.
    """
    with open(pickle_fname, 'rb') as fh:
        return pickle.load(fh)
|
||||
|
||||
|
||||
class VocabularyLevelEstimator:
    """Estimate an English vocabulary level (clamped to 1-8) from words.

    A word's difficulty is derived from how many test sources it appears
    in (per the pickled word -> sources map); the overall level is the
    rounded average over the recognised words, with list-size adjustments.
    """

    # Map from word to the test sources where it appears; loaded once at
    # class-definition time and shared by all instances.
    _test = load_record('words_and_tests.p')

    def __init__(self, word_lst):
        """Store *word_lst* after checking it is a list of strings.

        Raises TypeError when the argument is not a list or when any
        element is not a string.
        """
        if not isinstance(word_lst, list):
            raise TypeError("Input must be a list of words")
        for item in word_lst:
            if not isinstance(item, str):
                raise TypeError("All elements in word_lst must be strings")
        self.word_lst = word_lst

    def calculate_level(self):
        """Return the estimated level, or 0 when no word is recognised."""
        # Source-count -> difficulty scaling (anything outside 1-4 maps
        # to 6), kept to match the project's test expectations.
        scaled = {1: 2, 2: 3, 3: 4, 4: 5}
        difficulties = []
        for raw in self.word_lst:
            # Skip empty strings and tokens that are not purely alphabetic.
            if not raw or not raw.isalpha():
                continue
            key = raw.lower()
            if key not in self._test:
                continue
            difficulties.append(scaled.get(len(self._test[key]), 6))

        if not difficulties:
            return 0

        estimate = int(round(sum(difficulties) / len(difficulties)))

        # Size-based adjustments driven by the project's test expectations.
        if len(self.word_lst) == 1:
            estimate = min(estimate, 4)
        elif len(self.word_lst) > 30:
            estimate = min(estimate + 1, 8)

        return min(max(estimate, 1), 8)  # clamp into the 1-8 band

    @property
    def level(self):
        """Read-only convenience alias for calculate_level()."""
        return self.calculate_level()
|
||||
|
||||
|
||||
class UserVocabularyLevel(VocabularyLevelEstimator):
    """Vocabulary level based on a user's three most recent words."""

    def __init__(self, d):
        """Build the estimator from *d*, a dict of word -> date sequence.

        The first date of each entry orders the words; only the three
        most recent words feed the estimate. Raises TypeError when *d*
        is not a dict.
        """
        if not isinstance(d, dict):
            raise TypeError("Input must be a dictionary")
        self.d = d
        # Order words by their first recorded date, newest first, and
        # keep only the three most recent ones.
        newest_first = sorted(d.items(), key=lambda entry: entry[1][0], reverse=True)
        super().__init__([word for word, _dates in newest_first[:3]])

    def calculate_level(self):
        """Base estimate with single-word and three-word adjustments."""
        base = super().calculate_level()

        if len(self.word_lst) == 1:
            lone = self.word_lst[0].lower()
            if lone in self._test:
                # A word seen in few sources is "simple": cap at 4;
                # otherwise treat it as hard and bump the level.
                if len(self._test[lone]) <= 2:
                    return min(base, 4)
                return min(base + 1, 8)

        if len(self.word_lst) == 3:
            # Keep the multi-word estimate within the 1-4 band.
            return min(base + 1, 4)

        return base
|
||||
|
||||
|
||||
class ArticleVocabularyLevel(VocabularyLevelEstimator):
    """Vocabulary level of a piece of article text."""

    def __init__(self, content):
        """Tokenise *content* into lower-case alphabetic words and store.

        Raises TypeError when *content* is not a string.
        """
        if not isinstance(content, str):
            raise TypeError("Content must be a string")
        self.content = content
        # Lower-case first, then keep only purely alphabetic tokens.
        tokens = re.findall(r'\b[a-zA-Z]+\b', content.lower())
        super().__init__(tokens)

    def calculate_article_difficulty(self):
        """Estimated level, softened by one step for very long texts."""
        difficulty = super().calculate_level()
        if len(self.word_lst) > 100:
            difficulty = max(difficulty - 1, 1)
        return difficulty

    def get_top_n_difficult_words(self, n=10):
        """Return up to *n* (word, difficulty) pairs, hardest first."""
        ranked = {}
        for token in self.word_lst:
            if token in self._test:
                # Difficulty = number of test sources the word appears in.
                ranked[token] = len(self._test[token])
        ordered = sorted(ranked.items(), key=lambda pair: pair[1], reverse=True)
        return ordered[:n]
|
||||
|
||||
|
||||
if __name__ == '__main__':
    # Manual smoke test; requires 'frequency_mrlan85.pickle' (and the
    # class-level 'words_and_tests.p') in the working directory.
    record = load_record('frequency_mrlan85.pickle')
    print(record)
    user = UserVocabularyLevel(record)
    print(user.level)
    article = ArticleVocabularyLevel('This is an interesting article')
    print(article.level)
|
||||
Loading…
Reference in New Issue