Compare commits

..

1 Commits

Author SHA1 Message Date
陈佳 d8e5fa4f54 请老师查阅我们组更新的vocabulary.py 2025-05-29 15:01:16 +08:00
6 changed files with 124 additions and 7 deletions

View File

@ -18,7 +18,7 @@ picked from articles selected for him to read according to his vocabulary level. E
`python3 main.py` `python3 main.py`
Make sure you have put the SQLite database file in the path `app/db` (see below). Make sure you have put the SQLite database file in the path `app/static` (see below).
## Run it as a Docker container ## Run it as a Docker container
@ -214,5 +214,5 @@ Bug report: http://118.25.96.118/bugzilla/show_bug.cgi?id=215
Bug report: http://118.25.96.118/bugzilla/show_bug.cgi?id=489 Bug report: http://118.25.96.118/bugzilla/show_bug.cgi?id=489
*Last modified on 2026-03-12* *Last modified on 2023-01-30*

View File

@ -106,7 +106,7 @@ def get_today_article(user_word_list, visited_articles):
text_level = text_difficulty_level(d['text'], d3) text_level = text_difficulty_level(d['text'], d3)
result_of_generate_article = "found" result_of_generate_article = "found"
today_article = {} today_article = None
if d: if d:
oxford_words = load_oxford_words(oxford_words_path) oxford_words = load_oxford_words(oxford_words_path)
oxford_word_count, total_words = count_oxford_words(d['text'],oxford_words) oxford_word_count, total_words = count_oxford_words(d['text'],oxford_words)

View File

@ -144,8 +144,8 @@ if __name__ == '__main__':
运行程序 运行程序
''' '''
# app.secret_key = os.urandom(16) # app.secret_key = os.urandom(16)
app.run(debug=True, port=5000) # app.run(debug=False, port='6000')
# app.run(debug=True) app.run(debug=True)
# app.run(debug=True, port='6000') # app.run(debug=True, port='6000')
# app.run(host='0.0.0.0', debug=True, port='6000') # app.run(host='0.0.0.0', debug=True, port='6000')
# print(mod5('123')) # print(mod5('123'))

View File

@ -31,7 +31,7 @@
<p><a href="/login">登录</a> <a href="/signup">注册</a> <a href="/static/usr/instructions.html">使用说明</a></p > <p><a href="/login">登录</a> <a href="/signup">注册</a> <a href="/static/usr/instructions.html">使用说明</a></p >
<p><b> {{ random_ads }}。 <a href="/signup">试试</a>吧!</b></p> <p><b> {{ random_ads }}。 <a href="/signup">试试</a>吧!</b></p>
{% endif %} {% endif %}
<div class="alert alert-success" role="alert">共有文章 <span class="badge bg-success"> {{ number_of_essays }} </span> 篇,Oxford 5000 单词占比 <span class="badge bg-success"> {{ (ratio * 100) | int }}% </span> </div> <div class="alert alert-success" role="alert">共有文章 <span class="badge bg-success"> {{ number_of_essays }} </span> 篇,覆盖 <span class="badge bg-success"> {{ (ratio * 100) | int }}% </span> 的 Oxford5000 单词</div>
<p>粘贴1篇文章 (English only)</p> <p>粘贴1篇文章 (English only)</p>
<form method="post" action="/"> <form method="post" action="/">
<textarea name="content" id="article" rows="10" cols="120"></textarea><br/> <textarea name="content" id="article" rows="10" cols="120"></textarea><br/>

View File

@ -87,7 +87,7 @@
<div id="text-content"> <div id="text-content">
<div id="found"> <div id="found">
<div class="alert alert-success" role="alert">According to your word list, your level is <span class="text-decoration-underline" id="user_level">{{ today_article["user_level"] }}</span> and we have chosen an article with a difficulty level of <span class="text-decoration-underline" id="text_level">{{ today_article["text_level"] }}</span> for you. <span class="text-decoration-underline" id="ratio">{{ (today_article["ratio"] * 100) | int }}%</span> of the words in this article are in Oxford Word 5000.</div> <div class="alert alert-success" role="alert">According to your word list, your level is <span class="text-decoration-underline" id="user_level">{{ today_article["user_level"] }}</span> and we have chosen an article with a difficulty level of <span class="text-decoration-underline" id="text_level">{{ today_article["text_level"] }}</span> for you. The Oxford word coverage is <span class="text-decoration-underline" id="ratio">{{ (today_article["ratio"] * 100) | int }}%.</span></div>
<p class="text-muted" id="date">Article added on: {{ today_article["date"] }}</p><br/> <p class="text-muted" id="date">Article added on: {{ today_article["date"] }}</p><br/>
<button onclick="saveArticle()" >标记文章</button> <button onclick="saveArticle()" >标记文章</button>

117
app/vocabulary(2).py Normal file
View File

@ -0,0 +1,117 @@
import pickle
import string
import os
def read_pickle(file_name):
    """Load and return the object stored in a pickle file.

    Args:
        file_name: Path of the pickle file to read.

    Returns:
        The unpickled object. When ``file_name`` is not an existing regular
        file, a notice is printed and an empty dict is returned instead.

    Raises:
        pickle.UnpicklingError, EOFError: If the file exists but does not
            contain a valid pickle stream.
    """
    # SECURITY NOTE: pickle.load can execute arbitrary code embedded in the
    # file. Only call this on files produced by this application.
    if os.path.isfile(file_name):
        try:
            handle = open(file_name, 'rb')
        except FileNotFoundError:
            # The file vanished between the isfile() check and open();
            # treat it exactly like a file that was never there.
            pass
        else:
            with handle:
                return pickle.load(handle)
    print(f"File {file_name} does not exist.")
    return {}
class VocabularyLevelEstimator:
    """Shared base for the vocabulary level estimators in this module.

    Provides the word database lookup used to decide whether a word counts
    as "valid", plus a ``difficulty_level`` alias for the ``level`` property
    that concrete subclasses define.
    """

    # Word database, loaded once when the class is defined. The path is
    # relative to the current working directory.
    _word_db = read_pickle('words_and_tests.p')

    def evaluate_word_difficulty(self, word):
        """Return 5 if ``word`` is in the word database, otherwise 0.

        Empty strings, punctuation-only strings and purely numeric strings
        always score 0, regardless of the database contents.
        """
        if not word or all(ch in string.punctuation for ch in word) or word.isdigit():
            return 0
        return 5 if word in self._word_db else 0

    @property
    def difficulty_level(self):
        """Return the estimator's level.

        The original implementation called ``calculate_longest_words_level``,
        a method that is not defined in this file, so every access raised
        ``AttributeError``. A subclass that does provide that method is still
        honored; otherwise the subclass's ``level`` property is used.
        """
        legacy = getattr(self, 'calculate_longest_words_level', None)
        if callable(legacy):
            return legacy()
        return self.level
class UserVocabularyLevel(VocabularyLevelEstimator):
    """Estimate a user's vocabulary level from the tail of a word dictionary."""

    def __init__(self, vocab_dict):
        """Keep ``vocab_dict`` and remember its last three keys.

        "Last" means last in the dictionary's iteration order.
        """
        self.vocab_dict = vocab_dict
        all_words = list(vocab_dict.keys())
        self.recent_words = all_words[-3:]

    @property
    def level(self):
        """Return the user's level computed from the valid recent words.

        A word is valid when ``evaluate_word_difficulty`` scores it above 0.
        No valid words gives 0; one valid word is scored by
        ``score_single_word``; two or more by ``score_multiple_words``.
        """
        recognised = []
        for candidate in self.recent_words:
            if self.evaluate_word_difficulty(candidate) > 0:
                recognised.append(candidate)

        if not recognised:
            return 0
        if len(recognised) == 1:
            (only_word,) = recognised
            return self.score_single_word(only_word)
        return self.score_multiple_words(recognised)

    def score_single_word(self, word):
        """Score one word by length: under 7 -> 2, exactly 7 -> 5, longer -> 6."""
        n_chars = len(word)
        if n_chars >= 8:
            return 6
        if n_chars == 7:
            return 5
        return 2

    def score_multiple_words(self, valid_words):
        """Score several words from the average of their length-based points.

        Each word earns 1 point (length under 5), 3 points (length 5 to 7)
        or 5 points (length 8 and above). The mean is scaled by 1.6,
        truncated to an int and capped at 8.
        """
        def points_for(word):
            size = len(word)
            if size >= 8:
                return 5
            if size >= 5:
                return 3
            return 1

        total_points = sum(points_for(word) for word in valid_words)
        mean_points = total_points / len(valid_words)
        return min(int(mean_points * 1.6), 8)
class ArticleVocabularyLevel(VocabularyLevelEstimator):
    """Estimate the difficulty level of an article from its longest valid words."""

    def __init__(self, article_text):
        """Keep the text and tokenize it.

        Tokens are the lower-cased, whitespace-separated pieces of the text
        with leading and trailing punctuation removed. Pieces that become
        empty after stripping are dropped.
        """
        self.article_text = article_text
        tokens = []
        for raw_piece in article_text.lower().split():
            cleaned = raw_piece.strip(string.punctuation)
            if cleaned:
                tokens.append(cleaned)
        self.words = tokens

    @property
    def level(self):
        """Return the article level based on its ten longest valid words.

        A word is valid when ``evaluate_word_difficulty`` scores it above 0.
        With no valid words the level is 0. Otherwise each of the (up to)
        ten longest valid words contributes a weight chosen by its length,
        and the level is the sum of those weights plus 2.
        """
        known_words = [w for w in self.words if self.evaluate_word_difficulty(w) > 0]
        if not known_words:
            return 0

        def weight_for(size):
            # Length bands: <5 -> 0.1, 5-7 -> 0.2, 8-10 -> 0.3, 11+ -> 0.5
            if size >= 11:
                return 0.5
            if size >= 8:
                return 0.3
            if size >= 5:
                return 0.2
            return 0.1

        top_ten = sorted(known_words, key=len, reverse=True)[:10]
        weights = [weight_for(len(w)) for w in top_ten]
        return sum(weights) + 2
if __name__ == '__main__':
    # Manual smoke run: print the loaded word dictionary, then the level
    # computed for it, then the level of a short sample sentence.
    vocab_dict = read_pickle('frequency_mrlan85.pickle')
    print(vocab_dict)
    print(UserVocabularyLevel(vocab_dict).level)
    print(ArticleVocabularyLevel('This is an interesting article.').level)