1
0
Fork 0

Compare commits

..

4 Commits

6 changed files with 7 additions and 98 deletions

View File

@ -18,7 +18,7 @@ picked from articles selected for him to read according his vocabulary level. E
`python3 main.py` `python3 main.py`
Make sure you have put the SQLite database file in the path `app/static` (see below). Make sure you have put the SQLite database file in the path `app/db` (see below).
## Run it as a Docker container ## Run it as a Docker container
@ -214,5 +214,5 @@ Bug report: http://118.25.96.118/bugzilla/show_bug.cgi?id=215
Bug report: http://118.25.96.118/bugzilla/show_bug.cgi?id=489 Bug report: http://118.25.96.118/bugzilla/show_bug.cgi?id=489
*Last modified on 2023-01-30* *Last modified on 2026-03-12*

View File

@ -106,7 +106,7 @@ def get_today_article(user_word_list, visited_articles):
text_level = text_difficulty_level(d['text'], d3) text_level = text_difficulty_level(d['text'], d3)
result_of_generate_article = "found" result_of_generate_article = "found"
today_article = None today_article = {}
if d: if d:
oxford_words = load_oxford_words(oxford_words_path) oxford_words = load_oxford_words(oxford_words_path)
oxford_word_count, total_words = count_oxford_words(d['text'],oxford_words) oxford_word_count, total_words = count_oxford_words(d['text'],oxford_words)

View File

@ -144,8 +144,8 @@ if __name__ == '__main__':
运行程序 运行程序
''' '''
# app.secret_key = os.urandom(16) # app.secret_key = os.urandom(16)
# app.run(debug=False, port='6000') app.run(debug=True, port=5000)
app.run(debug=True) # app.run(debug=True)
# app.run(debug=True, port='6000') # app.run(debug=True, port='6000')
# app.run(host='0.0.0.0', debug=True, port='6000') # app.run(host='0.0.0.0', debug=True, port='6000')
# print(mod5('123')) # print(mod5('123'))

View File

@ -31,7 +31,7 @@
<p><a href="/login">登录</a> <a href="/signup">注册</a> <a href="/static/usr/instructions.html">使用说明</a></p > <p><a href="/login">登录</a> <a href="/signup">注册</a> <a href="/static/usr/instructions.html">使用说明</a></p >
<p><b> {{ random_ads }}。 <a href="/signup">试试</a>吧!</b></p> <p><b> {{ random_ads }}。 <a href="/signup">试试</a>吧!</b></p>
{% endif %} {% endif %}
<div class="alert alert-success" role="alert">共有文章 <span class="badge bg-success"> {{ number_of_essays }} </span> 篇,覆盖 <span class="badge bg-success"> {{ (ratio * 100) | int }}% </span> 的 Oxford5000 单词</div> <div class="alert alert-success" role="alert">共有文章 <span class="badge bg-success"> {{ number_of_essays }} </span> 篇,Oxford 5000 单词占比 <span class="badge bg-success"> {{ (ratio * 100) | int }}% </span> </div>
<p>粘贴1篇文章 (English only)</p> <p>粘贴1篇文章 (English only)</p>
<form method="post" action="/"> <form method="post" action="/">
<textarea name="content" id="article" rows="10" cols="120"></textarea><br/> <textarea name="content" id="article" rows="10" cols="120"></textarea><br/>

View File

@ -87,7 +87,7 @@
<div id="text-content"> <div id="text-content">
<div id="found"> <div id="found">
<div class="alert alert-success" role="alert">According to your word list, your level is <span class="text-decoration-underline" id="user_level">{{ today_article["user_level"] }}</span> and we have chosen an article with a difficulty level of <span class="text-decoration-underline" id="text_level">{{ today_article["text_level"] }}</span> for you. The Oxford word coverage is <span class="text-decoration-underline" id="ratio">{{ (today_article["ratio"] * 100) | int }}%.</span></div> <div class="alert alert-success" role="alert">According to your word list, your level is <span class="text-decoration-underline" id="user_level">{{ today_article["user_level"] }}</span> and we have chosen an article with a difficulty level of <span class="text-decoration-underline" id="text_level">{{ today_article["text_level"] }}</span> for you. <span class="text-decoration-underline" id="ratio">{{ (today_article["ratio"] * 100) | int }}%</span> of the words in this article are in Oxford Word 5000.</div>
<p class="text-muted" id="date">Article added on: {{ today_article["date"] }}</p><br/> <p class="text-muted" id="date">Article added on: {{ today_article["date"] }}</p><br/>
<button onclick="saveArticle()" >标记文章</button> <button onclick="saveArticle()" >标记文章</button>

View File

@ -1,91 +0,0 @@
'''
Estimate a user's vocabulary level given his vocabulary data
Estimate an English article's difficulty level given its content
Preliminary design
Hui, 2024-09-23
Last updated: 2024-09-25, 2024-09-30
'''
import pickle
import nltk
# Difficulty score assigned to each word source; looked up by source name
# when scoring a word.
DIFFICULTY_MAPPING = {
    'BBC': 2,  # basic vocabulary
    'CET4': 3,  # CET-4 (College English Test)
    'CET6': 4,  # CET-6
    'GRADUATE': 5,  # postgraduate entrance exam vocabulary
    'IELTS': 6,  # IELTS
    'OXFORD3000': 4,  # Oxford 3000 core words
    'OXFORD5000': 7  # Oxford 5000 words
}
def load_record(pickle_fname):
    '''
    Read a single pickled object from a file and return it.

    pickle_fname -- path of the pickle file; it is opened in binary read mode.

    Returns the object produced by pickle.load for that file.
    '''
    with open(pickle_fname, 'rb') as stream:
        return pickle.load(stream)
class VocabularyLevelEstimator:
    '''
    Base class that turns a list of words into an average difficulty score.

    This class does not set self.word_lst itself; the level property reads
    it, so a subclass (or the caller) must assign it before level is used.
    '''
    # Loaded once, when the class body is executed.
    _test = load_record('words_and_tests.p')  # map a word to the sources where it appears

    @property
    def level(self):
        '''
        Average difficulty of the words in self.word_lst that appear in _test.

        Each known word is scored with the highest DIFFICULTY_MAPPING value
        among its sources; sources missing from DIFFICULTY_MAPPING count as 0.
        Words that are not in _test are ignored entirely.

        Returns 0.0 when word_lst is empty or none of its words are known.
        '''
        if not self.word_lst:  # nothing to score
            return 0.0
        total = 0.0
        valid_words = 0
        for word in self.word_lst:
            if word in self._test:
                sources = self._test[word]
                # default=0 keeps a word with an empty source collection from
                # raising ValueError; it is scored like an unmapped source.
                total += max(
                    (DIFFICULTY_MAPPING.get(src, 0) for src in sources),
                    default=0,
                )
                valid_words += 1
        return total / valid_words if valid_words > 0 else 0.0
class UserVocabularyLevel(VocabularyLevelEstimator):
    '''
    Estimator whose word list is taken from a user's vocabulary record.
    '''

    def __init__(self, d, recent_n=3):
        '''
        d        -- mapping from a word to a sequence; the last element of
                    each sequence is the sort key (presumably a timestamp --
                    TODO confirm against the code that writes the record).
        recent_n -- how many of the top-ranked words to keep.
        '''
        self.d = d
        self.recent_n = recent_n

        def last_entry(word):
            # Sort key: the final element stored for this word.
            return d[word][-1]

        # Rank words by their last entry, largest first, then keep the head.
        ranked = list(d.keys())
        ranked.sort(key=last_entry, reverse=True)
        self.word_lst = ranked[:recent_n]
class ArticleVocabularyLevel(VocabularyLevelEstimator):
    '''
    Estimator whose word list is the ten most difficult words of an article.
    '''

    def __init__(self, content):
        '''
        content -- the article text.

        The text is lower-cased, split into \\w+ tokens, stripped of NLTK
        English stop words, and the ten tokens with the highest difficulty
        are stored in self.word_lst. Repeated tokens are not de-duplicated.
        '''
        self.content = content
        # Preprocessing: tokenize, lower-case, drop punctuation and stop words.
        import re
        from nltk.corpus import stopwords
        try:
            stop_words = set(stopwords.words('english'))
        except LookupError:
            # Only fetch the corpus when it is missing, instead of calling
            # nltk.download on every instantiation.
            nltk.download('stopwords')
            stop_words = set(stopwords.words('english'))
        words = re.findall(r'\b\w+\b', content.lower())
        candidates = [word for word in words if word not in stop_words]
        # Keep the 10 hardest words; sorted() is stable, so ties keep
        # their order of appearance in the article.
        self.word_lst = sorted(
            candidates,
            key=self._get_difficulty,
            reverse=True
        )[:10]

    def _get_difficulty(self, word):
        '''
        Return the highest DIFFICULTY_MAPPING value among the word's sources.

        Returns 0 for a word that is not in _test, whose sources are all
        missing from DIFFICULTY_MAPPING, or whose source collection is empty.
        '''
        if word in self._test:
            # default=0 avoids ValueError from max() on an empty collection.
            return max(
                (DIFFICULTY_MAPPING.get(src, 0) for src in self._test[word]),
                default=0,
            )
        return 0
if __name__ == '__main__':
    # Manual smoke run: print a stored user record, the level estimated from
    # it, and the level of a short sample sentence.
    divider = "======================================================"
    frequency_record = load_record('frequency_mrlan85.pickle')
    print(frequency_record)
    print(divider)
    print(UserVocabularyLevel(frequency_record).level)  # level is a property
    print(divider)
    print(ArticleVocabularyLevel('This is an interesting article').level)