forked from mrlan/EnglishPal
Compare commits
4 Commits
Bug585-YuY
...
master
| Author | SHA1 | Date |
|---|---|---|
|
|
c64af4a20a | |
|
|
6285581bb5 | |
|
|
c9bbf6b6a3 | |
|
|
68e4ba33c5 |
|
|
@ -18,7 +18,7 @@ picked from articles selected for him to read according to his vocabulary level. E
|
||||||
|
|
||||||
`python3 main.py`
|
`python3 main.py`
|
||||||
|
|
||||||
Make sure you have put the SQLite database file in the path `app/static` (see below).
|
Make sure you have put the SQLite database file in the path `app/db` (see below).
|
||||||
|
|
||||||
|
|
||||||
## Run it as a Docker container
|
## Run it as a Docker container
|
||||||
|
|
@ -214,5 +214,5 @@ Bug report: http://118.25.96.118/bugzilla/show_bug.cgi?id=215
|
||||||
Bug report: http://118.25.96.118/bugzilla/show_bug.cgi?id=489
|
Bug report: http://118.25.96.118/bugzilla/show_bug.cgi?id=489
|
||||||
|
|
||||||
|
|
||||||
*Last modified on 2023-01-30*
|
*Last modified on 2026-03-12*
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -106,7 +106,7 @@ def get_today_article(user_word_list, visited_articles):
|
||||||
text_level = text_difficulty_level(d['text'], d3)
|
text_level = text_difficulty_level(d['text'], d3)
|
||||||
result_of_generate_article = "found"
|
result_of_generate_article = "found"
|
||||||
|
|
||||||
today_article = None
|
today_article = {}
|
||||||
if d:
|
if d:
|
||||||
oxford_words = load_oxford_words(oxford_words_path)
|
oxford_words = load_oxford_words(oxford_words_path)
|
||||||
oxford_word_count, total_words = count_oxford_words(d['text'],oxford_words)
|
oxford_word_count, total_words = count_oxford_words(d['text'],oxford_words)
|
||||||
|
|
|
||||||
|
|
@ -144,8 +144,8 @@ if __name__ == '__main__':
|
||||||
运行程序
|
运行程序
|
||||||
'''
|
'''
|
||||||
# app.secret_key = os.urandom(16)
|
# app.secret_key = os.urandom(16)
|
||||||
# app.run(debug=False, port='6000')
|
app.run(debug=True, port=5000)
|
||||||
app.run(debug=True)
|
# app.run(debug=True)
|
||||||
# app.run(debug=True, port='6000')
|
# app.run(debug=True, port='6000')
|
||||||
# app.run(host='0.0.0.0', debug=True, port='6000')
|
# app.run(host='0.0.0.0', debug=True, port='6000')
|
||||||
# print(mod5('123'))
|
# print(mod5('123'))
|
||||||
|
|
|
||||||
|
|
@ -31,7 +31,7 @@
|
||||||
<p><a href="/login">登录</a> <a href="/signup">注册</a> <a href="/static/usr/instructions.html">使用说明</a></p >
|
<p><a href="/login">登录</a> <a href="/signup">注册</a> <a href="/static/usr/instructions.html">使用说明</a></p >
|
||||||
<p><b> {{ random_ads }}。 <a href="/signup">试试</a>吧!</b></p>
|
<p><b> {{ random_ads }}。 <a href="/signup">试试</a>吧!</b></p>
|
||||||
{% endif %}
|
{% endif %}
|
||||||
<div class="alert alert-success" role="alert">共有文章 <span class="badge bg-success"> {{ number_of_essays }} </span> 篇,覆盖 <span class="badge bg-success"> {{ (ratio * 100) | int }}% </span> 的 Oxford5000 单词</div>
|
<div class="alert alert-success" role="alert">共有文章 <span class="badge bg-success"> {{ number_of_essays }} </span> 篇,Oxford 5000 单词占比 <span class="badge bg-success"> {{ (ratio * 100) | int }}% </span> </div>
|
||||||
<p>粘贴1篇文章 (English only)</p>
|
<p>粘贴1篇文章 (English only)</p>
|
||||||
<form method="post" action="/">
|
<form method="post" action="/">
|
||||||
<textarea name="content" id="article" rows="10" cols="120"></textarea><br/>
|
<textarea name="content" id="article" rows="10" cols="120"></textarea><br/>
|
||||||
|
|
|
||||||
|
|
@ -87,7 +87,7 @@
|
||||||
|
|
||||||
<div id="text-content">
|
<div id="text-content">
|
||||||
<div id="found">
|
<div id="found">
|
||||||
<div class="alert alert-success" role="alert">According to your word list, your level is <span class="text-decoration-underline" id="user_level">{{ today_article["user_level"] }}</span> and we have chosen an article with a difficulty level of <span class="text-decoration-underline" id="text_level">{{ today_article["text_level"] }}</span> for you. The Oxford word coverage is <span class="text-decoration-underline" id="ratio">{{ (today_article["ratio"] * 100) | int }}%.</span></div>
|
<div class="alert alert-success" role="alert">According to your word list, your level is <span class="text-decoration-underline" id="user_level">{{ today_article["user_level"] }}</span> and we have chosen an article with a difficulty level of <span class="text-decoration-underline" id="text_level">{{ today_article["text_level"] }}</span> for you. <span class="text-decoration-underline" id="ratio">{{ (today_article["ratio"] * 100) | int }}%</span> of the words in this article are in Oxford Word 5000.</div>
|
||||||
<p class="text-muted" id="date">Article added on: {{ today_article["date"] }}</p><br/>
|
<p class="text-muted" id="date">Article added on: {{ today_article["date"] }}</p><br/>
|
||||||
|
|
||||||
<button onclick="saveArticle()" >标记文章</button>
|
<button onclick="saveArticle()" >标记文章</button>
|
||||||
|
|
|
||||||
|
|
@ -1,91 +0,0 @@
|
||||||
'''
|
|
||||||
Estimate a user's vocabulary level given his vocabulary data
|
|
||||||
Estimate an English article's difficulty level given its content
|
|
||||||
Preliminary design
|
|
||||||
|
|
||||||
Hui, 2024-09-23
|
|
||||||
Last updated: 2024-09-25, 2024-09-30
|
|
||||||
'''
|
|
||||||
|
|
||||||
import pickle
|
|
||||||
|
|
||||||
import nltk
|
|
||||||
|
|
||||||
# Difficulty score assigned to each word-list source name.
DIFFICULTY_MAPPING = dict(
    BBC=2,         # basic vocabulary
    CET4=3,        # CET-4 (College English Test, band 4)
    CET6=4,        # CET-6 (College English Test, band 6)
    GRADUATE=5,    # postgraduate entrance exam vocabulary
    IELTS=6,       # IELTS
    OXFORD3000=4,  # Oxford 3000 core words
    OXFORD5000=7,  # Oxford 5000 words
)
|
|
||||||
|
|
||||||
|
|
||||||
def load_record(pickle_fname):
    """Open ``pickle_fname`` in binary mode and return the unpickled object.

    NOTE(review): unpickling can execute arbitrary code, so this should
    only be pointed at files the application itself produced.
    """
    with open(pickle_fname, 'rb') as stream:
        return pickle.load(stream)
|
|
||||||
|
|
||||||
|
|
||||||
class VocabularyLevelEstimator:
    """Base class that turns a list of words into a difficulty level.

    This class does not set ``self.word_lst`` itself; subclasses must
    assign it (the words to score) before ``level`` is read.
    """

    # Maps a word to the sources where it appears. Loaded once, when the
    # class body is executed.
    _test = load_record('words_and_tests.p')

    @property
    def level(self):
        """Return the mean difficulty of the scored words in ``self.word_lst``.

        Only words present in ``_test`` are scored. Each one contributes
        the highest ``DIFFICULTY_MAPPING`` value among its sources; a
        source missing from the mapping counts as 0.

        Returns:
            The average score as a float, or 0.0 when ``word_lst`` is
            empty or none of its words appear in ``_test``.
        """
        if not self.word_lst:
            return 0.0
        scores = [
            # default=0 keeps a word with an empty source collection from
            # raising ValueError; it is scored as 0 instead.
            max(
                (DIFFICULTY_MAPPING.get(src, 0) for src in self._test[word]),
                default=0,
            )
            for word in self.word_lst
            if word in self._test
        ]
        return sum(scores) / len(scores) if scores else 0.0
|
|
||||||
|
|
||||||
|
|
||||||
class UserVocabularyLevel(VocabularyLevelEstimator):
    """Vocabulary level estimated from a user's word records."""

    def __init__(self, d, recent_n=3):
        """Select the ``recent_n`` words whose last record entry sorts highest.

        Args:
            d: mapping from a word to a sequence; only the last element of
                each sequence is used, as the sort key (presumably a
                timestamp; confirm against the code that writes it).
            recent_n: number of words kept in ``self.word_lst``.
        """
        self.d = d
        self.recent_n = recent_n

        def last_entry(word):
            return d[word][-1]

        # Descending by last entry, so the slice keeps the top recent_n.
        ordered = list(d.keys())
        ordered.sort(key=last_entry, reverse=True)
        self.word_lst = ordered[:recent_n]
|
|
||||||
|
|
||||||
|
|
||||||
class ArticleVocabularyLevel(VocabularyLevelEstimator):
    """Vocabulary level of an article, based on its highest-scoring words."""

    def __init__(self, content):
        """Tokenize ``content`` and keep its 10 highest-scoring words.

        The text is lower-cased, split into ``\\w+`` tokens, stripped of
        NLTK English stop words, and sorted by ``_get_difficulty`` in
        descending order. The first 10 tokens become ``self.word_lst``;
        repeated tokens are not de-duplicated.

        Args:
            content: the article text.
        """
        # Kept as local imports: neither name is imported at module level.
        import re
        from nltk.corpus import stopwords

        self.content = content

        # Download the stop-word corpus only when it is missing, instead
        # of contacting the NLTK download server on every construction.
        try:
            nltk.data.find('corpora/stopwords')
        except LookupError:
            nltk.download('stopwords')
        stop_words = set(stopwords.words('english'))

        tokens = re.findall(r'\b\w+\b', content.lower())
        candidates = [token for token in tokens if token not in stop_words]
        self.word_lst = sorted(
            candidates,
            key=self._get_difficulty,
            reverse=True,
        )[:10]

    def _get_difficulty(self, word):
        """Return the highest ``DIFFICULTY_MAPPING`` value among ``word``'s sources.

        Returns 0 when ``word`` is not in ``_test``, when it has no
        sources, or when none of its sources is in the mapping.
        """
        if word not in self._test:
            return 0
        # default=0 avoids ValueError on an empty source collection.
        return max(
            (DIFFICULTY_MAPPING.get(src, 0) for src in self._test[word]),
            default=0,
        )
|
|
||||||
|
|
||||||
|
|
||||||
if __name__ == '__main__':
    # Manual demo: print a stored record, the user level computed from it,
    # and the level of a short sample article.
    divider = "======================================================"

    d = load_record('frequency_mrlan85.pickle')
    print(d)
    print(divider)

    user = UserVocabularyLevel(d)
    print(user.level)  # level is a property, not a method call
    print(divider)

    article = ArticleVocabularyLevel('This is an interesting article')
    print(article.level)
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
Loading…
Reference in New Issue