Compare commits
1 Commits
master
...
Bug585-Wan
| Author | SHA1 | Date |
|---|---|---|
|
|
72fa5127b4 |
|
|
@ -18,7 +18,7 @@ picked from articles selected for him to read according to his vocabulary level. E
|
||||||
|
|
||||||
`python3 main.py`
|
`python3 main.py`
|
||||||
|
|
||||||
Make sure you have put the SQLite database file in the path `app/db` (see below).
|
Make sure you have put the SQLite database file in the path `app/static` (see below).
|
||||||
|
|
||||||
|
|
||||||
## Run it as a Docker container
|
## Run it as a Docker container
|
||||||
|
|
@ -214,5 +214,5 @@ Bug report: http://118.25.96.118/bugzilla/show_bug.cgi?id=215
|
||||||
Bug report: http://118.25.96.118/bugzilla/show_bug.cgi?id=489
|
Bug report: http://118.25.96.118/bugzilla/show_bug.cgi?id=489
|
||||||
|
|
||||||
|
|
||||||
*Last modified on 2026-03-12*
|
*Last modified on 2023-01-30*
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -106,7 +106,7 @@ def get_today_article(user_word_list, visited_articles):
|
||||||
text_level = text_difficulty_level(d['text'], d3)
|
text_level = text_difficulty_level(d['text'], d3)
|
||||||
result_of_generate_article = "found"
|
result_of_generate_article = "found"
|
||||||
|
|
||||||
today_article = {}
|
today_article = None
|
||||||
if d:
|
if d:
|
||||||
oxford_words = load_oxford_words(oxford_words_path)
|
oxford_words = load_oxford_words(oxford_words_path)
|
||||||
oxford_word_count, total_words = count_oxford_words(d['text'],oxford_words)
|
oxford_word_count, total_words = count_oxford_words(d['text'],oxford_words)
|
||||||
|
|
|
||||||
|
|
@ -144,8 +144,8 @@ if __name__ == '__main__':
|
||||||
运行程序
|
运行程序
|
||||||
'''
|
'''
|
||||||
# app.secret_key = os.urandom(16)
|
# app.secret_key = os.urandom(16)
|
||||||
app.run(debug=True, port=5000)
|
# app.run(debug=False, port='6000')
|
||||||
# app.run(debug=True)
|
app.run(debug=True)
|
||||||
# app.run(debug=True, port='6000')
|
# app.run(debug=True, port='6000')
|
||||||
# app.run(host='0.0.0.0', debug=True, port='6000')
|
# app.run(host='0.0.0.0', debug=True, port='6000')
|
||||||
# print(mod5('123'))
|
# print(mod5('123'))
|
||||||
|
|
|
||||||
|
|
@ -31,7 +31,7 @@
|
||||||
<p><a href="/login">登录</a> <a href="/signup">注册</a> <a href="/static/usr/instructions.html">使用说明</a></p >
|
<p><a href="/login">登录</a> <a href="/signup">注册</a> <a href="/static/usr/instructions.html">使用说明</a></p >
|
||||||
<p><b> {{ random_ads }}。 <a href="/signup">试试</a>吧!</b></p>
|
<p><b> {{ random_ads }}。 <a href="/signup">试试</a>吧!</b></p>
|
||||||
{% endif %}
|
{% endif %}
|
||||||
<div class="alert alert-success" role="alert">共有文章 <span class="badge bg-success"> {{ number_of_essays }} </span> 篇,Oxford 5000 单词占比 <span class="badge bg-success"> {{ (ratio * 100) | int }}% </span> </div>
|
<div class="alert alert-success" role="alert">共有文章 <span class="badge bg-success"> {{ number_of_essays }} </span> 篇,覆盖 <span class="badge bg-success"> {{ (ratio * 100) | int }}% </span> 的 Oxford5000 单词</div>
|
||||||
<p>粘贴1篇文章 (English only)</p>
|
<p>粘贴1篇文章 (English only)</p>
|
||||||
<form method="post" action="/">
|
<form method="post" action="/">
|
||||||
<textarea name="content" id="article" rows="10" cols="120"></textarea><br/>
|
<textarea name="content" id="article" rows="10" cols="120"></textarea><br/>
|
||||||
|
|
|
||||||
|
|
@ -87,7 +87,7 @@
|
||||||
|
|
||||||
<div id="text-content">
|
<div id="text-content">
|
||||||
<div id="found">
|
<div id="found">
|
||||||
<div class="alert alert-success" role="alert">According to your word list, your level is <span class="text-decoration-underline" id="user_level">{{ today_article["user_level"] }}</span> and we have chosen an article with a difficulty level of <span class="text-decoration-underline" id="text_level">{{ today_article["text_level"] }}</span> for you. <span class="text-decoration-underline" id="ratio">{{ (today_article["ratio"] * 100) | int }}%</span> of the words in this article are in Oxford Word 5000.</div>
|
<div class="alert alert-success" role="alert">According to your word list, your level is <span class="text-decoration-underline" id="user_level">{{ today_article["user_level"] }}</span> and we have chosen an article with a difficulty level of <span class="text-decoration-underline" id="text_level">{{ today_article["text_level"] }}</span> for you. The Oxford word coverage is <span class="text-decoration-underline" id="ratio">{{ (today_article["ratio"] * 100) | int }}%.</span></div>
|
||||||
<p class="text-muted" id="date">Article added on: {{ today_article["date"] }}</p><br/>
|
<p class="text-muted" id="date">Article added on: {{ today_article["date"] }}</p><br/>
|
||||||
|
|
||||||
<button onclick="saveArticle()" >标记文章</button>
|
<button onclick="saveArticle()" >标记文章</button>
|
||||||
|
|
|
||||||
|
|
@ -0,0 +1,136 @@
|
||||||
|
###########################################################################
|
||||||
|
# Copyright 2019 (C) Hui Lan <hui.lan@cantab.net>
|
||||||
|
# Written permission must be obtained from the author for commercial uses.
|
||||||
|
###########################################################################
|
||||||
|
|
||||||
|
# Purpose: compute the difficulty level of an English text
|
||||||
|
|
||||||
|
import pickle
|
||||||
|
import math
|
||||||
|
from wordfreqCMD import remove_punctuation, freq, sort_in_descending_order, sort_in_ascending_order, map_percentages_to_levels
|
||||||
|
import snowballstemmer
|
||||||
|
|
||||||
|
|
||||||
|
def load_record(pickle_fname):
    """Load and return the object stored in a pickle file.

    :param pickle_fname: path of the pickle file to read
    :return: the unpickled object
    """
    # NOTE(review): pickle can execute arbitrary code while loading;
    # only call this on trusted, locally produced files.
    with open(pickle_fname, 'rb') as pickle_file:
        return pickle.load(pickle_file)
|
||||||
|
|
||||||
|
|
||||||
|
ENGLISH_WORD_DIFFICULTY_DICT = {}
|
||||||
|
def convert_test_type_to_difficulty_level(d):
    """Assign a difficulty level to every word of the word bank.

    The result is also stored in the module-level
    ``ENGLISH_WORD_DIFFICULTY_DICT`` so later calls can reuse it.

    :param d: dict loaded from the word-bank pickle; maps each word to the
        test types it appears in (e.g., CET4, CET6, BBC)
    :return: dict mapping word to level, e.g. {'apple': 4, ...}; words that
        match none of the known test types are left out
    """
    global ENGLISH_WORD_DIFFICULTY_DICT

    # (level, test types) in priority order: the first rule that matches wins.
    rules = (
        (4, ('CET4',)),  # CET4 word has level 4
        (5, ('OXFORD3000',)),
        (6, ('CET6', 'GRADUATE')),
        (7, ('OXFORD5000', 'IELTS')),
        (8, ('BBC',)),
    )

    levels = {}
    for word, test_types in d.items():
        for level, tags in rules:
            if any(tag in test_types for tag in tags):
                levels[word] = level
                break

    ENGLISH_WORD_DIFFICULTY_DICT = levels
    return levels  # {'apple': 4, ...}
|
||||||
|
|
||||||
|
def get_difficulty_level_for_user(d1, d2):
    """Return a word -> difficulty level dict that also covers the user's words.

    Per the original note: d2 comes from the word bank (35511 labelled
    words) and d1 holds the words the user does not know.

    No new dictionary is created: the user's words are added to the level
    dictionary itself. That dictionary is the same object as the module-level
    ``ENGLISH_WORD_DIFFICULTY_DICT`` cache, so the additions persist across
    calls.

    :param d1: iterable of the user's words
    :param d2: word bank mapping each word to its test types; only used when
        the cache is still empty
    :return: dict mapping word to difficulty level
    """
    # TODO: convert_test_type_to_difficulty_level() should not be called every time. Each word's difficulty level should be pre-computed.
    if ENGLISH_WORD_DIFFICULTY_DICT == {}:
        # Grade the word bank by its test types: {'apple': 4, 'abandon': 4, ...}
        levels = convert_test_type_to_difficulty_level(d2)
    else:
        levels = ENGLISH_WORD_DIFFICULTY_DICT

    stem_of = snowballstemmer.stemmer('english').stemWord

    for word in d1:  # the user's words
        if word in levels:
            # The word itself is already graded; nothing to do.
            continue
        # Grade by the word's stem; if even the stem is unknown, use level 3.
        levels[word] = levels.get(stem_of(word), 3)

    return levels
|
||||||
|
|
||||||
|
|
||||||
|
def revert_dict(d):
    """Invert a word -> dates mapping into a time -> words mapping.

    In d, word is the key, and value is a list of time strings.
    In the returned dict, the key is the first 10 characters of a time
    string (i.e., up to the hour), and the value is the list of words picked
    at that time. A word appears once per matching time string.

    For backward compatibility, d may also look like {'word': 1}, where the
    value is not a list of dates but a number telling how often the word was
    added to the new word book. Such a word is filed that many times under
    the fixed date '2021082019'.

    Values of any other type are skipped. Previously they either raised
    UnboundLocalError (first word) or silently reused the previous word's
    dates.

    :param d: dict mapping word to a list of time strings or to an int count
    :return: dict mapping 10-character time prefix to a list of words
    """
    # Why this date? No particular reason: it is the date the bug was fixed.
    legacy_date = '2021082019'

    d2 = {}
    for word, value in d.items():
        if isinstance(value, list):  # value is a list of dates
            lst = value
        elif isinstance(value, int) and not isinstance(value, bool):
            # Legacy format: value is a frequency, not a list of dates.
            lst = value * [legacy_date]
        else:
            # Unknown format: do not guess any dates for this word.
            continue

        for time_info in lst:
            date = time_info[:10]  # until hour
            d2.setdefault(date, []).append(word)
    return d2
|
||||||
|
|
||||||
|
|
||||||
|
class VocabularyLevelEstimator:
    """Base class for estimating a vocabulary level from ``self.word_lst``.

    Subclasses are expected to set ``self.word_lst`` before ``level`` is read.
    """

    # Loaded once, when the class is defined.
    _test = load_record('words_and_tests.p')  # map a word to the sources where it appears

    @property
    def level(self):
        """Print each word (with its sources when known) and return the level.

        The level is still a stub: ``total`` is never updated, so the
        returned value is always 0.0.
        """
        total = 0.0  # TODO: need to compute this number
        num = 1
        # Counting starts at 2 so that num ends up as 1 + number of words.
        for num, word in enumerate(self.word_lst, start=2):
            line = f'{word} : {self._test[word]}' if word in self._test else f'{word}'
            print(line)
        return total/num
|
||||||
|
|
||||||
|
class UserVocabularyLevel(VocabularyLevelEstimator):
    """Vocabulary level estimator fed by a user's word record."""

    def __init__(self, d):
        """Keep the record ``d`` and use its keys as the word list.

        :param d: mapping whose keys are the user's words
        """
        self.d = d
        self.word_lst = [word for word in d.keys()]
        # just look at the most recently-added words
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
class ArticleVocabularyLevel(VocabularyLevelEstimator):
    """Vocabulary level estimator fed by the text of an article."""

    def __init__(self, content):
        """Keep ``content`` and split its lower-cased text into words.

        :param content: the article text
        """
        self.content = content
        lowered = content.lower()
        self.word_lst = lowered.split()  # split on whitespace
        # select the 10 most difficult words
|
||||||
|
|
||||||
|
|
||||||
|
if __name__ == '__main__':
    # Small demo: show a stored user record, then the level of that user
    # and the level of a sample article.
    record = load_record('frequency_mrlan85.pickle')
    print(record)

    estimators = (
        UserVocabularyLevel(record),
        ArticleVocabularyLevel('This is an interesting article'),
    )
    for estimator in estimators:
        print(estimator.level)  # level is a property
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
Loading…
Reference in New Issue