Compare commits

...

34 Commits

Author SHA1 Message Date
丁晟晔 ff6286cf01 删除 app/test/test_bug551_DingZeYu.py 2024-05-06 11:42:32 +08:00
丁晟晔 1d7e61d751 上传文件至 app/test 2024-05-06 11:36:36 +08:00
顾涵 708a6a2821 Merge pull request 'WIP:Bug529-GuHan' (#88) from Bug529-GuHan into master
Reviewed-on: http://121.4.94.30:3000/mrlan/EnglishPal/pulls/88
2023-06-04 12:39:34 +08:00
顾涵 688a198768 已经与Alpha-snapshot20230525 分支同步,重新提交 2023-05-28 16:31:12 +08:00
寻宇灿 1543b3095d Merge remote-tracking branch 'origin/Alpha-snapshot20230519' into Refactor-XunYucan 2023-05-25 22:30:06 +08:00
寻宇灿 c6bf323c60 修改格式 2023-05-25 21:23:25 +08:00
寻宇灿 03ccb3527a 重构前端阅读js,新增阅读器全局对象,新增生词朗读按钮 2023-05-25 17:35:31 +08:00
Hui Lan b41e1044bc difficulty.py: add some stop words, hoping that getting the next article can be faster. 2023-05-24 10:12:44 +08:00
Hui Lan 67e921ba60 difficulty.py: todo. 2023-05-23 22:25:40 +08:00
Hui Lan a5c3564f15 difficulty.py: do not stem a word twice. 2023-05-23 22:22:57 +08:00
Hui Lan 1295616d5b Merge branch 'Bug476-YuHuangtao' of http://121.4.94.30:3000/mrlan/EnglishPal into Alpha-snapshot20230519 2023-05-23 19:50:30 +08:00
俞黄焘 c151a0efaa 去掉了get_difficulty_level_for_user的多出的break 2023-05-23 19:40:33 +08:00
顾涵 030b89706e special_characters = '\_©~<=>+/[]*&$%^@.,?!:;#()"“”—‘’{}|' 用于过滤字符,我将其中的“-”删去,使连字符没有被过滤,实现录入例如fifty-six等组合词的功能。另外对于删除过滤是否会引发字符bug,答案是肯定的,但是这段代码中的过滤字符虽然多,但是并没有完全过滤掉所有字符,(过滤的只是键盘上能打出的字符,不包括输入法中能打出的特殊字符),所以字符bug本身就一直存在,我认为减少一个“-”字符对程序的过滤过程不会造成问题。 2023-05-20 15:29:12 +08:00
Hui Lan 349488167b requirements.txt: install snowballstemmer for better computing a word's difficulty level. 2023-05-19 09:03:20 +08:00
俞黄焘 39d96014d9 pull最新的snapshot-20230511,后更新了difficulty.py和Article.py的部分代码,提交了新的pickle文件 2023-05-18 23:29:38 +08:00
顾涵 acd8db6e3e special_characters = '\_©~<=>+/[]*&$%^@.,?!:;#()"“”—‘’{}|' 用于过滤字符,我将其中的“-”删去,使连字符没有被过滤,实现录入例如fifty-six等组合词的功能。另外对于删除过滤是否会引发字符bug,答案是肯定的,但是这段代码中的过滤字符虽然多,但是并没有完全过滤掉所有字符,(过滤的只是键盘上能打出的字符,不包括输入法中能打出的特殊字符),所以字符bug本身就一直存在,我认为减少一个对“1-”字符的过滤不会造成问题。 2023-05-15 19:24:43 +08:00
顾涵 9f3f5b43e1 special_characters = '\_©~<=>+/[]*&$%^@.,?!:;#()"“”—‘’{}|' 用于过滤字符,我将其中的“-”删去,使连字符没有被过滤,实现录入例如fifty-six等组合词的功能。另外对于删除过滤是否会引发字符bug,答案是肯定的,但是这段代码中的过滤字符虽然多,但是并没有完全过滤掉所有字符,(过滤的只是键盘上能打出的字符,不包括输入法中能打出的特殊字符),所以字符bug本身就一直存在,我认为减少一个对“-”字符的过滤不会造成问题。 2023-05-15 19:15:30 +08:00
huangdan d9f6df7fbe AJAX载入文章数据 2023-05-11 15:51:10 +08:00
huangdan 5039f5710e AJAX载入文章数据 2023-05-08 14:33:48 +08:00
Hui Lan becef7e343 Merge branch 'Bug502-YuGaoXiang' of http://121.4.94.30:3000/mrlan/EnglishPal into Alpha-snapshot20230506 2023-05-07 15:59:35 +08:00
吴宇涵 01ecc83768 refactor: refactor the way to check article level 2023-05-06 17:42:04 +08:00
吴宇涵 f64d06fbbf fix: fix Bug 531 and use ES6 grammar 2023-05-06 17:24:51 +08:00
Hui Lan a4cc4fd011 Merge branch 'Bug522-HuangZirui' of http://121.4.94.30:3000/mrlan/EnglishPal into Alpha-snapshot20230506 2023-05-06 17:16:08 +08:00
ZhuZhihao 18ca48b422 Merge branch 'Bug522-HuangZirui' of http://121.4.94.30:3000/mrlan/EnglishPal into Bug522-HuangZirui 2023-05-05 17:21:49 +08:00
ZhuZhihao a80b062b87 refactor: remove variable 'count' 2023-05-05 17:20:58 +08:00
Hui Lan 779dafefe8 Merge branch 'Bug509-XieQiuHan-WangZiming' of http://121.4.94.30:3000/mrlan/EnglishPal into Alpha-snapshot20230427 2023-04-27 07:21:15 +08:00
Hui Lan e118d92659 Merge branch 'Alpha-snapshot20230425' of http://121.4.94.30:3000/mrlan/EnglishPal into Alpha-snapshot20230427 2023-04-27 07:20:21 +08:00
王梓铭 d30a434b2a 修改变量名had_read_articles->visited_articles 2023-04-25 17:47:51 +08:00
zzhaofisher ce2e1f2978 Merge branch 'DevLocal' into Bug522-HuangZirui 2023-04-18 21:52:28 +08:00
zzhaofisher 11ae093fd7 Merge branch 'Alpha' into Bug522-HuangZirui 2023-04-18 21:52:01 +08:00
zzhaofisher cc8ca47f8c refactor: remove sql sentences 2023-04-18 21:50:54 +08:00
zzhaofisher 5d20e92061 Merge branch 'Bug522-HuangZirui' of http://121.4.94.30:3000/mrlan/EnglishPal into DevLocal 2023-04-18 21:50:18 +08:00
Hui Lan 3bce450620 黄子睿: 修复 'Otherwise,' 这种情况无法高亮的问题,即 Otherwise 后面跟了个逗号 2022-12-15 10:50:04 +08:00
Hui Lan 417dbc22f8 highlight.js: fix Bug 522. 2022-12-09 13:19:36 +08:00
17 changed files with 296 additions and 234 deletions

View File

@ -7,7 +7,7 @@ import random, glob
import hashlib import hashlib
from datetime import datetime from datetime import datetime
from flask import Flask, request, redirect, render_template, url_for, session, abort, flash, get_flashed_messages from flask import Flask, request, redirect, render_template, url_for, session, abort, flash, get_flashed_messages
from difficulty import get_difficulty_level, text_difficulty_level, user_difficulty_level from difficulty import get_difficulty_level_for_user, text_difficulty_level, user_difficulty_level
path_prefix = '/var/www/wordfreq/wordfreq/' path_prefix = '/var/www/wordfreq/wordfreq/'
@ -32,20 +32,20 @@ def get_article_body(s):
return '\n'.join(lst) return '\n'.join(lst)
def get_today_article(user_word_list, had_read_articles): def get_today_article(user_word_list, visited_articles):
rq = RecordQuery(path_prefix + 'static/wordfreqapp.db') rq = RecordQuery(path_prefix + 'static/wordfreqapp.db')
if had_read_articles is None: if visited_articles is None:
had_read_articles = { visited_articles = {
"index" : 0, # 为 article_ids 的索引 "index" : 0, # 为 article_ids 的索引
"article_ids": [] # 之前显示文章的id列表越后越新 "article_ids": [] # 之前显示文章的id列表越后越新
} }
if had_read_articles["index"] > len(had_read_articles["article_ids"])-1: # 生成新的文章,因此查找所有的文章 if visited_articles["index"] > len(visited_articles["article_ids"])-1: # 生成新的文章,因此查找所有的文章
rq.instructions("SELECT * FROM article") rq.instructions("SELECT * FROM article")
else: # 生成阅读过的文章,因此查询指定 article_id 的文章 else: # 生成阅读过的文章,因此查询指定 article_id 的文章
if had_read_articles["article_ids"][had_read_articles["index"]] == 'null': # 可能因为直接刷新页面导致直接去查询了'null',因此当刷新的页面的时候,需要直接进行“上一篇”操作 if visited_articles["article_ids"][visited_articles["index"]] == 'null': # 可能因为直接刷新页面导致直接去查询了'null',因此当刷新的页面的时候,需要直接进行“上一篇”操作
had_read_articles["index"] -= 1 visited_articles["index"] -= 1
had_read_articles["article_ids"].pop() visited_articles["article_ids"].pop()
rq.instructions('SELECT * FROM article WHERE article_id=%d' % (had_read_articles["article_ids"][had_read_articles["index"]])) rq.instructions('SELECT * FROM article WHERE article_id=%d' % (visited_articles["article_ids"][visited_articles["index"]]))
rq.do() rq.do()
result = rq.get_results() result = rq.get_results()
random.shuffle(result) random.shuffle(result)
@ -53,32 +53,32 @@ def get_today_article(user_word_list, had_read_articles):
# Choose article according to reader's level # Choose article according to reader's level
d1 = load_freq_history(path_prefix + 'static/frequency/frequency.p') d1 = load_freq_history(path_prefix + 'static/frequency/frequency.p')
d2 = load_freq_history(path_prefix + 'static/words_and_tests.p') d2 = load_freq_history(path_prefix + 'static/words_and_tests.p')
d3 = get_difficulty_level(d1, d2) d3 = get_difficulty_level_for_user(d1, d2)
d = None d = None
result_of_generate_article = "not found" result_of_generate_article = "not found"
d_user = load_freq_history(user_word_list) d_user = load_freq_history(user_word_list)
user_level = user_difficulty_level(d_user, d3) # more consideration as user's behaviour is dynamic. Time factor should be considered. user_level = user_difficulty_level(d_user, d3) # more consideration as user's behaviour is dynamic. Time factor should be considered.
text_level = 0 text_level = 0
if had_read_articles["index"] > len(had_read_articles["article_ids"])-1: # 生成新的文章 if visited_articles["index"] > len(visited_articles["article_ids"])-1: # 生成新的文章
amount_of_had_read_articles = len(had_read_articles["article_ids"]) amount_of_visited_articles = len(visited_articles["article_ids"])
amount_of_existing_articles = result.__len__() amount_of_existing_articles = result.__len__()
if amount_of_had_read_articles == amount_of_existing_articles: # 如果当前阅读过的文章的数量 == 存在的文章的数量,即所有的书本都阅读过了 if amount_of_visited_articles == amount_of_existing_articles: # 如果当前阅读过的文章的数量 == 存在的文章的数量,即所有的书本都阅读过了
result_of_generate_article = "had read all articles" result_of_generate_article = "had read all articles"
else: else:
for k in range(3): # 最多尝试3次 for k in range(3): # 最多尝试3次
for reading in result: for reading in result:
text_level = text_difficulty_level(reading['text'], d3) text_level = text_difficulty_level(reading['text'], d3)
factor = random.gauss(0.8, 0.1) # a number drawn from Gaussian distribution with a mean of 0.8 and a stand deviation of 1 factor = random.gauss(0.8, 0.1) # a number drawn from Gaussian distribution with a mean of 0.8 and a stand deviation of 1
if reading['article_id'] not in had_read_articles["article_ids"] and within_range(text_level, user_level, (8.0 - user_level) * factor): # 新的文章之前没有出现过且符合一定范围的水平 if reading['article_id'] not in visited_articles["article_ids"] and within_range(text_level, user_level, (8.0 - user_level) * factor): # 新的文章之前没有出现过且符合一定范围的水平
d = reading d = reading
had_read_articles["article_ids"].append(d['article_id']) # 列表添加新的文章id下面进行 visited_articles["article_ids"].append(d['article_id']) # 列表添加新的文章id下面进行
result_of_generate_article = "found" result_of_generate_article = "found"
break break
if result_of_generate_article == "found": # 用于成功找到文章后及时退出外层循环 if result_of_generate_article == "found": # 用于成功找到文章后及时退出外层循环
break break
if result_of_generate_article != "found": # 阅读完所有文章或者循环3次没有找到适合的文章则放入空“null” if result_of_generate_article != "found": # 阅读完所有文章或者循环3次没有找到适合的文章则放入空“null”
had_read_articles["article_ids"].append('null') visited_articles["article_ids"].append('null')
else: # 生成已经阅读过的文章 else: # 生成已经阅读过的文章
d = random.choice(result) d = random.choice(result)
text_level = text_difficulty_level(d['text'], d3) text_level = text_difficulty_level(d['text'], d3)
@ -97,7 +97,7 @@ def get_today_article(user_word_list, had_read_articles):
"answer": get_answer_part(d['question']) "answer": get_answer_part(d['question'])
} }
return had_read_articles, today_article, result_of_generate_article return visited_articles, today_article, result_of_generate_article
def load_freq_history(path): def load_freq_history(path):

View File

@ -3,6 +3,18 @@ import string
from datetime import datetime, timedelta from datetime import datetime, timedelta
from UseSqlite import InsertQuery, RecordQuery from UseSqlite import InsertQuery, RecordQuery
def md5(s):
'''
MD5摘要
:param str: 字符串
:return: 经MD5以后的字符串
'''
h = hashlib.md5(s.encode(encoding='utf-8'))
return h.hexdigest()
# import model.user after the definition of md5(s) to avoid circular import
from model.user import get_user_by_username, insert_user, update_password_by_username
path_prefix = '/var/www/wordfreq/wordfreq/' path_prefix = '/var/www/wordfreq/wordfreq/'
path_prefix = './' # comment this line in deployment path_prefix = './' # comment this line in deployment
@ -12,13 +24,9 @@ def verify_pass(newpass,oldpass):
def verify_user(username, password): def verify_user(username, password):
rq = RecordQuery(path_prefix + 'static/wordfreqapp.db') user = get_user_by_username(username)
password = md5(username + password) encoded_password = md5(username + password)
rq.instructions_with_parameters("SELECT * FROM user WHERE name=:username AND password=:password", dict( return user is not None and user.password == encoded_password
username=username, password=password)) # the named style https://docs.python.org/3/library/sqlite3.html
rq.do_with_parameters()
result = rq.get_results()
return result != []
def add_user(username, password): def add_user(username, password):
@ -26,19 +34,12 @@ def add_user(username, password):
expiry_date = (datetime.now() + timedelta(days=30)).strftime('%Y%m%d') # will expire after 30 days expiry_date = (datetime.now() + timedelta(days=30)).strftime('%Y%m%d') # will expire after 30 days
# 将用户名和密码一起加密,以免暴露不同用户的相同密码 # 将用户名和密码一起加密,以免暴露不同用户的相同密码
password = md5(username + password) password = md5(username + password)
rq = InsertQuery(path_prefix + 'static/wordfreqapp.db') insert_user(username=username, password=password, start_date=start_date, expiry_date=expiry_date)
rq.instructions_with_parameters("INSERT INTO user VALUES (:username, :password, :start_date, :expiry_date)", dict(
username=username, password=password, start_date=start_date, expiry_date=expiry_date))
rq.do_with_parameters()
def check_username_availability(username): def check_username_availability(username):
rq = RecordQuery(path_prefix + 'static/wordfreqapp.db') existed_user = get_user_by_username(username)
rq.instructions_with_parameters( return existed_user is None
"SELECT * FROM user WHERE name=:username", dict(username=username))
rq.do_with_parameters()
result = rq.get_results()
return result == []
def change_password(username, old_password, new_password): def change_password(username, old_password, new_password):
@ -54,35 +55,16 @@ def change_password(username, old_password, new_password):
# 将用户名和密码一起加密,以免暴露不同用户的相同密码 # 将用户名和密码一起加密,以免暴露不同用户的相同密码
if verify_pass(new_password,old_password): #新旧密码一致 if verify_pass(new_password,old_password): #新旧密码一致
return False return False
password = md5(username + new_password) update_password_by_username(username, new_password)
rq = InsertQuery(path_prefix + 'static/wordfreqapp.db')
rq.instructions_with_parameters("UPDATE user SET password=:password WHERE name=:username", dict(
password=password, username=username))
rq.do_with_parameters()
return True return True
def get_expiry_date(username): def get_expiry_date(username):
rq = RecordQuery(path_prefix + 'static/wordfreqapp.db') user = get_user_by_username(username)
rq.instructions_with_parameters( if user is None:
"SELECT expiry_date FROM user WHERE name=:username", dict(username=username))
rq.do_with_parameters()
result = rq.get_results()
if len(result) > 0:
return result[0]['expiry_date']
else:
return '20191024' return '20191024'
else:
return user.expiry_date
def md5(s):
'''
MD5摘要
:param str: 字符串
:return: 经MD5以后的字符串
'''
h = hashlib.md5(s.encode(encoding='utf-8'))
return h.hexdigest()
class UserName: class UserName:
def __init__(self, username): def __init__(self, username):

View File

@ -37,7 +37,7 @@ def signup():
session[username] = username session[username] = username
session['username'] = username session['username'] = username
session['expiry_date'] = get_expiry_date(username) session['expiry_date'] = get_expiry_date(username)
session['had_read_articles'] = None session['visited_articles'] = None
return jsonify({'status': '2'}) return jsonify({'status': '2'})
else: else:
return jsonify({'status': '1'}) return jsonify({'status': '1'})
@ -66,7 +66,7 @@ def login():
session['username'] = username session['username'] = username
user_expiry_date = get_expiry_date(username) user_expiry_date = get_expiry_date(username)
session['expiry_date'] = user_expiry_date session['expiry_date'] = user_expiry_date
session['had_read_articles'] = None session['visited_articles'] = None
return jsonify({'status': '1'}) return jsonify({'status': '1'})
else: else:
return jsonify({'status': '0'}) return jsonify({'status': '0'})

View File

@ -91,10 +91,7 @@ def article():
question = data.get("question", "") question = data.get("question", "")
level = data.get("level", "4") level = data.get("level", "4")
if content: if content:
try: # check level
if level not in ['1', '2', '3', '4']: if level not in ['1', '2', '3', '4']:
raise ValueError
except ValueError:
return "Level must be between 1 and 4." return "Level must be between 1 and 4."
add_article(content, source, level, question) add_article(content, source, level, question)
_update_context() _update_context()

View File

@ -8,6 +8,7 @@
import pickle import pickle
import math import math
from wordfreqCMD import remove_punctuation, freq, sort_in_descending_order, sort_in_ascending_order from wordfreqCMD import remove_punctuation, freq, sort_in_descending_order, sort_in_ascending_order
import snowballstemmer
def load_record(pickle_fname): def load_record(pickle_fname):
@ -17,40 +18,50 @@ def load_record(pickle_fname):
return d return d
def difficulty_level_from_frequency(word, d): def convert_test_type_to_difficulty_level(d):
level = 1 """
if not word in d: 对原本的单词库中的单词进行难度评级
return level :param d: 存储了单词库pickle文件中的单词的字典
:return:
"""
result = {}
L = list(d.keys()) # in d, we have test types (e.g., CET4,CET6,BBC) for each word
if 'what' in d: for k in L:
ratio = (d['what']+1)/(d[word]+1) # what is a frequent word if 'CET4' in d[k]:
level = math.log( max(ratio, 1), 2) result[k] = 4 # CET4 word has level 4
elif 'OXFORD3000' in d[k]:
result[k] = 5
elif 'CET6' in d[k] or 'GRADUATE' in d[k]:
result[k] = 6
elif 'OXFORD5000' in d[k] or 'IELTS' in d[k]:
result[k] = 7
elif 'BBC' in d[k]:
result[k] = 8
level = min(level, 8) return result # {'apple': 4, ...}
return level
def get_difficulty_level(d1, d2): def get_difficulty_level_for_user(d1, d2):
d = {} """
L = list(d1.keys()) # in d1, we have freuqence for each word d2 来自于词库的35511个已标记单词
L2 = list(d2.keys()) # in d2, we have test types (e.g., CET4,CET6,BBC) for each word d1 用户不会的词
L.extend(L2) 在d2的后面添加单词没有新建一个新的字典
L3 = list(set(L)) # L3 contains all words """
for k in L3: # TODO: convert_test_type_to_difficulty_level() should not be called every time. Each word's difficulty level should be pre-computed.
if k in d2: d2 = convert_test_type_to_difficulty_level(d2) # 根据d2的标记评级{'apple': 4, 'abandon': 4, ...}
if 'CET4' in d2[k]: stemmer = snowballstemmer.stemmer('english')
d[k] = 4 # CET4 word has level 4
elif 'CET6' in d2[k]:
d[k] = 6
elif 'BBC' in d2[k]:
d[k] = 8
if k in d1: # BBC could contain easy words that are not in CET4 or CET6. So 4 is not reasonable. Recompute difficulty level.
d[k] = min(difficulty_level_from_frequency(k, d1), d[k])
elif k in d1:
d[k] = difficulty_level_from_frequency(k, d1)
return d
for k in d1: # 用户的词
if k in d2: # 如果用户的词以原型的形式存在于词库d2中
continue # 无需评级,跳过
else:
stem = stemmer.stemWord(k)
if stem in d2: # 如果用户的词的词根存在于词库d2的词根库中
d2[k] = d2[stem] # 按照词根进行评级
else:
d2[k] = 3 # 如果k的词根都不在那么就当认为是3级
return d2
def revert_dict(d): def revert_dict(d):
@ -62,7 +73,8 @@ def revert_dict(d):
for k in d: for k in d:
if type(d[k]) is list: # d[k] is a list of dates. if type(d[k]) is list: # d[k] is a list of dates.
lst = d[k] lst = d[k]
elif type(d[k]) is int: # for backward compatibility. d was sth like {'word':1}. The value d[k] is not a list of dates, but a number representing how frequent this word had been added to the new word book. elif type(d[
k]) is int: # for backward compatibility. d was sth like {'word':1}. The value d[k] is not a list of dates, but a number representing how frequent this word had been added to the new word book.
freq = d[k] freq = d[k]
lst = freq * ['2021082019'] # why choose this date? No particular reasons. I fix the bug in this date. lst = freq * ['2021082019'] # why choose this date? No particular reasons. I fix the bug in this date.
@ -79,7 +91,8 @@ def user_difficulty_level(d_user, d):
d_user2 = revert_dict(d_user) # key is date, and value is a list of words added in that date d_user2 = revert_dict(d_user) # key is date, and value is a list of words added in that date
count = 0 count = 0
geometric = 1 geometric = 1
for date in sorted(d_user2.keys(), reverse=True): # most recently added words are more important while determining user's level for date in sorted(d_user2.keys(),
reverse=True): # most recently added words are more important while determining user's level
lst = d_user2[date] # a list of words lst = d_user2[date] # a list of words
lst2 = [] # a list of tuples, (word, difficulty level) lst2 = [] # a list of tuples, (word, difficulty level)
for word in lst: for word in lst:
@ -105,9 +118,10 @@ def text_difficulty_level(s, d):
L = freq(s) L = freq(s)
lst = [] # a list of tuples, each tuple being (word, difficulty level) lst = [] # a list of tuples, each tuple being (word, difficulty level)
stop_words = {'the':1, 'and':1, 'of':1, 'to':1, 'what':1, 'in':1, 'there':1, 'when':1, 'them':1, 'would':1, 'will':1, 'out':1, 'his':1, 'mr':1, 'that':1, 'up':1, 'more':1, 'your':1, 'it':1, 'now':1, 'very':1, 'then':1, 'could':1, 'he':1, 'any':1, 'some':1, 'with':1, 'into':1, 'you':1, 'our':1, 'man':1, 'other':1, 'time':1, 'was':1, 'than':1, 'know':1, 'about':1, 'only':1, 'like':1, 'how':1, 'see':1, 'is':1, 'before':1, 'such':1, 'little':1, 'two':1, 'its':1, 'as':1, 'these':1, 'may':1, 'much':1, 'down':1, 'for':1, 'well':1, 'should':1, 'those':1, 'after':1, 'same':1, 'must':1, 'say':1, 'first':1, 'again':1, 'us':1, 'great':1, 'where':1, 'being':1, 'come':1, 'over':1, 'good':1, 'himself':1, 'am':1, 'never':1, 'on':1, 'old':1, 'here':1, 'way':1, 'at':1, 'go':1, 'upon':1, 'have':1, 'had':1, 'without':1, 'my':1, 'day':1, 'be':1, 'but':1, 'though':1, 'from':1, 'not':1, 'too':1, 'another':1, 'this':1, 'even':1, 'still':1, 'her':1, 'yet':1, 'under':1, 'by':1, 'let':1, 'just':1, 'all':1, 'because':1, 'we':1, 'always':1, 'off':1, 'yes':1, 'so':1, 'while':1, 'why':1, 'which':1, 'me':1, 'are':1, 'or':1, 'no':1, 'if':1, 'an':1, 'also':1, 'thus':1, 'who':1, 'cannot':1, 'she':1, 'whether':1} # ignore these words while computing the artile's difficulty level
for x in L: for x in L:
word = x[0] word = x[0]
if word in d: if word not in stop_words and word in d:
lst.append((word, d[word])) lst.append((word, d[word]))
lst2 = sort_in_descending_order(lst) # most difficult words on top lst2 = sort_in_descending_order(lst) # most difficult words on top
@ -125,18 +139,14 @@ def text_difficulty_level(s, d):
return geometric ** (1 / max(count, 1)) return geometric ** (1 / max(count, 1))
if __name__ == '__main__': if __name__ == '__main__':
d1 = load_record('frequency.p') d1 = load_record('frequency.p')
# print(d1) # print(d1)
d2 = load_record('words_and_tests.p') d2 = load_record('words_and_tests.p')
# print(d2) # print(d2)
d3 = get_difficulty_level_for_user(d1, d2)
d3 = get_difficulty_level(d1, d2)
s = ''' s = '''
South Lawn South Lawn
@ -197,7 +207,6 @@ Amidst the aftermath of this shocking referendum vote, there is great uncertaint
''' '''
s = ''' s = '''
British Prime Minister Boris Johnson walks towards a voting station during the Brexit referendum in Britain, June 23, 2016. (Photo: EPA-EFE) British Prime Minister Boris Johnson walks towards a voting station during the Brexit referendum in Britain, June 23, 2016. (Photo: EPA-EFE)
@ -218,7 +227,6 @@ The prime minister was forced to ask for an extension to Britain's EU departure
Johnson has repeatedly pledged to finalize the first stage, a transition deal, of Britain's EU divorce battle by Oct. 31. A second stage will involve negotiating its future relationship with the EU on trade, security and other salient issues. Johnson has repeatedly pledged to finalize the first stage, a transition deal, of Britain's EU divorce battle by Oct. 31. A second stage will involve negotiating its future relationship with the EU on trade, security and other salient issues.
''' '''
s = ''' s = '''
Thank you very much. We have a Cabinet meeting. Well have a few questions after grace. And, if you would, Ben, please do the honors. Thank you very much. We have a Cabinet meeting. Well have a few questions after grace. And, if you would, Ben, please do the honors.
@ -233,17 +241,11 @@ We need — for our farmers, our manufacturers, for, frankly, unions and non-uni
''' '''
# f = open('bbc-fulltext/bbc/entertainment/001.txt') # f = open('bbc-fulltext/bbc/entertainment/001.txt')
f = open('wordlist.txt') f = open('wordlist.txt')
s = f.read() s = f.read()
f.close() f.close()
print(text_difficulty_level(s, d3)) print(text_difficulty_level(s, d3))

View File

@ -1,5 +1,6 @@
from model import * from model import *
from Login import md5 from Login import md5
from pony import orm
def get_users(): def get_users():
with db_session: with db_session:
@ -11,6 +12,11 @@ def get_user_by_username(username):
if user: if user:
return user.first() return user.first()
def insert_user(username, password, start_date, expiry_date):
with db_session:
user = User(name=username, password=password, start_date=start_date, expiry_date=expiry_date)
orm.commit()
def update_password_by_username(username, password="123456"): def update_password_by_username(username, password="123456"):
with db_session: with db_session:
user = User.select(name=username) user = User.select(name=username)

View File

@ -7,6 +7,7 @@ css:
js: js:
head: # 在页面加载之前加载 head: # 在页面加载之前加载
- ../static/js/jquery.js - ../static/js/jquery.js
- ../static/js/read.js
- ../static/js/word_operation.js - ../static/js/word_operation.js
bottom: # 在页面加载完之后加载 bottom: # 在页面加载完之后加载
- ../static/js/fillword.js - ../static/js/fillword.js

View File

@ -1,9 +1,5 @@
let isRead = true; let isRead = true;
let isChoose = true; let isChoose = true;
let reader = window.speechSynthesis; // 全局定义朗读者,以便朗读和暂停
let current_position = 0; // 朗读文本的当前位置
let original_position = 0; // 朗读文本的初始位置
let to_speak = ""; // 朗读的初始内容
function getWord() { function getWord() {
return window.getSelection ? window.getSelection() : document.selection.createRange().text; return window.getSelection ? window.getSelection() : document.selection.createRange().text;
@ -11,7 +7,7 @@ function getWord() {
function fillInWord() { function fillInWord() {
let word = getWord(); let word = getWord();
if (isRead) read(word); if (isRead) Reader.read(word, inputSlider.value);
if (!isChoose) return; if (!isChoose) return;
const element = document.getElementById("selected-words"); const element = document.getElementById("selected-words");
element.value = element.value + " " + word; element.value = element.value + " " + word;
@ -19,50 +15,17 @@ function fillInWord() {
document.getElementById("text-content").addEventListener("click", fillInWord, false); document.getElementById("text-content").addEventListener("click", fillInWord, false);
function makeUtterance(str, rate) { const sliderValue = document.getElementById("rangeValue");
let msg = new SpeechSynthesisUtterance(str); const inputSlider = document.getElementById("rangeComponent");
msg.rate = rate;
msg.lang = "en-US"; // TODO: add language options menu
msg.onboundary = ev => {
if (ev.name == "word") {
current_position = ev.charIndex;
}
}
return msg;
}
const sliderValue = document.getElementById("rangeValue"); // 显示值
const inputSlider = document.getElementById("rangeComponent"); // 滑块元素
inputSlider.oninput = () => { inputSlider.oninput = () => {
let value = inputSlider.value; // 获取滑块的值 let value = inputSlider.value;
sliderValue.textContent = value + '×'; sliderValue.textContent = value + '×';
if (!reader.speaking) return;
reader.cancel();
let msg = makeUtterance(to_speak.substring(original_position + current_position), value);
original_position = original_position + current_position;
current_position = 0;
reader.speak(msg);
}; };
function read(s) {
to_speak = s.toString();
original_position = 0;
current_position = 0;
let msg = makeUtterance(to_speak, inputSlider.value);
reader.speak(msg);
}
function onReadClick() { function onReadClick() {
isRead = !isRead; isRead = !isRead;
if (!isRead) {
reader.cancel();
}
} }
function onChooseClick() { function onChooseClick() {
isChoose = !isChoose; isChoose = !isChoose;
} }
function stopRead() {
reader.cancel();
}

View File

@ -38,8 +38,18 @@ function highLight() {
list[i] = list[i].replace('|', ""); list[i] = list[i].replace('|', "");
list[i] = list[i].replace('?', ""); list[i] = list[i].replace('?', "");
if (list[i] !== "" && "<mark>".indexOf(list[i]) === -1 && "</mark>".indexOf(list[i]) === -1) { if (list[i] !== "" && "<mark>".indexOf(list[i]) === -1 && "</mark>".indexOf(list[i]) === -1) {
//将文章中所有出现该单词word的地方改为" <mark>" + word + "<mark> "。 正则表达式RegExp()中,"\\s"代表单词前后必须要有空格,以防止只对单词中的部分字符高亮的情况出现。 //将文章中所有出现该单词word的地方改为"<mark>" + word + "<mark>"。 正则表达式RegExp()中,"\\b"代表单词边界匹配。
articleContent = articleContent.replace(new RegExp("\\s"+list[i]+"\\s", "g"), " <mark>" + list[i] + "</mark> ");
//修改代码
let articleContent_fb = articleContent; //文章副本
while(articleContent_fb.toLowerCase().indexOf(list[i].toLowerCase()) !== -1 && list[i]!=""){
//找到副本中和list[i]匹配的第一个单词(第一种匹配情况),并赋值给list[i]。
const index = articleContent_fb.toLowerCase().indexOf(list[i].toLowerCase());
list[i] = articleContent_fb.substring(index, index + list[i].length);
articleContent_fb = articleContent_fb.substring(index + list[i].length); // 使用副本中list[i]之后的子串替换掉副本
articleContent = articleContent.replace(new RegExp("\\b"+list[i]+"\\b","g"),"<mark>" + list[i] + "</mark>");
}
} }
} }
document.getElementById("article").innerHTML = articleContent; document.getElementById("article").innerHTML = articleContent;

35
app/static/js/read.js Normal file
View File

@ -0,0 +1,35 @@
var Reader = (function() {
let reader = window.speechSynthesis;
let current_position = 0;
let original_position = 0;
let to_speak = "";
function makeUtterance(str, rate) {
let msg = new SpeechSynthesisUtterance(str);
msg.rate = rate;
msg.lang = "en-US";
msg.onboundary = ev => {
if (ev.name == "word") {
current_position = ev.charIndex;
}
}
return msg;
}
function read(s, rate) {
to_speak = s.toString();
original_position = 0;
current_position = 0;
let msg = makeUtterance(to_speak, rate);
reader.speak(msg);
}
function stopRead() {
reader.cancel();
}
return {
read: read,
stopRead: stopRead
};
})();

View File

@ -62,6 +62,13 @@ function delete_word(theWord) {
}); });
} }
function read_word(theWord) {
let to_speak = $("#word_" + theWord).text();
original_position = 0;
current_position = 0;
Reader.read(to_speak, inputSlider.value);
}
/* /*
* interface Word { * interface Word {
* word: string, * word: string,
@ -95,6 +102,7 @@ function wordTemplate(word) {
<a class="btn btn-success" onclick="familiar('${word.word}')" role="button">熟悉</a> <a class="btn btn-success" onclick="familiar('${word.word}')" role="button">熟悉</a>
<a class="btn btn-warning" onclick="unfamiliar('${word.word}')" role="button">不熟悉</a> <a class="btn btn-warning" onclick="unfamiliar('${word.word}')" role="button">不熟悉</a>
<a class="btn btn-danger" onclick="delete_word('${word.word}')" role="button">删除</a> <a class="btn btn-danger" onclick="delete_word('${word.word}')" role="button">删除</a>
<a class="btn btn-info" onclick="read_word('${word.word}')" role="button">朗读</a>
</p>`; </p>`;
} }

Binary file not shown.

View File

@ -68,9 +68,9 @@
<script> <script>
// 密码生成器 // 密码生成器
function generatePassword(length) { function generatePassword(length) {
var charset = "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789!@#$%^&*()_+~`|}{[]\:;?><,./-="; const charset = "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789!@#$%^*()_+~`|}{[]\:;?,./-=";
var password = ""; let password = "";
for (var i = 0; i < length; i++) { for (let i = 0; i < length; i++) {
password += charset.charAt(Math.floor(Math.random() * charset.length)); password += charset.charAt(Math.floor(Math.random() * charset.length));
} }
return password; return password;

View File

@ -49,23 +49,19 @@
{# <div class="alert alert-warning" role="alert">Congratulations! {{ message }}</div>#} {# <div class="alert alert-warning" role="alert">Congratulations! {{ message }}</div>#}
{# {% endfor %}#} {# {% endfor %}#}
{% if result_of_generate_article != "had read all articles" %} <button class="btn btn-success" id="load_next_article" onclick="load_next_article()"> 下一篇 Next Article </button>
<a id="next_btn" class="btn btn-success" href="/{{ username }}/reset" role="button"> 下一篇 Next Article </a> <button class="btn btn-success" id="load_pre_article" onclick="load_pre_article()" > 上一篇 Previous Article </button>
{% endif %}
{% if session.get('had_read_articles') and session.get('had_read_articles')['index']>0 %}
<a id="pre_btn" class="btn btn-success" href="/{{ username }}/back" role="button"> 上一篇 Previous Article </a>
{% endif %}
<p><b>阅读文章并回答问题</b></p> <p><b>阅读文章并回答问题</b></p>
<div id="text-content"> <div id="text-content">
{% if result_of_generate_article == 'found' %} <div id="found">
<div class="alert alert-success" role="alert">According to your word list, your level is <span class="badge bg-success">{{ today_article["user_level"] }}</span> and we have chosen an article with a difficulty level of <span class="badge bg-success">{{ today_article["text_level"] }}</span> for you.</div> <div class="alert alert-success" role="alert">According to your word list, your level is <span class="badge bg-success" id="user-level">{{ today_article["user_level"] }}</span> and we have chosen an article with a difficulty level of <span class="badge bg-success" id="text_level">{{ today_article["text_level"] }}</span> for you.</div>
<p class="text-muted">Article added on: {{ today_article["date"] }}</p><br/> <p class="text-muted" id="date">Article added on: {{ today_article["date"] }}</p><br/>
<div class="p-3 mb-2 bg-light text-dark"><br/> <div class="p-3 mb-2 bg-light text-dark"><br/>
<p class="display-5">{{ today_article["article_title"] }}</p><br/> <p class="display-5" id="article_title">{{ today_article["article_title"] }}</p><br/>
<p class="lead"><font id="article" size=2>{{ today_article["article_body"] }}</font></p><br/> <p class="lead"><font id="article" size=2>{{ today_article["article_body"] }}</font></p><br/>
<p><small class="text-muted">{{ today_article['source'] }}</small></p><br/> <p><small class="text-muted" id="source">{{ today_article['source'] }}</small></p><br/>
<p><b>{{ today_article['question'] }}</b></p><br/> <p><b id="question">{{ today_article['question'] }}</b></p><br/>
<script type="text/javascript"> <script type="text/javascript">
function toggle_visibility(id) { {# https://css-tricks.com/snippets/javascript/showhide-element/#} function toggle_visibility(id) { {# https://css-tricks.com/snippets/javascript/showhide-element/#}
const e = document.getElementById(id); const e = document.getElementById(id);
@ -78,15 +74,13 @@
<button onclick="toggle_visibility('answer');">ANSWER</button> <button onclick="toggle_visibility('answer');">ANSWER</button>
<div id="answer" style="display:none;">{{ today_article['answer'] }}</div><br/> <div id="answer" style="display:none;">{{ today_article['answer'] }}</div><br/>
</div> </div>
{% elif result_of_generate_article == "not found" %} </div>
<div class="alert alert-success" role="alert"> <div class="alert alert-success" role="alert" id="not_found" style="display:none;">
<p class="text-muted"><span class="badge bg-success">Notes:</span><br>No article is currently available for you. You can try again a few times or mark new words in the passage to improve your level.</p> <p class="text-muted"><span class="badge bg-success">Notes:</span><br>No article is currently available for you. You can try again a few times or mark new words in the passage to improve your level.</p>
</div> </div>
{% elif result_of_generate_article == "had read all articles" %} <div class="alert alert-success" role="alert" id="read_all" style="display:none;">
<div class="alert alert-success" role="alert">
<p class="text-muted"><span class="badge bg-success">Notes:</span><br>You've read all the articles.</p> <p class="text-muted"><span class="badge bg-success">Notes:</span><br>You've read all the articles.</p>
</div> </div>
{% endif %}
</div> </div>
<input type="checkbox" onclick="toggleHighlighting()" checked/>生词高亮 <input type="checkbox" onclick="toggleHighlighting()" checked/>生词高亮
@ -139,6 +133,7 @@
<a class="btn btn-success" onclick="familiar('{{ word }}')" role="button">熟悉</a> <a class="btn btn-success" onclick="familiar('{{ word }}')" role="button">熟悉</a>
<a class="btn btn-warning" onclick="unfamiliar('{{ word }}')" role="button">不熟悉</a> <a class="btn btn-warning" onclick="unfamiliar('{{ word }}')" role="button">不熟悉</a>
<a class="btn btn-danger" onclick="delete_word('{{ word }}')" role="button">删除</a> <a class="btn btn-danger" onclick="delete_word('{{ word }}')" role="button">删除</a>
<a class="btn btn-info" onclick="read_word('{{ word }}')" role="button">朗读</a>
</p> </p>
{% endfor %} {% endfor %}
</div> </div>
@ -151,6 +146,67 @@
<script src="{{ js }}"></script> <script src="{{ js }}"></script>
{% endfor %} {% endfor %}
{% endif %} {% endif %}
<script type="text/javascript">
/**
 * Fetch the next article for the current user and refresh the page in place.
 *
 * The JSON response is read for three keys: `today_article`,
 * `visited_articles` and `result_of_generate_article`.
 *
 * The "previous" button and the status panels are refreshed on every
 * response, not only when an article came back. Otherwise the
 * "not found" / "read all" branches of check_next() could never run.
 * NOTE(review): this assumes `today_article` is empty/null when no article
 * was selected; confirm against the server-side helper.
 */
function load_next_article() {
    $.ajax({
        url: '/get_next_article/{{username}}',
        dataType: 'json',
        success: function (data) {
            if (!data) {
                return; // nothing usable in the response
            }
            // Update page content only when an article was actually returned.
            if (data['today_article']) {
                update(data['today_article']);
            }
            check_pre(data['visited_articles']);
            check_next(data['result_of_generate_article']);
        }
    });
}

/**
 * Fetch the previously visited article for the current user and refresh the
 * page in place.
 *
 * An empty payload is treated as "there is no earlier article" and only
 * hides the "previous" button.
 * NOTE(review): the endpoint appears to answer with an empty string when the
 * history index is already 0; confirm.
 */
function load_pre_article() {
    $.ajax({
        url: '/get_pre_article/{{username}}',
        dataType: 'json',
        success: function (data) {
            if (!data) {
                check_pre('');
                return;
            }
            check_pre(data['visited_articles']);
            // Update page content only when an article was actually returned.
            if (data['today_article']) {
                update(data['today_article']);
                // Make the article panel visible again in case a status
                // notice had replaced it.
                check_next(data['result_of_generate_article']);
            }
        }
    });
}
/**
 * Copy the fields of an article object into the corresponding page elements.
 *
 * Values are written with .text() rather than .html(): the server-rendered
 * version of the same fields is emitted through plain template expressions,
 * so the values are plain text and must not be parsed as markup here.
 *
 * @param {Object} today_article - object with the keys user_level,
 *     text_level, date, article_title, article_body, source, question and
 *     answer. A missing/empty object leaves the page untouched.
 */
function update(today_article) {
    if (!today_article) {
        return;
    }
    $('#user-level').text(today_article['user_level']);
    $('#text_level').text(today_article["text_level"]);
    $('#date').text('Article added on: ' + today_article["date"]);
    $('#article_title').text(today_article["article_title"]);
    $('#article').text(today_article["article_body"]);
    $('#source').text(today_article['source']);
    $('#question').text(today_article["question"]);
    $('#answer').text(today_article["answer"]);
}
// Check whether a previous / next article exists; hide the corresponding button or panel when it does not.
/**
 * Show or hide the "previous article" button.
 *
 * The button is hidden when there is no history to go back to: the argument
 * is missing (undefined/null), is an empty value, or its `index` is <= 0.
 * A missing argument used to raise a TypeError on the `index` lookup.
 *
 * @param {Object|string} visited_articles - history object carrying an
 *     `index` field, or an empty string.
 */
function check_pre(visited_articles) {
    var no_previous = visited_articles == null ||
        visited_articles == '' ||
        visited_articles['index'] <= 0;
    if (no_previous) {
        $('#load_pre_article').hide();
    } else {
        $('#load_pre_article').show();
    }
}
/**
 * Display exactly one of the three result panels and hide the other two.
 *
 * "found" selects #found, "not found" selects #not_found, and any other
 * value selects #read_all.
 *
 * @param {string} result_of_generate_article - outcome string from the server.
 */
function check_next(result_of_generate_article) {
    var panel_to_show = '#read_all';
    if (result_of_generate_article == "found") {
        panel_to_show = '#found';
    } else if (result_of_generate_article == "not found") {
        panel_to_show = '#not_found';
    }

    var panels = ['#found', '#not_found', '#read_all'];
    for (var i = 0; i < panels.length; i++) {
        if (panels[i] === panel_to_show) {
            $(panels[i]).show();
        } else {
            $(panels[i]).hide();
        }
    }
}
</script>
</body> </body>
<style> <style>
mark { mark {

View File

@ -21,41 +21,46 @@ userService = Blueprint("user_bp", __name__)
path_prefix = '/var/www/wordfreq/wordfreq/' path_prefix = '/var/www/wordfreq/wordfreq/'
path_prefix = './' # comment this line in deployment path_prefix = './' # comment this line in deployment
@userService.route("/get_next_article/<username>",methods=['GET','POST'])
@userService.route("/<username>/reset", methods=['GET', 'POST']) def get_next_article(username):
def user_reset(username): user_freq_record = path_prefix + 'static/frequency/' + 'frequency_%s.pickle' % (username)
''' session['old_articleID'] = session.get('articleID')
用户界面
:param username: 用户名
:return: 返回页面内容
'''
if request.method == 'GET': if request.method == 'GET':
had_read_articles = session.get("had_read_articles") visited_articles = session.get("visited_articles")
if had_read_articles['article_ids'][-1] == "null": # 如果当前还是“null”则将“null”pop出来,无需index+=1 if visited_articles['article_ids'][-1] == "null": # 如果当前还是“null”则将“null”pop出来,无需index+=1
had_read_articles['article_ids'].pop() visited_articles['article_ids'].pop()
else: # 当前不为“null”直接 index+=1 else: # 当前不为“null”直接 index+=1
had_read_articles["index"] += 1 visited_articles["index"] += 1
session["had_read_articles"] = had_read_articles session["visited_articles"] = visited_articles
return redirect(url_for('user_bp.userpage', username=username)) visited_articles, today_article, result_of_generate_article = get_today_article(user_freq_record, session.get('visited_articles'))
data = {
'visited_articles': visited_articles,
'today_article': today_article,
'result_of_generate_article': result_of_generate_article
}
else: else:
return 'Under construction' return 'Under construction'
return json.dumps(data)
@userService.route("/<username>/back", methods=['GET']) @userService.route("/get_pre_article/<username>",methods=['GET'])
def user_back(username): def get_pre_article(username):
''' user_freq_record = path_prefix + 'static/frequency/' + 'frequency_%s.pickle' % (username)
用户界面
:param username: 用户名
:return: 返回页面内容
'''
if request.method == 'GET': if request.method == 'GET':
had_read_articles = session.get("had_read_articles") visited_articles = session.get("visited_articles")
had_read_articles["index"] -= 1 # 上一篇index-=1 if(visited_articles["index"]==0):
if had_read_articles['article_ids'][-1] == "null": # 如果当前还是“null”则将“null”pop出来 data=''
had_read_articles['article_ids'].pop() else:
session["had_read_articles"] = had_read_articles visited_articles["index"] -= 1 # 上一篇index-=1
return redirect(url_for('user_bp.userpage', username=username)) if visited_articles['article_ids'][-1] == "null": # 如果当前还是“null”则将“null”pop出来
visited_articles['article_ids'].pop()
session["visited_articles"] = visited_articles
visited_articles, today_article, result_of_generate_article = get_today_article(user_freq_record, session.get('visited_articles'))
data = {
'visited_articles': visited_articles,
'today_article': today_article,
'result_of_generate_article':result_of_generate_article
}
return json.dumps(data)
@userService.route("/<username>/<word>/unfamiliar", methods=['GET', 'POST']) @userService.route("/<username>/<word>/unfamiliar", methods=['GET', 'POST'])
def unfamiliar(username, word): def unfamiliar(username, word):
@ -139,8 +144,8 @@ def userpage(username):
words = '' words = ''
for x in lst3: for x in lst3:
words += x[0] + ' ' words += x[0] + ' '
had_read_articles, today_article, result_of_generate_article = get_today_article(user_freq_record, session.get('had_read_articles')) visited_articles, today_article, result_of_generate_article = get_today_article(user_freq_record, session.get('visited_articles'))
session['had_read_articles'] = had_read_articles session['visited_articles'] = visited_articles
# 通过 today_article加载前端的显示页面 # 通过 today_article加载前端的显示页面
return render_template('userpage_get.html', return render_template('userpage_get.html',
admin_name=ADMIN_NAME, admin_name=ADMIN_NAME,
@ -154,10 +159,6 @@ def userpage(username):
yml=Yaml.yml, yml=Yaml.yml,
words=words) words=words)
@userService.route("/<username>/mark", methods=['GET', 'POST']) @userService.route("/<username>/mark", methods=['GET', 'POST'])
def user_mark_word(username): def user_mark_word(username):
''' '''

View File

@ -39,7 +39,7 @@ def file2str(fname):#文件转字符
def remove_punctuation(s): # 这里是s是形参 (parameter)。函数被调用时才给s赋值。 def remove_punctuation(s): # 这里是s是形参 (parameter)。函数被调用时才给s赋值。
special_characters = '\_©~<=>+-/[]*&$%^@.,?!:;#()"“”—‘’{}|' # 把里面的字符都去掉 special_characters = '\_©~<=>+/[]*&$%^@.,?!:;#()"“”—‘’{}|' # 把里面的字符都去掉
for c in special_characters: for c in special_characters:
s = s.replace(c, ' ') # 防止出现把 apple,apple 移掉逗号后变成 appleapple 情况 s = s.replace(c, ' ') # 防止出现把 apple,apple 移掉逗号后变成 appleapple 情况
s = s.replace('--', ' ') s = s.replace('--', ' ')

View File

@ -2,3 +2,4 @@ Flask==1.1.2
selenium==3.141.0 selenium==3.141.0
PyYAML~=6.0 PyYAML~=6.0
pony==0.7.16 pony==0.7.16
snowballstemmer==2.2.0