1
0
Fork 0

暂时提交之前的修复代码,但该代码逻辑存在缺陷,待解决

Bug476-LiMengdie
李梦蝶 2024-04-22 22:48:24 +08:00
parent 6dbb1e2c06
commit 2f8dde4e42
5 changed files with 235 additions and 10 deletions

View File

@ -1,3 +1,4 @@
from WordFreq import WordFreq from WordFreq import WordFreq
from wordfreqCMD import youdao_link, sort_in_descending_order from wordfreqCMD import youdao_link, sort_in_descending_order
import pickle_idea, pickle_idea2 import pickle_idea, pickle_idea2
@ -10,6 +11,7 @@ from difficulty import get_difficulty_level_for_user, text_difficulty_level, use
from model.article import get_all_articles, get_article_by_id, get_number_of_articles from model.article import get_all_articles, get_article_by_id, get_number_of_articles
import logging import logging
path_prefix = './' path_prefix = './'
db_path_prefix = './db/' # comment this line in deployment db_path_prefix = './db/' # comment this line in deployment
@ -28,7 +30,8 @@ def get_article_body(s):
return '\n'.join(lst) return '\n'.join(lst)
def get_today_article(user_word_list, visited_articles): #user_articlesWithoutNewWords_record 保存前端传来的用户阅读时长超过15秒且不含高亮生词的文章索引
def get_today_article(user_word_list, visited_articles,user_articlesWithoutNewWords_record):
if visited_articles is None: if visited_articles is None:
visited_articles = { visited_articles = {
"index" : 0, # 为 article_ids 的索引 "index" : 0, # 为 article_ids 的索引
@ -57,7 +60,12 @@ def get_today_article(user_word_list, visited_articles):
d_user = load_freq_history(user_word_list) d_user = load_freq_history(user_word_list)
logging.debug('* get_today_article(): user_difficulty_level() start') logging.debug('* get_today_article(): user_difficulty_level() start')
user_level = user_difficulty_level(d_user, d3) # more consideration as user's behaviour is dynamic. Time factor should be considered.
articles_id_list=None
if os.path.exists(user_articlesWithoutNewWords_record) != False:
articles_id_list = pickle_idea.load_record(user_articlesWithoutNewWords_record)
#将 用户阅读时长超过15秒且不含高亮生词的文章记录 传入user_difficulty_level并据此 提高用户level
user_level = user_difficulty_level(d_user, d3,articles_id_list) # more consideration as user's behaviour is dynamic. Time factor should be considered.
logging.debug('* get_today_article(): done') logging.debug('* get_today_article(): done')
text_level = 0 text_level = 0
if visited_articles["index"] > len(visited_articles["article_ids"])-1: # 生成新的文章 if visited_articles["index"] > len(visited_articles["article_ids"])-1: # 生成新的文章
@ -90,6 +98,7 @@ def get_today_article(user_word_list, visited_articles):
"user_level": '%4.1f' % user_level, "user_level": '%4.1f' % user_level,
"text_level": '%4.1f' % text_level, "text_level": '%4.1f' % text_level,
"date": d['date'], "date": d['date'],
"article_id":d['article_id'],#该变量存储 用户阅读时长超过15秒且不含高亮生词的文章索引
"article_title": get_article_title(d['text']), "article_title": get_article_title(d['text']),
"article_body": get_article_body(d['text']), "article_body": get_article_body(d['text']),
"source": d["source"], "source": d["source"],

View File

@ -7,6 +7,7 @@
import pickle import pickle
import math import math
from wordfreqCMD import remove_punctuation, freq, sort_in_descending_order, sort_in_ascending_order from wordfreqCMD import remove_punctuation, freq, sort_in_descending_order, sort_in_ascending_order
import snowballstemmer import snowballstemmer
@ -94,10 +95,21 @@ def revert_dict(d):
return d2 return d2
def user_difficulty_level(d_user, d): #articlesWithoutNewWords 存储用户阅读时长超过15秒且不含高亮生词的文章索引信息用户阅读完后没有添加生词的文章
def user_difficulty_level(d_user, d,articlesWithoutNewWords):
d_user2 = revert_dict(d_user) # key is date, and value is a list of words added in that date d_user2 = revert_dict(d_user) # key is date, and value is a list of words added in that date
count = 0 count = 0
geometric = 1 geometric = 1
#传入根据用户生词计算出的用户level
def get_level_sum(level):
if articlesWithoutNewWords!=None:#若用户阅读完后没有添加生词的文章索引不为空则根据这些文章的等级适度提高用户的level
for article in articlesWithoutNewWords:
if(level<float(article.get('text_level'))):
level+=(float(article.get('text_level'))-level)/20 #根据文章等级提高用户level用户level与用户阅读完后没有添加生词的文章level 差距越大,分数提高程度越大
print(f"用户等级{level}")
print("-----------------------------------")
return level
for date in sorted(d_user2.keys(), for date in sorted(d_user2.keys(),
reverse=True): # most recently added words are more important while determining user's level reverse=True): # most recently added words are more important while determining user's level
lst = d_user2[date] # a list of words lst = d_user2[date] # a list of words
@ -115,10 +127,9 @@ def user_difficulty_level(d_user, d):
geometric = geometric * (hard) geometric = geometric * (hard)
count += 1 count += 1
if count >= 10: if count >= 10:
return geometric ** (1 / count) return get_level_sum(geometric ** (1 / count))
return geometric ** (1 / max(count, 1))
return get_level_sum(geometric ** (1 / max(count, 1)))
def text_difficulty_level(s, d): def text_difficulty_level(s, d):
s = remove_punctuation(s) s = remove_punctuation(s)

View File

@ -173,13 +173,33 @@
{% endfor %} {% endfor %}
{% endif %} {% endif %}
<script type="text/javascript"> <script type="text/javascript">
window.onload = function () { // 页面加载时执行 window.onload = function () { // 页面加载时执行
// 刷新页面或进入页面时判断,若不是首篇文章,则上一篇按钮可见 // 刷新页面或进入页面时判断,若不是首篇文章,则上一篇按钮可见
if(sessionStorage.getItem('pre_page_button')!="display" && sessionStorage.getItem('pre_page_button')){ if(sessionStorage.getItem('pre_page_button')!="display" && sessionStorage.getItem('pre_page_button')){
$('#load_pre_article').show(); $('#load_pre_article').show();
} }
startTime=Date.now();//记录用户页面的初始时间
if(window.performance.navigation.type!=1){//将第一次加载页面时的文章id,level保存到本地
sessionStorage.setItem('article_id','{{today_article['article_id']}}');
sessionStorage.setItem('text_level','{{today_article['text_level']}}');
}
//
}; };
function load_next_article(){ function load_next_article(){
//在进入下一篇之前处理
endTime=Date.now();//记录用户点击下一篇文章时的时间
spendtime=endTime-startTime;//计算出用户浏览该页面的时长,单位毫秒
if(spendtime/1000>15){//若浏览时长大于15说明用户认真阅读了该篇文章时长小于15代表用户不感兴趣跳过不做任何处理
if(!checkMarkedWords()){//检查该篇文章是否有生词高亮若没有将文章id,level发送到后台保存至文件中在计算用户level时根据文件中保存的文章等级计算
$.ajax({
url: '/submit_article_id_without_new_words/{{username}}/'+sessionStorage.getItem('article_id')+'/'+sessionStorage.getItem('text_level'),
dataType: 'json',
success: function(data) {}
});
}
}
$.ajax({ $.ajax({
url: '/get_next_article/{{username}}', url: '/get_next_article/{{username}}',
dataType: 'json', dataType: 'json',
@ -192,8 +212,21 @@
} }
} }
}); });
startTime=Date.now();//在页面加载完下一篇文章,记录起始时间
} }
function load_pre_article(){ function load_pre_article(){
//在进入上一篇之前处理
endTime=Date.now();//记录用户点击上一篇文章时的时间
spendtime=endTime-startTime;//计算出用户浏览该文章的时长,单位毫秒
if(spendtime/1000>15){
if(!checkMarkedWords()){
$.ajax({
url: '/submit_article_id_without_new_words/{{username}}/'+sessionStorage.getItem('article_id')+'/'+sessionStorage.getItem('text_level'),
dataType: 'json',
success: function(data) {}
});
}
}
$.ajax({ $.ajax({
url: '/get_pre_article/{{username}}', url: '/get_pre_article/{{username}}',
dataType: 'json', dataType: 'json',
@ -205,6 +238,7 @@
} }
} }
}); });
startTime=Date.now();//在页面加载完下一篇文章,记录起始时间
} }
function update(today_article){ function update(today_article){
$('#user_level').html(today_article['user_level']); $('#user_level').html(today_article['user_level']);
@ -219,7 +253,30 @@
setTimeout(() => {document.querySelector('#text_level').classList.remove('mark');}, 2000); setTimeout(() => {document.querySelector('#text_level').classList.remove('mark');}, 2000);
document.querySelector('#user_level').classList.add('mark'); // do the same thing for user difficulty level document.querySelector('#user_level').classList.add('mark'); // do the same thing for user difficulty level
setTimeout(() => {document.querySelector('#user_level').classList.remove('mark');}, 2000); setTimeout(() => {document.querySelector('#user_level').classList.remove('mark');}, 2000);
sessionStorage.setItem('article_id',today_article["article_id"]);//将更新后的文章id,level保存到本地
sessionStorage.setItem('text_level',today_article['text_level']);
} }
function checkMarkedWords(){
    // Return true when at least one of the user's new words (picked in this
    // session or already in the user's word list) occurs in the displayed
    // article title or body; return false otherwise.
    const titleElement = document.getElementById("article_title");
    const bodyElement = document.getElementById("article");
    // Use textContent rather than innerHTML so that tag and attribute names in
    // the markup can never be mistaken for article words.
    const articleText = (titleElement ? titleElement.textContent : "") + " " +
                        (bodyElement ? bodyElement.textContent : "");

    const pickedWords = document.getElementById("selected-words");      // words picked to the text area
    const dictionaryWords = document.getElementById("selected-words2"); // words appearing in the user's new words list
    const allWords = (pickedWords ? pickedWords.value : "") + " " +
                     (dictionaryWords ? dictionaryWords.value : "");

    // Split on any whitespace, trim leading/trailing non-word characters and
    // drop empty tokens: an empty token would otherwise "match" every article.
    const words = allWords
        .split(/\s+/)
        .map((word) => word.replace(/(^\W*)|(\W*$)/g, ""))
        .filter((word) => word.length > 0);

    // Every word is checked (none skipped), using a literal substring test so
    // that characters inside a word are never interpreted as a regex.
    // NOTE(review): this is still a case-sensitive substring match, as before;
    // confirm it agrees with how the page highlights words.
    return words.some((word) => articleText.includes(word));
}
//
<!-- 检查是否存在上一篇或下一篇,不存在则对应按钮隐藏--> <!-- 检查是否存在上一篇或下一篇,不存在则对应按钮隐藏-->
function check_pre(visited_articles){ function check_pre(visited_articles){
if((visited_articles=='')||(visited_articles['index']<=0)){ if((visited_articles=='')||(visited_articles['index']<=0)){

View File

@ -0,0 +1,114 @@
import time
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from selenium.common.exceptions import UnexpectedAlertPresentException, NoAlertPresentException, \
ElementClickInterceptedException, NoSuchElementException, TimeoutException
from helper import signup
def is_textarea_empty(driver):
    """Return True when the ``selected-words`` textarea holds no text.

    The element's ``value`` attribute is read through ``driver``; a missing
    value or one consisting only of whitespace counts as empty, i.e. no new
    words have been added.
    """
    value = driver.find_element(By.ID, 'selected-words').get_attribute("value")
    if value is None:
        return True
    return value.strip() == ""
def get_user_level(driver):
    """Return the text of the ``user_level`` page element converted to float."""
    level_element = driver.find_element(By.ID, 'user_level')
    return float(level_element.text)
def scroll(total_time, driver):
    """Simulate reading by scrolling the page down, then back up.

    ``total_time - 2`` seconds are split into two equal phases: the first
    repeatedly scrolls to the bottom of the page, the second repeatedly
    scrolls back to the top. Each phase issues one scroll per second.

    Args:
        total_time: Nominal reading time in seconds.
        driver: WebDriver used to execute the scroll scripts.
    """
    try:
        phase_seconds = (total_time - 2) / 2
        scroll_scripts = (
            "window.scrollTo(0, document.body.scrollHeight);",   # to the bottom
            "window.scrollTo(0, -document.body.scrollHeight);",  # back to the top
        )
        for script in scroll_scripts:
            deadline = time.time() + phase_seconds
            while time.time() < deadline:
                driver.execute_script(script)
                # One-second pause between scroll commands.
                time.sleep(1)
    except ElementClickInterceptedException:
        print("加载出错")
def read_test_with_time(driver, total_time):
    """Read the current article for a while, then move on to the next one.

    Records the user level and text level shown on the page, simulates
    reading via ``scroll(total_time, driver)``, clicks the
    ``load_next_article`` button, dismisses an alert if one shows up, and
    reads the user level again.

    Args:
        driver: WebDriver positioned on the article page.
        total_time: Nominal reading time in seconds, passed to ``scroll``.

    Returns:
        A tuple ``(before_user_level, after_user_level, before_text_level,
        check)`` where ``check`` is True when the ``selected-words`` textarea
        was empty before reading (no new words added).
    """
    # Levels displayed before leaving the article.
    before_user_level = get_user_level(driver)
    before_text_level = float(driver.find_element(By.ID, 'text_level').text)
    # Whether the new-words textarea is empty.
    check = is_textarea_empty(driver)

    scroll(total_time, driver)

    # Locate and click the "next article" button.
    next_button = driver.find_element(By.ID, 'load_next_article')
    next_button.click()

    # Dismiss an alert if one appears. WebDriverWait.until() raises
    # TimeoutException when no alert shows up within the timeout; that is the
    # "nothing to dismiss" case and must not abort the whole test.
    try:
        WebDriverWait(driver, 1).until(EC.alert_is_present())
        driver.switch_to.alert.accept()
    except (UnexpectedAlertPresentException, NoAlertPresentException, TimeoutException):
        pass

    # Give the page time to load so the updated value is read.
    time.sleep(2)
    after_user_level = get_user_level(driver)
    # Extra pause so a human can observe the test run.
    time.sleep(3)
    return before_user_level, after_user_level, before_text_level, check
def test_score_with_enough_time(driver, URL):
    """Check that reading a harder article long enough raises the user level.

    After ``signup(URL, driver)``, the article is read with a nominal time of
    18 seconds. When the article's level was above the user's level and no new
    words were added, the user level must have increased after moving on.

    Element-lookup and timeout errors are reported and re-raised so that a
    broken page fails the test instead of passing silently. The driver is
    always quit.
    """
    try:
        # Sign up / log in and land on the page under test.
        signup(URL, driver)
        before_user_level, after_user_level, before_text_level, check = read_test_with_time(driver, 18)
        if before_user_level < before_text_level and check:
            assert before_user_level < after_user_level, 'The user level does not increase after reading an article with a high level and clicking to navigate away without adding any new vocabulary. '
    except (NoSuchElementException, TimeoutException) as e:
        print("Error occurs: " + str(e))
        raise
    finally:
        driver.quit()
def test_score_without_enough_time(driver, URL):
    """Check that skimming an article leaves the user level unchanged.

    After ``signup(URL, driver)``, the article is read with a nominal time of
    6 seconds. When the article's level was above the user's level and no new
    words were added, the user level must be the same after moving on.

    Element-lookup and timeout errors are reported and re-raised so that a
    broken page fails the test instead of passing silently. The driver is
    always quit.
    """
    try:
        # Sign up / log in and land on the page under test.
        signup(URL, driver)
        before_user_level, after_user_level, before_text_level, check = read_test_with_time(driver, 6)
        if before_user_level < before_text_level and check:
            assert before_user_level == after_user_level, 'After the user quickly skipped through the article, the user level does not remain unchanged. '
    except (NoSuchElementException, TimeoutException) as e:
        print("Error occurs: " + str(e))
        raise
    finally:
        driver.quit()

View File

@ -1,3 +1,5 @@
import pickle
import os.path
from datetime import datetime from datetime import datetime
from admin_service import ADMIN_NAME from admin_service import ADMIN_NAME
from flask import * from flask import *
@ -26,6 +28,8 @@ path_prefix = './' # comment this line in deployment
@userService.route("/get_next_article/<username>",methods=['GET','POST']) @userService.route("/get_next_article/<username>",methods=['GET','POST'])
def get_next_article(username): def get_next_article(username):
user_articlesWithoutNewWords_record = path_prefix + 'static/frequency/' + 'ArticlesWithoutNewWords_%s.pickle' % (username)
user_freq_record = path_prefix + 'static/frequency/' + 'frequency_%s.pickle' % (username) user_freq_record = path_prefix + 'static/frequency/' + 'frequency_%s.pickle' % (username)
session['old_articleID'] = session.get('articleID') session['old_articleID'] = session.get('articleID')
if request.method == 'GET': if request.method == 'GET':
@ -36,7 +40,7 @@ def get_next_article(username):
visited_articles["index"] += 1 visited_articles["index"] += 1
session["visited_articles"] = visited_articles session["visited_articles"] = visited_articles
logging.debug('/get_next_article: start calling get_today_arcile()') logging.debug('/get_next_article: start calling get_today_arcile()')
visited_articles, today_article, result_of_generate_article = get_today_article(user_freq_record, session.get('visited_articles')) visited_articles, today_article, result_of_generate_article = get_today_article(user_freq_record, session.get('visited_articles'),user_articlesWithoutNewWords_record)
logging.debug('/get_next_arcile: done.') logging.debug('/get_next_arcile: done.')
data = { data = {
'visited_articles': visited_articles, 'visited_articles': visited_articles,
@ -47,8 +51,37 @@ def get_next_article(username):
return 'Under construction' return 'Under construction'
return json.dumps(data) return json.dumps(data)
# Store the id and level of an article that the front end reports as read for
# more than 15 seconds without any highlighted new words.
@userService.route("/submit_article_id_without_new_words/<username>/<article_id>/<text_level>",methods=['GET'])
def set_article_id_without_new_words(username,article_id,text_level):
    """Record an article the user read without adding new words.

    Records are kept as a pickled list of ``{'article_id', 'text_level'}``
    dicts in ``static/frequency/ArticlesWithoutNewWords_<username>.pickle``.
    An article id that is already recorded is not added again, so re-reading
    the same article cannot be used to accumulate credit.

    Args:
        username: User whose record file is updated.
        article_id: Id of the article, as received in the URL.
        text_level: Difficulty level of the article, as received in the URL.

    Returns:
        ``"success"`` once the record is stored (or already present), or an
        error message with HTTP status 400 when ``text_level`` is not numeric.
    """
    # Stored levels are later converted with float() when the user level is
    # computed; reject bad values here rather than poisoning the record file.
    try:
        float(text_level)
    except ValueError:
        return "invalid text_level", 400

    record_path = path_prefix + 'static/frequency/' + 'ArticlesWithoutNewWords_%s.pickle' % (username)

    # NOTE(review): pickle.load must only ever see files written by this
    # service; confirm nothing else can write into static/frequency/.
    if os.path.exists(record_path):
        with open(record_path, 'rb') as fp:
            records = pickle.load(fp)
    else:
        records = []  # first report for this user

    already_recorded = any(record.get('article_id') == article_id for record in records)
    if not already_recorded:
        records.append({'article_id': article_id, 'text_level': text_level})
        print(f"编号为{article_id}的文章阅读时间大于15秒且未有生词记录于ArticlesWithoutNewWords_{username}.pickle")
        print(records)

    # Written unconditionally so the record file exists after the first report.
    with open(record_path, 'wb') as f:
        pickle.dump(records, f)
    return "success"
@userService.route("/get_pre_article/<username>",methods=['GET']) @userService.route("/get_pre_article/<username>",methods=['GET'])
def get_pre_article(username): def get_pre_article(username):
user_articlesWithoutNewWords_record = path_prefix + 'static/frequency/' + 'ArticlesWithoutNewWords_%s.pickle' % (
username)
user_freq_record = path_prefix + 'static/frequency/' + 'frequency_%s.pickle' % (username) user_freq_record = path_prefix + 'static/frequency/' + 'frequency_%s.pickle' % (username)
if request.method == 'GET': if request.method == 'GET':
visited_articles = session.get("visited_articles") visited_articles = session.get("visited_articles")
@ -59,7 +92,7 @@ def get_pre_article(username):
if visited_articles['article_ids'][-1] == "null": # 如果当前还是“null”则将“null”pop出来 if visited_articles['article_ids'][-1] == "null": # 如果当前还是“null”则将“null”pop出来
visited_articles['article_ids'].pop() visited_articles['article_ids'].pop()
session["visited_articles"] = visited_articles session["visited_articles"] = visited_articles
visited_articles, today_article, result_of_generate_article = get_today_article(user_freq_record, session.get('visited_articles')) visited_articles, today_article, result_of_generate_article = get_today_article(user_freq_record, session.get('visited_articles'),user_articlesWithoutNewWords_record)
data = { data = {
'visited_articles': visited_articles, 'visited_articles': visited_articles,
'today_article': today_article, 'today_article': today_article,
@ -130,7 +163,8 @@ def userpage(username):
# 获取session里的用户名 # 获取session里的用户名
username = session.get('username') username = session.get('username')
user_articlesWithoutNewWords_record = path_prefix + 'static/frequency/' + 'ArticlesWithoutNewWords_%s.pickle' % (
username)
user_freq_record = path_prefix + 'static/frequency/' + 'frequency_%s.pickle' % (username) user_freq_record = path_prefix + 'static/frequency/' + 'frequency_%s.pickle' % (username)
if request.method == 'POST': # when we submit a form if request.method == 'POST': # when we submit a form
@ -149,7 +183,7 @@ def userpage(username):
words = '' words = ''
for x in lst3: for x in lst3:
words += x[0] + ' ' words += x[0] + ' '
visited_articles, today_article, result_of_generate_article = get_today_article(user_freq_record, session.get('visited_articles')) visited_articles, today_article, result_of_generate_article = get_today_article(user_freq_record, session.get('visited_articles'),user_articlesWithoutNewWords_record)
session['visited_articles'] = visited_articles session['visited_articles'] = visited_articles
# 通过 today_article加载前端的显示页面 # 通过 today_article加载前端的显示页面
return render_template('userpage_get.html', return render_template('userpage_get.html',