EnglishPal/app/Article.py

178 lines
6.5 KiB
Python
Raw Permalink Normal View History

from WordFreq import WordFreq
from wordfreqCMD import youdao_link, sort_in_descending_order
from UseSqlite import InsertQuery, RecordQuery
import pickle_idea, pickle_idea2
import os
import random, glob
import hashlib
from datetime import datetime
from flask import Flask, request, redirect, render_template, url_for, session, abort, flash, get_flashed_messages
from difficulty import get_difficulty_level, text_difficulty_level, user_difficulty_level
path_prefix = '/var/www/wordfreq/wordfreq/'
path_prefix = './' # comment this line in deployment
def total_number_of_essays():
rq = RecordQuery(path_prefix + 'static/wordfreqapp.db')
rq.instructions("SELECT * FROM article")
rq.do()
result = rq.get_results()
return len(result)
def get_article_title(s):
return s.split('\n')[0]
def get_article_body(s):
lst = s.split('\n')
lst.pop(0) # remove the first line
return '\n'.join(lst)
def get_today_article(user_word_list, articleID):
rq = RecordQuery(path_prefix + 'static/wordfreqapp.db')
if articleID == None:
rq.instructions("SELECT * FROM article")
else:
rq.instructions('SELECT * FROM article WHERE article_id=%d' % (articleID))
rq.do()
result = rq.get_results()
random.shuffle(result)
# Choose article according to reader's level
d1 = load_freq_history(path_prefix + 'static/frequency/frequency.p')
d2 = load_freq_history(path_prefix + 'static/words_and_tests.p')
d3 = get_difficulty_level(d1, d2)
d = {}
d_user = load_freq_history(user_word_list)
2022-01-27 17:01:03 +08:00
user_level = user_difficulty_level(d_user, d3) # more consideration as user's behaviour is dynamic. Time factor should be considered.
random.shuffle(result) # shuffle list
d = random.choice(result)
text_level = text_difficulty_level(d['text'], d3)
if articleID == None:
2022-06-01 16:08:55 +08:00
# print('result = ', len(result), sep=' ')
p = False
for reading in result:
text_level = text_difficulty_level(reading['text'], d3)
2022-06-01 16:08:55 +08:00
factor = random.gauss(0.8, 0.1) # a number drawn from Gaussian distribution with a mean of 0.8 and a stand deviation of 1
if within_range(text_level, user_level, (8.0 - user_level) * factor):
2022-06-01 16:08:55 +08:00
list = session.get('articleIdList')
if list == None:
d = reading
list = [reading['article_id']]
session['articleIdList'] = list
p = True
break
exist = False
for i in list:
if reading['article_id'] == i:
exist = True
break
if exist == False:
d = reading
list.append(reading['article_id'])
session['articleIdList']=list
p = True
break
if p == False:
session['articleIdList'] = None
p = True
while p:
for reading in result:
text_level = text_difficulty_level(reading['text'], d3)
factor = random.gauss(0.8, 0.1) # a number drawn from Gaussian distribution with a mean of 0.8 and a stand deviation of 1
if within_range(text_level, user_level, (8.0 - user_level) * factor):
if reading['article_id'] != session['prearticleID']:
d = reading
p = False
'''
测试代码
print(session['articleIdList'])
print(session['prearticleID'], d['article_id'], sep=' ')
问题描述选择下一篇文章时由于没有考虑选择文章时会重复选择当前文章可能会导致重复进入某一篇文章
解决方法选择下一篇文章时将选择的文章ID和当前文章ID比较若重复则换一个
进一步优化
问题描述利用上述方法可以满足不重复进入某一篇文章但是可能两篇文章会反复出现
解决方法记录一个队列用来保存出现过的文章ID
选择文章时若文章Id出现在队列中则放弃
若都出现过则清空队列随机选择一个不与当前文章重复的文章
'''
s = '<div class="alert alert-success" role="alert">According to your word list, your level is <span class="badge bg-success">%4.2f</span> and we have chosen an article with a difficulty level of <span class="badge bg-success">%4.2f</span> for you.</div>' % (
user_level, text_level)
s += '<p class="text-muted">Article added on: %s</p>' % (d['date'])
s += '<div class="p-3 mb-2 bg-light text-dark">'
article_title = get_article_title(d['text'])
article_body = get_article_body(d['text'])
s += '<p class="display-3">%s</p>' % (article_title)
s += '<p class="lead"><font id="article" size=2>%s</font></p>' % (article_body)
s += '<p><small class="text-muted">%s</small></p>' % (d['source'])
s += '<p><b>%s</b></p>' % (get_question_part(d['question']))
s = s.replace('\n', '<br/>')
s += '%s' % (get_answer_part(d['question']))
s += '</div>'
session['articleID'] = d['article_id']
return s
def load_freq_history(path):
d = {}
if os.path.exists(path):
d = pickle_idea.load_record(path)
return d
def within_range(x, y, r):
return x > y and abs(x - y) <= r
def get_question_part(s):
s = s.strip()
result = []
flag = 0
for line in s.split('\n'):
line = line.strip()
if line == 'QUESTION':
result.append(line)
flag = 1
elif line == 'ANSWER':
flag = 0
elif flag == 1:
result.append(line)
return '\n'.join(result)
def get_answer_part(s):
s = s.strip()
result = []
flag = 0
for line in s.split('\n'):
line = line.strip()
if line == 'ANSWER':
flag = 1
elif flag == 1:
result.append(line)
# https://css-tricks.com/snippets/javascript/showhide-element/
js = '''
<script type="text/javascript">
function toggle_visibility(id) {
var e = document.getElementById(id);
if(e.style.display == 'block')
e.style.display = 'none';
else
e.style.display = 'block';
}
2022-01-27 17:01:03 +08:00
</script>
'''
html_code = js
html_code += '\n'
html_code += '<button onclick="toggle_visibility(\'answer\');">ANSWER</button>\n'
html_code += '<div id="answer" style="display:none;">%s</div>\n' % ('\n'.join(result))
return html_code