1
0
Fork 0
EnglishPal/app/Article.py

131 lines
4.3 KiB
Python
Raw Permalink Blame History

This file contains ambiguous Unicode characters!

This file contains ambiguous Unicode characters that may be confused with others in your current locale. If your use case is intentional and legitimate, you can safely ignore this warning. Use the Escape button to highlight these characters.

# -*- coding: UTF-8 -*-
from WordFreq import WordFreq
from wordfreqCMD import youdao_link, sort_in_descending_order
from UseSqlite import InsertQuery, RecordQuery
import pickle_idea, pickle_idea2
import os
import random, glob
import hashlib
from datetime import datetime
from flask import Flask, request, redirect, render_template, url_for, session, abort, flash, get_flashed_messages
from difficulty import get_difficulty_level, text_difficulty_level, user_difficulty_level
path_prefix = '/var/www/wordfreq/wordfreq/'
path_prefix = './' # comment this line in deployment
def total_number_of_essays():
rq = RecordQuery(path_prefix + 'static/wordfreqapp.db')
rq.instructions("SELECT * FROM article")
rq.do()
result = rq.get_results()
return len(result)
def get_article_title(s):
return s.split('\n')[0]
def get_article_body(s):
lst = s.split('\n')
lst.pop(0) # remove the first line
return '\n'.join(lst)
def get_today_article(user_word_list, existing_articles):
rq = RecordQuery(path_prefix + 'static/wordfreqapp.db')
if existing_articles is None:
existing_articles = {
"index" : 0, # 为 article_ids 的索引
"article_ids": [] # 之前显示文章的id列表越后越新
}
if existing_articles["index"] > len(existing_articles["article_ids"])-1:
rq.instructions("SELECT * FROM article")
else:
rq.instructions('SELECT * FROM article WHERE article_id=%d' % (existing_articles["article_ids"][existing_articles["index"]]))
rq.do()
result = rq.get_results()
random.shuffle(result)
# Choose article according to reader's level
d1 = load_freq_history(path_prefix + 'static/frequency/frequency.p')
d2 = load_freq_history(path_prefix + 'static/words_and_tests.p')
d3 = get_difficulty_level(d1, d2)
d = None
d_user = load_freq_history(user_word_list)
user_level = user_difficulty_level(d_user, d3) # more consideration as user's behaviour is dynamic. Time factor should be considered.
text_level = 0
if existing_articles["index"] > len(existing_articles["article_ids"])-1: # 下一篇
flag_get_article = False
for reading in result:
text_level = text_difficulty_level(reading['text'], d3)
factor = random.gauss(0.8,
0.1) # a number drawn from Gaussian distribution with a mean of 0.8 and a stand deviation of 1
if reading['article_id'] not in existing_articles["article_ids"] and within_range(text_level, user_level, (8.0 - user_level) * factor): # 新的文章之前没有出现过且符合一定范围的水平
d = reading
existing_articles["article_ids"].append(d['article_id']) # 列表添加新的文章id下面进行
flag_get_article = True
break
if not flag_get_article:
existing_articles["index"] -= 1
else: # 上一篇
d = random.choice(result)
text_level = text_difficulty_level(d['text'], d3)
today_article = None
if d:
today_article = {
"user_level": '%4.2f' % user_level,
"text_level": '%4.2f' % text_level,
"date": d['date'],
"article_title": get_article_title(d['text']),
"article_body": get_article_body(d['text']),
"source": d["source"],
"question": get_question_part(d['question']),
"answer": get_answer_part(d['question'])
}
return existing_articles, today_article
def load_freq_history(path):
d = {}
if os.path.exists(path):
d = pickle_idea.load_record(path)
return d
def within_range(x, y, r):
return x > y and abs(x - y) <= r
def get_question_part(s):
s = s.strip()
result = []
flag = 0
for line in s.split('\n'):
line = line.strip()
if line == 'QUESTION':
result.append(line)
flag = 1
elif line == 'ANSWER':
flag = 0
elif flag == 1:
result.append(line)
return '\n'.join(result)
def get_answer_part(s):
s = s.strip()
result = []
flag = 0
for line in s.split('\n'):
line = line.strip()
if line == 'ANSWER':
flag = 1
elif flag == 1:
result.append(line)
return '\n'.join(result)