2022-01-26 21:10:09 +08:00
|
|
|
|
from WordFreq import WordFreq
|
|
|
|
|
from wordfreqCMD import youdao_link, sort_in_descending_order
|
|
|
|
|
from UseSqlite import InsertQuery, RecordQuery
|
|
|
|
|
import pickle_idea, pickle_idea2
|
|
|
|
|
import os
|
|
|
|
|
import random, glob
|
|
|
|
|
import hashlib
|
|
|
|
|
from datetime import datetime
|
|
|
|
|
from flask import Flask, request, redirect, render_template, url_for, session, abort, flash, get_flashed_messages
|
|
|
|
|
from difficulty import get_difficulty_level, text_difficulty_level, user_difficulty_level
|
2023-04-24 22:20:35 +08:00
|
|
|
|
from model.article import get_number_of_articles, get_article, get_article_by_id
|
2022-01-26 21:10:09 +08:00
|
|
|
|
|
|
|
|
|
|
|
|
|
|
# Base directory that the data-file paths built in this module are prefixed with.
# This first value is the location used by the deployed installation.
path_prefix = '/var/www/wordfreq/wordfreq/'
# Override for running from the current working directory; while this line is
# active it replaces the value assigned above.
path_prefix = './' # comment this line in deployment
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def total_number_of_essays():
    """Return the number of articles reported by ``get_number_of_articles``.

    The previous body called ``get_number_of_articles()`` but discarded the
    result, so every caller received ``None``.
    """
    return get_number_of_articles()
|
2022-01-26 21:10:09 +08:00
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def get_article_title(s):
    """Return the first line of ``s`` (everything before the first newline).

    When ``s`` contains no newline the whole string is returned.
    """
    first_line, _, _ = s.partition('\n')
    return first_line
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def get_article_body(s):
    """Return ``s`` without its first line.

    The result is everything after the first newline; it is the empty string
    when ``s`` has no newline at all.
    """
    _, _, remainder = s.partition('\n')
    return remainder
|
|
|
|
|
|
|
|
|
|
|
2023-03-08 16:33:13 +08:00
|
|
|
|
def get_today_article(user_word_list, existing_articles):
    """Choose the article to display and return it with the browsing history.

    Args:
        user_word_list: Path passed to ``load_freq_history`` to load the
            user's word record.
        existing_articles: History dict with the keys ``"index"`` (position
            inside ``"article_ids"``) and ``"article_ids"`` (ids of the
            articles shown so far, newest last), or ``None`` to start with an
            empty history. A dict that is passed in is modified in place.

    Returns:
        A tuple ``(existing_articles, today_article)``. ``today_article`` is a
        dict with the keys ``user_level``, ``text_level``, ``date``,
        ``article_title``, ``article_body``, ``source``, ``question`` and
        ``answer``, or ``None`` when no article was selected.
    """
    if existing_articles is None:
        existing_articles = {
            "index": 0,  # position inside article_ids
            "article_ids": [],  # ids of previously shown articles, newest last
        }

    shown_ids = existing_articles["article_ids"]
    # An index beyond the end of the history requests an article that has not
    # been shown yet; any other index re-fetches the history entry it points at.
    wants_new_article = existing_articles["index"] > len(shown_ids) - 1

    if wants_new_article:
        result = list(get_article())  # materialise as a list
    else:
        result = [get_article_by_id(shown_ids[existing_articles["index"]])]
    random.shuffle(result)

    # Choose article according to reader's level
    word_frequencies = load_freq_history(path_prefix + 'static/frequency/frequency.p')
    words_and_tests = load_freq_history(path_prefix + 'static/words_and_tests.p')
    word_levels = get_difficulty_level(word_frequencies, words_and_tests)

    user_words = load_freq_history(user_word_list)
    # more consideration as user's behaviour is dynamic. Time factor should be considered.
    user_level = user_difficulty_level(user_words, word_levels)

    chosen = None
    text_level = 0
    if wants_new_article:  # next article
        for candidate in result:
            text_level = text_difficulty_level(candidate.text, word_levels)
            # A number drawn from a Gaussian distribution with a mean of 0.8
            # and a standard deviation of 0.1.
            factor = random.gauss(0.8, 0.1)
            is_unseen = candidate.article_id not in shown_ids
            # Accept only an article that has not been shown before and whose
            # level lies within the allowed range above the user's level.
            if is_unseen and within_range(text_level, user_level, (8.0 - user_level) * factor):
                chosen = candidate
                shown_ids.append(chosen.article_id)  # record the new article id
                break
        else:
            # No unseen article within range was found: step the index back by one.
            existing_articles["index"] -= 1
    else:  # previous article
        chosen = random.choice(result)
        text_level = text_difficulty_level(chosen.text, word_levels)

    if not chosen:
        return existing_articles, None

    today_article = {
        "user_level": '%4.2f' % user_level,
        "text_level": '%4.2f' % text_level,
        "date": chosen.date,
        "article_title": get_article_title(chosen.text),
        "article_body": get_article_body(chosen.text),
        "source": chosen.source,
        "question": get_question_part(chosen.question),
        "answer": get_answer_part(chosen.question),
    }
    return existing_articles, today_article
|
2022-01-26 21:10:09 +08:00
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def load_freq_history(path):
    """Load the record stored at ``path`` via ``pickle_idea.load_record``.

    Returns an empty dict when ``path`` does not exist.
    """
    if not os.path.exists(path):
        return {}
    return pickle_idea.load_record(path)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def within_range(x, y, r):
    """Tell whether ``x`` is strictly greater than ``y`` by at most ``r``."""
    if not x > y:
        return False
    return abs(x - y) <= r
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def get_question_part(s):
    """Extract the question section(s) from ``s``.

    Lines are compared after stripping surrounding whitespace. Collection
    starts at a line equal to ``QUESTION`` (the marker itself is kept) and
    stops at a line equal to ``ANSWER`` (the marker is dropped). The collected
    lines are returned joined by newlines.
    """
    collected = []
    inside_question = False
    for text in (raw.strip() for raw in s.strip().split('\n')):
        if text == 'QUESTION':
            inside_question = True
            collected.append(text)
            continue
        if text == 'ANSWER':
            inside_question = False
            continue
        if inside_question:
            collected.append(text)
    return '\n'.join(collected)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def get_answer_part(s):
    """Extract the answer section from ``s``.

    Lines are compared after stripping surrounding whitespace. Every line
    after the first line equal to ``ANSWER`` is kept, except further
    ``ANSWER`` marker lines. The kept lines are returned joined by newlines;
    the result is empty when there is no ``ANSWER`` line.
    """
    stripped_lines = [raw.strip() for raw in s.strip().split('\n')]
    if 'ANSWER' not in stripped_lines:
        return ''
    first_after_marker = stripped_lines.index('ANSWER') + 1
    tail = stripped_lines[first_after_marker:]
    return '\n'.join(text for text in tail if text != 'ANSWER')
|