EnglishPal/app/Article.py

import os
import random
import pickle_idea
from difficulty import get_difficulty_level_for_user, text_difficulty_level, user_difficulty_level
from UseSqlite import RecordQuery

# Prefix prepended to the relative resource paths used in this module
# (e.g. 'static/wordfreqapp.db'). The first assignment is the deployment value.
path_prefix = '/var/www/wordfreq/wordfreq/'
# Local override: resolve resources relative to the current working directory.
path_prefix = './'  # comment this line in deployment


def total_number_of_essays():
    '''
    Return the total number of articles.

    Runs "SELECT * FROM article" against the application database and
    returns the length of the result set.
    '''
    query = RecordQuery(path_prefix + 'static/wordfreqapp.db')
    query.instructions("SELECT * FROM article")
    query.do()
    return len(query.get_results())


def get_article_title(s):
    '''
    Return the title of an article, i.e. everything before the first newline.

    When s contains no newline the whole string is returned.
    '''
    first_line, _, _ = s.partition('\n')
    return first_line


def get_article_body(s):
    '''
    Return the body of an article, i.e. everything after the first newline.

    When s contains no newline the body is the empty string.
    '''
    _, _, remainder = s.partition('\n')
    return remainder


def get_today_article(user_word_list, visited_articles):
    '''
    Pick the article to show, based on the user's word record and the
    history of articles already shown, and return all of its display data.

    Parameters:
        user_word_list: path handed to load_freq_history to load the user's
            word record.
        visited_articles: dict with the keys "index" (position within
            "article_ids") and "article_ids" (ids of the articles shown so
            far, newest last), or None to start an empty history. A dict
            passed in is modified in place.

    Returns:
        A tuple (visited_articles, today_article, result_of_generate_article).
        today_article is a dict with the keys "user_level", "text_level",
        "date", "article_title", "article_body", "source", "question" and
        "answer", or None when no article was selected.
        result_of_generate_article is "found", "not found" or
        "had read all articles".
    '''
    rq = RecordQuery(path_prefix + 'static/wordfreqapp.db')
    if visited_articles is None:
        visited_articles = {
            "index": 0,  # position within article_ids
            "article_ids": []  # ids of the articles shown so far, newest last
        }
    if visited_articles["index"] > len(visited_articles["article_ids"])-1:  # a new article is needed, so query every article
        rq.instructions("SELECT * FROM article")
    else:  # an already visited article is requested, so query it by article_id
        # A plain page refresh can land on the 'null' placeholder; in that case
        # act like "previous article": step the index back and drop the entry.
        # NOTE(review): with article_ids == ['null'] and index == 0 this leaves
        # an empty list and index -1, so the lookup below raises IndexError.
        # NOTE(review): the article id is interpolated directly into the SQL
        # text; confirm it can never carry user-controlled content.
        if visited_articles["article_ids"][visited_articles["index"]] == 'null':
            visited_articles["index"] -= 1
            visited_articles["article_ids"].pop()
        rq.instructions(
            f'SELECT * FROM article WHERE article_id='
            f'{visited_articles["article_ids"][visited_articles["index"]]}'
        )
    rq.do()
    result = rq.get_results()
    random.shuffle(result)

    # Choose article according to reader's level
    d1 = load_freq_history(path_prefix + 'static/frequency/frequency.p')
    d2 = load_freq_history(path_prefix + 'static/words_and_tests.p')
    d3 = get_difficulty_level_for_user(d1, d2)

    d = None
    result_of_generate_article = "not found"
    d_user = load_freq_history(user_word_list)
    # More to consider: user behaviour is dynamic, so the time factor should be taken into account.
    user_level = user_difficulty_level(d_user, d3)
    text_level = 0
    if visited_articles["index"] > len(visited_articles["article_ids"])-1:  # generate a new article
        amount_of_visited_articles = len(visited_articles["article_ids"])
        amount_of_existing_articles = len(result)
        # If as many articles have been visited as exist, everything has been read
        if amount_of_visited_articles == amount_of_existing_articles:
            result_of_generate_article = "had read all articles"
        else:
            for k in range(3):  # try at most 3 times
                for reading in result:
                    text_level = text_difficulty_level(reading['text'], d3)
                    # Factor drawn from a Gaussian with mean 0.8 and standard deviation 0.1
                    factor = random.gauss(0.8, 0.1)
                    # The article must not have been shown before and its level must be within range of the user's
                    if reading['article_id'] not in visited_articles["article_ids"] and within_range(text_level, user_level, (8.0 - user_level) * factor):
                        d = reading
                        visited_articles["article_ids"].append(d['article_id'])  # record the id of the newly chosen article
                        result_of_generate_article = "found"
                        break
                if result_of_generate_article == "found":  # leave the outer loop as soon as an article has been found
                    break
        if result_of_generate_article != "found":  # everything read, or nothing suitable after 3 passes: store the 'null' placeholder
            visited_articles["article_ids"].append('null')
    else:  # show an already visited article
        # NOTE(review): random.choice raises IndexError if the query returned no rows; confirm the id always exists.
        d = random.choice(result)
        text_level = text_difficulty_level(d['text'], d3)
        result_of_generate_article = "found"

    today_article = None
    if d:
        today_article = {
            "user_level": f'{user_level:4.2f}',
            "text_level": f'{text_level:4.2f}',
            "date": d['date'],
            "article_title": get_article_title(d['text']),
            "article_body": get_article_body(d['text']),
            "source": d["source"],
            "question": get_question_part(d['question']),
            "answer": get_answer_part(d['question'])
        }

    return visited_articles, today_article, result_of_generate_article


def load_freq_history(path):
    '''
    Load the record stored at path.

    Returns an empty dict when path does not exist; otherwise returns
    whatever pickle_idea.load_record(path) yields.
    '''
    if not os.path.exists(path):
        return {}
    return pickle_idea.load_record(path)


def within_range(x, y, r):
    '''
    Tell whether x is greater than y and exceeds it by at most r.
    '''
    if not x > y:
        return False
    return abs(x - y) <= r


def get_question_part(s):
    '''
    Extract the question section of a combined question/answer text.

    Lines are stripped of surrounding whitespace. Collection starts at a
    'QUESTION' line (which is kept) and stops at an 'ANSWER' line (which is
    dropped). The collected lines are returned joined by newlines.
    '''
    collected = []
    in_question = False
    for raw_line in s.strip().split('\n'):
        text = raw_line.strip()
        if text == 'ANSWER':
            in_question = False
            continue
        if text == 'QUESTION':
            in_question = True
        if in_question:
            collected.append(text)
    return '\n'.join(collected)


def get_answer_part(s):
    '''
    Extract the answer section of a combined question/answer text.

    Lines are stripped of surrounding whitespace. Everything after the first
    'ANSWER' line is returned joined by newlines; any further 'ANSWER' lines
    are left out. Without an 'ANSWER' line the result is the empty string.
    '''
    lines = [raw_line.strip() for raw_line in s.strip().split('\n')]
    if 'ANSWER' not in lines:
        return ''
    tail = lines[lines.index('ANSWER') + 1:]
    return '\n'.join(text for text in tail if text != 'ANSWER')
-												Repalce old app folder with SoftArch王炫/english-pal-master/app/

											
										
										
											2022-01-26 21:10:09 +08:00
+								import os
-												refactor:pylint

											
										
										
											2023-06-04 00:35:43 +08:00
+								import random
 								import pickle_idea
-												pull最新的snapshot-20230511，后更新了difficulty.py和Article.py的部分代码，提交了新的pickle文件

											
										
										
											2023-05-18 23:29:38 +08:00
+								from difficulty import get_difficulty_level_for_user, text_difficulty_level, user_difficulty_level
-												refactor:pylint

											
										
										
											2023-06-04 00:35:43 +08:00
+								from UseSqlite import RecordQuery
-												Repalce old app folder with SoftArch王炫/english-pal-master/app/

											
										
										
											2022-01-26 21:10:09 +08:00
 								path_prefix = '/var/www/wordfreq/wordfreq/'
 								path_prefix = './'  # comment this line in deployment
 								def total_number_of_essays():
-												refactor:pylint

											
										
										
											2023-06-04 00:35:43 +08:00
+								    '''
 								        得到文章总数
 								        return:文章数目
 								    '''
-												Repalce old app folder with SoftArch王炫/english-pal-master/app/

											
										
										
											2022-01-26 21:10:09 +08:00
+								    rq = RecordQuery(path_prefix + 'static/wordfreqapp.db')
 								    rq.instructions("SELECT * FROM article")
 								    rq.do()
 								    result = rq.get_results()
 								    return len(result)
 								def get_article_title(s):
-												refactor:pylint

											
										
										
											2023-06-04 00:35:43 +08:00
+								    '''
 								    得到文章的标题
 								    '''
-												Repalce old app folder with SoftArch王炫/english-pal-master/app/

											
										
										
											2022-01-26 21:10:09 +08:00
+								    return s.split('\n')[0]
 								def get_article_body(s):
-												refactor:pylint

											
										
										
											2023-06-04 00:35:43 +08:00
+								    '''
 								    得到文章的内容
 								    '''
-												Repalce old app folder with SoftArch王炫/english-pal-master/app/

											
										
										
											2022-01-26 21:10:09 +08:00
+								    lst = s.split('\n')
 								    lst.pop(0)  # remove the first line
 								    return '\n'.join(lst)
-												修改变量名had_read_articles->visited_articles

											
										
										
											2023-04-25 17:47:51 +08:00
+								def get_today_article(user_word_list, visited_articles):
-												refactor:pylint

											
										
										
											2023-06-04 00:35:43 +08:00
+								    '''
 								    根据用户的单词列表和阅读过的文章返回需要的文章的全部信息
 								    '''
-												Repalce old app folder with SoftArch王炫/english-pal-master/app/

											
										
										
											2022-01-26 21:10:09 +08:00
+								    rq = RecordQuery(path_prefix + 'static/wordfreqapp.db')
-												修改变量名had_read_articles->visited_articles

											
										
										
											2023-04-25 17:47:51 +08:00
+								    if visited_articles is None:
 								        visited_articles = {
-												refactor:pylint

											
										
										
											2023-06-04 00:35:43 +08:00
+								            "index": 0,  # 为 article_ids 的索引
-												将记录阅读过文章的数据结果改为字典，以及修改了flag的问题

											
										
										
											2023-04-04 22:31:53 +08:00
+								            "article_ids": []  # 之前显示文章的id列表，越后越新
 								        }
-												修改变量名had_read_articles->visited_articles

											
										
										
											2023-04-25 17:47:51 +08:00
+								    if visited_articles["index"] > len(visited_articles["article_ids"])-1:  # 生成新的文章，因此查找所有的文章
-												Repalce old app folder with SoftArch王炫/english-pal-master/app/

											
										
										
											2022-01-26 21:10:09 +08:00
+								        rq.instructions("SELECT * FROM article")
-												添加了阅读完所有文章的提示

											
										
										
											2023-04-20 22:53:30 +08:00
+								    else:  # 生成阅读过的文章，因此查询指定 article_id 的文章
-												refactor:pylint

											
										
										
											2023-06-04 00:35:43 +08:00
+								        # 可能因为直接刷新页面导致直接去查询了'null'，因此当刷新的页面的时候，需要直接进行“上一篇”操作
 								        if visited_articles["article_ids"][visited_articles["index"]] == 'null':
-												修改变量名had_read_articles->visited_articles

											
										
										
											2023-04-25 17:47:51 +08:00
+								            visited_articles["index"] -= 1
 								            visited_articles["article_ids"].pop()
-												refactor:pylint

											
										
										
											2023-06-04 00:35:43 +08:00
+								        rq.instructions(
 								            f'SELECT * FROM article WHERE article_id='
 								            f'{visited_articles["article_ids"][visited_articles["index"]]}'
 								        )
-												Repalce old app folder with SoftArch王炫/english-pal-master/app/

											
										
										
											2022-01-26 21:10:09 +08:00
+								    rq.do()
 								    result = rq.get_results()
 								    random.shuffle(result)
 								    # Choose article according to reader's level
 								    d1 = load_freq_history(path_prefix + 'static/frequency/frequency.p')
 								    d2 = load_freq_history(path_prefix + 'static/words_and_tests.p')
-												pull最新的snapshot-20230511，后更新了difficulty.py和Article.py的部分代码，提交了新的pickle文件

											
										
										
											2023-05-18 23:29:38 +08:00
+								    d3 = get_difficulty_level_for_user(d1, d2)
-												Repalce old app folder with SoftArch王炫/english-pal-master/app/

											
										
										
											2022-01-26 21:10:09 +08:00
-												将记录阅读过文章的数据结果改为字典，以及修改了flag的问题

											
										
										
											2023-04-04 22:31:53 +08:00
+								    d = None
-												添加了阅读完所有文章的提示

											
										
										
											2023-04-20 22:53:30 +08:00
+								    result_of_generate_article = "not found"
-												Repalce old app folder with SoftArch王炫/english-pal-master/app/

											
										
										
											2022-01-26 21:10:09 +08:00
+								    d_user = load_freq_history(user_word_list)
-												refactor:pylint

											
										
										
											2023-06-04 00:35:43 +08:00
+								    # 更多的考虑，因为用户的行为是动态的。应考虑时间因素。
 								    user_level = user_difficulty_level(d_user, d3)
-												完成了对bug509的修复，以及重构项目（去掉了业务中的前端脚本）

											
										
										
											2023-03-08 16:33:13 +08:00
+								    text_level = 0
-												修改变量名had_read_articles->visited_articles

											
										
										
											2023-04-25 17:47:51 +08:00
+								    if visited_articles["index"] > len(visited_articles["article_ids"])-1:  # 生成新的文章
 								        amount_of_visited_articles = len(visited_articles["article_ids"])
-												refactor:pylint

											
										
										
											2023-06-04 00:35:43 +08:00
+								        amount_of_existing_articles = len(result)
 								        # 如果当前阅读过的文章的数量 == 存在的文章的数量，即所有的书本都阅读过了
 								        if amount_of_visited_articles == amount_of_existing_articles:
-												添加了阅读完所有文章的提示

											
										
										
											2023-04-20 22:53:30 +08:00
+								            result_of_generate_article = "had read all articles"
 								        else:
 								            for k in range(3):  # 最多尝试3次
 								                for reading in result:
 								                    text_level = text_difficulty_level(reading['text'], d3)
-												refactor:pylint

											
										
										
											2023-06-04 00:35:43 +08:00
+								                    # 从高斯分布中得出的平均值为 0.8，站位偏差为 1 的数字
 								                    factor = random.gauss(0.8, 0.1)
 								                    # 新的文章之前没有出现过且符合一定范围的水平
 								                    if reading['article_id'] not in visited_articles["article_ids"] and within_range(text_level, user_level, (8.0 - user_level) * factor):
-												添加了阅读完所有文章的提示

											
										
										
											2023-04-20 22:53:30 +08:00
+								                        d = reading
-												修改变量名had_read_articles->visited_articles

											
										
										
											2023-04-25 17:47:51 +08:00
+								                        visited_articles["article_ids"].append(d['article_id'])  # 列表添加新的文章id；下面进行
-												添加了阅读完所有文章的提示

											
										
										
											2023-04-20 22:53:30 +08:00
+								                        result_of_generate_article = "found"
 								                        break
 								                if result_of_generate_article == "found":  # 用于成功找到文章后及时退出外层循环
 								                    break
-												修复边界值问题（当刚开始就没有找到文章或者就根本被没有文章的时候，会出现上一篇按钮）

											
										
										
											2023-04-21 02:36:51 +08:00
+								        if result_of_generate_article != "found":  # 阅读完所有文章，或者循环3次没有找到适合的文章，则放入空（“null”）
-												修改变量名had_read_articles->visited_articles

											
										
										
											2023-04-25 17:47:51 +08:00
+								            visited_articles["article_ids"].append('null')
-												添加了阅读完所有文章的提示

											
										
										
											2023-04-20 22:53:30 +08:00
+								    else:  # 生成已经阅读过的文章
-												完成了对bug509的修复，以及重构项目（去掉了业务中的前端脚本）

											
										
										
											2023-03-08 16:33:13 +08:00
+								        d = random.choice(result)
 								        text_level = text_difficulty_level(d['text'], d3)
-												修复边界值问题（当刚开始就没有找到文章或者就根本被没有文章的时候，会出现上一篇按钮）

											
										
										
											2023-04-21 02:36:51 +08:00
+								        result_of_generate_article = "found"
-												完成了对bug509的修复，以及重构项目（去掉了业务中的前端脚本）

											
										
										
											2023-03-08 16:33:13 +08:00
 								    today_article = None
-												将记录阅读过文章的数据结果改为字典，以及修改了flag的问题

											
										
										
											2023-04-04 22:31:53 +08:00
+								    if d:
-												完成了对bug509的修复，以及重构项目（去掉了业务中的前端脚本）

											
										
										
											2023-03-08 16:33:13 +08:00
+								        today_article = {
-												refactor:pylint

											
										
										
											2023-06-04 00:35:43 +08:00
+								            "user_level": f'{user_level:4.2f}',
 								            "text_level": f'{text_level:4.2f}',
-												完成了对bug509的修复，以及重构项目（去掉了业务中的前端脚本）

											
										
										
											2023-03-08 16:33:13 +08:00
+								            "date": d['date'],
 								            "article_title": get_article_title(d['text']),
 								            "article_body": get_article_body(d['text']),
 								            "source": d["source"],
 								            "question": get_question_part(d['question']),
 								            "answer": get_answer_part(d['question'])
 								        }
-												Repalce old app folder with SoftArch王炫/english-pal-master/app/

											
										
										
											2022-01-26 21:10:09 +08:00
-												修改变量名had_read_articles->visited_articles

											
										
										
											2023-04-25 17:47:51 +08:00
+								    return visited_articles, today_article, result_of_generate_article
-												Repalce old app folder with SoftArch王炫/english-pal-master/app/

											
										
										
											2022-01-26 21:10:09 +08:00
 								def load_freq_history(path):
-												refactor:pylint

											
										
										
											2023-06-04 00:35:43 +08:00
+								    '''
 								    加载历史路径
 								    '''
-												Repalce old app folder with SoftArch王炫/english-pal-master/app/

											
										
										
											2022-01-26 21:10:09 +08:00
+								    d = {}
 								    if os.path.exists(path):
 								        d = pickle_idea.load_record(path)
 								    return d
 								def within_range(x, y, r):
-												refactor:pylint

											
										
										
											2023-06-04 00:35:43 +08:00
+								    '''
 								    判断x>y并且x-y<=r
 								    '''
-												Repalce old app folder with SoftArch王炫/english-pal-master/app/

											
										
										
											2022-01-26 21:10:09 +08:00
+								    return x > y and abs(x - y) <= r
 								def get_question_part(s):
-												refactor:pylint

											
										
										
											2023-06-04 00:35:43 +08:00
+								    '''
 								    得到问题部分
 								    '''
-												Repalce old app folder with SoftArch王炫/english-pal-master/app/

											
										
										
											2022-01-26 21:10:09 +08:00
+								    s = s.strip()
 								    result = []
 								    flag = 0
 								    for line in s.split('\n'):
 								        line = line.strip()
 								        if line == 'QUESTION':
 								            result.append(line)
 								            flag = 1
 								        elif line == 'ANSWER':
 								            flag = 0
 								        elif flag == 1:
 								            result.append(line)
 								    return '\n'.join(result)
 								def get_answer_part(s):
-												refactor:pylint

											
										
										
											2023-06-04 00:35:43 +08:00
+								    '''
 								    得到答案部分
 								    '''
-												Repalce old app folder with SoftArch王炫/english-pal-master/app/

											
										
										
											2022-01-26 21:10:09 +08:00
+								    s = s.strip()
 								    result = []
 								    flag = 0
 								    for line in s.split('\n'):
 								        line = line.strip()
 								        if line == 'ANSWER':
 								            flag = 1
 								        elif flag == 1:
 								            result.append(line)
-												Article.py: remove debug statement.

											
										
										
											2023-03-30 16:10:22 +08:00
+								    return '\n'.join(result)