import os
import random
import pickle_idea
from difficulty import get_difficulty_level_for_user, text_difficulty_level, user_difficulty_level
from UseSqlite import RecordQuery

path_prefix = '/var/www/wordfreq/wordfreq/'
path_prefix = './'  # comment this line in deployment


def total_number_of_essays():
    '''
    Return the total number of articles in the database.
    '''
    rq = RecordQuery(path_prefix + 'static/wordfreqapp.db')
    rq.instructions("SELECT * FROM article")
    rq.do()
    result = rq.get_results()
    return len(result)


def get_article_title(s):
    '''
    Return the title of an article (its first line).
    '''
    return s.split('\n')[0]


def get_article_body(s):
    '''
    Return the body of an article (everything after the first line).
    '''
    lst = s.split('\n')
    lst.pop(0)  # remove the first line
    return '\n'.join(lst)


def get_today_article(user_word_list, visited_articles):
    '''
    Return the full information of the article to show, based on the user's
    word list and the articles the user has already visited.
    '''
    rq = RecordQuery(path_prefix + 'static/wordfreqapp.db')
    if visited_articles is None:
        visited_articles = {
            "index": 0,        # index into article_ids
            "article_ids": []  # ids of previously shown articles, newest last
        }
    if visited_articles["index"] > len(visited_articles["article_ids"]) - 1:
        # generate a new article, so query all articles
        rq.instructions("SELECT * FROM article")
    else:
        # show a previously read article, so query the article with the given article_id
        # a page refresh may cause a query for 'null'; in that case fall back to the previous article
        if visited_articles["article_ids"][visited_articles["index"]] == 'null':
            visited_articles["index"] -= 1
            visited_articles["article_ids"].pop()
        rq.instructions(
            f'SELECT * FROM article WHERE article_id='
            f'{visited_articles["article_ids"][visited_articles["index"]]}'
        )
    rq.do()
    result = rq.get_results()
    random.shuffle(result)

    # Choose article according to reader's level
    d1 = load_freq_history(path_prefix + 'static/frequency/frequency.p')
    d2 = load_freq_history(path_prefix + 'static/words_and_tests.p')
    d3 = get_difficulty_level_for_user(d1, d2)

    d = None
    result_of_generate_article = "not found"

    d_user = load_freq_history(user_word_list)
    # Further consideration: user behaviour is dynamic, so a time factor should also be taken into account.
    user_level = user_difficulty_level(d_user, d3)
    text_level = 0

    if visited_articles["index"] > len(visited_articles["article_ids"]) - 1:  # generate a new article
        amount_of_visited_articles = len(visited_articles["article_ids"])
        amount_of_existing_articles = len(result)
        # if the number of visited articles equals the number of existing articles, the user has read everything
        if amount_of_visited_articles == amount_of_existing_articles:
            result_of_generate_article = "had read all articles"
        else:
            for k in range(3):  # try at most 3 times
                for reading in result:
                    text_level = text_difficulty_level(reading['text'], d3)
                    # a number drawn from a Gaussian distribution with mean 0.8 and standard deviation 0.1
                    factor = random.gauss(0.8, 0.1)
                    # the article has not been shown before and its difficulty falls within the acceptable range
                    if reading['article_id'] not in visited_articles["article_ids"] and within_range(text_level, user_level, (8.0 - user_level) * factor):
                        d = reading
                        visited_articles["article_ids"].append(d['article_id'])  # record the id of the new article
                        result_of_generate_article = "found"
                        break
                if result_of_generate_article == "found":  # exit the outer loop as soon as an article is found
                    break
            if result_of_generate_article != "found":
                # all articles read, or no suitable article found after 3 passes: append a placeholder ('null')
                visited_articles["article_ids"].append('null')
    else:  # show a previously read article
        d = random.choice(result)
        text_level = text_difficulty_level(d['text'], d3)
        result_of_generate_article = "found"

    today_article = None
    if d:
        today_article = {
            "user_level": f'{user_level:4.2f}',
            "text_level": f'{text_level:4.2f}',
            "date": d['date'],
            "article_title": get_article_title(d['text']),
            "article_body": get_article_body(d['text']),
            "source": d["source"],
            "question": get_question_part(d['question']),
            "answer": get_answer_part(d['question'])
        }

    return visited_articles, today_article, result_of_generate_article


def load_freq_history(path):
    '''
    Load the word-frequency history from the given path.
    '''
    d = {}
    if os.path.exists(path):
        d = pickle_idea.load_record(path)
    return d
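
# --- Illustrative call pattern (not part of the original module) ---
# A rough sketch of how a caller (e.g. a view function) might use
# get_today_article; the user word-list path and the rendering steps are
# assumptions for illustration only:
#
#   user_word_list = '<path to this user's pickled word-frequency record>'
#   visited_articles, today_article, status = get_today_article(user_word_list, None)
#   if status == 'found':
#       # render today_article['article_title'], today_article['article_body'],
#       # today_article['question'] and keep visited_articles in the session
#       pass
#   elif status == 'had read all articles':
#       # tell the user there is nothing new to read
#       pass
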
def within_range(x, y, r):
    '''
    Check that x > y and that the difference x - y is at most r.
    '''
    return x > y and abs(x - y) <= r


def get_question_part(s):
    '''
    Extract the question part: the QUESTION header line and every line up to ANSWER.
    '''
    s = s.strip()
    result = []
    flag = 0
    for line in s.split('\n'):
        line = line.strip()
        if line == 'QUESTION':
            result.append(line)
            flag = 1
        elif line == 'ANSWER':
            flag = 0
        elif flag == 1:
            result.append(line)
    return '\n'.join(result)


def get_answer_part(s):
    '''
    Extract the answer part: every line after the ANSWER header.
    '''
    s = s.strip()
    result = []
    flag = 0
    for line in s.split('\n'):
        line = line.strip()
        if line == 'ANSWER':
            flag = 1
        elif flag == 1:
            result.append(line)
    return '\n'.join(result)
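

if __name__ == '__main__':
    # A minimal smoke test (an addition, not part of the original module).
    # The sample string below is made up; it only follows the QUESTION/ANSWER
    # layout that get_question_part and get_answer_part expect.
    sample_question = (
        'QUESTION\n'
        'What is the main idea of the passage?\n'
        'A. Reading improves vocabulary.\n'
        'B. Vocabulary grows with difficulty-matched reading.\n'
        'ANSWER\n'
        'B'
    )
    print(get_question_part(sample_question))  # QUESTION header plus the question lines
    print(get_answer_part(sample_question))    # 'B'
    print(within_range(4.5, 4.0, 1.0))         # True: 4.5 > 4.0 and the gap 0.5 <= 1.0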