forked from mrlan/EnglishPal
172 lines
5.5 KiB
Python
172 lines
5.5 KiB
Python
from WordFreq import WordFreq
|
|
from AAA_VocabularyLevelEstimator import VocabularyLevelEstimator
|
|
from difficulty import WordDifficultyEvaluator
|
|
from wordfreqCMD import youdao_link, sort_in_descending_order
|
|
import pickle_idea, pickle_idea2
|
|
import os
|
|
import random, glob
|
|
import hashlib
|
|
from datetime import datetime
|
|
from flask import Flask, request, redirect, render_template, url_for, session, abort, flash, get_flashed_messages
|
|
from model.article import get_all_articles, get_article_by_id, get_number_of_articles
|
|
import logging
|
|
import re
|
|
|
|
# Prefix prepended to the 'static/words_and_tests.p' path in get_today_article().
path_prefix = './'
|
|
db_path_prefix = './db/' # Comment out this line when deploying.
|
|
# Oxford word list file read by load_oxford_words().
oxford_words_path = './db/oxford_words.txt'
|
|
|
|
|
|
def count_oxford_words(text, oxford_words):
    """Count the word tokens of *text* that occur in *oxford_words*.

    The text is lower-cased and tokenized with the regex ``\\b\\w+\\b``;
    every token (duplicates included) is tested for membership.

    Returns:
        A tuple ``(matching_token_count, total_token_count)``.
    """
    tokens = re.findall(r'\b\w+\b', text.lower())
    hits = 0
    for token in tokens:
        if token in oxford_words:
            hits += 1
    return hits, len(tokens)
|
|
|
|
|
|
def calculate_ratio(oxford_word_count, total_words):
    """Return ``oxford_word_count / total_words``, or 0 when there are no words."""
    return oxford_word_count / total_words if total_words != 0 else 0
|
|
|
|
|
|
def load_oxford_words(file_path):
    """Load a whitespace-separated word list into a dict keyed by word.

    Each usable line supplies three leading fields: the word, its part of
    speech and its level. Any further fields on the line are ignored.
    Lines with fewer than three fields (blank lines included) are skipped
    instead of raising ``IndexError``.

    Args:
        file_path: Path of the UTF-8 encoded word list.

    Returns:
        A dict mapping ``word`` to ``{'pos': pos, 'level': level}``. When a
        word appears on several lines, the last occurrence wins.
    """
    oxford_words = {}
    with open(file_path, 'r', encoding='utf-8') as file:
        for line in file:
            parts = line.split()
            if len(parts) < 3:
                # Blank or malformed line: nothing reliable to record.
                continue
            word, pos, level = parts[:3]
            oxford_words[word] = {'pos': pos, 'level': level}
    return oxford_words
|
|
|
|
|
|
def total_number_of_essays():
    """Return the article count reported by ``get_number_of_articles()``."""
    article_count = get_number_of_articles()
    return article_count
|
|
|
|
|
|
def get_article_title(s):
    """Return the first line of *s* (everything before the first newline)."""
    title, _, _ = s.partition('\n')
    return title
|
|
|
|
|
|
def get_article_body(s):
    """Return *s* without its first line (the title line)."""
    # Everything after the title line, re-joined with the original newlines.
    remaining_lines = s.split('\n')[1:]
    return '\n'.join(remaining_lines)
|
|
|
|
|
|
def get_today_article(user_word_list, visited_articles):
    """Choose the article to display and update the reading history.

    Args:
        user_word_list: Path handed to ``load_freq_history`` to load the
            user's unknown-word record.
        visited_articles: Dict with ``"index"`` (position inside the history)
            and ``"article_ids"`` (ids shown so far; the string ``'null'``
            marks a search that found nothing), or ``None`` to start a fresh
            history. The dict is modified in place.

    Returns:
        A tuple ``(visited_articles, today_article, result_of_generate_article)``.
        ``today_article`` is ``None`` when no article was selected, otherwise
        a dict with the keys ``user_level``, ``text_level``, ``date``,
        ``article_title``, ``article_body``, ``source``, ``question``,
        ``answer`` and ``ratio``. ``result_of_generate_article`` is one of
        ``"found"``, ``"not found"`` or ``"had read all articles"``.
    """
    if visited_articles is None:
        visited_articles = {
            "index": 0,
            "article_ids": []
        }

    if visited_articles["index"] > len(visited_articles["article_ids"]) - 1:
        # The index points past the history: a new article is wanted, so
        # every article is a candidate.
        result = get_all_articles()
    else:
        if visited_articles["article_ids"][visited_articles["index"]] == 'null':
            # The current entry is the 'null' placeholder: drop the last
            # history entry and step back to the previous one.
            visited_articles["index"] -= 1
            visited_articles["article_ids"].pop()
        if visited_articles["article_ids"]:
            article_id = visited_articles["article_ids"][visited_articles["index"]]
            result = get_article_by_id(article_id)
        else:
            # The placeholder was the only entry, so there is no previous
            # article to show. Indexing the empty history would raise
            # IndexError; restart with a new-article search instead.
            visited_articles["index"] = 0
            result = get_all_articles()

    random.shuffle(result)

    # Initialize the VocabularyLevelEstimator.
    vocabularyLevelEstimator = VocabularyLevelEstimator(
        word_vectors_path='./wiki-news-300d-1M.vec',
        words_and_tests_pickle=path_prefix + 'static/words_and_tests.p'
    )

    # Load the words the user does not know.
    d_user_unknown_words = load_freq_history(user_word_list)

    # Estimate the user's vocabulary level from those words.
    user_level = vocabularyLevelEstimator.estimate_user_vocabulary_level(d_user_unknown_words.keys())

    d = None
    result_of_generate_article = "not found"
    text_level = 0

    if visited_articles["index"] > len(visited_articles["article_ids"]) - 1:
        # New-article search.
        amount_of_visited_articles = len(visited_articles["article_ids"])
        amount_of_existing_articles = len(result)
        if amount_of_visited_articles == amount_of_existing_articles:
            result_of_generate_article = "had read all articles"
        else:
            # Make up to three passes over the shuffled candidates; the
            # tolerance factor is redrawn for every candidate.
            for _ in range(3):
                for reading in result:
                    text_level = vocabularyLevelEstimator.estimate_text_difficulty(reading['text'])
                    factor = random.gauss(0.8, 0.1)
                    # Accept an unseen article that is harder than the user's
                    # level but by no more than the tolerance.
                    # NOTE(review): 8.0 is presumably the top of the
                    # difficulty scale - confirm.
                    if reading['article_id'] not in visited_articles["article_ids"] and within_range(text_level, user_level, (8.0 - user_level) * factor):
                        d = reading
                        visited_articles["article_ids"].append(d['article_id'])
                        result_of_generate_article = "found"
                        break
                if result_of_generate_article == "found":
                    break
        if result_of_generate_article != "found":
            # Nothing suitable (or everything already read): record a
            # placeholder in the history.
            visited_articles["article_ids"].append('null')
    else:
        # Revisit an article that is already in the history.
        d = random.choice(result)
        text_level = vocabularyLevelEstimator.estimate_text_difficulty(d['text'])
        result_of_generate_article = "found"

    today_article = None
    if d:
        oxford_words = load_oxford_words(oxford_words_path)
        oxford_word_count, total_words = count_oxford_words(d['text'], oxford_words)
        ratio = calculate_ratio(oxford_word_count, total_words)
        today_article = {
            "user_level": '%4.1f' % user_level,
            "text_level": '%4.1f' % text_level,
            "date": d['date'],
            "article_title": get_article_title(d['text']),
            "article_body": get_article_body(d['text']),
            "source": d["source"],
            "question": get_question_part(d['question']),
            "answer": get_answer_part(d['question']),
            "ratio": ratio
        }

    return visited_articles, today_article, result_of_generate_article
|
|
|
|
|
|
def load_freq_history(path):
    """Return the record stored at *path*, or an empty dict if it is missing.

    The record is read with ``pickle_idea.load_record`` only when the path
    exists on disk.
    """
    if not os.path.exists(path):
        return {}
    return pickle_idea.load_record(path)
|
|
|
|
|
|
def within_range(x, y, r):
    """Return True when *x* is strictly greater than *y* by at most *r*."""
    if not x > y:
        return False
    distance = abs(x - y)
    return distance <= r
|
|
|
|
|
|
def get_question_part(s):
    """Extract the question section of *s*.

    Lines are stripped of surrounding whitespace. Collection starts at a line
    equal to ``QUESTION`` (that marker line is included) and stops at a line
    equal to ``ANSWER`` (that marker line is excluded). A later ``QUESTION``
    line resumes collection.

    Returns:
        The collected lines joined with newlines.
    """
    collected = []
    in_question = False
    for raw_line in s.split('\n'):
        text = raw_line.strip()
        if text == 'ANSWER':
            in_question = False
            continue
        if text == 'QUESTION':
            in_question = True
        if in_question:
            collected.append(text)
    return '\n'.join(collected)
|
|
|
|
|
|
def get_answer_part(s):
    """Extract the answer section of *s*.

    Lines are stripped of surrounding whitespace. Every line after the first
    line equal to ``ANSWER`` is kept, except further ``ANSWER`` marker lines.

    Returns:
        The kept lines joined with newlines, or an empty string when *s*
        contains no ``ANSWER`` line.
    """
    stripped_lines = [piece.strip() for piece in s.split('\n')]
    if 'ANSWER' not in stripped_lines:
        return ''
    first_after_marker = stripped_lines.index('ANSWER') + 1
    return '\n'.join(
        item for item in stripped_lines[first_after_marker:] if item != 'ANSWER'
    )