# Forked from mrlan/EnglishPal (127 lines, 4.4 KiB, Python).
import random
import re
import sqlite3
from contextlib import closing

from nltk.corpus import wordnet as wn


class Essay:
    """An article stored in the word-frequency database, plus cloze-test generation.

    Assigning ``article_id`` loads the article text and its difficulty level
    from the ``article`` table. ``create_clozeTest`` then blanks out up to ten
    words of matching difficulty and builds multiple-choice options for each
    blank; the results are stored in ``_answers``, ``_questions`` and the
    rewritten ``_essay``.
    """

    # SQLite database file, resolved relative to the current working directory.
    _DB_PATH = "static/wordfreqapp.db"
    # Separator pattern used to tokenize an essay into words.
    _WORD_SEPARATOR = re.compile(r"\b[\.,\s\n\r\n\$\']+?\b")
    # Upper bound on the number of blanks in one cloze test.
    _MAX_QUESTIONS = 10
    # Number of options (correct answer included) offered per blank.
    _OPTIONS_PER_QUESTION = 4

    def __init__(self):
        self._article_id = 0
        self._essay = None
        self._difficulty = None
        self._answers = None
        self._questions = None

    @property
    def article_id(self):
        """Return the id of the article this object currently refers to."""
        return self._article_id

    @article_id.setter
    def article_id(self, article_id):
        """Store *article_id* and load the matching article from the database."""
        self._article_id = article_id
        self.find_essay_in_database(self._article_id)

    def _fetch_all(self, sql, params=()):
        """Run one read-only query and return every row.

        The connection is always closed, even when the query raises.
        """
        with closing(sqlite3.connect(self._DB_PATH)) as conn:
            return conn.execute(sql, params).fetchall()

    def find_essay_in_database(self, id):
        """Load the text and level of article *id* into this object.

        Returns the fetched rows on success. On a database error, or when no
        article has that id, a message is printed, ``_essay`` and
        ``_difficulty`` are left untouched, and ``None`` is returned.
        """
        try:
            # Bind the id as a parameter instead of splicing it into the SQL.
            results = self._fetch_all(
                "select text,level from article where article_id = ?", (id,))
        except sqlite3.Error as e:
            print(e)
            return None
        if not results:
            print("no article found with article_id = " + str(id))
            return None
        self._essay = results[0][0]
        self._difficulty = results[0][1]
        return results

    def split_essay_to_word(self):
        """Split the essay into a list of lower-cased words.

        Returns an empty list when no essay has been loaded.
        """
        if self._essay is None:
            return []
        article = "".join(self._essay)
        return [word.lower() for word in self._WORD_SEPARATOR.split(article)]

    def find_same_difficulty_words(self):
        """Return every word in the database whose difficulty equals the essay's.

        Returns an empty list (after printing the error) if the query fails.
        """
        try:
            rows = self._fetch_all(
                "select word from words where difficulty = ?",
                (self._difficulty,))
        except sqlite3.Error as e:
            print(e)
            return []
        return [value for row in rows for value in row]

    def get_word_synsets(self, word):
        """Return the head name of each WordNet synset of *word*.

        A synset name looks like ``dog.n.01``; only the part before the first
        dot is kept. The list may contain duplicates and *word* itself.
        """
        return [synset.name().split(".")[0] for synset in wn.synsets(word)]

    def create_clozeTest(self, essay):
        """Build a cloze test and store it on this object.

        *essay* is reloaded with this object's article id and supplies the
        candidate words and synonyms. Up to ``_MAX_QUESTIONS`` distinct words
        of the essay's difficulty become the answers (``_answers``), numbered
        in order of first appearance. The first whole-word occurrence of each
        answer in ``_essay`` is replaced by ``(n)____``. ``_questions`` holds
        one shuffled option list per answer.
        """
        essay.article_id = self._article_id

        database_words = essay.find_same_difficulty_words() or []
        known_words = set(database_words)
        # Essay words of matching difficulty, de-duplicated in first-appearance
        # order (dict.fromkeys keeps insertion order).
        word = list(dict.fromkeys(
            w for w in essay.split_essay_to_word() if w and w in known_words))

        if len(word) <= self._MAX_QUESTIONS:
            # Short essays simply yield fewer questions.
            answers = list(word)
        else:
            # Sample without replacement so exactly _MAX_QUESTIONS distinct
            # answers are chosen; sorting the indices keeps reading order.
            picked = sorted(
                random.sample(range(len(word)), self._MAX_QUESTIONS))
            answers = [word[i] for i in picked]
        self._answers = answers

        # Replace each answer in the essay with its question number.
        questions = []
        for number, answer in enumerate(answers, start=1):
            questions.append([answer])
            self._essay = self._blank_out(self._essay, answer, number)

        # Generate the options for every question.
        for question in questions:
            self._add_options(
                question, essay.get_word_synsets(question[0]),
                word, database_words)
            random.shuffle(question)

        self._questions = questions

    @staticmethod
    def _blank_out(text, answer, number):
        """Replace the first whole-word occurrence of *answer* in *text*.

        Matching ignores case because answers are lower-cased while the essay
        keeps its original capitalisation. *text* is returned unchanged when it
        is ``None`` or contains no match.
        """
        if text is None:
            return text
        placeholder = "(" + str(number) + ")____"
        # Look-arounds rather than \b so a word is never matched inside a
        # longer one, e.g. "cat" inside "category".
        pattern = r"(?<!\w)" + re.escape(answer) + r"(?!\w)"
        return re.sub(pattern, lambda _match: placeholder, text,
                      count=1, flags=re.IGNORECASE)

    def _add_options(self, question, synonyms, essay_words, database_words):
        """Extend *question* in place with distinct wrong options.

        A synonym of the answer is used first when one exists. The remaining
        slots are filled from *essay_words*, then from *database_words*. When
        too few distinct candidates exist the question ends up with fewer than
        ``_OPTIONS_PER_QUESTION`` options instead of looping forever.
        """
        answer = question[0]
        synonym_pool = [s for s in dict.fromkeys(synonyms) if s != answer]
        if synonym_pool:
            question.append(random.choice(synonym_pool))

        missing = self._OPTIONS_PER_QUESTION - len(question)
        for pool in (essay_words, database_words):
            if missing <= 0:
                break
            candidates = [
                w for w in dict.fromkeys(pool) if w and w not in question]
            chosen = random.sample(candidates, min(missing, len(candidates)))
            question.extend(chosen)
            missing -= len(chosen)