# EnglishPal/app/cloze/create_clozeTest.py
# Repository-viewer residue kept for reference: "1", "0", "Fork 0", "127 lines", "4.4 KiB", "Python"

import random
import sqlite3
import re
from nltk.corpus import wordnet as wn
class Essay:
    """An article from the word-frequency database and the cloze test built from it.

    Assigning ``article_id`` loads the article text and its difficulty level
    from the ``article`` table.  ``create_clozeTest`` then blanks out words of
    the article whose difficulty (per the ``words`` table) equals the article's
    level and builds a list of options for every blank.

    Results are stored on the instance:

    * ``_essay``      -- article text; after ``create_clozeTest`` the chosen
      words are replaced by ``(n)____`` placeholders.
    * ``_difficulty`` -- the article's level as stored in the database.
    * ``_answers``    -- the blanked-out words, in question order.
    * ``_questions``  -- one shuffled list of options per answer.
    """

    # Location of the SQLite database, relative to the working directory.
    DB_PATH = "static/wordfreqapp.db"
    # Maximum number of blanks generated per cloze test.
    QUESTION_COUNT = 10
    # Number of options offered per blank (the answer plus distractors).
    OPTION_COUNT = 4
    # Word separator: a run of '.', ',', whitespace, '$' or "'" that sits
    # between two word boundaries.
    _WORD_SEPARATOR = re.compile(r"\b[\.,\s\n\r\n\$\']+?\b")

    def __init__(self):
        self._article_id = 0
        self._essay = None
        self._difficulty = None
        self._answers = None
        self._questions = None

    @property
    def article_id(self):
        """Return the id of the article currently associated with this essay."""
        return self._article_id

    @article_id.setter
    def article_id(self, article_id):
        """Store the id and load the matching article from the database."""
        self._article_id = article_id
        self.find_essay_in_database(self._article_id)

    def _fetch_all(self, sql, params=()):
        """Run a parameterized query and return all rows.

        The connection is always closed, even when the query fails.

        Raises:
            sqlite3.Error: if the database cannot be opened or queried.
        """
        conn = sqlite3.connect(self.DB_PATH)
        try:
            return conn.execute(sql, params).fetchall()
        finally:
            conn.close()

    def find_essay_in_database(self, id):
        """Load the text and level of article ``id`` into this instance.

        Args:
            id: value compared against ``article.article_id``.  (The name
                shadows the builtin but is kept so keyword callers still work.)

        Returns:
            The list of ``(text, level)`` rows on success, or ``None`` when
            the query fails or no article matches; in that case the essay and
            difficulty already stored on the instance are left untouched.
        """
        try:
            # Bound parameter instead of string concatenation: the id may come
            # from a request and must never be interpreted as SQL.
            results = self._fetch_all(
                "select text,level from article where article_id = ?", (id,))
        except sqlite3.Error as e:
            print(e)
            return None
        if not results:
            print("article not found: " + repr(id))
            return None
        self._essay = results[0][0]
        self._difficulty = results[0][1]
        return results

    def split_essay_to_word(self):
        """Split the essay into lower-cased tokens.

        Returns:
            The list of tokens, or an empty list when no essay is loaded.
        """
        if self._essay is None:
            return []
        article = "".join(self._essay)
        return [token.lower() for token in self._WORD_SEPARATOR.split(article)]

    def find_same_difficulty_words(self):
        """Return every word in the ``words`` table whose difficulty equals the essay's.

        Returns:
            A list of words; empty when nothing matches or the query fails.
        """
        try:
            rows = self._fetch_all(
                "select word from words where difficulty = ?",
                (self._difficulty,))
        except sqlite3.Error as e:
            print(e)
            return []
        return [value for row in rows for value in row]

    def get_word_synsets(self, word):
        """Return the head name of every WordNet synset found for ``word``.

        Each synset name has the form ``name.pos.nn``; only the part before the
        first dot is kept.  The result may contain duplicates and ``word``
        itself.
        """
        return [synset.name().split(".")[0] for synset in wn.synsets(word)]

    def create_clozeTest(self, essay):
        """Build a cloze test and store it in ``_answers``, ``_questions`` and ``_essay``.

        Args:
            essay: object providing ``article_id`` (settable),
                ``find_same_difficulty_words``, ``split_essay_to_word`` and
                ``get_word_synsets`` -- normally an ``Essay``, often ``self``.

        At most ``QUESTION_COUNT`` distinct words are blanked out; each blank
        gets up to ``OPTION_COUNT`` distinct options (fewer only when the
        article does not contain enough candidate words).
        """
        essay.article_id = self._article_id

        # Words of the article that have the article's difficulty, in order of
        # first appearance.  Sets keep the membership tests O(1).
        known_words = set(essay.find_same_difficulty_words() or ())
        candidates = []
        seen = set()
        for token in essay.split_essay_to_word():
            if token and token in known_words and token not in seen:
                seen.add(token)
                candidates.append(token)

        if len(candidates) <= self.QUESTION_COUNT:
            # Short article: every candidate becomes a question.
            answers = list(candidates)
        else:
            # sample() draws without replacement, so exactly QUESTION_COUNT
            # distinct answers are produced.
            answers = random.sample(candidates, self.QUESTION_COUNT)
        self._answers = answers

        # Replace the first occurrence of every answer by its numbered blank.
        if self._essay is not None:
            for number, answer in enumerate(answers, start=1):
                self._essay = self._blank_out(self._essay, answer, number)

        self._questions = [
            self._build_options(essay, answer, candidates)
            for answer in answers
        ]

    @staticmethod
    def _blank_out(text, answer, number):
        """Replace the first whole-word occurrence of ``answer`` in ``text`` by ``(number)____``."""
        placeholder = "(" + str(number) + ")____"
        # Answers are lower-cased tokens, so match case-insensitively; the
        # lookarounds prevent hitting a fragment inside a longer word.
        pattern = r"(?<!\w)" + re.escape(answer) + r"(?!\w)"
        return re.sub(pattern, placeholder, text, count=1, flags=re.IGNORECASE)

    def _build_options(self, essay, answer, candidates):
        """Return the shuffled options for one blank: the answer plus distinct distractors."""
        options = [answer]
        # Prefer one WordNet-derived distractor when one differs from the
        # answer.  Sorting makes the draw reproducible under a seeded RNG.
        synonyms = sorted(
            {name for name in essay.get_word_synsets(answer) if name != answer})
        if synonyms:
            options.append(random.choice(synonyms))
        # Fill up from the other candidate words.  Sampling from the filtered
        # pool cannot loop forever when the article has too few candidates.
        pool = [w for w in candidates if w not in options]
        missing = self.OPTION_COUNT - len(options)
        options.extend(random.sample(pool, min(missing, len(pool))))
        random.shuffle(options)
        return options