[IMPROVE]:create_clozeTest.py:生成完型填空
parent
3e554c61c9
commit
f214838cdd
|
@ -0,0 +1,122 @@
|
|||
import random
|
||||
import sqlite3,re
|
||||
from nltk.corpus import wordnet as wn
|
||||
|
||||
class Essay:
    """An article from the word-frequency database that can become a cloze test.

    Assigning ``article_id`` loads the article text and its difficulty level
    from the SQLite database.  ``create_clozeTest`` then blanks out up to ten
    words whose difficulty matches the article and builds multiple-choice
    options for every blank.

    Results are stored on the instance:
        _essay      article text (blanks are written into it in place)
        _difficulty difficulty level read from the ``article`` table
        _answers    the blanked-out words, in blank-number order
        _questions  one shuffled option list per blank
    """

    # Path of the SQLite database, relative to the working directory.
    _DB_PATH = "static/wordfreqapp.db"
    # Upper bound on the number of blanks in one cloze test.
    _MAX_BLANKS = 10
    # Number of options (correct answer included) offered per blank.
    _OPTIONS_PER_QUESTION = 4

    def __init__(self):
        self._article_id = 0
        self._essay = None
        self._difficulty = None
        self._answers = None
        self._questions = None

    @property
    def article_id(self):
        """Return the id of the article currently associated with this essay."""
        return self._article_id

    @article_id.setter
    def article_id(self, article_id):
        """Store the id and immediately load the matching article."""
        self._article_id = article_id
        self.find_essay_in_database(self._article_id)

    @staticmethod
    def _sql_param(value):
        """Convert a numeric string to a number before binding it to a query.

        The queries used to be built by string concatenation, so a value such
        as ``"5"`` was compared as the number 5.  Binding the raw string could
        compare as text instead, so numeric strings are converted first.  Any
        other value is returned unchanged.
        """
        if isinstance(value, str):
            for cast in (int, float):
                try:
                    return cast(value)
                except ValueError:
                    continue
        return value

    def _fetch_all(self, sql, params):
        """Run a read-only query and return all rows, always closing the connection."""
        conn = sqlite3.connect(self._DB_PATH)
        try:
            cursor = conn.cursor()
            cursor.execute(sql, params)
            return cursor.fetchall()
        finally:
            conn.close()

    # Fetch the article text and its level from the database.
    def find_essay_in_database(self, id):  # noqa: A002 - name kept for callers
        """Load the article with the given id into this instance.

        Args:
            id: value of ``article.article_id`` to look up.

        Returns:
            The fetched rows (list of ``(text, level)`` tuples), or ``None``
            when the database cannot be queried or no such article exists.
            In the ``None`` case the instance state is left untouched.
        """
        try:
            results = self._fetch_all(
                "select text,level from article where article_id = ?",
                (self._sql_param(id),),
            )
        except sqlite3.Error as e:
            print(e)
            return None
        if not results:
            print("no article found for article_id = " + str(id))
            return None
        self._essay = results[0][0]
        self._difficulty = results[0][1]
        return results

    # Split the article into a list of words.
    def split_essay_to_word(self):
        """Return the lower-cased tokens of the article text.

        Tokens are separated by runs of ``. , $ '`` and whitespace that sit
        between two word boundaries.  Returns an empty list when no article
        text is loaded.
        """
        if not self._essay:
            return []
        article = "".join(self._essay)
        tokens = re.split(r"\b[\.,\s\n\r\n\$\']+?\b", article)
        return [token.lower() for token in tokens]

    # Look up the words whose difficulty equals the article's difficulty.
    def find_same_difficulty_words(self):
        """Return all words from the ``words`` table at the article's difficulty.

        Returns an empty list when no difficulty is known yet or when the
        database cannot be queried, so callers can always iterate the result.
        """
        if self._difficulty is None:
            return []
        try:
            rows = self._fetch_all(
                "select word from words where difficulty = ?",
                (self._sql_param(self._difficulty),),
            )
        except sqlite3.Error as e:
            print(e)
            return []
        return [value for row in rows for value in row]

    # Get candidate synonyms of a word.
    def get_word_synsets(self, word):
        """Return the head lemma of every WordNet synset of ``word``.

        Each synset name looks like ``"dog.n.01"``; only the part before the
        first dot is kept.  The result may contain duplicates and ``word``
        itself.
        """
        return [synset.name().split(".")[0] for synset in wn.synsets(word)]

    @staticmethod
    def _blank_first_occurrence(text, answer, number):
        """Replace the first whole-word occurrence of ``answer`` with a numbered blank.

        Matching ignores case because answers are lower-cased tokens while the
        article keeps its original capitalisation.  The lookarounds stop the
        match from landing inside a longer word.
        """
        pattern = r"(?<!\w)" + re.escape(answer) + r"(?!\w)"
        placeholder = "(" + str(number) + ")____"
        # A callable replacement avoids any escape processing of the placeholder.
        return re.sub(pattern, lambda _match: placeholder, text, count=1,
                      flags=re.IGNORECASE)

    def _top_up_options(self, options, *pools):
        """Extend ``options`` in place with distinct words drawn from ``pools``.

        Pools are consulted in order until the question has enough options.
        If the pools run dry the question simply keeps fewer options, so this
        never loops forever.
        """
        for pool in pools:
            missing = self._OPTIONS_PER_QUESTION - len(options)
            if missing <= 0:
                break
            spare = [w for w in dict.fromkeys(pool) if w and w not in options]
            options.extend(random.sample(spare, min(missing, len(spare))))

    # Generate the cloze test.
    def create_clozeTest(self, essay):
        """Build a cloze test and store it in ``_essay``, ``_answers``, ``_questions``.

        Args:
            essay: the ``Essay`` used to load the article and to query words
                and synonyms.  Its ``article_id`` is set to this instance's
                id, which triggers the database load on ``essay``.

        The blanks are written into ``self._essay``.  Each entry of
        ``self._questions`` is a shuffled list holding the correct answer, one
        synonym when WordNet offers one, and filler words taken first from the
        essay and then from the same-difficulty database words.
        """
        essay.article_id = self._article_id

        database_words = essay.find_same_difficulty_words() or []
        known = set(database_words)
        # Distinct essay words of matching difficulty, in order of appearance.
        candidates = list(dict.fromkeys(
            token for token in (essay.split_essay_to_word() or [])
            if token and token in known
        ))

        if len(candidates) <= self._MAX_BLANKS:
            answers = list(candidates)
        else:
            # Sample without replacement so exactly _MAX_BLANKS words are
            # chosen; sorting the indices keeps blanks numbered in reading order.
            picked = sorted(random.sample(range(len(candidates)), self._MAX_BLANKS))
            answers = [candidates[index] for index in picked]
        self._answers = answers

        questions = []
        for number, answer in enumerate(answers, start=1):
            if self._essay is not None:
                self._essay = self._blank_first_occurrence(self._essay, answer, number)

            options = [answer]
            # Sorted so a seeded ``random`` gives reproducible choices.
            synonyms = sorted(set(essay.get_word_synsets(answer)) - {answer})
            if synonyms:
                options.append(random.choice(synonyms))
            self._top_up_options(options, candidates, database_words)
            random.shuffle(options)
            questions.append(options)

        self._questions = questions
|
|
@ -1,3 +1,4 @@
|
|||
Flask==2.1.2
|
||||
selenium==3.141.0
|
||||
PyYAML~=6.0
|
||||
nltk==3.7
|
Loading…
Reference in New Issue