import random
import re
import sqlite3

try:
    from nltk.corpus import wordnet as wn
except ImportError:
    # WordNet only supplies synonym distractors. Without NLTK the cloze test
    # is still generated, using same-difficulty words as distractors instead.
    wn = None


class Essay:
    """An article from the word-frequency database and a cloze test built on it.

    Assigning ``article_id`` loads the article text and its level from the
    database. ``create_clozeTest`` then fills in:

    * ``_essay``     - the article text with the chosen words replaced by
      numbered blanks such as ``(1)____``;
    * ``_answers``   - the blanked words, in the order they are numbered;
    * ``_questions`` - one shuffled list of options per answer.
    """

    # Path of the SQLite database, relative to the working directory.
    _DATABASE_PATH = "static/wordfreqapp.db"
    # A word is a run of word characters, optionally joined by single hyphens.
    _WORD_PATTERN = re.compile(r"\w+(?:-\w+)*")
    # Maximum number of blanks per cloze test.
    _QUESTION_COUNT = 10
    # Number of options offered per blank (the answer included).
    _OPTION_COUNT = 4

    def __init__(self):
        self._article_id = 0
        self._essay = None
        self._difficulty = None
        self._answers = None
        self._questions = None

    @property
    def article_id(self):
        """Id of the article this essay was last asked to load."""
        return self._article_id

    @article_id.setter
    def article_id(self, article_id):
        self._article_id = article_id
        self.find_essay_in_database(self._article_id)

    def _fetch_all(self, sql, params=()):
        """Run a query with bound parameters and return every row.

        The connection is always closed. A database error is printed and
        reported to the caller as ``None`` instead of being raised.
        """
        try:
            conn = sqlite3.connect(self._DATABASE_PATH)
            try:
                return conn.execute(sql, params).fetchall()
            finally:
                conn.close()
        except sqlite3.Error as e:
            print(e)
            return None

    def _essay_text(self):
        """Return the loaded article as one string ('' when nothing is loaded)."""
        return "".join(self._essay) if self._essay else ""

    def find_essay_in_database(self, id):
        """Load the text and level of article ``id`` into this essay.

        Returns the fetched rows on success. When the article does not exist
        or the query fails, the stored text and level are reset to ``None``
        (so stale content is never paired with a new id) and ``None`` is
        returned.
        """
        rows = self._fetch_all(
            "select text,level from article where article_id = ?", (id,))
        if not rows:
            self._essay = None
            self._difficulty = None
            return None
        self._essay, self._difficulty = rows[0]
        return rows

    def split_essay_to_word(self):
        """Return the lower-cased words of the article, in reading order.

        Any punctuation separates words, so ``"ripe?"`` yields ``"ripe"``.
        Hyphenated compounds are kept as a single word. Returns an empty list
        when no article is loaded.
        """
        text = self._essay_text()
        return [word.lower() for word in self._WORD_PATTERN.findall(text)]

    def find_same_difficulty_words(self):
        """Return the database words whose difficulty equals the article level.

        Returns an empty list when the query fails or nothing matches.
        """
        rows = self._fetch_all(
            "select word from words where difficulty = ?",
            (self._difficulty,))
        return [row[0] for row in rows or []]

    def get_word_synsets(self, word):
        """Return the head word of every WordNet synset found for ``word``.

        The list may contain duplicates and ``word`` itself. It is empty when
        NLTK is not installed or its WordNet data cannot be loaded.
        """
        if wn is None:
            return []
        try:
            synsets = wn.synsets(word)
        except LookupError as e:
            # NLTK raises LookupError when the WordNet corpus is not downloaded.
            print(e)
            return []
        return [synset.name().split(".")[0] for synset in synsets]

    def _blank_out(self, text, answers):
        """Replace the first whole-word occurrence of each answer in ``text``.

        Matching ignores case. The blank for ``answers[i]`` is numbered
        ``i + 1`` and written as ``(N)____``.
        """
        numbers = {answer: no for no, answer in enumerate(answers, start=1)}
        pieces = []
        cursor = 0
        for match in self._WORD_PATTERN.finditer(text):
            # pop() ensures only the first occurrence of a word is blanked.
            number = numbers.pop(match.group().lower(), None)
            if number is None:
                continue
            pieces.append(text[cursor:match.start()])
            pieces.append("(" + str(number) + ")____")
            cursor = match.end()
        pieces.append(text[cursor:])
        return "".join(pieces)

    def _build_options(self, answer, synonyms, candidates, database_words):
        """Return the shuffled, duplicate-free options for one blank.

        One synonym is used when available; the remaining distractors come
        from the article's candidate words and, if those run out, from the
        other same-difficulty database words. Fewer than ``_OPTION_COUNT``
        options are returned only when both pools are exhausted.
        """
        options = [answer]
        # Sorted so a seeded ``random`` gives reproducible choices.
        usable_synonyms = sorted(set(synonyms) - {answer})
        if usable_synonyms:
            options.append(random.choice(usable_synonyms))
        for pool in (candidates, database_words):
            missing = self._OPTION_COUNT - len(options)
            if missing <= 0:
                break
            fresh = [w for w in dict.fromkeys(pool) if w not in options]
            options.extend(random.sample(fresh, min(missing, len(fresh))))
        random.shuffle(options)
        return options

    def create_clozeTest(self, essay):
        """Generate a cloze test for this article.

        ``essay`` is the ``Essay`` used to read the article: it is pointed at
        this object's article id (which reloads it from the database) and
        supplies the words, the same-difficulty vocabulary and the synonyms.
        It may be this object itself.

        Up to ``_QUESTION_COUNT`` article words of matching difficulty are
        blanked, numbered in reading order. Results are stored in
        ``_answers``, ``_questions`` and ``_essay``; nothing is returned.
        When no word qualifies, ``_answers`` and ``_questions`` are empty and
        ``_essay`` is left as it was.
        """
        essay.article_id = self._article_id

        database_words = essay.find_same_difficulty_words() or []
        known = set(database_words)
        # Distinct article words of the article's difficulty, in text order.
        candidates = list(dict.fromkeys(
            word for word in essay.split_essay_to_word() if word in known))

        if len(candidates) <= self._QUESTION_COUNT:
            # Short article: every candidate becomes a question.
            answers = list(candidates)
        else:
            # Sample positions, then sort, so blanks are numbered in text order.
            picked = sorted(random.sample(
                range(len(candidates)), self._QUESTION_COUNT))
            answers = [candidates[i] for i in picked]
        self._answers = answers

        if answers:
            # Always start from the freshly loaded text so repeated calls do
            # not stack placeholders on top of each other.
            self._essay = self._blank_out(essay._essay_text(), answers)

        self._questions = [
            self._build_options(
                answer, essay.get_word_synsets(answer),
                candidates, database_words)
            for answer in answers
        ]