# EnglishPal/app/cloze/create_clozeTest.py

import random
import sqlite3
import re
from nltk.corpus import wordnet as wn


class Essay:
    """An article loaded from the word-frequency database, plus a cloze test built from it.

    Assigning ``article_id`` loads the article text and its level from the
    ``article`` table. ``create_clozeTest`` then replaces a handful of words
    in the text with numbered blanks and stores the answers and the
    multiple-choice options on the instance.
    """

    # Path of the SQLite database, relative to the process working directory.
    DB_PATH = "static/wordfreqapp.db"
    # Maximum number of blanks in one cloze test.
    QUESTION_COUNT = 10
    # Number of choices offered per blank (the answer plus distractors).
    OPTION_COUNT = 4

    def __init__(self):
        self._article_id = 0
        self._essay = None       # article text; blanks are inserted in place
        self._difficulty = None  # value of the article's ``level`` column
        self._answers = None     # correct words, in blank-number order
        self._questions = None   # one shuffled option list per blank

    @property
    def article_id(self):
        """Id of the article; assigning it (re)loads text and level from the database."""
        return self._article_id

    @article_id.setter
    def article_id(self, article_id):
        self._article_id = article_id
        self.find_essay_in_database(self._article_id)

    @staticmethod
    def _as_sql_number(value):
        """Turn a numeric string into a number, leaving anything else untouched.

        The queries used to splice ``str(value)`` into the SQL text, where it
        was parsed as a numeric literal. Converting before binding keeps that
        numeric comparison when the caller hands in e.g. ``"3"``.
        """
        if isinstance(value, str):
            for cast in (int, float):
                try:
                    return cast(value)
                except ValueError:
                    continue
        return value

    def _fetch_all(self, sql, params):
        """Run a read-only query and return all rows, always closing the connection."""
        conn = sqlite3.connect(self.DB_PATH)
        try:
            return conn.execute(sql, params).fetchall()
        finally:
            conn.close()

    # Fetch the article text and its level from the database.
    def find_essay_in_database(self, id):
        """Load text and level of article ``id`` into this instance.

        Returns the fetched rows on success. On a database error, or when no
        article has that id, the problem is printed, the instance is left
        unchanged and ``None`` is returned.
        """
        try:
            rows = self._fetch_all(
                "select text,level from article where article_id = ?",
                (self._as_sql_number(id),))
        except sqlite3.Error as e:
            print(e)
            return None
        if not rows:
            print("no article found with article_id = " + str(id))
            return None
        self._essay, self._difficulty = rows[0]
        return rows

    # Split the article into a list of words.
    def split_essay_to_word(self):
        """Return the lower-cased words of the essay, in order of appearance.

        A word is a run of word characters, optionally joined by single
        hyphens ("well-known"). All other characters, apostrophes included,
        act as separators. Returns an empty list when no essay is loaded.
        """
        if self._essay is None:
            return []
        article = "".join(self._essay)
        return re.findall(r"\w+(?:-\w+)*", article.lower())

    # Look up the words whose difficulty equals the article's level.
    def find_same_difficulty_words(self):
        """Return every ``word`` in the ``words`` table whose difficulty matches the essay.

        A database error is printed and an empty list is returned, so callers
        can always iterate over the result.
        """
        try:
            rows = self._fetch_all(
                "select word from words where difficulty = ?",
                (self._as_sql_number(self._difficulty),))
        except sqlite3.Error as e:
            print(e)
            return []
        return [row[0] for row in rows]

    # Get WordNet-related words for a word.
    def get_word_synsets(self, word):
        """Return the leading name component of every WordNet synset of ``word``.

        For a synset named ``dog.n.01`` this yields ``"dog"``. The list may
        contain duplicates and may contain ``word`` itself.
        """
        return [synset.name().split(".")[0] for synset in wn.synsets(word)]

    @staticmethod
    def _blank_first_occurrence(text, answer, number):
        """Replace the first whole-word occurrence of ``answer`` in ``text`` with a numbered blank.

        Matching ignores case because the candidate words were lower-cased,
        and it refuses to match inside a longer word ("the" in "other").
        """
        placeholder = '(' + str(number) + ')____'
        pattern = r"(?<!\w)" + re.escape(answer) + r"(?!\w)"
        # A callable replacement keeps the placeholder from being parsed as a template.
        return re.sub(pattern, lambda _match: placeholder, text,
                      count=1, flags=re.IGNORECASE)

    @staticmethod
    def _build_options(answer, synonyms, essay_words, fallback_words, size=4):
        """Return a shuffled list of distinct choices that contains ``answer``.

        One related word from ``synonyms`` (if any differs from the answer) is
        used as a distractor; the rest are drawn first from ``essay_words`` and
        then from ``fallback_words``. If the pools cannot supply ``size``
        distinct words the list is simply shorter, instead of looping forever.
        """
        options = [answer]
        alternatives = [s for s in dict.fromkeys(synonyms) if s != answer]
        if alternatives and len(options) < size:
            options.append(random.choice(alternatives))
        for pool in (essay_words, fallback_words):
            missing = size - len(options)
            if missing <= 0:
                break
            unused = [w for w in dict.fromkeys(pool) if w not in options]
            options.extend(random.sample(unused, min(missing, len(unused))))
        random.shuffle(options)
        return options

    # Generate the cloze test.
    def create_clozeTest(self, essay):
        """Build a cloze test and store it on this instance.

        ``essay`` supplies the word data: it is pointed at this instance's
        article id (which reloads it from the database) and its words are
        matched against the same-difficulty word list. Up to
        ``QUESTION_COUNT`` of those words are blanked out in ``self._essay``,
        numbered in order of first appearance. The answers go to
        ``self._answers`` and the shuffled option lists to ``self._questions``.
        """
        essay.article_id = self._article_id

        database_words = essay.find_same_difficulty_words() or []
        known_words = set(database_words)
        # Distinct essay words of the article's difficulty, in text order.
        candidates = list(dict.fromkeys(
            w for w in essay.split_essay_to_word() if w in known_words))

        # Short articles may yield fewer than QUESTION_COUNT questions.
        if len(candidates) <= self.QUESTION_COUNT:
            answers = list(candidates)
        else:
            # Sample without replacement so the full number of questions is
            # reached, then restore text order so blanks are numbered in sequence.
            picked = set(random.sample(candidates, self.QUESTION_COUNT))
            answers = [w for w in candidates if w in picked]
        self._answers = answers

        # Replace each answer in the article with its question number.
        if self._essay is not None:
            for number, answer in enumerate(answers, start=1):
                self._essay = self._blank_first_occurrence(
                    self._essay, answer, number)

        # Build the choices for every question.
        self._questions = [
            self._build_options(
                answer,
                essay.get_word_synsets(answer),
                candidates,
                database_words,
                self.OPTION_COUNT,
            )
            for answer in answers
        ]