# Reconstructed from a unified diff that adds app/cloze/create_clozeTest.py.
# The same diff also appends `nltk==3.7` to requirements.txt.
"""Generate cloze (fill-in-the-blank) tests from articles stored in SQLite."""

import random
import re
import sqlite3
from contextlib import closing

# Database file used when no explicit path is given to ``Essay``.
DEFAULT_DB_PATH = "static/wordfreqapp.db"

# Separator used to cut an article into words: a run of punctuation or
# whitespace that sits between two word boundaries.
_WORD_SPLIT_RE = re.compile(r"\b[\.,\s\n\r\n\$\']+?\b")


def _sql_number(value):
    """Return *value* with numeric strings converted to ``int``/``float``.

    The queries used to inline ``str(value)`` as a bare SQL literal, so a
    value such as ``"3"`` was compared as the number 3.  Converting before
    binding keeps that comparison behaviour with parameterized queries.

    Raises:
        ValueError: if *value* is a string that is not a number.
    """
    if isinstance(value, str):
        try:
            return int(value)
        except ValueError:
            return float(value)
    return value


class Essay:
    """An article plus the cloze test generated from it.

    State is kept in private attributes: ``_essay`` (article text, with
    blanks once a test has been created), ``_difficulty`` (article level),
    ``_answers`` (blanked words) and ``_questions`` (one shuffled option
    list per answer).
    """

    def __init__(self, db_path=DEFAULT_DB_PATH):
        """Create an empty essay.

        Args:
            db_path: SQLite file holding the ``article`` and ``words`` tables.
        """
        self._db_path = db_path
        self._article_id = 0
        self._essay = None
        self._difficulty = None
        self._answers = None
        self._questions = None

    @property
    def article_id(self):
        """Id of the article this essay refers to."""
        return self._article_id

    @article_id.setter
    def article_id(self, article_id):
        # Assigning the id also loads the article text and level.
        self._article_id = article_id
        self.find_essay_in_database(self._article_id)

    def find_essay_in_database(self, id):
        """Load the text and level of article *id* from the database.

        On success ``_essay`` and ``_difficulty`` are updated from the first
        matching row.  The parameter keeps its original name ``id`` so
        keyword callers continue to work.

        Returns:
            The list of ``(text, level)`` rows, or ``None`` when the article
            does not exist or the lookup failed (the error is printed).
        """
        try:
            article_key = _sql_number(id)
            with closing(sqlite3.connect(self._db_path)) as conn:
                rows = conn.execute(
                    "select text,level from article where article_id = ?",
                    (article_key,),
                ).fetchall()
        except (sqlite3.Error, ValueError) as e:
            # Best effort, as before: report the problem and carry on.
            print(e)
            return None
        if not rows:
            return None
        self._essay, self._difficulty = rows[0][0], rows[0][1]
        return rows

    def split_essay_to_word(self):
        """Return the article as a list of lower-cased words.

        An essay that has not been loaded yields the same result as an
        empty text instead of raising.
        """
        article = "".join(self._essay or "")
        return [word.lower() for word in _WORD_SPLIT_RE.split(article)]

    def find_same_difficulty_words(self):
        """Return the words whose difficulty equals the article's level.

        Returns:
            A list of words; empty when nothing matches or the lookup failed
            (the error is printed).
        """
        try:
            level = _sql_number(self._difficulty)
            with closing(sqlite3.connect(self._db_path)) as conn:
                rows = conn.execute(
                    "select word from words where difficulty = ?", (level,)
                ).fetchall()
        except (sqlite3.Error, ValueError) as e:
            print(e)
            return []
        return [row[0] for row in rows]

    def get_word_synsets(self, word):
        """Return the head word of every WordNet synset found for *word*.

        Each entry is the first dot-separated component of a synset name;
        the list may contain duplicates and *word* itself.
        """
        # Imported here so the database helpers remain usable when NLTK is
        # not installed.
        from nltk.corpus import wordnet as wn

        return [synset.name().split(".")[0] for synset in wn.synsets(word)]

    def create_clozeTest(self, essay, *, blank_count=10, option_count=4):
        """Blank out words of the article and build multiple-choice options.

        Candidate words are the article's words that also appear in the
        database word list for the article's difficulty.  Up to
        *blank_count* of them are replaced in ``self._essay`` by
        ``(n)____`` placeholders, numbered in reading order.  The results
        are stored in ``self._answers`` and ``self._questions``.

        Args:
            essay: ``Essay`` used for the database lookups; it is pointed at
                this essay's article id first.
            blank_count: maximum number of blanks to create.
            option_count: number of options per question (fewer are produced
                only when not enough distinct words are available).
        """
        essay.article_id = self._article_id
        if self._essay is None:
            # ``essay`` was just loaded with the same article id.
            self._essay = essay._essay

        database_words = essay.find_same_difficulty_words() or []
        known_words = set(database_words)

        # Distinct article words of matching difficulty, in reading order.
        candidates = []
        seen = set()
        for essay_word in essay.split_essay_to_word():
            if essay_word and essay_word in known_words and essay_word not in seen:
                seen.add(essay_word)
                candidates.append(essay_word)

        if len(candidates) <= blank_count:
            answers = list(candidates)
        else:
            # Sample without replacement so exactly ``blank_count`` distinct
            # words are chosen; sorting the indices keeps reading order.
            chosen = sorted(random.sample(range(len(candidates)), blank_count))
            answers = [candidates[index] for index in chosen]
        self._answers = answers

        text = self._essay
        for number, answer in enumerate(answers, start=1):
            placeholder = "(" + str(number) + ")____"
            # Words were lower-cased when split, so match case-insensitively,
            # and only whole words so "fox" never blanks part of "foxes".
            pattern = r"(?<!\w)" + re.escape(answer) + r"(?!\w)"
            text = re.sub(
                pattern,
                lambda _match, filler=placeholder: filler,
                text,
                count=1,
                flags=re.IGNORECASE,
            )
        self._essay = text

        questions = []
        for answer in answers:
            options = [answer]

            # Prefer one WordNet synonym as a distractor; sorting makes the
            # pick reproducible under a seeded random generator.
            synonyms = sorted(
                {
                    name
                    for name in essay.get_word_synsets(answer)
                    if name.lower() != answer
                }
            )
            if synonyms and len(options) < option_count:
                options.append(random.choice(synonyms))

            # Fill up with other words from the article, then from the
            # database list.  Sampling from a finite pool cannot loop
            # forever when the article has only a few candidate words.
            for pool in (candidates, database_words):
                missing = option_count - len(options)
                if missing <= 0:
                    break
                fillers = [w for w in dict.fromkeys(pool) if w not in options]
                options.extend(random.sample(fillers, min(missing, len(fillers))))

            random.shuffle(options)
            questions.append(options)

        self._questions = questions