From 52d8c12e45a725adc50975b30111d32707d71920 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E4=BD=99=E5=BF=86=E5=BC=80?= <余忆开@2291723778@qq.com> Date: Mon, 3 Jun 2024 11:32:49 +0800 Subject: [PATCH] Fix bug 533 --- app/Article.py | 20 +++++++++++++------- app/WordFreq.py | 2 +- app/difficulty.py | 2 +- app/main.py | 1 + app/test/test_bug533_Yuyikai.py | 26 ++++++++++---------------- app/wordfreqCMD.py | 2 +- 6 files changed, 27 insertions(+), 26 deletions(-) diff --git a/app/Article.py b/app/Article.py index 216c413..58335ec 100644 --- a/app/Article.py +++ b/app/Article.py @@ -1,16 +1,16 @@ import csv -from WordFreq import WordFreq +from app.WordFreq import WordFreq from app.UseSqlite import RecordQuery -from wordfreqCMD import youdao_link, sort_in_descending_order -import pickle_idea, pickle_idea2 +from app.wordfreqCMD import youdao_link, sort_in_descending_order +from app import pickle_idea, pickle_idea2 import os import random, glob import hashlib from datetime import datetime from flask import Flask, request, redirect, render_template, url_for, session, abort, flash, get_flashed_messages -from difficulty import get_difficulty_level_for_user, text_difficulty_level, user_difficulty_level -from model.article import get_all_articles, get_article_by_id, get_number_of_articles +from app.difficulty import get_difficulty_level_for_user, text_difficulty_level, user_difficulty_level +from app.model.article import get_all_articles, get_article_by_id, get_number_of_articles import logging import re @@ -29,8 +29,14 @@ def load_text_list_from_db(db_file): def load_word_list(csv_file): with open(csv_file, 'r', encoding='utf-8') as f: reader = csv.reader(f) - # 使用正则表达式匹配只包含字母的单词 - return set(re.sub(r'\W+', '', word.strip().lower()) for row in reader for word in row[0].split(',') if word.strip()) + word_set = set() + for row in reader: + for word in row[0].split(','): + clean_word = re.sub(r'\W+', '', word.strip().lower()) + if clean_word: + word_set.add(clean_word) + return word_set + def calculate_coverage(text_list, word_set): total_words = sum(len(article.split()) for article in text_list) covered_words = sum(len(set(article.split()).intersection(word_set)) for article in text_list) diff --git a/app/WordFreq.py b/app/WordFreq.py index 3620a41..90a462e 100644 --- a/app/WordFreq.py +++ b/app/WordFreq.py @@ -3,7 +3,7 @@ # Written permission must be obtained from the author for commercial uses. ########################################################################### -from wordfreqCMD import remove_punctuation, freq, sort_in_descending_order +from app.wordfreqCMD import remove_punctuation, freq, sort_in_descending_order import string class WordFreq: diff --git a/app/difficulty.py b/app/difficulty.py index 1bd8d68..12db1c5 100644 --- a/app/difficulty.py +++ b/app/difficulty.py @@ -7,7 +7,7 @@ import pickle import math -from wordfreqCMD import remove_punctuation, freq, sort_in_descending_order, sort_in_ascending_order +from app.wordfreqCMD import remove_punctuation, freq, sort_in_descending_order, sort_in_ascending_order import snowballstemmer diff --git a/app/main.py b/app/main.py index ff65b62..8af545f 100644 --- a/app/main.py +++ b/app/main.py @@ -9,6 +9,7 @@ from flask import escape from Login import * from Article import * import Yaml +from app import pickle_idea from app.wordfreqCMD import sort_in_descending_order from user_service import userService from account_service import accountService diff --git a/app/test/test_bug533_Yuyikai.py b/app/test/test_bug533_Yuyikai.py index 2ba4d5c..afde736 100644 --- a/app/test/test_bug533_Yuyikai.py +++ b/app/test/test_bug533_Yuyikai.py @@ -1,23 +1,17 @@ - -from app.Article import load_text_list_from_db -from app.main import load_word_list, calculate_coverage +from app.Article import load_text_list_from_db, load_word_list, calculate_coverage def test_coverage_percentage(): try: - db_file = "db\wordfreqapp.db" - csv_file = "db\The_Oxford.csv" - text_list = load_text_list_from_db(db_file) - word_set = load_word_list(csv_file) - coverage_percentage = "{:.2f}".format(calculate_coverage(text_list, word_set)) - print("coverage_percentage:", coverage_percentage); + text_list = load_text_list_from_db("db\wordfreqapp.db") + word_set = load_word_list("db\The_Oxford.csv") + coverage_percentage = calculate_coverage(text_list, word_set) + + assert 0 <= coverage_percentage <= 100, "Coverage percentage is not within the range [0, 100]" + + print("coverage_percentage:", coverage_percentage) except Exception as e: print(e) -if __name__ == '__main__': - test_coverage_percentage(); - - - - - +if __name__ == "__main__": + test_coverage_percentage() \ No newline at end of file diff --git a/app/wordfreqCMD.py b/app/wordfreqCMD.py index dcee74e..8ec00ec 100644 --- a/app/wordfreqCMD.py +++ b/app/wordfreqCMD.py @@ -7,7 +7,7 @@ import collections import string import operator import os, sys # 引入模块sys,因为我要用里面的sys.argv列表中的信息来读取命令行参数。 -import pickle_idea +from app import pickle_idea def freq(fruit): '''