0
0
Fork 0

Fix bug 533

Bug533-Yuyikai
余忆开 2024-06-03 11:32:49 +08:00
parent 5d4547986f
commit 52d8c12e45
6 changed files with 27 additions and 26 deletions

View File

@ -1,16 +1,16 @@
import csv import csv
from WordFreq import WordFreq from app.WordFreq import WordFreq
from app.UseSqlite import RecordQuery from app.UseSqlite import RecordQuery
from wordfreqCMD import youdao_link, sort_in_descending_order from app.wordfreqCMD import youdao_link, sort_in_descending_order
import pickle_idea, pickle_idea2 from app import pickle_idea, pickle_idea2
import os import os
import random, glob import random, glob
import hashlib import hashlib
from datetime import datetime from datetime import datetime
from flask import Flask, request, redirect, render_template, url_for, session, abort, flash, get_flashed_messages from flask import Flask, request, redirect, render_template, url_for, session, abort, flash, get_flashed_messages
from difficulty import get_difficulty_level_for_user, text_difficulty_level, user_difficulty_level from app.difficulty import get_difficulty_level_for_user, text_difficulty_level, user_difficulty_level
from model.article import get_all_articles, get_article_by_id, get_number_of_articles from app.model.article import get_all_articles, get_article_by_id, get_number_of_articles
import logging import logging
import re import re
@ -29,8 +29,14 @@ def load_text_list_from_db(db_file):
def load_word_list(csv_file): def load_word_list(csv_file):
with open(csv_file, 'r', encoding='utf-8') as f: with open(csv_file, 'r', encoding='utf-8') as f:
reader = csv.reader(f) reader = csv.reader(f)
# 使用正则表达式匹配只包含字母的单词 word_set = set()
return set(re.sub(r'\W+', '', word.strip().lower()) for row in reader for word in row[0].split(',') if word.strip()) for row in reader:
for word in row[0].split(','):
clean_word = re.sub(r'\W+', '', word.strip().lower())
if clean_word:
word_set.add(clean_word)
return word_set
def calculate_coverage(text_list, word_set): def calculate_coverage(text_list, word_set):
total_words = sum(len(article.split()) for article in text_list) total_words = sum(len(article.split()) for article in text_list)
covered_words = sum(len(set(article.split()).intersection(word_set)) for article in text_list) covered_words = sum(len(set(article.split()).intersection(word_set)) for article in text_list)

View File

@ -3,7 +3,7 @@
# Written permission must be obtained from the author for commercial uses. # Written permission must be obtained from the author for commercial uses.
########################################################################### ###########################################################################
from wordfreqCMD import remove_punctuation, freq, sort_in_descending_order from app.wordfreqCMD import remove_punctuation, freq, sort_in_descending_order
import string import string
class WordFreq: class WordFreq:

View File

@ -7,7 +7,7 @@
import pickle import pickle
import math import math
from wordfreqCMD import remove_punctuation, freq, sort_in_descending_order, sort_in_ascending_order from app.wordfreqCMD import remove_punctuation, freq, sort_in_descending_order, sort_in_ascending_order
import snowballstemmer import snowballstemmer

View File

@ -9,6 +9,7 @@ from flask import escape
from Login import * from Login import *
from Article import * from Article import *
import Yaml import Yaml
from app import pickle_idea
from app.wordfreqCMD import sort_in_descending_order from app.wordfreqCMD import sort_in_descending_order
from user_service import userService from user_service import userService
from account_service import accountService from account_service import accountService

View File

@ -1,23 +1,17 @@
from app.Article import load_text_list_from_db, load_word_list, calculate_coverage
from app.Article import load_text_list_from_db
from app.main import load_word_list, calculate_coverage
def test_coverage_percentage(): def test_coverage_percentage():
try: try:
db_file = "db\wordfreqapp.db" text_list = load_text_list_from_db("db\wordfreqapp.db")
csv_file = "db\The_Oxford.csv" word_set = load_word_list("db\The_Oxford.csv")
text_list = load_text_list_from_db(db_file) coverage_percentage = calculate_coverage(text_list, word_set)
word_set = load_word_list(csv_file)
coverage_percentage = "{:.2f}".format(calculate_coverage(text_list, word_set)) assert 0 <= coverage_percentage <= 100, "Coverage percentage is not within the range [0, 100]"
print("coverage_percentage:", coverage_percentage);
print("coverage_percentage:", coverage_percentage)
except Exception as e: except Exception as e:
print(e) print(e)
if __name__ == '__main__': if __name__ == "__main__":
test_coverage_percentage(); test_coverage_percentage()

View File

@ -7,7 +7,7 @@ import collections
import string import string
import operator import operator
import os, sys # 引入模块sys因为我要用里面的sys.argv列表中的信息来读取命令行参数。 import os, sys # 引入模块sys因为我要用里面的sys.argv列表中的信息来读取命令行参数。
import pickle_idea from app import pickle_idea
def freq(fruit): def freq(fruit):
''' '''