Fix bug 533

2024-05-29 15:56:53 +08:00 · 2024-05-29 15:56:53 +08:00 · 62d978ad54
parent d6f4c247ea
commit 62d978ad54
5 changed files with 16641 additions and 16719 deletions
--- a/app/Article.py
+++ b/app/Article.py
@ -1,3 +1,5 @@
+import csv
+
 from WordFreq import WordFreq
 from app.UseSqlite import RecordQuery
 from wordfreqCMD import youdao_link, sort_in_descending_order
@ -10,6 +12,7 @@ from flask import Flask, request, redirect, render_template, url_for, session, a
 from difficulty import get_difficulty_level_for_user, text_difficulty_level, user_difficulty_level
 from model.article import get_all_articles, get_article_by_id, get_number_of_articles
 import logging
+import re

 path_prefix = './'
 db_path_prefix = './db/'  # comment this line in deployment
@ -23,6 +26,17 @@ def load_text_list_from_db(db_file):
    text_list = [row['text'] for row in result if 'text' in row]
    return text_list

+def load_word_list(csv_file):
+    """Load a set of normalised words from the first column of a CSV file.
+
+    The first field of every row is split on commas. Each piece is stripped,
+    lower-cased, and has every character that is not alphanumeric or an
+    underscore removed. Blank rows and pieces that end up empty are skipped.
+
+    :param csv_file: path of a UTF-8 encoded CSV file.
+    :return: set of non-empty, normalised words.
+    """
+    non_word = re.compile(r'\W+')
+    words = set()
+    # The csv module asks for newline='' when it is handed a file object.
+    with open(csv_file, 'r', encoding='utf-8', newline='') as f:
+        for row in csv.reader(f):
+            if not row:
+                # csv.reader yields [] for a blank line; row[0] would raise IndexError.
+                continue
+            for piece in row[0].split(','):
+                word = non_word.sub('', piece.strip().lower())
+                if word:
+                    # Punctuation-only entries collapse to '' and are dropped.
+                    words.add(word)
+    return words
+
+
+def calculate_coverage(text_list, word_set):
+    """Score how much of the articles' vocabulary is found in ``word_set``.
+
+    Articles are tokenised with ``str.split()`` (whitespace only). For each
+    article the distinct tokens present in ``word_set`` are counted. The sum
+    of those counts is divided by the total number of tokens in all articles
+    and multiplied by 10000.
+
+    :param text_list: list of article texts (strings).
+    :param word_set: collection of known words.
+    :return: the scaled ratio as a float, or 0 when there are no tokens.
+    """
+    token_total = 0
+    hit_total = 0
+    for article in text_list:
+        tokens = article.split()
+        token_total += len(tokens)
+        # Distinct tokens only: a repeated word counts once per article.
+        hit_total += len(set(tokens).intersection(word_set))
+    if not token_total:
+        return 0
+    # NOTE(review): numerator counts distinct tokens, denominator counts all
+    # tokens, and the factor is 10000 rather than 100 - confirm this is intended.
+    return (hit_total / token_total) * 10000
+
+
 def total_number_of_essays():
    return get_number_of_articles()

--- a/app/db/The_Oxford.csv
+++ b/app/db/The_Oxford.csv
--- a/app/main.py
+++ b/app/main.py
@ -1,6 +1,5 @@
 #! /usr/bin/python3
 # -*- coding: utf-8 -*-
-import csv

 ###########################################################################
 # Copyright 2019 (C) Hui Lan <hui.lan@cantab.net>
@ -28,16 +27,6 @@ path_prefix = '/var/www/wordfreq/wordfreq/'
 path_prefix = './'  # comment this line in deployment


-def load_word_list(csv_file):
-    with open(csv_file, 'r', encoding='utf-8') as f:  # 添加encoding参数指定编码方式
-        reader = csv.reader(f)
-        return set(word.strip().lower() for row in reader for word in row[0].split(','))
-
-def calculate_coverage(text_list, word_set):
-    total_words = sum(len(article.split()) for article in text_list)
-    covered_words = sum(len(set(article.split()).intersection(word_set)) for article in text_list)
-    return (covered_words / total_words) * 10000 if total_words else 0
-
 def get_random_image(path):
    '''
    返回随机图
@ -103,6 +92,7 @@ def mainpage():
    coverage_percentage = "{:.2f}".format(calculate_coverage(text_list, word_set))


+
    if request.method == 'POST':  # when we submit a form
        content = escape(request.form['content'])
        f = WordFreq(content)
--- a/app/model/article.py
+++ b/app/model/article.py
@ -1,4 +1,4 @@
-from model import *
+from app.model import *
 from datetime import datetime

 def add_article(content, source="manual_input", level="5", question="No question"):
--- a/app/model/user.py
+++ b/app/model/user.py
@ -1,4 +1,4 @@
-from model import *
+from app.model import *
 from Login import md5
 from pony import orm