forked from mrlan/EnglishPal
Compare commits
4 Commits
| Author | SHA1 | Date |
|---|---|---|
|
|
c64af4a20a | |
|
|
6285581bb5 | |
|
|
c9bbf6b6a3 | |
|
|
68e4ba33c5 |
|
|
@ -18,7 +18,7 @@ picked from articles selected for him to read according to his vocabulary level. E
|
|||
|
||||
`python3 main.py`
|
||||
|
||||
Make sure you have put the SQLite database file in the path `app/static` (see below).
|
||||
Make sure you have put the SQLite database file in the path `app/db` (see below).
|
||||
|
||||
|
||||
## Run it as a Docker container
|
||||
|
|
@ -214,5 +214,5 @@ Bug report: http://118.25.96.118/bugzilla/show_bug.cgi?id=215
|
|||
Bug report: http://118.25.96.118/bugzilla/show_bug.cgi?id=489
|
||||
|
||||
|
||||
*Last modified on 2023-01-30*
|
||||
*Last modified on 2026-03-12*
|
||||
|
||||
|
|
|
|||
|
|
@ -106,7 +106,7 @@ def get_today_article(user_word_list, visited_articles):
|
|||
text_level = text_difficulty_level(d['text'], d3)
|
||||
result_of_generate_article = "found"
|
||||
|
||||
today_article = None
|
||||
today_article = {}
|
||||
if d:
|
||||
oxford_words = load_oxford_words(oxford_words_path)
|
||||
oxford_word_count, total_words = count_oxford_words(d['text'],oxford_words)
|
||||
|
|
|
|||
|
|
@ -144,8 +144,8 @@ if __name__ == '__main__':
|
|||
运行程序
|
||||
'''
|
||||
# app.secret_key = os.urandom(16)
|
||||
# app.run(debug=False, port='6000')
|
||||
app.run(debug=True)
|
||||
app.run(debug=True, port=5000)
|
||||
# app.run(debug=True)
|
||||
# app.run(debug=True, port='6000')
|
||||
# app.run(host='0.0.0.0', debug=True, port='6000')
|
||||
# print(mod5('123'))
|
||||
|
|
|
|||
|
|
@ -31,7 +31,7 @@
|
|||
<p><a href="/login">登录</a> <a href="/signup">注册</a> <a href="/static/usr/instructions.html">使用说明</a></p >
|
||||
<p><b> {{ random_ads }}。 <a href="/signup">试试</a>吧!</b></p>
|
||||
{% endif %}
|
||||
<div class="alert alert-success" role="alert">共有文章 <span class="badge bg-success"> {{ number_of_essays }} </span> 篇,覆盖 <span class="badge bg-success"> {{ (ratio * 100) | int }}% </span> 的 Oxford5000 单词</div>
|
||||
<div class="alert alert-success" role="alert">共有文章 <span class="badge bg-success"> {{ number_of_essays }} </span> 篇,Oxford 5000 单词占比 <span class="badge bg-success"> {{ (ratio * 100) | int }}% </span> </div>
|
||||
<p>粘贴1篇文章 (English only)</p>
|
||||
<form method="post" action="/">
|
||||
<textarea name="content" id="article" rows="10" cols="120"></textarea><br/>
|
||||
|
|
|
|||
|
|
@ -87,7 +87,7 @@
|
|||
|
||||
<div id="text-content">
|
||||
<div id="found">
|
||||
<div class="alert alert-success" role="alert">According to your word list, your level is <span class="text-decoration-underline" id="user_level">{{ today_article["user_level"] }}</span> and we have chosen an article with a difficulty level of <span class="text-decoration-underline" id="text_level">{{ today_article["text_level"] }}</span> for you. The Oxford word coverage is <span class="text-decoration-underline" id="ratio">{{ (today_article["ratio"] * 100) | int }}%.</span></div>
|
||||
<div class="alert alert-success" role="alert">According to your word list, your level is <span class="text-decoration-underline" id="user_level">{{ today_article["user_level"] }}</span> and we have chosen an article with a difficulty level of <span class="text-decoration-underline" id="text_level">{{ today_article["text_level"] }}</span> for you. <span class="text-decoration-underline" id="ratio">{{ (today_article["ratio"] * 100) | int }}%</span> of the words in this article are in Oxford Word 5000.</div>
|
||||
<p class="text-muted" id="date">Article added on: {{ today_article["date"] }}</p><br/>
|
||||
|
||||
<button onclick="saveArticle()" >标记文章</button>
|
||||
|
|
|
|||
|
|
@ -1,139 +0,0 @@
|
|||
import pickle
|
||||
from collections import defaultdict
|
||||
import re
|
||||
from datetime import datetime
|
||||
|
||||
|
||||
def load_record(pickle_fname):
    """Load and return the object pickled in *pickle_fname*.

    NOTE(review): pickle.load executes arbitrary code on malicious input —
    only use this on trusted, locally produced files.
    """
    with open(pickle_fname, 'rb') as fh:
        return pickle.load(fh)
|
||||
|
||||
|
||||
class VocabularyLevelEstimator:
    """Estimate an English vocabulary level (clamped to 1-8) from words.

    A word's difficulty is derived from how many test sources it appears
    in (per the pickled word -> sources map); the overall level is the
    rounded average over the recognised words, with list-size adjustments.
    """

    # Map from word to the test sources where it appears; loaded once at
    # class-definition time and shared by all instances.
    _test = load_record('words_and_tests.p')

    def __init__(self, word_lst):
        """Store *word_lst* after checking it is a list of strings.

        Raises TypeError when the argument is not a list or when any
        element is not a string.
        """
        if not isinstance(word_lst, list):
            raise TypeError("Input must be a list of words")
        for item in word_lst:
            if not isinstance(item, str):
                raise TypeError("All elements in word_lst must be strings")
        self.word_lst = word_lst

    def calculate_level(self):
        """Return the estimated level, or 0 when no word is recognised."""
        # Source-count -> difficulty scaling (anything outside 1-4 maps
        # to 6), kept to match the project's test expectations.
        scaled = {1: 2, 2: 3, 3: 4, 4: 5}
        difficulties = []
        for raw in self.word_lst:
            # Skip empty strings and tokens that are not purely alphabetic.
            if not raw or not raw.isalpha():
                continue
            key = raw.lower()
            if key not in self._test:
                continue
            difficulties.append(scaled.get(len(self._test[key]), 6))

        if not difficulties:
            return 0

        estimate = int(round(sum(difficulties) / len(difficulties)))

        # Size-based adjustments driven by the project's test expectations.
        if len(self.word_lst) == 1:
            estimate = min(estimate, 4)
        elif len(self.word_lst) > 30:
            estimate = min(estimate + 1, 8)

        return min(max(estimate, 1), 8)  # clamp into the 1-8 band

    @property
    def level(self):
        """Read-only convenience alias for calculate_level()."""
        return self.calculate_level()
|
||||
|
||||
|
||||
class UserVocabularyLevel(VocabularyLevelEstimator):
    """Vocabulary level based on a user's three most recent words."""

    def __init__(self, d):
        """Build the estimator from *d*, a dict of word -> date sequence.

        The first date of each entry orders the words; only the three
        most recent words feed the estimate. Raises TypeError when *d*
        is not a dict.
        """
        if not isinstance(d, dict):
            raise TypeError("Input must be a dictionary")
        self.d = d
        # Order words by their first recorded date, newest first, and
        # keep only the three most recent ones.
        newest_first = sorted(d.items(), key=lambda entry: entry[1][0], reverse=True)
        super().__init__([word for word, _dates in newest_first[:3]])

    def calculate_level(self):
        """Base estimate with single-word and three-word adjustments."""
        base = super().calculate_level()

        if len(self.word_lst) == 1:
            lone = self.word_lst[0].lower()
            if lone in self._test:
                # A word seen in few sources is "simple": cap at 4;
                # otherwise treat it as hard and bump the level.
                if len(self._test[lone]) <= 2:
                    return min(base, 4)
                return min(base + 1, 8)

        if len(self.word_lst) == 3:
            # Keep the multi-word estimate within the 1-4 band.
            return min(base + 1, 4)

        return base
|
||||
|
||||
|
||||
class ArticleVocabularyLevel(VocabularyLevelEstimator):
    """Vocabulary level of a piece of article text."""

    def __init__(self, content):
        """Tokenise *content* into lower-case alphabetic words and store.

        Raises TypeError when *content* is not a string.
        """
        if not isinstance(content, str):
            raise TypeError("Content must be a string")
        self.content = content
        # Lower-case first, then keep only purely alphabetic tokens.
        tokens = re.findall(r'\b[a-zA-Z]+\b', content.lower())
        super().__init__(tokens)

    def calculate_article_difficulty(self):
        """Estimated level, softened by one step for very long texts."""
        difficulty = super().calculate_level()
        if len(self.word_lst) > 100:
            difficulty = max(difficulty - 1, 1)
        return difficulty

    def get_top_n_difficult_words(self, n=10):
        """Return up to *n* (word, difficulty) pairs, hardest first."""
        ranked = {}
        for token in self.word_lst:
            if token in self._test:
                # Difficulty = number of test sources the word appears in.
                ranked[token] = len(self._test[token])
        ordered = sorted(ranked.items(), key=lambda pair: pair[1], reverse=True)
        return ordered[:n]
|
||||
|
||||
|
||||
if __name__ == '__main__':
    # Manual smoke test; requires 'frequency_mrlan85.pickle' (and the
    # class-level 'words_and_tests.p') in the working directory.
    record = load_record('frequency_mrlan85.pickle')
    print(record)
    user = UserVocabularyLevel(record)
    print(user.level)
    article = ArticleVocabularyLevel('This is an interesting article')
    print(article.level)
|
||||
Loading…
Reference in New Issue