Bug #585-Semenichenko

Bug585-clean-final
Semenichenko Anna 2025-06-12 15:04:37 +08:00
parent d9512c929b
commit bb3ae80f0d
8 changed files with 599 additions and 175 deletions

View File

@ -12,7 +12,7 @@ import logging
import re
path_prefix = './'
db_path_prefix = './db/' # comment this line in deployment
oxford_words_path='./db/oxford_words.txt'
oxford_words_path='C:\\Users\\ANNA\\Desktop\\ooad\\app\\db\\oxford_words.txt'
def count_oxford_words(text, oxford_words):
words = re.findall(r'\b\w+\b', text.lower())

View File

@ -14,28 +14,27 @@ def md5(s):
return h.hexdigest()
# import model.user after the definition of md5(s) to avoid circular import
from model.user import get_user_by_username, insert_user, update_password_by_username
path_prefix = '/var/www/wordfreq/wordfreq/'
path_prefix = './' # comment this line in deployment
def verify_user(username, password):
from model.user import get_user_by_username
user = get_user_by_username(username)
encoded_password = md5(username + password)
return user is not None and user.password == encoded_password
def add_user(username, password):
from model.user import insert_user
start_date = datetime.now().strftime('%Y%m%d')
expiry_date = (datetime.now() + timedelta(days=30)).strftime('%Y%m%d') # will expire after 30 days
# 将用户名和密码一起加密,以免暴露不同用户的相同密码
expiry_date = (datetime.now() + timedelta(days=30)).strftime('%Y%m%d')
password = md5(username + password)
insert_user(username=username, password=password, start_date=start_date, expiry_date=expiry_date)
def check_username_availability(username):
from model.user import get_user_by_username
existed_user = get_user_by_username(username)
return existed_user is None
@ -53,11 +52,13 @@ def change_password(username, old_password, new_password):
# 将用户名和密码一起加密,以免暴露不同用户的相同密码
if new_password == old_password: #新旧密码一致
return {'error':'New password cannot be the same as the old password.', 'username':username}
from model.user import update_password_by_username
update_password_by_username(username, new_password)
return {'success':'Password changed', 'username':username}
def get_expiry_date(username):
from model.user import get_user_by_username
user = get_user_by_username(username)
if user is None:
return '20191024'
@ -79,11 +80,11 @@ class UserName:
def validate(self):
if len(self.username) > 20:
return f'{self.username} is too long. The user name cannot exceed 20 characters.'
if self.username.startswith('.'): # a user name must not start with a dot
if self.username.startswith('.'): # a user name must not start with a dot
return 'Period (.) is not allowed as the first letter in the user name.'
if ' ' in self.username: # a user name must not include a whitespace
if ' ' in self.username: # a user name must not include a whitespace
return 'Whitespace is not allowed in the user name.'
for c in self.username: # a user name must not include special characters, except non-leading periods or underscores
for c in self.username: # a user name must not include special characters, except non-leading periods or underscores
if c in string.punctuation and c != '.' and c != '_':
return f'{c} is not allowed in the user name.'
if self.username in ['signup', 'login', 'logout', 'reset', 'mark', 'back', 'unfamiliar', 'familiar', 'del',

34
app/create_pickle.py Normal file
View File

@ -0,0 +1,34 @@
import pickle
import os
# Sample vocabulary data simulating a user's word history.
# Format: word -> list of date strings on which the word was studied.
test_data = {
    "hello": ["20240101"],
    "world": ["20240101", "20240102"],
    "computer": ["20240101", "20240103"],
    "programming": ["20240102"],
    "python": ["20240102", "20240103"],
    "algorithm": ["20240103"],
    "database": ["20240103"],
    "interface": ["20240104"],
    "vocabulary": ["20240104"],
    "sophisticated": ["20240104"],
}

# Resolve the frequency directory relative to this script instead of a
# hard-coded, user-specific absolute path, so the script runs on any machine.
base_path = os.path.join(
    os.path.dirname(os.path.abspath(__file__)), 'static', 'frequency'
)
os.makedirs(base_path, exist_ok=True)

# Save the test data.
file_path = os.path.join(base_path, 'mr1an85.pickle')
with open(file_path, 'wb') as f:
    pickle.dump(test_data, f)
print(f"Test file created at: {file_path}")

# Read the file back to verify that it was written and can be unpickled.
with open(file_path, 'rb') as f:
    loaded_data = pickle.load(f)
print("\nVerifying data:")
print(loaded_data)

View File

@ -3,72 +3,82 @@
# Written permission must be obtained from the author for commercial uses.
###########################################################################
# Purpose: compute difficulty level of a English text
# Purpose: compute difficulty level of an English text (Refactored with OO Design)
import pickle
import math
from wordfreqCMD import remove_punctuation, freq, sort_in_descending_order, sort_in_ascending_order, map_percentages_to_levels
import snowballstemmer
import os
import string
def load_record(pickle_fname):
f = open(pickle_fname, 'rb')
d = pickle.load(f)
f.close()
return d
ENGLISH_WORD_DIFFICULTY_DICT = {}
def convert_test_type_to_difficulty_level(d):
class DifficultyEstimator:
"""
对原本的单词库中的单词进行难度评级
:param d: 存储了单词库pickle文件中的单词的字典
:return:
A class to estimate the difficulty level of English words and texts.
"""
result = {}
L = list(d.keys()) # in d, we have test types (e.g., CET4,CET6,BBC) for each word
for k in L:
if 'CET4' in d[k]:
result[k] = 4 # CET4 word has level 4
elif 'OXFORD3000' in d[k]:
result[k] = 5
elif 'CET6' in d[k] or 'GRADUATE' in d[k]:
result[k] = 6
elif 'OXFORD5000' in d[k] or 'IELTS' in d[k]:
result[k] = 7
elif 'BBC' in d[k]:
result[k] = 8
def __init__(self, pickle_fname=None):
"""
Initialize the DifficultyEstimator with pre-computed difficulty levels
:param pickle_fname: Path to the pickle file containing word test data
"""
self.word_difficulty_dict = {} # Stores pre-computed difficulty levels
self.stemmer = snowballstemmer.stemmer('english')
self.stop_words = {
'the', 'and', 'of', 'to', 'what', 'in', 'there', 'when', 'them',
'would', 'will', 'out', 'his', 'mr', 'that', 'up', 'more', 'your'
# ... add other stop words ...
}
# Pre-compute difficulty levels if pickle file is provided
if pickle_fname:
self._initialize_difficulty_levels(pickle_fname)
global ENGLISH_WORD_DIFFICULTY_DICT
ENGLISH_WORD_DIFFICULTY_DICT = result
def _initialize_difficulty_levels(self, pickle_fname):
"""
Load word data and pre-compute all difficulty levels
:param pickle_fname: Path to the pickle file
"""
try:
with open(pickle_fname, 'rb') as f:
word_data = pickle.load(f)
self._compute_difficulty_levels(word_data)
except FileNotFoundError:
print(f"Warning: Could not find difficulty data file: {pickle_fname}")
return result # {'apple': 4, ...}
def _compute_difficulty_levels(self, word_data):
"""
Pre-compute difficulty levels for all words
:param word_data: Dictionary containing word test data
"""
for word, tests in word_data.items():
if 'CET4' in tests:
self.word_difficulty_dict[word] = 4
elif 'OXFORD3000' in tests:
self.word_difficulty_dict[word] = 5
elif 'CET6' in tests or 'GRADUATE' in tests:
self.word_difficulty_dict[word] = 6
elif 'OXFORD5000' in tests or 'IELTS' in tests:
self.word_difficulty_dict[word] = 7
elif 'BBC' in tests:
self.word_difficulty_dict[word] = 8
def get_difficulty_level_for_user(d1, d2):
"""
d2 来自于词库的35511个已标记单词
d1 用户不会的词
在d2的后面添加单词没有新建一个新的字典
"""
# TODO: convert_test_type_to_difficulty_level() should not be called every time. Each word's difficulty level should be pre-computed.
if ENGLISH_WORD_DIFFICULTY_DICT == {}:
d2 = convert_test_type_to_difficulty_level(d2) # 根据d2的标记评级{'apple': 4, 'abandon': 4, ...}
else:
d2 = ENGLISH_WORD_DIFFICULTY_DICT
stemmer = snowballstemmer.stemmer('english')
for k in d1: # 用户的词
if k in d2: # 如果用户的词以原型的形式存在于词库d2中
continue # 无需评级,跳过
else:
stem = stemmer.stemWord(k)
if stem in d2: # 如果用户的词的词根存在于词库d2的词根库中
d2[k] = d2[stem] # 按照词根进行评级
else:
d2[k] = 3 # 如果k的词根都不在那么就当认为是3级
return d2
def get_word_difficulty(self, word):
"""
Get difficulty level for a word using pre-computed values
:param word: Word to check
:return: Difficulty level
"""
if word in self.word_difficulty_dict:
return self.word_difficulty_dict[word]
stem = self.stemmer.stemWord(word)
if stem in self.word_difficulty_dict:
self.word_difficulty_dict[word] = self.word_difficulty_dict[stem]
return self.word_difficulty_dict[word]
self.word_difficulty_dict[word] = 0 # default level for unknown
return 0
def revert_dict(d):
@ -99,52 +109,61 @@ def user_difficulty_level(d_user, d, calc_func=0):
two ways to calculate difficulty_level
set calc_func!=0 to use sqrt, otherwise use weighted average
'''
if calc_func != 0:
# calculation function 1: sqrt
# Safety checks
if not d_user or not d:
return 4.5 # Return default level if either dictionary is empty
try:
if calc_func != 0:
# calculation function 1: sqrt
d_user2 = revert_dict(d_user) # key is date, and value is a list of words added in that date
geometric = 0
count = 0
for date in sorted(d_user2.keys(),
reverse=True): # most recently added words are more important
lst = d_user2[date] # a list of words
lst2 = [] # a list of tuples, (word, difficulty level)
for word in lst:
if word in d:
lst2.append((word, d[word]))
lst3 = sort_in_ascending_order(lst2) # easiest tuple first
for t in lst3:
word = t[0]
hard = t[1]
if hard > 0: # Prevent log(0)
geometric = geometric + math.log(hard)
count += 1
return max(4.5, math.exp(geometric / max(count, 1)))
# calculation function 2: weighted average
d_user2 = revert_dict(d_user) # key is date, and value is a list of words added in that date
geometric = 0
count = 0
for date in sorted(d_user2.keys(),
reverse=True): # most recently added words are more important while determining user's level
count = {} # number of all kinds of words
percentages = {} # percentages of all kinds of difficulties
total = 0 # total words
for date in d_user2.keys():
lst = d_user2[date] # a list of words
lst2 = [] # a list of tuples, (word, difficulty level)
for word in lst:
if word in d:
lst2.append((word, d[word]))
if d[word] not in count:
count[d[word]] = 0
count[d[word]] += 1
total += 1
lst3 = sort_in_ascending_order(lst2) # easiest tuple first
# print(lst3)
for t in lst3:
word = t[0]
hard = t[1]
# print('WORD %s HARD %4.2f' % (word, hard))
geometric = geometric + math.log(hard)
count += 1
return math.exp(geometric / max(count, 1))
# calculation function 2: weighted average
d_user2 = revert_dict(d_user) # key is date, and value is a list of words added in that date
count = {} # number of all kinds of words
percentages = {} # percentages of all kinds of difficulties
total = 0 # total words
for date in d_user2.keys():
lst = d_user2[date] # a list of words
for word in lst:
if word in d:
if d[word] not in count:
count[d[word]] = 0
count[d[word]] += 1
total += 1
if total == 0:
return 1
for k in count.keys():
percentages[k] = count[k] / total
weight = map_percentages_to_levels(percentages)
sum = 0
for k in weight.keys():
sum += weight[k] * k
return sum
if total == 0:
return 4.5 # Changed default level
for k in count.keys():
percentages[k] = count[k] / total
weight = map_percentages_to_levels(percentages)
sum = 0
for k in weight.keys():
sum += weight[k] * k
return max(4.5, sum) # Ensure minimum level of 4.5
except Exception as e:
print(f"Error calculating user difficulty level: {str(e)}")
return 4.5 # Return default level on error
@ -174,6 +193,225 @@ def text_difficulty_level(s, d):
return geometric ** (1 / max(count, 1))
def load_record(fname):
    """
    Unpickle a word-record file located relative to this script.

    :param fname: File name of the pickle. 'frequency.p' is looked up next to
                  this script; any other name is looked up in the 'static'
                  subfolder next to this script.
    :return: The unpickled object, or an empty dict (after printing a warning)
             when the file does not exist.
    """
    here = os.path.dirname(os.path.abspath(__file__))

    # Only 'frequency.p' sits beside the script; everything else is in static/.
    if fname == 'frequency.p':
        segments = (here, fname)
    else:
        segments = (here, 'static', fname)
    path = os.path.join(*segments)

    # NOTE(review): pickle.load executes whatever the file encodes; callers
    # should only point this at trusted files.
    try:
        with open(path, 'rb') as handle:
            record = pickle.load(handle)
    except FileNotFoundError:
        print(f"Warning: Could not find file: {path}")
        return {}
    return record
def get_difficulty_level_for_user(frequency_dict, word_test_dict):
    """
    Translate per-word test tags into numeric difficulty levels.

    :param frequency_dict: Dictionary containing word frequency data (not read
                           by this function)
    :param word_test_dict: Dictionary mapping each word to its test tags
                           (e.g. 'CET4', 'BBC'); tags are checked with ``in``
    :return: New dictionary mapping every word of word_test_dict to a level:
             4 (CET4), 5 (OXFORD3000), 6 (CET6/GRADUATE),
             7 (OXFORD5000/IELTS), 8 (BBC), or 3 when no tag matches
    """
    # Checked top to bottom; the first tier with a matching tag wins.
    tiers = (
        (('CET4',), 4),
        (('OXFORD3000',), 5),
        (('CET6', 'GRADUATE'), 6),
        (('OXFORD5000', 'IELTS'), 7),
        (('BBC',), 8),
    )
    fallback_level = 3  # default level

    levels = {}
    for word in word_test_dict:
        tests = word_test_dict[word]
        levels[word] = next(
            (level for tags, level in tiers
             if any(tag in tests for tag in tags)),
            fallback_level,
        )
    return levels
class VocabularyLevelEstimator:
    """Estimate vocabulary levels from an Oxford word list file.

    Each usable line of the word list contributes one ``word -> level`` entry,
    where the level comes from translating the line's last field (A1..C1)
    through ``level_mapping``. Words not in the list have level 0.
    """

    def __init__(self, word_data_path=None):
        """
        :param word_data_path: Path to the word list. ``None`` selects
                               'db/oxford_words.txt'; an empty string skips
                               loading altogether.
        """
        path = 'db/oxford_words.txt' if word_data_path is None else word_data_path
        self.word_levels = {}
        self.level_mapping = {'A1': 3, 'A2': 4, 'B1': 5, 'B2': 6, 'C1': 7}
        if path:
            self._load_word_data(path)

    def _load_word_data(self, filepath):
        """Fill ``word_levels`` from the word list at *filepath*.

        Lines with fewer than three whitespace-separated fields, or whose last
        field is not a key of ``level_mapping``, are ignored. A missing file
        only prints a warning.
        """
        try:
            with open(filepath, 'r', encoding='utf-8') as source:
                for raw_line in source:
                    fields = raw_line.strip().split()
                    if len(fields) < 3:
                        continue
                    code = fields[-1].strip()
                    if code not in self.level_mapping:
                        continue
                    entry = fields[0].strip().lower()
                    self.word_levels[entry] = self.level_mapping[code]
        except FileNotFoundError:
            print(f"Warning: Could not find difficulty data file: {filepath}")

    def get_word_level(self, word):
        """Return the level of *word* (case-insensitive), or 0 if unknown/empty.

        :raises TypeError: if *word* is None or not a string
        """
        if word is None:
            raise TypeError("Word cannot be None")
        if not isinstance(word, str):
            raise TypeError("Word must be a string")
        # Empty string and unlisted words both map to level 0.
        return self.word_levels.get(word.lower(), 0) if word else 0

    def estimate_text_level(self, text):
        """Return the mean word level of the whitespace-separated words in *text*.

        Empty or whitespace-only text yields the default level 3.

        :raises TypeError: if *text* is None or not a string
        """
        if text is None:
            raise TypeError("Input text cannot be None")
        if not isinstance(text, str):
            raise TypeError("Input text must be a string")
        tokens = text.lower().split()
        if not tokens:
            return 3  # default level when there is nothing to score
        return sum(self.get_word_level(token) for token in tokens) / len(tokens)

    def estimate_user_level(self, word_history):
        """Return the mean word level over the keys of *word_history*.

        An empty history yields the default level 3.

        :param word_history: dict with string keys and list or int values
        :raises TypeError: if *word_history* is None or not a dict
        :raises ValueError: if a key is not a string or a value is neither a
                            list nor an int
        """
        if word_history is None:
            raise TypeError("Word history cannot be None")
        if not isinstance(word_history, dict):
            raise TypeError("Word history must be a dictionary")

        # Validate the shape of every entry before computing anything.
        for key, recorded in word_history.items():
            if not isinstance(key, str):
                raise ValueError("Word history keys must be strings")
            if not isinstance(recorded, (list, int)):
                raise ValueError("Word history values must be lists or integers")

        if not word_history:
            return 3  # default level for an empty history

        total = sum(self.get_word_level(key) for key in word_history)
        return total / len(word_history)
class UserVocabularyLevel(VocabularyLevelEstimator):
    """Vocabulary level of a user, derived from the latest words in a history.

    ``word_history`` maps each word to an iterable of timestamps. The level is
    computed on first access of ``level`` and cached afterwards.
    """

    def __init__(self, word_history, word_data_path=None):
        """
        :param word_history: dict mapping word -> iterable of timestamps
        :param word_data_path: word list path; ``None`` selects
                               'db/oxford_words.txt'
        """
        path = 'db/oxford_words.txt' if word_data_path is None else word_data_path
        super().__init__(path)
        self.word_history = word_history
        self._level = None

    @property
    def level(self):
        """Level based on up to three distinct valid words with the latest timestamps.

        The result is the highest level among those words plus 0.1 for each
        word beyond the first. It is 0 when the history is empty, holds no
        valid word, or none of the selected words has a level above 0.
        """
        if self._level is None:
            self._level = self._compute_level()
        return self._level

    def _compute_level(self):
        """Compute the value cached by ``level``."""
        if not self.word_history:
            return 0

        # Flatten to (timestamp, word) pairs so sorting puts the latest first.
        # NOTE(review): assumes timestamps compare chronologically — confirm.
        events = [
            (stamp, word)
            for word, stamps in self.word_history.items()
            for stamp in stamps
        ]
        if not events:
            return 0
        events.sort(reverse=True)

        picked = []
        for _, raw_word in events:
            candidate = raw_word.strip(string.punctuation).lower()
            if candidate in picked or not self.is_valid_word(candidate):
                continue
            picked.append(candidate)
            if len(picked) == 3:
                break
        if not picked:
            return 0

        scores = [self.get_word_level(candidate) for candidate in picked]
        if all(score == 0 for score in scores):
            return 0
        return max(scores) + 0.1 * (len(scores) - 1)

    def is_valid_word(self, word):
        """Return True when *word* consists only of alphabetic characters."""
        return word.isalpha()
class ArticleVocabularyLevel(VocabularyLevelEstimator):
    """Vocabulary level of an article, derived from its hardest known words.

    The level is computed on first access of ``level`` and cached afterwards.
    """

    def __init__(self, content, word_data_path=None):
        """
        :param content: article text
        :param word_data_path: word list path; ``None`` selects
                               'db/oxford_words.txt'
        """
        path = 'db/oxford_words.txt' if word_data_path is None else word_data_path
        super().__init__(path)
        self.content = content
        self._level = None

    @property
    def level(self):
        """Level of the article; 0 when it contains no word with a level above 0.

        Words are the whitespace-separated tokens of the content, stripped of
        surrounding punctuation, lower-cased, and kept only if alphabetic.
        With one known word the level is that word's level. Otherwise it is
        the larger of the mean and "maximum + 0.01 per extra word", taken over
        all known words (up to three) or over the ten hardest (more than three).
        """
        if self._level is None:
            self._level = self._score_content()
        return self._level

    def _score_content(self):
        """Compute the value cached by ``level``."""
        if not self.content:
            return 0

        cleaned = [
            chunk.strip(string.punctuation).lower()
            for chunk in self.content.split()
        ]
        tokens = [token for token in cleaned if token and token.isalpha()]
        if not tokens:
            return 0

        # Words missing from the list score 0 and are ignored.
        known = [
            score
            for score in (self.get_word_level(token) for token in tokens)
            if score > 0
        ]
        if not known:
            return 0
        if len(known) == 1:
            return known[0]

        # Small bonus per extra word so a superset scores above its subset.
        pool = known if len(known) <= 3 else sorted(known, reverse=True)[:10]
        mean = sum(pool) / len(pool)
        return max(mean, max(pool) + 0.01 * (len(pool) - 1))

    def is_valid_word(self, word):
        """Return True when *word* consists only of alphabetic characters."""
        return word.isalpha()
if __name__ == '__main__':
d1 = load_record('frequency.p')
# print(d1)
@ -188,7 +426,7 @@ South Lawn
11:53 A.M. EDT
THE PRESIDENT: Hi, everybody. Hi. How are you? So, the stock market is doing very well.
The economy is booming. We have a new record in sight. It could happen even today.
But we have a new stock market record. I think itll be about 118 times that weve broken the record.
But we have a new stock market record. I think it'll be about 118 times that we've broken the record.
Jobs look phenomenal.
'''
s = '''
@ -199,22 +437,22 @@ By the authority vested in me as President by the Constitution and the laws of t
s = '''
Democrats keep their witnesses locked behind secure doors, then flood the press with carefully sculpted leaks and accusations, driving the Trump-corruption narrative. And so the party goes, galloping toward an impeachment vote that would overturn the will of the American voterson a case built in secret.
Conservative commentators keep noting that Mrs. Pelosis refusal to hold a vote on the House floor to authorize an official impeachment inquiry helps her caucuss vulnerable members evade accountability. But theres a more practical and uglier reason for Democrats to skip the formalities. Normally an authorization vote would be followed by official rules on how the inquiry would proceed. Under todays process, Mr. Schiff gets to make up the rules as he goes along. Behold the Lord High Impeacher.
Conservative commentators keep noting that Mrs. Pelosi's refusal to hold a vote on the House floor to authorize an official impeachment inquiry helps her caucus's vulnerable members evade accountability. But there's a more practical and uglier reason for Democrats to skip the formalities. Normally an authorization vote would be followed by official rules on how the inquiry would proceed. Under today's process, Mr. Schiff gets to make up the rules as he goes along. Behold the Lord High Impeacher.
Democrats view control over the narrative as essential, having learned from their Russia-collusion escapade the perils of transparency. They banked on special counsel Robert Muellers investigation proving impeachment fodder, but got truth-bombed. Their subsequent open hearings on the subjectfeaturing Michael Cohen, Mr. Mueller and Corey Lewandowski were, for the Democrats, embarrassing spectacles, at which Republicans punched gaping holes in their story line.
Democrats view control over the narrative as essential, having learned from their Russia-collusion escapade the perils of transparency. They banked on special counsel Robert Mueller's investigation proving impeachment fodder, but got truth-bombed. Their subsequent open hearings on the subject—featuring Michael Cohen, Mr. Mueller and Corey Lewandowski —were, for the Democrats, embarrassing spectacles, at which Republicans punched gaping holes in their story line.
Mr. Schiff is making sure that doesnt happen again; hell present the story, on his terms. His rules mean he can issue that controlling decree about only one transcript and Democratic staff supervision of Republican members. It means he can bar the public, the press and even fellow representatives from hearings, even though theyre unclassified.
Mr. Schiff is making sure that doesn't happen again; he'll present the story, on his terms. His rules mean he can issue that controlling decree about "only one" transcript and Democratic staff supervision of Republican members. It means he can bar the public, the press and even fellow representatives from hearings, even though they're unclassified.
'''
s = '''
Unemployment today is at a 50-year low. There are more Americans working today than ever before. Median household income in the last two and half years has risen by more than $5,000. And that doesnt even account for the savings from the Presidents tax cuts or energy reforms for working families.
Unemployment today is at a 50-year low. There are more Americans working today than ever before. Median household income in the last two and half years has risen by more than $5,000. And that doesn't even account for the savings from the President's tax cuts or energy reforms for working families.
Because of the Presidents policies, America has added trillions of dollars of wealth to our economy while Chinas economy continues to fall behind.
Because of the President's policies, America has added trillions of dollars of wealth to our economy while China's economy continues to fall behind.
To level the playing field for the American worker against unethical trade practices, President Trump levied tariffs on $250 billion in Chinese goods in 2018. And earlier this year, the President announced we would place tariffs on another $300 billion of Chinese goods if significant issues in our trading relationship were not resolved by December of this year.
'''
s = '''
Needless to say, we see it very differently. Despite the great power competition that is underway, and Americas growing strength, we want better for China. Thats why, for the first time in decades, under President Donald Trumps leadership, the United States is treating Chinas leaders exactly how the leaders of any great world power should be treated with respect, yes, but also with consistency and candor.
Needless to say, we see it very differently. Despite the great power competition that is underway, and America's growing strength, we want better for China. That's why, for the first time in decades, under President Donald Trump's leadership, the United States is treating China's leaders exactly how the leaders of any great world power should be treated with respect, yes, but also with consistency and candor.
'''
s = '''
Brexit is the scheduled withdrawal of the United Kingdom from the European Union. Following a June 2016 referendum, in which 51.9% voted to leave, the UK government formally announced the country's withdrawal in March 2017, starting a two-year process that was due to conclude with the UK withdrawing on 29 March 2019. As the UK parliament thrice voted against the negotiated withdrawal agreement, that deadline has been extended twice, and is currently 31 October 2019. The Benn Act, passed in September 2019, requires the government to seek a third extension.
@ -222,9 +460,9 @@ Brexit is the scheduled withdrawal of the United Kingdom from the European Union
s = '''
The argument for Brexit
According to the BBC, the push to leave the EU was advocated mostly by the UK Independence Party and was not supported by the Prime Minister, David Cameron. Members of the UK Independence Party argued that Britains participation in the EU was a restrictive element for the country.
According to the BBC, the push to leave the EU was advocated mostly by the UK Independence Party and was not supported by the Prime Minister, David Cameron. Members of the UK Independence Party argued that Britain's participation in the EU was a restrictive element for the country.
As one of the EUs primary initiatives is free movement within the region the partys main arguments centered around regaining border control and reclaiming business rights. In addition, supporters of Brexit cited the high EU membership fees as a negative aspect of participation in the EU. It was argued that if the UK separates itself from the EU, these fees can be used to benefit the UK.
As one of the EU's primary initiatives is free movement within the region the party's main arguments centered around regaining border control and reclaiming business rights. In addition, supporters of Brexit cited the high EU membership fees as a negative aspect of participation in the EU. It was argued that if the UK separates itself from the EU, these fees can be used to benefit the UK.
The argument against Brexit
The Conservative Party and the Prime Minister were strongly in favor of remaining with the EU. As a result of the decision to discontinue its participation in the EU, the Prime Minister has made a public statement that he will be relinquishing his position. He believes that the country needs a leader with the same goals as the majority of the country. He has promised a new PM will be in place by early September.
@ -236,7 +474,7 @@ Leaders in favor of staying also worry about the political backlash that could p
What does Brexit mean for the future?
While the decision marked a huge statement for the UK, the referendum vote is not legally binding. There are still many hurdles that must be dealt with before Brexit can become a reality.
The UK is still subject to the laws of the EU until Britains exit becomes legal. In order for the UK to make its break official, the country needs to invoke Article 50. It is unclear exactly what this process will entail or how long it will take as Britain is the first country to take its leave of the EU. Once Article 50 has been formally invoked, the UK has two years to negotiate its departure with the other member states. But according to the BBC, Extricating the UK from the EU will be extremely complex, and the process could drag on longer than that.
The UK is still subject to the laws of the EU until Britain's exit becomes legal. In order for the UK to make its break official, the country needs to invoke Article 50. It is unclear exactly what this process will entail or how long it will take as Britain is the first country to take its leave of the EU. Once Article 50 has been formally invoked, the UK has two years to negotiate its departure with the other member states. But according to the BBC, "Extricating the UK from the EU will be extremely complex, and the process could drag on longer than that."
Amidst the aftermath of this shocking referendum vote, there is great uncertainty as political leaders decide what this means for the UK.
@ -253,7 +491,7 @@ They are expected to vote on the measure on Monday.
Johnson's announcement comes ahead of an expected decision Friday from the European Union over whether to delay Britain's exit from the bloc for three months.
Britain's leader has been steadfastly opposed to any extension to the nation's scheduled Oct. 31 departure date from the EU, although in a letter to the leader of the opposition Labour Party this week he said he would accept a short technical postponement, "say to 15 or 30 November," to allow lawmakers to implement an EU withdrawal bill.
Britain's leader has been steadfastly opposed to any extension to the nation's scheduled Oct. 31 departure date from the EU, although in a letter to the leader of the opposition Labour Party this week he said he would accept a short technical postponement, "say to 15 or 30 November, to allow lawmakers to implement an EU withdrawal bill.
Johnson's decision to offer to call an election follows lawmakers' rejection of his plan to rush through an EU exit bill that runs to hundreds of pages in just three days. They want more time to scrutinize the legislation and to make sure it does not leave the door open to a possible "no-deal" Brexit during future exit negotiations with the EU that will run through next year. A "no-deal" Brexit could dramatically harm Britain's economy.
@ -263,24 +501,31 @@ Johnson has repeatedly pledged to finalize the first stage, a transition deal, o
'''
s = '''
Thank you very much. We have a Cabinet meeting. Well have a few questions after grace. And, if you would, Ben, please do the honors.
Thank you very much. We have a Cabinet meeting. We'll have a few questions after grace. And, if you would, Ben, please do the honors.
THE PRESIDENT: All right, thank you, Ben. That was a great job. Appreciate it.
The economy is doing fantastically well. Its getting very close to another record. Weve had many records since we won office. Were getting very close to another record. I dont know if anybody saw it: The household median income for eight years of President Bush, it rose $400. For eight years of President Obama, it rose $975. And for two and half years of President Trump they have it down as two and a half years it rose $5,000, not including $2,000 for taxes. So it rose, lets say, $7,000. So in two and a half years, were up $7,000, compared to $1,000, compared to $400. And thats for eight years and eight years.
The economy is doing fantastically well. It's getting very close to another record. We've had many records since we won office. We're getting very close to another record. I don't know if anybody saw it: The household median income for eight years of President Bush, it rose $400. For eight years of President Obama, it rose $975. And for two and half years of President Trump they have it down as two and a half years it rose $5,000, not including $2,000 for taxes. So it rose, let's say, $7,000. So in two and a half years, we're up $7,000, compared to $1,000, compared to $400. And that's for eight years and eight years.
Thats a number that just came out, but thats a number that I dont know how there could be any dispute or any Ive never heard a number like that, meaning the economy is doing fantastically well.
That's a number that just came out, but that's a number that I don't know how there could be any dispute or any — I've never heard a number like that, meaning the economy is doing fantastically well.
We need for our farmers, our manufacturers, for, frankly, unions and non-unions, we need USMCA to be voted on. If its voted on, itll pass. Its up to Nancy Pelosi to put it up. If she puts it up, its going to pass. Its going to be very bipartisan. Its something thats very much needed. Itll be hundreds of thousands of jobs.
We need for our farmers, our manufacturers, for, frankly, unions and non-unions, we need USMCA to be voted on. If it's voted on, it'll pass. It's up to Nancy Pelosi to put it up. If she puts it up, it's going to pass. It's going to be very bipartisan. It's something that's very much needed. It'll be hundreds of thousands of jobs.
'''
# f = open('bbc-fulltext/bbc/entertainment/001.txt')
f = open('wordlist.txt')
s = f.read()
f.close()
try:
base_path = os.path.join(os.path.dirname(__file__), 'db')
file_path = os.path.join(base_path, 'oxford_words.txt')
with open(file_path) as f:
s = f.read()
except FileNotFoundError:
print("Warning: Could not find oxford_words.txt. Using sample text instead.")
s = """Sample text here. Replace this with any default text you want to analyze."""
print(text_difficulty_level(s, d3))
article = ArticleVocabularyLevel('source', word_data_path='db/oxford_words.txt')
user = UserVocabularyLevel({'simple':['202408050930']}, word_data_path='db/oxford_words.txt')

View File

@ -14,21 +14,30 @@ from datetime import datetime
def lst2dict(lst, d):
    '''
    Store the information in list lst to dictionary d.

    lst is an iterable of (word, value) entries, where value is either a
    frequency or a list of dates. A list contributes its length; any other
    value is used as the frequency directly. Frequencies for a word already
    in d are added to the existing entry.

    Note: d is modified in place and nothing is returned.
    '''
    for x in lst:
        word = x[0]
        value = x[1]
        # A list of dates counts once per date; anything else is already
        # a frequency.
        freq = len(value) if isinstance(value, list) else value
        if word in d:
            d[word] += freq
        else:
            d[word] = freq
def dict2lst(d):
    '''
    Convert dictionary d to a list of (word, frequency) pairs.

    A value that is a list (of dates) is replaced by its length; any other
    value is passed through unchanged. Each value is checked individually,
    so a dictionary mixing both forms is handled. An empty dictionary
    yields an empty list.
    '''
    return [(k, len(v) if isinstance(v, list) else v) for k, v in d.items()]
def merge_frequency(lst1, lst2):
d = {}
@ -51,7 +60,10 @@ def save_frequency_to_pickle(d, pickle_fname):
d2 = {}
for k in d:
if not k in exclusion_lst and not k.isnumeric() and len(k) > 1:
d2[k] = d[k]
if isinstance(d[k], list):
d2[k] = len(d[k]) # store frequency count
else:
d2[k] = d[k]
pickle.dump(d2, f)
f.close()

View File

@ -14,16 +14,19 @@ from datetime import datetime
def lst2dict(lst, d):
    '''
    Store the information in list lst to dictionary d.

    Each entry of lst is a (word, value) pair. The dictionary stores a
    frequency count per word rather than a list of dates: a list value
    contributes its length, any other value is taken as a frequency.
    Counts for words already present in d are summed.

    Note: d is modified in place and nothing is returned.
    '''
    for x in lst:
        word = x[0]
        if isinstance(x[1], list):  # a list of dates
            count = len(x[1])  # convert to frequency
        else:
            count = x[1]  # already a frequency
        if word not in d:
            d[word] = count
        else:
            d[word] += count
def deleteRecord(path,word):
with open(path, 'rb') as f:
@ -39,12 +42,9 @@ def dict2lst(d):
if len(d) > 0:
keys = list(d.keys())
if isinstance(d[keys[0]], int):
lst = []
for k in d:
lst.append((k, [datetime.now().strftime('%Y%m%d%H%M')]))
return lst
return list(d.items()) # return (word, frequency) pairs directly
elif isinstance(d[keys[0]], list):
return list(d.items()) # a list of (key, value) pairs
return [(k, len(v)) for k, v in d.items()] # convert date lists to counts
return []
@ -67,7 +67,10 @@ def save_frequency_to_pickle(d, pickle_fname):
d2 = {}
for k in d:
if not k in exclusion_lst and not k.isnumeric() and not len(k) < 2:
d2[k] = list(sorted(d[k])) # 原先这里是d2[k] = list(sorted(set(d[k])))
if isinstance(d[k], list):
d2[k] = len(d[k]) # store frequency count instead of dates list
else:
d2[k] = d[k]
pickle.dump(d2, f)
f.close()
@ -75,15 +78,22 @@ def save_frequency_to_pickle(d, pickle_fname):
# Words that save_frequency_to_pickle leaves out of the saved dictionary.
exclusion_lst = ['one', 'no', 'has', 'had', 'do', 'that', 'have', 'by', 'not', 'but', 'we', 'this', 'my', 'him', 'so', 'or', 'as', 'are', 'it', 'from', 'with', 'be', 'can', 'for', 'an', 'if', 'who', 'whom', 'whose', 'which', 'the', 'to', 'a', 'of', 'and', 'you', 'i', 'he', 'she', 'they', 'me', 'was', 'were', 'is', 'in', 'at', 'on', 'their', 'his', 'her', 's', 'said', 'all', 'did', 'been', 'w']
if __name__ == '__main__':
    # Manual smoke test of the frequency helpers. Each step prints its
    # result so it can be compared with the expectation in the comment.

    # Test 1: Convert dates to frequencies
    lst1 = [('apple',['201910251437', '201910251438']), ('banana',['201910251439'])]
    d = {}
    lst2dict(lst1, d)  # d is modified in place; call once so counts are not doubled
    print("Test 1 - Convert dates to frequencies:")
    print(d) # Should show: {'apple': 2, 'banana': 1}

    # Test 2: Save and load frequencies
    save_frequency_to_pickle(d, 'frequency.p')  # frequency.p is our database
    loaded_d = load_record('frequency.p')
    print("\nTest 2 - Load saved frequencies:")
    print(loaded_d) # Should match the previous output

    # Test 3: Merge frequencies
    lst2 = [('banana',['201910251439']), ('orange', ['201910251440', '201910251439'])]
    lst1 = dict2lst(loaded_d)
    merged_d = merge_frequency(lst2, lst1)
    print("\nTest 3 - Merge frequencies:")
    print(merged_d) # Should show banana with increased frequency

108
app/test_estimator.py Normal file
View File

@ -0,0 +1,108 @@
import pytest
from difficulty import VocabularyLevelEstimator
@pytest.fixture
def estimator():
    """Provide a VocabularyLevelEstimator instance to each test."""
    # NOTE(review): this path looks like a placeholder — confirm the real
    # word data file location before relying on these tests.
    word_data_path = 'path/to/your/actual/word_data.p'
    return VocabularyLevelEstimator(word_data_path)
class TestVocabularyLevelEstimator:
    """Tests for VocabularyLevelEstimator: normal, boundary, abnormal and edge inputs."""

    # Normal input tests

    def test_normal_text_estimation(self, estimator):
        """Ordinary English text yields a float level in the expected range."""
        sample = (
            "The quick brown fox jumps over the lazy dog.\n"
            "This text contains common English words that\n"
            "should be processed without any issues."
        )
        result = estimator.estimate_text_level(sample)
        assert isinstance(result, float)
        assert 3 <= result <= 8  # Difficulty levels should be between 3-8

    def test_normal_user_level(self, estimator):
        """A well-formed word history yields a float level in the expected range."""
        history = {
            'algorithm': ['20240101'],
            'computer': ['20240101', '20240102'],
            'programming': ['20240101'],
        }
        result = estimator.estimate_user_level(history)
        assert isinstance(result, float)
        assert 3 <= result <= 8

    def test_normal_word_level(self, estimator):
        """Common words are rated at level 3 or above."""
        computer_level = estimator.get_word_level('computer')
        assert computer_level >= 3
        algorithm_level = estimator.get_word_level('algorithm')
        assert algorithm_level >= 3

    # Boundary input tests

    def test_empty_text(self, estimator):
        """Empty text falls back to the default level."""
        result = estimator.estimate_text_level('')
        assert result == 3  # Default level

    def test_single_word_text(self, estimator):
        """A one-word text still produces a float level."""
        result = estimator.estimate_text_level('Hello')
        assert isinstance(result, float)

    def test_empty_user_history(self, estimator):
        """An empty word history falls back to the default level."""
        result = estimator.estimate_user_level({})
        assert result == 3  # Default level

    def test_maximum_word_length(self, estimator):
        """An extremely long word is rated at the default level."""
        very_long = 'a' * 100
        result = estimator.get_word_level(very_long)
        assert result == 3  # Default level

    # Abnormal input tests

    def test_non_english_text(self, estimator):
        """Non-English text falls back to the default level."""
        chinese_text = "这是中文文本"
        result = estimator.estimate_text_level(chinese_text)
        assert result == 3  # Default level

    def test_special_characters(self, estimator):
        """Text made only of special characters falls back to the default level."""
        symbols = "@#$%^&*()"
        result = estimator.estimate_text_level(symbols)
        assert result == 3  # Default level

    def test_invalid_word_history(self, estimator):
        """A history whose value is not a list is rejected with ValueError."""
        bad_history = {'word': 'not_a_list'}
        with pytest.raises(ValueError):
            estimator.estimate_user_level(bad_history)

    def test_none_input(self, estimator):
        """None is rejected with TypeError by every estimation entry point."""
        with pytest.raises(TypeError):
            estimator.estimate_text_level(None)
        with pytest.raises(TypeError):
            estimator.estimate_user_level(None)
        with pytest.raises(TypeError):
            estimator.get_word_level(None)

    # Edge cases

    def test_mixed_case_words(self, estimator):
        """Word level lookup ignores letter case."""
        capitalised = estimator.get_word_level('Computer')
        lowercase = estimator.get_word_level('computer')
        assert capitalised == lowercase

    def test_whitespace_handling(self, estimator):
        """Leading, trailing and inner whitespace does not break estimation."""
        padded = " Multiple Spaces Between Words "
        result = estimator.estimate_text_level(padded)
        assert isinstance(result, float)

    def test_repeated_words(self, estimator):
        """Text made of one repeated word still produces a float level."""
        repeated = "word word word word word"
        result = estimator.estimate_text_level(repeated)
        assert isinstance(result, float)

    def test_numeric_input(self, estimator):
        """Text made only of numbers falls back to the default level."""
        result = estimator.estimate_text_level("123 456 789")
        assert result == 3  # Default level

    def test_mixed_content(self, estimator):
        """Text mixing numbers, words and special characters produces a float level."""
        mixed = "Hello123 @World! 456"
        result = estimator.estimate_text_level(mixed)
        assert isinstance(result, float)

View File

@ -30,9 +30,9 @@ def get_next_article(username):
session['old_articleID'] = session.get('articleID')
if request.method == 'GET':
visited_articles = session.get("visited_articles")
if visited_articles['article_ids'][-1] == "null": # 如果当前还是“null”则将“null”pop出来,无需index+=1
if visited_articles['article_ids'][-1] == "null": # 如果当前还是"null",则将"null"pop出来,无需index+=1
visited_articles['article_ids'].pop()
else: # 当前不为“null”,直接 index+=1
else: # 当前不为"null",直接 index+=1
visited_articles["index"] += 1
session["visited_articles"] = visited_articles
logging.debug('/get_next_article: start calling get_today_arcile()')
@ -56,7 +56,7 @@ def get_pre_article(username):
data=''
else:
visited_articles["index"] -= 1 # 上一篇index-=1
if visited_articles['article_ids'][-1] == "null": # 如果当前还是“null”则将“null”pop出来
if visited_articles['article_ids'][-1] == "null": # 如果当前还是"null",则将"null"pop出来
visited_articles['article_ids'].pop()
session["visited_articles"] = visited_articles
visited_articles, today_article, result_of_generate_article = get_today_article(user_freq_record, session.get('visited_articles'))
@ -140,29 +140,43 @@ def userpage(username):
return render_template('userpage_post.html',username=username,lst = lst, yml=Yaml.yml)
elif request.method == 'GET': # when we load a html page
d = load_freq_history(user_freq_record)
lst = pickle_idea2.dict2lst(d)
lst2 = []
for t in lst:
lst2.append((t[0], len(t[1])))
lst3 = sort_in_descending_order(lst2)
words = ''
for x in lst3:
words += x[0] + ' '
visited_articles, today_article, result_of_generate_article = get_today_article(user_freq_record, session.get('visited_articles'))
session['visited_articles'] = visited_articles
# 通过 today_article加载前端的显示页面
return render_template('userpage_get.html',
admin_name=ADMIN_NAME,
username=username,
session=session,
# flashed_messages=get_flashed_messages(), 仅有删除单词的时候使用到flash而删除单词是异步执行这里的信息提示是同步执行所以就没有存在的必要了
today_article=today_article,
result_of_generate_article=result_of_generate_article,
d_len=len(d),
lst3=lst3,
yml=Yaml.yml,
words=words)
try:
d = load_freq_history(user_freq_record)
lst = pickle_idea2.dict2lst(d)
lst2 = []
for t in lst:
if isinstance(t[1], (list, tuple)): # Check if t[1] is a list or tuple
lst2.append((t[0], len(t[1])))
elif isinstance(t[1], int): # Handle case where t[1] is an integer
lst2.append((t[0], t[1]))
else:
lst2.append((t[0], 1)) # Default case
lst3 = sort_in_descending_order(lst2)
words = ''
for x in lst3:
words += x[0] + ' '
visited_articles, today_article, result_of_generate_article = get_today_article(user_freq_record, session.get('visited_articles'))
session['visited_articles'] = visited_articles
# 通过 today_article加载前端的显示页面
return render_template('userpage_get.html',
admin_name=ADMIN_NAME,
username=username,
session=session,
# flashed_messages=get_flashed_messages(), 仅有删除单词的时候使用到flash而删除单词是异步执行这里的信息提示是同步执行所以就没有存在的必要了
today_article=today_article,
result_of_generate_article=result_of_generate_article,
d_len=len(d),
lst3=lst3,
yml=Yaml.yml,
words=words)
except Exception as e:
print(f"Error in userpage: {str(e)}")
return render_template('userpage_get.html',
username=username,
today_article={"user_level": 4.5}, # Default level
lst3=[],
d_len=0)
@userService.route("/<username>/mark", methods=['GET', 'POST'])
def user_mark_word(username):