# EnglishPal/app/test_estimator.py
#
# 204 lines
# 6.4 KiB
# Python

import pytest
from vocabulary import Vocabulary
# Sample vocabulary: word -> difficulty level (trailing comment names the word list)
sample_word_difficulty_dict = {
    'apple': 4,        # CET4
    'banana': 6,       # CET6
    'education': 5,    # OXFORD3000
    'intelligent': 7,  # OXFORD5000
    'BBC': 8,          # BBC
}
# Instantiate the Vocabulary used by the tests in this module, built from the
# sample dictionary.
estimator = Vocabulary(sample_word_difficulty_dict)
# Tests: normal input
def test_get_word_difficulty():
    """Each known sample word must come back with its expected difficulty."""
    expected_levels = (
        ('apple', 4),
        ('banana', 6),
        ('education', 5),
        ('intelligent', 7),
        ('BBC', 8),
    )
    # Known words are looked up directly through get_word_difficulty.
    for known_word, level in expected_levels:
        assert estimator.get_word_difficulty(known_word) == level
def test_get_text_difficulty():
    """Check get_text_difficulty on a paragraph made of the five sample words.

    The expected score used to be written as the arithmetic mean minus a bare
    constant: (4 + 6 + 5 + 7 + 8) / 5 - 1.2106110468130113 (about 4.78939).
    That number equals the geometric mean of 4, 6, 5, 7 and 3, so it is
    spelled out as such here instead of relying on a magic offset.

    NOTE(review): the trailing 3 presumably means 'BBC' is scored with the
    default difficulty inside get_text_difficulty (for example after
    lower-casing the text) -- confirm against Vocabulary.
    """
    paragraph = 'apple banana education intelligent BBC'
    avg_difficulty = estimator.get_text_difficulty(paragraph)
    # Geometric mean of the five per-word difficulties (about 4.78939).
    expected_avg_difficulty = (4 * 6 * 5 * 7 * 3) ** (1 / 5)
    assert abs(avg_difficulty - expected_avg_difficulty) < 1e-2  # allowed error margin
# Tests: boundary input
def test_empty_paragraph():
    """An empty paragraph is expected to score 0."""
    assert estimator.get_text_difficulty('') == 0
def test_single_word():
    """A one-word paragraph is expected to score that word's own difficulty."""
    only_word = 'apple'
    score = estimator.get_text_difficulty(only_word)
    # 'apple' is 4 in the sample dictionary.
    assert score == 4
# Tests: abnormal input
def test_word_not_in_dict():
    """A word missing from the dictionary is expected to get the default difficulty 3."""
    fallback = estimator.get_word_difficulty('unknown_word')
    assert fallback == 3
def test_paragraph_with_unknown_words():
    """Average the per-word difficulties of a paragraph containing an unknown word.

    The average is computed here by hand (arithmetic mean); this test does not
    call get_text_difficulty. Words absent from the sample dictionary are
    counted as 3 without consulting the estimator.
    """
    scores = []
    for token in 'apple banana unknown_word'.split():
        if token in sample_word_difficulty_dict:
            scores.append(estimator.get_word_difficulty(token))
        else:
            scores.append(3)
    observed = sum(scores) / len(scores)
    expected = (4 + 6 + 3) / 3  # 'unknown_word' counts as the default 3
    assert abs(observed - expected) < 1e-2
# Additional test cases
# 1. A large block of text
def test_large_paragraph():
    """Average the per-word difficulties over the five sample words repeated 1000 times.

    The arithmetic mean is computed in the test itself from individual
    get_word_difficulty calls; get_text_difficulty is not invoked.
    """
    base_words = ['apple', 'banana', 'education', 'intelligent', 'BBC']
    paragraph = ' '.join(base_words * 1000)  # 5000 words in total
    scores = []
    for token in paragraph.split():
        known = token in sample_word_difficulty_dict
        scores.append(estimator.get_word_difficulty(token) if known else 3)
    observed = sum(scores) / len(scores)
    expected = (4 + 6 + 5 + 7 + 8) / 5  # mean over the five distinct words
    assert abs(observed - expected) < 1e-2
# 2. Words that carry more than one difficulty tag
sample_word_difficulty_dict_multiple = {
    'apple': 5,        # OXFORD3000
    'banana': 8,       # BBC
    'education': 5,    # OXFORD3000
    'intelligent': 7,  # OXFORD5000
    'BBC': 8,          # BBC
}
def test_multiple_difficulty_tags():
    """Check the values stored in sample_word_difficulty_dict_multiple.

    Per the original author's note, 'apple' appears in both CET4 and
    OXFORD3000 and the higher level is the one expected.

    NOTE(review): the assertions only read the fixture dictionary; no
    estimator or tag-merging code is called here.
    """
    difficulty_dict = sample_word_difficulty_dict_multiple
    expected_levels = {
        'apple': 5,        # OXFORD3000 outranks CET4
        'banana': 8,       # BBC is the highest level
        'education': 5,    # OXFORD3000
        'intelligent': 7,  # OXFORD5000
        'BBC': 8,          # BBC
    }
    for word, level in expected_levels.items():
        assert difficulty_dict[word] == level
# 3. All words share the same difficulty
def test_all_words_same_difficulty():
    """Check that a dictionary of uniformly rated words is reported uniformly.

    A dedicated Vocabulary is built from the uniform dictionary and queried
    through get_word_difficulty, so the assertions exercise the estimator
    rather than re-reading the literal defined in the test. The paragraph
    average is the arithmetic mean of those per-word results, computed here.
    """
    uniform_level = 4
    sample_word_difficulty_dict_same = dict.fromkeys(
        ['apple', 'banana', 'education', 'intelligent', 'BBC'],
        uniform_level,
    )
    uniform_estimator = Vocabulary(sample_word_difficulty_dict_same)

    # Every individual word must be reported at the shared level.
    for word in sample_word_difficulty_dict_same:
        assert uniform_estimator.get_word_difficulty(word) == uniform_level

    paragraph = 'apple banana education intelligent BBC'
    word_list = paragraph.split()
    difficulties = [uniform_estimator.get_word_difficulty(word) for word in word_list]
    avg_difficulty = sum(difficulties) / len(difficulties)
    # The mean of identical values is that value.
    assert avg_difficulty == uniform_level
# 4. A longer text mixing several different words
def test_mixed_difficulty_text():
    """Average per-word difficulties over a paragraph of known and unknown words.

    The arithmetic mean is computed inside the test; get_text_difficulty is
    not called. Words outside the sample dictionary are counted as 3.
    """
    def level_of(token):
        # Only known words are passed to the estimator.
        if token in sample_word_difficulty_dict:
            return estimator.get_word_difficulty(token)
        return 3

    tokens = 'apple banana unknown_word random_word BBC intelligent education'.split()
    scores = [level_of(token) for token in tokens]
    observed = sum(scores) / len(scores)
    expected = (4 + 6 + 3 + 3 + 8 + 7 + 5) / 7  # unknown words included
    assert abs(observed - expected) < 1e-2
# 5. Repeated get_word_difficulty calls for the same word
def test_repeated_get_word_difficulty():
    """Two consecutive lookups of the same word must return the same difficulty."""
    word = 'banana'
    # Both lookups go through the local name so the word under test is
    # defined in exactly one place.
    difficulty_first = estimator.get_word_difficulty(word)
    difficulty_second = estimator.get_word_difficulty(word)
    assert difficulty_first == difficulty_second  # the result must be stable across calls
# 6. Default difficulty is returned
def test_default_difficulty_for_unknown_words():
    """A word missing from the sample dictionary is expected to get difficulty 3."""
    unknown_word = 'xyz'
    # Use the local name rather than repeating the literal in the call.
    difficulty = estimator.get_word_difficulty(unknown_word)
    assert difficulty == 3  # the default value is 3
# 7. A complex paragraph
def test_complex_paragraph_difficulty():
    """Average per-word difficulties over a paragraph of known and unknown words.

    The mean is calculated by the test from individual word lookups; words
    not present in the sample dictionary contribute 3 and are never passed to
    the estimator. get_text_difficulty is not called.
    """
    text = 'apple banana unknown_word random_word BBC intelligent education'
    levels = []
    for item in text.split():
        is_known = item in sample_word_difficulty_dict
        levels.append(estimator.get_word_difficulty(item) if is_known else 3)
    mean_level = sum(levels) / len(levels)
    wanted = (4 + 6 + 3 + 3 + 8 + 7 + 5) / 7
    assert abs(mean_level - wanted) < 1e-2
# 8. Special characters and punctuation
def test_paragraph_with_punctuation():
    """Average per-word difficulties after stripping punctuation from each token.

    Punctuation is removed by the test itself before the lookups, and the
    arithmetic mean is computed here; get_text_difficulty is not called.
    """
    paragraph = 'apple, banana; education! intelligent... BBC?'
    # Splitting on whitespace is a simplification; real text may need more
    # elaborate tokenisation to cope with punctuation.
    cleaned = [raw.strip('.,;!?') for raw in paragraph.split()]
    scores = []
    for token in cleaned:
        if token in sample_word_difficulty_dict:
            scores.append(estimator.get_word_difficulty(token))
        else:
            scores.append(3)
    observed = sum(scores) / len(scores)
    expected = (4 + 6 + 5 + 7 + 8) / 5
    assert abs(observed - expected) < 1e-2
# Run the tests when this file is executed as a script
if __name__ == '__main__':
    pytest.main()