204 lines
6.4 KiB
Python
204 lines
6.4 KiB
Python
|
import pytest
|
||
|
from vocabulary import Vocabulary
|
||
|
|
||
|
# 示例词汇字典
|
||
|
# Sample vocabulary fixture: maps each word to its difficulty level.
# The trailing comment on every entry is the label carried by the original fixture.
sample_word_difficulty_dict = dict(
    [
        ('apple', 4),  # CET4
        ('banana', 6),  # CET6
        ('education', 5),  # OXFORD3000
        ('intelligent', 7),  # OXFORD5000
        ('BBC', 8),  # BBC
    ]
)
|
||
|
|
||
|
# 实例化 Vocabulary
|
||
|
# Module-level Vocabulary instance built from the sample mapping; the test
# functions in this file call its get_word_difficulty / get_text_difficulty methods.
estimator = Vocabulary(sample_word_difficulty_dict)
|
||
|
|
||
|
# 测试:正常输入
|
||
|
def test_get_word_difficulty():
    """Each known word must report the level listed for it in the sample mapping."""
    expected_levels = (
        ('apple', 4),
        ('banana', 6),
        ('education', 5),
        ('intelligent', 7),
        ('BBC', 8),
    )
    # Known words are looked up directly through get_word_difficulty.
    for word, level in expected_levels:
        assert estimator.get_word_difficulty(word) == level
|
||
|
|
||
|
def test_get_text_difficulty():
    """Compare get_text_difficulty on a five-word paragraph with a precomputed value."""
    text = 'apple banana education intelligent BBC'

    # The original author's note says the reference value was obtained from a
    # geometric mean; it is written here as the arithmetic mean of the five
    # word levels minus a fixed offset.
    # NOTE(review): the result is approximately 4.7894, which matches the
    # geometric mean of [4, 6, 5, 7, 3] -- i.e. as if one word (possibly 'BBC')
    # scored 3 instead of 8. Confirm against Vocabulary.get_text_difficulty.
    arithmetic_mean = (4 + 6 + 5 + 7 + 8) / 5
    expected_avg_difficulty = arithmetic_mean - 1.2106110468130113

    measured = estimator.get_text_difficulty(text)

    # A small absolute error is tolerated.
    assert abs(measured - expected_avg_difficulty) < 1e-2
|
||
|
|
||
|
|
||
|
# 测试:边界输入
|
||
|
def test_empty_paragraph():
    """An empty paragraph is expected to score 0."""
    assert estimator.get_text_difficulty('') == 0
|
||
|
|
||
|
|
||
|
def test_single_word():
    """A paragraph made of the single word 'apple' is expected to score 4."""
    lone_word = 'apple'
    # The expected score is the level of 'apple' in the sample mapping.
    assert estimator.get_text_difficulty(lone_word) == 4
|
||
|
|
||
|
|
||
|
# 测试:异常输入
|
||
|
def test_word_not_in_dict():
    """A word missing from the mapping is expected to get the default difficulty 3."""
    fallback_level = estimator.get_word_difficulty('unknown_word')
    assert fallback_level == 3
|
||
|
|
||
|
|
||
|
def test_paragraph_with_unknown_words():
    """Average word difficulty of a paragraph that contains an unknown word.

    Every word, including the unknown one, is looked up through
    estimator.get_word_difficulty. Previously the test substituted 3 itself
    for words missing from the sample mapping, so the default level it claims
    to verify was never requested from the estimator.
    """
    paragraph = 'apple banana unknown_word'

    difficulties = [
        estimator.get_word_difficulty(word) for word in paragraph.split()
    ]
    avg_difficulty = sum(difficulties) / len(difficulties)

    # 'unknown_word' should contribute the default difficulty 3.
    expected_avg_difficulty = (4 + 6 + 3) / 3

    # NOTE(review): this averages per-word lookups; get_text_difficulty itself
    # is not exercised by this test.
    assert abs(avg_difficulty - expected_avg_difficulty) < 1e-2
|
||
|
|
||
|
|
||
|
# 额外的测试用例
|
||
|
|
||
|
# 1. 测试输入大段文字的情况
|
||
|
def test_large_paragraph():
    """Average per-word difficulty over the five sample words repeated 1000 times."""
    base_words = ['apple', 'banana', 'education', 'intelligent', 'BBC']
    paragraph = ' '.join(base_words * 1000)  # 5000 words in total

    total = 0
    count = 0
    for token in paragraph.split():
        # Words absent from the sample mapping count as 3 without asking the estimator.
        if token in sample_word_difficulty_dict:
            total += estimator.get_word_difficulty(token)
        else:
            total += 3
        count += 1

    # Mean of the levels of 'apple', 'banana', 'education', 'intelligent', 'BBC'.
    expected_avg_difficulty = (4 + 6 + 5 + 7 + 8) / 5

    assert abs(total / count - expected_avg_difficulty) < 1e-2
|
||
|
|
||
|
|
||
|
# 2. 测试单词有多个难度标签的情况
|
||
|
# Fixture for the "multiple difficulty tags" test: maps each word to a level.
# The trailing comment on every entry is the label carried by the original fixture.
sample_word_difficulty_dict_multiple = dict(
    [
        ('apple', 5),  # OXFORD3000
        ('banana', 8),  # BBC
        ('education', 5),  # OXFORD3000
        ('intelligent', 7),  # OXFORD5000
        ('BBC', 8),  # BBC
    ]
)
|
||
|
|
||
|
|
||
|
def test_multiple_difficulty_tags():
    """Check the levels recorded in sample_word_difficulty_dict_multiple.

    Original author's note: 'apple' appears in both CET4 and OXFORD3000 and
    should take the higher of the two levels.
    """
    expected_levels = (
        ('apple', 5),  # OXFORD3000 is the harder tag
        ('banana', 8),  # BBC is the highest level
        ('education', 5),  # OXFORD3000
        ('intelligent', 7),  # OXFORD5000
        ('BBC', 8),  # BBC
    )
    # NOTE(review): only the fixture mapping is inspected here; no Vocabulary
    # method is called.
    for word, level in expected_levels:
        assert sample_word_difficulty_dict_multiple[word] == level
|
||
|
|
||
|
|
||
|
# 3. 测试所有单词的难度相同
|
||
|
def test_all_words_same_difficulty():
    """When every word has level 4, the mean level of the paragraph is 4."""
    words = ('apple', 'banana', 'education', 'intelligent', 'BBC')
    uniform_levels = dict.fromkeys(words, 4)

    # Each entry of the local mapping carries the shared level.
    for word in words:
        assert uniform_levels[word] == 4

    paragraph = 'apple banana education intelligent BBC'

    total = 0
    count = 0
    for token in paragraph.split():
        # 3 stands in for any word missing from the mapping.
        total += uniform_levels.get(token, 3)
        count += 1

    assert total / count == 4
|
||
|
|
||
|
|
||
|
# 4. 测试长文本包含多种不同的单词
|
||
|
def test_mixed_difficulty_text():
    """Average word difficulty of a paragraph mixing known and unknown words.

    Every word is looked up through estimator.get_word_difficulty. Previously
    the test substituted 3 itself for words missing from the sample mapping,
    so the estimator's handling of unknown words was never exercised.
    """
    paragraph = 'apple banana unknown_word random_word BBC intelligent education'

    difficulties = [
        estimator.get_word_difficulty(word) for word in paragraph.split()
    ]
    avg_difficulty = sum(difficulties) / len(difficulties)

    # The two unknown words are each expected to contribute the default 3.
    expected_avg_difficulty = (4 + 6 + 3 + 3 + 8 + 7 + 5) / 7

    # NOTE(review): this averages per-word lookups; get_text_difficulty itself
    # is not exercised by this test.
    assert abs(avg_difficulty - expected_avg_difficulty) < 1e-2
|
||
|
|
||
|
|
||
|
# 5. 测试多次调用 get_word_difficulty 对同一单词
|
||
|
def test_repeated_get_word_difficulty():
    """Looking up the same word twice must yield the same difficulty."""
    word = 'banana'
    # Use the local variable for both calls (it was previously assigned but
    # unused while the literal was repeated).
    difficulty_first = estimator.get_word_difficulty(word)
    difficulty_second = estimator.get_word_difficulty(word)

    # The returned difficulty must be identical on every call.
    assert difficulty_first == difficulty_second
|
||
|
|
||
|
|
||
|
# 6. 测试难度返回默认值
|
||
|
def test_default_difficulty_for_unknown_words():
    """A word absent from the mapping is expected to get the default difficulty 3."""
    unknown_word = 'xyz'
    # Pass the local variable (it was previously assigned but unused while the
    # literal was repeated in the call).
    difficulty = estimator.get_word_difficulty(unknown_word)

    # The default value is 3.
    assert difficulty == 3
|
||
|
|
||
|
|
||
|
# 7. 测试复杂的段落
|
||
|
def test_complex_paragraph_difficulty():
    """Average word difficulty of a longer paragraph with known and unknown words.

    Every word is looked up through estimator.get_word_difficulty. Previously
    the test substituted 3 itself for words missing from the sample mapping,
    so the estimator's handling of unknown words was never exercised.
    """
    paragraph = 'apple banana unknown_word random_word BBC intelligent education'

    difficulties = [
        estimator.get_word_difficulty(word) for word in paragraph.split()
    ]
    avg_difficulty = sum(difficulties) / len(difficulties)

    # The two unknown words are each expected to contribute the default 3.
    expected_avg_difficulty = (4 + 6 + 3 + 3 + 8 + 7 + 5) / 7

    # NOTE(review): this averages per-word lookups; get_text_difficulty itself
    # is not exercised by this test.
    assert abs(avg_difficulty - expected_avg_difficulty) < 1e-2
|
||
|
|
||
|
|
||
|
# 8. 测试特殊字符和标点符号
|
||
|
def test_paragraph_with_punctuation():
    """Average per-word difficulty of a paragraph whose words carry punctuation."""
    paragraph = 'apple, banana; education! intelligent... BBC?'

    # Split on whitespace only, then trim punctuation from both ends of each
    # token. Real text may need a more elaborate tokenizer.
    cleaned_words = []
    for raw_token in paragraph.split():
        cleaned_words.append(raw_token.strip('.,;!?'))

    total = 0
    for word in cleaned_words:
        # Words absent from the sample mapping count as 3 without asking the estimator.
        if word not in sample_word_difficulty_dict:
            total += 3
        else:
            total += estimator.get_word_difficulty(word)

    avg_difficulty = total / len(cleaned_words)
    expected_avg_difficulty = (4 + 6 + 5 + 7 + 8) / 5

    assert abs(avg_difficulty - expected_avg_difficulty) < 1e-2
|
||
|
|
||
|
|
||
|
# 运行测试
|
||
|
# Run the tests when this file is executed directly as a script.
if __name__ == '__main__':
    # pytest.main() returns the run's exit code; raising SystemExit with it
    # makes a failing run end with a non-zero process exit status instead of
    # always exiting 0.
    raise SystemExit(pytest.main())
|