"""Tests for the word and text difficulty values reported by ``Vocabulary``."""

import pytest

from vocabulary import Vocabulary

# Sample vocabulary: word -> difficulty level. The trailing comment on each
# entry is the label the fixture author attached to that level.
sample_word_difficulty_dict = {
    'apple': 4,  # CET4
    'banana': 6,  # CET6
    'education': 5,  # OXFORD3000
    'intelligent': 7,  # OXFORD5000
    'BBC': 8,  # BBC
}

# Estimator instance shared by the tests below.
estimator = Vocabulary(sample_word_difficulty_dict)


# Tests: normal input
def test_get_word_difficulty():
    """Known words report the difficulty stored in the sample dictionary."""
    assert estimator.get_word_difficulty('apple') == 4
    assert estimator.get_word_difficulty('banana') == 6
    assert estimator.get_word_difficulty('education') == 5
    assert estimator.get_word_difficulty('intelligent') == 7
    assert estimator.get_word_difficulty('BBC') == 8


def test_get_text_difficulty():
    """The text difficulty of the five sample words matches the pinned value."""
    paragraph = 'apple banana education intelligent BBC'
    avg_difficulty = estimator.get_text_difficulty(paragraph)
    # Original author's note: the difficulty value comes from a geometric mean.
    # NOTE(review): 6 - 1.2106... ~= 4.7894, which is the geometric mean of
    # (4, 6, 5, 7, 3), not of (4, 6, 5, 7, 8) -- presumably 'BBC' is scored as
    # an unknown word (3) inside get_text_difficulty; confirm against the
    # Vocabulary implementation.
    expected_avg_difficulty = (4 + 6 + 5 + 7 + 8) / 5 - 1.2106110468130113
    assert abs(avg_difficulty - expected_avg_difficulty) < 1e-2  # tolerance


# Tests: boundary input
def test_empty_paragraph():
    """An empty paragraph has a text difficulty of 0."""
    paragraph = ''
    avg_difficulty = estimator.get_text_difficulty(paragraph)
    assert avg_difficulty == 0


def test_single_word():
    """A one-word paragraph has that word's difficulty."""
    paragraph = 'apple'
    avg_difficulty = estimator.get_text_difficulty(paragraph)
    assert avg_difficulty == 4  # the difficulty of 'apple' is 4


# Tests: unexpected input
def test_word_not_in_dict():
    """A word missing from the dictionary gets the default difficulty 3."""
    assert estimator.get_word_difficulty('unknown_word') == 3


def test_paragraph_with_unknown_words():
    """Averaging per-word difficulties counts an unknown word as 3."""
    paragraph = 'apple banana unknown_word'
    word_list = paragraph.split()
    # NOTE(review): words missing from the sample dictionary are given the
    # literal 3 here without calling the estimator, and the arithmetic mean is
    # computed by the test itself, so get_text_difficulty is not exercised.
    difficulties = [
        estimator.get_word_difficulty(word)
        if word in sample_word_difficulty_dict
        else 3
        for word in word_list
    ]
    avg_difficulty = sum(difficulties) / len(difficulties)
    expected_avg_difficulty = (4 + 6 + 3) / 3  # 'unknown_word' counts as 3
    assert abs(avg_difficulty - expected_avg_difficulty) < 1e-2


# Additional test cases
# 1. Large block of text
def test_large_paragraph():
    """A 5000-word paragraph averages to the mean of the five sample levels."""
    # The five sample words repeated 1000 times.
    paragraph = ' '.join(
        ['apple', 'banana', 'education', 'intelligent', 'BBC'] * 1000
    )
    word_list = paragraph.split()
    # Words missing from the sample dictionary would be counted as 3; every
    # word in this paragraph is present in it.
    difficulties = [
        estimator.get_word_difficulty(word)
        if word in sample_word_difficulty_dict
        else 3
        for word in word_list
    ]
    avg_difficulty = sum(difficulties) / len(difficulties)
    # 'apple', 'banana', 'education', 'intelligent', 'BBC'
    expected_avg_difficulty = (4 + 6 + 5 + 7 + 8) / 5
    assert abs(avg_difficulty - expected_avg_difficulty) < 1e-2


# 2. Words that carry more than one difficulty tag
sample_word_difficulty_dict_multiple = {
    'apple': 5,  # OXFORD3000
    'banana': 8,  # BBC
    'education': 5,  # OXFORD3000
    'intelligent': 7,  # OXFORD5000
    'BBC': 8,  # BBC
}


def test_multiple_difficulty_tags():
    """The multi-tag fixture stores the expected level for each word.

    NOTE(review): this only reads back the dictionary literal defined above;
    it does not call Vocabulary, so no tag-merging logic is exercised.
    """
    difficulty_dict = sample_word_difficulty_dict_multiple
    # Original author's intent: 'apple' appears in both CET4 and OXFORD3000,
    # and the highest difficulty should win.
    assert difficulty_dict['apple'] == 5  # OXFORD3000 is the higher level
    assert difficulty_dict['banana'] == 8  # BBC is the highest level
    assert difficulty_dict['education'] == 5  # OXFORD3000
    assert difficulty_dict['intelligent'] == 7  # OXFORD5000
    assert difficulty_dict['BBC'] == 8  # BBC


# 3. All words share the same difficulty
def test_all_words_same_difficulty():
    """When every word has difficulty 4, the arithmetic mean is exactly 4."""
    sample_word_difficulty_dict_same = {
        'apple': 4,
        'banana': 4,
        'education': 4,
        'intelligent': 4,
        'BBC': 4,
    }
    difficulty_dict = sample_word_difficulty_dict_same
    assert difficulty_dict['apple'] == 4
    assert difficulty_dict['banana'] == 4
    assert difficulty_dict['education'] == 4
    assert difficulty_dict['intelligent'] == 4
    assert difficulty_dict['BBC'] == 4

    paragraph = 'apple banana education intelligent BBC'
    word_list = paragraph.split()
    # Unknown words would fall back to 3; none occur in this paragraph.
    difficulties = [difficulty_dict.get(word, 3) for word in word_list]
    avg_difficulty = sum(difficulties) / len(difficulties)
    assert avg_difficulty == 4
# 4. Longer text containing a mix of different words
def test_mixed_difficulty_text():
    """Known and unknown words average together, unknown ones counting as 3."""
    paragraph = 'apple banana unknown_word random_word BBC intelligent education'
    word_list = paragraph.split()
    # Words missing from the sample dictionary are given the literal 3 here
    # without calling the estimator.
    difficulties = [
        estimator.get_word_difficulty(word)
        if word in sample_word_difficulty_dict
        else 3
        for word in word_list
    ]
    avg_difficulty = sum(difficulties) / len(difficulties)
    # Includes the two unknown words.
    expected_avg_difficulty = (4 + 6 + 3 + 3 + 8 + 7 + 5) / 7
    assert abs(avg_difficulty - expected_avg_difficulty) < 1e-2


# 5. Repeated get_word_difficulty calls for the same word
def test_repeated_get_word_difficulty():
    """Two lookups of the same word return the same difficulty."""
    word = 'banana'
    difficulty_first = estimator.get_word_difficulty(word)
    difficulty_second = estimator.get_word_difficulty(word)
    assert difficulty_first == difficulty_second  # stable across calls


# 6. Default difficulty
def test_default_difficulty_for_unknown_words():
    """A word missing from the dictionary gets the default difficulty 3."""
    unknown_word = 'xyz'
    difficulty = estimator.get_word_difficulty(unknown_word)
    assert difficulty == 3  # the default value is 3


# 7. Complex paragraph
def test_complex_paragraph_difficulty():
    """Known and unknown words average together, unknown ones counting as 3.

    NOTE(review): the paragraph and expectation are the same as in
    test_mixed_difficulty_text.
    """
    paragraph = 'apple banana unknown_word random_word BBC intelligent education'
    word_list = paragraph.split()
    difficulties = [
        estimator.get_word_difficulty(word)
        if word in sample_word_difficulty_dict
        else 3
        for word in word_list
    ]
    avg_difficulty = sum(difficulties) / len(difficulties)
    expected_avg_difficulty = (4 + 6 + 3 + 3 + 8 + 7 + 5) / 7
    assert abs(avg_difficulty - expected_avg_difficulty) < 1e-2


# 8. Special characters and punctuation
def test_paragraph_with_punctuation():
    """Punctuation stripped by the test leaves the five sample words."""
    paragraph = 'apple, banana; education! intelligent... BBC?'
    # Assumes whitespace-separated words; real text may need more elaborate
    # splitting logic to deal with punctuation.
    word_list = paragraph.split()
    # Drop leading/trailing punctuation from each token.
    word_list = [word.strip('.,;!?') for word in word_list]
    difficulties = [
        estimator.get_word_difficulty(word)
        if word in sample_word_difficulty_dict
        else 3
        for word in word_list
    ]
    avg_difficulty = sum(difficulties) / len(difficulties)
    expected_avg_difficulty = (4 + 6 + 5 + 7 + 8) / 5
    assert abs(avg_difficulty - expected_avg_difficulty) < 1e-2


# Run the tests when executed as a script.
if __name__ == '__main__':
    pytest.main()