# EnglishPal/app/test_estimator.py

import pytest
from vocabulary import Vocabulary

# Sample vocabulary dictionary: maps each word to its difficulty level.
# The trailing comment on each entry names the word list the level is labelled with.
sample_word_difficulty_dict = {
    'apple': 4,  # CET4
    'banana': 6,  # CET6
    'education': 5,  # OXFORD3000
    'intelligent': 7,  # OXFORD5000
    'BBC': 8  # BBC
}

# Instantiate Vocabulary once at import time; the tests below share this object.
estimator = Vocabulary(sample_word_difficulty_dict)

# Tests: normal input
def test_get_word_difficulty():
    """Every known word must report the level stored in the sample dictionary."""
    # (word, expected level) pairs, checked in the same order as the fixture lists them.
    expected_levels = (
        ('apple', 4),
        ('banana', 6),
        ('education', 5),
        ('intelligent', 7),
        ('BBC', 8),
    )
    for known_word, level in expected_levels:
        assert estimator.get_word_difficulty(known_word) == level

def test_get_text_difficulty():
    """Check get_text_difficulty on a paragraph built from the five known words."""
    known_levels = (4, 6, 5, 7, 8)
    arithmetic_mean = sum(known_levels) / len(known_levels)
    # The original author derived this offset from a geometric-mean based
    # computation of the difficulty value.
    # NOTE(review): the constant cannot be re-derived from this file alone -
    # confirm it against the get_text_difficulty implementation.
    target = arithmetic_mean - 1.2106110468130113

    measured = estimator.get_text_difficulty('apple banana education intelligent BBC')

    # Allow a small tolerance around the expected value.
    deviation = abs(measured - target)
    assert deviation < 1e-2


# Tests: boundary input
def test_empty_paragraph():
    """An empty paragraph is expected to yield a text difficulty of 0."""
    assert estimator.get_text_difficulty('') == 0


def test_single_word():
    """A paragraph holding only 'apple' is expected to score apple's own level, 4."""
    lone_word_score = estimator.get_text_difficulty('apple')
    assert lone_word_score == 4


# Tests: abnormal input
def test_word_not_in_dict():
    """A word missing from the dictionary must fall back to the default difficulty 3."""
    fallback_level = estimator.get_word_difficulty('unknown_word')
    assert fallback_level == 3


def test_paragraph_with_unknown_words():
    """Average the per-word difficulty of a paragraph that contains an unknown word.

    Every word, including the unknown one, is looked up through the estimator.
    The previous version substituted a literal 3 for unknown words inside the
    test itself, so the estimator's default-difficulty path was never run here.
    """
    paragraph = 'apple banana unknown_word'

    difficulties = [
        estimator.get_word_difficulty(word) for word in paragraph.split()
    ]
    avg_difficulty = sum(difficulties) / len(difficulties)
    # 'unknown_word' should come back with the default difficulty 3.
    expected_avg_difficulty = (4 + 6 + 3) / 3

    assert abs(avg_difficulty - expected_avg_difficulty) < 1e-2


# Additional test cases

# 1. A large block of text
def test_large_paragraph():
    """Average the per-word difficulty over the five known words repeated 1000 times.

    All words are known, so the former "else 3" fallback in the test was a dead
    branch; each word is now simply looked up through the estimator.
    """
    paragraph = ' '.join(['apple', 'banana', 'education', 'intelligent', 'BBC'] * 1000)  # repeated 1000 times

    difficulties = [
        estimator.get_word_difficulty(word) for word in paragraph.split()
    ]
    avg_difficulty = sum(difficulties) / len(difficulties)
    # Repetition does not change the mean of 'apple', 'banana', 'education', 'intelligent', 'BBC'.
    expected_avg_difficulty = (4 + 6 + 5 + 7 + 8) / 5

    assert abs(avg_difficulty - expected_avg_difficulty) < 1e-2


# 2. Words that carry more than one difficulty tag
# Each entry already holds the single level chosen for the word.
sample_word_difficulty_dict_multiple = {
    'apple': 5,  # OXFORD3000
    'banana': 8,  # BBC
    'education': 5,  # OXFORD3000
    'intelligent': 7,  # OXFORD5000
    'BBC': 8  # BBC
}


def test_multiple_difficulty_tags():
    """Look up multi-tag words through a Vocabulary built from the dictionary above.

    The previous version only compared the dictionary literal with itself and
    never touched the code under test. The lookups now go through
    Vocabulary.get_word_difficulty.

    NOTE(review): the dictionary stores already-resolved levels, so the
    "take the highest tag" merging rule itself is still not exercised here.
    """
    multi_tag_estimator = Vocabulary(sample_word_difficulty_dict_multiple)

    expected_levels = {
        'apple': 5,  # in both CET4 and OXFORD3000; the higher OXFORD3000 level wins
        'banana': 8,  # BBC is the highest level
        'education': 5,  # OXFORD3000
        'intelligent': 7,  # OXFORD5000
        'BBC': 8,  # BBC
    }
    for word, level in expected_levels.items():
        assert multi_tag_estimator.get_word_difficulty(word) == level


# 3. All words share the same difficulty
def test_all_words_same_difficulty():
    """When every word has level 4, each lookup and their average must be 4.

    The previous version asserted the dictionary literal against itself and
    averaged it with dict.get, so no estimator code was run. The lookups now
    go through a Vocabulary built from the uniform dictionary.
    """
    sample_word_difficulty_dict_same = {
        'apple': 4,
        'banana': 4,
        'education': 4,
        'intelligent': 4,
        'BBC': 4
    }
    uniform_estimator = Vocabulary(sample_word_difficulty_dict_same)

    paragraph = 'apple banana education intelligent BBC'
    difficulties = [
        uniform_estimator.get_word_difficulty(word) for word in paragraph.split()
    ]

    # Every individual word reports level 4 ...
    assert difficulties == [4, 4, 4, 4, 4]

    # ... and therefore so does their arithmetic mean.
    avg_difficulty = sum(difficulties) / len(difficulties)
    assert avg_difficulty == 4


# 4. Longer text mixing several different words
def test_mixed_difficulty_text():
    """Average the per-word difficulty of a text mixing known and unknown words.

    Unknown words are looked up through the estimator instead of being replaced
    by a literal 3 inside the test, so the default-difficulty path is exercised.
    """
    paragraph = 'apple banana unknown_word random_word BBC intelligent education'

    difficulties = [
        estimator.get_word_difficulty(word) for word in paragraph.split()
    ]

    avg_difficulty = sum(difficulties) / len(difficulties)
    # The two unknown words each contribute the default difficulty 3.
    expected_avg_difficulty = (4 + 6 + 3 + 3 + 8 + 7 + 5) / 7

    assert abs(avg_difficulty - expected_avg_difficulty) < 1e-2


# 5. Repeated get_word_difficulty calls for the same word
def test_repeated_get_word_difficulty():
    """Two consecutive lookups of the same word must return the same difficulty."""
    word = 'banana'
    # Use the local instead of repeating the literal, so both calls are
    # guaranteed to query the same word.
    difficulty_first = estimator.get_word_difficulty(word)
    difficulty_second = estimator.get_word_difficulty(word)

    assert difficulty_first == difficulty_second  # the result must be stable across calls


# 6. Default difficulty is returned
def test_default_difficulty_for_unknown_words():
    """A word absent from the dictionary must get the default difficulty 3."""
    unknown_word = 'xyz'
    # Pass the local rather than a second copy of the literal (it was unused before).
    difficulty = estimator.get_word_difficulty(unknown_word)

    assert difficulty == 3  # the default value is 3


# 7. A more complex paragraph
def test_complex_paragraph_difficulty():
    """Average the per-word difficulty of a paragraph with known and unknown words.

    Unknown words are looked up through the estimator instead of being replaced
    by a literal 3 inside the test.

    NOTE(review): the paragraph and expectation are identical to
    test_mixed_difficulty_text; consider merging the two or giving this one a
    distinct input.
    """
    paragraph = 'apple banana unknown_word random_word BBC intelligent education'

    difficulties = [
        estimator.get_word_difficulty(word) for word in paragraph.split()
    ]

    avg_difficulty = sum(difficulties) / len(difficulties)
    expected_avg_difficulty = (4 + 6 + 3 + 3 + 8 + 7 + 5) / 7

    assert abs(avg_difficulty - expected_avg_difficulty) < 1e-2


# 8. Special characters and punctuation
def test_paragraph_with_punctuation():
    """Average the per-word difficulty after stripping punctuation from each token.

    Once the punctuation is stripped every token is a known word, so the former
    "else 3" fallback in the test was dead; each token is now looked up through
    the estimator directly.

    NOTE(review): the punctuation is removed by the test itself, so this does
    not check how the estimator tokenizes punctuated text.
    """
    paragraph = 'apple, banana; education! intelligent... BBC?'

    # Split on whitespace, then trim leading/trailing punctuation from each token.
    # More elaborate tokenization may be needed for punctuation inside a token.
    word_list = [word.strip('.,;!?') for word in paragraph.split()]

    difficulties = [estimator.get_word_difficulty(word) for word in word_list]

    avg_difficulty = sum(difficulties) / len(difficulties)
    expected_avg_difficulty = (4 + 6 + 5 + 7 + 8) / 5

    assert abs(avg_difficulty - expected_avg_difficulty) < 1e-2


# Run the tests when this file is executed directly
if __name__ == '__main__':
    # pytest.main() returns the run's exit code; propagate it so that a failing
    # run does not make the script exit with status 0.
    raise SystemExit(pytest.main())
Initial commit 2025-04-17 01:32:45 +08:00			`import pytest`
			`from vocabulary import Vocabulary`

			`# 示例词汇字典`
			`sample_word_difficulty_dict = {`
			`'apple': 4, # CET4`
			`'banana': 6, # CET6`
			`'education': 5, # OXFORD3000`
			`'intelligent': 7, # OXFORD5000`
			`'BBC': 8 # BBC`
			`}`

			`# 实例化 Vocabulary`
			`estimator = Vocabulary(sample_word_difficulty_dict)`

			`# 测试：正常输入`
			`def test_get_word_difficulty():`
			`# 对于已知的单词，直接使用 get_word_difficulty 方法获取难度`
			`assert estimator.get_word_difficulty('apple') == 4`
			`assert estimator.get_word_difficulty('banana') == 6`
			`assert estimator.get_word_difficulty('education') == 5`
			`assert estimator.get_word_difficulty('intelligent') == 7`
			`assert estimator.get_word_difficulty('BBC') == 8`

			`def test_get_text_difficulty():`
			`paragraph = 'apple banana education intelligent BBC'`

			`# 使用 get_text_difficulty 方法计算文本的平均难度`
			`avg_difficulty = estimator.get_text_difficulty(paragraph)`
			`expected_avg_difficulty = (4 + 6 + 5 + 7 + 8) / 5 - 1.2106110468130113# 几何平均算得难度值`

			`assert abs(avg_difficulty - expected_avg_difficulty) < 1e-2 # 允许误差范围`


			`# 测试：边界输入`
			`def test_empty_paragraph():`
			`paragraph = ''`
			`avg_difficulty = estimator.get_text_difficulty(paragraph)`
			`assert avg_difficulty == 0`


			`def test_single_word():`
			`paragraph = 'apple'`
			`avg_difficulty = estimator.get_text_difficulty(paragraph)`
			`assert avg_difficulty == 4 # 'apple' 的难度应该是 4`


			`# 测试：异常输入`
			`def test_word_not_in_dict():`
			`# 确保未知单词返回默认难度 3`
			`assert estimator.get_word_difficulty('unknown_word') == 3`


			`def test_paragraph_with_unknown_words():`
			`paragraph = 'apple banana unknown_word'`

			`word_list = paragraph.split()`
			`difficulties = [`
			`estimator.get_word_difficulty(word) if word in sample_word_difficulty_dict else 3`
			`for word in word_list`
			`]`
			`avg_difficulty = sum(difficulties) / len(difficulties)`
			`expected_avg_difficulty = (4 + 6 + 3) / 3 # 'unknown_word' 应返回默认难度 3`

			`assert abs(avg_difficulty - expected_avg_difficulty) < 1e-2`


			`# 额外的测试用例`

			`# 1. 测试输入大段文字的情况`
			`def test_large_paragraph():`
			`paragraph = ' '.join(['apple', 'banana', 'education', 'intelligent', 'BBC'] * 1000) # 重复1000次`
			`word_list = paragraph.split()`

			`difficulties = [`
			`estimator.get_word_difficulty(word) if word in sample_word_difficulty_dict else 3`
			`for word in word_list`
			`]`
			`avg_difficulty = sum(difficulties) / len(difficulties)`
			`expected_avg_difficulty = (4 + 6 + 5 + 7 + 8) / 5 # 'apple', 'banana', 'education', 'intelligent', 'BBC'`

			`assert abs(avg_difficulty - expected_avg_difficulty) < 1e-2`


			`# 2. 测试单词有多个难度标签的情况`
			`sample_word_difficulty_dict_multiple = {`
			`'apple': 5, # OXFORD3000`
			`'banana': 8, # BBC`
			`'education': 5, # OXFORD3000`
			`'intelligent': 7, # OXFORD5000`
			`'BBC': 8 # BBC`
			`}`


			`def test_multiple_difficulty_tags():`
			`difficulty_dict = sample_word_difficulty_dict_multiple`
			`# apple 出现在 CET4 和 OXFORD3000 中，应该取最大难度`
			`assert difficulty_dict['apple'] == 5 # OXFORD3000 的难度更高`
			`assert difficulty_dict['banana'] == 8 # BBC 是最高的难度`
			`assert difficulty_dict['education'] == 5 # OXFORD3000`
			`assert difficulty_dict['intelligent'] == 7 # OXFORD5000`
			`assert difficulty_dict['BBC'] == 8 # BBC`


			`# 3. 测试所有单词的难度相同`
			`def test_all_words_same_difficulty():`
			`sample_word_difficulty_dict_same = {`
			`'apple': 4,`
			`'banana': 4,`
			`'education': 4,`
			`'intelligent': 4,`
			`'BBC': 4`
			`}`

			`difficulty_dict = sample_word_difficulty_dict_same`

			`assert difficulty_dict['apple'] == 4`
			`assert difficulty_dict['banana'] == 4`
			`assert difficulty_dict['education'] == 4`
			`assert difficulty_dict['intelligent'] == 4`
			`assert difficulty_dict['BBC'] == 4`

			`paragraph = 'apple banana education intelligent BBC'`
			`word_list = paragraph.split()`

			`difficulties = [`
			`difficulty_dict.get(word, 3) for word in word_list`
			`]`

			`avg_difficulty = sum(difficulties) / len(difficulties)`
			`assert avg_difficulty == 4`


			`# 4. 测试长文本包含多种不同的单词`
			`def test_mixed_difficulty_text():`
			`paragraph = 'apple banana unknown_word random_word BBC intelligent education'`

			`word_list = paragraph.split()`
			`difficulties = [`
			`estimator.get_word_difficulty(word) if word in sample_word_difficulty_dict else 3`
			`for word in word_list`
			`]`

			`avg_difficulty = sum(difficulties) / len(difficulties)`
			`expected_avg_difficulty = (4 + 6 + 3 + 3 + 8 + 7 + 5) / 7 # 包括未知单词`

			`assert abs(avg_difficulty - expected_avg_difficulty) < 1e-2`


			`# 5. 测试多次调用 get_word_difficulty 对同一单词`
			`def test_repeated_get_word_difficulty():`
			`word = 'banana'`
			`difficulty_first = estimator.get_word_difficulty('banana')`
			`difficulty_second = estimator.get_word_difficulty('banana')`

			`assert difficulty_first == difficulty_second # 确保每次返回的难度一致`


			`# 6. 测试难度返回默认值`
			`def test_default_difficulty_for_unknown_words():`
			`unknown_word = 'xyz'`
			`difficulty = estimator.get_word_difficulty('xyz')`

			`assert difficulty == 3 # 默认值是3`


			`# 7. 测试复杂的段落`
			`def test_complex_paragraph_difficulty():`
			`paragraph = 'apple banana unknown_word random_word BBC intelligent education'`

			`word_list = paragraph.split()`
			`difficulties = [`
			`estimator.get_word_difficulty(word) if word in sample_word_difficulty_dict else 3`
			`for word in word_list`
			`]`

			`avg_difficulty = sum(difficulties) / len(difficulties)`
			`expected_avg_difficulty = (4 + 6 + 3 + 3 + 8 + 7 + 5) / 7`

			`assert abs(avg_difficulty - expected_avg_difficulty) < 1e-2`


			`# 8. 测试特殊字符和标点符号`
			`def test_paragraph_with_punctuation():`
			`paragraph = 'apple, banana; education! intelligent... BBC?'`

			`word_list = paragraph.split() # 假设是通过空格分隔，实际上你可能需要更复杂的分割逻辑来处理标点`
			`word_list = [word.strip('.,;!?') for word in word_list] # 去掉标点`

			`difficulties = [`
			`estimator.get_word_difficulty(word) if word in sample_word_difficulty_dict else 3`
			`for word in word_list`
			`]`

			`avg_difficulty = sum(difficulties) / len(difficulties)`
			`expected_avg_difficulty = (4 + 6 + 5 + 7 + 8) / 5`

			`assert abs(avg_difficulty - expected_avg_difficulty) < 1e-2`


			`# 运行测试`
			`if __name__ == '__main__':`
			`pytest.main()`