# EnglishPal/app/test_estimator.py

import pytest
from difficulty import VocabularyLevelEstimator

@pytest.fixture
def estimator():
    """Provide a VocabularyLevelEstimator built from the word-data file path."""
    # NOTE(review): this path reads like a placeholder -- confirm it points
    # at a real word-data file before relying on these tests.
    word_data_path = 'path/to/your/actual/word_data.p'
    return VocabularyLevelEstimator(word_data_path)

class TestVocabularyLevelEstimator:
    """Checks for VocabularyLevelEstimator across typical, boundary,
    malformed and edge-case inputs.

    Every test receives the ``estimator`` fixture. Throughout, 3 is the
    value these tests expect back when the input has nothing usable to
    score (the "default level").
    """

    # ---- Typical inputs ----

    def test_normal_text_estimation(self, estimator):
        """Ordinary English prose should give a float level within [3, 8]."""
        passage = """The quick brown fox jumps over the lazy dog.
                 This text contains common English words that
                 should be processed without any issues."""
        result = estimator.estimate_text_level(passage)
        assert isinstance(result, float)
        # The tests expect difficulty levels to stay inside the 3..8 band.
        assert 3 <= result <= 8

    def test_normal_user_level(self, estimator):
        """A small, well-formed word history should give a float in [3, 8]."""
        first_day, second_day = '20240101', '20240102'
        history = {
            'algorithm': [first_day],
            'computer': [first_day, second_day],
            'programming': [first_day],
        }
        result = estimator.estimate_user_level(history)
        assert isinstance(result, float)
        assert 3 <= result <= 8

    def test_normal_word_level(self, estimator):
        """Common words should be rated at level 3 or above."""
        for word in ('computer', 'algorithm'):
            assert estimator.get_word_level(word) >= 3

    # ---- Boundary inputs ----

    def test_empty_text(self, estimator):
        """An empty string should be rated at the default level (3)."""
        result = estimator.estimate_text_level('')
        assert result == 3

    def test_single_word_text(self, estimator):
        """A one-word text should still produce a float."""
        result = estimator.estimate_text_level('Hello')
        assert isinstance(result, float)

    def test_empty_user_history(self, estimator):
        """An empty history should be rated at the default level (3)."""
        result = estimator.estimate_user_level({})
        assert result == 3

    def test_maximum_word_length(self, estimator):
        """A 100-character word should be rated at the default level (3)."""
        very_long_word = 'a' * 100
        result = estimator.get_word_level(very_long_word)
        assert result == 3

    # ---- Malformed / unexpected inputs ----

    def test_non_english_text(self, estimator):
        """Chinese text should be rated at the default level (3)."""
        sample = "这是中文文本"
        result = estimator.estimate_text_level(sample)
        assert result == 3

    def test_special_characters(self, estimator):
        """Punctuation-only text should be rated at the default level (3)."""
        symbols = "@#$%^&*()"
        result = estimator.estimate_text_level(symbols)
        assert result == 3

    def test_invalid_word_history(self, estimator):
        """A history whose value is a string, not a list, should raise ValueError."""
        malformed = {'word': 'not_a_list'}
        with pytest.raises(ValueError):
            estimator.estimate_user_level(malformed)

    def test_none_input(self, estimator):
        """Each of the three estimator methods should raise TypeError on None."""
        entry_points = (
            estimator.estimate_text_level,
            estimator.estimate_user_level,
            estimator.get_word_level,
        )
        for entry_point in entry_points:
            with pytest.raises(TypeError):
                entry_point(None)

    # ---- Edge cases ----

    def test_mixed_case_words(self, estimator):
        """Word lookup should give the same level regardless of letter case."""
        capitalised_level = estimator.get_word_level('Computer')
        lowercase_level = estimator.get_word_level('computer')
        assert capitalised_level == lowercase_level

    def test_whitespace_handling(self, estimator):
        """Leading, trailing and repeated spaces should still produce a float."""
        padded = "   Multiple    Spaces    Between    Words   "
        result = estimator.estimate_text_level(padded)
        assert isinstance(result, float)

    def test_repeated_words(self, estimator):
        """Text made of one word repeated should still produce a float."""
        repeated = "word word word word word"
        result = estimator.estimate_text_level(repeated)
        assert isinstance(result, float)

    def test_numeric_input(self, estimator):
        """Digit-only text should be rated at the default level (3)."""
        result = estimator.estimate_text_level("123 456 789")
        assert result == 3

    def test_mixed_content(self, estimator):
        """Text mixing letters, digits and symbols should still produce a float."""
        jumble = "Hello123 @World! 456"
        result = estimator.estimate_text_level(jumble)
        assert isinstance(result, float)