"""Pytest suite for ``VocabularyLevelEstimator`` from the ``difficulty`` module.

The tests are grouped into normal inputs, boundary inputs, abnormal inputs
and edge cases. Several of them assert that ``3`` is returned when there is
nothing meaningful to score; the inline comments call this the default level.
"""

import pytest

from difficulty import VocabularyLevelEstimator


@pytest.fixture
def estimator():
    """Return a fresh ``VocabularyLevelEstimator`` for each test.

    NOTE(review): the path below looks like a placeholder; it presumably has
    to point at the real word-data file before the suite can run -- confirm.
    """
    return VocabularyLevelEstimator('path/to/your/actual/word_data.p')


class TestVocabularyLevelEstimator:
    """Checks for ``estimate_text_level``, ``estimate_user_level`` and
    ``get_word_level``."""

    # Normal input tests
    def test_normal_text_estimation(self, estimator):
        """Ordinary English prose yields a float level within 3..8."""
        text = (
            "The quick brown fox jumps over the lazy dog. "
            "This text contains common English words that should be "
            "processed without any issues."
        )
        level = estimator.estimate_text_level(text)
        assert isinstance(level, float)
        assert 3 <= level <= 8  # Difficulty levels should be between 3-8

    def test_normal_user_level(self, estimator):
        """A well-formed word history yields a float level within 3..8.

        The history maps each word to a list of date strings.
        """
        word_history = {
            'algorithm': ['20240101'],
            'computer': ['20240101', '20240102'],
            'programming': ['20240101'],
        }
        level = estimator.estimate_user_level(word_history)
        assert isinstance(level, float)
        assert 3 <= level <= 8

    def test_normal_word_level(self, estimator):
        """Common words are rated at level 3 or above."""
        assert estimator.get_word_level('computer') >= 3
        assert estimator.get_word_level('algorithm') >= 3

    # Boundary input tests
    def test_empty_text(self, estimator):
        """An empty string is rated at the default level."""
        assert estimator.estimate_text_level('') == 3  # Default level

    def test_single_word_text(self, estimator):
        """A single-word text still produces a float level."""
        assert isinstance(estimator.estimate_text_level('Hello'), float)

    def test_empty_user_history(self, estimator):
        """An empty word history is rated at the default level."""
        assert estimator.estimate_user_level({}) == 3  # Default level

    def test_maximum_word_length(self, estimator):
        """A 100-character nonsense word is rated at the default level."""
        long_word = 'a' * 100
        assert estimator.get_word_level(long_word) == 3  # Default level

    # Abnormal input tests
    def test_non_english_text(self, estimator):
        """Text made only of Chinese characters is rated at the default level."""
        chinese_text = "这是中文文本"
        assert estimator.estimate_text_level(chinese_text) == 3  # Default level

    def test_special_characters(self, estimator):
        """Text made only of punctuation/symbols is rated at the default level."""
        special_chars = "@#$%^&*()"
        assert estimator.estimate_text_level(special_chars) == 3  # Default level

    def test_invalid_word_history(self, estimator):
        """A history whose value is a plain string (not a list) raises ValueError."""
        invalid_history = {'word': 'not_a_list'}
        with pytest.raises(ValueError):
            estimator.estimate_user_level(invalid_history)

    def test_none_input(self, estimator):
        """Each public method raises TypeError when given ``None``."""
        with pytest.raises(TypeError):
            estimator.estimate_text_level(None)
        with pytest.raises(TypeError):
            estimator.estimate_user_level(None)
        with pytest.raises(TypeError):
            estimator.get_word_level(None)

    # Edge cases
    def test_mixed_case_words(self, estimator):
        """Word lookup is case-insensitive: 'Computer' and 'computer' match."""
        assert estimator.get_word_level('Computer') == estimator.get_word_level('computer')

    def test_whitespace_handling(self, estimator):
        """Irregular whitespace around and between words still yields a float."""
        # Leading/trailing runs, repeated spaces, a tab and newlines: single
        # spaces alone would not exercise whitespace handling at all.
        text_with_spaces = "  Multiple   Spaces\tBetween\n\nWords  "
        level = estimator.estimate_text_level(text_with_spaces)
        assert isinstance(level, float)

    def test_repeated_words(self, estimator):
        """A text consisting of one word repeated still yields a float."""
        text = "word word word word word"
        level = estimator.estimate_text_level(text)
        assert isinstance(level, float)

    def test_numeric_input(self, estimator):
        """Text made only of digit groups is rated at the default level."""
        assert estimator.estimate_text_level("123 456 789") == 3  # Default level

    def test_mixed_content(self, estimator):
        """Text mixing letters, digits and symbols still yields a float."""
        mixed_text = "Hello123 @World! 456"
        level = estimator.estimate_text_level(mixed_text)
        assert isinstance(level, float)