"""Pytest integration for chatbot testing.

Demonstrates Veritext's pytest plugin for testing chatbot responses:
- validate_text() assertion function
- Custom test fixtures
- Test organisation with markers
"""

import pytest

from veritext.pytest_plugin import validate_text

# Sample chatbot responses for testing.
# Each scenario maps to the user's input, the canned bot response, and a list
# of expected keywords. Only the "response" values are read by the tests below;
# "input" and "expected_keywords" are kept as descriptive sample data.
CHATBOT_RESPONSES = {
    "greeting": {
        "input": "Hello!",
        "response": "Hi there! How can I help you today?",
        "expected_keywords": ["help", "hi"],
    },
    "weather": {
        "input": "What's the weather like?",
        "response": "I don't have access to real-time weather data, but you can "
        "check a weather service like weather.com for current conditions.",
        "expected_keywords": ["weather", "check"],
    },
    "farewell": {
        "input": "Goodbye!",
        "response": "Goodbye! Have a great day!",
        "expected_keywords": ["goodbye", "day"],
    },
}

# Scenario keys in definition order. The content-safety tests parametrize over
# this so that a newly added scenario is covered without editing each test.
ALL_RESPONSE_KEYS = tuple(CHATBOT_RESPONSES)


# Fixtures for common test setup
@pytest.fixture
def greeting_response() -> str:
    """Provide a sample greeting response."""
    return CHATBOT_RESPONSES["greeting"]["response"]


@pytest.fixture
def weather_response() -> str:
    """Provide a sample weather response."""
    return CHATBOT_RESPONSES["weather"]["response"]


# Basic validation tests
class TestResponseQuality:
    """Test chatbot response quality using Veritext."""

    def test_greeting_length(self, greeting_response: str) -> None:
        """Greeting responses should be concise."""
        validate_text(
            greeting_response,
            min_length=10,
            max_length=100,
        )

    def test_greeting_readability(self, greeting_response: str) -> None:
        """Greeting responses should be easy to read."""
        validate_text(
            greeting_response,
            max_reading_grade=8.0,
        )

    def test_greeting_contains_keywords(self, greeting_response: str) -> None:
        """Greeting should contain expected terms."""
        validate_text(
            greeting_response,
            must_contain=["help"],
        )

    def test_weather_response_quality(self, weather_response: str) -> None:
        """Weather response should be informative and readable."""
        validate_text(
            weather_response,
            min_length=50,
            max_length=500,
            max_reading_grade=10.0,
            must_contain=["weather"],
        )


# Tests with reference comparison
class TestResponseSimilarity:
    """Test response similarity against reference texts."""

    def test_greeting_similarity(self) -> None:
        """Greeting should match expected style."""
        reference = "Hello! How may I assist you today?"
        response = CHATBOT_RESPONSES["greeting"]["response"]

        validate_text(
            response,
            reference=reference,
            min_rouge=0.3,  # Allow variation in wording
            min_length=10,
        )

    def test_farewell_similarity(self) -> None:
        """Farewell should match expected style."""
        reference = "Goodbye! Have a wonderful day!"
        response = CHATBOT_RESPONSES["farewell"]["response"]

        validate_text(
            response,
            reference=reference,
            min_rouge=0.5,
            must_contain=["goodbye"],
        )


# Content safety tests
class TestContentSafety:
    """Test responses for inappropriate content."""

    @pytest.mark.parametrize("response_key", ALL_RESPONSE_KEYS)
    def test_no_profanity(self, response_key: str) -> None:
        """Responses should not contain profanity."""
        response = CHATBOT_RESPONSES[response_key]["response"]
        # NOTE(review): if must_exclude matches substrings, "hell" would also
        # reject any response containing "Hello" -- confirm the matching
        # semantics of validate_text before adding such responses.
        validate_text(
            response,
            must_exclude=["damn", "hell", "crap"],
            min_length=1,
        )

    @pytest.mark.parametrize("response_key", ALL_RESPONSE_KEYS)
    def test_no_harmful_content(self, response_key: str) -> None:
        """Responses should not contain harmful instructions."""
        response = CHATBOT_RESPONSES[response_key]["response"]
        validate_text(
            response,
            must_exclude=["hack", "exploit", "attack"],
            min_length=1,
        )


# Run tests when executed directly
if __name__ == "__main__":
    # Propagate pytest's exit code so a failing run does not exit with status 0.
    raise SystemExit(pytest.main([__file__, "-v"]))