Files
veritext/examples/chatbot_testing.py
Kai Chappell 9cf968ad36 example: chatbot testing
Demonstrates pytest integration for chatbot QA with validate_text()
assertions, fixtures, and parametrised content safety tests.
2025-05-14 20:00:17 +00:00

136 lines
3.9 KiB
Python

"""Pytest integration for chatbot testing.
Demonstrates Veritext's pytest plugin for testing chatbot responses:
- validate_text() assertion function
- Custom test fixtures
- Test organisation with markers
"""
import pytest
from veritext.pytest_plugin import validate_text
# Sample chatbot responses for testing
CHATBOT_RESPONSES = {
    # Each entry pairs a user input with the canned bot reply and the
    # keywords associated with that reply.
    "greeting": {
        "input": "Hello!",
        "response": "Hi there! How can I help you today?",
        "expected_keywords": ["help", "hi"],
    },
    "weather": {
        "input": "What's the weather like?",
        "response": (
            "I don't have access to real-time weather data, "
            "but you can check a weather service like weather.com "
            "for current conditions."
        ),
        "expected_keywords": ["weather", "check"],
    },
    "farewell": {
        "input": "Goodbye!",
        "response": "Goodbye! Have a great day!",
        "expected_keywords": ["goodbye", "day"],
    },
}
# Fixtures for common test setup
@pytest.fixture
def greeting_response() -> str:
    """Provide the canned reply stored under the "greeting" key."""
    greeting_entry = CHATBOT_RESPONSES["greeting"]
    return greeting_entry["response"]
@pytest.fixture
def weather_response() -> str:
    """Provide the canned reply stored under the "weather" key."""
    weather_entry = CHATBOT_RESPONSES["weather"]
    return weather_entry["response"]
# Basic validation tests
class TestResponseQuality:
    """Quality checks on canned chatbot replies, run through validate_text()."""

    def test_greeting_length(self, greeting_response: str) -> None:
        """Greeting must pass the min_length=10 / max_length=100 bounds."""
        validate_text(greeting_response, min_length=10, max_length=100)

    def test_greeting_readability(self, greeting_response: str) -> None:
        """Greeting must pass the max_reading_grade=8.0 limit."""
        validate_text(greeting_response, max_reading_grade=8.0)

    def test_greeting_contains_keywords(self, greeting_response: str) -> None:
        """Greeting must pass the must_contain=["help"] requirement."""
        validate_text(greeting_response, must_contain=["help"])

    def test_weather_response_quality(self, weather_response: str) -> None:
        """Weather response should be informative and readable."""
        # All constraints are gathered first and handed over in one call.
        weather_checks = {
            "min_length": 50,
            "max_length": 500,
            "max_reading_grade": 10.0,
            "must_contain": ["weather"],
        }
        validate_text(weather_response, **weather_checks)
# Tests with reference comparison
class TestResponseSimilarity:
    """Compare canned replies with reference texts via validate_text()."""

    def test_greeting_similarity(self) -> None:
        """Greeting should match expected style."""
        validate_text(
            CHATBOT_RESPONSES["greeting"]["response"],
            reference="Hello! How may I assist you today?",
            min_rouge=0.3,  # Allow variation in wording
            min_length=10,
        )

    def test_farewell_similarity(self) -> None:
        """Farewell should match expected style."""
        validate_text(
            CHATBOT_RESPONSES["farewell"]["response"],
            reference="Goodbye! Have a wonderful day!",
            min_rouge=0.5,
            must_contain=["goodbye"],
        )
# Content safety tests
class TestContentSafety:
    """Test responses for inappropriate content.

    Both tests are parametrised over every key of CHATBOT_RESPONSES rather
    than a hand-maintained list, so a newly added sample response is
    covered without editing this class. With the current three entries the
    generated cases and their order are unchanged.
    """

    @pytest.mark.parametrize("response_key", list(CHATBOT_RESPONSES))
    def test_no_profanity(self, response_key: str) -> None:
        """Responses should not contain profanity."""
        response = CHATBOT_RESPONSES[response_key]["response"]
        # NOTE(review): if must_exclude matches substrings, "hell" would
        # also flag words such as "hello" -- confirm the matching semantics.
        validate_text(
            response,
            must_exclude=["damn", "hell", "crap"],
            min_length=1,
        )

    @pytest.mark.parametrize("response_key", list(CHATBOT_RESPONSES))
    def test_no_harmful_content(self, response_key: str) -> None:
        """Responses should not contain harmful instructions."""
        response = CHATBOT_RESPONSES[response_key]["response"]
        validate_text(
            response,
            must_exclude=["hack", "exploit", "attack"],
            min_length=1,
        )
# Run tests when executed directly
if __name__ == "__main__":
    # pytest.main() returns the run's exit code; re-raise it as the process
    # exit status so shells and CI see a non-zero code when tests fail.
    raise SystemExit(pytest.main([__file__, "-v"]))