docs(examples): add chatbot testing example
Demonstrates pytest integration for chatbot QA with validate_text() assertions, fixtures, and parametrised content safety tests.
This commit is contained in:
140
examples/chatbot_testing.py
Normal file
140
examples/chatbot_testing.py
Normal file
@@ -0,0 +1,140 @@
|
||||
"""Pytest integration for chatbot testing.
|
||||
|
||||
Demonstrates Veritext's pytest plugin for testing chatbot responses:
|
||||
- validate_text() assertion function
|
||||
- Custom test fixtures
|
||||
- Test organisation with classes and pytest.mark.parametrize
|
||||
"""
|
||||
|
||||
import pytest
|
||||
|
||||
from veritext.pytest_plugin import validate_text
|
||||
|
||||
# Canned chatbot exchanges used as test data.  Each entry is keyed by a
# scenario name and holds the user "input", the bot "response" and the
# "expected_keywords" associated with that response.
CHATBOT_RESPONSES = {
    "greeting": {
        "input": "Hello!",
        "response": "Hi there! How can I help you today?",
        "expected_keywords": ["help", "hi"],
    },
    "weather": {
        "input": "What's the weather like?",
        # Adjacent literals are joined into a single string at compile time.
        "response": (
            "I don't have access to real-time weather data, but you can "
            "check a weather service like weather.com for current conditions."
        ),
        "expected_keywords": ["weather", "check"],
    },
    "farewell": {
        "input": "Goodbye!",
        "response": "Goodbye! Have a great day!",
        "expected_keywords": ["goodbye", "day"],
    },
}
|
||||
|
||||
|
||||
# Fixtures for common test setup
@pytest.fixture
def greeting_response() -> str:
    """Return the canned reply stored under the ``greeting`` scenario."""
    greeting = CHATBOT_RESPONSES["greeting"]
    return greeting["response"]
|
||||
|
||||
|
||||
@pytest.fixture
def weather_response() -> str:
    """Return the canned reply stored under the ``weather`` scenario."""
    weather = CHATBOT_RESPONSES["weather"]
    return weather["response"]
|
||||
|
||||
|
||||
# Basic validation tests
class TestResponseQuality:
    """Quality checks on the sample replies, expressed through validate_text()."""

    def test_greeting_length(self, greeting_response: str) -> None:
        """The greeting must satisfy min_length=10 and max_length=100."""
        validate_text(greeting_response, min_length=10, max_length=100)

    def test_greeting_readability(self, greeting_response: str) -> None:
        """The greeting must stay at or below a reading grade of 8.0."""
        validate_text(greeting_response, max_reading_grade=8.0)

    def test_greeting_contains_keywords(self, greeting_response: str) -> None:
        """The greeting must mention the term "help"."""
        validate_text(greeting_response, must_contain=["help"])

    def test_weather_response_quality(self, weather_response: str) -> None:
        """The weather reply must pass length, readability and keyword checks.

        All four constraints are submitted in a single validate_text() call.
        """
        validate_text(
            weather_response,
            min_length=50,
            max_length=500,
            max_reading_grade=10.0,
            must_contain=["weather"],
        )
|
||||
|
||||
|
||||
# Tests with reference comparison
class TestResponseSimilarity:
    """Compare the sample replies against hand-written reference texts."""

    def test_greeting_similarity(self) -> None:
        """The greeting must reach min_rouge=0.3 against its reference."""
        validate_text(
            CHATBOT_RESPONSES["greeting"]["response"],
            reference="Hello! How may I assist you today?",
            min_rouge=0.3,  # Allow variation in wording
            min_length=10,
        )

    def test_farewell_similarity(self) -> None:
        """The farewell must reach min_rouge=0.5 and mention "goodbye"."""
        # NOTE(review): the stored reply starts with a capital "Goodbye", so
        # this presumably relies on must_contain matching case-insensitively;
        # confirm against validate_text().
        validate_text(
            CHATBOT_RESPONSES["farewell"]["response"],
            reference="Goodbye! Have a wonderful day!",
            min_rouge=0.5,
            must_contain=["goodbye"],
        )
|
||||
|
||||
|
||||
# Content safety tests
class TestContentSafety:
    """Check every sample reply against lists of forbidden terms.

    Both tests are parametrised over the keys of CHATBOT_RESPONSES, so a
    newly added scenario is covered without editing a second key list.
    """

    # list() over the dict yields its keys in insertion order, which keeps
    # the generated test ids (greeting, weather, farewell) unchanged.
    @pytest.mark.parametrize("response_key", list(CHATBOT_RESPONSES))
    def test_no_profanity(self, response_key: str) -> None:
        """The reply for ``response_key`` must exclude the profanity terms."""
        # NOTE(review): "hell" is a substring of "Hello"; if must_exclude
        # matches substrings rather than whole words, a reply containing
        # "Hello" would fail here - confirm against validate_text().
        validate_text(
            CHATBOT_RESPONSES[response_key]["response"],
            must_exclude=["damn", "hell", "crap"],
            min_length=1,
        )

    @pytest.mark.parametrize("response_key", list(CHATBOT_RESPONSES))
    def test_no_harmful_content(self, response_key: str) -> None:
        """The reply for ``response_key`` must exclude the harmful terms."""
        validate_text(
            CHATBOT_RESPONSES[response_key]["response"],
            must_exclude=["hack", "exploit", "attack"],
            min_length=1,
        )
|
||||
|
||||
|
||||
# Run tests when executed directly
if __name__ == "__main__":
    # pytest.main() returns the session's exit code; hand it to the
    # interpreter so a direct run reports failures through its exit status
    # instead of always exiting with 0.
    raise SystemExit(pytest.main([__file__, "-v"]))
|
||||
Reference in New Issue
Block a user