Demonstrates pytest integration for chatbot QA with validate_text() assertions, fixtures, and parametrised content safety tests.
141 lines
4.2 KiB
Python
141 lines
4.2 KiB
Python
"""Pytest integration for chatbot testing.

Demonstrates Veritext's pytest plugin for testing chatbot responses:
- validate_text() assertion function
- Custom test fixtures
- Test organisation with markers
"""
|
|
|
|
import pytest
|
|
|
|
from veritext.pytest_plugin import validate_text
|
|
|
|
# Sample chatbot responses for testing
|
|
CHATBOT_RESPONSES = {
    # Each scenario pairs a user prompt ("input") with the canned bot reply
    # under test ("response") and the keywords that reply should mention.
    # NOTE(review): keywords are lowercase while some replies are capitalised
    # ("Hi", "Goodbye") - confirm any consumer matches case-insensitively.
    "greeting": {
        "input": "Hello!",
        "response": "Hi there! How can I help you today?",
        "expected_keywords": ["help", "hi"],
    },
    "weather": {
        "input": "What's the weather like?",
        # Adjacent literals are joined into one sentence at compile time.
        "response": (
            "I don't have access to real-time weather data, but you can "
            "check a weather service like weather.com for current conditions."
        ),
        "expected_keywords": ["weather", "check"],
    },
    "farewell": {
        "input": "Goodbye!",
        "response": "Goodbye! Have a great day!",
        "expected_keywords": ["goodbye", "day"],
    },
}
|
|
|
|
|
|
# Fixtures for common test setup
|
|
@pytest.fixture
def greeting_response() -> str:
    """Return the canned reply stored for the "greeting" scenario."""
    greeting_entry = CHATBOT_RESPONSES["greeting"]
    return greeting_entry["response"]
|
|
|
|
|
|
@pytest.fixture
def weather_response() -> str:
    """Return the canned reply stored for the "weather" scenario."""
    weather_entry = CHATBOT_RESPONSES["weather"]
    return weather_entry["response"]
|
|
|
|
|
|
# Basic validation tests
|
|
class TestResponseQuality:
    """Quality checks on canned chatbot replies, asserted via validate_text()."""

    def test_greeting_length(self, greeting_response: str) -> None:
        """The greeting must stay within the 10-100 length bounds."""
        length_bounds = {"min_length": 10, "max_length": 100}
        validate_text(greeting_response, **length_bounds)

    def test_greeting_readability(self, greeting_response: str) -> None:
        """The greeting must not exceed a reading grade of 8.0."""
        validate_text(greeting_response, max_reading_grade=8.0)

    def test_greeting_contains_keywords(self, greeting_response: str) -> None:
        """The greeting must mention the required term(s)."""
        required_terms = ["help"]
        validate_text(greeting_response, must_contain=required_terms)

    def test_weather_response_quality(self, weather_response: str) -> None:
        """The weather reply must pass length, readability and keyword checks."""
        # All constraints are handed to a single validate_text() call, exactly
        # as separate keyword arguments would be.
        constraints = {
            "min_length": 50,
            "max_length": 500,
            "max_reading_grade": 10.0,
            "must_contain": ["weather"],
        }
        validate_text(weather_response, **constraints)
|
|
|
|
|
|
# Tests with reference comparison
|
|
class TestResponseSimilarity:
    """Compare canned replies against hand-written reference texts."""

    def test_greeting_similarity(self) -> None:
        """The greeting should stay reasonably close to the reference phrasing."""
        validate_text(
            CHATBOT_RESPONSES["greeting"]["response"],
            reference="Hello! How may I assist you today?",
            min_rouge=0.3,  # deliberately loose: wording is allowed to vary
            min_length=10,
        )

    def test_farewell_similarity(self) -> None:
        """The farewell should stay close to the reference phrasing."""
        # NOTE(review): the reply reads "Goodbye!" (capital G) while the
        # required term is lowercase - confirm must_contain is
        # case-insensitive.
        validate_text(
            CHATBOT_RESPONSES["farewell"]["response"],
            reference="Goodbye! Have a wonderful day!",
            min_rouge=0.5,
            must_contain=["goodbye"],
        )
|
|
|
|
|
|
# Content safety tests
|
|
class TestContentSafety:
    """Screen every canned reply for terms that must never appear.

    Both tests are parametrised over the keys of ``CHATBOT_RESPONSES`` itself
    rather than a hand-maintained list, so a reply added to that mapping is
    screened automatically. Dict order is preserved, so the generated test
    IDs for the existing entries are unchanged.
    """

    @pytest.mark.parametrize("response_key", list(CHATBOT_RESPONSES))
    def test_no_profanity(self, response_key: str) -> None:
        """Responses should not contain profanity."""
        response = CHATBOT_RESPONSES[response_key]["response"]
        # NOTE(review): if must_exclude matches substrings, "hell" would also
        # reject benign words such as "Hello" - confirm validate_text's
        # matching semantics.
        validate_text(
            response,
            must_exclude=["damn", "hell", "crap"],
            min_length=1,
        )

    @pytest.mark.parametrize("response_key", list(CHATBOT_RESPONSES))
    def test_no_harmful_content(self, response_key: str) -> None:
        """Responses should not contain harmful instructions."""
        response = CHATBOT_RESPONSES[response_key]["response"]
        validate_text(
            response,
            must_exclude=["hack", "exploit", "attack"],
            min_length=1,
        )
|
|
|
|
|
|
# Run tests when executed directly
|
|
if __name__ == "__main__":
    # pytest.main() returns the run's exit code. Propagate it so that running
    # this file directly reports test failures to the shell/CI instead of
    # always exiting with status 0.
    raise SystemExit(pytest.main([__file__, "-v"]))
|