# veritext/examples/chatbot_testing.py

"""Pytest integration for chatbot testing.

Demonstrates Veritext's pytest plugin for testing chatbot responses:
- validate_text() assertion function
- Custom test fixtures
- Test organisation with markers
"""

import pytest

from veritext.pytest_plugin import validate_text

# Canned chatbot exchanges used as test data, keyed by scenario name.
# Every entry stores the user "input", the bot "response", and the
# "expected_keywords" recorded for that response.
CHATBOT_RESPONSES = {
    "greeting": {
        "input": "Hello!",
        "response": "Hi there! How can I help you today?",
        "expected_keywords": ["help", "hi"],
    },
    "weather": {
        "input": "What's the weather like?",
        "response": (
            "I don't have access to real-time weather data, "
            "but you can check a weather service like weather.com "
            "for current conditions."
        ),
        "expected_keywords": ["weather", "check"],
    },
    "farewell": {
        "input": "Goodbye!",
        "response": "Goodbye! Have a great day!",
        "expected_keywords": ["goodbye", "day"],
    },
}


# Fixtures for common test setup
@pytest.fixture
def greeting_response() -> str:
    """Provide the canned chatbot reply for the "greeting" scenario."""
    greeting_entry = CHATBOT_RESPONSES["greeting"]
    return greeting_entry["response"]


@pytest.fixture
def weather_response() -> str:
    """Provide the canned chatbot reply for the "weather" scenario."""
    weather_entry = CHATBOT_RESPONSES["weather"]
    return weather_entry["response"]


# Basic validation tests
class TestResponseQuality:
    """Quality checks on individual chatbot replies, run through Veritext."""

    def test_greeting_length(self, greeting_response: str) -> None:
        """Greeting must satisfy a minimum length of 10 and a maximum of 100."""
        length_bounds = {"min_length": 10, "max_length": 100}
        validate_text(greeting_response, **length_bounds)

    def test_greeting_readability(self, greeting_response: str) -> None:
        """Greeting must not exceed a reading grade of 8.0."""
        validate_text(greeting_response, max_reading_grade=8.0)

    def test_greeting_contains_keywords(self, greeting_response: str) -> None:
        """Greeting must contain the keyword "help"."""
        required_terms = ["help"]
        validate_text(greeting_response, must_contain=required_terms)

    def test_weather_response_quality(self, weather_response: str) -> None:
        """Weather reply must pass length, readability and keyword checks."""
        # All constraints are handed to one validate_text() call.
        constraints = {
            "min_length": 50,
            "max_length": 500,
            "max_reading_grade": 10.0,
            "must_contain": ["weather"],
        }
        validate_text(weather_response, **constraints)


# Tests with reference comparison
class TestResponseSimilarity:
    """Compare chatbot replies with hand-written reference texts."""

    def test_greeting_similarity(self) -> None:
        """Greeting reply should resemble the reference greeting."""
        actual = CHATBOT_RESPONSES["greeting"]["response"]
        expected_style = "Hello! How may I assist you today?"

        validate_text(
            actual,
            reference=expected_style,
            # Low threshold on purpose: the wording is allowed to vary.
            min_rouge=0.3,
            min_length=10,
        )

    def test_farewell_similarity(self) -> None:
        """Farewell reply should resemble the reference farewell."""
        actual = CHATBOT_RESPONSES["farewell"]["response"]
        expected_style = "Goodbye! Have a wonderful day!"

        validate_text(
            actual,
            reference=expected_style,
            min_rouge=0.5,
            must_contain=["goodbye"],
        )


# Content safety tests
class TestContentSafety:
    """Screen every canned chatbot reply for unwanted terms."""

    @pytest.mark.parametrize("response_key", ["greeting", "weather", "farewell"])
    def test_no_profanity(self, response_key: str) -> None:
        """Reply for ``response_key`` must not include any listed profanity."""
        # NOTE(review): if must_exclude matches substrings, "hell" would also
        # flag words such as "hello" -- confirm validate_text's matching rules.
        banned_terms = ["damn", "hell", "crap"]
        reply = CHATBOT_RESPONSES[response_key]["response"]
        validate_text(reply, must_exclude=banned_terms, min_length=1)

    @pytest.mark.parametrize("response_key", ["greeting", "weather", "farewell"])
    def test_no_harmful_content(self, response_key: str) -> None:
        """Reply for ``response_key`` must not include any listed harmful term."""
        banned_terms = ["hack", "exploit", "attack"]
        reply = CHATBOT_RESPONSES[response_key]["response"]
        validate_text(reply, must_exclude=banned_terms, min_length=1)


# Run tests when executed directly. pytest.main() returns the exit status of
# the test run; hand it to SystemExit so that a failing run makes the script
# exit with a non-zero code instead of always exiting with 0.
if __name__ == "__main__":
    raise SystemExit(pytest.main([__file__, "-v"]))