# Source listing: arbiter/tests/test_cost.py (Python, 158 lines, 5.7 KiB)

"""Tests for the cost tracking module."""
import pytest
from arbiter.models.cost import AgentCost, CostEstimate, ReviewCost
from arbiter.models.enums import AgentName
class TestAgentCost:
    """Checks for the per-agent cost record, ``AgentCost``."""

    def test_agent_cost_creation(self) -> None:
        """Fields passed to the constructor can be read back unchanged."""
        fields = {
            "agent": AgentName.SECURITY,
            "tokens_in": 100,
            "tokens_out": 50,
            "total_tokens": 150,
            "cost_usd": 0.01,
        }
        security_cost = AgentCost(**fields)

        assert security_cost.agent == AgentName.SECURITY
        assert security_cost.total_tokens == 150
        assert security_cost.cost_usd == 0.01

    def test_agent_cost_defaults(self) -> None:
        """With only ``agent`` supplied, token counters and cost are zero."""
        style_cost = AgentCost(agent=AgentName.STYLE)

        # Checked in the same order as the individual counters are declared
        # in the assertions this loop replaces; stops at the first mismatch.
        for counter in ("tokens_in", "tokens_out", "total_tokens"):
            assert getattr(style_cost, counter) == 0
        assert style_cost.cost_usd == 0.0
class TestReviewCost:
    """Checks for ``ReviewCost``: totals, dict exports and budget checks."""

    @staticmethod
    def _review_with_security(
        *, tokens_in: int, tokens_out: int, cost_usd: float
    ) -> "ReviewCost":
        """Return a fresh ``ReviewCost`` with one SECURITY agent entry added."""
        review = ReviewCost()
        review.add_agent_cost(
            AgentName.SECURITY,
            tokens_in=tokens_in,
            tokens_out=tokens_out,
            cost_usd=cost_usd,
        )
        return review

    @staticmethod
    def _review_with_security_and_style() -> "ReviewCost":
        """Return a fresh ``ReviewCost`` with SECURITY then STYLE entries added."""
        review = ReviewCost()
        review.add_agent_cost(
            AgentName.SECURITY, tokens_in=100, tokens_out=50, cost_usd=0.01
        )
        review.add_agent_cost(
            AgentName.STYLE, tokens_in=80, tokens_out=40, cost_usd=0.008
        )
        return review

    def test_review_cost_defaults(self) -> None:
        """A newly constructed ``ReviewCost`` has zero totals and no agents."""
        empty_review = ReviewCost()

        assert empty_review.total_tokens == 0
        assert empty_review.total_cost_usd == 0.0
        assert empty_review.agent_costs == []
        assert empty_review.cache_hits == 0
        assert empty_review.cache_misses == 0

    def test_add_agent_cost(self) -> None:
        """Two agent entries are both stored and summed into the totals."""
        review = self._review_with_security_and_style()

        assert len(review.agent_costs) == 2
        expected_totals = {
            "total_tokens_in": 180,
            "total_tokens_out": 90,
            "total_tokens": 270,
        }
        for attribute, expected in expected_totals.items():
            assert getattr(review, attribute) == expected
        assert review.total_cost_usd == pytest.approx(0.018)

    def test_add_deliberation_cost(self) -> None:
        """Deliberation usage is recorded and counted in ``total_tokens``."""
        review = ReviewCost()
        review.add_deliberation_cost(tokens_in=50, tokens_out=100, cost_usd=0.005)

        assert review.deliberation_tokens_in == 50
        assert review.deliberation_tokens_out == 100
        assert review.deliberation_cost_usd == 0.005
        assert review.total_tokens == 150

    def test_combined_costs(self) -> None:
        """Agent and deliberation usage are added together in the totals."""
        review = self._review_with_security(
            tokens_in=100, tokens_out=50, cost_usd=0.01
        )
        review.add_deliberation_cost(tokens_in=50, tokens_out=25, cost_usd=0.005)

        assert review.total_tokens_in == 150
        assert review.total_tokens_out == 75
        assert review.total_tokens == 225
        assert review.total_cost_usd == pytest.approx(0.015)

    def test_to_agent_dict(self) -> None:
        """``to_agent_dict`` maps each agent's string key to its token total."""
        review = self._review_with_security_and_style()

        assert review.to_agent_dict() == {"security": 150, "style": 120}

    def test_to_cost_dict(self) -> None:
        """``to_cost_dict`` maps each agent's string key to its USD cost."""
        review = self._review_with_security_and_style()

        assert review.to_cost_dict() == {"security": 0.01, "style": 0.008}

    def test_is_within_budget_true(self) -> None:
        """150 tokens at $0.01 is inside a 1000-token / $0.50 budget."""
        review = self._review_with_security(
            tokens_in=100, tokens_out=50, cost_usd=0.01
        )

        verdict = review.is_within_budget(max_tokens=1000, max_cost_usd=0.50)
        assert verdict is True

    def test_is_within_budget_false_tokens(self) -> None:
        """1500 tokens is outside a 1000-token budget even at low cost."""
        review = self._review_with_security(
            tokens_in=1000, tokens_out=500, cost_usd=0.01
        )

        verdict = review.is_within_budget(max_tokens=1000, max_cost_usd=0.50)
        assert verdict is False

    def test_is_within_budget_false_cost(self) -> None:
        """A $1.00 cost is outside a $0.50 budget even with few tokens."""
        review = self._review_with_security(
            tokens_in=100, tokens_out=50, cost_usd=1.0
        )

        verdict = review.is_within_budget(max_tokens=10000, max_cost_usd=0.50)
        assert verdict is False
class TestCostEstimate:
    """Checks for the up-front estimator, ``CostEstimate.estimate``."""

    def test_estimate_small_diff(self) -> None:
        """A 1000-size diff with two agents yields a positive, in-budget estimate."""
        small = CostEstimate.estimate(
            diff_size=1000,
            agents=[AgentName.SECURITY, AgentName.STYLE],
            model="gpt-4o-mini",
        )

        assert small.estimated_tokens > 0
        assert small.estimated_cost_usd > 0
        assert small.agents_enabled == [AgentName.SECURITY, AgentName.STYLE]
        assert small.model == "gpt-4o-mini"
        assert small.within_budget is True

    def test_estimate_large_diff(self) -> None:
        """A 100000-size diff under tight explicit limits is flagged over budget."""
        limits = {"max_tokens": 10000, "max_cost_usd": 0.10}
        large = CostEstimate.estimate(
            diff_size=100000,
            agents=[AgentName.SECURITY, AgentName.STYLE, AgentName.COMPLEXITY],
            model="gpt-4o",
            **limits,
        )

        # Large diff with expensive model should exceed budget
        assert large.within_budget is False

    def test_estimate_gpt4o_vs_mini(self) -> None:
        """For the same diff and agent, "gpt-4o" is estimated costlier than mini."""
        # A fresh agent list is built for every call, "gpt-4o" first.
        estimates = {
            model_name: CostEstimate.estimate(
                diff_size=10000,
                agents=[AgentName.SECURITY],
                model=model_name,
            )
            for model_name in ("gpt-4o", "gpt-4o-mini")
        }

        pricier = estimates["gpt-4o"].estimated_cost_usd
        cheaper = estimates["gpt-4o-mini"].estimated_cost_usd
        assert pricier > cheaper

    def test_estimate_more_agents_higher_cost(self) -> None:
        """Three agents are estimated above one agent in both tokens and cost."""
        agent_sets = (
            [AgentName.SECURITY],
            [AgentName.SECURITY, AgentName.STYLE, AgentName.COMPLEXITY],
        )
        single_agent, triple_agent = [
            CostEstimate.estimate(diff_size=5000, agents=chosen, model="gpt-4o")
            for chosen in agent_sets
        ]

        assert triple_agent.estimated_tokens > single_agent.estimated_tokens
        assert triple_agent.estimated_cost_usd > single_agent.estimated_cost_usd