158 lines
5.7 KiB
Python
158 lines
5.7 KiB
Python
"""Tests for the cost tracking module."""
|
|
|
|
import pytest
|
|
|
|
from arbiter.models.cost import AgentCost, CostEstimate, ReviewCost
|
|
from arbiter.models.enums import AgentName
|
|
|
|
|
|
class TestAgentCost:
    """Construction checks for ``AgentCost`` with explicit and default values."""

    def test_agent_cost_creation(self) -> None:
        """Explicitly supplied values are readable back from the instance."""
        supplied = {
            "agent": AgentName.SECURITY,
            "tokens_in": 100,
            "tokens_out": 50,
            "total_tokens": 150,
            "cost_usd": 0.01,
        }
        agent_cost = AgentCost(**supplied)

        assert agent_cost.agent == AgentName.SECURITY
        assert agent_cost.total_tokens == 150
        assert agent_cost.cost_usd == 0.01

    def test_agent_cost_defaults(self) -> None:
        """With only ``agent`` given, every counter and the cost read as zero."""
        agent_cost = AgentCost(agent=AgentName.STYLE)

        # Same checks, same order as listing each counter by hand.
        for counter in ("tokens_in", "tokens_out", "total_tokens"):
            assert getattr(agent_cost, counter) == 0
        assert agent_cost.cost_usd == 0.0
|
|
|
|
|
|
class TestReviewCost:
    """Checks for ``ReviewCost`` accumulation, dict exports and budget tests."""

    @staticmethod
    def _two_agent_review() -> "ReviewCost":
        """Build a ``ReviewCost`` with one SECURITY and one STYLE entry recorded."""
        review = ReviewCost()
        entries = (
            (AgentName.SECURITY, 100, 50, 0.01),
            (AgentName.STYLE, 80, 40, 0.008),
        )
        for agent, tokens_in, tokens_out, cost_usd in entries:
            review.add_agent_cost(
                agent, tokens_in=tokens_in, tokens_out=tokens_out, cost_usd=cost_usd
            )
        return review

    def test_review_cost_defaults(self) -> None:
        """A freshly built ``ReviewCost`` reports zero totals and no agent entries."""
        fresh = ReviewCost()

        assert fresh.total_tokens == 0
        assert fresh.total_cost_usd == 0.0
        assert fresh.agent_costs == []
        assert fresh.cache_hits == 0
        assert fresh.cache_misses == 0

    def test_add_agent_cost(self) -> None:
        """Two agent entries are both recorded and summed into the totals."""
        review = self._two_agent_review()

        assert len(review.agent_costs) == 2
        assert review.total_tokens_in == 180
        assert review.total_tokens_out == 90
        assert review.total_tokens == 270
        assert review.total_cost_usd == pytest.approx(0.018)

    def test_add_deliberation_cost(self) -> None:
        """Deliberation usage lands in its own fields and in ``total_tokens``."""
        review = ReviewCost()
        review.add_deliberation_cost(tokens_in=50, tokens_out=100, cost_usd=0.005)

        assert review.deliberation_tokens_in == 50
        assert review.deliberation_tokens_out == 100
        assert review.deliberation_cost_usd == 0.005
        assert review.total_tokens == 150

    def test_combined_costs(self) -> None:
        """Agent and deliberation usage are added together in the totals."""
        review = ReviewCost()
        review.add_agent_cost(
            AgentName.SECURITY, tokens_in=100, tokens_out=50, cost_usd=0.01
        )
        review.add_deliberation_cost(tokens_in=50, tokens_out=25, cost_usd=0.005)

        assert review.total_tokens_in == 150
        assert review.total_tokens_out == 75
        assert review.total_tokens == 225
        assert review.total_cost_usd == pytest.approx(0.015)

    def test_to_agent_dict(self) -> None:
        """``to_agent_dict`` maps each agent's name to its token total."""
        tokens_by_agent = self._two_agent_review().to_agent_dict()

        assert tokens_by_agent == {"security": 150, "style": 120}

    def test_to_cost_dict(self) -> None:
        """``to_cost_dict`` maps each agent's name to its USD cost."""
        usd_by_agent = self._two_agent_review().to_cost_dict()

        assert usd_by_agent == {"security": 0.01, "style": 0.008}

    def test_is_within_budget_true(self) -> None:
        """Usage below both limits is reported as within budget."""
        review = ReviewCost()
        review.add_agent_cost(
            AgentName.SECURITY, tokens_in=100, tokens_out=50, cost_usd=0.01
        )

        verdict = review.is_within_budget(max_tokens=1000, max_cost_usd=0.50)
        assert verdict is True

    def test_is_within_budget_false_tokens(self) -> None:
        """Token usage above ``max_tokens`` is reported as over budget."""
        review = ReviewCost()
        review.add_agent_cost(
            AgentName.SECURITY, tokens_in=1000, tokens_out=500, cost_usd=0.01
        )

        verdict = review.is_within_budget(max_tokens=1000, max_cost_usd=0.50)
        assert verdict is False

    def test_is_within_budget_false_cost(self) -> None:
        """A cost above ``max_cost_usd`` is reported as over budget."""
        review = ReviewCost()
        review.add_agent_cost(
            AgentName.SECURITY, tokens_in=100, tokens_out=50, cost_usd=1.0
        )

        verdict = review.is_within_budget(max_tokens=10000, max_cost_usd=0.50)
        assert verdict is False
|
|
|
|
|
|
class TestCostEstimate:
    """Checks for ``CostEstimate.estimate`` across diff sizes, models and agents."""

    def test_estimate_small_diff(self) -> None:
        """A small diff on the mini model gives a positive, in-budget estimate."""
        requested_agents = [AgentName.SECURITY, AgentName.STYLE]
        result = CostEstimate.estimate(
            diff_size=1000,
            agents=requested_agents,
            model="gpt-4o-mini",
        )

        assert result.estimated_tokens > 0
        assert result.estimated_cost_usd > 0
        assert result.agents_enabled == [AgentName.SECURITY, AgentName.STYLE]
        assert result.model == "gpt-4o-mini"
        assert result.within_budget is True

    def test_estimate_large_diff(self) -> None:
        """A large diff with tight limits is flagged as over budget."""
        all_three = [AgentName.SECURITY, AgentName.STYLE, AgentName.COMPLEXITY]
        result = CostEstimate.estimate(
            diff_size=100000,
            agents=all_three,
            model="gpt-4o",
            max_tokens=10000,
            max_cost_usd=0.10,
        )

        # A large diff on the expensive model is expected to exceed the budget.
        assert result.within_budget is False

    def test_estimate_gpt4o_vs_mini(self) -> None:
        """For identical input, gpt-4o is estimated as costlier than the mini model."""

        def estimate_with(model_name: str) -> "CostEstimate":
            # Only the model differs between the two estimates being compared.
            return CostEstimate.estimate(
                diff_size=10000,
                agents=[AgentName.SECURITY],
                model=model_name,
            )

        full_model = estimate_with("gpt-4o")
        mini_model = estimate_with("gpt-4o-mini")

        assert full_model.estimated_cost_usd > mini_model.estimated_cost_usd

    def test_estimate_more_agents_higher_cost(self) -> None:
        """Enabling more agents raises both the token and the cost estimate."""

        def estimate_for(agents: list) -> "CostEstimate":
            # Diff size and model are held fixed; only the agent list varies.
            return CostEstimate.estimate(
                diff_size=5000,
                agents=agents,
                model="gpt-4o",
            )

        single = estimate_for([AgentName.SECURITY])
        triple = estimate_for(
            [AgentName.SECURITY, AgentName.STYLE, AgentName.COMPLEXITY]
        )

        assert triple.estimated_tokens > single.estimated_tokens
        assert triple.estimated_cost_usd > single.estimated_cost_usd
|