Performance Assertions
All are deterministic — no LLM calls.
latency_under(ms)
expect.latency_under(2000) # must respond within 2 seconds
cost_under(usd)
expect.cost_under(0.01) # must cost less than 1 cent
token_count_under(max_tokens)
expect.token_count_under(500) # max 500 total tokens
Combining
@llm_test(
    expect.is_not_empty(),
    expect.latency_under(2000),
    expect.cost_under(0.01),
    expect.token_count_under(500),
    model="gpt-5-mini",
)
def test_fast_and_cheap(llm):
    output = llm("What is 2+2?")
    assert "4" in output.content
Last updated on