Back to Blog
The Math on Self-Hosting vs API
Accountants have a concept called "payback period"—how long until an investment returns its cost. Every self-hosting decision should start with this calculation.
Most teams don't do it. They assume "cheaper per token = cheaper overall." That assumption ignores fixed costs, migration effort, and opportunity cost.
The Break-Even Formula
def break_even_months(
    monthly_api_cost: float,
    gpu_monthly: float,
    eng_salary_monthly: float,
    eng_fraction: float,
    setup_months: float,
    setup_eng_fraction: float,
) -> float:
    """Return the payback period, in months, of moving from an API to self-hosting.

    The one-off setup cost is divided by the monthly saving that
    self-hosting yields once the migration is done.

    Args:
        monthly_api_cost: Current monthly API bill.
        gpu_monthly: Monthly GPU cost when self-hosting.
        eng_salary_monthly: Monthly salary of the engineer involved.
        eng_fraction: Share of that engineer's time spent on the
            self-hosted stack on an ongoing basis.
        setup_months: Length of the setup phase, in months.
        setup_eng_fraction: Share of the engineer's time consumed
            during the setup phase.

    Returns:
        Months of post-migration operation needed to recover the setup
        cost, or ``float("inf")`` when self-hosting saves nothing per
        month (it never breaks even).
    """
    # One-off engineering spend to get the self-hosted stack running.
    upfront = setup_months * eng_salary_monthly * setup_eng_fraction

    # Recurring bill once migrated: hardware plus the engineer's ongoing share.
    recurring = gpu_monthly + (eng_salary_monthly * eng_fraction)

    # What is saved each month compared with staying on the API.
    net_gain = monthly_api_cost - recurring

    # With no positive saving the setup cost is never recovered.
    return float("inf") if net_gain <= 0 else upfront / net_gain
# Example:
# $40K/month API cost
# $10K/month GPU cost
# $15K/month engineer salary, 30% time on infra
# 3 months setup at 50% engineer time
break_even = break_even_months(
    monthly_api_cost=40000,
    gpu_monthly=10000,
    eng_salary_monthly=15000,
    eng_fraction=0.30,
    setup_months=3,
    setup_eng_fraction=0.50,
)
# Setup cost:      3 * $15,000 * 0.50 = $22,500
# Self-host cost:  $10,000 + $15,000 * 0.30 = $14,500/month
# Monthly savings: $40,000 - $14,500 = $25,500
# Result: ~0.9 months to break even after setup ($22,500 / $25,500)
# Total time to ROI: ~3.9 months (3 months of setup + ~0.9 months)
The Variables That Change Everything
class BreakEvenVariables:
    """Break-even inputs that tend to be underestimated or left out.

    Each attribute is a plain dict of descriptive strings; nothing here
    is computed.
    """

    def __init__(self):
        # Often underestimated: how long setup takes and how much
        # engineering time the system keeps consuming afterwards.
        self.setup_time = dict(
            optimistic="2 weeks",
            realistic="2-3 months",
            with_production_hardening="4-6 months",
        )
        self.ongoing_eng_time = dict(
            if_everything_works="10%",
            reality="20-40%",
            during_incidents="100%",
        )

        # Often ignored: what the engineering effort costs elsewhere.
        self.opportunity_cost = dict(
            what_else_could_eng_build="Features, not infra",
            hiring_difficulty="ML infra people are scarce",
            context_switching="On-call fragments focus",
        )
The 6-Month Reality Check
def six_month_projection(scenario: str) -> dict:
    """Return the six-month storyline for one self-hosting scenario.

    Args:
        scenario: One of ``"optimistic"``, ``"realistic"`` or
            ``"pessimistic"``.

    Returns:
        A freshly built dict of month-by-month notes plus a
        ``"total_savings"`` figure for the chosen scenario.

    Raises:
        KeyError: If ``scenario`` is not one of the three known names.
    """
    # Everything goes to plan.
    best_case = {
        "month_1": "Setup complete, running in dev",
        "month_2": "Production deployment",
        "month_3": "Stable, start saving",
        "month_4_6": "Smooth sailing, savings accumulate",
        "total_savings": 60000,
    }

    # Savings only begin in the final month.
    expected_case = {
        "month_1": "Environment setup, dependency hell",
        "month_2": "First models running, performance issues",
        "month_3": "Optimization, still slower than API",
        "month_4": "Production deployment, incidents",
        "month_5": "Stability, but engineer burned out",
        "month_6": "Finally stable, savings start",
        "total_savings": 10000,
    }

    # The project ends back on the API with a net loss.
    worst_case = {
        "month_1": "Procurement delays for GPUs",
        "month_2": "Setup, security review",
        "month_3": "Performance never matches API",
        "month_4": "Fall back to API for high traffic",
        "month_5": "Hybrid mode, complexity increases",
        "month_6": "Engineer quits, back to API",
        "total_savings": -80000,
    }

    projections = {
        "optimistic": best_case,
        "realistic": expected_case,
        "pessimistic": worst_case,
    }
    return projections[scenario]
The Hidden Line Items
def true_cost_self_hosting() -> dict:
    """Break an illustrative monthly self-hosting bill into three buckets.

    The line items are fixed example figures: the obvious infrastructure
    costs, the hidden infrastructure costs, and the engineering time.

    Returns:
        A dict with ``"obvious_total"``, ``"hidden_total"`` and
        ``"eng_total"`` (the sum of each bucket) and ``"true_monthly"``
        (the sum of all three buckets).
    """
    obvious = {
        "gpu_instances": 10000,  # per month
        "storage": 500,
        "networking": 300,
    }
    hidden = {
        "egress": 3000,  # Data transfer out
        "logging": 2000,  # Observability stack
        "redundancy": 5000,  # Failover instances
        "security": 1000,  # Scans, compliance
    }
    eng_time = {
        "maintenance": 4000,  # 0.25 FTE
        "on_call": 2000,  # Incident response
        "optimization": 2000,  # Keeping it fast
    }

    obvious_total = sum(obvious.values())  # $10,800
    hidden_total = sum(hidden.values())  # $11,000
    eng_total = sum(eng_time.values())  # $8,000

    return {
        "obvious_total": obvious_total,
        "hidden_total": hidden_total,
        "eng_total": eng_total,
        # Derived from the buckets rather than hard-coded, so the grand
        # total cannot drift when a line item above is edited.
        "true_monthly": obvious_total + hidden_total + eng_total,  # $29,800, not $10,800
    }
When the Math Actually Works
def scenarios_that_work() -> list:
    """List the situations in which self-hosting is described as paying off.

    Returns:
        A new list of four dicts. Each has a ``"scenario"`` label and a
        ``"why_works"`` explanation, plus one scenario-specific key
        (``"monthly_tokens"``, ``"team"``, ``"requirement"`` or
        ``"constraint"``).
    """
    high_volume = {
        "scenario": "High volume, stable traffic",
        "monthly_tokens": "500M+",
        "why_works": "Fixed costs amortized over huge volume",
    }
    platform_team = {
        "scenario": "Existing ML platform team",
        "team": "3+ dedicated ML infra engineers",
        "why_works": "Marginal cost to add LLM serving",
    }
    low_latency = {
        "scenario": "Latency-critical, regional",
        "requirement": "<50ms P99",
        "why_works": "API can't match, value exceeds cost",
    }
    regulated = {
        "scenario": "Regulatory requirement",
        "constraint": "Data can't leave premises",
        "why_works": "No API option exists",
    }
    return [high_volume, platform_team, low_latency, regulated]
The break-even formula is simple. The inputs are hard to estimate honestly.
When in doubt, multiply your setup time estimate by 2 and your ongoing maintenance estimate by 1.5. If the plan still breaks even within a payback period you can accept, consider it.