Back to Blog
The Costs You're Not Tracking
Homeowners learn about "invisible" costs the hard way. Insurance, property taxes, maintenance—the mortgage payment is only half the story. Smart buyers calculate total monthly cost, not just principal and interest.
LLM platforms have their own invisible costs. They don't show up on the GPU invoice, but they show up on the finance report.
The Invisible Line Items
def invisible_costs_breakdown():
    """
    Itemize the non-GPU costs of a mid-sized LLM deployment.

    Real numbers from a mid-sized deployment:
    ~50K requests/day, 2x H100.

    Returns a dict with the budgeted line items, the forgotten ones, and
    totals. Totals are derived from the line items (the original
    hard-coded 11450 / "19450 (2.4x)", which could silently drift if a
    line item changed).
    """
    budgeted = {
        "gpu_compute": 8000,
    }
    forgotten = {
        # Infrastructure
        "egress_charges": 2800,         # Responses leave the VPC
        "logging_ingestion": 1500,      # Every request logged
        "log_storage": 400,             # 90-day retention
        "metrics_storage": 300,         # Prometheus/Datadog
        "backup_storage": 200,          # Model checkpoints
        # Operations
        "on_call_engineer_time": 3000,  # 20 hrs/month
        "incident_response_time": 1500, # When things break
        "capacity_planning_time": 1000, # Forecasting
        # Security & Compliance
        "security_scanning": 300,
        "ssl_certificates": 50,
        "audit_logging": 400,
    }
    budget_total = sum(budgeted.values())          # 8000
    invisible_total = sum(forgotten.values())      # 11450
    actual_total = budget_total + invisible_total  # 19450
    return {
        "what_you_budgeted": budgeted,
        "what_you_forgot": forgotten,
        "total_invisible": invisible_total,
        "actual_vs_budget": (
            f"{budget_total} vs {actual_total} "
            f"({actual_total / budget_total:.1f}x)"
        ),
    }
Egress: Death by a Thousand Cuts
class EgressCostAnalysis:
    """
    Estimate monthly cloud egress cost for LLM responses.

    Cloud providers charge for data leaving their network. This model is
    deliberately rough: a fixed response size and a flat per-GB price
    averaged across pricing tiers.
    """

    AVG_RESPONSE_TOKENS = 200   # typical completion length
    BYTES_PER_TOKEN = 4         # rough estimate for English text
    PRICE_PER_GB = 0.08         # simplified average across egress tiers
    STREAMING_MULTIPLIER = 3    # headers + chunking overhead per chunk

    def calculate(self, daily_requests: int) -> dict:
        """
        Return naive and streaming-adjusted monthly egress estimates.

        The "at_100K_requests_day" figure is scaled from the supplied
        volume. (The original hard-coded a 2x factor, which was only
        correct for exactly 50K requests/day.)
        """
        avg_response_bytes = self.AVG_RESPONSE_TOKENS * self.BYTES_PER_TOKEN  # 800 bytes
        daily_gb = daily_requests * avg_response_bytes / 1e9
        monthly_cost = daily_gb * 30 * self.PRICE_PER_GB
        # The trap: streaming multiplies egress — each chunk is a
        # separate response with its own framing overhead.
        actual_monthly = monthly_cost * self.STREAMING_MULTIPLIER
        # Project to 100K/day from the given volume; guard zero input
        # (zero requests means zero egress at any volume scaling).
        at_100k = (
            actual_monthly * (100_000 / daily_requests) if daily_requests else 0.0
        )
        return {
            "naive_estimate": monthly_cost,
            "with_streaming": actual_monthly,
            "at_100K_requests_day": f"${at_100k:.0f}",
        }
Logging: Necessary but Expensive
def logging_cost_reality():
    """
    Estimate monthly logging cost at three request volumes.

    You need logs. Logs cost money — ingestion, retention, and the
    queries you run against them all bill separately.
    """
    # What you need to log per request (sizes in bytes)
    log_components = {
        "request_metadata": 200,  # Timestamp, user, model
        "prompt_hash": 64,        # Not full prompt (privacy)
        "response_metadata": 150,
        "latency_breakdown": 100,
        "token_counts": 50,
        "error_details": 200,     # If any
    }
    bytes_per_request = sum(log_components.values())  # 764 bytes

    def monthly_cost(requests_per_day: int) -> float:
        """Monthly $ for ingesting, retaining (90d), and querying logs."""
        gb_per_day = requests_per_day * bytes_per_request / 1e9
        gb_per_month = gb_per_day * 30
        # Typical cloud logging pricing
        ingestion_cost = gb_per_month * 0.50    # Per GB ingested
        storage_cost = gb_per_month * 0.03 * 3  # 90-day retention = 3 months
        query_cost = gb_per_month * 0.10        # Reasonable query volume
        return ingestion_cost + storage_cost + query_cost

    # NOTE(review): the original comments claimed ~$12 / ~$120 / ~$1,200
    # here, but the formula above actually yields ~$0.16 / ~$1.58 / ~$15.81.
    # Comments corrected to match the computation.
    return {
        "10K_requests_day": monthly_cost(10_000),    # ~$0.16
        "100K_requests_day": monthly_cost(100_000),  # ~$1.58
        "1M_requests_day": monthly_cost(1_000_000),  # ~$15.81
    }
Engineering Time Nobody Counts
def engineering_time_cost():
    """
    Price out the recurring engineering hours a deployment consumes.

    Applies a fully-loaded hourly rate to a fixed monthly activity
    breakdown and reports total hours, total cost, and the fraction of
    one FTE (160 h/month) consumed.
    """
    rate_per_hour = 100  # $200K/year / 2000 working hours

    # (activity, hours/month, description)
    activity_table = [
        ("on_call_incidents", 15, "Responding to alerts, debugging"),
        ("routine_maintenance", 8, "Updates, patches, restarts"),
        ("capacity_planning", 4, "Monitoring trends, planning scaling"),
        ("optimization_work", 8, "Improving performance, reducing cost"),
        ("documentation", 3, "Runbooks, post-mortems"),
        ("security_reviews", 2, "Access audits, vulnerability review"),
    ]

    hours = sum(row[1] for row in activity_table)
    return {
        "hours_per_month": hours,                # 40 hours
        "cost_per_month": hours * rate_per_hour, # $4,000
        "fte_fraction": hours / 160,             # 0.25 FTE
    }
The Cost Tracking System You Need
class CostTracker:
    """
    Track ALL deployment costs, not just compute.

    Costs are recorded into fixed categories; monthly_report() rolls
    them up and reports compute's share of total spend.
    """

    def __init__(self):
        # One bucket per category; each entry is
        # {"item": str, "amount": float, "date": datetime}.
        self.categories = {
            "compute": [],        # GPU, CPU instances
            "storage": [],        # Models, logs, backups
            "networking": [],     # Egress, load balancers
            "observability": [],  # Logging, metrics, tracing
            "security": [],       # Scanning, certificates
            "people": [],         # On-call, maintenance time
        }

    def add_cost(self, category: str, item: str, amount: float) -> None:
        """Record a cost entry. Raises KeyError for an unknown category."""
        # Local import: the surrounding article never imports datetime,
        # so the original line raised NameError at runtime.
        from datetime import datetime
        self.categories[category].append({
            "item": item,
            "amount": amount,
            "date": datetime.now(),
        })

    def monthly_report(self) -> dict:
        """Sum each category, plus "total" and "compute_percentage" keys."""
        report = {
            category: sum(entry["amount"] for entry in entries)
            for category, entries in self.categories.items()
        }
        report["total"] = sum(report.values())
        # Guard: the original divided by zero on an empty tracker.
        report["compute_percentage"] = (
            report["compute"] / report["total"] * 100 if report["total"] else 0.0
        )
        return report

# Target: compute should be 60-70% of total
# If compute is >80%, you're not tracking something
# If compute is <50%, your overhead is too high
The Checklist
What to track that you're probably not tracking:
# Cost line items that rarely appear on the compute budget but are billed
# somewhere — networking, observability, security, and engineering time.
# Walk this list when reconciling a deployment's true monthly cost.
hidden_cost_checklist = [
    "Egress charges (responses, logs, metrics export)",
    "Log ingestion cost",
    "Log storage cost (what's your retention?)",
    "Metrics storage (Prometheus, Datadog, etc.)",
    "SSL certificate management",
    "DNS costs",
    "Load balancer charges",
    "NAT gateway costs",
    "Backup storage",
    "Dev/staging environment costs",
    "On-call engineer hours × hourly rate",
    "Incident response hours × hourly rate",
    "Security scanning costs",
    "Compliance audit preparation time",
]
# Rule: If you can't find a line item for it,
# you're probably paying for it somewhere you don't realize
The invisible costs aren't invisible—they're just spread across different invoices and different teams' budgets. Consolidate them into one view, and suddenly your "cheap" deployment isn't so cheap.