Back to Blog

The Costs You're Not Tracking

Homeowners learn about "invisible" costs the hard way. Between insurance, property taxes, and maintenance, the mortgage payment is only half the story. Smart buyers calculate the total monthly cost, not just principal and interest.

LLM platforms have their own invisible costs. They don't show up on the GPU invoice, but they show up on the finance report.

The Invisible Line Items

def invisible_costs_breakdown():
    """
    Budgeted vs. overlooked monthly costs for a mid-sized deployment
    (~50K requests/day, 2x H100).

    Returns a dict with the budgeted items, the forgotten items, the sum
    of the forgotten items, and a "budget vs actual" summary string.
    """
    budgeted = {"gpu_compute": 8000}

    infrastructure = {
        "egress_charges": 2800,  # Responses leave the VPC
        "logging_ingestion": 1500,  # Every request logged
        "log_storage": 400,  # 90-day retention
        "metrics_storage": 300,  # Prometheus/Datadog
        "backup_storage": 200,  # Model checkpoints
    }
    operations = {
        "on_call_engineer_time": 3000,  # 20 hrs/month
        "incident_response_time": 1500,  # When things break
        "capacity_planning_time": 1000,  # Forecasting
    }
    security_and_compliance = {
        "security_scanning": 300,
        "ssl_certificates": 50,
        "audit_logging": 400,
    }

    # Merge the groups in their original order so the flat dict is unchanged.
    forgotten = {**infrastructure, **operations, **security_and_compliance}

    # Derive the totals from the line items instead of hard-coding them.
    budget_total = sum(budgeted.values())
    invisible_total = sum(forgotten.values())
    actual_total = budget_total + invisible_total
    overrun_ratio = actual_total / budget_total

    return {
        "what_you_budgeted": budgeted,
        "what_you_forgot": forgotten,
        "total_invisible": invisible_total,
        "actual_vs_budget": f"{budget_total} vs {actual_total} ({overrun_ratio:.1f}x)",
    }

Egress: Death by a Thousand Cuts

class EgressCostAnalysis:
    """
    Estimate monthly egress cost for LLM responses.

    Cloud providers charge for data leaving their network; this model
    converts a daily request count into a monthly dollar figure using
    rough, fixed assumptions (see the class constants).
    """

    AVG_RESPONSE_TOKENS = 200  # Average response size
    BYTES_PER_TOKEN = 4  # Rough estimate
    PRICE_PER_GB = 0.08  # Cloud egress pricing, averaged across tiers
    DAYS_PER_MONTH = 30
    # The trap: streaming multiplies egress. Each chunk is a separate
    # response with its own headers and chunking overhead.
    STREAMING_MULTIPLIER = 3
    REFERENCE_DAILY_REQUESTS = 100_000  # Volume behind "at_100K_requests_day"

    def _naive_monthly_cost(self, daily_requests: int) -> float:
        """Return the monthly egress cost in dollars, ignoring streaming overhead."""
        avg_response_bytes = self.AVG_RESPONSE_TOKENS * self.BYTES_PER_TOKEN  # 800 bytes
        daily_gb = daily_requests * avg_response_bytes / 1e9
        return daily_gb * self.DAYS_PER_MONTH * self.PRICE_PER_GB

    def calculate(self, daily_requests: int):
        """
        Estimate monthly egress cost for ``daily_requests`` requests per day.

        Returns a dict with:
          - "naive_estimate": monthly cost from payload bytes alone
          - "with_streaming": naive estimate times the streaming multiplier
          - "at_100K_requests_day": streaming-adjusted monthly cost at a
            fixed 100K requests/day, formatted as a whole-dollar string
        """
        monthly_cost = self._naive_monthly_cost(daily_requests)
        actual_monthly = monthly_cost * self.STREAMING_MULTIPLIER

        # Compute the 100K/day figure from 100K requests directly. Doubling
        # the caller's figure is only right when daily_requests is 50K.
        reference_monthly = (
            self._naive_monthly_cost(self.REFERENCE_DAILY_REQUESTS)
            * self.STREAMING_MULTIPLIER
        )

        return {
            "naive_estimate": monthly_cost,
            "with_streaming": actual_monthly,
            "at_100K_requests_day": f"${reference_monthly:.0f}",
        }

Logging: Necessary but Expensive

def logging_cost_reality():
    """
    Estimate monthly logging cost at three request volumes.

    Sums a per-request log footprint, then prices ingestion, 90-day
    storage, and querying per GB. Returns a dict mapping a volume label
    to the monthly cost in dollars.
    """
    # Bytes logged per request, by component
    per_request_bytes = sum(
        {
            "request_metadata": 200,  # Timestamp, user, model
            "prompt_hash": 64,  # Not full prompt (privacy)
            "response_metadata": 150,
            "latency_breakdown": 100,
            "token_counts": 50,
            "error_details": 200,  # If any
        }.values()
    )  # 764 bytes

    def monthly_cost(requests_per_day: int) -> float:
        """Return the monthly logging bill in dollars for a daily request volume."""
        monthly_gb = (requests_per_day * per_request_bytes / 1e9) * 30

        # Typical cloud logging rates, per GB
        ingest = monthly_gb * 0.50  # Per GB ingested
        retain = monthly_gb * 0.03 * 3  # 90-day retention
        query = monthly_gb * 0.10  # Reasonable query volume

        return ingest + retain + query

    # With the rates above this works out to roughly $0.16, $1.58 and
    # $15.81 per month respectively.
    volumes = (
        ("10K_requests_day", 10_000),
        ("100K_requests_day", 100_000),
        ("1M_requests_day", 1_000_000),
    )
    return {label: monthly_cost(per_day) for label, per_day in volumes}

Engineering Time Nobody Counts

def engineering_time_cost():
    """
    Put a dollar figure on the engineering hours a deployment consumes.

    Returns a dict with total hours per month, their cost at the fully
    loaded hourly rate, and the equivalent fraction of a full-time engineer.
    """
    hourly_rate = 100  # $200K/year ÷ 2000 hours
    fte_hours_per_month = 160

    # (activity, hours per month, what the time goes to)
    activities = (
        ("on_call_incidents", 15, "Responding to alerts, debugging"),
        ("routine_maintenance", 8, "Updates, patches, restarts"),
        ("capacity_planning", 4, "Monitoring trends, planning scaling"),
        ("optimization_work", 8, "Improving performance, reducing cost"),
        ("documentation", 3, "Runbooks, post-mortems"),
        ("security_reviews", 2, "Access audits, vulnerability review"),
    )

    hours = 0
    for _activity, activity_hours, _description in activities:
        hours += activity_hours

    return {
        "hours_per_month": hours,  # 40 hours
        "cost_per_month": hours * hourly_rate,  # $4,000
        "fte_fraction": hours / fte_hours_per_month,  # 0.25 FTE
    }

The Cost Tracking System You Need

class CostTracker:
    """
    Track ALL costs, not just compute.

    Costs are recorded per category via ``add_cost`` and rolled up by
    ``monthly_report``.
    """
    def __init__(self):
        # Each category maps to a list of {"item", "amount", "date"} entries.
        self.categories = {
            "compute": [],  # GPU, CPU instances
            "storage": [],  # Models, logs, backups
            "networking": [],  # Egress, load balancers
            "observability": [],  # Logging, metrics, tracing
            "security": [],  # Scanning, certificates
            "people": [],  # On-call, maintenance time
        }

    def add_cost(self, category: str, item: str, amount: float):
        """
        Record one cost entry under ``category``, stamped with the current time.

        Raises:
            KeyError: if ``category`` is not one of the tracked categories.
        """
        # Imported here so the snippet runs without a module-level import.
        from datetime import datetime

        if category not in self.categories:
            raise KeyError(
                f"unknown cost category {category!r}; "
                f"expected one of {sorted(self.categories)}"
            )

        self.categories[category].append({
            "item": item,
            "amount": amount,
            "date": datetime.now(),
        })

    def monthly_report(self) -> dict:
        """
        Sum recorded amounts per category.

        Returns a dict with one total per category, plus "total" (sum of
        all categories) and "compute_percentage" (compute's share of the
        total, 0-100). With a zero total the percentage is reported as 0.0
        rather than dividing by zero.
        """
        report = {
            category: sum(entry["amount"] for entry in entries)
            for category, entries in self.categories.items()
        }
        total = sum(report.values())
        report["total"] = total
        # Guard the empty tracker (or costs that net to zero).
        report["compute_percentage"] = (
            report["compute"] / total * 100 if total else 0.0
        )
        return report

# Target: compute should be 60-70% of total
# If compute is >80%, you're not tracking something
# If compute is <50%, your overhead is too high

The Checklist

What to track that you're probably not tracking:

# Checklist of cost line items to look for, one human-readable entry per
# item. Entries containing "× hourly rate" are people-time costs: multiply
# the hours spent by the engineer's hourly rate.
hidden_cost_checklist = [
    "Egress charges (responses, logs, metrics export)",
    "Log ingestion cost",
    "Log storage cost (what's your retention?)",
    "Metrics storage (Prometheus, Datadog, etc.)",
    "SSL certificate management",
    "DNS costs",
    "Load balancer charges",
    "NAT gateway costs",
    "Backup storage",
    "Dev/staging environment costs",
    "On-call engineer hours × hourly rate",
    "Incident response hours × hourly rate",
    "Security scanning costs",
    "Compliance audit preparation time",
]

# Rule: If you can't find a line item for it,
# you're probably paying for it somewhere you don't realize

The invisible costs aren't invisible—they're just spread across different invoices and different teams' budgets. Consolidate them into one view, and suddenly your "cheap" deployment isn't so cheap.