32.1 Why tokens cost more than you think
The Basic Math
Token pricing is usually quoted per 1 million tokens. For example, input might be \$0.35/1M and output might be \$1.05/1M. This sounds incredibly cheap—until you do the math on a real conversation.
A "simple" chat app sends the entire conversation history with every new message. If a user sends 10 messages:
- Msg 1: 100 tokens input
- Msg 2: 100 (original) + 100 (reply) + 100 (new) = 300 tokens input
- Msg 10: 9 × 200 (prior exchanges) + 100 (new) = 1,900 tokens input
You aren't paying for 10 messages. You are paying for the cumulative sum of the entire session history, re-read 10 times.
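The growth above can be sketched in a few lines. This is a toy model, assuming every user message and every reply is exactly 100 tokens as in the example:

```python
# Toy model: cumulative input tokens when the full history is resent each turn.
# Assumes every user message and every reply is ~100 tokens (per the example).
USER_TOKENS = 100
REPLY_TOKENS = 100

def input_tokens_for_turn(n: int) -> int:
    """Tokens sent on turn n: all prior exchanges plus the new message."""
    history = (n - 1) * (USER_TOKENS + REPLY_TOKENS)
    return history + USER_TOKENS

print(input_tokens_for_turn(10))                              # 1900
print(sum(input_tokens_for_turn(n) for n in range(1, 11)))    # 10000
```

Ten messages of 100 tokens each bills 10,000 input tokens: a 10× multiplier purely from resending history.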
The Hidden Multipliers
Several factors multiply your costs unexpectedly:
- System Instructions: A 500-token "persona" or "ruleset" is sent with every single turn. In a 20-turn conversation, that's 10,000 tokens just for the rules.
- Tool Definitions: If you give the model 20 tools to choose from, those definitions (schemas, descriptions) count as input tokens. They are resent every time.
- Chain of Thought: If you ask the model to "think step-by-step," it generates more output tokens. Since output is priced higher than input, verbose reasoning can dominate the cost of a request.
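The fixed overhead from the first two multipliers is easy to estimate. A rough sketch, using the 500-token persona from above and an assumed average of 150 tokens per tool schema (a made-up figure for illustration):

```python
# Sketch: fixed per-turn overhead from system instructions and tool schemas.
# SYSTEM_TOKENS matches the 500-token persona above; TOKENS_PER_TOOL is an
# assumed average schema size, not a measured value.
SYSTEM_TOKENS = 500
NUM_TOOLS = 20
TOKENS_PER_TOOL = 150

def fixed_overhead_per_turn() -> int:
    """Input tokens billed every turn before any user content is counted."""
    return SYSTEM_TOKENS + NUM_TOOLS * TOKENS_PER_TOOL

turns = 20
print(fixed_overhead_per_turn() * turns)  # 70000 tokens over a 20-turn chat
```

Under these assumptions, a 20-turn conversation spends 70,000 input tokens on scaffolding alone, before a single word of user content.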
If you paste a 5,000-line JSON file into context just to ask "what is the value of key X?", you pay for many thousands of input tokens (a line is usually several tokens) to get a one-token answer. Always filter data before sending it to the model.
The RAG Tax
Retrieval Augmented Generation (RAG) is efficient for knowledge, but expensive for tokens. If you retrieve 5 chunks of 1,000 tokens each for every query, you are adding 5,000 tokens of input overhead to every single question.
If the user asks "Hi", and your system blindly retrieves 5 documents about "Hi" from your vector DB, you just wasted money. Always check if retrieval is necessary before adding context.
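A cheap gate in front of the vector DB avoids this. A minimal sketch, where the small-talk list and word-count threshold are assumptions (a small classifier model works too):

```python
# Sketch: skip retrieval for small talk before querying the vector DB.
# The keyword set and length threshold are illustrative assumptions.
SMALL_TALK = {"hi", "hello", "hey", "thanks", "thank you", "bye"}

def needs_retrieval(query: str) -> bool:
    """Return False for greetings and trivially short queries."""
    q = query.strip().lower().rstrip("!.?")
    if q in SMALL_TALK:
        return False
    return len(q.split()) >= 3  # very short queries rarely need documents

print(needs_retrieval("Hi"))                         # False -- no RAG tax
print(needs_retrieval("How do I rotate API keys?"))  # True
```

Even this crude check eliminates the 5,000-token retrieval overhead for every greeting and acknowledgment in a conversation.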