Home/
Part XI — Performance & Cost Optimization (Making It Fast and Affordable)/32. Token Economics for Builders/32.1 Why tokens cost more than you think
32.1 Why tokens cost more than you think
Overview and links for this section of the guide.
On this page
Hidden Costs
Tokens accumulate in places you don't expect:
// You think you're sending this:
"Summarize this article" // 4 tokens
// But you're actually sending:
System instruction (500 tokens)
+ Chat history (2000 tokens)
+ Retrieved context (3000 tokens)
+ User message (4 tokens)
─────────────────────────────
= 5,504 input tokens
// And receiving:
Response (800 tokens)
─────────────────────────────
= 800 output tokens
// Cost per request (Gemini 1.5 Pro):
// Input: 5,504 × $1.25/1M ≈ $0.007
// Output: 800 × $5.00/1M = $0.004
// Total: ≈ $0.011 per request
Counting Tokens
// Token counting utility
import { GoogleGenerativeAI } from '@google/generative-ai';
/**
 * Counts the tokens in `text` by asking the Gemini API.
 *
 * The count is requested from the `gemini-1.5-flash` model, using the API key
 * read from the `GOOGLE_API_KEY` environment variable.
 *
 * @param text - The prompt text to measure.
 * @returns The `totalTokens` value reported by the API.
 * @throws Error if `GOOGLE_API_KEY` is not set.
 */
async function countTokens(text: string): Promise<number> {
  const apiKey = process.env.GOOGLE_API_KEY;
  if (!apiKey) {
    // Fail here with a clear message instead of passing `undefined` to the client.
    throw new Error('GOOGLE_API_KEY environment variable is not set');
  }

  const genAI = new GoogleGenerativeAI(apiKey);
  const model = genAI.getGenerativeModel({ model: 'gemini-1.5-flash' });
  const result = await model.countTokens(text);
  return result.totalTokens;
}
// Log token usage on every request
/**
 * Generates content for `prompt` and logs the token usage of the request.
 *
 * Logs an object with the input token count (from `countTokens`), the output
 * token count (from the response's usage metadata, 0 when absent), and an
 * estimated cost in USD.
 *
 * @param prompt - The prompt text sent to the model.
 * @returns The result object returned by `model.generateContent`.
 */
async function loggedGenerate(prompt: string) {
  // USD per token; these equal the Gemini 1.5 Pro rates of $1.25 / $5.00 per 1M tokens.
  const inputRatePerToken = 0.00000125;
  const outputRatePerToken = 0.000005;

  const inputTokens = await countTokens(prompt);

  // NOTE(review): `model` is not declared in this function; it is presumably a
  // generative model instance from an enclosing scope. Confirm before relying on it.
  const result = await model.generateContent(prompt);

  // The SDK wraps the payload: usage metadata lives on `result.response`,
  // not on the result object itself.
  const outputTokens = result.response.usageMetadata?.candidatesTokenCount ?? 0;

  console.log({
    inputTokens,
    outputTokens,
    estimatedCost: (inputTokens * inputRatePerToken) + (outputTokens * outputRatePerToken)
  });

  return result;
}
Cost Tracking
// Track costs per feature/user
/** One recorded request, attributed to a feature and a user for cost tracking. */
interface CostEntry {
  /** Feature the request is attributed to. */
  feature: string;
  /** User the request is attributed to. */
  userId: string;

  /** Model name; used to look up the per-token rates. */
  model: string;
  /** Number of input tokens billed for the request. */
  inputTokens: number;
  /** Number of output tokens billed for the request. */
  outputTokens: number;

  /** Time associated with the entry (presumably when the request was made; confirm with callers). */
  timestamp: Date;
}
/**
 * Computes the USD cost of one request and stores it with the entry.
 *
 * The cost is `(inputTokens * inputRate + outputTokens * outputRate) / 1,000,000`,
 * where the rates are USD per one million tokens for the entry's model. The
 * entry is inserted into `db.costs` with an added `costUSD` field.
 *
 * @param entry - The usage record to price and persist.
 * @throws Error if no pricing is configured for `entry.model`.
 */
async function trackCost(entry: CostEntry): Promise<void> {
  // USD per 1M tokens, keyed by model name. A Map keeps the lookup limited to
  // configured models (no inherited object keys such as "constructor").
  const pricing = new Map<string, { input: number; output: number }>([
    ['gemini-1.5-flash', { input: 0.075, output: 0.30 }],
    ['gemini-1.5-pro', { input: 1.25, output: 5.00 }]
  ]);

  const rate = pricing.get(entry.model);
  if (rate === undefined) {
    // Refuse to record a bogus cost for a model we cannot price.
    throw new Error(`No pricing configured for model "${entry.model}"`);
  }

  const cost = (entry.inputTokens * rate.input +
    entry.outputTokens * rate.output) / 1_000_000;

  await db.costs.insert({ ...entry, costUSD: cost });
}
// Weekly report
// Feature X: 45% of total cost
// User tier breakdown: Free users consuming 80% of tokens