37.5 Auditability and replay

Overview and links for this section of the guide.

Why Auditability

Agent systems are opaque by default. When something goes wrong, you need to be able to answer:

  • What did the agent think?
  • What actions did it take?
  • Why did it make that decision?
  • Can I reproduce this behavior?

Structured Logging

// audit-log.ts
/**
 * One structured record in an agent run's audit trail.
 * Emitted by AuditLogger; consumed by AgentReplayer and the
 * visualization helpers.
 */
interface AuditEntry {
  // Unique within a run: `${runId}-${sequence index}` (assigned in AuditLogger.log).
  id: string;
  // Epoch milliseconds (Date.now()), assigned when the entry is logged.
  timestamp: number;
  // What kind of event this entry records.
  type: 'thought' | 'action' | 'observation' | 'decision';
  
  // Content: human-readable description of the thought/action/observation.
  content: string;
  
  // Context: where in the run this happened and what it cost.
  stepNumber: number;
  tokensUsed: number;
  latencyMs: number;
  
  // For actions (and the matching observation's result).
  // NOTE(review): `toolResult?: any` disables checking for every consumer;
  // `unknown` would force narrowing — worth considering, but changing it
  // would break callers that read it untyped.
  toolName?: string;
  toolParams?: object;
  toolResult?: any;
  
  // For decisions: alternatives that were considered and the agent's confidence.
  alternatives?: string[];
  confidence?: number;
  
  // Debugging: full prompt/response capture (log in dev; redact in prod).
  promptSent?: string;
  rawResponse?: string;
}

/**
 * Collects AuditEntry records for a single agent run and exposes
 * export/query helpers. One instance per run.
 */
class AuditLogger {
  // Entries in insertion order; the array index doubles as the per-run sequence number.
  private entries: AuditEntry[] = [];
  // Random UUID identifying this run; prefixes every entry id.
  private runId: string;
  
  constructor() {
    this.runId = crypto.randomUUID();
  }
  
  /**
   * Append an entry. `id` and `timestamp` are filled in here, so callers
   * supply everything else.
   * Fix: the original signature was a bare `Omit` with no type arguments,
   * which does not compile; the intended type is inferred from the fields
   * this method assigns.
   */
  log(entry: Omit<AuditEntry, 'id' | 'timestamp'>) {
    this.entries.push({
      ...entry,
      id: `${this.runId}-${this.entries.length}`,
      timestamp: Date.now()
    });
  }
  
  /** Serialize the whole run as pretty-printed JSON (the format AgentReplayer consumes). */
  export(): string {
    return JSON.stringify({
      runId: this.runId,
      startTime: this.entries[0]?.timestamp,
      endTime: this.entries[this.entries.length - 1]?.timestamp,
      // "Steps" here means actions taken, not total entries logged.
      totalSteps: this.entries.filter(e => e.type === 'action').length,
      entries: this.entries
    }, null, 2);
  }
  
  // Query interface
  
  /** All action entries that invoked the given tool. */
  getActionsByTool(toolName: string): AuditEntry[] {
    return this.entries.filter(e => e.toolName === toolName);
  }
  
  /**
   * Observations whose content mentions an error.
   * Fix: match case-insensitively so "Error: ..." / "ERROR ..." are caught;
   * the original matched only lowercase 'error'.
   */
  getFailures(): AuditEntry[] {
    return this.entries.filter(e => 
      e.type === 'observation' && e.content.toLowerCase().includes('error')
    );
  }
}

Replay System

Replay lets you re-run an agent session with the same inputs to debug issues:

// replay.ts
/** Options controlling how AgentReplayer.replay re-executes a session. */
interface ReplayConfig {
  // Use recorded observations instead of executing tools
  // (deterministic, side-effect-free replay).
  mockTools: boolean;
  
  // Step through one action at a time, pausing between actions.
  stepByStep: boolean;
  
  // Modify the prompt to test variations.
  // NOTE(review): not read anywhere in the visible replay code — confirm
  // it is consumed elsewhere before relying on it.
  promptOverride?: string;
}

/**
 * Re-runs a recorded agent session from an AuditLogger export.
 * With `mockTools: true` the recorded observations stand in for real
 * tool execution, so replays are deterministic and side-effect free.
 */
class AgentReplayer {
  // Entries of the recorded run, in original order.
  private log: AuditEntry[];
  // NOTE(review): unused in the visible code; presumably a cursor for
  // step-by-step mode — confirm before removing.
  private currentStep: number = 0;
  
  /** @param auditLog JSON string produced by AuditLogger.export(). */
  constructor(auditLog: string) {
    // NOTE(review): no schema validation — assumes a well-formed export.
    const parsed = JSON.parse(auditLog);
    this.log = parsed.entries;
  }
  
  /**
   * Walk every recorded action, re-obtain its result (mocked or live),
   * and report whether the replay matches the recording.
   * The result shape is declared inline because the original referenced
   * an undeclared `ReplayResult` type.
   */
  async replay(config: ReplayConfig = { mockTools: true, stepByStep: false }) {
    const results: Array<{
      step: number;
      action: string;
      originalResult: unknown;
      replayResult: unknown;
      match: boolean;
    }> = [];
    
    for (const entry of this.log) {
      if (entry.type !== 'action' || !entry.toolName) continue;
      
      // The recorded outcome lives on the observation entry that shares
      // this action's step number (looked up once, not twice as before).
      const recorded = this.log.find(e =>
        e.stepNumber === entry.stepNumber && e.type === 'observation'
      )?.toolResult;
      
      const result = config.mockTools
        ? recorded // use the recorded observation result
        : await this.executeTool(entry.toolName, entry.toolParams);
      
      results.push({
        step: entry.stepNumber,
        action: entry.toolName,
        originalResult: recorded,
        replayResult: result,
        // Fix: compare against the recorded observation result. The
        // original compared against entry.toolResult — a field on the
        // *action* entry that AuditLogger never sets — so matches were
        // reported incorrectly.
        match: JSON.stringify(result) === JSON.stringify(recorded)
      });
      
      if (config.stepByStep) {
        await this.waitForContinue();
      }
    }
    
    return results;
  }
  
  /**
   * Live tool execution hook for mockTools=false; wire this to the real
   * tool registry. (Fix: the original called this method without ever
   * defining it, which does not compile.)
   */
  private async executeTool(toolName: string, params?: object): Promise<unknown> {
    throw new Error(`executeTool not wired up: ${toolName}`);
  }
  
  /** Pause point for stepByStep mode; resolves immediately by default. */
  private async waitForContinue(): Promise<void> {}
}

Visualization

Generate a visual trace of agent execution:

// visualize.ts

/**
 * Render an agent trace as a standalone HTML page, one card per
 * thought/action step.
 * NOTE: the HTML template was garbled in the source and has been
 * reconstructed; step content is interpolated without escaping, so only
 * feed trusted logs (or add HTML escaping before serving to a browser).
 */
function generateTraceHtml(log: AuditEntry[]): string {
  const steps = log.filter(e => e.type === 'action' || e.type === 'thought');

  const cards = steps.map(step => `
    <div class="step ${step.type}">
      <div class="meta">Step ${step.stepNumber} | ${step.latencyMs}ms | ${step.tokensUsed} tokens</div>
      <div class="content">${step.type.toUpperCase()}: ${step.content}</div>
      ${step.toolName ? `<code>${step.toolName}(${JSON.stringify(step.toolParams)})</code>` : ''}
    </div>`).join('');

  return `<!DOCTYPE html>
<html>
<head>
  <meta charset="utf-8">
  <title>Agent Trace</title>
  <style>
    body { font-family: sans-serif; margin: 2rem; }
    .step { border: 1px solid #ddd; border-radius: 6px; padding: 0.75rem; margin-bottom: 0.5rem; }
    .meta { color: #666; font-size: 0.8rem; }
  </style>
</head>
<body>
  <h1>Agent Trace</h1>
  ${cards}
</body>
</html>`;
}

/**
 * Emit a Mermaid `graph TD` diagram of the tool calls, in order.
 * Fix: the original emitted a dangling `A-1 --> END((End))` edge when the
 * log contained no actions; an empty log now yields START --> END.
 */
function generateMermaidDiagram(log: AuditEntry[]): string {
  const actions = log.filter(e => e.type === 'action');

  let diagram = 'graph TD\n';
  diagram += '  START((Start))\n';

  if (actions.length === 0) {
    diagram += '  START --> END((End))\n';
    return diagram;
  }

  actions.forEach((action, i) => {
    const id = `A${i}`;
    const label = action.toolName || 'unknown';
    diagram += `  ${id}[${label}]\n`;
    diagram += i === 0 ? `  START --> ${id}\n` : `  A${i - 1} --> ${id}\n`;
  });

  diagram += `  A${actions.length - 1} --> END((End))\n`;
  return diagram;
}
Log Everything in Development

In development, log full prompts and responses. In production, log decisions and results, but redact sensitive data — PII, credentials, and raw prompt contents. You'll thank yourself when debugging.

Where to go next