34.5 Guardrails: refuse risky changes and require approvals

On this page

Why Guardrails Matter
Blocking Destructive Actions
Protecting Secrets
Protecting Critical Files
The Approval Flow
Complete Implementation
Where to go next

Why Guardrails Matter

An AI coding assistant with file write and command execution capabilities is powerful—and dangerous. Without guardrails, a single prompt injection or hallucination could:

Delete your production database with rm -rf
Push broken code to main with git push --force
Leak your API keys by sending them to an external server
Modify authentication logic to bypass password checks

Guardrails aren't optional—they're the difference between a useful tool and a liability.

Blocking Destructive Actions

Your tool needs strict allowlists and blocklists for commands:

// safety/command-validator.ts

// Commands that are always safe (read-only).
// Entries are matched as whole-word prefixes of the normalized command line.
const ALLOWED_COMMANDS = new Set([
  'ls', 'cat', 'head', 'tail', 'grep', 'find', 'wc',
  'pwd', 'echo', 'date', 'which', 'type', 'file',
  'git status', 'git diff', 'git log', 'git branch',
  'npm list', 'npm outdated', 'npm audit',
  'node --version', 'npm --version'
]);

// Commands that require explicit approval
const APPROVAL_REQUIRED = new Set([
  'npm test', 'npm run', 'npm install',
  'git checkout', 'git stash',
  'mkdir', 'touch'
]);

// Commands that are NEVER allowed
const BLOCKED_PATTERNS = [
  /\brm\s+(-[rf]+\s+)?/,           // rm, rm -rf
  /\bgit\s+push/,                   // git push
  /\bgit\s+reset\s+--hard/,         // git reset --hard
  /\bgit\s+checkout\s+--force/,     // git checkout --force
  /\bsudo\b/,                       // sudo anything
  /\bchmod\s+777/,                  // chmod 777
  /\bcurl.*\|.*sh/,                 // curl | sh
  /\bwget.*\|.*sh/,                 // wget | sh
  /\beval\b/,                       // eval
  /\bnc\s+-/,                       // netcat
  />\s*\/dev\/(sd|hd|nvme)/,        // writing to disk devices
  /\bdd\s+.*of=/,                   // dd (disk destroyer)
  /\bkill\s+-9/,                    // kill -9
  /\bpkill\b/,                      // pkill
  /\bkillall\b/,                    // killall
  /\breboot\b/,                     // reboot
  /\bshutdown\b/,                   // shutdown
  /\bformat\b/,                     // format
  /\bfind\b.*\s-(delete|exec|execdir|ok|okdir)\b/, // find that deletes or executes
];

// Shell control syntax: chaining (; & |), redirection (< >), command
// substitution (` and $( ), and embedded newlines. When any of these is present
// the first word of the command says nothing about what else will run.
const SHELL_OPERATOR_PATTERN = /[;&|`<>\r\n]|\$\(/;

/** Outcome of validating a shell command. */
interface CommandValidation {
  /** False when the command must not be executed at all. */
  allowed: boolean;
  /** True when the command may run only after the user confirms it. */
  requiresApproval: boolean;
  /** Human-readable explanation for a block or an approval request. */
  reason?: string;
}

/**
 * Returns true when `command` is exactly `prefix` or is `prefix` followed by
 * whitespace, so 'ls' matches 'ls -la' but not 'lsof'.
 */
function matchesCommandPrefix(command: string, prefix: string): boolean {
  if (!command.startsWith(prefix)) {
    return false;
  }
  return command.length === prefix.length || /\s/.test(command.charAt(prefix.length));
}

/**
 * Classifies a shell command as blocked, auto-approved, or approval-required.
 *
 * Checks run in priority order:
 *   1. Any BLOCKED_PATTERNS match blocks the command.
 *   2. A command starting with an ALLOWED_COMMANDS or APPROVAL_REQUIRED entry
 *      that also contains shell operators (pipes, chaining, redirection,
 *      substitution) always requires approval, because the prefix cannot vouch
 *      for the rest of the line.
 *   3. ALLOWED_COMMANDS entries run without approval.
 *   4. APPROVAL_REQUIRED entries run after approval.
 *   5. Everything else is blocked by default.
 *
 * @param command Raw command line; it is trimmed and lowercased before matching.
 * @returns The validation verdict, with a reason whenever the command is
 *          blocked or needs approval.
 */
export function validateCommand(command: string): CommandValidation {
  const normalizedCmd = command.trim().toLowerCase();

  // Check blocklist first (highest priority)
  for (const pattern of BLOCKED_PATTERNS) {
    if (pattern.test(normalizedCmd)) {
      return {
        allowed: false,
        requiresApproval: false,
        reason: `Command matches blocked pattern: ${pattern}`
      };
    }
  }

  // Collapse runs of spaces/tabs so 'git   status' still matches 'git status'.
  // Newlines are left alone: they are treated as shell operators below.
  const matchable = normalizedCmd.replace(/[ \t]+/g, ' ');

  const isAllowlisted = [...ALLOWED_COMMANDS].some((prefix) =>
    matchesCommandPrefix(matchable, prefix)
  );
  const isApprovalListed = [...APPROVAL_REQUIRED].some((prefix) =>
    matchesCommandPrefix(matchable, prefix)
  );

  if (isAllowlisted || isApprovalListed) {
    // A known prefix only describes the first command on the line. Anything
    // piped, chained, redirected or substituted must be seen by a human.
    if (SHELL_OPERATOR_PATTERN.test(normalizedCmd)) {
      return {
        allowed: true,
        requiresApproval: true,
        reason: 'Command contains shell operators (chaining, piping, redirection, or substitution)'
      };
    }

    if (isAllowlisted) {
      return { allowed: true, requiresApproval: false };
    }

    return {
      allowed: true,
      requiresApproval: true,
      reason: 'Command modifies project state'
    };
  }

  // Unknown command - block by default
  return {
    allowed: false,
    requiresApproval: false,
    reason: 'Command not in allowlist. Add it explicitly if safe.'
  };
}

Protecting Secrets

Before sending any file to the LLM, scan for and redact sensitive information:

// safety/secret-scanner.ts

// Patterns for well-known credential formats. Every regex carries the `g`
// flag because scanForSecrets walks all occurrences with exec().
const SECRET_PATTERNS = [
  // API Keys
  { pattern: /sk-[a-zA-Z0-9]{32,}/g, name: 'OpenAI API Key' },
  // Stripe keys are issued as sk_live_… / sk_test_…; the hyphenated form is
  // still accepted for backward compatibility.
  { pattern: /sk[-_]live[-_][a-zA-Z0-9]+/g, name: 'Stripe Live Key' },
  { pattern: /sk[-_]test[-_][a-zA-Z0-9]+/g, name: 'Stripe Test Key' },
  { pattern: /AIza[0-9A-Za-z_-]{35}/g, name: 'Google API Key' },
  { pattern: /ghp_[a-zA-Z0-9]{36}/g, name: 'GitHub Personal Token' },
  { pattern: /gho_[a-zA-Z0-9]{36}/g, name: 'GitHub OAuth Token' },
  { pattern: /xox[baprs]-[0-9a-zA-Z-]+/g, name: 'Slack Token' },

  // AWS
  { pattern: /AKIA[0-9A-Z]{16}/g, name: 'AWS Access Key' },
  { pattern: /aws_secret_access_key\s*=\s*["']?[A-Za-z0-9/+=]{40}["']?/gi, name: 'AWS Secret' },

  // Private Keys: match through the END marker so the key body is redacted
  // too. If the END marker is missing (truncated file), fall back to the header.
  {
    pattern: /-----BEGIN (RSA |EC |DSA )?PRIVATE KEY-----(?:[\s\S]*?-----END (RSA |EC |DSA )?PRIVATE KEY-----)?/g,
    name: 'Private Key'
  },
  {
    pattern: /-----BEGIN OPENSSH PRIVATE KEY-----(?:[\s\S]*?-----END OPENSSH PRIVATE KEY-----)?/g,
    name: 'SSH Private Key'
  },

  // Database URLs
  { pattern: /(?:mongodb(?:\+srv)?|postgres(?:ql)?|mysql|redis):\/\/[^\s'"]+/g, name: 'Database URL' },

  // Generic patterns
  { pattern: /password\s*[:=]\s*["'][^"']+["']/gi, name: 'Password' },
  { pattern: /secret\s*[:=]\s*["'][^"']+["']/gi, name: 'Secret' },
  { pattern: /token\s*[:=]\s*["'][^"']+["']/gi, name: 'Token' },
];

/** One occurrence of a suspected secret inside scanned content. */
interface SecretMatch {
  /** Display name of the pattern that matched (e.g. 'AWS Access Key'). */
  type: string;
  /** Offset of the first matched character. */
  startIndex: number;
  /** Offset one past the last matched character. */
  endIndex: number;
  /** The matched text itself. Treat as sensitive; do not log it. */
  original: string;
}

/**
 * Finds every occurrence of every SECRET_PATTERNS entry in `content`.
 *
 * Matches from different patterns may overlap (for example a generic
 * `token = "…"` assignment whose value is also a GitHub token); all of them
 * are reported.
 *
 * @param content Text to scan.
 * @returns Matches grouped by pattern, in pattern order, then by position.
 */
export function scanForSecrets(content: string): SecretMatch[] {
  const matches: SecretMatch[] = [];

  for (const { pattern, name } of SECRET_PATTERNS) {
    // The regexes are shared module state with the `g` flag, so exec() resumes
    // from lastIndex; reset it before each scan.
    pattern.lastIndex = 0;

    let match: RegExpExecArray | null;
    while ((match = pattern.exec(content)) !== null) {
      if (match[0].length === 0) {
        // Defensive: a zero-length match would never advance lastIndex.
        pattern.lastIndex++;
        continue;
      }
      matches.push({
        type: name,
        startIndex: match.index,
        endIndex: match.index + match[0].length,
        original: match[0]
      });
    }
  }

  return matches;
}

/**
 * Replaces every detected secret in `content` with a `[REDACTED_<TYPE>]` marker.
 *
 * Overlapping matches are merged into a single redacted span, labelled with
 * the type of the match that starts first (the widest one on ties). Replacing
 * overlapping matches one by one would apply stale indices to an already
 * modified string and corrupt the output.
 *
 * @param content Text to sanitize.
 * @returns `redacted`: the sanitized text. `secretsFound`: every individual
 *          match (including overlapping ones) in document order.
 */
export function redactSecrets(content: string): {
  redacted: string;
  secretsFound: SecretMatch[]
} {
  // Document order; on equal start the widest match comes first so it supplies
  // the label of the merged span.
  const secrets = [...scanForSecrets(content)].sort(
    (a, b) => a.startIndex - b.startIndex || b.endIndex - a.endIndex
  );

  // Merge overlapping matches into disjoint spans.
  const spans: { start: number; end: number; type: string }[] = [];
  let current: { start: number; end: number; type: string } | undefined;
  for (const secret of secrets) {
    if (current !== undefined && secret.startIndex < current.end) {
      current.end = Math.max(current.end, secret.endIndex);
    } else {
      current = { start: secret.startIndex, end: secret.endIndex, type: secret.type };
      spans.push(current);
    }
  }

  // Rebuild the text left to right from untouched gaps and redaction markers.
  const pieces: string[] = [];
  let cursor = 0;
  for (const span of spans) {
    pieces.push(
      content.slice(cursor, span.start),
      `[REDACTED_${span.type.toUpperCase().replace(/\s+/g, '_')}]`
    );
    cursor = span.end;
  }
  pieces.push(content.slice(cursor));

  return { redacted: pieces.join(''), secretsFound: secrets };
}

Protecting Critical Files

Some files should require extra confirmation or be off-limits entirely:

// safety/file-protector.ts

// Glob patterns grouped by how strictly matching files are protected.
// A single `*` conventionally does not cross a `/`, so directory rules are
// listed in both a single-level and a recursive (`**/*`) form to cover nested
// files such as `.github/workflows/ci.yml`.
const PROTECTED_FILES = {
  // Never modify these
  blocked: [
    '.env',
    '.env.local',
    '.env.production',
    'id_rsa',
    'id_ed25519',
    '*.pem',
    '*.key',
    'credentials.json',
    'serviceAccountKey.json',
  ],

  // Require explicit confirmation
  requireApproval: [
    'package.json',         // Can break build
    'package-lock.json',    // Should be auto-generated
    '*.config.js',          // Config files
    'tsconfig.json',        // TypeScript config
    '.github/*',            // CI/CD files
    '.github/**/*',         // CI/CD files in subdirectories (workflows, actions)
    'Dockerfile',           // Container config
    'docker-compose.yml',   // Container orchestration
    '*.sql',                // Database changes
    '**/migrations/*',      // Database migrations
  ],

  // Warn but allow
  warn: [
    'src/auth/*',           // Authentication logic
    'src/auth/**/*',        // Authentication logic in subdirectories
    'src/security/*',       // Security-related code
    'src/security/**/*',    // Security-related code in subdirectories
    '**/middleware/*',      // Request processing
  ]
};

/**
 * Decides whether a file may be modified, based on the PROTECTED_FILES globs.
 *
 * Matching is case-insensitive: both the path and each pattern are lowercased
 * before being handed to matchGlob, so mixed-case patterns such as
 * 'Dockerfile' and 'serviceAccountKey.json' still match. Backslash separators
 * are converted to '/' and a leading './' is dropped so equivalent spellings
 * of the same path are treated alike.
 *
 * Categories are checked from strictest to most lenient; the first hit wins.
 *
 * @param filePath Path of the file about to be modified.
 * @returns `allowed: false` for blocklisted files; `requiresApproval: true`
 *          for critical config files; a `warning` for any protected category.
 */
export function checkFilePermission(filePath: string): {
  allowed: boolean;
  requiresApproval: boolean;
  warning?: string;
} {
  const normalizedPath = filePath
    .toLowerCase()
    .replace(/\\/g, '/')
    .replace(/^(?:\.\/)+/, '');

  // Lowercase the pattern too; otherwise a case-sensitive matcher can never
  // match a mixed-case pattern against the lowercased path.
  const matchesAny = (patterns: readonly string[]): boolean =>
    patterns.some((pattern) => matchGlob(normalizedPath, pattern.toLowerCase()));

  // Check blocked files
  if (matchesAny(PROTECTED_FILES.blocked)) {
    return {
      allowed: false,
      requiresApproval: false,
      warning: `File "${filePath}" is in the protected blocklist and cannot be modified.`
    };
  }

  // Check files requiring approval
  if (matchesAny(PROTECTED_FILES.requireApproval)) {
    return {
      allowed: true,
      requiresApproval: true,
      warning: `File "${filePath}" is a critical config file. Changes require explicit approval.`
    };
  }

  // Check warning files
  if (matchesAny(PROTECTED_FILES.warn)) {
    return {
      allowed: true,
      requiresApproval: false,
      warning: `⚠️ Modifying security-sensitive file: ${filePath}`
    };
  }

  return { allowed: true, requiresApproval: false };
}

The Approval Flow

Every change goes through a clear approval process:

┌─────────────────────────────────────────────────────────────────┐
│                      APPROVAL FLOW                               │
├─────────────────────────────────────────────────────────────────┤
│                                                                  │
│   ┌──────────┐     ┌──────────┐     ┌──────────┐               │
│   │ VALIDATE │────▶│  REVIEW  │────▶│  APPLY   │               │
│   └──────────┘     └──────────┘     └──────────┘               │
│        │                │                │                      │
│        ▼                ▼                ▼                      │
│   - Check file     - Show diff      - Write to file            │
│     permissions    - Show risks     - Run tests                │
│   - Scan secrets   - Wait for       - Verify success           │
│   - Validate         user input                                │
│     command                                                     │
│                                                                  │
│   If blocked:      If rejected:     If tests fail:             │
│   ───▶ STOP        ───▶ STOP        ───▶ ROLLBACK              │
│                                                                  │
└─────────────────────────────────────────────────────────────────┘

Complete Implementation

// safety/guardrails.ts
import { validateCommand } from './command-validator';
import { redactSecrets } from './secret-scanner';
import { checkFilePermission } from './file-protector';
import * as readline from 'readline';

/** Outcome of an approval step for a file change or a command. */
interface ApprovalResult {
  /** True when the action may proceed. */
  approved: boolean;
  /** Explanation for a refusal; left undefined when the action is approved. */
  reason?: string;
}

/**
 * Asks a yes/no question on the terminal and resolves with the user's choice.
 *
 * Only 'y' or 'yes' (case-insensitive, surrounding whitespace ignored) counts
 * as approval; any other answer is a refusal. If the input stream closes
 * before an answer arrives (EOF, non-interactive run), the promise resolves to
 * false rather than hanging forever.
 *
 * @param message Prompt text; ' (y/n): ' is appended.
 * @returns True when the user approved.
 */
async function confirmWithUser(message: string): Promise<boolean> {
  const rl = readline.createInterface({
    input: process.stdin,
    output: process.stdout
  });

  return new Promise<boolean>((resolve) => {
    // Deny by default when stdin ends without an answer.
    rl.once('close', () => resolve(false));

    rl.question(`${message} (y/n): `, (answer) => {
      const reply = answer.trim().toLowerCase();
      // Resolve before closing: close() fires the 'close' handler above, and
      // only the first resolve() call takes effect.
      resolve(reply === 'y' || reply === 'yes');
      rl.close();
    });
  });
}

/**
 * Facade combining file protection, secret redaction, command validation and
 * interactive user approval.
 */
export class Guardrails {
  /**
   * Checks that a file may be accessed and returns its content with secrets
   * redacted.
   *
   * @param filePath Path of the file being prepared.
   * @param content Raw file content.
   * @returns The redacted content plus any warnings collected along the way.
   * @throws Error when the file is on the protected blocklist.
   */
  async validateAndPrepareFile(
    filePath: string,
    content: string
  ): Promise<{ content: string; warnings: string[] }> {
    const warnings: string[] = [];

    // Check file permissions
    const permission = checkFilePermission(filePath);
    if (!permission.allowed) {
      throw new Error(`Access denied: ${permission.warning}`);
    }
    if (permission.warning) {
      warnings.push(permission.warning);
    }

    // Redact secrets
    const { redacted, secretsFound } = redactSecrets(content);
    if (secretsFound.length > 0) {
      warnings.push(`Redacted ${secretsFound.length} potential secrets`);
      // List only the secret types, never the matched values.
      secretsFound.forEach(s => warnings.push(`  - ${s.type}`));
    }

    return { content: redacted, warnings };
  }

  /**
   * Shows a proposed diff and asks the user whether to apply it.
   *
   * @param filePath Path of the file the diff applies to.
   * @param diff Human-readable diff printed to the console.
   * @returns Not approved (without prompting) for blocklisted files; otherwise
   *          the user's decision.
   */
  async approveFileChange(
    filePath: string,
    diff: string
  ): Promise<ApprovalResult> {
    const permission = checkFilePermission(filePath);

    if (!permission.allowed) {
      return { approved: false, reason: permission.warning };
    }

    console.log('\n' + '─'.repeat(60));
    console.log(`📝 Proposed change to: ${filePath}`);
    console.log('─'.repeat(60));
    console.log(diff);
    console.log('─'.repeat(60));

    if (permission.warning) {
      console.log(`⚠️  ${permission.warning}`);
    }

    const approved = await confirmWithUser('Apply this change?');

    return {
      approved,
      reason: approved ? undefined : 'User rejected change'
    };
  }

  /**
   * Validates a command and, when required, asks the user to confirm it.
   *
   * @param command Shell command the assistant wants to run.
   * @returns Not approved for blocked commands, the user's decision for
   *          commands that require approval, and approved otherwise.
   */
  async approveCommand(command: string): Promise<ApprovalResult> {
    const validation = validateCommand(command);

    if (!validation.allowed) {
      console.log(`🚫 Command blocked: ${validation.reason}`);
      return { approved: false, reason: validation.reason };
    }

    if (validation.requiresApproval) {
      console.log(`\n⚡ Command requires approval: ${command}`);
      if (validation.reason) {
        console.log(`   Reason: ${validation.reason}`);
      }

      const approved = await confirmWithUser('Execute this command?');
      return {
        approved,
        reason: approved ? undefined : 'User rejected command'
      };
    }

    // No approval needed
    return { approved: true };
  }
}

Defense in Depth

Never rely on a single guardrail. Use layers: command validation + file protection + secret scanning + user approval. If one layer fails, others catch the issue.

Where to go next

35. Project 4: Customer Support Triage Assistant