35.5 Evaluation and privacy controls

Overview and links for this section of the guide.

PII Redaction

Customer emails contain sensitive data that should never reach the LLM:

  • Credit card numbers: 4000-1234-5678-9010
  • Social Security Numbers: 123-45-6789
  • Phone numbers: +1 (555) 123-4567
  • Physical addresses: 123 Main St, Anytown, USA
  • Email addresses: (Sometimes keep, sometimes redact)
  • Account numbers: Bank accounts, routing numbers

The model doesn't need these to understand the question. "My card ending in [REDACTED] was declined" is just as useful as the full number.

Implementing the Scrubber

// pii-scrubber.ts
/** One redacted span reported by `PIIScrubber.scrub`. */
interface PIIMatch {
  /** Name of the pattern that produced the span, e.g. 'SSN'. */
  type: string;
  /** Start offset (inclusive) into the ORIGINAL, unscrubbed text. */
  start: number;
  /** End offset (exclusive) into the original text. */
  end: number;
  /** Exact text that was removed. */
  original: string;
  /** Placeholder written in its place. */
  replacement: string;
}

/** A single detector: a global regex plus the placeholder for its matches. */
interface PIIPattern {
  name: string;
  pattern: RegExp;
  replacement: string;
  /** When present, a match only counts if one of these words appears nearby. */
  context?: readonly string[];
}

/** How many characters on each side of a match are searched for context words. */
const PII_CONTEXT_WINDOW = 50;

// Order matters: when two patterns match the very same span, the one listed
// first provides the label (so a 16-digit card is reported as CREDIT_CARD,
// not as BANK_ACCOUNT).
const PII_PATTERNS: readonly PIIPattern[] = [
  {
    name: 'CREDIT_CARD',
    pattern: /\b(?:\d{4}[-\s]?){3}\d{4}\b/g,
    replacement: '[CREDIT_CARD]'
  },
  {
    name: 'SSN',
    pattern: /\b\d{3}[-\s]?\d{2}[-\s]?\d{4}\b/g,
    replacement: '[SSN]'
  },
  {
    name: 'PHONE',
    pattern: /(?:\+?1[-.\s]?)?\(?\d{3}\)?[-.\s]?\d{3}[-.\s]?\d{4}\b/g,
    replacement: '[PHONE]'
  },
  {
    name: 'EMAIL',
    pattern: /\b[\w.-]+@[\w.-]+\.\w{2,}\b/gi,
    replacement: '[EMAIL]'
  },
  {
    name: 'BANK_ACCOUNT',
    pattern: /\b\d{8,17}\b/g,  // Bank account numbers typically 8-17 digits
    replacement: '[ACCOUNT_NUMBER]',
    context: ['account', 'routing', 'bank', 'transfer']  // Only match if near these words
  },
  {
    name: 'ADDRESS',
    pattern: /\d+\s+[\w\s]+(?:street|st|avenue|ave|road|rd|boulevard|blvd|drive|dr|lane|ln|way|court|ct)\b,?\s*[\w\s]*,?\s*[A-Z]{2}\s*\d{5}(?:-\d{4})?/gi,
    replacement: '[ADDRESS]'
  }
];

/**
 * Regex-based, best-effort PII redactor.
 *
 * Every pattern in `PII_PATTERNS` is run against the input; matches are
 * replaced by a bracketed placeholder and reported back as findings.
 */
export class PIIScrubber {
  private readonly preserveEmailDomain: boolean;

  /**
   * @param options.preserveEmailDomain When true, emails are replaced with
   *   `[EMAIL]@<domain>` instead of a bare `[EMAIL]`. Defaults to false.
   */
  constructor(options: { preserveEmailDomain?: boolean } = {}) {
    this.preserveEmailDomain = options.preserveEmailDomain ?? false;
  }

  /**
   * Redacts PII from `text`.
   *
   * @returns `clean` - the text with every finding replaced by its placeholder;
   *   `findings` - the redacted spans in text order, non-overlapping, with
   *   offsets that refer to the original `text`.
   */
  scrub(text: string): { clean: string; findings: PIIMatch[] } {
    const findings = this.mergeOverlaps(text, this.collectMatches(text));

    // Findings are sorted and disjoint, so one forward pass rebuilds the
    // string without any offset bookkeeping.
    let clean = '';
    let cursor = 0;
    for (const finding of findings) {
      clean += text.slice(cursor, finding.start) + finding.replacement;
      cursor = finding.end;
    }
    clean += text.slice(cursor);

    return { clean, findings };
  }

  /** Runs every pattern over `text` and returns the raw (possibly overlapping) matches. */
  private collectMatches(text: string): PIIMatch[] {
    const matches: PIIMatch[] = [];

    for (const { name, pattern, replacement, context } of PII_PATTERNS) {
      // The regexes are shared module-level objects with the `g` flag, so
      // their cursor must be rewound before each scan.
      pattern.lastIndex = 0;

      let match: RegExpExecArray | null;
      while ((match = pattern.exec(text)) !== null) {
        const original = match[0];
        if (original.length === 0) {
          // A pattern that can match the empty string would never advance.
          pattern.lastIndex += 1;
          continue;
        }

        const start = match.index;
        const end = start + original.length;

        if (context !== undefined && !this.hasNearbyContext(text, start, end, context)) {
          continue;
        }

        let finalReplacement = replacement;
        if (name === 'EMAIL' && this.preserveEmailDomain) {
          // The EMAIL pattern admits exactly one '@', so this is the domain.
          const domain = original.slice(original.lastIndexOf('@') + 1);
          finalReplacement = `[EMAIL]@${domain}`;
        }

        matches.push({ type: name, start, end, original, replacement: finalReplacement });
      }
    }

    return matches;
  }

  /** True when any context word occurs within the window around [start, end). */
  private hasNearbyContext(
    text: string,
    start: number,
    end: number,
    context: readonly string[]
  ): boolean {
    const surrounding = text
      .slice(Math.max(0, start - PII_CONTEXT_WINDOW), Math.min(text.length, end + PII_CONTEXT_WINDOW))
      .toLowerCase();
    return context.some(word => surrounding.includes(word));
  }

  /**
   * Collapses overlapping matches into disjoint findings in text order.
   *
   * Several patterns can hit the same digits (the PHONE regex matches the
   * tail of an unseparated card number; BANK_ACCOUNT matches a whole card
   * number). Replacing each of them independently corrupts the output, so
   * overlapping matches are merged into one span covering their union. The
   * span keeps the label of the match that starts first; ties go to the
   * longer match, then to the pattern listed first in `PII_PATTERNS`.
   */
  private mergeOverlaps(text: string, matches: PIIMatch[]): PIIMatch[] {
    // Array.prototype.sort is stable, so matches that tie on both keys stay
    // in pattern-priority order.
    const ordered = [...matches].sort(
      (a, b) => a.start - b.start || b.end - a.end
    );

    const merged: PIIMatch[] = [];
    for (const candidate of ordered) {
      const last = merged[merged.length - 1];
      if (last === undefined || candidate.start >= last.end) {
        merged.push({ ...candidate });
      } else if (candidate.end > last.end) {
        // Partial overlap: widen the span so nothing sensitive is left behind.
        last.end = candidate.end;
        last.original = text.slice(last.start, last.end);
      }
    }
    return merged;
  }
}

// Usage
// Demonstrates scrubbing a short message while keeping the email domain.
const scrubber = new PIIScrubber({ preserveEmailDomain: true });
const result = scrubber.scrub(`
  Hi, my name is John and my card 4000-1234-5678-9010 was declined.
  Please call me at 555-123-4567 or email john@example.com.
  My SSN is 123-45-6789 for verification.
`);

console.log(result.clean);
// Hi, my name is John and my card [CREDIT_CARD] was declined.
// Please call me at [PHONE] or email [EMAIL]@example.com.
// My SSN is [SSN] for verification.

console.log(result.findings);
// [
//   { type: 'CREDIT_CARD', original: '4000-1234-5678-9010', ... },
//   { type: 'PHONE', original: '555-123-4567', ... },
//   { type: 'EMAIL', original: 'john@example.com', ... },
//   { type: 'SSN', original: '123-45-6789', ... }
// ]
Defense in Depth

PII scrubbing is a best-effort defense. Use it alongside API-level protections (Google's enterprise privacy guarantees), access controls, and audit logging. Never rely on a single layer.

Building an Evaluation Set

Before launching, build a labeled test set to measure performance:

// evaluation-set.ts
/**
 * One labeled example in the evaluation set: an input email plus the
 * ground-truth answers the pipeline's output is compared against.
 */
interface EvaluationCase {
  /** Identifier for the case; echoed back in the evaluation results. */
  id: string;
  /** Raw email text fed to the classifier and the extractor. */
  email: string;
  
  // Ground truth labels
  /** Category the classifier is expected to predict. */
  correctCategory: string;
  /** Expected extraction output; every field is optional. */
  expectedFields: {
    order_ids?: string[];
    sentiment?: string;
    urgency?: string;
  };
  
  // For response quality
  acceptableResponse?: string;
  unacceptablePatterns?: string[];  // Phrases that should NOT appear
  
  // Metadata
  /** Where the example came from. */
  source: 'production' | 'synthetic' | 'edge_case';
  /** Difficulty bucket; accuracy is reported per bucket. */
  difficulty: 'easy' | 'medium' | 'hard';
}

// Seed evaluation cases: two easy lookups and one hard, angry refund request.
// Note that no case is labeled 'medium' yet.
const EVAL_SET: EvaluationCase[] = [
  {
    id: 'eval-001',
    email: 'Where is my order #12345?',
    correctCategory: 'SHIPPING_STATUS',
    expectedFields: {
      order_ids: ['12345'],
      sentiment: 'neutral',
      urgency: 'medium'
    },
    source: 'production',
    difficulty: 'easy'
  },
  {
    id: 'eval-002',
    email: `I've been waiting 3 weeks for my refund and nobody is helping me. 
            If I don't get my money back today I'm calling my lawyer.`,
    correctCategory: 'BILLING_REFUND',
    expectedFields: {
      sentiment: 'angry',
      urgency: 'urgent'
    },
    unacceptablePatterns: [
      'your refund has been processed',  // Can't confirm without checking
      'within 30 days',  // Email says 3 weeks already; a stock 30-day answer doesn't address the complaint
    ],
    source: 'production',
    difficulty: 'hard'
  },
  {
    id: 'eval-003',
    email: 'Can I get a copy of my receipt from my purchase last month?',
    correctCategory: 'BILLING_INVOICE',
    expectedFields: {
      sentiment: 'neutral',
      urgency: 'low'
    },
    source: 'synthetic',
    difficulty: 'easy'
  }
];

// Run evaluation
/**
 * Runs the classifier and extractor over every case in `evalSet` and
 * summarizes how well the predictions match the labels.
 *
 * All cases are evaluated concurrently; if any classify/extract call
 * rejects, the whole evaluation rejects.
 *
 * @param classifier Predicts a category (and confidence) for an email.
 * @param extractor Extracts structured fields from an email.
 * @param evalSet Labeled cases to score.
 * @returns A promise of `{ accuracy, byDifficulty, confusionMatrix, failures }`:
 *   overall category accuracy, accuracy per difficulty bucket, a confusion
 *   matrix, and the per-case results whose category was wrong.
 */
// The return type is left to inference on purpose: a bare `Promise` without
// a type argument does not compile, and the summary's shape depends on
// helpers (calculateAccuracy, buildConfusionMatrix) declared elsewhere.
async function runEvaluation(
  classifier: EmailClassifier,
  extractor: TicketDataExtractor,
  evalSet: EvaluationCase[]
) {
  const results = await Promise.all(
    evalSet.map(async (testCase) => {
      const classification = await classifier.classify(testCase.email);
      const extraction = await extractor.extract(testCase.email);
      
      return {
        id: testCase.id,
        categoryCorrect: classification.category === testCase.correctCategory,
        predictedCategory: classification.category,
        confidence: classification.confidence,
        extractionCorrect: checkExtraction(extraction, testCase.expectedFields),
        difficulty: testCase.difficulty
      };
    })
  );
  
  return {
    // NOTE(review): this is NaN when evalSet is empty — callers should pass at least one case.
    accuracy: results.filter(r => r.categoryCorrect).length / results.length,
    byDifficulty: {
      easy: calculateAccuracy(results.filter(r => r.difficulty === 'easy')),
      medium: calculateAccuracy(results.filter(r => r.difficulty === 'medium')),
      hard: calculateAccuracy(results.filter(r => r.difficulty === 'hard'))
    },
    confusionMatrix: buildConfusionMatrix(results, evalSet),
    failures: results.filter(r => !r.categoryCorrect)
  };
}

Key Metrics to Track

| Metric | What It Measures | Target |
| --- | --- | --- |
| Classification Accuracy | % of emails correctly categorized | >90% for production |
| Auto-Reply Rate | % of tickets handled automatically | 30-50% is good |
| Draft Approval Rate | % of AI drafts accepted by humans | >85% |
| False Auto-Reply Rate | Auto-replies that were wrong | <1% |
| Escalation Rate | % routed to human | Depends on use case |
| Response Time | API latency for classification | <500ms p95 |
// metrics-tracking.ts
/**
 * Aggregated ticket-handling counters for a single day.
 */
interface DailyMetrics {
  date: string;
  /** Number of tickets processed that day; denominator for rate calculations. */
  totalTickets: number;
  autoReplied: number;
  draftedForReview: number;
  escalatedToHuman: number;
  
  // Quality metrics
  draftsApproved: number;
  draftsEdited: number;
  draftsRejected: number;
  
  // Performance
  avgClassificationTime: number;  // NOTE(review): unit not stated here — presumably milliseconds; confirm
  avgConfidence: number;          // Treated as a 0-1 fraction by drift detection (shown as a percentage)
  
  // Errors
  classificationErrors: number;  // Human corrected the category
  responseErrors: number;        // Customer complained about response
}

/**
 * Emits one 'ticket_processed' analytics event describing how a ticket was
 * handled and, when a human reviewed it, what they did with the draft.
 *
 * @param decision The routing decision produced for the ticket.
 * @param humanAction Optional reviewer outcome; its fields are reported as
 *   undefined when no human was involved.
 */
function trackMetrics(decision: TicketDecision, humanAction?: HumanAction) {
  const { action, queue, confidence, processingTime } = decision;

  // Assemble the payload first so the event shape is visible at a glance.
  const payload = {
    action,
    category: queue,
    confidence,
    wasApproved: humanAction?.approved,
    wasEdited: humanAction?.edited,
    responseTime: processingTime
  };

  // Log to your analytics system
  analytics.track('ticket_processed', payload);
}

Monitoring for Drift

Customer language changes over time. A new product launch, trending issue, or seasonal event can confuse the model:

// drift-detection.ts
/**
 * A single warning produced by drift detection.
 */
interface DriftAlert {
  /** Which signal tripped. */
  type: 'accuracy_drop' | 'confidence_drop' | 'category_shift' | 'unknown_spike';
  severity: 'low' | 'medium' | 'high';
  /** Human-readable description of what changed. */
  message: string;
  /** Suggested follow-up action. */
  recommendation: string;
}

/**
 * Compares today's metrics with a historical baseline and reports drift.
 *
 * Three signals are checked:
 *  - draft approval rate (approved / (approved + rejected)) more than 10
 *    points below the baseline average ('high' when more than 20 points);
 *  - average confidence more than 0.1 below the baseline average;
 *  - escalation rate more than 1.5x the baseline average.
 *
 * Days with no reviewed drafts or no tickets have no defined rate and are
 * left out of the corresponding comparison instead of poisoning it with NaN.
 *
 * @param current Metrics for the day being checked.
 * @param baseline Metrics for the reference period.
 * @returns Zero or more alerts; empty when `baseline` is empty.
 */
function detectDrift(
  current: DailyMetrics,
  baseline: DailyMetrics[]
): DriftAlert[] {
  const alerts: DriftAlert[] = [];

  // Without history there is nothing to compare against.
  if (baseline.length === 0) {
    return alerts;
  }

  // A rate is undefined (rather than NaN) when its denominator is zero.
  const approvalRate = (day: DailyMetrics): number | undefined => {
    const reviewed = day.draftsApproved + day.draftsRejected;
    return reviewed > 0 ? day.draftsApproved / reviewed : undefined;
  };
  const escalationRate = (day: DailyMetrics): number | undefined =>
    day.totalTickets > 0 ? day.escalatedToHuman / day.totalTickets : undefined;
  const isDefined = (value: number | undefined): value is number => value !== undefined;

  // Check accuracy drop
  const baselineAccuracies = baseline.map(approvalRate).filter(isDefined);
  const currentAccuracy = approvalRate(current);
  if (currentAccuracy !== undefined && baselineAccuracies.length > 0) {
    const avgAccuracy = average(baselineAccuracies);
    if (currentAccuracy < avgAccuracy - 0.1) {
      alerts.push({
        type: 'accuracy_drop',
        severity: currentAccuracy < avgAccuracy - 0.2 ? 'high' : 'medium',
        message: `Accuracy dropped from ${(avgAccuracy*100).toFixed(1)}% to ${(currentAccuracy*100).toFixed(1)}%`,
        recommendation: 'Review recent failures and add to eval set'
      });
    }
  }

  // Check confidence drop
  const avgConfidence = average(baseline.map(b => b.avgConfidence));
  if (current.avgConfidence < avgConfidence - 0.1) {
    alerts.push({
      type: 'confidence_drop',
      severity: 'medium',
      message: `Average confidence dropped to ${(current.avgConfidence*100).toFixed(1)}%`,
      recommendation: 'Model may be seeing unfamiliar patterns - review "OTHER" category'
    });
  }

  // Check spike in "OTHER" category.
  // NOTE(review): the escalation rate stands in for the "OTHER" share here —
  // confirm that escalations are what this alert is meant to track.
  const baselineOtherRates = baseline.map(escalationRate).filter(isDefined);
  const otherRate = escalationRate(current);
  if (otherRate !== undefined && baselineOtherRates.length > 0) {
    const avgOtherRate = average(baselineOtherRates);
    if (otherRate > avgOtherRate * 1.5) {
      alerts.push({
        type: 'unknown_spike',
        severity: 'high',
        // A zero baseline has no meaningful relative increase ("Infinity%").
        message: avgOtherRate > 0
          ? `"OTHER" category up ${((otherRate/avgOtherRate - 1) * 100).toFixed(0)}% from baseline`
          : `"OTHER" category at ${(otherRate*100).toFixed(1)}% of tickets, up from a 0% baseline`,
        recommendation: 'New issue type emerging - review and add new category if needed'
      });
    }
  }

  return alerts;
}
Weekly Eval Reviews

Re-run your evaluation set weekly. If accuracy drops, investigate recent failures. You may need to update prompts, add examples, or create new categories.

Where to go next