35.5 Evaluation and privacy controls

On this page

PII Redaction
Implementing the Scrubber
Building an Evaluation Set
Key Metrics to Track
Monitoring for Drift
Where to go next

PII Redaction

Customer emails contain sensitive data that should never reach the LLM:

Credit card numbers: 4000-1234-5678-9010
Social Security Numbers: 123-45-6789
Phone numbers: +1 (555) 123-4567
Physical addresses: 123 Main St, Anytown, USA
Email addresses: (Sometimes keep, sometimes redact)
Account numbers: Bank accounts, routing numbers

The model doesn't need these to understand the question. "My card ending in [REDACTED] was declined" is just as useful as the full number.

Implementing the Scrubber

// pii-scrubber.ts
/** A single redacted span reported by {@link PIIScrubber.scrub}. */
interface PIIMatch {
  /** Name of the pattern that produced the finding, e.g. `'SSN'`. */
  type: string;
  /** Offset of the first redacted character in the ORIGINAL text. */
  start: number;
  /** Offset one past the last redacted character in the ORIGINAL text. */
  end: number;
  /** The exact substring of the original text that was redacted. */
  original: string;
  /** The placeholder written in its place. */
  replacement: string;
}

/** One detection rule applied by {@link PIIScrubber}. */
interface PIIPattern {
  name: string;
  /** Must carry the `g` flag: the scrubber walks all matches with `exec`. */
  pattern: RegExp;
  replacement: string;
  /**
   * When present, a match only counts if one of these words occurs
   * (case-insensitively) in the window from 50 characters before the match
   * to 50 characters after it.
   */
  context?: readonly string[];
}

// Order matters only as a tie-breaker: when two rules match the very same
// span, the rule listed first supplies the label.
const PII_PATTERNS: readonly PIIPattern[] = [
  {
    name: 'CREDIT_CARD',
    pattern: /\b(?:\d{4}[-\s]?){3}\d{4}\b/g,
    replacement: '[CREDIT_CARD]'
  },
  {
    name: 'SSN',
    pattern: /\b\d{3}[-\s]?\d{2}[-\s]?\d{4}\b/g,
    replacement: '[SSN]'
  },
  {
    name: 'PHONE',
    pattern: /(?:\+?1[-.\s]?)?\(?\d{3}\)?[-.\s]?\d{3}[-.\s]?\d{4}\b/g,
    replacement: '[PHONE]'
  },
  {
    name: 'EMAIL',
    pattern: /\b[\w.-]+@[\w.-]+\.\w{2,}\b/gi,
    replacement: '[EMAIL]'
  },
  {
    name: 'BANK_ACCOUNT',
    pattern: /\b\d{8,17}\b/g,  // Bank account numbers typically 8-17 digits
    replacement: '[ACCOUNT_NUMBER]',
    context: ['account', 'routing', 'bank', 'transfer']  // Only match if near these words
  },
  {
    name: 'ADDRESS',
    pattern: /\d+\s+[\w\s]+(?:street|st|avenue|ave|road|rd|boulevard|blvd|drive|dr|lane|ln|way|court|ct)\b,?\s*[\w\s]*,?\s*[A-Z]{2}\s*\d{5}(?:-\d{4})?/gi,
    replacement: '[ADDRESS]'
  }
];

/**
 * Regex-based, best-effort PII redactor.
 *
 * Every rule in `PII_PATTERNS` is run over the input; each hit is replaced by
 * the rule's placeholder. Hits from different rules that overlap (for example
 * an unseparated 16-digit card number, whose tail also looks like a phone
 * number) are merged into a single redaction so the output is never spliced
 * with stale offsets and no part of any hit is left in the clear.
 */
export class PIIScrubber {
  private readonly preserveEmailDomain: boolean;

  /**
   * @param options.preserveEmailDomain When true, e-mail addresses are
   *   rewritten to `[EMAIL]@<domain>` instead of `[EMAIL]`. Defaults to false.
   */
  constructor(options: { preserveEmailDomain?: boolean } = {}) {
    this.preserveEmailDomain = options.preserveEmailDomain ?? false;
  }

  /**
   * Redacts PII from `text`.
   *
   * @param text Raw input; it is not modified.
   * @returns `clean`, the redacted text, and `findings`, one entry per
   *   redacted span in document order, with offsets into the original `text`.
   */
  scrub(text: string): { clean: string; findings: PIIMatch[] } {
    const findings = this.mergeOverlaps(text, this.collectCandidates(text));

    // Findings are sorted and disjoint, so a single left-to-right pass can
    // stitch the output together without any offset bookkeeping.
    const pieces: string[] = [];
    let cursor = 0;
    for (const finding of findings) {
      pieces.push(text.slice(cursor, finding.start), finding.replacement);
      cursor = finding.end;
    }
    pieces.push(text.slice(cursor));

    return { clean: pieces.join(''), findings };
  }

  /** Runs every rule over `text` and returns all raw hits, possibly overlapping. */
  private collectCandidates(text: string): PIIMatch[] {
    const candidates: PIIMatch[] = [];

    for (const { name, pattern, replacement, context } of PII_PATTERNS) {
      // The regexes are shared module-level objects with the `g` flag, so
      // their cursor must be rewound before each scan.
      pattern.lastIndex = 0;

      let match: RegExpExecArray | null;
      while ((match = pattern.exec(text)) !== null) {
        const original = match[0];
        const start = match.index;
        const end = start + original.length;

        if (original.length === 0) {
          // A zero-width match would never advance `lastIndex`; step past it.
          pattern.lastIndex += 1;
          continue;
        }

        // Context check for rules that are too broad on their own.
        if (context) {
          const surrounding = text
            .slice(Math.max(0, start - 50), Math.min(text.length, end + 50))
            .toLowerCase();
          if (!context.some(word => surrounding.includes(word))) continue;
        }

        let finalReplacement = replacement;
        // Special handling for emails - optionally keep the domain.
        if (name === 'EMAIL' && this.preserveEmailDomain) {
          const domain = original.split('@')[1];
          finalReplacement = `[EMAIL]@${domain}`;
        }

        candidates.push({ type: name, start, end, original, replacement: finalReplacement });
      }
    }

    return candidates;
  }

  /**
   * Collapses overlapping hits into disjoint findings sorted by `start`.
   *
   * A merged finding covers the union of its members. Its label comes from
   * the longest member; ties go to the member that starts first, then to the
   * rule listed first in `PII_PATTERNS` (the sort is stable).
   */
  private mergeOverlaps(text: string, candidates: readonly PIIMatch[]): PIIMatch[] {
    const ordered = [...candidates].sort((a, b) => a.start - b.start || b.end - a.end);
    const merged: PIIMatch[] = [];
    let labelLength = 0; // length of the hit that currently labels the last finding

    for (const candidate of ordered) {
      const length = candidate.end - candidate.start;
      const last = merged[merged.length - 1];

      if (last === undefined || candidate.start >= last.end) {
        merged.push({ ...candidate });
        labelLength = length;
        continue;
      }

      if (length > labelLength) {
        last.type = candidate.type;
        last.replacement = candidate.replacement;
        labelLength = length;
      }
      if (candidate.end > last.end) {
        last.end = candidate.end;
      }
      last.original = text.slice(last.start, last.end);
    }

    return merged;
  }
}

// Usage
// Usage: scrub a sample message while keeping e-mail domains visible.
const scrubber = new PIIScrubber({ preserveEmailDomain: true });
const result = scrubber.scrub(`
  Hi, my name is John and my card 4000-1234-5678-9010 was declined.
  Please call me at 555-123-4567 or email john@example.com.
  My SSN is 123-45-6789 for verification.
`);

console.log(result.clean);
// Hi, my name is John and my card [CREDIT_CARD] was declined.
// Please call me at [PHONE] or email [EMAIL]@example.com.
// My SSN is [SSN] for verification.

console.log(result.findings);
// [
//   { type: 'CREDIT_CARD', original: '4000-1234-5678-9010', ... },
//   { type: 'PHONE', original: '555-123-4567', ... },
//   { type: 'EMAIL', original: 'john@example.com', ... },
//   { type: 'SSN', original: '123-45-6789', ... }
// ]

Defense in Depth

PII scrubbing is a best-effort defense. Use it alongside API-level protections (Google's enterprise privacy guarantees), access controls, and audit logging. Never rely on a single layer.

Building an Evaluation Set

Before launching, build a labeled test set to measure performance:

// evaluation-set.ts
/**
 * One labeled example in the evaluation set: an input email together with
 * the ground truth it is scored against.
 */
interface EvaluationCase {
  /** Identifier of the case (the samples use the form `eval-001`). */
  id: string;
  /** The email text fed to the system under test. */
  email: string;

  /** Ground truth label: the category the email should be assigned. */
  correctCategory: string;
  /** Ground truth labels for extracted fields; every field is optional. */
  expectedFields: {
    order_ids?: string[];
    sentiment?: string;
    urgency?: string;
  };

  /** For response quality: a response that would be acceptable. */
  acceptableResponse?: string;
  /** For response quality: phrases that should NOT appear in a response. */
  unacceptablePatterns?: string[];

  /** Metadata: where the case came from. */
  source: 'production' | 'synthetic' | 'edge_case';
  /** Metadata: difficulty bucket. */
  difficulty: 'easy' | 'medium' | 'hard';
}

// Sample labeled cases: two taken from production and one synthetic, covering
// an easy shipping lookup, a hard escalating refund complaint, and an easy
// invoice request.
const EVAL_SET: EvaluationCase[] = [
  {
    id: 'eval-001',
    email: 'Where is my order #12345?',
    correctCategory: 'SHIPPING_STATUS',
    expectedFields: {
      order_ids: ['12345'],
      sentiment: 'neutral',
      urgency: 'medium'
    },
    source: 'production',
    difficulty: 'easy'
  },
  {
    id: 'eval-002',
    email: `I've been waiting 3 weeks for my refund and nobody is helping me. 
            If I don't get my money back today I'm calling my lawyer.`,
    correctCategory: 'BILLING_REFUND',
    expectedFields: {
      sentiment: 'angry',
      urgency: 'urgent'
    },
    unacceptablePatterns: [
      'your refund has been processed',  // Can't confirm without checking
      'within 30 days',  // Quoting a generic window ignores that they have already waited 3 weeks
    ],
    source: 'production',
    difficulty: 'hard'
  },
  {
    id: 'eval-003',
    email: 'Can I get a copy of my receipt from my purchase last month?',
    correctCategory: 'BILLING_INVOICE',
    expectedFields: {
      sentiment: 'neutral',
      urgency: 'low'
    },
    source: 'synthetic',
    difficulty: 'easy'
  }
];

// Run evaluation
/**
 * Runs every case in `evalSet` through the classifier and the extractor and
 * summarizes how often the predicted category matched the ground truth.
 *
 * All cases are started concurrently via `Promise.all`, so a single rejected
 * call rejects the whole evaluation.
 *
 * The return type is inferred rather than annotated: it depends on the return
 * types of `checkExtraction`, `calculateAccuracy` and `buildConfusionMatrix`,
 * which are declared elsewhere.
 *
 * @param classifier Assigns a category and confidence to an email.
 * @param extractor Pulls structured fields out of an email.
 * @param evalSet Labeled cases to score.
 * @returns A promise for the report: overall `accuracy` (0 for an empty set),
 *   accuracy `byDifficulty`, a `confusionMatrix`, and the list of `failures`
 *   (cases whose category was predicted incorrectly).
 */
async function runEvaluation(
  classifier: EmailClassifier,
  extractor: TicketDataExtractor,
  evalSet: EvaluationCase[]
) {
  const results = await Promise.all(
    evalSet.map(async (testCase) => {
      const classification = await classifier.classify(testCase.email);
      const extraction = await extractor.extract(testCase.email);

      return {
        id: testCase.id,
        categoryCorrect: classification.category === testCase.correctCategory,
        predictedCategory: classification.category,
        confidence: classification.confidence,
        extractionCorrect: checkExtraction(extraction, testCase.expectedFields),
        difficulty: testCase.difficulty
      };
    })
  );

  const failures = results.filter(r => !r.categoryCorrect);
  const correctCount = results.length - failures.length;

  return {
    // An empty set would give 0/0 = NaN, and NaN compares false against any
    // threshold, so a "fail if accuracy < X" gate would silently pass.
    accuracy: results.length === 0 ? 0 : correctCount / results.length,
    byDifficulty: {
      easy: calculateAccuracy(results.filter(r => r.difficulty === 'easy')),
      medium: calculateAccuracy(results.filter(r => r.difficulty === 'medium')),
      hard: calculateAccuracy(results.filter(r => r.difficulty === 'hard'))
    },
    confusionMatrix: buildConfusionMatrix(results, evalSet),
    failures
  };
}

Key Metrics to Track

Metric	What It Measures	Target
Classification Accuracy	% of emails correctly categorized	>90% for production
Auto-Reply Rate	% of tickets handled automatically	30-50% is good
Draft Approval Rate	% of AI drafts accepted by humans	>85%
False Auto-Reply Rate	Auto-replies that were wrong	<1%
Escalation Rate	% routed to human	Depends on use case
Response Time	API latency for classification	<500ms p95

// metrics-tracking.ts
/** Aggregated ticket-handling counters and averages for a single day. */
interface DailyMetrics {
  /** The day these figures belong to (format not fixed here). */
  date: string;
  /** Number of tickets processed. */
  totalTickets: number;
  /** Tickets answered automatically. */
  autoReplied: number;
  /** Tickets for which a draft was queued for human review. */
  draftedForReview: number;
  /** Tickets routed to a human. */
  escalatedToHuman: number;

  /** Quality: drafts a human approved. */
  draftsApproved: number;
  /** Quality: drafts a human edited. */
  draftsEdited: number;
  /** Quality: drafts a human rejected. */
  draftsRejected: number;

  /** Performance: average classification time (unit not specified here). */
  avgClassificationTime: number;
  /** Performance: average classifier confidence. */
  avgConfidence: number;

  /** Errors: a human corrected the category. */
  classificationErrors: number;
  /** Errors: a customer complained about the response. */
  responseErrors: number;
}

/**
 * Reports one processed ticket to the analytics system as a
 * `ticket_processed` event.
 *
 * @param decision The routing decision taken for the ticket.
 * @param humanAction The reviewer's follow-up, if any; when omitted,
 *   `wasApproved` and `wasEdited` are sent as `undefined`.
 */
function trackMetrics(decision: TicketDecision, humanAction?: HumanAction) {
  const eventProperties = {
    action: decision.action,
    // The decision's queue is reported under the `category` key.
    category: decision.queue,
    confidence: decision.confidence,
    wasApproved: humanAction?.approved,
    wasEdited: humanAction?.edited,
    responseTime: decision.processingTime
  };

  analytics.track('ticket_processed', eventProperties);
}

Monitoring for Drift

Customer language changes over time. A new product launch, trending issue, or seasonal event can confuse the model:

// drift-detection.ts
/** A single warning describing a detected drift condition. */
interface DriftAlert {
  /** Which kind of drift was detected. */
  type: 'accuracy_drop' | 'confidence_drop' | 'category_shift' | 'unknown_spike';
  /** How serious the alert is. */
  severity: 'low' | 'medium' | 'high';
  /** Human-readable description of what changed. */
  message: string;
  /** Suggested follow-up action. */
  recommendation: string;
}

/**
 * Compares one day's metrics against a baseline window and reports drift.
 *
 * Three checks run, in this order:
 *  1. Accuracy, measured as `draftsApproved / (draftsApproved + draftsRejected)`:
 *     alert when it falls more than 0.1 below the baseline average
 *     (`high` severity beyond 0.2, otherwise `medium`).
 *  2. Confidence: alert when `avgConfidence` falls more than 0.1 below the
 *     baseline average.
 *  3. "OTHER" spike, measured as `escalatedToHuman / totalTickets`: alert when
 *     it exceeds 1.5x the baseline average.
 *
 * Days with nothing to measure (no approved or rejected drafts, or no tickets)
 * are left out of the relevant baseline instead of contributing `0/0 = NaN`,
 * and a check is skipped entirely when either side of it is undefined.
 *
 * @param current Metrics for the day under inspection.
 * @param baseline Metrics for the reference days; may be empty.
 * @returns The alerts raised, possibly none.
 */
function detectDrift(
  current: DailyMetrics,
  baseline: DailyMetrics[]
): DriftAlert[] {
  const alerts: DriftAlert[] = [];

  // A ratio is undefined, not NaN, when there is nothing in the denominator.
  const ratio = (numerator: number, denominator: number): number | undefined =>
    denominator > 0 ? numerator / denominator : undefined;
  const isDefined = (value: number | undefined): value is number => value !== undefined;
  const approvalRate = (m: DailyMetrics) =>
    ratio(m.draftsApproved, m.draftsApproved + m.draftsRejected);
  const escalationRate = (m: DailyMetrics) => ratio(m.escalatedToHuman, m.totalTickets);

  // Check accuracy drop
  const baselineAccuracies = baseline.map(approvalRate).filter(isDefined);
  const currentAccuracy = approvalRate(current);
  if (currentAccuracy !== undefined && baselineAccuracies.length > 0) {
    // `average` is declared elsewhere; presumably the arithmetic mean.
    const avgAccuracy = average(baselineAccuracies);
    if (currentAccuracy < avgAccuracy - 0.1) {
      alerts.push({
        type: 'accuracy_drop',
        severity: currentAccuracy < avgAccuracy - 0.2 ? 'high' : 'medium',
        message: `Accuracy dropped from ${(avgAccuracy*100).toFixed(1)}% to ${(currentAccuracy*100).toFixed(1)}%`,
        recommendation: 'Review recent failures and add to eval set'
      });
    }
  }

  // Check confidence drop
  if (baseline.length > 0) {
    const avgConfidence = average(baseline.map(b => b.avgConfidence));
    if (current.avgConfidence < avgConfidence - 0.1) {
      alerts.push({
        type: 'confidence_drop',
        severity: 'medium',
        message: `Average confidence dropped to ${(current.avgConfidence*100).toFixed(1)}%`,
        recommendation: 'Model may be seeing unfamiliar patterns - review "OTHER" category'
      });
    }
  }

  // Check spike in "OTHER" category (measured via the escalation rate)
  const baselineOtherRates = baseline.map(escalationRate).filter(isDefined);
  const otherRate = escalationRate(current);
  if (otherRate !== undefined && baselineOtherRates.length > 0) {
    const avgOtherRate = average(baselineOtherRates);
    if (otherRate > avgOtherRate * 1.5) {
      // A zero baseline has no meaningful percentage increase; report the
      // absolute rate instead of "Infinity%".
      const change = avgOtherRate > 0
        ? `up ${((otherRate/avgOtherRate - 1) * 100).toFixed(0)}% from baseline`
        : `at ${(otherRate*100).toFixed(1)}% against a baseline of 0%`;
      alerts.push({
        type: 'unknown_spike',
        severity: 'high',
        message: `"OTHER" category ${change}`,
        recommendation: 'New issue type emerging - review and add new category if needed'
      });
    }
  }

  return alerts;
}

Weekly Eval Reviews

Re-run your evaluation set weekly. If accuracy drops, investigate recent failures. You may need to update prompts, add examples, or create new categories.

Where to go next

36. Project 5: Data-to-Insights Analyst Tool