35.5 Evaluation and privacy controls
Overview and links for this section of the guide.
On this page
PII Redaction
Customer emails contain sensitive data that should never reach the LLM:
- Credit card numbers: 4000-1234-5678-9010
- Social Security Numbers: 123-45-6789
- Phone numbers: +1 (555) 123-4567
- Physical addresses: 123 Main St, Anytown, USA
- Email addresses: (Sometimes keep, sometimes redact)
- Account numbers: Bank accounts, routing numbers
The model doesn't need these to understand the question. "My card ending in [REDACTED] was declined" is just as useful as the full number.
Implementing the Scrubber
// pii-scrubber.ts

/** A single piece of PII located in the input text. */
interface PIIMatch {
  /** Name of the pattern that produced the finding (e.g. 'SSN'). */
  type: string;
  /** Index of the first covered character in the original text. */
  start: number;
  /** Index one past the last covered character in the original text. */
  end: number;
  /** The original text covered by this finding. */
  original: string;
  /** The placeholder written in place of `original`. */
  replacement: string;
}

/** One detection rule used by the scrubber. */
interface PIIPattern {
  name: string;
  /** Must carry the `g` flag: `String.prototype.matchAll` throws otherwise. */
  pattern: RegExp;
  replacement: string;
  /** When present, a match only counts if one of these words appears nearby. */
  context?: readonly string[];
}

/** Number of characters inspected on each side of a match when looking for context words. */
const CONTEXT_WINDOW = 50;

/**
 * Detection rules. Order matters only as a tie-break: when two rules match
 * exactly the same span, the rule listed first wins.
 */
const PII_PATTERNS: readonly PIIPattern[] = [
  {
    name: 'CREDIT_CARD',
    pattern: /\b(?:\d{4}[-\s]?){3}\d{4}\b/g,
    replacement: '[CREDIT_CARD]'
  },
  {
    name: 'SSN',
    pattern: /\b\d{3}[-\s]?\d{2}[-\s]?\d{4}\b/g,
    replacement: '[SSN]'
  },
  {
    name: 'PHONE',
    pattern: /(?:\+?1[-.\s]?)?\(?\d{3}\)?[-.\s]?\d{3}[-.\s]?\d{4}\b/g,
    replacement: '[PHONE]'
  },
  {
    name: 'EMAIL',
    pattern: /\b[\w.-]+@[\w.-]+\.\w{2,}\b/gi,
    replacement: '[EMAIL]'
  },
  {
    name: 'BANK_ACCOUNT',
    pattern: /\b\d{8,17}\b/g, // Bank account numbers typically 8-17 digits
    replacement: '[ACCOUNT_NUMBER]',
    context: ['account', 'routing', 'bank', 'transfer'] // Only match if near these words
  },
  {
    name: 'ADDRESS',
    pattern: /\d+\s+[\w\s]+(?:street|st|avenue|ave|road|rd|boulevard|blvd|drive|dr|lane|ln|way|court|ct)\b,?\s*[\w\s]*,?\s*[A-Z]{2}\s*\d{5}(?:-\d{4})?/gi,
    replacement: '[ADDRESS]'
  }
];

/**
 * Replaces PII found by `PII_PATTERNS` with placeholders.
 *
 * The patterns are heuristics, so this is a best-effort filter rather than a
 * guarantee that no sensitive data remains.
 */
export class PIIScrubber {
  private readonly preserveEmailDomain: boolean;

  /**
   * @param options.preserveEmailDomain When true, an email is replaced with
   *   `[EMAIL]@<domain>` instead of `[EMAIL]`. Defaults to false.
   */
  constructor(options: { preserveEmailDomain?: boolean } = {}) {
    this.preserveEmailDomain = options.preserveEmailDomain ?? false;
  }

  /**
   * Scrubs `text` and reports what was replaced.
   *
   * @param text Raw input; it is not modified.
   * @returns `clean` is the redacted text. `findings` lists the replaced
   *   spans in document order, never overlapping, with `start`/`end`
   *   referring to offsets in the original `text`.
   */
  scrub(text: string): { clean: string; findings: PIIMatch[] } {
    const findings = this.mergeOverlaps(text, this.collectMatches(text));

    // Rebuild the text left to right. Because findings are sorted and
    // disjoint, every offset still refers to the untouched original string.
    const pieces: string[] = [];
    let cursor = 0;
    for (const finding of findings) {
      pieces.push(text.slice(cursor, finding.start), finding.replacement);
      cursor = finding.end;
    }
    pieces.push(text.slice(cursor));

    return { clean: pieces.join(''), findings };
  }

  /** Runs every rule over `text` and returns all raw matches, in rule order. */
  private collectMatches(text: string): PIIMatch[] {
    const matches: PIIMatch[] = [];
    for (const { name, pattern, replacement, context } of PII_PATTERNS) {
      // matchAll works on a copy of the regex, so the shared module-level
      // patterns never carry `lastIndex` state between calls.
      for (const match of text.matchAll(pattern)) {
        const original = match[0];
        const start = match.index ?? 0;
        const end = start + original.length;

        if (context && !this.hasNearbyKeyword(text, start, end, context)) continue;

        matches.push({
          type: name,
          start,
          end,
          original,
          replacement: this.placeholderFor(name, original, replacement)
        });
      }
    }
    return matches;
  }

  /** True when any keyword occurs within CONTEXT_WINDOW characters of the match (case-insensitive). */
  private hasNearbyKeyword(
    text: string,
    start: number,
    end: number,
    keywords: readonly string[]
  ): boolean {
    const surrounding = text
      .slice(Math.max(0, start - CONTEXT_WINDOW), Math.min(text.length, end + CONTEXT_WINDOW))
      .toLowerCase();
    return keywords.some(keyword => surrounding.includes(keyword));
  }

  /** Picks the placeholder for a match, keeping the email domain when configured to. */
  private placeholderFor(name: string, original: string, fallback: string): string {
    if (name === 'EMAIL' && this.preserveEmailDomain) {
      // The EMAIL pattern admits exactly one '@', so everything after it is the domain.
      const domain = original.slice(original.indexOf('@') + 1);
      return `[EMAIL]@${domain}`;
    }
    return fallback;
  }

  /**
   * Turns raw matches into sorted, non-overlapping findings.
   *
   * Different rules can hit overlapping text (PHONE, for instance, can match
   * the tail of an unseparated card number). Replacing overlapping spans
   * independently corrupts the output, so overlaps are folded into the
   * left-most match; if the later match reaches further right, the kept
   * finding is extended so that no matched character is left unredacted.
   */
  private mergeOverlaps(text: string, matches: PIIMatch[]): PIIMatch[] {
    // Left-most first, longer first on equal start. Array.prototype.sort is
    // stable, so identical spans stay in rule order (earlier rule wins).
    const ordered = [...matches].sort((a, b) => a.start - b.start || b.end - a.end);

    const merged: PIIMatch[] = [];
    for (const candidate of ordered) {
      const previous = merged[merged.length - 1];
      if (previous === undefined || candidate.start >= previous.end) {
        merged.push({ ...candidate });
        continue;
      }
      if (candidate.end > previous.end) {
        previous.end = candidate.end;
        previous.original = text.slice(previous.start, previous.end);
      }
    }
    return merged;
  }
}
// Usage: scrub a sample support email, keeping the email domain visible.
const scrubber = new PIIScrubber({ preserveEmailDomain: true });
// The template literal is left-aligned on purpose: leading whitespace would
// become part of the scrubbed text.
const result = scrubber.scrub(`
Hi, my name is John and my card 4000-1234-5678-9010 was declined.
Please call me at 555-123-4567 or email john@example.com.
My SSN is 123-45-6789 for verification.
`);
console.log(result.clean);
// Hi, my name is John and my card [CREDIT_CARD] was declined.
// Please call me at [PHONE] or email [EMAIL]@example.com.
// My SSN is [SSN] for verification.
console.log(result.findings);
// [
// { type: 'CREDIT_CARD', original: '4000-1234-5678-9010', ... },
// { type: 'PHONE', original: '555-123-4567', ... },
// { type: 'EMAIL', original: 'john@example.com', ... },
// { type: 'SSN', original: '123-45-6789', ... }
// ]
PII scrubbing is a best-effort defense. Use it alongside API-level protections (Google's enterprise privacy guarantees), access controls, and audit logging. Never rely on a single layer.
Building an Evaluation Set
Before launching, build a labeled test set to measure performance:
// evaluation-set.ts
/** One labeled example used to measure classifier and extractor quality. */
interface EvaluationCase {
  /** Identifier for this case (e.g. 'eval-001'). */
  id: string;
  /** The customer email text fed to the classifier and extractor. */
  email: string;
  // Ground truth labels
  /** The category the classifier is expected to return for `email`. */
  correctCategory: string;
  /** Values the extractor's output is checked against. */
  expectedFields: {
    order_ids?: string[];
    sentiment?: string;
    urgency?: string;
  };
  // For response quality
  acceptableResponse?: string;
  unacceptablePatterns?: string[]; // Phrases that should NOT appear
  // Metadata
  /** Where the example came from. */
  source: 'production' | 'synthetic' | 'edge_case';
  /** Difficulty bucket; evaluation accuracy is reported per bucket. */
  difficulty: 'easy' | 'medium' | 'hard';
}
// Sample labeled cases: an easy shipping question, a hard angry refund
// request, and an easy synthetic invoice request.
const EVAL_SET: EvaluationCase[] = [
  {
    id: 'eval-001',
    email: 'Where is my order #12345?',
    correctCategory: 'SHIPPING_STATUS',
    expectedFields: {
      order_ids: ['12345'],
      sentiment: 'neutral',
      urgency: 'medium'
    },
    source: 'production',
    difficulty: 'easy'
  },
  {
    id: 'eval-002',
    // The second line of this template literal is left-aligned on purpose:
    // leading whitespace would become part of the email text.
    email: `I've been waiting 3 weeks for my refund and nobody is helping me.
If I don't get my money back today I'm calling my lawyer.`,
    correctCategory: 'BILLING_REFUND',
    expectedFields: {
      sentiment: 'angry',
      urgency: 'urgent'
    },
    unacceptablePatterns: [
      'your refund has been processed', // Can't confirm without checking
      'within 30 days', // Customer has already waited 3 weeks and wants action today; quoting a generic window is a brush-off
    ],
    source: 'production',
    difficulty: 'hard'
  },
  {
    id: 'eval-003',
    email: 'Can I get a copy of my receipt from my purchase last month?',
    correctCategory: 'BILLING_INVOICE',
    expectedFields: {
      sentiment: 'neutral',
      urgency: 'low'
    },
    source: 'synthetic',
    difficulty: 'easy'
  }
];
/**
 * Runs every case in `evalSet` through the classifier and the extractor and
 * summarizes how well the predicted categories match the labels.
 *
 * All cases are evaluated concurrently; within one case the classifier is
 * awaited before the extractor.
 *
 * The return type is inferred rather than annotated because the summary
 * embeds the return types of helpers declared outside this block
 * (`calculateAccuracy`, `buildConfusionMatrix`, `checkExtraction`).
 *
 * @param classifier Classifier under test.
 * @param extractor Field extractor under test.
 * @param evalSet Labeled cases to run.
 * @returns A promise of the overall accuracy, accuracy per difficulty bucket,
 *   the confusion matrix, and the per-case results whose category was wrong.
 *   `accuracy` is 0 (not NaN) when `evalSet` is empty.
 */
async function runEvaluation(
  classifier: EmailClassifier,
  extractor: TicketDataExtractor,
  evalSet: EvaluationCase[]
) {
  const results = await Promise.all(
    evalSet.map(async (testCase) => {
      const classification = await classifier.classify(testCase.email);
      const extraction = await extractor.extract(testCase.email);
      return {
        id: testCase.id,
        categoryCorrect: classification.category === testCase.correctCategory,
        predictedCategory: classification.category,
        confidence: classification.confidence,
        extractionCorrect: checkExtraction(extraction, testCase.expectedFields),
        difficulty: testCase.difficulty
      };
    })
  );

  const failures = results.filter(r => !r.categoryCorrect);
  const correctCount = results.length - failures.length;

  return {
    // Guard the empty set: 0 / 0 would otherwise report NaN.
    accuracy: results.length === 0 ? 0 : correctCount / results.length,
    byDifficulty: {
      easy: calculateAccuracy(results.filter(r => r.difficulty === 'easy')),
      medium: calculateAccuracy(results.filter(r => r.difficulty === 'medium')),
      hard: calculateAccuracy(results.filter(r => r.difficulty === 'hard'))
    },
    confusionMatrix: buildConfusionMatrix(results, evalSet),
    failures
  };
}
Key Metrics to Track
| Metric | What It Measures | Target |
|---|---|---|
| Classification Accuracy | % of emails correctly categorized | >90% for production |
| Auto-Reply Rate | % of tickets handled automatically | 30-50% is good |
| Draft Approval Rate | % of AI drafts accepted by humans | >85% |
| False Auto-Reply Rate | Auto-replies that were wrong | <1% |
| Escalation Rate | % routed to human | Depends on use case |
| Response Time | API latency for classification | <500ms p95 |
// metrics-tracking.ts
/** Aggregated ticket-handling numbers for a single day. */
interface DailyMetrics {
  date: string;
  /** Denominator for the escalation rate used in drift detection. */
  totalTickets: number;
  autoReplied: number;
  draftedForReview: number;
  /** Drift detection uses escalatedToHuman / totalTickets as its "OTHER" rate. */
  escalatedToHuman: number;
  // Quality metrics
  /** Drift detection computes accuracy as draftsApproved / (draftsApproved + draftsRejected). */
  draftsApproved: number;
  draftsEdited: number;
  draftsRejected: number;
  // Performance
  avgClassificationTime: number; // NOTE(review): unit is not stated here — confirm
  avgConfidence: number; // presumably a 0-1 fraction (it is printed as a percentage after * 100) — confirm
  // Errors
  classificationErrors: number; // Human corrected the category
  responseErrors: number; // Customer complained about response
}
/**
 * Reports one processed ticket to the analytics system as a
 * 'ticket_processed' event.
 *
 * @param decision The routing decision that was made for the ticket.
 * @param humanAction What the reviewer did with it, if a human was involved;
 *   when omitted, `wasApproved` and `wasEdited` are sent as undefined.
 */
function trackMetrics(decision: TicketDecision, humanAction?: HumanAction) {
  const { action, queue, confidence, processingTime } = decision;

  // The event calls the queue "category" and the processing time "responseTime".
  const payload = {
    action,
    category: queue,
    confidence,
    wasApproved: humanAction?.approved,
    wasEdited: humanAction?.edited,
    responseTime: processingTime
  };

  analytics.track('ticket_processed', payload);
}
Monitoring for Drift
Customer language changes over time. A new product launch, trending issue, or seasonal event can confuse the model:
// drift-detection.ts
/** A warning that current metrics have moved away from the baseline. */
interface DriftAlert {
  /** Which signal tripped. */
  type: 'accuracy_drop' | 'confidence_drop' | 'category_shift' | 'unknown_spike';
  severity: 'low' | 'medium' | 'high';
  /** Human-readable description of what changed. */
  message: string;
  /** Suggested next step for whoever receives the alert. */
  recommendation: string;
}
/**
 * Compares one day's metrics with a baseline period and reports drift.
 *
 * Three signals are checked, in this order:
 *  - accuracy_drop: draft approval rate fell more than 0.1 below the baseline
 *    mean ('high' when more than 0.2 below, otherwise 'medium');
 *  - confidence_drop: average confidence fell more than 0.1 below the
 *    baseline mean;
 *  - unknown_spike: the escalated-to-human rate (used here as the proxy for
 *    the "OTHER" category) exceeds 1.5x the baseline mean.
 *
 * A rate whose denominator is zero (a day with no reviewed drafts, or no
 * tickets) is treated as "no data" and left out, so one empty day can
 * neither poison a baseline mean with NaN nor raise an alert by itself.
 * A check with no usable baseline or current value is skipped.
 *
 * @param current Metrics for the day being checked.
 * @param baseline Metrics for the reference period; may be empty.
 * @returns Alerts in the order listed above; empty when nothing drifted.
 */
function detectDrift(
  current: DailyMetrics,
  baseline: DailyMetrics[]
): DriftAlert[] {
  // numerator / denominator, or null when the day has no data for this rate.
  const rate = (numerator: number, denominator: number): number | null => {
    const value = numerator / denominator;
    return denominator > 0 && Number.isFinite(value) ? value : null;
  };
  // Arithmetic mean of the usable values, or null when there are none.
  const mean = (values: Array<number | null>): number | null => {
    const usable = values.filter((v): v is number => v !== null && Number.isFinite(v));
    if (usable.length === 0) return null;
    return usable.reduce((sum, v) => sum + v, 0) / usable.length;
  };
  const approvalRate = (m: DailyMetrics) =>
    rate(m.draftsApproved, m.draftsApproved + m.draftsRejected);
  const escalationRate = (m: DailyMetrics) => rate(m.escalatedToHuman, m.totalTickets);

  const alerts: DriftAlert[] = [];

  // Calculate baseline averages
  const avgAccuracy = mean(baseline.map(approvalRate));
  const avgConfidence = mean(baseline.map(b => b.avgConfidence));

  // Check accuracy drop
  const currentAccuracy = approvalRate(current);
  if (avgAccuracy !== null && currentAccuracy !== null && currentAccuracy < avgAccuracy - 0.1) {
    alerts.push({
      type: 'accuracy_drop',
      severity: currentAccuracy < avgAccuracy - 0.2 ? 'high' : 'medium',
      message: `Accuracy dropped from ${(avgAccuracy*100).toFixed(1)}% to ${(currentAccuracy*100).toFixed(1)}%`,
      recommendation: 'Review recent failures and add to eval set'
    });
  }

  // Check confidence drop
  if (avgConfidence !== null && current.avgConfidence < avgConfidence - 0.1) {
    alerts.push({
      type: 'confidence_drop',
      severity: 'medium',
      message: `Average confidence dropped to ${(current.avgConfidence*100).toFixed(1)}%`,
      recommendation: 'Model may be seeing unfamiliar patterns - review "OTHER" category'
    });
  }

  // Check spike in "OTHER" category
  const otherRate = escalationRate(current);
  const avgOtherRate = mean(baseline.map(escalationRate));
  if (otherRate !== null && avgOtherRate !== null && otherRate > avgOtherRate * 1.5) {
    alerts.push({
      type: 'unknown_spike',
      severity: 'high',
      // A relative increase is undefined against a zero baseline, so report
      // the absolute rate instead of "Infinity%".
      message: avgOtherRate > 0
        ? `"OTHER" category up ${((otherRate/avgOtherRate - 1) * 100).toFixed(0)}% from baseline`
        : `"OTHER" category at ${(otherRate*100).toFixed(0)}% of tickets, up from a 0% baseline`,
      recommendation: 'New issue type emerging - review and add new category if needed'
    });
  }

  return alerts;
}
Re-run your evaluation set weekly. If accuracy drops, investigate recent failures. You may need to update prompts, add examples, or create new categories.