44.1 Data classification and what can enter prompts

Overview and links for this section of the guide.

Classification Levels

| Level | Description | Can Send to LLM? |
| --- | --- | --- |
| Public | Marketing copy, docs | Yes |
| Internal | Internal processes | With care |
| Confidential | Customer data, PII | Redacted only |
| Restricted | Secrets, credentials, financials | Never |

Processing Rules

// data-classification.ts
/** Sensitivity level assigned to a piece of text before it may enter a prompt. */
type DataClass = 'public' | 'internal' | 'confidential' | 'restricted';

/**
 * Heuristic detectors for personally identifiable information.
 *
 * Each entry pairs a regular expression with a short label naming the kind of
 * PII it looks for. The expressions are deliberately NOT global (`g`): a
 * global RegExp carries `lastIndex` state between `test()` calls, which would
 * make repeated classification calls give inconsistent answers.
 *
 * These are pattern heuristics, not validators: e.g. the `name` pattern
 * matches any two adjacent capitalized words, and the `ssn` pattern matches
 * any 9-digit run in 3-2-4 grouping.
 */
const PII_PATTERNS = [
  // Two adjacent capitalized words, e.g. "Ada Lovelace".
  { pattern: /\b[A-Z][a-z]+ [A-Z][a-z]+\b/, type: 'name' },
  { pattern: /\b[\w.+-]+@[\w-]+\.[\w.-]+\b/, type: 'email' },
  // 10 digits in 3-3-4 grouping, optionally separated by '-' or '.'.
  { pattern: /\b\d{3}[-.]?\d{3}[-.]?\d{4}\b/, type: 'phone' },
  // 9 digits in 3-2-4 grouping, optionally separated by '-'.
  { pattern: /\b\d{3}[-]?\d{2}[-]?\d{4}\b/, type: 'ssn' },
  // 16 digits, either contiguous or in groups of four separated by a single
  // space or hyphen (the way card numbers are usually written).
  { pattern: /\b(?:\d{4}[ -]?){3}\d{4}\b/, type: 'credit_card' },
];

/**
 * Assigns a sensitivity level to `text` by scanning it with `PII_PATTERNS`.
 *
 * The patterns are tried in array order; the first one that matches is logged
 * to the console and the text is reported as `'confidential'`. If nothing
 * matches, the text is reported as `'public'`.
 *
 * Note: this function only ever yields `'confidential'` or `'public'`; it
 * never produces `'internal'` or `'restricted'`.
 *
 * @param text - The text to inspect.
 * @returns `'confidential'` when any PII pattern matches, otherwise `'public'`.
 */
function classifyData(text: string): DataClass {
  const firstHit = PII_PATTERNS.find((entry) => entry.pattern.test(text));

  if (firstHit === undefined) {
    return 'public';
  }

  console.log(`Found ${firstHit.type}, classifying as confidential`);
  return 'confidential';
}

Implementation

/**
 * Gate that decides in what form `data` may be placed into an LLM prompt.
 *
 * @param data - The text being considered for a prompt.
 * @param classification - The sensitivity level assigned to `data`.
 * @returns `data` unchanged for `'public'` and `'internal'`; the result of
 *   `redactPII(data)` for `'confidential'`.
 * @throws Error when `classification` is `'restricted'`, or when it is not one
 *   of the known levels at runtime.
 */
function prepareForLLM(data: string, classification: DataClass): string {
  switch (classification) {
    case 'restricted':
      throw new Error('Restricted data cannot be sent to LLM');
    case 'confidential':
      return redactPII(data);
    case 'public':
    case 'internal':
      return data;
    default: {
      // Types are erased at runtime, so a value from unvalidated input (wrong
      // casing, undefined, a new level) can still reach this point. Fail
      // closed rather than sending unredacted data to the model.
      const unexpected: never = classification;
      throw new Error(`Unknown data classification: ${String(unexpected)}`);
    }
  }
}

/**
 * Replaces every occurrence of each `PII_PATTERNS` match in `text` with a
 * `[REDACTED_<TYPE>]` placeholder, where `<TYPE>` is the upper-cased pattern
 * label (e.g. `[REDACTED_EMAIL]`).
 *
 * Patterns are applied in array order, each one to the output of the previous.
 *
 * @param text - The text to scrub.
 * @returns A copy of `text` with all detected PII replaced by placeholders.
 */
function redactPII(text: string): string {
  let redacted = text;
  for (const { pattern, type } of PII_PATTERNS) {
    // `String.prototype.replace` with a non-global RegExp substitutes only the
    // FIRST match, which would leave any later occurrence in the output.
    // Redact with a global copy so the shared pattern object stays untouched.
    const everyMatch = pattern.global
      ? pattern
      : new RegExp(pattern.source, `${pattern.flags}g`);
    const placeholder = `[REDACTED_${type.toUpperCase()}]`;
    // Function replacer: inserts the placeholder literally, with no `$`
    // substitution-pattern interpretation.
    redacted = redacted.replace(everyMatch, () => placeholder);
  }
  return redacted;
}

Where to go next