37.1 What an "agent" is in practice

Overview and links for this section of the guide.

The ReAct Pattern

ReAct (Reason + Act) is the foundational pattern for agents. The model alternates between reasoning about what to do and taking an action; the result of each action comes back as an observation that feeds the next round of reasoning:

┌─────────────────────────────────────────────────────────────────┐
│                       ReAct LOOP                                 │
├─────────────────────────────────────────────────────────────────┤
│                                                                  │
│   ┌───────────┐         ┌───────────┐         ┌───────────┐    │
│   │OBSERVATION│ ──────▶ │  THOUGHT  │ ──────▶ │  ACTION   │    │
│   └───────────┘         └───────────┘         └───────────┘    │
│        ▲                                            │           │
│        │                                            │           │
│        └────────────────────────────────────────────┘           │
│                                                                  │
│   Example:                                                       │
│   Observation: "User asked to find TODO comments in the repo"   │
│   Thought: "I should search for TODO in all source files"       │
│   Action: grep_search("TODO", "src/")                           │
│   Observation: "Found 5 TODOs in 3 files"                       │
│   Thought: "I should format these results for the user"         │
│   Action: format_response(results)                              │
│                                                                  │
└─────────────────────────────────────────────────────────────────┘

Implementation

// react-agent.ts
import { GoogleGenerativeAI } from '@google/generative-ai';

/**
 * A capability the agent can invoke by name.
 */
interface Tool {
  /** Identifier the model writes in its `Action:` line; tools are looked up by this name. */
  name: string;
  /** Human-readable summary; listed in the prompt so the model knows when to use the tool. */
  description: string;
  /** Schema describing the accepted params (the tools in this guide use a JSON-Schema-style object). */
  parameters: object;
  /**
   * Runs the tool. `params` is whatever the model supplied, so it is untyped here;
   * the resolved value is serialized with `JSON.stringify` before being shown to the model.
   */
  // eslint-disable-next-line @typescript-eslint/no-explicit-any -- params come from model output
  execute: (params: any) => Promise<unknown>;
}

/** One recorded iteration of the agent loop: what it thought, what it did, what came back. */
interface AgentStep {
  /** Reasoning text the model produced for this step. */
  thought: string;
  /** The tool call chosen for this step, or `null` when no tool was requested. */
  action: null | {
    tool: string;
    params: any;
  };
  /** The tool's result, serialized to a string. */
  observation: string;
}

/**
 * Minimal ReAct (Reason + Act) agent.
 *
 * Each iteration sends the accumulated transcript to the model, parses the
 * reply into a thought plus an optional tool call, executes the tool, and
 * appends the thought/action/observation to the transcript for the next turn.
 */
export class ReActAgent {
  // Only `generateContent(prompt)` and `response.text()` are used, so the SDK
  // handle is kept loosely typed.
  // eslint-disable-next-line @typescript-eslint/no-explicit-any
  private readonly model: any;
  /** Registered tools keyed by `Tool.name`. */
  private readonly tools: Map<string, Tool>;
  /** Upper bound on model round-trips per `run()` call. */
  private readonly maxSteps: number;

  /**
   * @param apiKey   API key passed to the Google Generative AI client.
   * @param tools    Tools the agent may call; later entries win on duplicate names.
   * @param maxSteps Maximum number of think/act iterations before giving up.
   */
  constructor(apiKey: string, tools: Tool[], maxSteps = 10) {
    const genAI = new GoogleGenerativeAI(apiKey);
    this.model = genAI.getGenerativeModel({ model: 'gemini-1.5-pro' });
    this.tools = new Map<string, Tool>(
      tools.map((t): [string, Tool] => [t.name, t])
    );
    this.maxSteps = maxSteps;
  }

  /**
   * Runs the agent loop until the model signals completion.
   *
   * @param task Natural-language description of what the agent should do.
   * @returns The model's final answer and every tool step taken on the way.
   * @throws Error if the model names an unknown tool, emits an action that
   *   cannot be parsed, or does not finish within `maxSteps` iterations.
   *   Errors thrown by a tool's `execute` propagate unchanged.
   */
  async run(task: string): Promise<{ result: string; steps: AgentStep[] }> {
    const steps: AgentStep[] = [];
    let context = this.buildInitialPrompt(task);

    for (let i = 0; i < this.maxSteps; i++) {
      // Get model's thought and action decision
      const response = await this.model.generateContent(context);
      const parsed = this.parseResponse(response.response.text());

      // No action means the thought is the final answer
      if (parsed.action === null) {
        return { result: parsed.thought, steps };
      }

      // Execute the tool
      const tool = this.tools.get(parsed.action.tool);
      if (!tool) {
        throw new Error(`Unknown tool: ${parsed.action.tool}`);
      }

      const observation = await tool.execute(parsed.action.params);
      // JSON.stringify yields undefined (not a string) for undefined/function
      // values; fall back so the recorded observation is always a string.
      const serialized = JSON.stringify(observation) ?? 'undefined';

      // Record the step
      steps.push({
        thought: parsed.thought,
        action: parsed.action,
        observation: serialized
      });

      // Update context for next iteration
      context += `\nThought: ${parsed.thought}`;
      context += `\nAction: ${parsed.action.tool}(${JSON.stringify(parsed.action.params)})`;
      context += `\nObservation: ${serialized}\n`;
    }

    throw new Error('Agent exceeded max steps');
  }

  /**
   * Parses one model reply written in the format requested by the prompt:
   *
   *   Thought: <text>
   *   Action: <tool_name>(<JSON params>)   |   Action: none
   *
   * Anything from a line starting with `Observation:` onward is discarded,
   * because observations come from real tool runs, not from the model.
   * A reply with no `Action:` line, an empty action, or `Action: none` is
   * treated as the final answer (`action: null`).
   *
   * @throws Error if the action is not of the form `name(...)` or its
   *   arguments are not valid JSON.
   */
  private parseResponse(text: string): { thought: string; action: AgentStep['action'] } {
    const reply = (text.split(/^\s*Observation:/m)[0] ?? text).trim();

    const actionMatch = /^\s*Action:\s*([\s\S]*)$/m.exec(reply);
    const thoughtPart = actionMatch ? reply.slice(0, actionMatch.index) : reply;
    const thought = thoughtPart.replace(/^\s*Thought:\s*/, '').trim();

    const actionText = (actionMatch?.[1] ?? '').trim();
    if (actionText === '' || /^none\b/i.test(actionText)) {
      return { thought, action: null };
    }

    // Tool name, then everything between the first "(" and the last ")".
    const call = /^([A-Za-z_][\w.-]*)\s*\(([\s\S]*)\)/.exec(actionText);
    if (!call) {
      throw new Error(`Could not parse action: ${actionText}`);
    }
    const [, toolName = '', rawArgs = ''] = call;

    let params: unknown = {};
    if (rawArgs.trim() !== '') {
      try {
        params = JSON.parse(rawArgs);
      } catch {
        throw new Error(`Could not parse parameters for ${toolName}: ${rawArgs}`);
      }
    }

    return { thought, action: { tool: toolName, params } };
  }

  /** Builds the system-style prompt: tool list, expected reply format, and the task. */
  private buildInitialPrompt(task: string): string {
    const toolDescriptions = Array.from(this.tools.values())
      .map(t => `- ${t.name}: ${t.description}`)
      .join('\n');

    return `You are an agent that completes tasks step by step.

Available tools:
${toolDescriptions}

Format your response as:
Thought: [your reasoning about what to do next]
Action: [tool_name]({"param": "value"})

When the task is complete, respond with:
Thought: [final answer to the user]
Action: none

Task: ${task}
`;
  }
}

State Management

An agent maintains a "scratchpad"—the running context of everything it has done:

// scratchpad.ts
/** A single line in the agent's scratchpad. */
interface ScratchpadEntry {
  /** Creation time in milliseconds since the Unix epoch. */
  timestamp: number;
  /** What kind of record this is. */
  type:
    | 'thought'
    | 'action'
    | 'observation'
    | 'error';
  /** The text of the record. */
  content: string;
  /** Optional free-form extra data attached by the caller. */
  metadata?: any;
}

/**
 * Running log of an agent's thoughts, actions, observations and errors.
 *
 * When the estimated size exceeds `tokenLimit`, everything except the most
 * recent entries is collapsed into a single summary line listing the names of
 * the actions taken so far.
 */
export class Scratchpad {
  /** Number of most-recent entries that compression always keeps verbatim. */
  private static readonly RECENT_TO_KEEP = 10;
  /** Rough heuristic (~4 characters per token); not an exact tokenizer count. */
  private static readonly CHARS_PER_TOKEN = 4;

  private entries: ScratchpadEntry[] = [];
  private readonly tokenLimit: number;

  // Running totals so that repeated compressions extend the summary instead
  // of discarding what earlier compressions recorded.
  private summarizedSteps = 0;
  private summarizedActions: string[] = [];
  /** True once `entries[0]` is a summary line produced by `compress()`. */
  private hasSummary = false;

  /** @param tokenLimit Estimated-token budget above which old entries are summarized. */
  constructor(tokenLimit = 100000) {
    this.tokenLimit = tokenLimit;
  }

  /**
   * Appends an entry stamped with the current time, then compresses the
   * scratchpad if its estimated size is over the limit.
   */
  add(type: ScratchpadEntry['type'], content: string, metadata?: any): void {
    this.entries.push({
      timestamp: Date.now(),
      type,
      content,
      metadata
    });

    // Compress if exceeding limit
    if (this.estimateTokens() > this.tokenLimit) {
      this.compress();
    }
  }

  /** Approximates the token count of `format()` from its character length. */
  private estimateTokens(): number {
    const chars = this.entries.reduce(
      // "<type>: <content>" plus the joining newline
      (total, e) => total + e.type.length + 2 + e.content.length + 1,
      0
    );
    return Math.ceil(chars / Scratchpad.CHARS_PER_TOKEN);
  }

  /**
   * Summarizes old entries to save tokens, keeping the latest
   * `RECENT_TO_KEEP` entries untouched. Does nothing when there are no
   * entries older than that, so it never makes the scratchpad larger.
   */
  private compress(): void {
    const keep = Scratchpad.RECENT_TO_KEEP;
    // Skip the existing summary line; its contents live in the running totals.
    const body = this.hasSummary ? this.entries.slice(1) : this.entries;
    if (body.length <= keep) {
      return;
    }

    const oldEntries = body.slice(0, -keep);
    const recentEntries = body.slice(-keep);

    this.summarizedSteps += oldEntries.length;
    for (const e of oldEntries) {
      if (e.type === 'action') {
        // Keep only the tool name, i.e. the text before the first "(".
        this.summarizedActions.push(e.content.split('(')[0] ?? e.content);
      }
    }

    const summary =
      `[Previous ${this.summarizedSteps} steps summarized: ` +
      this.summarizedActions.join(', ') +
      ']';

    this.entries = [
      { timestamp: Date.now(), type: 'observation', content: summary },
      ...recentEntries
    ];
    this.hasSummary = true;
  }

  /** Renders the scratchpad as one `type: content` line per entry. */
  format(): string {
    return this.entries.map(e => `${e.type}: ${e.content}`).join('\n');
  }
}

Tool Calling

Tools are the agent's interface to the world. Each tool has a clear contract:

// tools.ts
/** Maximum number of characters of search output handed back to the agent. */
const MAX_SEARCH_OUTPUT_CHARS = 5000;

/**
 * Quotes a value as a single POSIX-shell word: wraps it in single quotes and
 * rewrites each embedded `'` as `'\''`, so no character is interpreted by the shell.
 */
function shellQuote(value: unknown): string {
  return `'${String(value).replace(/'/g, `'\\''`)}'`;
}

/**
 * File-system tools exposed to the agent: read a file, list a directory,
 * and grep a directory tree.
 */
export const fileTools: Tool[] = [
  {
    name: 'read_file',
    description: 'Read the contents of a file',
    parameters: {
      type: 'object',
      properties: {
        path: { type: 'string', description: 'Path to the file' },
        lines: { type: 'number', description: 'Max lines to read' }
      },
      required: ['path']
    },
    // Returns at most `lines` lines (default 100) from the start of the file.
    execute: async ({ path, lines = 100 }) => {
      const content = await fs.readFile(path, 'utf8');
      const lineArray = content.split('\n');
      return lineArray.slice(0, lines).join('\n');
    }
  },
  {
    name: 'list_directory',
    description: 'List files in a directory',
    parameters: {
      type: 'object',
      properties: {
        path: { type: 'string' },
        pattern: { type: 'string', description: 'Glob pattern filter' }
      },
      required: ['path']
    },
    // Matches `pattern` (default "*") relative to the given directory.
    execute: async ({ path, pattern = '*' }) => {
      return glob.sync(pattern, { cwd: path });
    }
  },
  {
    name: 'search_code',
    description: 'Search for a pattern in code files',
    parameters: {
      type: 'object',
      properties: {
        query: { type: 'string' },
        path: { type: 'string', description: 'Directory to search' }
      },
      required: ['query', 'path']
    },
    // Recursive grep. Best-effort: grep errors and "no matches" both yield ''.
    execute: async ({ query, path }) => {
      // `query` and `path` come from the model, so they must never reach the
      // shell unquoted. `-e` keeps a query starting with "-" from being read
      // as an option; `--` does the same for the path.
      //
      // `head -c` caps the output inside the shell so a huge result cannot
      // overflow execSync's buffer. Four bytes per character is enough to
      // always see more than MAX_SEARCH_OUTPUT_CHARS characters when the real
      // output is longer, which is how truncation is detected below.
      const byteBudget = MAX_SEARCH_OUTPUT_CHARS * 4;
      const command =
        `grep -r -e ${shellQuote(query)} -- ${shellQuote(path)} 2>/dev/null` +
        ` | head -c ${byteBudget} || true`;

      const output = execSync(command).toString();
      if (output.length <= MAX_SEARCH_OUTPUT_CHARS) {
        return output;
      }
      // Limit output, and tell the agent so it can narrow the query.
      return (
        output.slice(0, MAX_SEARCH_OUTPUT_CHARS) +
        '\n[results truncated, refine your search]'
      );
    }
  }
];
Limit Tool Output

Always cap tool output size. A grep returning 10MB of matches will blow up your context. Truncate and tell the agent "results truncated, refine your search."

Where to go next