// Load test for the session API (Node.js 22 + Fastify + socket.io backend,
// PostgreSQL 16 + Redis 7 services). Deployed via Docker Compose / Coolify.
import { describe, it, beforeAll, afterAll, expect } from 'vitest';
|
||
import type { AddressInfo } from 'node:net';
|
||
import type { FastifyInstance } from 'fastify';
|
||
import { buildApp } from '../src/app.js';
|
||
import { loadConfig } from '../src/config.js';
|
||
import { pool } from '../src/db/pool.js';
|
||
import { drizzle } from 'drizzle-orm/node-postgres';
|
||
import { agents, apiTokens } from '../src/db/schema.js';
|
||
import { generateApiToken, hashApiToken } from '../src/lib/crypto.js';
|
||
|
||
/**
|
||
* Load Test: Synthetic 20 agents with p99 < 100ms
|
||
*
|
||
* Simulates 20 concurrent agents making requests to verify:
|
||
* - p99 latency < 100ms (success criterion from BARAAA-47)
|
||
* - Rate limiting works correctly under load
|
||
* - No memory leaks or crashes
|
||
*/
|
||
|
||
describe('Load Test: 20 Concurrent Agents', () => {
|
||
let app: FastifyInstance;
|
||
let baseUrl: string;
|
||
const testAgentTokens: string[] = [];
|
||
const NUM_AGENTS = 20;
|
||
const REQUESTS_PER_AGENT = 50;
|
||
|
||
beforeAll(async () => {
|
||
const config = loadConfig({
|
||
...process.env,
|
||
NODE_ENV: 'test',
|
||
LOG_LEVEL: 'error',
|
||
JWT_SECRET: 'test-secret-32-bytes-long-xxxxxxxxxx',
|
||
FEATURE_MESSAGING_ENABLED: 'false',
|
||
});
|
||
|
||
app = await buildApp({ config });
|
||
await app.listen({ host: '127.0.0.1', port: 0 });
|
||
const address = app.server.address() as AddressInfo;
|
||
baseUrl = `http://127.0.0.1:${address.port}`;
|
||
|
||
// Create 20 test agents and tokens
|
||
const db = drizzle(pool);
|
||
for (let i = 0; i < NUM_AGENTS; i++) {
|
||
const [agent] = await db
|
||
.insert(agents)
|
||
.values({
|
||
name: `load-test-agent-${i}`,
|
||
displayName: `Load Test Agent ${i}`,
|
||
role: 'agent',
|
||
})
|
||
.returning();
|
||
|
||
const { fullToken } = generateApiToken();
|
||
const hashArgon2id = await hashApiToken(fullToken);
|
||
|
||
await db.insert(apiTokens).values({
|
||
agentId: agent!.id,
|
||
hashArgon2id,
|
||
prefix: fullToken.split('_').slice(0, 3).join('_'),
|
||
scopes: {},
|
||
});
|
||
|
||
testAgentTokens.push(fullToken);
|
||
}
|
||
}, 30000); // 30s timeout for setup
|
||
|
||
afterAll(async () => {
|
||
await app.close();
|
||
});
|
||
|
||
it('should handle 20 concurrent agents with p99 < 100ms', async () => {
|
||
const latencies: number[] = [];
|
||
|
||
// Warmup: 1 request per agent to prime caches
|
||
await Promise.all(
|
||
testAgentTokens.map(async (token) => {
|
||
await fetch(`${baseUrl}/api/v1/sessions`, {
|
||
method: 'POST',
|
||
headers: { 'Content-Type': 'application/json' },
|
||
body: JSON.stringify({ apiToken: token }),
|
||
});
|
||
}),
|
||
);
|
||
|
||
// Actual load test: each agent makes REQUESTS_PER_AGENT requests
|
||
const promises: Promise<void>[] = [];
|
||
|
||
for (const token of testAgentTokens) {
|
||
const agentPromise = (async () => {
|
||
for (let i = 0; i < REQUESTS_PER_AGENT; i++) {
|
||
const start = performance.now();
|
||
|
||
const response = await fetch(`${baseUrl}/api/v1/sessions`, {
|
||
method: 'POST',
|
||
headers: { 'Content-Type': 'application/json' },
|
||
body: JSON.stringify({ apiToken: token }),
|
||
});
|
||
|
||
const elapsed = performance.now() - start;
|
||
latencies.push(elapsed);
|
||
|
||
expect(response.status).toBe(200);
|
||
|
||
// Small delay to avoid immediate rate limiting
|
||
await new Promise((resolve) => setTimeout(resolve, 10));
|
||
}
|
||
})();
|
||
|
||
promises.push(agentPromise);
|
||
}
|
||
|
||
await Promise.all(promises);
|
||
|
||
// Calculate latency percentiles
|
||
latencies.sort((a, b) => a - b);
|
||
const p50 = latencies[Math.floor(latencies.length * 0.5)];
|
||
const p90 = latencies[Math.floor(latencies.length * 0.9)];
|
||
const p99 = latencies[Math.floor(latencies.length * 0.99)];
|
||
const max = latencies[latencies.length - 1];
|
||
|
||
console.log(`\n📊 Load Test Results (${NUM_AGENTS} agents × ${REQUESTS_PER_AGENT} requests = ${latencies.length} total):`);
|
||
console.log(` p50: ${p50?.toFixed(2)}ms`);
|
||
console.log(` p90: ${p90?.toFixed(2)}ms`);
|
||
console.log(` p99: ${p99?.toFixed(2)}ms ✅ (target: < 100ms)`);
|
||
console.log(` max: ${max?.toFixed(2)}ms\n`);
|
||
|
||
// Assert p99 < 100ms (success criterion)
|
||
expect(p99).toBeLessThan(100);
|
||
|
||
// Additional sanity checks
|
||
expect(p50).toBeLessThan(50); // p50 should be much lower
|
||
expect(latencies.length).toBe(NUM_AGENTS * REQUESTS_PER_AGENT);
|
||
}, 60000); // 60s timeout for test
|
||
|
||
it('should enforce rate limiting under sustained load', async () => {
|
||
// Pick one agent and hammer it beyond the limit (600 req/min = 10 req/s)
|
||
const token = testAgentTokens[0]!;
|
||
const results: number[] = [];
|
||
|
||
// Send 100 requests as fast as possible (should hit rate limit)
|
||
const promises = Array.from({ length: 100 }, async () => {
|
||
const response = await fetch(`${baseUrl}/api/v1/sessions`, {
|
||
method: 'POST',
|
||
headers: { 'Content-Type': 'application/json' },
|
||
body: JSON.stringify({ apiToken: token }),
|
||
});
|
||
results.push(response.status);
|
||
});
|
||
|
||
await Promise.all(promises);
|
||
|
||
const rateLimitedCount = results.filter((status) => status === 429).length;
|
||
const successCount = results.filter((status) => status === 200).length;
|
||
|
||
console.log(`\n🚦 Rate Limit Test:`);
|
||
console.log(` Success (200): ${successCount}`);
|
||
console.log(` Rate Limited (429): ${rateLimitedCount}\n`);
|
||
|
||
// We expect some rate limiting to occur (not all 100 should succeed)
|
||
expect(rateLimitedCount).toBeGreaterThan(0);
|
||
}, 30000);
|
||
});
|