Testing Guide

Comprehensive testing strategy for the Vows Social AI multi-agent system.


Testing Philosophy

Test Coverage Goals:

  - Unit Tests: 80%+ coverage
  - Integration Tests: critical paths covered
  - E2E Tests: core user flows validated
  - Performance Tests: latency and throughput benchmarks
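
A minimal sketch of how the unit-test coverage goal can be enforced, assuming Vitest 1.x with the v8 coverage provider; the file name and exact thresholds are illustrative and mirror the 80% goal above.

// vitest.config.ts (illustrative)
import { defineConfig } from 'vitest/config';

export default defineConfig({
  test: {
    coverage: {
      provider: 'v8',
      thresholds: {
        lines: 80,
        functions: 80,
        branches: 80,
        statements: 80
      }
    }
  }
});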


Unit Tests

Agent Testing

Testing Individual Agents:

import pytest
from unittest.mock import Mock, AsyncMock
from agents.quality_guardian import QualityGuardian

@pytest.mark.asyncio
async def test_quality_guardian_scoring():
    # Setup
    agent = QualityGuardian()

    mock_vendor = Mock(
        id='vendor-123',
        posts=[
            Mock(imageUrl='url1', qualityScore=0.9),
            Mock(imageUrl='url2', qualityScore=0.85)
        ],
        followerCount=5000,
        engagementRate=0.06
    )

    # Execute
    score = await agent.evaluateVendor(mock_vendor)

    # Assert
    assert 0.0 <= score.overall <= 1.0
    assert score.visual.overall > 0.8
    assert score.authenticity.score > 0.7

@pytest.mark.asyncio
async def test_quality_guardian_batch():
    agent = QualityGuardian()

    vendors = [
        Mock(id=f'vendor-{i}', qualityScore=0.8)
        for i in range(10)
    ]

    scores = await agent.batchEvaluate(vendors)

    assert len(scores) == 10
    assert all(0.0 <= s <= 1.0 for s in scores)

Testing Agent Tools:

# Assuming DiscoveryAgent follows the same package layout as QualityGuardian
from agents.discovery_agent import DiscoveryAgent

@pytest.mark.asyncio
async def test_discovery_agent_search_tool():
    agent = DiscoveryAgent()

    results = await agent.searchVendors(
        query='garden venue melbourne',
        region='Melbourne',
        limit=20
    )

    assert len(results) <= 20
    assert all(v.region == 'Melbourne' for v in results)
    assert all(v.qualityScore > 0.0 for v in results)

Orchestrator Testing

import { describe, it, expect, vi } from 'vitest';
import { OrchestratorDO } from './orchestrator';

describe('Orchestrator', () => {
  it('should rank content using Thompson Sampling', async () => {
    const orchestrator = new OrchestratorDO(mockState, mockEnv);

    const candidates = [
      { id: 'content-1', qualityScore: 0.9 },
      { id: 'content-2', qualityScore: 0.8 },
      { id: 'content-3', qualityScore: 0.7 }
    ];

    const ranked = await orchestrator.rankFeed(
      mockContext,
      candidates
    );

    expect(ranked).toHaveLength(3);
    expect(ranked[0].score).toBeGreaterThan(ranked[1].score);
  });

  it('should update Thompson params on interaction', async () => {
    const orchestrator = new OrchestratorDO(mockState, mockEnv);

    await orchestrator.recordInteraction({
      contentId: 'content-1',
      action: 'save',
      success: true
    });

    const params = orchestrator.getThompsonParams('content-1');
    expect(params.alpha).toBeGreaterThan(1);
  });
});

Foundation Model Testing

import pytest
import numpy as np
from models.foundation_model import LightweightFoundationModel

def test_content_embedding():
    model = LightweightFoundationModel()

    text = "Beautiful garden wedding venue in Melbourne"
    embedding = model.get_content_embedding(text)

    assert embedding.shape == (384,)  # Sentence-BERT dimension
    assert -1 <= embedding.min() <= 1
    assert -1 <= embedding.max() <= 1

def test_user_embedding():
    model = LightweightFoundationModel()

    interactions = [
        "Modern minimalist wedding photography",
        "Elegant garden venue with natural light",
        "Contemporary floral arrangements"
    ]

    user_embedding = model.get_user_embedding(interactions)

    assert user_embedding.shape == (384,)
    assert np.linalg.norm(user_embedding) > 0  # Not zero vector

def test_cold_start():
    model = LightweightFoundationModel()

    # New user with no interactions
    user_embedding = model.get_cold_start_embedding(
        preferences=['modern', 'minimalist', 'garden']
    )

    assert user_embedding is not None
    assert user_embedding.shape == (384,)

Integration Tests

Multi-Agent Collaboration

import asyncio
import pytest
from crewai import Crew, Process  # assuming CrewAI provides the crew primitives

@pytest.mark.asyncio
async def test_agent_crew_collaboration():
    # Setup crew
    orchestrator = Orchestrator()
    discovery = DiscoveryAgent()
    quality = QualityGuardian()
    archivist = PersonalArchivist()

    crew = Crew(
        agents=[orchestrator, discovery, quality, archivist],
        process=Process.hierarchical
    )

    # Execute
    result = await crew.kickoff({
        'user_id': 'test-user-123',
        'limit': 20
    })

    # Assert
    assert len(result.feed) == 20
    assert all(c.qualityScore > 0.7 for c in result.feed)
    assert result.metadata.agentScores is not None

@pytest.mark.asyncio
async def test_magrpo_policy_update():
    agents = [
        DiscoveryAgent(),
        QualityGuardian(),
        PersonalArchivist()
    ]

    # Initial proposals
    initial_proposals = await asyncio.gather(*[
        agent.propose(mock_context) for agent in agents
    ])

    # Simulate positive outcome
    advantage = 0.8

    # Update policies
    await asyncio.gather(*[
        agent.updatePolicy(advantage) for agent in agents
    ])

    # New proposals should be different (policy updated)
    new_proposals = await asyncio.gather(*[
        agent.propose(mock_context) for agent in agents
    ])

    assert new_proposals != initial_proposals

API Integration

import { describe, it, expect } from 'vitest';
import { app } from './server';

describe('Feed API', () => {
  it('should generate personalized feed', async () => {
    const response = await app.request(
      '/api/feed/user-123',
      { method: 'GET' }
    );

    expect(response.status).toBe(200);

    const data = await response.json();
    expect(data.feed).toHaveLength(20);
    expect(data.metadata.latency_ms).toBeLessThan(500);
  });

  it('should handle interaction logging', async () => {
    const response = await app.request('/api/feed/interaction', {
      method: 'POST',
      body: JSON.stringify({
        userId: 'user-123',
        contentId: 'content-456',
        action: 'save',
        duration: 5.2
      })
    });

    expect(response.status).toBe(200);

    const data = await response.json();
    expect(data.success).toBe(true);
    expect(data.thompsonUpdated).toBe(true);
  });
});

Database Integration

@pytest.mark.asyncio
async def test_qdrant_integration():
    from services.vector_db import QdrantService

    qdrant = QdrantService(url=TEST_QDRANT_URL, api_key=TEST_API_KEY)

    # Upload embeddings
    await qdrant.upsert(
        collection='test_collection',
        points=[
            {'id': 'content-1', 'vector': [0.1] * 384, 'payload': {'type': 'venue'}},
            {'id': 'content-2', 'vector': [-0.1] * 384, 'payload': {'type': 'photo'}}
        ]
    )

    # Search
    results = await qdrant.search(
        collection='test_collection',
        vector=[0.05] * 384,
        limit=2
    )

    assert len(results) == 2
    assert results[0].id == 'content-1'  # Closer match under both cosine and Euclidean distance

@pytest.mark.asyncio
async def test_supabase_integration():
    from services.database import SupabaseService
    from datetime import datetime, timezone

    db = SupabaseService(url=TEST_SUPABASE_URL, key=TEST_SUPABASE_KEY)

    # Insert interaction
    await db.insert('interactions', {
        'user_id': 'user-123',
        'content_id': 'content-456',
        'action': 'save',
        'timestamp': datetime.now(timezone.utc).isoformat()
    })

    # Query interactions
    interactions = await db.select(
        'interactions',
        filters={'user_id': 'user-123'}
    )

    assert len(interactions) > 0
    assert interactions[0]['user_id'] == 'user-123'

E2E Tests

User Flow Testing

import { test, expect } from '@playwright/test';

test('user can browse and save content', async ({ page }) => {
  // Navigate to app
  await page.goto('http://localhost:3000');

  // Login
  await page.fill('input[name="email"]', 'test@example.com');
  await page.fill('input[name="password"]', 'password123');
  await page.click('button[type="submit"]');

  // Wait for feed to load
  await page.waitForSelector('.feed-item');

  // Should show personalized content
  const feedItems = await page.locator('.feed-item').count();
  expect(feedItems).toBeGreaterThan(0);

  // Save first item
  await page.locator('.feed-item').first().click();
  await page.click('button[aria-label="Save"]');

  // Verify saved
  await page.goto('http://localhost:3000/saved');
  const savedItems = await page.locator('.saved-item').count();
  expect(savedItems).toBe(1);
});

test('Thompson Sampling learns from interactions', async ({ page }) => {
  await page.goto('http://localhost:3000');

  // Record initial feed
  const initialFeed = await page.locator('.feed-item').allTextContents();

  // Interact with modern style content only
  for (let i = 0; i < 10; i++) {
    const modernItems = page.locator('.feed-item[data-style="modern"]');
    await modernItems.first().click();
    await page.click('button[aria-label="Save"]');
    await page.goBack();
  }

  // Refresh feed
  await page.reload();

  // New feed should have more modern content
  const modernCount = await page.locator('.feed-item[data-style="modern"]').count();
  expect(modernCount).toBeGreaterThan(5);
});

Performance Tests

Load Testing

import random
from locust import HttpUser, task, between

class FeedUser(HttpUser):
    wait_time = between(1, 3)

    def on_start(self):
        # Give each simulated user a stable pseudo-random id
        self.user_id = random.randint(1, 100000)

    @task
    def get_feed(self):
        self.client.get(f'/api/feed/user-{self.user_id}')

    @task(3)
    def record_interaction(self):
        self.client.post('/api/feed/interaction', json={
            'userId': f'user-{self.user_id}',
            'contentId': f'content-{random.randint(1, 1000)}',
            'action': random.choice(['view', 'save', 'share'])
        })

# Run with: locust -f performance_test.py --users 1000 --spawn-rate 10

Latency Benchmarks

import { describe, it, expect } from 'vitest';
import { performance } from 'perf_hooks';

describe('Performance Benchmarks', () => {
  it('feed generation < 500ms (p99)', async () => {
    const latencies = [];

    for (let i = 0; i < 100; i++) {
      const start = performance.now();
      await generateFeed('user-123');
      const end = performance.now();

      latencies.push(end - start);
    }

    latencies.sort((a, b) => a - b);
    const p99 = latencies[Math.floor(latencies.length * 0.99)];

    expect(p99).toBeLessThan(500);
  });

  it('agent scoring < 200ms (p99)', async () => {
    const latencies = [];
    const agent = new QualityGuardian();

    for (let i = 0; i < 100; i++) {
      const start = performance.now();
      await agent.score(mockCandidates, mockContext);
      const end = performance.now();

      latencies.push(end - start);
    }

    latencies.sort((a, b) => a - b);
    const p99 = latencies[Math.floor(latencies.length * 0.99)];
    expect(p99).toBeLessThan(200);
  });
});

Test Data

Fixtures

# tests/fixtures.py
import pytest

@pytest.fixture
def mock_user():
    return {
        'id': 'user-123',
        'location': { 'region': 'Melbourne' },
        'weddingDate': '2026-06-15',
        'style': 'modern_minimalist',
        'embedding': [0.1] * 384
    }

@pytest.fixture
def mock_content():
    return [
        {
            'id': 'content-1',
            'vendorId': 'vendor-1',
            'vendorType': 'photographer',
            'region': 'Melbourne',
            'qualityScore': 0.9,
            'embedding': [0.2] * 384
        },
        # ... more content
    ]

@pytest.fixture
async def test_db():
    # Async fixture; requires pytest-asyncio (or use @pytest_asyncio.fixture)
    # Setup test database
    db = await create_test_database()
    yield db
    # Teardown
    await db.cleanup()

Mock Data Generators

// tests/generators.ts
export function generateMockVendor(overrides = {}) {
  return {
    id: `vendor-${Math.random()}`,
    username: 'test_vendor',
    region: 'Melbourne',
    qualityScore: 0.8,
    followerCount: 5000,
    ...overrides
  };
}

export function generateMockInteraction(overrides = {}) {
  return {
    userId: 'user-123',
    contentId: 'content-456',
    action: 'view',
    duration: 3.5,
    timestamp: Date.now(),
    ...overrides
  };
}

CI/CD Integration

GitHub Actions

# .github/workflows/test.yml
name: Tests

on: [push, pull_request]

jobs:
  unit-tests:
    runs-on: ubuntu-latest
    steps:
      - uses: actions/checkout@v3
      - uses: actions/setup-node@v3
      - run: npm ci
      - run: npm run test:unit   # runs with coverage (see Test Commands below)

  integration-tests:
    runs-on: ubuntu-latest
    services:
      postgres:
        image: postgres:14
        env:
          POSTGRES_PASSWORD: postgres
        ports:
          - 5432:5432
      qdrant:
        image: qdrant/qdrant
        ports:
          - 6333:6333
    steps:
      - uses: actions/checkout@v3
      - uses: actions/setup-node@v3
      - run: npm ci
      - run: npm run test:integration

  e2e-tests:
    runs-on: ubuntu-latest
    steps:
      - uses: actions/checkout@v3
      - uses: actions/setup-node@v3
      - run: npm ci
      - run: npx playwright install --with-deps
      - run: npm run build
      - run: npm run test:e2e

Test Commands

{
  "scripts": {
    "test": "vitest",
    "test:unit": "vitest run --coverage",
    "test:integration": "vitest run --config vitest.integration.config.ts",
    "test:e2e": "playwright test",
    "test:performance": "locust -f tests/performance.py",
    "test:watch": "vitest watch"
  }
}

Best Practices

  1. Test Pyramid - Favor many fast unit tests over fewer, slower E2E tests
  2. Isolation - Tests should be independent and runnable in any order
  3. Fast Feedback - Each unit test should complete in under 1 second
  4. Realistic Data - Use production-like test data
  5. Clean State - Reset shared state between tests (see the sketch below)
  6. Meaningful Assertions - Test behavior, not implementation
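
A minimal sketch of practices 2 and 5 in a Vitest suite; the in-memory store and its reset are hypothetical and stand in for whatever shared state a real test file touches.

// tests/isolation.example.test.ts (illustrative)
import { describe, it, expect, beforeEach, vi } from 'vitest';

// Hypothetical shared state used only for this sketch
let store: Map<string, unknown>;

beforeEach(() => {
  // Clean state: every test starts with an empty store and fresh mocks
  store = new Map();
  vi.restoreAllMocks();
});

describe('saved content', () => {
  it('starts empty', () => {
    expect(store.size).toBe(0);
  });

  it('does not leak state from other tests', () => {
    store.set('content-1', { action: 'save' });
    expect(store.size).toBe(1); // independent of any other test's writes
  });
});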

Resources