# Testing Guide

Comprehensive testing strategy for the Vows Social AI multi-agent system.

## Testing Philosophy

Test coverage goals:

- **Unit Tests**: 80%+ coverage
- **Integration Tests**: critical paths covered
- **E2E Tests**: core user flows validated
- **Performance Tests**: latency and throughput benchmarks
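The 80% unit-test goal can be enforced by the test runner itself rather than by review. A minimal sketch for Vitest (assumes the built-in v8 coverage provider and Vitest 1.x threshold syntax; this repo's actual `vitest.config.ts` may differ):

```typescript
// vitest.config.ts — sketch: fail `npm run test:unit` below 80% coverage.
import { defineConfig } from 'vitest/config';

export default defineConfig({
  test: {
    coverage: {
      provider: 'v8', // assumption: v8 provider rather than istanbul
      thresholds: {
        lines: 80,
        functions: 80,
        branches: 80,
        statements: 80,
      },
    },
  },
});
```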
## Unit Tests

### Agent Testing

**Testing Individual Agents:**
```python
import pytest
from unittest.mock import Mock, AsyncMock

from agents.quality_guardian import QualityGuardian


@pytest.mark.asyncio
async def test_quality_guardian_scoring():
    # Setup
    agent = QualityGuardian()
    mock_vendor = Mock(
        id='vendor-123',
        posts=[
            Mock(imageUrl='url1', qualityScore=0.9),
            Mock(imageUrl='url2', qualityScore=0.85),
        ],
        followerCount=5000,
        engagementRate=0.06,
    )

    # Execute
    score = await agent.evaluateVendor(mock_vendor)

    # Assert
    assert 0.0 <= score.overall <= 1.0
    assert score.visual.overall > 0.8
    assert score.authenticity.score > 0.7


@pytest.mark.asyncio
async def test_quality_guardian_batch():
    agent = QualityGuardian()
    vendors = [Mock(id=f'vendor-{i}', qualityScore=0.8) for i in range(10)]

    scores = await agent.batchEvaluate(vendors)

    assert len(scores) == 10
    assert all(0.0 <= s <= 1.0 for s in scores)
```
**Testing Agent Tools:**

```python
import pytest

from agents.discovery import DiscoveryAgent  # import path assumed


@pytest.mark.asyncio
async def test_discovery_agent_search_tool():
    agent = DiscoveryAgent()

    results = await agent.searchVendors(
        query='garden venue melbourne',
        region='Melbourne',
        limit=20,
    )

    assert len(results) <= 20
    assert all(v.region == 'Melbourne' for v in results)
    assert all(v.qualityScore > 0.0 for v in results)
```
### Orchestrator Testing
```typescript
import { describe, it, expect } from 'vitest';
import { OrchestratorDO } from './orchestrator';

describe('Orchestrator', () => {
  it('should rank content using Thompson Sampling', async () => {
    const orchestrator = new OrchestratorDO(mockState, mockEnv);

    const candidates = [
      { id: 'content-1', qualityScore: 0.9 },
      { id: 'content-2', qualityScore: 0.8 },
      { id: 'content-3', qualityScore: 0.7 },
    ];

    const ranked = await orchestrator.rankFeed(mockContext, candidates);

    expect(ranked).toHaveLength(3);
    expect(ranked[0].score).toBeGreaterThan(ranked[1].score);
  });

  it('should update Thompson params on interaction', async () => {
    const orchestrator = new OrchestratorDO(mockState, mockEnv);

    await orchestrator.recordInteraction({
      contentId: 'content-1',
      action: 'save',
      success: true,
    });

    const params = orchestrator.getThompsonParams('content-1');
    expect(params.alpha).toBeGreaterThan(1);
  });
});
```
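For context, the `alpha` assertion reflects the standard Beta-Bernoulli update behind Thompson Sampling: each item carries a Beta(alpha, beta) posterior over its success rate, and a positive interaction increments alpha. A minimal sketch (the `ThompsonParams` shape and `updateParams` helper are illustrative, not the actual OrchestratorDO internals):

```typescript
// Beta-Bernoulli posterior for one content item.
interface ThompsonParams {
  alpha: number; // prior 1 + observed successes (e.g. saves)
  beta: number;  // prior 1 + observed failures (e.g. skips)
}

// A success increments alpha; a failure increments beta.
function updateParams(p: ThompsonParams, success: boolean): ThompsonParams {
  return success
    ? { alpha: p.alpha + 1, beta: p.beta }
    : { alpha: p.alpha, beta: p.beta + 1 };
}

// A new item starts at the uniform prior Beta(1, 1); one save lifts the
// posterior mean alpha / (alpha + beta) from 0.5 to ~0.67.
const p1 = updateParams({ alpha: 1, beta: 1 }, true);
console.log(p1.alpha, p1.alpha / (p1.alpha + p1.beta)); // 2, 0.666...
```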
### Foundation Model Testing
```python
import numpy as np

from models.foundation_model import LightweightFoundationModel


def test_content_embedding():
    model = LightweightFoundationModel()
    text = "Beautiful garden wedding venue in Melbourne"

    embedding = model.get_content_embedding(text)

    assert embedding.shape == (384,)  # Sentence-BERT dimension
    assert -1 <= embedding.min() <= 1
    assert -1 <= embedding.max() <= 1


def test_user_embedding():
    model = LightweightFoundationModel()
    interactions = [
        "Modern minimalist wedding photography",
        "Elegant garden venue with natural light",
        "Contemporary floral arrangements",
    ]

    user_embedding = model.get_user_embedding(interactions)

    assert user_embedding.shape == (384,)
    assert np.linalg.norm(user_embedding) > 0  # not a zero vector


def test_cold_start():
    model = LightweightFoundationModel()

    # New user with no interactions
    user_embedding = model.get_cold_start_embedding(
        preferences=['modern', 'minimalist', 'garden']
    )

    assert user_embedding is not None
    assert user_embedding.shape == (384,)
```
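These tests only check shape and range; downstream, user and content embeddings are compared by a vector similarity measure (typically cosine) to rank candidates. A minimal helper, written in TypeScript to match the orchestrator code (the function name is illustrative):

```typescript
// Cosine similarity between two equal-length embedding vectors:
// 1 means same direction, 0 orthogonal, -1 opposite.
function cosineSimilarity(a: number[], b: number[]): number {
  let dot = 0;
  let normA = 0;
  let normB = 0;
  for (let i = 0; i < a.length; i++) {
    dot += a[i] * b[i];
    normA += a[i] * a[i];
    normB += b[i] * b[i];
  }
  return dot / (Math.sqrt(normA) * Math.sqrt(normB));
}

console.log(cosineSimilarity([1, 0], [1, 0])); // 1
console.log(cosineSimilarity([1, 0], [0, 1])); // 0
```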
## Integration Tests

### Multi-Agent Collaboration
```python
import asyncio

import pytest
from crewai import Crew, Process  # assuming a CrewAI-style crew; adjust to the actual framework

from agents.discovery import DiscoveryAgent          # import paths assumed
from agents.orchestrator import Orchestrator
from agents.personal_archivist import PersonalArchivist
from agents.quality_guardian import QualityGuardian


@pytest.mark.asyncio
async def test_agent_crew_collaboration():
    # Setup crew
    orchestrator = Orchestrator()
    discovery = DiscoveryAgent()
    quality = QualityGuardian()
    archivist = PersonalArchivist()

    crew = Crew(
        agents=[orchestrator, discovery, quality, archivist],
        process=Process.hierarchical,
    )

    # Execute
    result = await crew.kickoff({
        'user_id': 'test-user-123',
        'limit': 20,
    })

    # Assert
    assert len(result.feed) == 20
    assert all(c.qualityScore > 0.7 for c in result.feed)
    assert result.metadata.agentScores is not None


@pytest.mark.asyncio
async def test_magrpo_policy_update():
    agents = [DiscoveryAgent(), QualityGuardian(), PersonalArchivist()]

    # Initial proposals
    initial_proposals = await asyncio.gather(*[
        agent.propose(mock_context) for agent in agents
    ])

    # Simulate a positive outcome
    advantage = 0.8

    # Update policies
    await asyncio.gather(*[
        agent.updatePolicy(advantage) for agent in agents
    ])

    # New proposals should differ (policy was updated)
    new_proposals = await asyncio.gather(*[
        agent.propose(mock_context) for agent in agents
    ])
    assert new_proposals != initial_proposals
```
### API Integration
```typescript
import { describe, it, expect } from 'vitest';
import { app } from './server';

describe('Feed API', () => {
  it('should generate personalized feed', async () => {
    const response = await app.request('/api/feed/user-123', { method: 'GET' });

    expect(response.status).toBe(200);

    const data = await response.json();
    expect(data.feed).toHaveLength(20);
    expect(data.metadata.latency_ms).toBeLessThan(500);
  });

  it('should handle interaction logging', async () => {
    const response = await app.request('/api/feed/interaction', {
      method: 'POST',
      body: JSON.stringify({
        userId: 'user-123',
        contentId: 'content-456',
        action: 'save',
        duration: 5.2,
      }),
    });

    expect(response.status).toBe(200);

    const data = await response.json();
    expect(data.success).toBe(true);
    expect(data.thompsonUpdated).toBe(true);
  });
});
```
### Database Integration
```python
import pytest

from services.database import SupabaseService
from services.vector_db import QdrantService


@pytest.mark.asyncio
async def test_qdrant_integration():
    qdrant = QdrantService(url=TEST_QDRANT_URL, api_key=TEST_API_KEY)

    # Upload embeddings
    await qdrant.upsert(
        collection='test_collection',
        points=[
            {'id': 'content-1', 'vector': [0.1] * 384, 'payload': {'type': 'venue'}},
            {'id': 'content-2', 'vector': [0.2] * 384, 'payload': {'type': 'photo'}},
        ],
    )

    # Search with a query vector nearer to content-1. Note that [0.15] * 384
    # would be equidistant from both points and make the ordering assertion
    # flaky; [0.12] * 384 is unambiguous under Euclidean distance.
    results = await qdrant.search(
        collection='test_collection',
        vector=[0.12] * 384,
        limit=2,
    )

    assert len(results) == 2
    assert results[0].id == 'content-1'  # closer match


@pytest.mark.asyncio
async def test_supabase_integration():
    db = SupabaseService(url=TEST_SUPABASE_URL, key=TEST_SUPABASE_KEY)

    # Insert interaction
    await db.insert('interactions', {
        'user_id': 'user-123',
        'content_id': 'content-456',
        'action': 'save',
        'timestamp': 'now()',
    })

    # Query interactions
    interactions = await db.select(
        'interactions',
        filters={'user_id': 'user-123'},
    )

    assert len(interactions) > 0
    assert interactions[0]['user_id'] == 'user-123'
```
## E2E Tests

### User Flow Testing
```typescript
import { test, expect } from '@playwright/test';

test('user can browse and save content', async ({ page }) => {
  // Navigate to app
  await page.goto('http://localhost:3000');

  // Login
  await page.fill('input[name="email"]', 'test@example.com');
  await page.fill('input[name="password"]', 'password123');
  await page.click('button[type="submit"]');

  // Wait for feed to load
  await page.waitForSelector('.feed-item');

  // Should show personalized content
  const feedItems = await page.locator('.feed-item').count();
  expect(feedItems).toBeGreaterThan(0);

  // Save first item
  await page.locator('.feed-item').first().click();
  await page.click('button[aria-label="Save"]');

  // Verify it was saved
  await page.goto('http://localhost:3000/saved');
  const savedItems = await page.locator('.saved-item').count();
  expect(savedItems).toBe(1);
});

test('Thompson Sampling learns from interactions', async ({ page }) => {
  await page.goto('http://localhost:3000');

  // Interact with modern-style content only
  for (let i = 0; i < 10; i++) {
    const modernItems = page.locator('.feed-item[data-style="modern"]');
    await modernItems.first().click();
    await page.click('button[aria-label="Save"]');
    await page.goBack();
  }

  // Refresh the feed
  await page.reload();

  // The new feed should contain more modern content
  const modernCount = await page.locator('.feed-item[data-style="modern"]').count();
  expect(modernCount).toBeGreaterThan(5);
});
```
## Performance Tests

### Load Testing
```python
import random

from locust import HttpUser, task, between


class FeedUser(HttpUser):
    wait_time = between(1, 3)

    def on_start(self):
        # Assign a synthetic user id to each simulated user
        self.user_id = random.randint(1, 10_000)

    @task
    def get_feed(self):
        self.client.get(f'/api/feed/user-{self.user_id}')

    @task(3)  # interactions fire 3x as often as feed loads
    def record_interaction(self):
        self.client.post('/api/feed/interaction', json={
            'userId': f'user-{self.user_id}',
            'contentId': f'content-{random.randint(1, 1000)}',
            'action': random.choice(['view', 'save', 'share']),
        })


# Run with: locust -f performance_test.py --users 1000 --spawn-rate 10
```
### Latency Benchmarks
```typescript
import { describe, it, expect } from 'vitest';
import { performance } from 'perf_hooks';

describe('Performance Benchmarks', () => {
  it('feed generation < 500ms (p99)', async () => {
    const latencies: number[] = [];

    for (let i = 0; i < 100; i++) {
      const start = performance.now();
      await generateFeed('user-123');
      latencies.push(performance.now() - start);
    }

    latencies.sort((a, b) => a - b);
    const p99 = latencies[Math.floor(latencies.length * 0.99)];
    expect(p99).toBeLessThan(500);
  });

  it('agent scoring < 200ms (p99)', async () => {
    const latencies: number[] = [];
    const agent = new QualityGuardian();

    for (let i = 0; i < 100; i++) {
      const start = performance.now();
      await agent.score(mockCandidates, mockContext);
      latencies.push(performance.now() - start);
    }

    latencies.sort((a, b) => a - b); // must sort before taking a percentile
    const p99 = latencies[Math.floor(latencies.length * 0.99)];
    expect(p99).toBeLessThan(200);
  });
});
```
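Percentile index arithmetic is easy to get wrong; factoring it into a small nearest-rank helper keeps the benchmarks consistent (the `percentile` name is illustrative, not an existing utility in this repo):

```typescript
// Nearest-rank percentile: sort a copy ascending, then index by rank.
function percentile(samples: number[], p: number): number {
  const sorted = [...samples].sort((a, b) => a - b);
  const rank = Math.ceil((p / 100) * sorted.length) - 1;
  return sorted[Math.max(0, Math.min(sorted.length - 1, rank))];
}

console.log(percentile([120, 80, 450, 95, 200], 99)); // 450
console.log(percentile([120, 80, 450, 95, 200], 50)); // 120
```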
## Test Data

### Fixtures
```python
# tests/fixtures.py
import pytest


@pytest.fixture
def mock_user():
    return {
        'id': 'user-123',
        'location': {'region': 'Melbourne'},
        'weddingDate': '2026-06-15',
        'style': 'modern_minimalist',
        'embedding': [0.1] * 384,
    }


@pytest.fixture
def mock_content():
    return [
        {
            'id': 'content-1',
            'vendorId': 'vendor-1',
            'vendorType': 'photographer',
            'region': 'Melbourne',
            'qualityScore': 0.9,
            'embedding': [0.2] * 384,
        },
        # ... more content
    ]


@pytest.fixture
async def test_db():
    # Setup test database
    db = await create_test_database()
    yield db
    # Teardown
    await db.cleanup()
```
### Mock Data Generators
```typescript
// tests/generators.ts
export function generateMockVendor(overrides = {}) {
  return {
    id: `vendor-${Math.random()}`,
    username: 'test_vendor',
    region: 'Melbourne',
    qualityScore: 0.8,
    followerCount: 5000,
    ...overrides,
  };
}

export function generateMockInteraction(overrides = {}) {
  return {
    userId: 'user-123',
    contentId: 'content-456',
    action: 'view',
    duration: 3.5,
    timestamp: Date.now(),
    ...overrides,
  };
}
```
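The `overrides` spread lets each test state only the fields it cares about, for example:

```typescript
// Everything not under test keeps its sensible default.
const lowQualityVendor = generateMockVendor({ qualityScore: 0.2 });
const shareEvent = generateMockInteraction({ action: 'share', duration: 1.1 });
```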
## CI/CD Integration

### GitHub Actions
```yaml
# .github/workflows/test.yml
name: Tests

on: [push, pull_request]

jobs:
  unit-tests:
    runs-on: ubuntu-latest
    steps:
      - uses: actions/checkout@v3
      - uses: actions/setup-node@v3
      - run: npm ci
      - run: npm run test:unit
      - run: npm run test:coverage

  integration-tests:
    runs-on: ubuntu-latest
    services:
      postgres:
        image: postgres:14
        env:
          POSTGRES_PASSWORD: postgres  # the image refuses to start without it
        ports:
          - 5432:5432
      qdrant:
        image: qdrant/qdrant
        ports:
          - 6333:6333
    steps:
      - uses: actions/checkout@v3
      - run: npm ci
      - run: npm run test:integration

  e2e-tests:
    runs-on: ubuntu-latest
    steps:
      - uses: actions/checkout@v3
      - run: npm ci
      - run: npx playwright install --with-deps  # browsers needed by test:e2e
      - run: npm run build
      - run: npm run test:e2e
```
### Test Commands
```json
{
  "scripts": {
    "test": "vitest",
    "test:unit": "vitest run --coverage",
    "test:integration": "vitest run --config vitest.integration.config.ts",
    "test:e2e": "playwright test",
    "test:performance": "locust -f tests/performance.py",
    "test:watch": "vitest watch"
  }
}
```
## Best Practices

- **Test Pyramid**: more unit tests, fewer E2E tests
- **Isolation**: tests should be independent of one another
- **Fast Feedback**: unit tests run in under 1 second
- **Realistic Data**: use production-like test data
- **Clean State**: reset state between tests (see the sketch below)
- **Meaningful Assertions**: test behavior, not implementation
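A minimal sketch of the isolation and clean-state practices in Vitest (the `resetTestDb` helper is hypothetical; substitute whatever seeds this project's test database):

```typescript
import { beforeEach, vi } from 'vitest';

// Hypothetical helper: a real suite would truncate and re-seed the
// test database here.
async function resetTestDb(): Promise<void> {
  // e.g. truncate 'interactions' and 'saved_items' tables
}

beforeEach(async () => {
  await resetTestDb();  // clean state: every test starts from the same data
  vi.restoreAllMocks(); // isolation: no mock state leaks between tests
});
```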