From c5510e72debc92e312ac9749808158f5f050986f Mon Sep 17 00:00:00 2001 From: Kai Chappell Date: Sun, 8 Jun 2025 10:16:19 +0000 Subject: [PATCH] docs: add deployment guide --- .env.example | 147 +++++++++++++++++ docs/deployment.md | 403 +++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 550 insertions(+) create mode 100644 .env.example create mode 100644 docs/deployment.md diff --git a/.env.example b/.env.example new file mode 100644 index 0000000..f04d530 --- /dev/null +++ b/.env.example @@ -0,0 +1,147 @@ +# Arbiter Environment Configuration +# Copy this file to .env and fill in your values +# All variables are prefixed with ARBITER_ unless noted + +# ============================================================================= +# Database +# ============================================================================= + +# PostgreSQL connection URL (required) +ARBITER_DATABASE_URL=postgresql+asyncpg://arbiter:arbiter@localhost:5432/arbiter + +# Connection pool settings +ARBITER_DATABASE_POOL_SIZE=5 +ARBITER_DATABASE_MAX_OVERFLOW=10 + +# ============================================================================= +# Redis +# ============================================================================= + +# Redis connection URL (required) +ARBITER_REDIS_URL=redis://localhost:6379/0 + +# Connection pool size +ARBITER_REDIS_MAX_CONNECTIONS=10 + +# ============================================================================= +# LLM Configuration +# ============================================================================= + +# API keys (at least one required) - NOT prefixed with ARBITER_ +OPENAI_API_KEY=sk-... +# ANTHROPIC_API_KEY=sk-ant-... 
+ +# Default model for agents +ARBITER_DEFAULT_MODEL=gpt-4o + +# Request settings +ARBITER_LLM_TIMEOUT=60 +ARBITER_LLM_MAX_RETRIES=3 + +# ============================================================================= +# Cost Controls +# ============================================================================= + +# Maximum tokens per review +ARBITER_MAX_TOKENS_PER_REVIEW=50000 + +# Maximum cost per review in USD +ARBITER_MAX_COST_PER_REVIEW_USD=0.50 + +# Response cache TTL in hours +ARBITER_CACHE_TTL_HOURS=24 + +# ============================================================================= +# GitHub Integration +# ============================================================================= + +# GitHub API token for fetching diffs and posting comments +ARBITER_GITHUB_TOKEN=ghp_... + +# Webhook secret for HMAC verification +ARBITER_GITHUB_WEBHOOK_SECRET=your-webhook-secret + +# GitHub API base URL (for GitHub Enterprise) +ARBITER_GITHUB_BASE_URL=https://api.github.com + +# ============================================================================= +# GitLab Integration +# ============================================================================= + +# GitLab API token for fetching diffs and posting comments +ARBITER_GITLAB_TOKEN=glpat-... 
+ +# Webhook token for verification +ARBITER_GITLAB_WEBHOOK_TOKEN=your-webhook-token + +# GitLab instance base URL +ARBITER_GITLAB_BASE_URL=https://gitlab.com + +# ============================================================================= +# API Settings +# ============================================================================= + +# OpenAPI metadata +ARBITER_API_TITLE=Arbiter API +ARBITER_API_VERSION=0.5.0 + +# CORS allowed origins (JSON array) +ARBITER_CORS_ORIGINS=["http://localhost:3000"] + +# Rate limiting +ARBITER_API_RATE_LIMIT_PER_MINUTE=60 + +# ============================================================================= +# Worker Settings +# ============================================================================= + +# Maximum concurrent jobs per worker +ARBITER_WORKER_MAX_JOBS=10 + +# Job timeout in seconds +ARBITER_WORKER_JOB_TIMEOUT=300 + +# Retry attempts on failure +ARBITER_WORKER_RETRY_ATTEMPTS=3 + +# ============================================================================= +# Integration Settings +# ============================================================================= + +# API request timeout for platform integrations +ARBITER_INTEGRATION_TIMEOUT=30 + +# Retry attempts for platform API calls +ARBITER_INTEGRATION_MAX_RETRIES=3 + +# Context name for commit status checks +ARBITER_STATUS_CHECK_CONTEXT=arbiter + +# ============================================================================= +# Feature Flags +# ============================================================================= + +# Post review comments on PRs/MRs +ARBITER_POST_COMMENTS=true + +# Update commit status checks +ARBITER_UPDATE_STATUS=true + +# Enable follow-up question handling in PR comments +ARBITER_FOLLOWUP_ENABLED=true + +# Minimum confidence to process a follow-up question (0.0 - 1.0) +ARBITER_FOLLOWUP_CONFIDENCE_THRESHOLD=0.5 + +# Maximum tokens per follow-up response +ARBITER_FOLLOWUP_MAX_TOKENS_PER_RESPONSE=2000 + +# 
============================================================================= +# Paths +# ============================================================================= + +# Directory containing prompt templates +ARBITER_TEMPLATES_DIR=templates + +# Output format (rich, json, markdown) +ARBITER_OUTPUT_FORMAT=rich diff --git a/docs/deployment.md b/docs/deployment.md new file mode 100644 index 0000000..238ec48 --- /dev/null +++ b/docs/deployment.md @@ -0,0 +1,403 @@ +# Deployment Guide + +This guide covers deploying Arbiter in various environments. + +## Local Development + +### Prerequisites + +- Python 3.12+ +- Node.js 20+ +- Docker and Docker Compose +- PostgreSQL 16 (or use Docker) +- Redis 7 (or use Docker) + +### Quick Start with Docker + +Start all services with a single command: + +```bash +# Start everything +docker compose up -d + +# Run database migrations +docker compose run --rm migrate + +# View logs +docker compose logs -f +``` + +Services: +- API: http://localhost:8000 +- Dashboard: http://localhost:3000 +- PostgreSQL: localhost:5432 +- Redis: localhost:6379 + +### Development without Docker + +For faster iteration during development, run infrastructure in Docker and +services locally: + +```bash +# Start only database and Redis +docker compose up -d db redis + +# Install Python dependencies +pip install -e ".[dev]" + +# Run migrations +alembic upgrade head + +# Terminal 1: Start API server +uvicorn src.arbiter.main:app --reload + +# Terminal 2: Start worker +arq src.arbiter.worker.tasks.WorkerSettings + +# Terminal 3: Start dashboard +cd dashboard && npm install && npm run dev +``` + +## Production Deployment + +### Environment Configuration + +Create a `.env` file with production values. See [.env.example](../.env.example) +for all available options. 
+ +Required environment variables: + +```bash +# Database (use strong credentials) +ARBITER_DATABASE_URL=postgresql+asyncpg://user:password@db-host:5432/arbiter + +# Redis +ARBITER_REDIS_URL=redis://redis-host:6379/0 + +# LLM API key (at least one required) +OPENAI_API_KEY=sk-... +# or +ANTHROPIC_API_KEY=sk-ant-... + +# Platform integration (at least one required) +ARBITER_GITHUB_TOKEN=ghp_... +ARBITER_GITHUB_WEBHOOK_SECRET=your-webhook-secret +# or +ARBITER_GITLAB_TOKEN=glpat-... +ARBITER_GITLAB_WEBHOOK_TOKEN=your-webhook-token + +# Security +ARBITER_CORS_ORIGINS=["https://your-dashboard-domain.com"] +``` + +### Production Checklist + +Before deploying to production: + +- [ ] Set strong database credentials +- [ ] Configure webhook secrets +- [ ] Set up SSL/TLS termination +- [ ] Configure CORS origins to match your dashboard domain +- [ ] Set appropriate rate limits +- [ ] Configure monitoring and alerting +- [ ] Set up log aggregation +- [ ] Plan for database backups +- [ ] Review cost control settings + +### Database Setup + +Arbiter uses PostgreSQL with async support via asyncpg. + +1. Create database: + +```sql +CREATE USER arbiter WITH PASSWORD 'strong-password-here'; +CREATE DATABASE arbiter OWNER arbiter; +``` + +2. Run migrations: + +```bash +alembic upgrade head +``` + +3. (Optional) Enable connection pooling with PgBouncer for high-traffic deployments. + +### Redis Configuration + +Redis is used for: +- Job queue (arq worker) +- LLM response caching +- Session management + +Recommended Redis configuration for production: + +```conf +# redis.conf +maxmemory 256mb +maxmemory-policy allkeys-lru +appendonly yes +``` + +For high availability, consider Redis Sentinel or a managed Redis service. + +### Webhook Setup + +#### GitHub + +1. Go to repository Settings > Webhooks > Add webhook +2. 
Configure: + - **Payload URL:** `https://your-domain.com/webhooks/github` + - **Content type:** `application/json` + - **Secret:** Same value as `ARBITER_GITHUB_WEBHOOK_SECRET` + - **Events:** Select "Pull requests" and "Issue comments" +3. Save and test delivery + +#### GitLab + +1. Go to project Settings > Webhooks +2. Configure: + - **URL:** `https://your-domain.com/webhooks/gitlab` + - **Secret token:** Same value as `ARBITER_GITLAB_WEBHOOK_TOKEN` + - **Triggers:** Select "Merge request events" and "Comments" +3. Save and test + +### SSL/TLS + +Arbiter should always be deployed behind HTTPS in production. + +#### With nginx + +```nginx +server { + listen 443 ssl http2; + server_name arbiter.example.com; + + ssl_certificate /etc/ssl/certs/arbiter.crt; + ssl_certificate_key /etc/ssl/private/arbiter.key; + + # API + location /api/ { + proxy_pass http://localhost:8000; + proxy_set_header Host $host; + proxy_set_header X-Real-IP $remote_addr; + proxy_set_header X-Forwarded-For $proxy_add_x_forwarded_for; + proxy_set_header X-Forwarded-Proto $scheme; + } + + # Webhooks + location /webhooks/ { + proxy_pass http://localhost:8000; + proxy_set_header Host $host; + proxy_set_header X-Real-IP $remote_addr; + proxy_set_header X-Forwarded-For $proxy_add_x_forwarded_for; + proxy_set_header X-Forwarded-Proto $scheme; + } + + # Dashboard + location / { + proxy_pass http://localhost:3000; + proxy_set_header Host $host; + } +} +``` + +#### With Caddy + +```caddyfile +arbiter.example.com { + handle /api/* { + reverse_proxy localhost:8000 + } + handle /webhooks/* { + reverse_proxy localhost:8000 + } + handle { + reverse_proxy localhost:3000 + } +} +``` + +### Container Orchestration + +#### Docker Compose (Single Server) + +For small deployments, use the provided Docker Compose configuration: + +```bash +docker compose -f docker-compose.yml up -d +docker compose run --rm migrate +``` + +#### Kubernetes + +Example deployment manifest: + +```yaml +apiVersion: apps/v1 +kind: 
Deployment +metadata: + name: arbiter-api +spec: + replicas: 2 + selector: + matchLabels: + app: arbiter-api + template: + metadata: + labels: + app: arbiter-api + spec: + containers: + - name: api + image: arbiter:latest + ports: + - containerPort: 8000 + envFrom: + - secretRef: + name: arbiter-secrets + livenessProbe: + httpGet: + path: /health/live + port: 8000 + readinessProbe: + httpGet: + path: /health/ready + port: 8000 +``` + +### Scaling Considerations + +#### Horizontal Scaling + +- **API servers:** Stateless, scale horizontally behind a load balancer +- **Workers:** Scale based on job queue depth; each worker processes up to + `ARBITER_WORKER_MAX_JOBS` concurrent jobs +- **Dashboard:** Static files, can be served from CDN + +#### Vertical Scaling + +- **Database:** Increase connection pool size for more concurrent requests +- **Redis:** Increase memory for higher cache hit rates + +#### Recommended Starting Configuration + +| Component | Replicas | Resources | +|-----------|----------|-----------| +| API | 2 | 512MB RAM, 0.5 CPU | +| Worker | 2 | 1GB RAM, 1 CPU | +| Dashboard | 1 | 128MB RAM | +| PostgreSQL | 1 | 1GB RAM | +| Redis | 1 | 256MB RAM | + +### Monitoring + +#### Prometheus Metrics + +The API exposes Prometheus metrics at `/metrics`: + +- `arbiter_reviews_total` — Total reviews processed +- `arbiter_review_duration_seconds` — Review processing time +- `arbiter_llm_requests_total` — LLM API calls +- `arbiter_llm_cache_hits_total` — Total LLM cache hits +- `arbiter_cost_usd_total` — Total LLM costs + +#### Health Endpoints + +- `GET /health` — Basic liveness check +- `GET /health/live` — Kubernetes liveness probe +- `GET /health/ready` — Readiness check (database + Redis) + +#### Alerting Recommendations + +Set up alerts for: +- High error rate on webhook endpoints +- Worker queue depth exceeding threshold +- LLM cost exceeding daily budget +- Database connection pool exhaustion +- Redis memory usage + +## Environment Variables Reference + +All 
environment variables are prefixed with `ARBITER_` unless noted. + +### Database + +| Variable | Description | Default | +|----------|-------------|---------| +| `DATABASE_URL` | PostgreSQL connection URL | `postgresql+asyncpg://arbiter:arbiter@localhost:5432/arbiter` | +| `DATABASE_POOL_SIZE` | Connection pool size | `5` | +| `DATABASE_MAX_OVERFLOW` | Max overflow connections | `10` | + +### Redis + +| Variable | Description | Default | +|----------|-------------|---------| +| `REDIS_URL` | Redis connection URL | `redis://localhost:6379/0` | +| `REDIS_MAX_CONNECTIONS` | Max connection pool size | `10` | + +### LLM Configuration + +| Variable | Description | Default | +|----------|-------------|---------| +| `DEFAULT_MODEL` | Default LLM model | `gpt-4o` | +| `LLM_TIMEOUT` | Request timeout (seconds) | `60` | +| `LLM_MAX_RETRIES` | Retry attempts | `3` | +| `MAX_TOKENS_PER_REVIEW` | Token budget per review | `50000` | +| `MAX_COST_PER_REVIEW_USD` | Cost limit per review | `0.50` | +| `CACHE_TTL_HOURS` | Response cache TTL | `24` | + +Note: LLM API keys are read directly (not prefixed): +- `OPENAI_API_KEY` +- `ANTHROPIC_API_KEY` + +### GitHub Integration + +| Variable | Description | Default | +|----------|-------------|---------| +| `GITHUB_TOKEN` | API token for fetching diffs and posting comments | - | +| `GITHUB_WEBHOOK_SECRET` | Webhook HMAC secret | - | +| `GITHUB_BASE_URL` | API base URL (for GitHub Enterprise) | `https://api.github.com` | + +### GitLab Integration + +| Variable | Description | Default | +|----------|-------------|---------| +| `GITLAB_TOKEN` | API token for fetching diffs and posting comments | - | +| `GITLAB_WEBHOOK_TOKEN` | Webhook verification token | - | +| `GITLAB_BASE_URL` | Instance URL | `https://gitlab.com` | + +### API Settings + +| Variable | Description | Default | +|----------|-------------|---------| +| `API_TITLE` | OpenAPI title | `Arbiter API` | +| `API_VERSION` | API version string | `0.5.0` | +| `CORS_ORIGINS` | Allowed 
CORS origins (JSON array) | `["http://localhost:3000"]` | +| `API_RATE_LIMIT_PER_MINUTE` | Rate limit per client | `60` | + +### Worker Settings + +| Variable | Description | Default | +|----------|-------------|---------| +| `WORKER_MAX_JOBS` | Max concurrent jobs | `10` | +| `WORKER_JOB_TIMEOUT` | Job timeout (seconds) | `300` | +| `WORKER_RETRY_ATTEMPTS` | Retry count on failure | `3` | + +### Feature Flags + +| Variable | Description | Default | +|----------|-------------|---------| +| `POST_COMMENTS` | Post review comments to PRs/MRs | `true` | +| `UPDATE_STATUS` | Update commit status checks | `true` | +| `FOLLOWUP_ENABLED` | Enable follow-up question handling | `true` | +| `FOLLOWUP_CONFIDENCE_THRESHOLD` | Minimum confidence to respond | `0.5` | +| `FOLLOWUP_MAX_TOKENS_PER_RESPONSE` | Token limit for follow-ups | `2000` | + +### Integration Settings + +| Variable | Description | Default | +|----------|-------------|---------| +| `INTEGRATION_TIMEOUT` | API request timeout (seconds) | `30` | +| `INTEGRATION_MAX_RETRIES` | Retry attempts | `3` | +| `STATUS_CHECK_CONTEXT` | Commit status context name | `arbiter` |