Container Deployment with Kubernetes
1. Dockerfile for AI Agent
# Multi-stage build for AI Agent
FROM python:3.11-slim AS builder
WORKDIR /app
# Install system dependencies
RUN apt-get update && apt-get install -y \
    build-essential \
    git \
    && rm -rf /var/lib/apt/lists/*
# Copy requirements
COPY requirements.txt .
RUN pip install --user --no-cache-dir -r requirements.txt
# Production stage
FROM python:3.11-slim
WORKDIR /app
# Copy Python packages from builder
COPY --from=builder /root/.local /root/.local
ENV PATH=/root/.local/bin:$PATH
# Copy application code
COPY . .
# Security: Run as non-root user
RUN useradd -m -u 1000 agent && chown -R agent:agent /app
USER agent
# Health check
HEALTHCHECK --interval=30s --timeout=10s --start-period=5s --retries=3 \
    CMD python -c "import requests; requests.get('http://localhost:8000/health', timeout=5).raise_for_status()"
# Environment variables
ENV PYTHONUNBUFFERED=1
ENV MODEL_CACHE_DIR=/app/models
ENV LOG_LEVEL=INFO
EXPOSE 8000
CMD ["uvicorn", "main:app", "--host", "0.0.0.0", "--port", "8000", "--workers", "4"]
2. Kubernetes Deployment
# deployment.yaml
apiVersion: apps/v1
kind: Deployment
metadata:
  name: ai-agent
  namespace: production
spec:
  replicas: 3
  strategy:
    type: RollingUpdate
    rollingUpdate:
      maxUnavailable: 1
      maxSurge: 1
  selector:
    matchLabels:
      app: ai-agent
  template:
    metadata:
      labels:
        app: ai-agent
    spec:
      containers:
        - name: agent
          image: your-registry/ai-agent:v1.0.0
          ports:
            - containerPort: 8000
          env:
            - name: OPENAI_API_KEY
              valueFrom:
                secretKeyRef:
                  name: ai-secrets
                  key: openai-key
            - name: DATABASE_URL
              valueFrom:
                secretKeyRef:
                  name: db-secrets
                  key: connection-string
            - name: REDIS_URL
              value: "redis://redis-service:6379"
          resources:
            requests:
              memory: "2Gi"
              cpu: "1000m"
            limits:
              memory: "4Gi"
              cpu: "2000m"
          livenessProbe:
            httpGet:
              path: /health
              port: 8000
            initialDelaySeconds: 30
            periodSeconds: 10
          readinessProbe:
            httpGet:
              path: /ready
              port: 8000
            initialDelaySeconds: 5
            periodSeconds: 5
        # Sidecar for monitoring
        - name: prometheus-exporter
          image: prom/node-exporter:latest
          ports:
            - containerPort: 9100
---
apiVersion: v1
kind: Service
metadata:
  name: ai-agent-service
  namespace: production
spec:
  selector:
    app: ai-agent
  ports:
    - protocol: TCP
      port: 80
      targetPort: 8000
  type: LoadBalancer
---
apiVersion: autoscaling/v2
kind: HorizontalPodAutoscaler
metadata:
  name: ai-agent-hpa
  namespace: production
spec:
  scaleTargetRef:
    apiVersion: apps/v1
    kind: Deployment
    name: ai-agent
  minReplicas: 3
  maxReplicas: 10
  metrics:
    - type: Resource
      resource:
        name: cpu
        target:
          type: Utilization
          averageUtilization: 70
    - type: Resource
      resource:
        name: memory
        target:
          type: Utilization
          averageUtilization: 80
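The Deployment reads OPENAI_API_KEY and DATABASE_URL from Secrets named ai-secrets and db-secrets, which must already exist in the production namespace. A minimal sketch with placeholder values (in practice, create these with kubectl create secret or a secrets manager rather than committing them to a repository):
# secrets.yaml (sketch) -- key names match the secretKeyRef entries above;
# all values are placeholders
apiVersion: v1
kind: Secret
metadata:
  name: ai-secrets
  namespace: production
stringData:
  openai-key: "sk-..."
---
apiVersion: v1
kind: Secret
metadata:
  name: db-secrets
  namespace: production
stringData:
  connection-string: "postgresql://user:password@postgres-service:5432/agent_db"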
3. Helm Chart Structure
# Chart.yaml
apiVersion: v2
name: ai-agent
description: AI Agent Deployment
type: application
version: 1.0.0
appVersion: "1.0"
# values.yaml
replicaCount: 3
image:
  repository: your-registry/ai-agent
  pullPolicy: IfNotPresent
  tag: "v1.0.0"
service:
  type: LoadBalancer
  port: 80
ingress:
  enabled: true
  className: "nginx"
  annotations:
    cert-manager.io/cluster-issuer: "letsencrypt-prod"
    nginx.ingress.kubernetes.io/limit-rps: "100"
  hosts:
    - host: api.yourdomain.com
      paths:
        - path: /
          pathType: ImplementationSpecific
  tls:
    - secretName: ai-agent-tls
      hosts:
        - api.yourdomain.com
resources:
  limits:
    cpu: 2000m
    memory: 4Gi
  requests:
    cpu: 1000m
    memory: 2Gi
autoscaling:
  enabled: true
  minReplicas: 3
  maxReplicas: 10
  targetCPUUtilizationPercentage: 70
redis:
  enabled: true
  architecture: standalone
  auth:
    enabled: true
    password: "your-redis-password"  # placeholder; prefer --set or an external secret store
postgresql:
  enabled: true
  auth:
    postgresPassword: "your-postgres-password"  # placeholder; prefer --set or an external secret store
    database: "agent_db"
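values.yaml only takes effect through the chart's templates, and the redis and postgresql blocks assume corresponding subchart dependencies (e.g., the Bitnami charts) declared under dependencies: in Chart.yaml. A fragment of what templates/deployment.yaml might look like, showing how these values are consumed (illustrative; a real chart scaffolded with helm create also provides helpers such as the ai-agent.fullname template used here):
# templates/deployment.yaml (fragment, illustrative)
apiVersion: apps/v1
kind: Deployment
metadata:
  name: {{ include "ai-agent.fullname" . }}
spec:
  {{- if not .Values.autoscaling.enabled }}
  replicas: {{ .Values.replicaCount }}
  {{- end }}
  template:
    spec:
      containers:
        - name: agent
          image: "{{ .Values.image.repository }}:{{ .Values.image.tag }}"
          imagePullPolicy: {{ .Values.image.pullPolicy }}
          resources:
            {{- toYaml .Values.resources | nindent 12 }}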
Monitoring & Observability
Monitoring Stack
# Prometheus metrics collection
from prometheus_client import Counter, Histogram, Gauge

# Metrics
request_count = Counter('agent_requests_total', 'Total requests')
request_duration = Histogram('agent_request_duration_seconds', 'Request duration')
active_sessions = Gauge('agent_active_sessions', 'Active sessions')
token_usage = Counter('agent_tokens_used', 'Total tokens used')
error_count = Counter('agent_errors_total', 'Total errors', ['error_type'])

# Instrumentation
class MonitoredAgent:
    def __init__(self, agent):
        self.agent = agent

    @request_duration.time()
    def process_request(self, request):
        request_count.inc()
        active_sessions.inc()
        try:
            # Process request
            response = self.agent.process(request)
            token_usage.inc(response.token_count)
            return response
        except Exception as e:
            error_count.labels(error_type=type(e).__name__).inc()
            raise
        finally:
            active_sessions.dec()
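For Prometheus to scrape these metrics, the application has to expose them. One way with FastAPI is to mount prometheus_client's ASGI app (assuming the FastAPI app object used elsewhere in this guide; the /metrics path is a convention, not a requirement):
# Expose the default metrics registry at /metrics for Prometheus scraping
from prometheus_client import make_asgi_app

app.mount("/metrics", make_asgi_app())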
# Grafana dashboard config
dashboard_config = {
    "panels": [
        {
            "title": "Request Rate",
            "targets": [{"expr": "rate(agent_requests_total[5m])"}]
        },
        {
            "title": "Latency (P95)",
            "targets": [{"expr": "histogram_quantile(0.95, rate(agent_request_duration_seconds_bucket[5m]))"}]
        },
        {
            "title": "Token Usage",
            "targets": [{"expr": "rate(agent_tokens_used_total[1h])"}]
        },
        {
            "title": "Error Rate",
            "targets": [{"expr": "rate(agent_errors_total[5m])"}]
        }
    ]
}
# Logging configuration
import structlog

logger = structlog.get_logger()
# (request, response_time, token_count, cache_hit come from the
# surrounding handler's scope)
logger.info(
    "agent_request",
    request_id=request.id,
    user_id=request.user_id,
    action=request.action,
    latency=response_time,
    tokens=token_count,
    cache_hit=cache_hit,
)
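For the log call above to emit machine-parseable output, structlog needs to be configured once at startup. A minimal JSON configuration might look like this:
# One-time structlog setup: ISO timestamps, log level, JSON output
import structlog

structlog.configure(
    processors=[
        structlog.processors.TimeStamper(fmt="iso"),
        structlog.processors.add_log_level,
        structlog.processors.JSONRenderer(),
    ]
)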
# Distributed tracing with OpenTelemetry
from opentelemetry import trace

tracer = trace.get_tracer(__name__)

with tracer.start_as_current_span("process_request") as span:
    span.set_attribute("request.id", request.id)
    span.set_attribute("model.name", model_name)
    # Process request inside the span
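As written, these spans are created but exported nowhere. A minimal provider-plus-exporter setup, assuming the opentelemetry-sdk and opentelemetry-exporter-otlp packages are installed and a collector is reachable at the endpoint shown (the endpoint is a placeholder):
# One-time tracer provider setup with an OTLP exporter
from opentelemetry import trace
from opentelemetry.sdk.trace import TracerProvider
from opentelemetry.sdk.trace.export import BatchSpanProcessor
from opentelemetry.exporter.otlp.proto.grpc.trace_exporter import OTLPSpanExporter

provider = TracerProvider()
provider.add_span_processor(
    BatchSpanProcessor(OTLPSpanExporter(endpoint="http://otel-collector:4317"))
)
trace.set_tracer_provider(provider)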
Security & Compliance
Security Implementation
# Security middleware
from fastapi import FastAPI, Request, Depends, HTTPException
from fastapi.security import HTTPBearer, HTTPAuthorizationCredentials
import jwt
import redis

app = FastAPI()
security = HTTPBearer()
rate_limiter = redis.Redis()

# Rate limiting
async def rate_limit_check(request: Request):
    client_ip = request.client.host
    key = f"rate_limit:{client_ip}"
    try:
        count = rate_limiter.incr(key)
        if count == 1:
            rate_limiter.expire(key, 60)  # 1-minute window
        if count > 100:  # 100 requests per minute
            raise HTTPException(status_code=429, detail="Rate limit exceeded")
    except redis.RedisError:
        pass  # Fail open: don't block requests on rate-limiter errors
# Authentication
SECRET_KEY = "change-me"  # Load from an environment variable or secret store in production

async def verify_token(credentials: HTTPAuthorizationCredentials = Depends(security)):
    token = credentials.credentials
    try:
        payload = jwt.decode(token, SECRET_KEY, algorithms=["HS256"])
        return payload
    except jwt.ExpiredSignatureError:
        raise HTTPException(status_code=401, detail="Token expired")
    except jwt.InvalidTokenError:
        raise HTTPException(status_code=401, detail="Invalid token")
# Input sanitization
import re

MAX_INPUT_LENGTH = 4096  # Adjust to your model's context budget

def sanitize_input(text: str) -> str:
    # Strip common injection payloads (defense in depth, not a substitute
    # for parameterized queries and output encoding)
    dangerous_patterns = [
        r'<script.*?>.*?</script>',
        r'javascript:',
        r'on\w+\s*=',
        r'DROP TABLE',
        r'DELETE FROM',
    ]
    for pattern in dangerous_patterns:
        text = re.sub(pattern, '', text, flags=re.IGNORECASE | re.DOTALL)
    return text[:MAX_INPUT_LENGTH]  # Limit input length
# Audit logging
from datetime import datetime
from fastapi import Response

async def audit_log(request: Request, response: Response):
    log_entry = {
        "timestamp": datetime.utcnow().isoformat(),
        "ip": request.client.host,
        "method": request.method,
        "path": request.url.path,
        "status": response.status_code,
        "user_id": getattr(request.state, "user_id", None),
    }
    await audit_logger.log(log_entry)  # audit_logger: your async log sink
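One way to run audit_log on every call is to register it as an HTTP middleware, which FastAPI invokes around each request/response pair (a sketch):
# Register audit logging as HTTP middleware
@app.middleware("http")
async def audit_middleware(request: Request, call_next):
    response = await call_next(request)
    await audit_log(request, response)
    return response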
Deployment Strategies
Blue-Green Deployment
#!/bin/bash
# Blue-Green deployment script

# Deploy to green environment
kubectl apply -f deployment-green.yaml

# Wait for green to be ready
kubectl wait --for=condition=ready pod -l version=green --timeout=300s

# Run smoke tests
./run-smoke-tests.sh green
if [ $? -eq 0 ]; then
    # Switch traffic to green
    kubectl patch service ai-agent-service -p '{"spec":{"selector":{"version":"green"}}}'
    # Wait and monitor
    sleep 60
    # If stable, remove blue; green is now the live deployment.
    # (Kubernetes resource names are immutable, so green cannot be
    # renamed to blue in place; the next release simply deploys to
    # the blue name instead.)
    kubectl delete deployment ai-agent-blue
else
    echo "Smoke tests failed, rolling back"
    kubectl delete deployment ai-agent-green
fi
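The script presumes two near-identical Deployment manifests that differ only in name and a version label, which the Service selector flips between. A fragment of what deployment-green.yaml might contain (names and labels inferred from the script):
# deployment-green.yaml (fragment; the blue manifest mirrors this
# with name ai-agent-blue and version: blue)
apiVersion: apps/v1
kind: Deployment
metadata:
  name: ai-agent-green
spec:
  selector:
    matchLabels:
      app: ai-agent
      version: green
  template:
    metadata:
      labels:
        app: ai-agent
        version: green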
Canary Deployment
# Flagger canary configuration
apiVersion: flagger.app/v1beta1
kind: Canary
metadata:
  name: ai-agent
spec:
  targetRef:
    apiVersion: apps/v1
    kind: Deployment
    name: ai-agent
  progressDeadlineSeconds: 600
  service:
    port: 8000
  analysis:
    interval: 1m
    threshold: 5
    maxWeight: 50
    stepWeight: 10
    metrics:
      - name: request-success-rate
        thresholdRange:
          min: 99
        interval: 1m
      - name: request-duration
        thresholdRange:
          max: 500
        interval: 1m
    webhooks:
      - name: smoke-test
        type: pre-rollout
        url: http://flagger-loadtester/
        timeout: 30s
        metadata:
          type: bash
          cmd: "curl -s http://ai-agent-canary:8000/health"