Custom Resource Definitions (CRDs)
CRDs extend the Kubernetes API with custom resources, allowing you to create your own resource types that behave like native Kubernetes objects.
API Extension
Add new resource types to the Kubernetes API without modifying Kubernetes itself
Declarative Config
Define desired state using YAML manifests just like built-in resources
CRUD Operations
Support standard kubectl operations: create, get, update, delete
Creating a Custom Resource Definition
# CustomResourceDefinition that registers the namespaced "Database" kind
# (group example.com, version v1, short name "db") with the API server.
apiVersion: apiextensions.k8s.io/v1
kind: CustomResourceDefinition
metadata:
  # The CRD name must be <plural>.<group>.
  name: databases.example.com
spec:
  group: example.com
  versions:
    - name: v1
      served: true
      storage: true
      schema:
        openAPIV3Schema:
          type: object
          properties:
            # Desired state, written by users.
            spec:
              type: object
              properties:
                engine:
                  type: string
                  enum: ["postgres", "mysql", "mongodb"]
                version:
                  type: string
                replicas:
                  type: integer
                  minimum: 1
                  maximum: 10
                storage:
                  type: object
                  properties:
                    size:
                      type: string
                      # Whole gibibytes only, e.g. "100Gi".
                      pattern: "^[0-9]+Gi$"
                    class:
                      type: string
                  required: ["size"]
                backup:
                  type: object
                  properties:
                    enabled:
                      type: boolean
                    schedule:
                      type: string
              required: ["engine", "version", "storage"]
            # Observed state, written by the controller.
            status:
              type: object
              properties:
                phase:
                  type: string
                  enum: ["Creating", "Running", "Failed", "Deleting"]
                replicas:
                  type: integer
                endpoint:
                  type: string
      # Serve /status as a separate endpoint. Without this, a controller's
      # Status().Update() call has no subresource to write to and fails.
      subresources:
        status: {}
      # Extra columns shown by `kubectl get databases`. In
      # apiextensions.k8s.io/v1 these are declared per version.
      additionalPrinterColumns:
        - name: Engine
          type: string
          jsonPath: .spec.engine
        - name: Version
          type: string
          jsonPath: .spec.version
        - name: Status
          type: string
          jsonPath: .status.phase
        - name: Age
          type: date
          jsonPath: .metadata.creationTimestamp
  scope: Namespaced
  names:
    plural: databases
    singular: database
    kind: Database
    shortNames:
      - db
Using Custom Resources
# A Database custom resource: a three-replica PostgreSQL 14.5 instance in the
# "production" namespace with 100Gi of "fast-ssd" storage and backups enabled.
apiVersion: example.com/v1
kind: Database
metadata:
  name: my-postgres
  namespace: production
spec:
  engine: postgres
  # Quoted so YAML keeps the version as a string rather than a number.
  version: "14.5"
  replicas: 3
  storage:
    size: 100Gi
    class: fast-ssd
  backup:
    enabled: true
    # Five-field cron-style expression.
    schedule: "0 2 * * *"
Advanced CRD Features
Versioning
# CRD spec fragment: serve two versions side by side, store objects as v1,
# and convert between the versions through a webhook.
spec:
  versions:
    # Old version: still served, flagged as deprecated, no longer stored.
    - name: v1beta1
      served: true
      storage: false
      deprecated: true
      deprecationWarning: "v1beta1 is deprecated, use v1"
      schema:
        # ... v1beta1 schema
    # Current version: served and used as the storage version.
    - name: v1
      served: true
      storage: true
      schema:
        # ... v1 schema
  conversion:
    strategy: Webhook
    webhook:
      clientConfig:
        # Service that hosts the conversion endpoint.
        service:
          name: crd-conversion-webhook
          namespace: system
          path: "/convert"
      conversionReviewVersions:
        - "v1"
        - "v1beta1"
Validation with CEL
# Schema fragment: cross-field validation of spec with CEL rules.
schema:
  openAPIV3Schema:
    type: object
    properties:
      spec:
        type: object
        # Each rule is a CEL expression evaluated with `self` bound to spec.
        x-kubernetes-validations:
          - rule: "self.minReplicas <= self.replicas"
            message: "replicas must be greater than or equal to minReplicas"
          - rule: "self.replicas <= self.maxReplicas"
            message: "replicas must be less than or equal to maxReplicas"
        properties:
          replicas:
            type: integer
          minReplicas:
            type: integer
          maxReplicas:
            type: integer
        # The rules above read all three fields unconditionally. Accessing an
        # unset field is an error in CEL, so the fields are required rather
        # than left optional.
        required:
          - replicas
          - minReplicas
          - maxReplicas
Subresources
# CRD spec fragment: turn on the status and scale subresources for v1.
spec:
  versions:
    - name: v1
      served: true
      storage: true
      subresources:
        # Enable status subresource
        status: {}
        # Enable scale subresource
        scale:
          # JSON paths inside the custom resource that the scale
          # subresource reads and writes.
          specReplicasPath: .spec.replicas
          statusReplicasPath: .status.replicas
          labelSelectorPath: .status.labelSelector
Kubernetes Controllers
Controllers are control loops that watch the state of your cluster and make changes to move the current state toward the desired state.
Monitor resources
Check desired vs actual
Make changes
Update status & repeat
Controller Implementation (Go)
package controllers import ( "context" "fmt" "k8s.io/apimachinery/pkg/runtime" ctrl "sigs.k8s.io/controller-runtime" "sigs.k8s.io/controller-runtime/pkg/client" "sigs.k8s.io/controller-runtime/pkg/log" examplev1 "example.com/api/v1" appsv1 "k8s.io/api/apps/v1" corev1 "k8s.io/api/core/v1" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" ) type DatabaseReconciler struct { client.Client Scheme *runtime.Scheme } // Reconcile is the main logic of the controller func (r *DatabaseReconciler) Reconcile(ctx context.Context, req ctrl.Request) (ctrl.Result, error) { log := log.FromContext(ctx) // Fetch the Database instance database := &examplev1.Database{} err := r.Get(ctx, req.NamespacedName, database) if err != nil { if errors.IsNotFound(err) { // Object not found, could have been deleted return ctrl.Result{}, nil } return ctrl.Result{}, err } // Define desired StatefulSet statefulSet := r.statefulSetForDatabase(database) // Check if StatefulSet exists found := &appsv1.StatefulSet{} err = r.Get(ctx, types.NamespacedName{ Name: statefulSet.Name, Namespace: statefulSet.Namespace, }, found) if err != nil && errors.IsNotFound(err) { // Create StatefulSet log.Info("Creating StatefulSet", "name", statefulSet.Name) err = r.Create(ctx, statefulSet) if err != nil { return ctrl.Result{}, err } // Update status database.Status.Phase = "Creating" r.Status().Update(ctx, database) return ctrl.Result{Requeue: true}, nil } else if err != nil { return ctrl.Result{}, err } // Update StatefulSet if needed if !reflect.DeepEqual(statefulSet.Spec, found.Spec) { found.Spec = statefulSet.Spec err = r.Update(ctx, found) if err != nil { return ctrl.Result{}, err } } // Update Database status database.Status.Phase = "Running" database.Status.Replicas = found.Status.Replicas err = r.Status().Update(ctx, database) return ctrl.Result{RequeueAfter: time.Minute}, nil } // statefulSetForDatabase returns a StatefulSet for the Database func (r *DatabaseReconciler) statefulSetForDatabase(db *examplev1.Database) 
*appsv1.StatefulSet { labels := map[string]string{ "app": "database", "database": db.Name, } replicas := int32(db.Spec.Replicas) return &appsv1.StatefulSet{ ObjectMeta: metav1.ObjectMeta{ Name: db.Name + "-statefulset", Namespace: db.Namespace, OwnerReferences: []metav1.OwnerReference{ *metav1.NewControllerRef(db, examplev1.GroupVersion.WithKind("Database")), }, }, Spec: appsv1.StatefulSetSpec{ Replicas: &replicas, Selector: &metav1.LabelSelector{ MatchLabels: labels, }, Template: corev1.PodTemplateSpec{ ObjectMeta: metav1.ObjectMeta{ Labels: labels, }, Spec: corev1.PodSpec{ Containers: []corev1.Container{{ Name: db.Spec.Engine, Image: fmt.Sprintf("%s:%s", db.Spec.Engine, db.Spec.Version), Ports: []corev1.ContainerPort{{ ContainerPort: 5432, Name: "database", }}, VolumeMounts: []corev1.VolumeMount{{ Name: "data", MountPath: "/var/lib/data", }}, }}, }, }, VolumeClaimTemplates: []corev1.PersistentVolumeClaim{{ ObjectMeta: metav1.ObjectMeta{ Name: "data", }, Spec: corev1.PersistentVolumeClaimSpec{ AccessModes: []corev1.PersistentVolumeAccessMode{ corev1.ReadWriteOnce, }, Resources: corev1.ResourceRequirements{ Requests: corev1.ResourceList{ corev1.ResourceStorage: resource.MustParse(db.Spec.Storage.Size), }, }, StorageClassName: &db.Spec.Storage.Class, }, }}, }, } } // SetupWithManager sets up the controller with the Manager func (r *DatabaseReconciler) SetupWithManager(mgr ctrl.Manager) error { return ctrl.NewControllerManagedBy(mgr). For(&examplev1.Database{}). Owns(&appsv1.StatefulSet{}). Complete(r) }
- Reconciliation: Core loop that ensures desired state matches actual state
- Owner References: Automatic cleanup when parent resource is deleted
- Status Updates: Keep users informed about resource state
- Requeue: Schedule future reconciliation for ongoing operations
Controller Patterns
Level-Based Triggering
React to current state, not events. Makes controllers resilient to restarts and missed events.
Idempotency
Multiple reconciliations produce the same result. Safe to retry operations.
Finalizers
Clean up external resources before deletion. Prevent orphaned resources.
Implementing Finalizers
const databaseFinalizer = "database.example.com/finalizer" func (r *DatabaseReconciler) Reconcile(ctx context.Context, req ctrl.Request) (ctrl.Result, error) { database := &examplev1.Database{} err := r.Get(ctx, req.NamespacedName, database) if err != nil { return ctrl.Result{}, client.IgnoreNotFound(err) } // Check if the resource is marked for deletion if database.ObjectMeta.DeletionTimestamp != nil { if controllerutil.ContainsFinalizer(database, databaseFinalizer) { // Perform cleanup if err := r.deleteExternalResources(database); err != nil { return ctrl.Result{}, err } // Remove finalizer controllerutil.RemoveFinalizer(database, databaseFinalizer) err := r.Update(ctx, database) if err != nil { return ctrl.Result{}, err } } return ctrl.Result{}, nil } // Add finalizer if not present if !controllerutil.ContainsFinalizer(database, databaseFinalizer) { controllerutil.AddFinalizer(database, databaseFinalizer) err = r.Update(ctx, database) if err != nil { return ctrl.Result{}, err } } // Normal reconciliation logic... return ctrl.Result{}, nil }
Operator Frameworks
Tools and SDKs that simplify building Kubernetes operators with best practices built-in.
Framework | Language | Complexity | Best For |
---|---|---|---|
Operator SDK | Go, Ansible, Helm | Medium | Production operators with complex logic |
Kubebuilder | Go | Medium-High | Advanced operators with custom APIs |
KUDO | YAML | Low | Simple operators without coding |
Kopf | Python | Low-Medium | Python developers, rapid prototyping |
Metacontroller | Any (Webhooks) | Low | Simple controllers with webhooks |
Operator SDK Quick Start
operator-sdk init --domain example.com --repo github.com/example/database-operator
Initialize a new operator project
operator-sdk create api --group example --version v1 --kind Database --resource --controller
Create API and controller for Database resource
make manifests
Generate CRD manifests from Go types
make install
Install CRDs into cluster
make run
Run operator locally for development
make docker-build docker-push IMG=example/database-operator:v1.0.0
Build and push operator image
make deploy IMG=example/database-operator:v1.0.0
Deploy operator to cluster
Operator Lifecycle Manager (OLM)
Creating an Operator Bundle
# ClusterServiceVersion describing release 1.0.0 of the database operator:
# metadata for the catalog, the CRD it owns, the Deployment that runs it, and
# the RBAC rules granted to its service account.
apiVersion: operators.coreos.com/v1alpha1
kind: ClusterServiceVersion
metadata:
  name: database-operator.v1.0.0
  namespace: operators
spec:
  displayName: Database Operator
  description: |
    The Database Operator manages PostgreSQL, MySQL, and MongoDB instances
    with automated backups, scaling, and failover capabilities.
  version: 1.0.0
  # Upgrade edge: this release supersedes v0.9.0.
  replaces: database-operator.v0.9.0
  icon:
    - base64data: "iVBORw0KGgoAAAANS..."
      mediatype: "image/png"
  customresourcedefinitions:
    owned:
      - name: databases.example.com
        version: v1
        kind: Database
        displayName: Database
        description: Represents a database instance
  install:
    strategy: deployment
    spec:
      deployments:
        - name: database-operator
          spec:
            replicas: 1
            selector:
              matchLabels:
                name: database-operator
            template:
              metadata:
                labels:
                  name: database-operator
              spec:
                serviceAccountName: database-operator
                containers:
                  - name: database-operator
                    image: example/database-operator:v1.0.0
                    command:
                      - database-operator
                    env:
                      # NOTE(review): an empty value presumably means
                      # "watch all namespaces"; confirm against the binary.
                      - name: WATCH_NAMESPACE
                        value: ""
                      - name: OPERATOR_NAME
                        value: "database-operator"
      permissions:
        - serviceAccountName: database-operator
          rules:
            # NOTE(review): wildcard verbs (including on secrets) are broader
            # than least privilege; narrow to the verbs the operator uses.
            - apiGroups: [""]
              resources: ["pods", "services", "endpoints", "persistentvolumeclaims", "events", "configmaps", "secrets"]
              verbs: ["*"]
            - apiGroups: ["apps"]
              resources: ["deployments", "daemonsets", "replicasets", "statefulsets"]
              verbs: ["*"]
            - apiGroups: ["example.com"]
              resources: ["databases", "databases/status", "databases/finalizers"]
              verbs: ["*"]
            # Lease objects used by leader election when the manager runs
            # with LeaderElection enabled.
            - apiGroups: ["coordination.k8s.io"]
              resources: ["leases"]
              verbs: ["get", "list", "watch", "create", "update", "patch", "delete"]
Installing with OLM
# Create a CatalogSource
# Points OLM at a gRPC catalog image that lists the operator.
apiVersion: operators.coreos.com/v1alpha1
kind: CatalogSource
metadata:
  name: database-operators
  namespace: olm
spec:
  sourceType: grpc
  image: example/database-operator-catalog:latest
---
# Create a Subscription
# Installs database-operator from the "stable" channel of the catalog above;
# install plans are approved automatically.
apiVersion: operators.coreos.com/v1alpha1
kind: Subscription
metadata:
  name: database-operator
  namespace: operators
spec:
  channel: stable
  name: database-operator
  # Must match the CatalogSource name and namespace declared above.
  source: database-operators
  sourceNamespace: olm
  installPlanApproval: Automatic
Operator Lifecycle Management
Operator Maturity Model
Basic Install
Automated application provisioning and configuration
Seamless Upgrades
Patch and minor version upgrades supported
Full Lifecycle
App lifecycle, storage lifecycle, backups, failure recovery
Deep Insights
Metrics, alerts, log processing, workload analysis
Auto Pilot
Auto-scaling, auto-tuning, abnormality detection
Testing Operators
package controllers_test import ( "context" "testing" "time" . "github.com/onsi/ginkgo" . "github.com/onsi/gomega" appsv1 "k8s.io/api/apps/v1" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" "sigs.k8s.io/controller-runtime/pkg/client" examplev1 "example.com/api/v1" ) var _ = Describe("Database Controller", func() { Context("When creating a Database", func() { It("Should create a StatefulSet", func() { ctx := context.Background() database := &examplev1.Database{ ObjectMeta: metav1.ObjectMeta{ Name: "test-database", Namespace: "default", }, Spec: examplev1.DatabaseSpec{ Engine: "postgres", Version: "14", Replicas: 3, Storage: examplev1.StorageSpec{ Size: "10Gi", Class: "standard", }, }, } Expect(k8sClient.Create(ctx, database)).Should(Succeed()) statefulSet := &appsv1.StatefulSet{} Eventually(func() bool { err := k8sClient.Get(ctx, client.ObjectKey{ Name: "test-database-statefulset", Namespace: "default", }, statefulSet) return err == nil }, time.Second*10, time.Second).Should(BeTrue()) Expect(*statefulSet.Spec.Replicas).Should(Equal(int32(3))) Expect(statefulSet.Spec.Template.Spec.Containers[0].Image).Should(Equal("postgres:14")) }) It("Should update Database status", func() { ctx := context.Background() database := &examplev1.Database{} Eventually(func() string { err := k8sClient.Get(ctx, client.ObjectKey{ Name: "test-database", Namespace: "default", }, database) if err != nil { return "" } return database.Status.Phase }, time.Second*10, time.Second).Should(Equal("Running")) }) }) })
Monitoring Operators
// Add Prometheus metrics to your controller import ( "github.com/prometheus/client_golang/prometheus" "sigs.k8s.io/controller-runtime/pkg/metrics" ) var ( reconciliationDuration = prometheus.NewHistogramVec( prometheus.HistogramOpts{ Name: "database_operator_reconciliation_duration_seconds", Help: "Duration of reconciliation in seconds", }, []string{"database", "namespace"}, ) databasesTotal = prometheus.NewGaugeVec( prometheus.GaugeOpts{ Name: "database_operator_databases_total", Help: "Total number of databases managed", }, []string{"engine", "status"}, ) ) func init() { metrics.Registry.MustRegister(reconciliationDuration, databasesTotal) } func (r *DatabaseReconciler) Reconcile(ctx context.Context, req ctrl.Request) (ctrl.Result, error) { start := time.Now() defer func() { reconciliationDuration.WithLabelValues( req.Name, req.Namespace, ).Observe(time.Since(start).Seconds()) }() // Reconciliation logic... }
Deployment Best Practices
Production Checklist
Leader Election Configuration
func main() { mgr, err := ctrl.NewManager(ctrl.GetConfigOrDie(), ctrl.Options{ Scheme: scheme, MetricsBindAddress: ":8080", Port: 9443, HealthProbeBindAddress: ":8081", LeaderElection: true, LeaderElectionID: "database-operator.example.com", LeaderElectionNamespace: "operators", LeaseDuration: 15 * time.Second, RenewDeadline: 10 * time.Second, RetryPeriod: 2 * time.Second, }) if err != nil { setupLog.Error(err, "unable to start manager") os.Exit(1) } // Setup controllers... }
Popular Kubernetes Operators
Prometheus Operator
Manages Prometheus instances, ServiceMonitors, and PrometheusRules for monitoring
kubectl create -f https://operatorhub.io/install/prometheus.yaml
PostgreSQL Operator (Zalando)
Creates and manages PostgreSQL clusters with streaming replication and backups
kubectl apply -f https://operatorhub.io/install/postgresql.yaml
Strimzi Kafka Operator
Deploys and manages Apache Kafka clusters, topics, and users
kubectl create -f https://operatorhub.io/install/strimzi-kafka-operator.yaml
Elastic Cloud on Kubernetes
Deploy, manage, and orchestrate Elasticsearch clusters
kubectl apply -f https://download.elastic.co/downloads/eck/2.5.0/crds.yaml
MongoDB Community Operator
Manages MongoDB replica sets with authentication and TLS
kubectl apply -f https://operatorhub.io/install/mongodb-community-kubernetes-operator.yaml
Cert-Manager
Automates certificate management using Let's Encrypt and other issuers
kubectl apply -f https://github.com/cert-manager/cert-manager/releases/download/v1.10.0/cert-manager.yaml
Building a Stateful Service Operator
Example: Redis Operator
# Redis CRD
# A RedisCluster custom resource with persistence, authentication, scheduled
# backups and monitoring switched on.
apiVersion: redis.example.com/v1
kind: RedisCluster
metadata:
  name: redis-production
spec:
  # 3 masters, 3 replicas
  replicas: 6
  version: "7.0"
  persistence:
    enabled: true
    size: 10Gi
  auth:
    enabled: true
    secretName: redis-auth
  backup:
    enabled: true
    # Five-field cron-style expression.
    schedule: "0 */6 * * *"
    destination: s3://backups/redis
  monitoring:
    enabled: true
    serviceMonitor: true
---
# Operator will create:
# - StatefulSet for Redis nodes
# - Services for client access
# - ConfigMaps for configuration
# - Secrets for authentication
# - PodDisruptionBudget for availability
# - ServiceMonitor for Prometheus
# - CronJob for backups
Advanced Features Implementation
// Auto-scaling based on memory usage func (r *RedisReconciler) handleAutoScaling(ctx context.Context, redis *redisv1.RedisCluster) error { // Get current metrics metrics, err := r.getRedisMetrics(redis) if err != nil { return err } currentReplicas := redis.Spec.Replicas desiredReplicas := currentReplicas // Scale up if memory usage > 80% if metrics.MemoryUsagePercent > 80 { desiredReplicas = min(currentReplicas+2, redis.Spec.MaxReplicas) log.Info("Scaling up Redis cluster", "from", currentReplicas, "to", desiredReplicas) } // Scale down if memory usage < 30% if metrics.MemoryUsagePercent < 30 && currentReplicas > redis.Spec.MinReplicas { desiredReplicas = max(currentReplicas-2, redis.Spec.MinReplicas) log.Info("Scaling down Redis cluster", "from", currentReplicas, "to", desiredReplicas) } if desiredReplicas != currentReplicas { redis.Spec.Replicas = desiredReplicas return r.Update(ctx, redis) } return nil } // Automated failover handling func (r *RedisReconciler) handleFailover(ctx context.Context, redis *redisv1.RedisCluster) error { masters, replicas, err := r.getRedisTopology(redis) if err != nil { return err } for _, master := range masters { if !master.IsHealthy() { log.Info("Master node unhealthy, initiating failover", "node", master.Name) // Find best replica to promote bestReplica := r.selectBestReplica(replicas, master) if bestReplica == nil { return fmt.Errorf("no suitable replica found for failover") } // Promote replica to master if err := r.promoteReplica(bestReplica); err != nil { return err } // Update cluster configuration redis.Status.Topology = r.updateTopology(masters, replicas, master, bestReplica) redis.Status.LastFailover = metav1.Now() return r.Status().Update(ctx, redis) } } return nil }
Troubleshooting Operators
- Reconciliation Loop: Add requeue delays, implement backoff
- Memory Leaks: Proper cleanup, limit watch scope
- RBAC Errors: Review and update ClusterRole permissions
- Webhook Failures: Check certificates, network policies
- Performance: Use indexers, limit reconciliation frequency
Debugging Commands
kubectl logs -n operators deployment/database-operator -f
View operator logs
kubectl get events --sort-by='.lastTimestamp' -A | grep database
Check recent events
kubectl describe database.example.com/my-database
Inspect custom resource status
kubectl api-resources --api-group=example.com
List registered custom resources