diff --git a/README.md b/README.md new file mode 100644 index 0000000..96b4c0f --- /dev/null +++ b/README.md @@ -0,0 +1,7 @@ +# Recovery + +⚠️ Restore NICHT über Argo deployen + +Use: + +kubectl apply -f restore-latest.yaml \ No newline at end of file diff --git a/argocd/applications/cnpg-operator.yaml b/argocd/applications/cnpg-operator.yaml index 410ef6a..e2844f5 100644 --- a/argocd/applications/cnpg-operator.yaml +++ b/argocd/applications/cnpg-operator.yaml @@ -1,9 +1,10 @@ -# 1. App für den CloudNativePG Operator apiVersion: argoproj.io/v1alpha1 kind: Application metadata: name: cnpg-operator namespace: argocd + annotations: + argocd.argoproj.io/sync-wave: "1" spec: project: default source: @@ -19,16 +20,15 @@ spec: syncPolicy: automated: prune: true - selfHeel: true + selfHeal: true syncOptions: - CreateNamespace=true - ServerSideApply=true + - SkipDryRunOnMissingResource=true automated: allowEmpty: true ignoreDifferences: - group: apiextensions.k8s.io kind: CustomResourceDefinition jqPathExpressions: - - .spec.conversion.webhook.clientConfig.caBundle - dependsOn: - - name: timescaledb-crds \ No newline at end of file + - .spec.conversion.webhook.clientConfig.caBundle \ No newline at end of file diff --git a/argocd/applications/minio.yaml b/argocd/applications/minio.yaml index 4da0d55..d79dddc 100644 --- a/argocd/applications/minio.yaml +++ b/argocd/applications/minio.yaml @@ -27,7 +27,7 @@ spec: syncPolicy: automated: prune: true - selfHeel: true + selfHeal: true syncOptions: - CreateNamespace=true - ServerSideApply=true diff --git a/argocd/applications/timescaledb-crds.yaml b/argocd/applications/timescaledb-crds.yaml index 970614b..ce0659c 100644 --- a/argocd/applications/timescaledb-crds.yaml +++ b/argocd/applications/timescaledb-crds.yaml @@ -3,6 +3,8 @@ kind: Application metadata: name: timescaledb-crds namespace: argocd + annotations: + argocd.argoproj.io/sync-wave: "0" spec: project: default source: @@ -15,4 +17,4 @@ spec: syncPolicy: automated: prune: 
true - selfHeel: true + selfHeal: true diff --git a/argocd/applications/timescaledb.yaml b/argocd/applications/timescaledb.yaml index 0e1b8f8..176f0ab 100644 --- a/argocd/applications/timescaledb.yaml +++ b/argocd/applications/timescaledb.yaml @@ -3,6 +3,8 @@ kind: Application metadata: name: timescaledb namespace: argocd + annotations: + argocd.argoproj.io/sync-wave: "2" spec: project: default source: @@ -15,14 +17,13 @@ spec: syncPolicy: automated: prune: true - selfHeel: true + selfHeal: true syncOptions: - CreateNamespace=true - ServerSideApply=true + - SkipDryRunOnMissingResource=true ignoreDifferences: - group: postgresql.cnpg.io kind: Cluster jqPathExpressions: - - .status - dependsOn: - - name: cnpg-operator \ No newline at end of file + - .status \ No newline at end of file diff --git a/argocd/root.yaml b/argocd/root.yaml new file mode 100644 index 0000000..ad44f10 --- /dev/null +++ b/argocd/root.yaml @@ -0,0 +1,18 @@ +apiVersion: argoproj.io/v1alpha1 +kind: Application +metadata: + name: root-apps + namespace: argocd +spec: + project: default + source: + repoURL: https://gitea.vhn-demo.duckdns.org/hoang/timescaledb.git + path: argocd/applications + targetRevision: main + destination: + server: https://kubernetes.default.svc + namespace: argocd + syncPolicy: + automated: + prune: true + selfHeal: true \ No newline at end of file diff --git a/k8s/base/database/backup-daily.yaml b/k8s/base/database/backup-daily.yaml new file mode 100644 index 0000000..e2e35b5 --- /dev/null +++ b/k8s/base/database/backup-daily.yaml @@ -0,0 +1,11 @@ +apiVersion: postgresql.cnpg.io/v1 +kind: ScheduledBackup +metadata: + name: timescale-backup-daily + namespace: database +spec: + schedule: "0 2 * * *" # täglich 02:00 + backupOwnerReference: self + cluster: + name: timescale-cluster + method: barmanObjectStore \ No newline at end of file diff --git a/k8s/base/database/backup-hourly.yaml b/k8s/base/database/backup-hourly.yaml new file mode 100644 index 0000000..2a62622 --- 
/dev/null +++ b/k8s/base/database/backup-hourly.yaml @@ -0,0 +1,12 @@ +apiVersion: postgresql.cnpg.io/v1 +kind: ScheduledBackup +metadata: + name: timescale-backup-hourly + namespace: database +spec: + schedule: "0 * * * *" # jede Stunden + backupOwnerReference: self + cluster: + name: timescale-cluster + method: barmanObjectStore + diff --git a/k8s/base/database/backup-monitor.yaml b/k8s/base/database/backup-monitor.yaml new file mode 100644 index 0000000..cb61e82 --- /dev/null +++ b/k8s/base/database/backup-monitor.yaml @@ -0,0 +1,17 @@ +apiVersion: monitoring.coreos.com/v1 +kind: PrometheusRule +metadata: + name: cnpg-backup-alert + namespace: database +spec: + groups: + - name: cnpg.rules + rules: + - alert: CNPGBackupMissing + expr: time() - cnpg_last_backup_time_seconds > 86400 + for: 10m + labels: + severity: critical + annotations: + summary: "CNPG Backup missing" + description: "No backup in last 24h" \ No newline at end of file diff --git a/k8s/base/database/cluster.yaml b/k8s/base/database/cluster.yaml index 591449a..12e4a69 100644 --- a/k8s/base/database/cluster.yaml +++ b/k8s/base/database/cluster.yaml @@ -4,10 +4,51 @@ metadata: name: timescale-cluster spec: instances: 3 + imageName: timescale/timescaledb:2.15.3-pg15 + + # ✅ Initiales Setup + bootstrap: + initdb: + database: app + owner: app + + # ✅ Storage storage: size: 50Gi + + # ✅ Ressourcen (wichtig!) 
+ resources: + requests: + memory: "2Gi" + cpu: "500m" + limits: + memory: "4Gi" + cpu: "2" + + # ✅ High Availability + affinity: + enablePodAntiAffinity: true + topologyKey: kubernetes.io/hostname + + # ✅ Monitoring + monitoring: + enablePodMonitor: true + + # ✅ PostgreSQL / Timescale Tuning + postgresql: + parameters: + max_connections: "100" # max. gleichzeitige Verbindungen + shared_buffers: "256MB" # Cache im RAM + effective_cache_size: "3GB" # Optimizer hint + work_mem: "16MB" # pro Query + maintenance_work_mem: "256MB" # für VACUUM/REINDEX + wal_buffers: "16MB" + checkpoint_completion_target: "0.9" + random_page_cost: "1.1" # SSD optimiert + effective_io_concurrency: "200" backup: + retentionPolicy: "7d" # Backup älter als 7 Tage -> automatisch gelöscht barmanObjectStore: destinationPath: "s3://backups/" endpointURL: "http://minio.minio-system.svc.cluster.local:9000" @@ -18,3 +59,5 @@ spec: secretAccessKey: name: s3-creds key: SECRET_ACCESS_KEY + wal: + compression: gzip \ No newline at end of file diff --git a/k8s/base/database/kustomization.yaml b/k8s/base/database/kustomization.yaml index a66a1ee..cf02541 100644 --- a/k8s/base/database/kustomization.yaml +++ b/k8s/base/database/kustomization.yaml @@ -1,2 +1,9 @@ resources: - cluster.yaml + - s3-secret.yaml + - backup-hourly.yaml + - backup-daily.yaml + - restore-cronjob.yaml + - restore-configmap.yaml + - backup-monitor.yaml + diff --git a/k8s/base/database/restore-configmap.yaml b/k8s/base/database/restore-configmap.yaml new file mode 100644 index 0000000..bb81152 --- /dev/null +++ b/k8s/base/database/restore-configmap.yaml @@ -0,0 +1,33 @@ +apiVersion: v1 +kind: ConfigMap +metadata: + name: restore-test-manifest + namespace: database +data: + restore-test.yaml: | + apiVersion: postgresql.cnpg.io/v1 + kind: Cluster + metadata: + name: timescale-restore-test + spec: + instances: 1 + imageName: timescale/timescaledb:2.15.3-pg15 + bootstrap: + recovery: + source: timescale-cluster + recoveryTarget: + targetImmediate: true + storage: + 
size: 10Gi + externalClusters: + - name: timescale-cluster + barmanObjectStore: + destinationPath: "s3://backups/" + endpointURL: "http://minio.minio-system.svc.cluster.local:9000" + s3Credentials: + accessKeyId: + name: s3-creds + key: S3_ACCESS_KEY_ID + secretAccessKey: + name: s3-creds + key: S3_SECRET_ACCESS_KEY \ No newline at end of file diff --git a/k8s/base/database/restore-cronjob.yaml b/k8s/base/database/restore-cronjob.yaml new file mode 100644 index 0000000..e439da6 --- /dev/null +++ b/k8s/base/database/restore-cronjob.yaml @@ -0,0 +1,34 @@ +apiVersion: batch/v1 +kind: CronJob +metadata: + name: timescale-restore-test + namespace: database +spec: + schedule: "0 4 * * *" # täglich 04:00 + jobTemplate: + spec: + template: + spec: + restartPolicy: OnFailure + containers: + - name: restore-test + image: bitnami/kubectl:latest + command: + - /bin/sh + - -c + - | + kubectl apply -f /manifests/restore-test.yaml + sleep 300 + + kubectl get pods -n database + + echo "✅ Restore Test executed" + + kubectl delete cluster timescale-restore-test -n database || true + volumeMounts: + - name: manifests + mountPath: /manifests + volumes: + - name: manifests + configMap: + name: restore-test-manifest diff --git a/k8s/base/database/s3-secret.yaml b/k8s/base/database/s3-secret.yaml new file mode 100644 index 0000000..f92de60 --- /dev/null +++ b/k8s/base/database/s3-secret.yaml @@ -0,0 +1,9 @@ +apiVersion: v1 +kind: Secret +metadata: + name: s3-creds + namespace: database +type: Opaque +stringData: + S3_ACCESS_KEY_ID: admin + S3_SECRET_ACCESS_KEY: password123 \ No newline at end of file diff --git a/k8s/recovery/restore-latest.yaml b/k8s/recovery/restore-latest.yaml new file mode 100644 index 0000000..fe31e16 --- /dev/null +++ b/k8s/recovery/restore-latest.yaml @@ -0,0 +1,25 @@ +apiVersion: postgresql.cnpg.io/v1 +kind: Cluster +metadata: + name: timescale-restore-latest + namespace: database +spec: + instances: 1 + imageName: timescale/timescaledb:2.15.3-pg15 + bootstrap: + 
recovery: + source: timescale-cluster + storage: + size: 50Gi + externalClusters: + - name: timescale-cluster + barmanObjectStore: + destinationPath: "s3://backups/" + endpointURL: "http://minio.minio-system.svc.cluster.local:9000" + s3Credentials: + accessKeyId: + name: s3-creds + key: S3_ACCESS_KEY_ID + secretAccessKey: + name: s3-creds + key: S3_SECRET_ACCESS_KEY \ No newline at end of file diff --git a/k8s/recovery/restore-test.yaml b/k8s/recovery/restore-test.yaml new file mode 100644 index 0000000..23c9473 --- /dev/null +++ b/k8s/recovery/restore-test.yaml @@ -0,0 +1,34 @@ +apiVersion: postgresql.cnpg.io/v1 +kind: Cluster +metadata: + name: timescale-restore-test + namespace: database +spec: + instances: 1 + imageName: timescale/timescaledb:2.15.3-pg15 + bootstrap: + recovery: + source: timescale-cluster + recoveryTarget: + targetImmediate: true # sofort letzter WAL Stand + storage: + size: 10Gi # kleiner für Tests + resources: + requests: + memory: "512Mi" + cpu: "200m" + limits: + memory: "1Gi" + cpu: "500m" + externalClusters: + - name: timescale-cluster + barmanObjectStore: + destinationPath: "s3://backups/" + endpointURL: "http://minio.minio-system.svc.cluster.local:9000" + s3Credentials: + accessKeyId: + name: s3-creds + key: S3_ACCESS_KEY_ID + secretAccessKey: + name: s3-creds + key: S3_SECRET_ACCESS_KEY \ No newline at end of file diff --git a/k8s/recovery/restore-timestamp.yaml b/k8s/recovery/restore-timestamp.yaml new file mode 100644 index 0000000..a3bc630 --- /dev/null +++ b/k8s/recovery/restore-timestamp.yaml @@ -0,0 +1,27 @@ +apiVersion: postgresql.cnpg.io/v1 +kind: Cluster +metadata: + name: timescale-restore-pitr + namespace: database +spec: + instances: 1 + imageName: timescale/timescaledb:2.15.3-pg15 + bootstrap: + recovery: + source: timescale-cluster + recoveryTarget: + targetTime: "2026-05-06 02:30:00" # ANPASSEN + storage: + size: 50Gi + externalClusters: + - name: timescale-cluster + barmanObjectStore: + destinationPath: "s3://backups/" 
+ endpointURL: "http://minio.minio-system.svc.cluster.local:9000" + s3Credentials: + accessKeyId: + name: s3-creds + key: S3_ACCESS_KEY_ID + secretAccessKey: + name: s3-creds + key: S3_SECRET_ACCESS_KEY \ No newline at end of file diff --git a/scripts/restore.sh b/scripts/restore.sh new file mode 100644 index 0000000..6ef5958 --- /dev/null +++ b/scripts/restore.sh @@ -0,0 +1,32 @@ +#!/bin/bash + +set -e + +MODE=$1 +TIMESTAMP=$2 + +echo "🚀 Starting restore..." + +if [ "$MODE" == "latest" ]; then + kubectl apply -f restore-latest.yaml + +elif [ "$MODE" == "pitr" ]; then + if [ -z "$TIMESTAMP" ]; then + echo "❌ Please provide timestamp" + exit 1 + fi + + sed "s/2026-05-06 02:30:00/$TIMESTAMP/" restore-timestamp.yaml | kubectl apply -f - + +elif [ "$MODE" == "test" ]; then + kubectl apply -f restore-test.yaml + +else + echo "Usage:" + echo " ./restore.sh latest" + echo " ./restore.sh pitr '2026-05-06 02:30:00'" + echo " ./restore.sh test" + exit 1 +fi + +echo "✅ Restore triggered"