Weitere Implementierung

This commit is contained in:
Hoang Nguyen
2026-05-06 11:19:18 +02:00
parent d8d4d1cbe9
commit aca2d6be33
18 changed files with 323 additions and 11 deletions

7
README.md Normal file
View File

@@ -0,0 +1,7 @@
# Recovery
⚠️ Restore NICHT über Argo deployen
Use:
kubectl apply -f restore-latest.yaml

View File

@@ -1,9 +1,10 @@
# 1. App für den CloudNativePG Operator
apiVersion: argoproj.io/v1alpha1 apiVersion: argoproj.io/v1alpha1
kind: Application kind: Application
metadata: metadata:
name: cnpg-operator name: cnpg-operator
namespace: argocd namespace: argocd
annotations:
argocd.argoproj.io/sync-wave: "1"
spec: spec:
project: default project: default
source: source:
@@ -19,10 +20,11 @@ spec:
syncPolicy: syncPolicy:
automated: automated:
prune: true prune: true
selfHeel: true selfHeal: true
syncOptions: syncOptions:
- CreateNamespace=true - CreateNamespace=true
- ServerSideApply=true - ServerSideApply=true
- SkipDryRunOnMissingResource=true
automated: automated:
allowEmpty: true allowEmpty: true
ignoreDifferences: ignoreDifferences:
@@ -30,5 +32,3 @@ spec:
kind: CustomResourceDefinition kind: CustomResourceDefinition
jqPathExpressions: jqPathExpressions:
- .spec.conversion.webhook.clientConfig.caBundle - .spec.conversion.webhook.clientConfig.caBundle
dependsOn:
- name: timescaledb-crds

View File

@@ -27,7 +27,7 @@ spec:
syncPolicy: syncPolicy:
automated: automated:
prune: true prune: true
selfHeel: true selfHeal: true
syncOptions: syncOptions:
- CreateNamespace=true - CreateNamespace=true
- ServerSideApply=true - ServerSideApply=true

View File

@@ -3,6 +3,8 @@ kind: Application
metadata: metadata:
name: timescaledb-crds name: timescaledb-crds
namespace: argocd namespace: argocd
annotations:
argocd.argoproj.io/sync-wave: "0"
spec: spec:
project: default project: default
source: source:
@@ -15,4 +17,4 @@ spec:
syncPolicy: syncPolicy:
automated: automated:
prune: true prune: true
selfHeel: true selfHeal: true

View File

@@ -3,6 +3,8 @@ kind: Application
metadata: metadata:
name: timescaledb name: timescaledb
namespace: argocd namespace: argocd
annotations:
argocd.argoproj.io/sync-wave: "2"
spec: spec:
project: default project: default
source: source:
@@ -15,14 +17,13 @@ spec:
syncPolicy: syncPolicy:
automated: automated:
prune: true prune: true
selfHeel: true selfHeal: true
syncOptions: syncOptions:
- CreateNamespace=true - CreateNamespace=true
- ServerSideApply=true - ServerSideApply=true
- SkipDryRunOnMissingResource=true
ignoreDifferences: ignoreDifferences:
- group: postgresql.cnpg.io - group: postgresql.cnpg.io
kind: Cluster kind: Cluster
jqPathExpressions: jqPathExpressions:
- .status - .status
dependsOn:
- name: cnpg-operator

18
argocd/root.yaml Normal file
View File

@@ -0,0 +1,18 @@
---
# App-of-apps entry point: syncs every Application manifest under
# argocd/applications from the Git repository into the argocd namespace.
apiVersion: argoproj.io/v1alpha1
kind: Application
metadata:
  name: root-apps
  namespace: argocd
spec:
  project: default
  source:
    repoURL: https://gitea.vhn-demo.duckdns.org/hoang/timescaledb.git
    path: argocd/applications
    targetRevision: main
  destination:
    server: https://kubernetes.default.svc
    namespace: argocd
  syncPolicy:
    automated:
      prune: true
      selfHeal: true

View File

@@ -0,0 +1,11 @@
---
# Daily backup of timescale-cluster to the barman object store (MinIO).
apiVersion: postgresql.cnpg.io/v1
kind: ScheduledBackup
metadata:
  name: timescale-backup-daily
  namespace: database
spec:
  # Fix: CNPG ScheduledBackup uses a SIX-field cron expression (seconds
  # field first), unlike Kubernetes CronJobs — the previous five-field
  # "0 2 * * *" does not express "daily at 02:00" to the CNPG scheduler.
  schedule: "0 0 2 * * *"  # daily at 02:00:00
  backupOwnerReference: self
  cluster:
    name: timescale-cluster
  method: barmanObjectStore

View File

@@ -0,0 +1,12 @@
---
# Hourly backup of timescale-cluster to the barman object store (MinIO).
apiVersion: postgresql.cnpg.io/v1
kind: ScheduledBackup
metadata:
  name: timescale-backup-hourly
  namespace: database
spec:
  # Fix: CNPG ScheduledBackup uses a SIX-field cron expression (seconds
  # field first), unlike Kubernetes CronJobs — the previous five-field
  # "0 * * * *" does not express "every hour" to the CNPG scheduler.
  schedule: "0 0 * * * *"  # at the top of every hour
  backupOwnerReference: self
  cluster:
    name: timescale-cluster
  method: barmanObjectStore

View File

@@ -0,0 +1,17 @@
---
# Fires when no backup of the CNPG cluster has completed in the last 24 h.
apiVersion: monitoring.coreos.com/v1
kind: PrometheusRule
metadata:
  name: cnpg-backup-alert
  namespace: database
spec:
  groups:
    - name: cnpg.rules
      rules:
        - alert: CNPGBackupMissing
          # Fix: the CNPG exporter publishes the last-backup timestamp as
          # cnpg_collector_last_available_backup_timestamp; the metric name
          # used before (cnpg_last_backup_time_seconds) is not exposed by
          # the operator, so the alert could never fire.
          # NOTE(review): confirm the metric name against the deployed
          # operator version.
          expr: time() - cnpg_collector_last_available_backup_timestamp > 86400
          for: 10m
          labels:
            severity: critical
          annotations:
            summary: "CNPG Backup missing"
            description: "No backup in last 24h"

View File

@@ -4,10 +4,51 @@ metadata:
name: timescale-cluster name: timescale-cluster
spec: spec:
instances: 3 instances: 3
imageName: timescale/timescaledb:2.15.3-pg15 imageName: timescale/timescaledb:2.15.3-pg15
# ✅ Initiales Setup
bootstrap:
initdb:
database: app
owner: app
# ✅ Storage
storage: storage:
size: 50Gi size: 50Gi
# ✅ Ressourcen (wichtig!)
resources:
requests:
memory: "2Gi"
cpu: "500m"
limits:
memory: "4Gi"
cpu: "2"
# ✅ High Availability
affinity:
enablePodAntiAffinity: true
topologyKey: kubernetes.io/hostname
# ✅ Monitoring
monitoring:
enablePodMonitor: true
# ✅ PostgreSQL / Timescale Tuning
postgresql:
parameters:
max_connections: "100" # mehr Verbindungen 200
shared_buffers: "256MB" # Cache im RAM 1GB
effective_cache_size: "3GB" # Optimizer hint
work_mem: "16MB" # pro Query
maintenance_work_mem: "256MB" # für VACUUM/REINDEX
wal_buffers: "16MB"
checkpoint_completion_target: "0.9"
random_page_cost: "1.1" # SSD optimiert
effective_io_concurrency: "200"
backup: backup:
retentionPolicy: "7d" # Backup älter als 7 Tage -> automatisch gelöscht
barmanObjectStore: barmanObjectStore:
destinationPath: "s3://backups/" destinationPath: "s3://backups/"
endpointURL: "http://minio.minio-system.svc.cluster.local:9000" endpointURL: "http://minio.minio-system.svc.cluster.local:9000"
@@ -18,3 +59,5 @@ spec:
secretAccessKey: secretAccessKey:
name: s3-creds name: s3-creds
key: SECRET_ACCESS_KEY key: SECRET_ACCESS_KEY
wal:
compression: gzip

View File

@@ -1,2 +1,9 @@
resources: resources:
- cluster.yaml - cluster.yaml
- s3-secret.yaml
- backup-hourly.yaml
- backup-daily.yaml
- restore-cronjob.yaml
- restore-configmap.yaml

View File

@@ -0,0 +1,33 @@
---
# Manifest mounted into the nightly restore-test CronJob, which runs
# `kubectl apply -f /manifests/restore-test.yaml` against this content.
apiVersion: v1
kind: ConfigMap
metadata:
  name: restore-test-manifest
  namespace: database
data:
  restore-test.yaml: |
    apiVersion: postgresql.cnpg.io/v1
    kind: Cluster
    metadata:
      name: timescale-restore-test
      # Fix: pin the namespace explicitly — the CronJob cleans up with
      # `kubectl delete ... -n database`, so without this the test cluster
      # would be created in the pod's default namespace and never deleted.
      namespace: database
    spec:
      instances: 1
      imageName: timescale/timescaledb:2.15.3-pg15
      bootstrap:
        recovery:
          source: timescale-cluster
          recoveryTarget:
            # Stop recovery as soon as a consistent state is reached.
            targetImmediate: true
      storage:
        size: 10Gi
      externalClusters:
        - name: timescale-cluster
          barmanObjectStore:
            destinationPath: "s3://backups/"
            endpointURL: "http://minio.minio-system.svc.cluster.local:9000"
            s3Credentials:
              accessKeyId:
                name: s3-creds
                key: S3_ACCESS_KEY_ID
              secretAccessKey:
                name: s3-creds
                key: S3_SECRET_ACCESS_KEY

View File

@@ -0,0 +1,34 @@
---
# Nightly restore drill: applies the throw-away recovery Cluster from the
# restore-test-manifest ConfigMap, waits five minutes, then tears it down.
apiVersion: batch/v1
kind: CronJob
metadata:
  name: timescale-restore-test
  namespace: database
spec:
  schedule: "0 4 * * *"  # daily at 04:00
  jobTemplate:
    spec:
      template:
        spec:
          restartPolicy: OnFailure
          # NOTE(review): no serviceAccountName is set, so kubectl runs under
          # the namespace default ServiceAccount — confirm it has RBAC to
          # create/delete postgresql.cnpg.io Clusters, or this job will fail.
          containers:
            - name: restore-test
              # NOTE(review): consider pinning a version instead of :latest.
              image: bitnami/kubectl:latest
              command:
                - /bin/sh
                - -c
                - |
                  kubectl apply -f /manifests/restore-test.yaml
                  sleep 300
                  kubectl get pods -n database
                  echo "✅ Restore Test executed"
                  kubectl delete cluster timescale-restore-test -n database || true
              volumeMounts:
                - name: manifests
                  mountPath: /manifests
          volumes:
            - name: manifests
              configMap:
                name: restore-test-manifest

View File

@@ -0,0 +1,9 @@
---
# S3 (MinIO) credentials consumed by the barmanObjectStore backup/restore
# manifests in this directory.
# NOTE(review): plaintext credentials committed to the repository — move to
# SealedSecrets/ExternalSecrets or a vault-backed store before production use.
# NOTE(review): the cluster.yaml backup section appears to reference keys
# ACCESS_KEY_ID / SECRET_ACCESS_KEY — confirm those match the S3_-prefixed
# keys defined here.
apiVersion: v1
kind: Secret
metadata:
  name: s3-creds
  namespace: database
type: Opaque
stringData:
  S3_ACCESS_KEY_ID: "admin"
  S3_SECRET_ACCESS_KEY: "password123"

View File

@@ -0,0 +1,25 @@
---
# Restore of timescale-cluster from the most recent backup in the object
# store. Apply manually (kubectl apply -f), not via Argo CD — see README.
apiVersion: postgresql.cnpg.io/v1
kind: Cluster
metadata:
  name: timescale-restore-latest
  namespace: database
spec:
  instances: 1
  imageName: timescale/timescaledb:2.15.3-pg15
  bootstrap:
    recovery:
      source: timescale-cluster
  storage:
    size: 50Gi
  externalClusters:
    - name: timescale-cluster
      barmanObjectStore:
        destinationPath: "s3://backups/"
        endpointURL: "http://minio.minio-system.svc.cluster.local:9000"
        s3Credentials:
          accessKeyId:
            name: s3-creds
            key: S3_ACCESS_KEY_ID
          secretAccessKey:
            name: s3-creds
            key: S3_SECRET_ACCESS_KEY

View File

@@ -0,0 +1,34 @@
---
# Small throw-away recovery cluster used to verify that backups are
# actually restorable.
apiVersion: postgresql.cnpg.io/v1
kind: Cluster
metadata:
  name: timescale-restore-test
  namespace: database
spec:
  instances: 1
  imageName: timescale/timescaledb:2.15.3-pg15
  bootstrap:
    recovery:
      source: timescale-cluster
      recoveryTarget:
        # Stop recovery as soon as a consistent state is reached (base
        # backup applied) rather than replaying all available WAL.
        targetImmediate: true
  storage:
    size: 10Gi  # deliberately small — test instance only
  resources:
    # Reduced footprint for the test instance.
    requests:
      memory: "512Mi"
      cpu: "200m"
    limits:
      memory: "1Gi"
      cpu: "500m"
  externalClusters:
    - name: timescale-cluster
      barmanObjectStore:
        destinationPath: "s3://backups/"
        endpointURL: "http://minio.minio-system.svc.cluster.local:9000"
        s3Credentials:
          accessKeyId:
            name: s3-creds
            key: S3_ACCESS_KEY_ID
          secretAccessKey:
            name: s3-creds
            key: S3_SECRET_ACCESS_KEY

View File

@@ -0,0 +1,27 @@
---
# Point-in-time recovery of timescale-cluster. Apply manually after setting
# targetTime to the desired recovery point.
# NOTE(review): scripts/restore.sh substitutes a literal TIMESTAMP
# placeholder in a file named restore-timestamp.yaml — confirm whether this
# manifest is that file; if so, the hard-coded targetTime below should be
# the placeholder instead.
apiVersion: postgresql.cnpg.io/v1
kind: Cluster
metadata:
  name: timescale-restore-pitr
  namespace: database
spec:
  instances: 1
  imageName: timescale/timescaledb:2.15.3-pg15
  bootstrap:
    recovery:
      source: timescale-cluster
      recoveryTarget:
        targetTime: "2026-05-06 02:30:00"  # ADJUST to the desired recovery point
  storage:
    size: 50Gi
  externalClusters:
    - name: timescale-cluster
      barmanObjectStore:
        destinationPath: "s3://backups/"
        endpointURL: "http://minio.minio-system.svc.cluster.local:9000"
        s3Credentials:
          accessKeyId:
            name: s3-creds
            key: S3_ACCESS_KEY_ID
          secretAccessKey:
            name: s3-creds
            key: S3_SECRET_ACCESS_KEY

32
scripts/restore.sh Normal file
View File

@@ -0,0 +1,32 @@
#!/bin/bash
# restore.sh — trigger a CNPG restore of the timescale cluster.
#
# Usage:
#   ./restore.sh latest                      # restore from the latest backup
#   ./restore.sh pitr '2026-05-06 02:30:00'  # point-in-time recovery
#   ./restore.sh test                        # throw-away restore test
#
# Hardened: -u catches unset variables, pipefail propagates kubectl
# failures through the sed | kubectl pipeline (plain -e would not).
set -euo pipefail

MODE="${1:-}"
TIMESTAMP="${2:-}"

# Print usage and exit non-zero (unknown or missing mode).
usage() {
  echo "Usage:"
  echo "  ./restore.sh latest"
  echo "  ./restore.sh pitr '2026-05-06 02:30:00'"
  echo "  ./restore.sh test"
  exit 1
}

echo "🚀 Starting restore..."

case "$MODE" in
  latest)
    kubectl apply -f restore-latest.yaml
    ;;
  pitr)
    if [ -z "$TIMESTAMP" ]; then
      echo "❌ Please provide timestamp"
      exit 1
    fi
    # NOTE(review): restore-timestamp.yaml must contain the literal
    # placeholder TIMESTAMP for this substitution to take effect — confirm
    # the manifest. '|' delimiter so a '/' in the value cannot break sed.
    sed "s|TIMESTAMP|$TIMESTAMP|" restore-timestamp.yaml | kubectl apply -f -
    ;;
  test)
    kubectl apply -f restore-test.yaml
    ;;
  *)
    usage
    ;;
esac

echo "✅ Restore triggered"