add ntfy, start on backup strategy

Josh Bicking, 6 months ago
commit dadd05a339

+ 38 - 2
README.md

@@ -233,6 +233,41 @@ kubectl expose svc/some-service --name=some-service-external --port 1234 --targe
 
 Service will then be available on port 1234 of any k8s node.
 
+# Backups
+
+## velero
+```
+KUBECONFIG=/etc/rancher/k3s/k3s.yaml velero install \
+ --provider aws \
+ --plugins velero/velero-plugin-for-aws:v1.0.0 \
+ --bucket velero  \
+ --secret-file ./credentials-velero  \
+ --use-volume-snapshots=true \
+ --default-volumes-to-fs-backup \
+ --use-node-agent \
+ --backup-location-config region=default,s3ForcePathStyle="true",s3Url=http://172.16.69.234:9000  \
+ --snapshot-location-config region="default"
+```
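+
+`credentials-velero` is a standard AWS-style credentials file for the minio user, something like (values are placeholders):
+```
+[default]
+aws_access_key_id = <minio access key>
+aws_secret_access_key = <minio secret key>
+```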
+
+Had to remove `resources:` from the daemonset.
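+
+One way to do that in place (a sketch; assumes the node agent daemonset is named `node-agent`, as created by `--use-node-agent`):
+```
+KUBECONFIG=/etc/rancher/k3s/k3s.yaml kubectl -n velero patch daemonset node-agent --type json \
+ -p '[{"op": "remove", "path": "/spec/template/spec/containers/0/resources"}]'
+```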
+
+### Changing the S3 target
+```
+kubectl -n velero edit backupstoragelocation default
+```
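+
+For this minio target the object looks roughly like the following (fields mirror the install flags above):
+```
+apiVersion: velero.io/v1
+kind: BackupStorageLocation
+metadata:
+  name: default
+  namespace: velero
+spec:
+  provider: aws
+  objectStorage:
+    bucket: velero
+  config:
+    region: default
+    s3ForcePathStyle: "true"
+    s3Url: http://172.16.69.234:9000
+```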
+
+### Using a local storage storageClass in the target
+
+https://velero.io/docs/v1.3.0/restore-reference/
+
+Velero does not support hostPath PVCs, but works just fine with the `openebs-hostpath` storageClass.
+
+```
+KUBECONFIG=/etc/rancher/k3s/k3s.yaml helm install openebs openebs/openebs \
+ --namespace openebs --create-namespace \
+ --set localprovisioner.basePath=/k3s-storage/openebs
+```
+
+This is a nice PVC option for simpler backup target setups.
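+
+For reference, a claim against that class is an ordinary PVC (names and size below are placeholders):
+```
+apiVersion: v1
+kind: PersistentVolumeClaim
+metadata:
+  name: example-pvc
+  namespace: example
+spec:
+  storageClassName: openebs-hostpath
+  accessModes:
+    - ReadWriteOnce
+  resources:
+    requests:
+      storage: 10Gi
+```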
+
 
 # libvirtd
 
@@ -241,6 +276,7 @@ Service will then be available on port 1234 of any k8s node.
 # Still to do
 
 - deluge
-- gogs ssh ingress (can't go through cloudflare without cloudflared on the client)
+- gogs ssh ingress?
+  - can't go through cloudflare without cloudflared on the client
+  - cloudflared running in the gogs pod?
 - Something better than `expose` for accessing internal services
-- replicated_ssd crush rule never resolves (or didn't on `data-metadata`)

+ 70 - 0
backup/cloudflared.yaml

@@ -0,0 +1,70 @@
+# https://github.com/cloudflare/argo-tunnel-examples/blob/master/named-tunnel-k8s/cloudflared.yaml
+---
+apiVersion: apps/v1
+kind: Deployment
+metadata:
+  name: cloudflared
+  namespace: kube-system
+spec:
+  selector:
+    matchLabels:
+      app: cloudflared
+  replicas: 3
+  template:
+    metadata:
+      labels:
+        app: cloudflared
+    spec:
+      containers:
+      - name: cloudflared
+        image: cloudflare/cloudflared:2023.8.2
+        args:
+        - tunnel
+        - --config
+        - /etc/cloudflared/config/config.yaml
+        - run
+        ports:
+        - containerPort: 2000
+          name: metrics
+        livenessProbe:
+          httpGet:
+            path: /ready
+            port: 2000
+          failureThreshold: 1
+          initialDelaySeconds: 10
+          periodSeconds: 10
+        volumeMounts:
+        - name: config
+          mountPath: /etc/cloudflared/config
+          readOnly: true
+        - name: creds
+          mountPath: /etc/cloudflared/creds
+          readOnly: true
+      volumes:
+      - name: creds
+        secret:
+          secretName: tunnel-credentials
+      - name: config
+        configMap:
+          name: cloudflared
+          items:
+          - key: config.yaml
+            path: config.yaml
+---
+apiVersion: v1
+kind: ConfigMap
+metadata:
+  name: cloudflared
+  namespace: kube-system
+data:
+  config.yaml: |
+    tunnel: example-tunnel
+    credentials-file: /etc/cloudflared/creds/credentials.json
+    metrics: 0.0.0.0:2000
+    ingress:
+    - hostname: vaultwarden.bnuuy.org
+      service: http://vaultwarden-service.vaultwarden.svc.cluster.local:80
+    - hostname: vaultwarden.bnuuy.org
+      path: /notifications/hub.*
+      service: http://vaultwarden-service.vaultwarden.svc.cluster.local:3012
+    - service: http_status:404

+ 21 - 0
backup/minio-pvc.yaml

@@ -0,0 +1,21 @@
+---
+apiVersion: v1
+kind: Namespace
+metadata:
+    name: minio
+---
+apiVersion: v1
+kind: PersistentVolumeClaim
+metadata:
+  name: minio-pvc
+  namespace: minio
+  labels:
+    app: minio
+spec:
+  storageClassName: local-path
+  accessModes:
+    - ReadWriteOnce
+  resources:
+    requests:
+      storage: 1800Gi
+

+ 83 - 0
backup/minio.yaml

@@ -0,0 +1,83 @@
+---
+apiVersion: v1
+kind: Namespace
+metadata:
+    name: minio
+---
+apiVersion: apps/v1
+kind: Deployment
+metadata:
+  name: minio
+  namespace: minio
+spec:
+  selector:
+    matchLabels:
+      app: minio
+  replicas: 1
+  template:
+    metadata:
+      labels:
+        app: minio
+    spec:
+      containers:
+      - name: minio
+        image: "quay.io/minio/minio:RELEASE.2023-09-16T01-01-47Z"
+        command: ["minio", "server", "/data", "--console-address", ":9090"]
+        ports:
+        - containerPort: 9000
+          name: http-web-svc
+        - containerPort: 9090
+          name: http-con-svc
+        envFrom:
+        - secretRef:
+            name: minio-secret
+        volumeMounts:
+        - mountPath: "/data"
+          name: data
+        livenessProbe:
+          httpGet:
+            path: /minio/health/live
+            port: 9000
+          failureThreshold: 10
+          initialDelaySeconds: 30
+          periodSeconds: 10
+      volumes:
+      - name: data
+        persistentVolumeClaim:
+          claimName: minio-pvc
+---
+apiVersion: v1
+kind: Service
+metadata:
+  name: minio-service
+  namespace: minio
+spec:
+  selector:
+    app: minio
+  type: ClusterIP
+  ports:
+  - name: minio-web-port
+    protocol: TCP
+    port: 9000
+    targetPort: http-web-svc
+  - name: minio-con-port
+    protocol: TCP
+    port: 9090
+    targetPort: http-con-svc
+---
+apiVersion: traefik.containo.us/v1alpha1
+kind: IngressRoute
+metadata:
+  name: minio
+  namespace: minio
+spec:
+  entryPoints:
+  - websecure
+  routes:
+  - kind: Rule
+    match: Host(`s3.bnuuy.org`)
+    services:
+    - kind: Service
+      name: minio-service
+      port: 9000

+ 53 - 0
backup/traefik-helmchartconfig.yaml

@@ -0,0 +1,53 @@
+apiVersion: helm.cattle.io/v1
+kind: HelmChartConfig
+metadata:
+  name: traefik
+  namespace: kube-system
+spec:
+  valuesContent: |-
+    ports:
+      web:
+        exposedPort: 9001
+      websecure:
+        exposedPort: 9000
+
+    additionalArguments:
+      # Auto cert renewal via cloudflare
+      - "--certificatesresolvers.letsencrypt.acme.email=joshbicking@comcast.net"
+      - "--certificatesresolvers.letsencrypt.acme.storage=/data/acme.json"
+      - "--certificatesresolvers.letsencrypt.acme.dnschallenge.provider=cloudflare"
+      - "--certificatesresolvers.letsencrypt.acme.dnschallenge.resolvers=1.1.1.1:53,8.8.8.8:53"
+      - "--entrypoints.websecure.http.tls.certResolver=letsencrypt"
+      - "--entrypoints.websecure.http.tls.domains[0].main=s3.bnuuy.org"
+
+      # debug, uncomment for testing
+      # - "--log.level=DEBUG"
+      # - "--certificatesresolvers.letsencrypt.acme.caServer=https://acme-staging-v02.api.letsencrypt.org/directory"
+
+    env:
+      - name: CLOUDFLARE_EMAIL
+        valueFrom:
+          secretKeyRef:
+            name: cloudflare-secrets
+            key: email
+            optional: false
+      - name: CLOUDFLARE_API_KEY
+        valueFrom:
+          secretKeyRef:
+            name: cloudflare-secrets
+            key: api-key
+            optional: false
+
+    persistence:
+      enabled: true
+      storageClass: local-path
+
+    # Fix for acme.json file being changed to 660 from 600
+    podSecurityContext:
+      fsGroup: null
+
+    service:
+      spec:
+        externalTrafficPolicy: Local
+    hostNetwork: true
+

+ 76 - 0
backup/velero_restore_new.py

@@ -0,0 +1,76 @@
+import datetime
+import os
+import json
+import subprocess
+import sys
+
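+# Deletes the namespaces below, restores them from the newest velero backup
+# (which must be under a week old), then restarts the deployments that need a
+# kick after restore. Success or failure is reported to the ntfy topic;
+# NTFY_AUTH must hold "user:password" for that topic, e.g.:
+#   NTFY_AUTH='user:pass' python3 velero_restore_new.py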
+namespaces = ["vaultwarden", "postgres"]
+k3s_env = {"KUBECONFIG": "/etc/rancher/k3s/k3s.yaml"}
+ntfy_topic = "https://ntfy.jibby.org/velero-restore"
+ntfy_auth = os.environ["NTFY_AUTH"]
+restart_deployments_in = ["vaultwarden"]
+
+
+def main():
+    # 3.11+ needed: datetime.fromisoformat() can't parse the trailing 'Z' in
+    # Kubernetes creationTimestamps before then
+    if sys.version_info < (3, 11):
+        raise RuntimeError("Python 3.11 or greater required")
+
+    velero_str = subprocess.run(
+        ["/usr/local/bin/velero", "backup", "get", "-o", "json"],
+        env=k3s_env,
+        check=True,
+        capture_output=True,
+    ).stdout
+
+    velero = json.loads(velero_str)
+
+    backups_by_timestamp = {
+        backup['metadata']['creationTimestamp']: backup
+        for backup in velero['items']
+    }
+    if not backups_by_timestamp:
+        raise ValueError("no backups?")
+
+    newest_backup_timestamp = max(backups_by_timestamp.keys())
+    one_week_ago = datetime.datetime.now(datetime.timezone.utc) - datetime.timedelta(days=7)
+    if datetime.datetime.fromisoformat(newest_backup_timestamp) < one_week_ago:
+        raise ValueError(f"no backups < 1 week old? {newest_backup_timestamp=}")
+
+    newest_backup = backups_by_timestamp[newest_backup_timestamp]
+    print(f"Using newest backup {newest_backup['metadata']['name']}, taken at {newest_backup['metadata']['creationTimestamp']}")
+
+    # delete namespaces
+    for namespace in namespaces:
+        subprocess.run(
+            ["/usr/local/bin/kubectl", "delete", "namespace", namespace],
+            env=k3s_env,
+            check=True,
+        )
+
+    subprocess.run(
+        ["/usr/local/bin/velero", "restore", "create", "--from-backup", newest_backup['metadata']['name'], "--include-namespaces", ",".join(namespaces), "--wait"],
+        env=k3s_env,
+        check=True,
+    )
+
+    for namespace in restart_deployments_in:
+        subprocess.run(
+            ["/usr/local/bin/kubectl", "-n", namespace, "rollout", "restart", "deployment"],
+            env=k3s_env,
+            check=True,
+        )
+    ntfy_send(
+        f"Successfully ran velero restore for backup {newest_backup['metadata']['name']}, "
+        f"{newest_backup['metadata']['creationTimestamp']}"
+    )
+
+def ntfy_send(data):
+    # auth & payload formatting is awful in urllib. just use curl
+    subprocess.run(["curl", "-u", ntfy_auth, "-d", data, ntfy_topic], check=True)
+
+if __name__ == '__main__':
+    try:
+        main()
+    except Exception as e:
+        ntfy_send(f"Error running velero restore: {str(e)}")
+        raise

+ 2 - 0
cloudflared.yaml

@@ -87,6 +87,8 @@ data:
       service: http://vaultwarden-service.vaultwarden.svc.cluster.local:3012
     - hostname: homeassistant.jibby.org
       service: http://homeassistant-service.homeassistant.svc.cluster.local:8123
+    - hostname: ntfy.jibby.org
+      service: http://ntfy-service.ntfy.svc.cluster.local:80
     # - hostname: mastodon.jibby.org
     #   service: http://mastodon-service.mastodon.svc.cluster.local:3000
     # - hostname: streaming-mastodon.jibby.org

+ 15 - 0
data-pvc.yaml

@@ -0,0 +1,15 @@
+---
+apiVersion: v1
+kind: PersistentVolumeClaim
+metadata:
+  name: data-pvc
+  namespace: plex
+spec:
+  storageClassName: data-sc
+  volumeName: data-static-pv
+  volumeMode: Filesystem
+  accessModes:
+    - ReadWriteMany
+  resources:
+    requests:
+      storage: 20Ti

+ 14 - 0
duplicati-pvc.yaml

@@ -0,0 +1,14 @@
+apiVersion: v1
+kind: PersistentVolumeClaim
+metadata:
+  name: duplicati-pvc
+  namespace: plex
+  labels:
+    app: duplicati
+spec:
+  storageClassName: ceph-block
+  accessModes:
+    - ReadWriteOnce
+  resources:
+    requests:
+      storage: 10Gi

+ 104 - 0
duplicati.yaml

@@ -0,0 +1,104 @@
+---
+apiVersion: apps/v1
+kind: Deployment
+metadata:
+  name: duplicati
+  namespace: plex
+spec:
+  selector:
+    matchLabels:
+      app: duplicati
+  replicas: 1
+  template:
+    metadata:
+      labels:
+        app: duplicati
+      annotations:
+        backup.velero.io/backup-volumes-excludes: plex,media2,data
+    spec:
+      containers:
+      - name: duplicati
+        image: lscr.io/linuxserver/duplicati:version-v2.0.7.1-2.0.7.1_beta_2023-05-25
+        ports:
+        - containerPort: 8200
+          name: http-web-svc
+        env:
+        - name: TZ
+          value: Etc/UTC
+        - name: PUID
+          value: "1000"
+        - name: PGID
+          value: "1000"
+        volumeMounts:
+        - mountPath: "/plex"
+          name: plex
+        - mountPath: "/media2"
+          name: media2
+        - mountPath: "/data"
+          name: data
+        - mountPath: "/config"
+          name: config
+      volumes:
+      - name: plex
+        persistentVolumeClaim:
+          claimName: plex-pvc
+      - name: media2
+        persistentVolumeClaim:
+          claimName: media2-pvc
+      - name: data
+        persistentVolumeClaim:
+          claimName: data-pvc
+      - name: config
+        persistentVolumeClaim:
+          claimName: duplicati-pvc
+---
+apiVersion: v1
+kind: Service
+metadata:
+  name: duplicati-service
+  namespace: plex
+spec:
+  selector:
+    app: duplicati
+  type: ClusterIP
+  ports:
+  - name: duplicati-web-port
+    protocol: TCP
+    port: 8200
+    targetPort: http-web-svc
+---
+apiVersion: batch/v1
+kind: CronJob
+metadata:
+  name: duplicati-run-tree
+  namespace: plex
+spec:
+  schedule: "* * * * *"
+  jobTemplate:
+    spec:
+      template:
+        spec:
+          containers:
+          - name: tree
+            image: alpine:3.18.4
+            imagePullPolicy: IfNotPresent
+            envFrom:
+            - secretRef:
+                name: ntfy-auth-secret
+            command:
+            - /bin/sh
+            - -c
+            - (tree /plex > /plex/tree.txt && tree /media2 > /media2/tree.txt) || curl -u $NTFY_AUTH -d 'error generating media tree' https://ntfy.jibby.org/media-tree
+            volumeMounts:
+            - mountPath: "/plex"
+              name: plex
+            - mountPath: "/media2"
+              name: media2
+          volumes:
+          - name: plex
+            persistentVolumeClaim:
+              claimName: plex-pvc
+          - name: media2
+            persistentVolumeClaim:
+              claimName: media2-pvc
+          restartPolicy: OnFailure

+ 1 - 1
gogs.yaml

@@ -36,7 +36,7 @@ spec:
     spec:
       containers:
       - name: gogs
-        image: gogs/gogs:0.12.9
+        image: gogs/gogs:0.13.0
         env:
         - name: SOCAT_LINK
           value: "false"

+ 2 - 0
jellyfin.yaml

@@ -18,6 +18,8 @@ spec:
     metadata:
       labels:
         app: jellyfin
+      annotations:
+        backup.velero.io/backup-volumes-excludes: media,tmpfs
     spec:
       containers:
       - name: jellyfin

+ 2 - 0
lidarr.yaml

@@ -13,6 +13,8 @@ spec:
     metadata:
       labels:
         app: lidarr
+      annotations:
+        backup.velero.io/backup-volumes-excludes: plex
     spec:
       containers:
       - name: lidarr

+ 1 - 1
nextcloud/values.yaml

@@ -22,7 +22,7 @@
 ##
 image:
   repository: nextcloud
-  tag: 26.0.4-apache
+  tag: 26.0.6-apache
   pullPolicy: IfNotPresent
   # pullSecrets:
   #   - myRegistrKeySecretName

+ 20 - 0
ntfy-pvc.yaml

@@ -0,0 +1,20 @@
+---
+apiVersion: v1
+kind: Namespace
+metadata:
+    name: ntfy
+---
+apiVersion: v1
+kind: PersistentVolumeClaim
+metadata:
+  name: ntfy-pvc
+  namespace: ntfy
+  labels:
+    app: ntfy
+spec:
+  storageClassName: ceph-block
+  accessModes:
+    - ReadWriteOnce
+  resources:
+    requests:
+      storage: 2Gi

+ 74 - 0
ntfy.yaml

@@ -0,0 +1,74 @@
+---
+apiVersion: v1
+kind: Namespace
+metadata:
+    name: ntfy
+---
+apiVersion: v1
+kind: ConfigMap
+metadata:
+  name: ntfy
+  namespace: ntfy
+data:
+  server.yml: |
+    # Template: https://github.com/binwiederhier/ntfy/blob/main/server/server.yml
+    base-url: https://ntfy.jibby.org
+    auth-file: "/ntfy/user.db"
+    auth-default-access: "deny-all"
+    behind-proxy: true
+---
+apiVersion: apps/v1
+kind: Deployment
+metadata:
+  name: ntfy
+  namespace: ntfy
+spec:
+  selector:
+    matchLabels:
+      app: ntfy
+  template:
+    metadata:
+      labels:
+        app: ntfy
+    spec:
+      containers:
+      - name: ntfy
+        image: binwiederhier/ntfy:v2.7.0
+        args: ["serve"]
+        ports:
+        - containerPort: 80
+          name: http
+        volumeMounts:
+        - name: config
+          mountPath: "/etc/ntfy"
+          readOnly: true
+        - name: data
+          mountPath: "/ntfy"
+        livenessProbe:
+          httpGet:
+            path: /v1/health
+            port: 80
+          failureThreshold: 10
+          initialDelaySeconds: 30
+          periodSeconds: 30
+          timeoutSeconds: 10
+      volumes:
+        - name: config
+          configMap:
+            name: ntfy
+        - name: data
+          persistentVolumeClaim:
+            claimName: ntfy-pvc
+---
+# Basic service for port 80
+apiVersion: v1
+kind: Service
+metadata:
+  name: ntfy-service
+  namespace: ntfy
+spec:
+  selector:
+    app: ntfy
+  ports:
+  - port: 80
+    targetPort: 80

+ 2 - 2
plex-pvc.yaml

@@ -27,7 +27,7 @@ spec:
     - ReadWriteMany
   resources:
     requests:
-      storage: 20Ti
+      storage: 21Ti
 ---
 apiVersion: v1
 kind: PersistentVolumeClaim
@@ -42,4 +42,4 @@ spec:
     - ReadWriteMany
   resources:
     requests:
-      storage: 20Ti
+      storage: 40Ti

+ 3 - 1
plex.yaml

@@ -18,10 +18,12 @@ spec:
     metadata:
       labels:
         app: plex
+      annotations:
+        backup.velero.io/backup-volumes-excludes: media,media-metadata,tmpfs
     spec:
       containers:
       - name: plex
-        image: linuxserver/plex:amd64-version-1.32.5.7349-8f4248874
+        image: linuxserver/plex:amd64-version-1.32.5.7516-8f4248874
         # for debugging
         # command: ["/bin/sh"]
         # args: ["-c", "sleep 3600"]

+ 3 - 1
radarr.yaml

@@ -13,6 +13,8 @@ spec:
     metadata:
       labels:
         app: radarr
+      annotations:
+        backup.velero.io/backup-volumes-excludes: plex
     spec:
       containers:
       - name: radarr
@@ -53,4 +55,4 @@ spec:
   - name: radarr-web-port
     protocol: TCP
     port: 7878
-    targetPort: http-web-svc
+    targetPort: http-web-svc

+ 7 - 0
secret-example.yaml

@@ -0,0 +1,7 @@
+apiVersion: v1
+kind: Secret
+metadata:
+  name: example-secret
+  namespace: example
+stringData:
+  ENVVARNAME: "envvarvalue"

+ 3 - 1
sonarr.yaml

@@ -13,6 +13,8 @@ spec:
     metadata:
       labels:
         app: sonarr
+      annotations:
+        backup.velero.io/backup-volumes-excludes: plex
     spec:
       containers:
       - name: sonarr
@@ -53,4 +55,4 @@ spec:
   - name: sonarr-web-port
     protocol: TCP
     port: 8989
-    targetPort: http-web-svc
+    targetPort: http-web-svc

+ 1 - 23
vaultwarden.yaml

@@ -21,7 +21,7 @@ spec:
     spec:
       containers:
       - name: vaultwarden
-        image: vaultwarden/server:1.28.1
+        image: vaultwarden/server:1.29.2
         ports:
         - containerPort: 80
           name: http-web-svc
@@ -68,25 +68,3 @@ spec:
     protocol: TCP
     port: 3012
     targetPort: http-sock-svc
----
-apiVersion: traefik.containo.us/v1alpha1
-kind: IngressRoute
-metadata:
-  name: vaultwarden
-  namespace: vaultwarden
-spec:
-  entryPoints:
-  - websecure
-  routes:
-  - kind: Rule
-    match: Host(`vaultwarden.jibby.org`)
-    services:
-    - kind: Service
-      name: vaultwarden-service
-      port: 80
-  - kind: Rule
-    match: Host(`vaultwarden.jibby.org`) && Path(`/notifications/hub`)
-    services:
-    - kind: Service
-      name: vaultwarden-service
-      port: 3012