Files changed

ntfy-alertmanager, ooniprobe, EC ceph objectstore

Josh Bicking, 1 month ago
parent
commit
50ec17c442

+ 25 - 2
README.md

@@ -249,7 +249,9 @@ $ helm upgrade -i nvdp nvdp/nvidia-device-plugin ... --set-file config.map.confi
 
 # ceph client for cephfs volumes
 
-## New method
+## Kernel driver
+
+### New method
 
 https://docs.ceph.com/en/latest/man/8/mount.ceph/
 
@@ -257,7 +259,7 @@ https://docs.ceph.com/en/latest/man/8/mount.ceph/
 sudo mount -t ceph user@<cluster FSID>.<filesystem name>=/ /mnt/ceph -o secret=<secret key>,x-systemd.requires=ceph.target,x-systemd.mount-timeout=5min,_netdev,mon_addr=192.168.1.1
 ```
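
The same new-style device string can also go in /etc/fstab instead of running mount by hand; a minimal sketch using the placeholders above (single monitor address, kernel driver):

```
user@<cluster FSID>.<filesystem name>=/    /mnt/ceph    ceph    secret=<secret key>,mon_addr=192.168.1.1,x-systemd.requires=ceph.target,x-systemd.mount-timeout=5min,_netdev    0 0
```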
 
-## Older method (stopped working for me around Pacific)
+### Older method (stopped working for me around Pacific)
 
 ```
 sudo vi /etc/fstab
@@ -265,6 +267,27 @@ sudo vi /etc/fstab
 192.168.1.1,192.168.1.2:/    /ceph   ceph    name=admin,secret=<secret key>,x-systemd.mount-timeout=5min,_netdev,mds_namespace=data
 ```
 
+## FUSE
+
+```
+$ cat /etc/ceph/ceph.conf
+[global]
+        fsid = <my cluster uuid>
+        mon_host = [v2:192.168.1.1:3300/0,v1:192.168.1.1:6789/0] [v2:192.168.1.2:3300/0,v1:192.168.1.2:6789/0]
+$ cat /etc/ceph/ceph.client.admin.keyring
+[client.admin]
+        key = <my key>
+        caps mds = "allow *"
+        caps mgr = "allow *"
+        caps mon = "allow *"
+        caps osd = "allow *"
+
+sudo vi /etc/fstab
+
+none /ceph fuse.ceph ceph.id=admin,ceph.client_fs=data,x-systemd.requires=ceph.target,x-systemd.mount-timeout=5min,_netdev 0 0
+```
+
+
 # disable mitigations
 https://unix.stackexchange.com/questions/554908/disable-spectre-and-meltdown-mitigations
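
For reference, a minimal sketch of what that looks like on a GRUB-based Debian/Ubuntu host (the `mitigations=off` kernel parameter is the upstream mechanism; other distros regenerate the GRUB config with grub2-mkconfig instead of update-grub):

```
# /etc/default/grub -- keep any flags already present and append mitigations=off
GRUB_CMDLINE_LINUX_DEFAULT="mitigations=off"

# regenerate the bootloader config and reboot for it to take effect
sudo update-grub
sudo reboot
```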
 

+ 9 - 7
cloudflared.yaml

@@ -64,15 +64,10 @@ data:
     ingress:
     - hostname: gogs.jibby.org
       service: http://gogs-service.gogs.svc.cluster.local:3000
-    #- hostname: matrix.jibby.org
-    #  service: http://matrix-service.matrix.svc.cluster.local:8008
     - hostname: miniflux.jibby.org
       service: http://miniflux-service.miniflux.svc.cluster.local:8080
-    - hostname: vaultwarden.jibby.org
-      service: http://vaultwarden-service.vaultwarden.svc.cluster.local:80
-    - hostname: vaultwarden.jibby.org
-      path: /notifications/hub.*
-      service: http://vaultwarden-service.vaultwarden.svc.cluster.local:3012
+    - hostname: ntfy-alertmanager.jibby.org
+      service: http://ntfy-alertmanager.monitoring.svc.cluster.local:80
     - hostname: homeassistant.jibby.org
       service: http://homeassistant-service.homeassistant.svc.cluster.local:8123
     - hostname: ntfy.jibby.org
@@ -81,4 +76,11 @@ data:
     #   service: http://mastodon-service.mastodon.svc.cluster.local:3000
     # - hostname: streaming-mastodon.jibby.org
     #   service: http://mastodon-service.mastodon.svc.cluster.local:4000
+    #- hostname: vaultwarden.jibby.org
+    #  service: http://vaultwarden-service.vaultwarden.svc.cluster.local:80
+    #- hostname: vaultwarden.jibby.org
+    #  path: /notifications/hub.*
+    #  service: http://vaultwarden-service.vaultwarden.svc.cluster.local:3012
+    #- hostname: matrix.jibby.org
+    #  service: http://matrix-service.matrix.svc.cluster.local:8008
     - service: http_status:404

+ 15 - 0
examples/prometheus-rule.yaml

@@ -0,0 +1,15 @@
+apiVersion: monitoring.coreos.com/v1
+kind: PrometheusRule
+metadata:
+  creationTimestamp: null
+  labels:
+    prometheus: example
+    role: alert-rules
+  name: prometheus-example-rules
+  namespace: monitoring
+spec:
+  groups:
+  - name: ./example.rules
+    rules:
+    - alert: ExampleAlert4
+      expr: vector(1)

+ 20 - 0
monitoring/alertmanager-ingressroute.yaml

@@ -0,0 +1,20 @@
+---
+apiVersion: traefik.containo.us/v1alpha1
+kind: IngressRoute
+metadata:
+  name: alertmanager-operated
+  namespace: monitoring
+spec:
+  entryPoints:
+  - websecure
+  routes:
+  - kind: Rule
+    match: Host(`alertmanager.lan.jibby.org`)
+    services:
+    - kind: Service
+      name: alertmanager-operated
+      port: 9093
+    middlewares:
+    - name: lanonly
+      namespace: kube-system
+

+ 9 - 0
monitoring/alertmanager.yaml

@@ -0,0 +1,9 @@
+apiVersion: monitoring.coreos.com/v1
+kind: Alertmanager
+metadata:
+  name: alertmanager
+  namespace: monitoring
+spec:
+  replicas: 1
+  alertmanagerConfiguration:
+    name: alertmanagerconfig-ntfy-webhook

+ 26 - 0
monitoring/alertmanagerconfig-ntfy-webhook.yaml

@@ -0,0 +1,26 @@
+apiVersion: monitoring.coreos.com/v1alpha1
+kind: AlertmanagerConfig
+metadata:
+  name: alertmanagerconfig-ntfy-webhook
+  namespace: monitoring
+  labels:
+    alertmanagerConfig: ntfy-webhook
+spec:
+  route:
+    groupBy: ['job']
+    groupWait: 30s
+    groupInterval: 5m
+    repeatInterval: 12h
+    receiver: 'ntfy'
+  receivers:
+  - name: 'ntfy'
+    webhookConfigs:
+    - url: 'http://ntfy-alertmanager.monitoring.svc.cluster.local/'
+      httpConfig:
+        basicAuth:
+          username:
+            name: ntfy-alertmanager-user
+            key: name
+          password:
+            name: ntfy-alertmanager-user
+            key: password
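
The basicAuth block above expects a Secret named ntfy-alertmanager-user in the monitoring namespace, with the username under the `name` key and the password under `password`. A minimal sketch (the credential values are placeholders):

```
apiVersion: v1
kind: Secret
metadata:
  name: ntfy-alertmanager-user
  namespace: monitoring
type: Opaque
stringData:
  name: <basic auth user>
  password: <basic auth password>
```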

+ 62 - 0
monitoring/ntfy-alertmanager.yaml

@@ -0,0 +1,62 @@
+---
+apiVersion: apps/v1
+kind: Deployment
+metadata:
+  name: ntfy-alertmanager
+  namespace: monitoring
+spec:
+  selector:
+    matchLabels:
+      app: ntfy-alertmanager
+  replicas: 1
+  template:
+    metadata:
+      labels:
+        app: ntfy-alertmanager
+    spec:
+      containers:
+      - name: ntfy-alertmanager
+        image: xenrox/ntfy-alertmanager:latest
+        args: ["--config", "/config/config.yaml"]
+        ports:
+        - containerPort: 8080
+          name: http-web-svc
+        volumeMounts:
+          - mountPath: "/config"
+            name: "config"
+      volumes:
+        - name: config
+          secret:
+            secretName: ntfy-alertmanager-config-secret
+---
+apiVersion: v1
+kind: Service
+metadata:
+  name: ntfy-alertmanager
+  namespace: monitoring
+spec:
+  selector:
+    app: ntfy-alertmanager
+  type: ClusterIP
+  ports:
+  - name: ntfy-alertmanager-port
+    protocol: TCP
+    port: 80
+    targetPort: http-web-svc
+---
+apiVersion: traefik.containo.us/v1alpha1
+kind: IngressRoute
+metadata:
+  name: ntfy-alertmanager
+  namespace: monitoring
+spec:
+  entryPoints:
+  - websecure
+  routes:
+  - kind: Rule
+    match: Host(`ntfy-alertmanager.jibby.org`)
+    services:
+    - kind: Service
+      name: ntfy-alertmanager
+      port: 80
+
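
The Deployment above mounts a Secret named ntfy-alertmanager-config-secret at /config and starts the container with --config /config/config.yaml, so the Secret needs a config.yaml key. One way to create it from a local config file (the file's contents follow the upstream ntfy-alertmanager docs and aren't part of this commit):

```
kubectl -n monitoring create secret generic ntfy-alertmanager-config-secret \
  --from-file=config.yaml=./config.yaml
```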

+ 10 - 1
monitoring/prometheus/prometheus.yaml

@@ -15,8 +15,18 @@ spec:
     fsGroup: 2000
     runAsNonRoot: true
     runAsUser: 1000
+  alerting:
+    alertmanagers:
+      - namespace: monitoring
+        name: alertmanager-operated
+        port: web
   serviceAccountName: prometheus
   serviceMonitorSelector: {}
+  serviceMonitorNamespaceSelector: {}
+  ruleSelector: # {} # does this pick up nothing if nil?
+    matchLabels:
+      role: alert-rules
+  ruleNamespaceSelector: {}
   #  matchExpressions:
   #  - key: name
   #    operator: In
@@ -29,7 +39,6 @@ spec:
   #    - rook-ceph-exporter
   #    - cloudflared
   #    - shelly-plug-exporter
-  serviceMonitorNamespaceSelector: {}
   #  matchExpressions:
   #  - key: kubernetes.io/metadata.name
   #    operator: In
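
After these selector changes, whether Prometheus actually picks up PrometheusRule objects (e.g. the example rule in examples/prometheus-rule.yaml) can be checked against its rules API; a quick sketch assuming the operator-created prometheus-operated service:

```
kubectl -n monitoring port-forward svc/prometheus-operated 9090 &
curl -s localhost:9090/api/v1/rules | grep ExampleAlert4
```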

+ 20 - 0
ooniprobe-pvc.yaml

@@ -0,0 +1,20 @@
+---
+apiVersion: v1
+kind: Namespace
+metadata:
+  name: ooniprobe
+---
+apiVersion: v1
+kind: PersistentVolumeClaim
+metadata:
+  name: ooniprobe-pvc
+  namespace: ooniprobe
+  labels:
+    app: ooniprobe
+spec:
+  storageClassName: ceph-block
+  accessModes:
+    - ReadWriteOnce
+  resources:
+    requests:
+      storage: 1Gi

+ 47 - 0
ooniprobe.yaml

@@ -0,0 +1,47 @@
+---
+apiVersion: v1
+kind: Namespace
+metadata:
+  name: ooniprobe
+---
+apiVersion: apps/v1
+kind: Deployment
+metadata:
+  name: ooniprobe
+  namespace: ooniprobe
+spec:
+  strategy:
+    type: Recreate
+  selector:
+    matchLabels:
+      app: ooniprobe
+  replicas: 1
+  template:
+    metadata:
+      labels:
+        app: ooniprobe
+      annotations:
+        backup.velero.io/backup-volumes-excludes: ooniprobe
+    spec:
+      containers:
+      - name: ooniprobe
+        image: aaimio/ooniprobe:latest
+        env:
+        - name: informed_consent
+          value: "true"
+        - name: upload_results
+          value: "true"
+        - name: sleep
+          value: "true"
+        volumeMounts:
+        - mountPath: "/config"
+          name: ooniprobe
+        resources:
+          requests:
+            memory: "0"
+          limits:
+            memory: "300Mi"
+      volumes:
+      - name: ooniprobe
+        persistentVolumeClaim:
+          claimName: ooniprobe-pvc
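
Once the PVC and Deployment are applied, the probe's runs can be followed from its logs (a usage sketch; the resource names match the manifests above):

```
kubectl apply -f ooniprobe-pvc.yaml -f ooniprobe.yaml
kubectl -n ooniprobe logs deploy/ooniprobe -f
```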

+ 3 - 3
rook/buckets/user-josh.yaml → rook/buckets/ceph-objectstore-ec-user-jibby.yaml

@@ -1,11 +1,11 @@
 apiVersion: ceph.rook.io/v1
 kind: CephObjectStoreUser
 metadata:
-  name: josh
+  name: jibby
   namespace: rook-ceph
 spec:
-  store: ceph-objectstore
-  displayName: Josh
+  store: ceph-objectstore-ec
+  displayName: jibby
   quotas:
     maxBuckets: 1
     maxSize: 10G
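
Rook stores the generated S3 credentials for this user in a Secret; with Rook's documented naming convention that should be rook-ceph-object-user-ceph-objectstore-ec-jibby (an assumption based on the store and user names above, not shown in this commit):

```
# the matching SecretKey entry holds the secret access key
kubectl -n rook-ceph get secret rook-ceph-object-user-ceph-objectstore-ec-jibby \
  -o jsonpath='{.data.AccessKey}' | base64 -d
```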

+ 2 - 2
rook/buckets/ingress.yaml

@@ -11,5 +11,5 @@ spec:
     match: Host(`s3.jibby.org`)
     services:
     - kind: Service
-      name: rook-ceph-rgw-ceph-objectstore
-      port: 6980
+      name: rook-ceph-rgw-ceph-objectstore-ec
+      port: 6981

+ 23 - 0
rook/builtin-mgr.yaml

@@ -0,0 +1,23 @@
+# https://github.com/rook/rook/issues/11368
+apiVersion: ceph.rook.io/v1
+kind: CephBlockPool
+metadata:
+  # If the built-in Ceph pool used by the Ceph mgr needs to be configured with alternate
+  # settings, create this pool with any of the pool properties. Create this pool immediately
+  # with the cluster CR, or else some properties may not be applied when Ceph creates the
+  # pool by default.
+  name: builtin-mgr
+  namespace: rook-ceph
+spec:
+  # The required pool name with underscores cannot be specified as a K8s resource name, thus we override
+  # the pool name created in Ceph with this name property.
+  name: .mgr
+  failureDomain: host
+  replicated:
+    size: 3
+    requireSafeReplicaSize: true
+  deviceClass: ssd
+  parameters:
+    compression_mode: none
+  mirroring:
+    enabled: false
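
Whether the overrides actually applied to the built-in pool can be checked from the Ceph toolbox (assuming the standard rook-ceph-tools deployment is installed; it isn't part of this commit):

```
kubectl -n rook-ceph exec deploy/rook-ceph-tools -- ceph osd pool ls detail | grep "'.mgr'"
```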

+ 2 - 2
rook/ceph-object-store.yaml → rook/ceph-object-store-ec.yaml

@@ -1,7 +1,7 @@
 apiVersion: ceph.rook.io/v1
 kind: CephObjectStore
 metadata:
-  name: ceph-objectstore
+  name: ceph-objectstore-ec
   namespace: rook-ceph
 spec:
   metadataPool:
@@ -18,7 +18,7 @@ spec:
   preservePoolsOnDelete: true
   gateway:
     sslCertificateRef:
-    port: 6980
+    port: 6981
     # securePort: 443
     instances: 1
   #healthCheck:

+ 1 - 0
rook/data/data-filesystem.yaml

@@ -1,4 +1,5 @@
 # TODO move to the main helm values
+# TODO isn't written much, could probably be EC
 apiVersion: ceph.rook.io/v1
 kind: CephFilesystem
 metadata:

+ 4 - 1
rook/media2/media2-filesystem.yaml

@@ -8,17 +8,20 @@ spec:
   metadataPool:
     replicated:
       size: 3
-      deviceClass: ssd
+    deviceClass: ssd
+    failureDomain: host
   dataPools:
     - name: default
       replicated:
         size: 3
       deviceClass: hdd
+      failureDomain: host
     - name: erasurecoded
       erasureCoded:
         dataChunks: 2
         codingChunks: 1
       deviceClass: hdd
+      failureDomain: host
   preserveFilesystemOnDelete: true
   metadataServer:
     activeCount: 1

+ 1 - 1
rook/plex/plex-filesystem.yaml

@@ -8,7 +8,7 @@ spec:
   metadataPool:
     replicated:
       size: 3
-      deviceClass: ssd
+    deviceClass: ssd
   dataPools:
     - name: default
       replicated:

+ 1 - 1
shelly-plug-exporter.yaml

@@ -22,7 +22,7 @@ spec:
     spec:
       containers:
       - name: shelly-plug-exporter
-        image: jibby0/shelly-plug-exporter:24.2.0-fork2
+        image: webdevops/shelly-plug-exporter:24.9.0
         ports:
         - containerPort: 8080
           name: metrics