
fix & document traefik & nextcloud

Josh Bicking, 5 months ago
commit bf82e30c70

+ 17 - 1
README.md

@@ -18,6 +18,22 @@ curl -sfL https://get.k3s.io | K3S_TOKEN=$NODE_TOKEN INSTALL_K3S_EXEC="server --
 
 TODO
 
+## purging k3s image cache
+
+```
+$ sudo crictl rmi --prune
+```
+
+## ingress
+
+Uses traefik, the k3s default.
+
+`externalTrafficPolicy: Local` is used to preserve forwarded client IPs.
+
+A `cluster-ingress=true` label is given to the node my router points to. Some services use a nodeAffinity to request it (applied as shown below).
+
+For traefik, this is a harmless optimization to reduce traffic hairpinning. For pods with `hostNetwork: true`, it ensures they run on the node with the right IP.
+
 # rook
 
 ## installing rook
@@ -293,4 +309,4 @@ TODO. This would be nice for one-off Windows game servers.
   - cloudflared running in the gogs pod?
   - do gitea or gitlab have better options?
 - Something better than `expose` for accessing internal services
-  - short term, capture the resource definition YAML & save it alongside the service
+  - short term, capture the resource definition YAML & save it alongside the service
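The `cluster-ingress=true` label referenced above is applied by hand. A minimal sketch, assuming the router points at a node named `node1` (a hypothetical name):

```
kubectl label node node1 cluster-ingress=true
kubectl get nodes -L cluster-ingress
```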

+ 1 - 1
bazarr.yaml

@@ -18,7 +18,7 @@ spec:
     spec:
       containers:
       - name: bazarr
-        image: lscr.io/linuxserver/bazarr:development-v1.3.2-beta.10-ls550
+        image: lscr.io/linuxserver/bazarr:development-version-v1.4.3-beta.31
         ports:
         - containerPort: 6767
           name: http-web-svc

+ 1 - 1
cloudflared.yaml

@@ -17,7 +17,7 @@ spec:
     spec:
       containers:
       - name: cloudflared
-        image: cloudflare/cloudflared:2023.8.2
+        image: cloudflare/cloudflared:2024.4.1
         args:
         - tunnel
         - --config

+ 45 - 0
homeassistant.yaml

@@ -14,6 +14,15 @@ spec:
       labels:
         app: homeassistant
     spec:
+      affinity:
+        nodeAffinity:
+          requiredDuringSchedulingIgnoredDuringExecution:
+            nodeSelectorTerms:
+            - matchExpressions:
+              - key: cluster-ingress
+                operator: In
+                values:
+                - "true"
       hostNetwork: true
       containers:
       - name: homeassistant
@@ -75,6 +84,15 @@ spec:
       labels:
         app: whisper
     spec:
+      affinity:
+        nodeAffinity:
+          requiredDuringSchedulingIgnoredDuringExecution:
+            nodeSelectorTerms:
+            - matchExpressions:
+              - key: cluster-ingress
+                operator: In
+                values:
+                - "true"
       hostNetwork: true
       containers:
       - name: homeassistant
@@ -111,6 +129,15 @@ spec:
       labels:
         app: piper
     spec:
+      affinity:
+        nodeAffinity:
+          requiredDuringSchedulingIgnoredDuringExecution:
+            nodeSelectorTerms:
+            - matchExpressions:
+              - key: cluster-ingress
+                operator: In
+                values:
+                - "true"
       hostNetwork: true
       containers:
       - name: homeassistant
@@ -145,6 +172,15 @@ spec:
       labels:
         app: openwakeword
     spec:
+      affinity:
+        nodeAffinity:
+          requiredDuringSchedulingIgnoredDuringExecution:
+            nodeSelectorTerms:
+            - matchExpressions:
+              - key: cluster-ingress
+                operator: In
+                values:
+                - "true"
       hostNetwork: true
       containers:
       - name: homeassistant
@@ -181,6 +217,15 @@ spec:
       labels:
         app: mosquitto
     spec:
+      affinity:
+        nodeAffinity:
+          requiredDuringSchedulingIgnoredDuringExecution:
+            nodeSelectorTerms:
+            - matchExpressions:
+              - key: cluster-ingress
+                operator: In
+                values:
+                - "true"
       hostNetwork: true
       containers:
       - name: homeassistant
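Since these pods use `hostNetwork: true`, the affinity pins them to the node whose IP smart-home devices already point at. A quick placement check after applying; the `homeassistant` namespace is an assumption:

```
kubectl get nodes -l cluster-ingress=true
kubectl get pods -n homeassistant -o wide
```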

+ 19 - 7
jellyfin.yaml

@@ -32,15 +32,22 @@ spec:
           name: config
         - mountPath: "/media"
           name: media
-        - mountPath: "/transcodes"
-          name: tmpfs
+        #- mountPath: "/transcodes"
+        #  name: tmpfs
+        # Quicksync
+        - name: "render-device"
+          mountPath: "/dev/dri/renderD128"
         env:
         - name: PUID
           value: "1000"
         - name: PGID
           value: "1000"
-        - name: NVIDIA_DRIVER_CAPABILITIES
-          value: "all"
+        ## NVIDIA
+        #- name: NVIDIA_DRIVER_CAPABILITIES
+        #  value: "all"
+        # Quicksync
+        securityContext:
+          privileged: true
         livenessProbe:
           httpGet:
             path: /web/index.html
@@ -49,9 +56,10 @@ spec:
           initialDelaySeconds: 10
           periodSeconds: 30
           timeoutSeconds: 10
-        resources:
-          limits:
-            nvidia.com/gpu: 1
+        ## NVIDIA
+        #resources:
+        #  limits:
+        #    nvidia.com/gpu: 1
       affinity:
         nodeAffinity:
           requiredDuringSchedulingIgnoredDuringExecution:
@@ -72,6 +80,10 @@ spec:
         emptyDir:
           medium: Memory
           sizeLimit: 12Gi
+      # Quicksync
+      - name: "render-device"
+        hostPath:
+          path: "/dev/dri/renderD128"
 ---
 apiVersion: v1
 kind: Service
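The hostPath mount only works if the node actually exposes an Intel render device. A sanity-check sketch; the `plex` namespace and deployment name are assumptions:

```
# on the node: confirm the iGPU render device exists
ls -l /dev/dri/renderD128
# in the pod: confirm the device was mounted through
kubectl exec -n plex deploy/jellyfin -- ls -l /dev/dri/renderD128
```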

+ 8 - 0
makemkv.sh

@@ -0,0 +1,8 @@
+# one-off for making mkvs out of blu-ray dumps
+docker run -d \
+    --name=makemkv \
+    -p 5800:5800 \
+    -v /my-makemkv-config:/config:rw \
+    -v /my-video/storage:/storage:ro \
+    -v /my-video/output:/output:rw \
+    jlesage/makemkv
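The jlesage images serve a browser-based GUI on the mapped port, so usage is roughly:

```
bash makemkv.sh
# then open http://<docker-host>:5800 and rip via the MakeMKV GUI
docker logs -f makemkv
```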

+ 1 - 1
monitoring/kube-state-metrics/kube-state-metrics-clusterrole.yaml

@@ -5,7 +5,7 @@ metadata:
     app.kubernetes.io/component: exporter
     app.kubernetes.io/name: kube-state-metrics
     app.kubernetes.io/part-of: kube-prometheus
-    app.kubernetes.io/version: 2.4.2
+    app.kubernetes.io/version: 2.10.1
   name: kube-state-metrics
 rules:
 - apiGroups:

+ 1 - 1
monitoring/kube-state-metrics/kube-state-metrics-clusterrolebinding.yaml

@@ -5,7 +5,7 @@ metadata:
     app.kubernetes.io/component: exporter
     app.kubernetes.io/name: kube-state-metrics
     app.kubernetes.io/part-of: kube-prometheus
-    app.kubernetes.io/version: 2.4.2
+    app.kubernetes.io/version: 2.10.1
   name: kube-state-metrics
 roleRef:
   apiGroup: rbac.authorization.k8s.io

+ 4 - 4
monitoring/kube-state-metrics/kube-state-metrics-deployment.yaml

@@ -5,7 +5,7 @@ metadata:
     app.kubernetes.io/component: exporter
     app.kubernetes.io/name: kube-state-metrics
     app.kubernetes.io/part-of: kube-prometheus
-    app.kubernetes.io/version: 2.4.2
+    app.kubernetes.io/version: 2.10.1
   name: kube-state-metrics
   namespace: monitoring
 spec:
@@ -21,11 +21,11 @@ spec:
         app.kubernetes.io/component: exporter
         app.kubernetes.io/name: kube-state-metrics
         app.kubernetes.io/part-of: kube-prometheus
-        app.kubernetes.io/version: 2.4.2
+        app.kubernetes.io/version: 2.10.1
     spec:
       automountServiceAccountToken: true
       containers:
-      - image: k8s.gcr.io/kube-state-metrics/kube-state-metrics:v2.4.2
+      - image: registry.k8s.io/kube-state-metrics/kube-state-metrics:v2.10.1
         livenessProbe:
           httpGet:
             path: /healthz
@@ -41,7 +41,7 @@ spec:
         resources:
           limits:
             cpu: 20m
-            memory: 40Mi
+            memory: 100Mi
           requests:
             cpu: 10m
             memory: 20Mi

+ 1 - 1
monitoring/kube-state-metrics/kube-state-metrics-service.yaml

@@ -5,7 +5,7 @@ metadata:
     app.kubernetes.io/component: exporter
     app.kubernetes.io/name: kube-state-metrics
     app.kubernetes.io/part-of: kube-prometheus
-    app.kubernetes.io/version: 2.4.2
+    app.kubernetes.io/version: 2.10.1
   name: kube-state-metrics
   namespace: monitoring
 spec:

+ 1 - 1
monitoring/kube-state-metrics/kube-state-metrics-serviceaccount.yaml

@@ -6,6 +6,6 @@ metadata:
     app.kubernetes.io/component: exporter
     app.kubernetes.io/name: kube-state-metrics
     app.kubernetes.io/part-of: kube-prometheus
-    app.kubernetes.io/version: 2.4.2
+    app.kubernetes.io/version: 2.10.1
   name: kube-state-metrics
   namespace: monitoring

+ 1 - 1
monitoring/kube-state-metrics/kube-state-metrics-servicemonitor.yaml

@@ -5,7 +5,7 @@ metadata:
     app.kubernetes.io/component: exporter
     app.kubernetes.io/name: kube-state-metrics
     app.kubernetes.io/part-of: kube-prometheus
-    app.kubernetes.io/version: 1.9.7
+    app.kubernetes.io/version: 2.10.1
     name: kube-state-metrics
     prometheus-enabled: "true"
   name: kube-state-metrics

+ 1 - 1
nextcloud/ingress.yaml

@@ -12,4 +12,4 @@ spec:
     services:
     - kind: Service
       name: nextcloud
-      port: 8080
+      port: 8080

+ 193 - 13
nextcloud/values.yaml

@@ -22,7 +22,7 @@
 ##
 image:
   repository: nextcloud
-  tag: 28.0.2-apache
+  tag: 29.0.0-fpm
   pullPolicy: IfNotPresent
   # pullSecrets:
   #   - myRegistrKeySecretName
@@ -121,7 +121,19 @@ nextcloud:
       password: pass
   # PHP Configuration files
   # Will be injected in /usr/local/etc/php/conf.d for apache image and in /usr/local/etc/php-fpm.d when nginx.enabled: true
-  phpConfigs: {}
+  phpConfigs:
+    www.conf: |
+      [www]
+      user = www-data
+      group = www-data
+      listen = 127.0.0.1:9000
+      pm = dynamic
+      pm.max_children = 86
+      pm.start_servers = 21
+      pm.min_spare_servers = 21
+      pm.max_spare_servers = 64
+      ; for large file uploads
+      request_terminate_timeout = 3600
   # Default config files
   # IMPORTANT: Will be used only if you put extra configs, otherwise default will come from nextcloud itself
   # Default confgurations can be found here: https://github.com/nextcloud/docker/tree/master/16.0/apache/config
@@ -232,7 +244,7 @@ nextcloud:
 
 nginx:
   ## You need to set an fpm version of the image for nextcloud if you want to use nginx!
-  enabled: false
+  enabled: true
   image:
     repository: nginx
     tag: alpine
@@ -240,9 +252,169 @@ nginx:
 
   config:
     # This generates the default nginx config as per the nextcloud documentation
-    default: true
-    # custom: |-
-    #     worker_processes  1;..
+    default: false
+    # Default is below, changes marked with CHANGE
+    custom: |-
+      error_log  /var/log/nginx/error.log warn;
+      pid        /var/run/nginx.pid;
+
+
+      events {
+          worker_connections  1024;
+      }
+
+
+      http {
+          include       /etc/nginx/mime.types;
+          default_type  application/octet-stream;
+
+          log_format  main  '$remote_addr - $remote_user [$time_local] "$request" '
+                            '$status $body_bytes_sent "$http_referer" '
+                            '"$http_user_agent" "$http_x_forwarded_for"';
+
+          access_log  /var/log/nginx/access.log  main;
+          # CHANGE for large file uploads
+          proxy_read_timeout 3600;
+
+          sendfile        on;
+          #tcp_nopush     on;
+
+          keepalive_timeout  65;
+
+          #gzip  on;
+
+          upstream php-handler {
+              server 127.0.0.1:9000;
+          }
+
+          server {
+              listen 80;
+
+              # HSTS settings
+              # WARNING: Only add the preload option once you read about
+              # the consequences in https://hstspreload.org/. This option
+              # will add the domain to a hardcoded list that is shipped
+              # in all major browsers and getting removed from this list
+              # could take several months.
+              #add_header Strict-Transport-Security "max-age=15768000; includeSubDomains; preload;" always;
+
+              # set max upload size
+              client_max_body_size 10G;
+              fastcgi_buffers 64 4K;
+
+              # Enable gzip but do not remove ETag headers
+              gzip on;
+              gzip_vary on;
+              gzip_comp_level 4;
+              gzip_min_length 256;
+              gzip_proxied expired no-cache no-store private no_last_modified no_etag auth;
+              gzip_types application/atom+xml application/javascript application/json application/ld+json application/manifest+json application/rss+xml application/vnd.geo+json application/vnd.ms-fontobject application/x-font-ttf application/x-web-app-manifest+json application/xhtml+xml application/xml font/opentype image/bmp image/svg+xml image/x-icon text/cache-manifest text/css text/plain text/vcard text/vnd.rim.location.xloc text/vtt text/x-component text/x-cross-domain-policy;
+
+              # Pagespeed is not supported by Nextcloud, so if your server is built
+              # with the `ngx_pagespeed` module, uncomment this line to disable it.
+              #pagespeed off;
+
+              # HTTP response headers borrowed from Nextcloud `.htaccess`
+              add_header Referrer-Policy                      "no-referrer"       always;
+              add_header X-Content-Type-Options               "nosniff"           always;
+              add_header X-Download-Options                   "noopen"            always;
+              add_header X-Frame-Options                      "SAMEORIGIN"        always;
+              add_header X-Permitted-Cross-Domain-Policies    "none"              always;
+              add_header X-Robots-Tag                         "noindex, nofollow" always;
+              add_header X-XSS-Protection                     "1; mode=block"     always;
+              add_header X-Forwarded-For $proxy_add_x_forwarded_for;
+
+              # Remove X-Powered-By, which is an information leak
+              fastcgi_hide_header X-Powered-By;
+
+              # Path to the root of your installation
+              root /var/www/html;
+
+              # Specify how to handle directories -- specifying `/index.php$request_uri`
+              # here as the fallback means that Nginx always exhibits the desired behaviour
+              # when a client requests a path that corresponds to a directory that exists
+              # on the server. In particular, if that directory contains an index.php file,
+              # that file is correctly served; if it doesn't, then the request is passed to
+              # the front-end controller. This consistent behaviour means that we don't need
+              # to specify custom rules for certain paths (e.g. images and other assets,
+              # `/updater`, `/ocm-provider`, `/ocs-provider`), and thus
+              # `try_files $uri $uri/ /index.php$request_uri`
+              # always provides the desired behaviour.
+              index index.php index.html /index.php$request_uri;
+
+              # Rule borrowed from `.htaccess` to handle Microsoft DAV clients
+              location = / {
+                  if ( $http_user_agent ~ ^DavClnt ) {
+                      return 302 /remote.php/webdav/$is_args$args;
+                  }
+              }
+
+              location = /robots.txt {
+                  allow all;
+                  log_not_found off;
+                  access_log off;
+              }
+
+              # Make a regex exception for `/.well-known` so that clients can still
+              # access it despite the existence of the regex rule
+              # `location ~ /(\.|autotest|...)` which would otherwise handle requests
+              # for `/.well-known`.
+              location ^~ /.well-known {
+                  # The following 6 rules are borrowed from `.htaccess`
+
+                  location = /.well-known/carddav     { return 301 /remote.php/dav/; }
+                  location = /.well-known/caldav      { return 301 /remote.php/dav/; }
+                  # Anything else is dynamically handled by Nextcloud
+                  location ^~ /.well-known            { return 301 /index.php$uri; }
+
+                  try_files $uri $uri/ =404;
+              }
+
+              # Rules borrowed from `.htaccess` to hide certain paths from clients
+              location ~ ^/(?:build|tests|config|lib|3rdparty|templates|data)(?:$|/)  { return 404; }
+              location ~ ^/(?:\.|autotest|occ|issue|indie|db_|console)              { return 404; }
+
+              # Ensure this block, which passes PHP files to the PHP process, is above the blocks
+              # which handle static assets (as seen below). If this block is not declared first,
+              # then Nginx will encounter an infinite rewriting loop when it prepends `/index.php`
+              # to the URI, resulting in a HTTP 500 error response.
+              location ~ \.php(?:$|/) {
+                  fastcgi_split_path_info ^(.+?\.php)(/.*)$;
+                  set $path_info $fastcgi_path_info;
+
+                  try_files $fastcgi_script_name =404;
+
+                  include fastcgi_params;
+                  fastcgi_param SCRIPT_FILENAME $document_root$fastcgi_script_name;
+                  fastcgi_param PATH_INFO $path_info;
+                  #fastcgi_param HTTPS on;
+
+                  fastcgi_param modHeadersAvailable true;         # Avoid sending the security headers twice
+                  fastcgi_param front_controller_active true;     # Enable pretty urls
+                  fastcgi_pass php-handler;
+
+                  fastcgi_intercept_errors on;
+                  fastcgi_request_buffering off;
+              }
+
+              location ~ \.(?:css|js|svg|gif)$ {
+                  try_files $uri /index.php$request_uri;
+                  expires 6M;         # Cache-Control policy borrowed from `.htaccess`
+                  access_log off;     # Optional: Don't log access to assets
+              }
+
+              location ~ \.woff2?$ {
+                  try_files $uri /index.php$request_uri;
+                  expires 7d;         # Cache-Control policy borrowed from `.htaccess`
+                  access_log off;     # Optional: Don't log access to assets
+              }
+
+              location / {
+                  try_files $uri $uri/ /index.php$request_uri;
+              }
+          }
+      }
+
 
   resources: {}
 
@@ -425,17 +597,17 @@ persistence:
     accessMode: ReadWriteOnce
     size: 200Gi
 
-resources: {}
+resources:
   # We usually recommend not to specify default resources and to leave this as a conscious
   # choice for the user. This also increases chances charts run on environments with little
   # resources, such as Minikube. If you do want to specify resources, uncomment the following
   # lines, adjust them as necessary, and remove the curly braces after 'resources:'.
-  # limits:
+  limits:
   #  cpu: 100m
-  #  memory: 128Mi
-  # requests:
+    memory: 4Gi
+  requests:
   #  cpu: 100m
-  #  memory: 128Mi
+    memory: 1Gi
 
 ## Liveness and readiness probe values
 ## Ref: https://kubernetes.io/docs/concepts/workloads/pods/pod-lifecycle/#container-probes
@@ -476,8 +648,16 @@ nodeSelector: {}
 
 tolerations: []
 
-affinity: {}
-
+# To speed up file transfers
+affinity:
+  nodeAffinity:
+    requiredDuringSchedulingIgnoredDuringExecution:
+      nodeSelectorTerms:
+      - matchExpressions:
+        - key: cluster-ingress
+          operator: In
+          values:
+          - "true"
 
 ## Prometheus Exporter / Metrics
 ##
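The `pm.max_children = 86` above pairs with the 4Gi memory limit. A back-of-the-envelope sizing sketch, assuming roughly 48MiB RSS per PHP-FPM worker (an assumption; measure your own):

```
# children ≈ container memory limit / average worker RSS
echo $(( 4096 / 48 ))  # ≈ 85, in line with the configured 86
```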

+ 27 - 5
plex.yaml

@@ -39,13 +39,20 @@ spec:
           name: media
         - mountPath: "/transcodes"
           name: tmpfs
+        # Quicksync
+        - name: "render-device"
+          mountPath: "/dev/dri/renderD128"
         env:
         - name: PUID
           value: "1000"
         - name: PGID
           value: "1000"
-        - name: NVIDIA_DRIVER_CAPABILITIES
-          value: "all"
+        ## NVIDIA
+        #- name: NVIDIA_DRIVER_CAPABILITIES
+        #  value: "all"
+        # Quicksync
+        securityContext:
+          privileged: true
         livenessProbe:
           httpGet:
             path: /web/index.html
@@ -54,9 +61,20 @@ spec:
           initialDelaySeconds: 10
           periodSeconds: 30
           timeoutSeconds: 10
-        resources:
-          limits:
-            nvidia.com/gpu: 1
+        ## NVIDIA
+        #resources:
+        #  limits:
+        #    nvidia.com/gpu: 1
+      affinity:
+        nodeAffinity:
+          requiredDuringSchedulingIgnoredDuringExecution:
+            nodeSelectorTerms:
+            - matchExpressions:
+              - key: gpu-node
+                operator: In
+                values:
+                - "true"
+
       volumes:
       - name: config
         persistentVolumeClaim:
@@ -72,6 +90,10 @@ spec:
         emptyDir:
           medium: Memory
           sizeLimit: 12Gi
+      # Quicksync
+      - name: "render-device"
+        hostPath:
+          path: "/dev/dri/renderD128"
 ---
 apiVersion: v1
 kind: Service
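plex pins to a `gpu-node=true` node the same way ingress-bound services pin to `cluster-ingress=true`; the label is applied manually (node name hypothetical):

```
kubectl label node node2 gpu-node=true
```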

+ 10 - 0
rook/proxmox/proxmox-blockpool.yaml

@@ -0,0 +1,10 @@
+apiVersion: ceph.rook.io/v1
+kind: CephBlockPool
+metadata:
+  name: proxmox
+  namespace: rook-ceph
+spec:
+  failureDomain: host
+  replicated:
+    size: 3
+  deviceClass: hdd
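To confirm the pool came up with the expected replication and device class, the rook toolbox works. A sketch assuming the standard `rook-ceph-tools` deployment:

```
kubectl -n rook-ceph get cephblockpool proxmox
kubectl -n rook-ceph exec deploy/rook-ceph-tools -- ceph osd pool ls detail
```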

+ 18 - 20
rook/rook-ceph-cluster-values.yaml

@@ -227,65 +227,63 @@ cephClusterSpec:
   #   # These labels can be passed as LabelSelector to Prometheus
   #   monitoring:
 
-  # TODO adjust these once everything is deployed
-  #  osd in particular should be greater once all ceph nodes have 32GB
+  # https://home.robusta.dev/blog/stop-using-cpu-limits
   resources:
     mgr:
       limits:
-        cpu: "500m"
+        cpu: 0
         memory: "1Gi"
       requests:
-        cpu: "250m"
+        cpu: 0
         memory: "512Mi"
     mon:
       limits:
-        cpu: "1000m"
+        cpu: 0
         memory: "2Gi"
       requests:
-        cpu: "500m"
-        #memory: "1Gi"
-        memory: "500Mi"
+        cpu: 0
+        memory: "1Gi"
     osd:
       limits:
-        cpu: "1000m"
+        cpu: 0
         memory: "4Gi"
       requests:
-        cpu: "500m"
+        cpu: 0
         memory: "1Gi"
     prepareosd:
       limits:
-        cpu: "1000m"
+        cpu: 0
         memory: "12Gi"
       requests:
-        cpu: "500m"
+        cpu: 0
         memory: "500Mi"
     mgr-sidecar:
       limits:
-        cpu: "500m"
+        cpu: 0
         memory: "100Mi"
       requests:
-        cpu: "100m"
+        cpu: 0
         memory: "40Mi"
     crashcollector:
       limits:
-        cpu: "500m"
+        cpu: 0
         memory: "60Mi"
       requests:
-        cpu: "100m"
+        cpu: 0
         memory: "60Mi"
     logcollector:
       limits:
-        cpu: "500m"
+        cpu: 0
         memory: "1Gi"
       requests:
-        cpu: "100m"
+        cpu: 0
         memory: "100Mi"
     cleanup:
       limits:
-        cpu: "500m"
+        cpu: 0
         memory: "1Gi"
       requests:
-        cpu: "500m"
+        cpu: 0
         memory: "100Mi"
 
   # The option to automatically remove OSDs that are out and are safe to destroy.

+ 16 - 6
seedbox_sync.py

@@ -1,8 +1,8 @@
 # rsync files from a seedbox to a local machine, exactly once, over SSH.
 #
 # Why?
-#  sonarr requires that any Remote Path Mappings have a local path reflecting its contents. This can be done with NFS or SSHFS, but those are difficult to set up in containers, and get wonky when the remote server reboots.
-#  rsync over SSH + cron doesn't care if the remote machine reboots, + easily runs in a container.
+#  *arr apps require that any Remote Path Mappings have a local path reflecting their contents. This can be done with NFS or SSHFS, but those are difficult to set up in containers, and get wonky when the remote server reboots.
+#  rsync over SSH + cron doesn't care if the remote machine reboots, and easily runs in a container.
 
 # How?
 #  Usage: sonarr_sync.py my-seedbox /seedbox/path/to/data /local/working /local/metadata /local/data
@@ -19,7 +19,7 @@
 
 import subprocess
 import sys
-
+import concurrent.futures
 
 if len(sys.argv) != 6:
     print("One or more args are undefined")
@@ -31,17 +31,17 @@ r = subprocess.run(["ssh", host, "bash", "-c", f"IFS=$'\n'; ls {host_data_path}"
 
 available = {f for f in r.stdout.decode().split('\n') if f}
 
-# There's better ways to list a dir locally, but using bash +ls again avoids any possible formatting discrepencies.
+# There are better ways to list a dir locally, but using bash & ls again reduces possible formatting discrepancies.
 r = subprocess.run(["bash", "-c", f"IFS=$'\n'; ls {local_metadata_path}"], stdout=subprocess.PIPE, check=True)
 
 processed = {f for f in r.stdout.decode().split('\n') if f}
 
 new = available - processed
 
-for new_file in new:
+def process_file(new_file: str) -> None:
     # Be super cautious about empty file names, wouldn't want to `rm -rf` a folder by accident
     if not new_file:
-        continue
+        return
 
     print(f"Processing: {new_file}")
     subprocess.run(["rsync", "-rsvv", f'{host}:{host_data_path}/{new_file}', f'{local_working_path}'], check=True)
@@ -55,3 +55,13 @@ for new_file in new:
         raise
 
     subprocess.run(["rm", "-rf", f'{local_working_path}/{new_file}'], check=True)
+
+with concurrent.futures.ThreadPoolExecutor(max_workers=8) as executor:
+    future_to_new_files = {executor.submit(process_file, new_file): new_file for new_file in new}
+    for future in concurrent.futures.as_completed(future_to_new_files):
+        new_file = future_to_new_files[future]
+        try:
+            future.result()  # re-raises any exception from the worker
+            print(f"Processed {new_file}")
+        except Exception as exc:
+            print(f"{new_file!r} generated an exception: {exc}")

+ 13 - 16
syncthing.yaml

@@ -16,13 +16,25 @@ spec:
       annotations:
         backup.velero.io/backup-volumes-excludes: data
     spec:
+      affinity:
+        nodeAffinity:
+          requiredDuringSchedulingIgnoredDuringExecution:
+            nodeSelectorTerms:
+            - matchExpressions:
+              - key: cluster-ingress
+                operator: In
+                values:
+                - "true"
+      hostNetwork: true
       containers:
         - name: syncthing
           image: linuxserver/syncthing:1.27.2
           imagePullPolicy: Always
           ports:
           - containerPort: 8384
-          name: http-web-svc
+            name: http-web-svc
+          - containerPort: 22000
+            name: transfer-svc
           env:
             - name: PUID
               value: "1000"
@@ -43,18 +55,3 @@ spec:
       - name: config
         persistentVolumeClaim:
           claimName: syncthing-pvc
----
-apiVersion: v1
-kind: Service
-metadata:
-  name: syncthing-service
-  namespace: plex
-spec:
-  selector:
-    app: syncthing
-  type: ClusterIP
-  ports:
-  - name: syncthing-web-port
-    protocol: TCP
-    port: 8384
-    targetPort: http-web-svc

+ 34 - 11
traefik-helmchartconfig.yaml

@@ -5,15 +5,8 @@ metadata:
   namespace: kube-system
 spec:
   valuesContent: |-
-    ports:
-      web:
-        exposedPort: 80
-      websecure:
-        exposedPort: 443
-
     additionalArguments:
       # Auto cert renewal via cloudflare
-      #- "--certificatesresolvers.letsencrypt.acme.email=some-email-here"
       - "--certificatesresolvers.letsencrypt.acme.email=joshbicking@comcast.net"
       - "--certificatesresolvers.letsencrypt.acme.storage=/data/acme.json"
       - "--certificatesresolvers.letsencrypt.acme.dnschallenge.provider=cloudflare"
@@ -21,13 +14,16 @@ spec:
       - "--entrypoints.websecure.http.tls.certResolver=letsencrypt"
       - "--entrypoints.websecure.http.tls.domains[0].main=jibby.org"
       - "--entrypoints.websecure.http.tls.domains[0].sans=*.jibby.org"
+      - "--entrypoints.websecure.proxyProtocol.trustedIPs=127.0.0.1/32,172.16.69.0/24"
       - "--entrypoints.web.http.redirections.entryPoint.to=:443"
       - "--entrypoints.web.http.redirections.entrypoint.scheme=https"
 
-      - "--log.level=DEBUG"
+      - "--log.level=INFO"
       # debug, uncomment for testing
+      #- "--log.level=DEBUG"
       #- "--certificatesresolvers.letsencrypt.acme.caServer=https://acme-staging-v02.api.letsencrypt.org/directory"
 
+
     env:
       - name: CLOUDFLARE_EMAIL
         valueFrom:
@@ -47,14 +43,41 @@ spec:
       storageClass: ceph-block
 
     # Fix for acme.json file being changed to 660 from 600
+    # This can manifest as the incredibly unhelpful "the router <router name> uses a non-existent resolver: <resolver name>"
+    # https://github.com/traefik/traefik/issues/10241
     podSecurityContext:
-      fsGroup: null
+      fsGroup: 65532
+    deployment:
+      initContainers:
+      # The "volume-permissions" init container is required if you run into permission issues.
+      # Related issue: https://github.com/traefik/traefik-helm-chart/issues/396
+      - name: volume-permissions
+        image: busybox:latest
+        command: ["sh", "-c", "touch /data/acme.json; chmod -v 600 /data/acme.json"]
+        securityContext:
+          runAsNonRoot: true
+          runAsGroup: 65532
+          runAsUser: 65532
+        volumeMounts:
+          - name: data
+            mountPath: /data
+
 
     # ACME functionality is not supported when running Traefik as a DaemonSet
     #deployment:
     #  kind: DaemonSet
     service:
-      # type: ClusterIP
       spec:
+        # Required to show real IP to proxied services
         externalTrafficPolicy: Local
-    hostNetwork: true
+
+    # Not strictly necessary anymore, but reduces traffic hairpinning
+    affinity:
+      nodeAffinity:
+        requiredDuringSchedulingIgnoredDuringExecution:
+          nodeSelectorTerms:
+          - matchExpressions:
+            - key: cluster-ingress
+              operator: In
+              values:
+              - "true"