# k3s

Run the first server with `--cluster-init`, grab the node token from it, then join the remaining servers:

```
curl -sfL https://get.k3s.io | INSTALL_K3S_EXEC="server --cluster-init" sh -

export NODE_TOKEN=$(cat /var/lib/rancher/k3s/server/node-token)

curl -sfL https://get.k3s.io | K3S_TOKEN=$NODE_TOKEN INSTALL_K3S_EXEC="server --server https://192.168.122.87:6443" INSTALL_K3S_VERSION=v1.23.6+k3s1 sh -
```

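To check that the servers actually formed a cluster (k3s bundles its own kubectl):

```
sudo k3s kubectl get nodes
```
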
# rook

```
helm repo add rook-release https://charts.rook.io/release

KUBECONFIG=/etc/rancher/k3s/k3s.yaml helm upgrade --install --create-namespace --namespace rook-ceph rook-ceph rook-release/rook-ceph --version 1.9.2 -f rook-ceph-values.yaml

KUBECONFIG=/etc/rancher/k3s/k3s.yaml helm install --create-namespace --namespace rook-ceph rook-ceph-cluster --set operatorNamespace=rook-ceph rook-release/rook-ceph-cluster --version 1.9.2 -f rook-ceph-cluster-values.yaml
```

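Once both charts are in, the operator and cluster state can be checked via the `CephCluster` resource the cluster chart creates:

```
KUBECONFIG=/etc/rancher/k3s/k3s.yaml kubectl -n rook-ceph get cephcluster
KUBECONFIG=/etc/rancher/k3s/k3s.yaml kubectl -n rook-ceph get pods
```
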
## Sharing 1 CephFS instance between multiple PVCs

If hostNetwork is enabled on the cluster, ensure rook-ceph-operator is not running <... snip>

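CephFS volumes are ReadWriteMany, so several PVCs/pods can sit on the one filesystem. A minimal sketch of such a PVC, assuming the `ceph-filesystem` StorageClass name that the rook-ceph-cluster chart creates by default (the values files in this repo may name it differently):

```
apiVersion: v1
kind: PersistentVolumeClaim
metadata:
  name: shared-data
spec:
  accessModes:
    - ReadWriteMany   # CephFS supports RWX, so multiple pods can mount it
  storageClassName: ceph-filesystem
  resources:
    requests:
      storage: 10Gi
```
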
## Buckets

Buckets on the Ceph object store are great for setting up easy public downloads.

- Create a user (`rook/buckets/user-josh.yaml`)
- Get the user's S3 credentials: `kubectl -n rook-ceph get secret rook-ceph-object-user-ceph-objectstore-josh -o go-template='{{range $k,$v := .data}}{{printf "%s: " $k}}{{if not $v}}{{$v}}{{else}}{{$v | base64decode}}{{end}}{{"\n"}}{{end}}'`
- Create bucket (`rook/buckets/bucket.py::create_bucket`)
- Set policy (`rook/buckets/bucket.py::set_public_read_policy`)
- Upload file:

```python
from bucket import *

conn.upload_file('path/to/s3-bucket-listing/index.html', 'public', 'index.html', ...)
```

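`bucket.py` itself isn't reproduced here; a rough sketch of what its helpers presumably do with boto3 against the rook object store (the endpoint URL and credentials are placeholders to be filled from the object-user secret above):

```python
import json
import boto3

# S3-compatible connection to the rook RGW service; fill in values from the secret above
conn = boto3.client(
    's3',
    endpoint_url='http://rook-ceph-rgw-ceph-objectstore.rook-ceph.svc',
    aws_access_key_id='ACCESS_KEY',
    aws_secret_access_key='SECRET_KEY',
)

def create_bucket(name='public'):
    conn.create_bucket(Bucket=name)

def set_public_read_policy(name='public'):
    # allow anonymous GETs on every object in the bucket
    policy = {
        'Version': '2012-10-17',
        'Statement': [{
            'Effect': 'Allow',
            'Principal': '*',
            'Action': ['s3:GetObject'],
            'Resource': [f'arn:aws:s3:::{name}/*'],
        }],
    }
    conn.put_bucket_policy(Bucket=name, Policy=json.dumps(policy))
```
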
# nvidia driver (on debian)

```
curl -s -L https://nvidia.github.io/nvidia-container-runtime/gpgkey | sudo apt-key add -
distribution=$(. /etc/os-release;echo $ID$VERSION_ID)
curl -s -L https://nvidia.github.io/nvidia-container-runtime/$distribution/nvidia-container-runtime.list | sudo tee /etc/apt/sources.list.d/nvidia-container-runtime.list

wget https://developer.download.nvidia.com/compute/cuda/11.6.2/local_installers/cuda-repo-debian11-11-6-local_11.6.2-510.47.03-1_amd64.deb
sudo dpkg -i cuda-repo-debian11-11-6-local_11.6.2-510.47.03-1_amd64.deb
sudo apt-key add /var/cuda-repo-debian11-11-6-local/7fa2af80.pub
sudo apt-get update
```

## install kernel headers

```
sudo apt install cuda nvidia-container-runtime nvidia-kernel-dkms

# reinstalling nvidia-kernel-dkms re-triggers the DKMS module build if it didn't happen
sudo apt install --reinstall nvidia-kernel-dkms
```

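DKMS can only build the module if matching kernel headers are present, which is presumably what this heading is about; on Debian that would be something like:

```
# metapackage that tracks the headers for the stock amd64 kernel
sudo apt install linux-headers-amd64
```
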
## verify dkms is actually running

```
# blacklist the in-tree nouveau driver so the nvidia module gets used
sudo vi /etc/modprobe.d/blacklist-nvidia-nouveau.conf

# contents of blacklist-nvidia-nouveau.conf:
blacklist nouveau
options nouveau modeset=0

# rebuild the initramfs so the blacklist applies at boot
sudo update-initramfs -u
```

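To actually check that DKMS built the module and that it loads:

```
sudo dkms status
lsmod | grep nvidia
nvidia-smi
```
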
## configure containerd to use nvidia by default

Copy https://github.com/k3s-io/k3s/blob/v1.24.2%2Bk3s2/pkg/agent/templates/templ... (k3s reads a custom containerd config template from `/var/lib/rancher/k3s/agent/etc/containerd/config.toml.tmpl`).

Edit the file:

```
<... snip>
conf_dir = "{{ .NodeConfig.AgentConfig.CNIConfDir }}"
{{end}}
<... snip>
{{ if .PrivateRegistryConfig }}
<... snip>
```

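The edit itself isn't captured above. The commonly used form of it adds an `nvidia` runtime and makes it the default, roughly as below; the exact plugin section names depend on the containerd config version the template targets, so treat this as an assumption rather than the exact change made here:

```
[plugins."io.containerd.grpc.v1.cri".containerd]
  default_runtime_name = "nvidia"

[plugins."io.containerd.grpc.v1.cri".containerd.runtimes.nvidia]
  runtime_type = "io.containerd.runc.v2"

[plugins."io.containerd.grpc.v1.cri".containerd.runtimes.nvidia.options]
  BinaryName = "/usr/bin/nvidia-container-runtime"
```
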
& then `systemctl restart k3s`

Label your GPU-capable nodes: `kubectl label nodes <node name> gpu-node=true`

& then install the nvidia device plugin:
|
|
& then install the nvidia device plugin:
|
|
|
|
|
|
|
|
+```
|
|
helm repo add nvdp https://nvidia.github.io/k8s-device-plugin
|
|
helm repo add nvdp https://nvidia.github.io/k8s-device-plugin
|
|
helm repo update
|
|
helm repo update
|
|
KUBECONFIG=/etc/rancher/k3s/k3s.yaml helm upgrade -i nvdp nvdp/nvidia-device-plugin --version=0.12.2 --namespace nvidia-device-plugin --create-namespace --set-string nodeSelector.gpu-node=true
|
|
KUBECONFIG=/etc/rancher/k3s/k3s.yaml helm upgrade -i nvdp nvdp/nvidia-device-plugin --version=0.12.2 --namespace nvidia-device-plugin --create-namespace --set-string nodeSelector.gpu-node=true
|
|
|
|
+```
Ensure the pods in the namespace are Running.

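A quick way to confirm the plugin is advertising GPUs is a throwaway pod that requests one (the image tag is an assumption; the `gpu-node` label is the one set above):

```
apiVersion: v1
kind: Pod
metadata:
  name: gpu-test
spec:
  restartPolicy: Never
  nodeSelector:
    gpu-node: "true"
  containers:
    - name: nvidia-smi
      image: nvidia/cuda:11.6.2-base-ubuntu20.04
      command: ["nvidia-smi"]
      resources:
        limits:
          nvidia.com/gpu: 1
```
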
@@ -130,15 +144,19 @@ sharing:
|
|
replicas: 5
|
|
replicas: 5
|
|
```
|
|
```
|
|
|
|
|
|
|
|
```
helm upgrade -i nvdp nvdp/nvidia-device-plugin ... --set-file config.map.config=nvidia-device-plugin-config.yaml
```

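For reference, a complete `nvidia-device-plugin-config.yaml` in the plugin's documented time-slicing format would look roughly like this; only `replicas: 5` comes from the fragment above, the rest is assumed:

```
version: v1
sharing:
  timeSlicing:
    resources:
      - name: nvidia.com/gpu
        replicas: 5
```
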
# ceph client for cephfs volumes

```
sudo apt install ceph-fuse

sudo vi /etc/fstab

# /etc/fstab entry (mon addresses, secret and mds_namespace are cluster-specific):
192.168.1.1,192.168.1.2:/ /ceph ceph name=admin,secret=<secret key>,x-systemd.mount-timeout=5min,_netdev,mds_namespace=data
```

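After adding the fstab entry, the mount can be brought up and checked without a reboot:

```
sudo mkdir -p /ceph
sudo mount /ceph
df -h /ceph
```
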
# disable mitigations

# monitoring

https://rpi4cluster.com/monitoring/k3s-grafana/

Tried https://github.com/prometheus-operator/kube-prometheus. The only way to persist dashboards is to add them to Jsonnet & apply the generated configmap.

# Exposing internal services

```
kubectl expose svc/some-service --name=some-service-external --port 1234 --target-port 1234 --type LoadBalancer
```

The service will then be available on port 1234 of every k8s node.

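k3s's bundled ServiceLB publishes LoadBalancer services on the node IPs, so the node addresses show up as the service's external IPs:

```
kubectl get svc some-service-external
```
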
# libvirtd

...

# Still to do

- deluge?
- gogs ssh ingress (can't go through cloudflare without cloudflared on the client)
- Something better than `expose` for accessing internal services