Portal:Toolforge/Admin/Kubernetes/Upgrading Kubernetes/1.26 to 1.27 notes
Appearance
Working etherpad: https://etherpad.wikimedia.org/p/k8s-1.26-to-1.27-upgrade
Prepare packages
- [] send and merge a patch similar to https://gerrit.wikimedia.org/r/c/operations/puppet/+/1058560 but for the destination version
- [] check that the packages show up in https://apt.wikimedia.org/wikimedia/pool/thirdparty/
Toolsbeta
prep
- [] run prepare upgrade cookbook
- [] downtime project via https://alerts.wikimedia.org/?q=team%3Dwmcs
- [] update topic on -cloud
~ $ sudo cookbook wmcs.toolforge.k8s.prepare_upgrade --cluster-name toolsbeta --src-version 1.26.15 --dst-version 1.27.16
control nodes
toolsbeta-test-k8s-control-7
- [] run upgrade node cookbook
- [] check that services start healthy
- [] depool control-8 and -9 via haproxy, check that control-7 is still doing ok
ssh toolsbeta-test-k8s-haproxy-6.toolsbeta.eqiad1.wikimedia.cloud
sudo puppet agent --disable "slavina k8s upgrade"
sudo nano /etc/haproxy/conf.d/k8s-api-servers.cfg
sudo systemctl reload haproxy
check:
echo "show stat" | sudo socat stdio /run/haproxy/haproxy.sock | grep k8s-api
revert:
sudo puppet agent --enable
sudo run-puppet-agent
sudo systemctl reload haproxy
~ $ sudo cookbook wmcs.toolforge.k8s.worker.upgrade --cluster-name toolsbeta --hostname toolsbeta-test-k8s-control-7 --src-version 1.26.15 --dst-version 1.27.16
toolsbeta-test-k8s-control-8
- [] run upgrade node cookbook
- [] check that services start healthy
toolsbeta-test-k8s-control-9
- [] run upgrade node cookbook
- [] check that services start healthy
worker nodes
run upgrade node cookbook for each.
- [] toolsbeta-test-k8s-worker-nfs-1
- [] toolsbeta-test-k8s-worker-nfs-2
- [] toolsbeta-test-k8s-worker-nfs-3
- [] toolsbeta-test-k8s-worker-nfs-4
- [] toolsbeta-test-k8s-worker-10
- [] toolsbeta-test-k8s-worker-11
ingress nodes
- [] toolsbeta-test-k8s-ingress-6
- [] toolsbeta-test-k8s-ingress-7
- [] toolsbeta-test-k8s-ingress-8
cleanup
- [] remove downtime
- [] revert topic change
Tools
prep
- [] run prepare upgrade cookbook
- [] downtime project via https://alerts.wikimedia.org/?q=team%3Dwmcs
- [] update topic on -cloud
~ $ sudo cookbook wmcs.toolforge.k8s.prepare_upgrade --cluster-name tools --src-version 1.26.15 --dst-version 1.27.16
control nodes
tools-k8s-control-7
- [] run upgrade node cookbook
- [] check that services start healthy
- [] depool control-8 and -9 via haproxy, check that control-7 is still doing ok
ssh tools-k8s-haproxy-5.tools.eqiad1.wikimedia.cloud
sudo puppet agent --disable "slavina k8s upgrade"
sudo nano /etc/haproxy/conf.d/k8s-api-servers.cfg
sudo systemctl reload haproxy
check:
echo "show stat" | sudo socat stdio /run/haproxy/haproxy.sock | grep k8s-api
revert:
sudo puppet agent --enable
sudo run-puppet-agent
sudo systemctl reload haproxy
~ $ sudo cookbook wmcs.toolforge.k8s.worker.upgrade --cluster-name tools --hostname tools-k8s-control-7 --src-version 1.26.15 --dst-version 1.27.16
tools-k8s-control-8
- [] run upgrade node cookbook
- [] check that services start healthy
tools-k8s-control-9
- [] run upgrade node cookbook
- [] check that services start healthy
worker nodes
(get list)
root@tools-k8s-control-8:~# for node in $(kubectl get nodes -o json | jq '.items[].metadata.name' -r); do echo "* [] $node"; done
run upgrade node cookbook for each. it's ok to do a couple in parallel
- [] tools-k8s-worker-102
- [] tools-k8s-worker-103
- [] tools-k8s-worker-105
- [] tools-k8s-worker-106
- [] tools-k8s-worker-107
- [] tools-k8s-worker-108
- [] tools-k8s-worker-nfs-1
- [] tools-k8s-worker-nfs-2
- [] tools-k8s-worker-nfs-3
- [] tools-k8s-worker-nfs-5
- [] tools-k8s-worker-nfs-6
- [] tools-k8s-worker-nfs-7
- [] tools-k8s-worker-nfs-8
- [] tools-k8s-worker-nfs-9
- [] tools-k8s-worker-nfs-10
- [] tools-k8s-worker-nfs-11
- [] tools-k8s-worker-nfs-12
- [] tools-k8s-worker-nfs-13
- [] tools-k8s-worker-nfs-14
- [] tools-k8s-worker-nfs-16
- [] tools-k8s-worker-nfs-17
- [] tools-k8s-worker-nfs-19
- [] tools-k8s-worker-nfs-20
- [] tools-k8s-worker-nfs-21
- [] tools-k8s-worker-nfs-22
- [] tools-k8s-worker-nfs-23
- [] tools-k8s-worker-nfs-24
- [] tools-k8s-worker-nfs-26
- [] tools-k8s-worker-nfs-27
- [] tools-k8s-worker-nfs-28
- [] tools-k8s-worker-nfs-29
- [] tools-k8s-worker-nfs-30
- [] tools-k8s-worker-nfs-31
- [] tools-k8s-worker-nfs-32
- [] tools-k8s-worker-nfs-33
- [] tools-k8s-worker-nfs-34
- [] tools-k8s-worker-nfs-35
- [] tools-k8s-worker-nfs-36
- [] tools-k8s-worker-nfs-37
- [] tools-k8s-worker-nfs-38
- [] tools-k8s-worker-nfs-39
- [] tools-k8s-worker-nfs-40
- [] tools-k8s-worker-nfs-41
- [] tools-k8s-worker-nfs-42
- [] tools-k8s-worker-nfs-43
- [] tools-k8s-worker-nfs-44
- [] tools-k8s-worker-nfs-45
- [] tools-k8s-worker-nfs-46
- [] tools-k8s-worker-nfs-47
- [] tools-k8s-worker-nfs-48
- [] tools-k8s-worker-nfs-49
- [] tools-k8s-worker-nfs-50
- [] tools-k8s-worker-nfs-53
- [] tools-k8s-worker-nfs-54
- [] tools-k8s-worker-nfs-55
- [] tools-k8s-worker-nfs-56
- [] tools-k8s-worker-nfs-57
- [] tools-k8s-worker-nfs-58
- [] tools-k8s-worker-nfs-60
- [] tools-k8s-worker-nfs-61
- [] tools-k8s-worker-nfs-62
- [] tools-k8s-worker-nfs-63
- [] tools-k8s-worker-nfs-64
ingress nodes
- [] kubectl -n ingress-nginx-gen2 scale deployment ingress-nginx-gen2-controller --replicas=2
run upgrade node cookbook for each:
- [] tools-k8s-ingress-7
- [] tools-k8s-ingress-8
- [] tools-k8s-ingress-9
- [] revert afterwards: kubectl -n ingress-nginx-gen2 scale deployment ingress-nginx-gen2-controller --replicas=3
cleanup
- [] remove downtime
- [] revert topic change