References:
- https://kubernetes.io/zh-cn/docs/tasks/administer-cluster/kubeadm/kubeadm-certs/
- https://cloud-atlas.readthedocs.io/zh-cn/latest/kubernetes/administer/remove_node.html
0. Node-related commands
kubectl get nodes
kubectl describe node <NODE_NAME>
Information shown by `kubectl describe node`:
- Basic node information: name, labels, creation time, etc.
- Current node conditions. After startup the node runs self-checks (disk pressure, memory pressure, etc.); if everything is healthy it switches to the Ready state.
- The node's host addresses and hostname.
- Total resource capacity of the node: CPU, memory, maximum number of schedulable Pods, etc.
- Allocatable resources: the amount of resources currently available for scheduling on this node.
- Host system information: machine UUID, Linux kernel version, operating system, Kubernetes version, kubelet and kube-proxy versions.
- The list of Pods currently running on the node, with summary information.
- A summary of allocated resources, e.g. requested and maximum allowed usage as a percentage of the node's total capacity.
- Events related to the node.
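For example, to check a single node's Pods and conditions from the command line (the node name kube-node01 is only illustrative):
```shell
# List the Pods running on a given node
kubectl get pods -A --field-selector spec.nodeName=kube-node01 -o wide

# Show only the node's condition block from describe
kubectl describe node kube-node01 | grep -A 8 "Conditions:"
```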
1. Taking a node offline
- Procedure
  - Mark the node as unschedulable so that no new Pods are placed on it.
  - Evict (or manually reschedule) the workloads running on the node.
  - Remove the node from the cluster.
1. View the nodes
Take the kube-node01 node offline.
```shell
[root@kube-master ~]# kubectl get node
NAME           STATUS   ROLES                  AGE   VERSION
k8s-master01   Ready    control-plane,master   41h   v1.22.17
kube-node01    Ready    <none>                 18h   v1.22.17
kube-node02    Ready    <none>                 18h   v1.22.17
kube-node03    Ready    <none>                 18h   v1.22.17
```
2. Mark the node to be taken offline as unschedulable
```shell
# Mark the node unschedulable
kubectl cordon <NODE_NAME>
```
After cordoning, the node's status changes to Ready,SchedulingDisabled:
```shell
[root@kube-master ~]# kubectl get node
NAME           STATUS                     ROLES                  AGE   VERSION
k8s-master01   Ready                      control-plane,master   42h   v1.22.17
kube-node01    Ready,SchedulingDisabled   <none>                 19h   v1.22.17
kube-node02    Ready                      <none>                 19h   v1.22.17
kube-node03    Ready                      <none>                 19h   v1.22.17
```
Cordoning only prevents new Pods from being scheduled onto the node; Pods that are already running there are not affected.
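If the node turns out not to need offboarding after all, the cordon can be reverted at any time:
```shell
# Make the node schedulable again
kubectl uncordon <NODE_NAME>
```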
3. Evict the Pods on the node
```shell
kubectl drain --ignore-daemonsets <NODE_NAME> --delete-emptydir-data
# --ignore-daemonsets      skip Pods managed by a DaemonSet
# --delete-emptydir-data   allow deleting Pods that use emptyDir volumes (their local data is lost)

[root@kube-master ~]# kubectl drain --ignore-daemonsets kube-node01 --delete-emptydir-data
node/kube-node01 already cordoned
WARNING: ignoring DaemonSet-managed Pods: kube-system/calico-node-g4c7n, kube-system/kube-proxy-pmkmh
evicting pod kube-system/metrics-server-54544fbf96-qgg9m
evicting pod default/deployapp-7749464894-jb2xx
evicting pod default/nginx-6799fc88d8-cp555
evicting pod kube-system/calico-typha-67c6dc57d6-frds4
pod/calico-typha-67c6dc57d6-frds4 evicted
pod/metrics-server-54544fbf96-qgg9m evicted
pod/deployapp-7749464894-jb2xx evicted
pod/nginx-6799fc88d8-cp555 evicted
node/kube-node01 evicted

# After the drain, only DaemonSet-managed Pods remain on kube-node01
[root@kube-master ~]# kubectl get pod -A -owide | grep node01
kube-system   calico-node-g4c7n   1/1   Running   2 (54m ago)   19h   10.103.236.202   kube-node01   <none>   <none>
kube-system   kube-proxy-pmkmh    1/1   Running   2 (54m ago)   19h   10.103.236.202   kube-node01   <none>   <none>
```
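Optionally, the drain can be previewed first without evicting anything (assuming the installed kubectl supports --dry-run on drain, as recent versions do):
```shell
# Show what would be evicted, but do not actually evict
kubectl drain --ignore-daemonsets --delete-emptydir-data --dry-run=client <NODE_NAME>
```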
❌ Note
Draining reschedules every Pod on the node onto other nodes at once. In production a single node may host many Pods, so a drain can delete and recreate a large number of Pods at the same time and cause noticeable service jitter. In production it is therefore common to delete the Pods on the node manually, one at a time, to keep the overall service stable, as sketched after the following commands.
```shell
# List all Pods running on the node
kubectl get pod -A -o wide | grep <NODE_NAME>
# Delete a Pod so that its controller recreates it on another node
kubectl delete pod -n <NAMESPACE> <POD_NAME>
```
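A minimal sketch of that one-at-a-time approach, assuming the node has already been cordoned (the node name and the pause interval are illustrative):
```shell
NODE_NAME=kube-node01
# Delete the Pods on the node one by one, pausing between deletions so that
# replacement Pods can become Ready elsewhere. Note that DaemonSet Pods are
# not filtered out here; they will simply be recreated on the same node.
kubectl get pod -A --field-selector spec.nodeName=${NODE_NAME} \
  -o jsonpath='{range .items[*]}{.metadata.namespace}{" "}{.metadata.name}{"\n"}{end}' |
while read -r ns pod; do
  kubectl delete pod -n "${ns}" "${pod}"
  sleep 30
done
```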
4. Delete the node
```shell
# Confirm there are no Pods left on the node
kubectl get pod -A -o wide | grep <NODE_NAME>
# Once no business Pods remain on the node, it can be deleted
kubectl delete node <NODE_NAME>
# Confirm the node object is gone
kubectl get node | grep <NODE_NAME>
```
If this is a cloud node that is no longer needed, the underlying instance can simply be released after it has been removed from the cluster.
5. Clean up the node
Run the following on the node that has just been removed:
```shell
kubeadm reset
# Remove the Calico IPIP tunnel interface
ifconfig tunl0 down && ip link delete tunl0
rm -fr /etc/kubernetes/*
rm -fr /var/lib/cni/*
# Flush the iptables rules and IPVS virtual servers created by kube-proxy
iptables -F && iptables -t nat -F && iptables -t mangle -F && iptables -X
ipvsadm --clear
systemctl stop docker.socket
systemctl stop kubelet
```
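`kubeadm reset` does not clean up CNI configuration on the node; if the machine is to be repurposed, that can be removed as well (the path below is the common default that the kubeadm reset output points at):
```shell
# Remove leftover CNI network configuration
rm -rf /etc/cni/net.d
```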
2. Bringing a new node online
```shell
# List the bootstrap tokens; if the output is empty, the token has expired
[root@kube-master ~]# kubeadm token list
TOKEN                     TTL   EXPIRES                USAGES                   DESCRIPTION   EXTRA GROUPS
qa2k39.fg9cjrmbdfsu0w29   3h    2024-04-12T06:59:46Z   authentication,signing   <none>        system:bootstrappers:kubeadm:default-node-token
# If the token has expired, create a new one
kubeadm token create
# Delete a token
kubeadm token delete <TOKEN_ID>
# Compute the CA certificate hash used for --discovery-token-ca-cert-hash
openssl x509 -pubkey -in /etc/kubernetes/pki/ca.crt | openssl rsa -pubin -outform der 2> /dev/null | openssl dgst -sha256 -hex | sed 's/^.* //'

# Join the new node to the cluster
[root@kube-node01 ~]# kubeadm join 10.103.236.201:6443 --token qa2k39.fg9cjrmbdfsu0w29 --discovery-token-ca-cert-hash sha256:89ad2d95b4ffcdbf9370ccc4925f0195a80e98e5436404ecef548091db31b234
[preflight] Running pre-flight checks
[preflight] Reading configuration from the cluster...
[preflight] FYI: You can look at this config file with 'kubectl -n kube-system get cm kubeadm-config -o yaml'
[kubelet-start] Writing kubelet configuration to file "/var/lib/kubelet/config.yaml"
[kubelet-start] Writing kubelet environment file with flags to file "/var/lib/kubelet/kubeadm-flags.env"
[kubelet-start] Starting the kubelet
[kubelet-start] Waiting for the kubelet to perform the TLS Bootstrap...

This node has joined the cluster:
* Certificate signing request was sent to apiserver and a response was received.
* The Kubelet was informed of the new secure connection details.

Run 'kubectl get nodes' on the control-plane to see this node join the cluster.
```
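As a shortcut, kubeadm can generate the complete join command (token plus CA certificate hash) in one step:
```shell
# Prints a ready-to-run "kubeadm join ..." command for worker nodes
kubeadm token create --print-join-command
```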
3. Node maintenance
4. Changing the maximum number of Pods on a node
Kubernetes version requirement: 1.24+
4.1 Modify the configuration
Check the kubelet service to find the configuration file it runs with (the --config flag):
```shell
systemctl status kubelet
● kubelet.service - kubelet: The Kubernetes Node Agent
   Loaded: loaded (/usr/lib/systemd/system/kubelet.service; enabled; vendor preset: disabled)
  Drop-In: /usr/lib/systemd/system/kubelet.service.d
           └─10-kubeadm.conf
   Active: active (running) since Thu 2023-05-04 15:00:49 CST; 4min 45s ago
     Docs: https://kubernetes.io/docs/
 Main PID: 43623 (kubelet)
    Tasks: 69
   Memory: 192.0M
   CGroup: /system.slice/kubelet.service
           └─43623 /usr/bin/kubelet --bootstrap-kubeconfig=/etc/kubernetes/bootstrap-kubelet.conf --kubeconfig=/etc/kubernetes/kubelet.conf --config=/var/lib/kubelet/config.yaml --container-runtime=remote --container-runtime-endpoint=unix:///va...
...
```
Edit the kubelet configuration file and set `maxPods` (configuration example adapted from Lance.Wu, https://www.orchome.com/16833, OrcHome):
```shell
vim /var/lib/kubelet/config.yaml
```
```yaml
apiVersion: kubelet.config.k8s.io/v1beta1
...
kind: KubeletConfiguration
...
volumeStatsAggPeriod: 0s
maxPods: 150   # added setting; the default is 110, raised here to 150
```
- Restart kubelet
```bash
systemctl daemon-reload
systemctl restart kubelet
```
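If the kubelet fails to come back up after the restart, the configuration error is usually visible in its logs:
```bash
# Show the most recent kubelet log entries
journalctl -u kubelet -n 50 --no-pager
```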
- Verify the result
```bash
kubectl describe node <NODE_NAME> | grep -A6 "Capacity\|Allocatable"
```
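The new limit can also be read directly from the node object:
```bash
# Maximum number of Pods the kubelet will accept on this node
kubectl get node <NODE_NAME> -o jsonpath='{.status.capacity.pods}{"\n"}'
```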