kubectl get pods -n kube-system -o wide
回显示例:
root@ubuntu:~# kubectl get pods -n kube-system -o wide
NAME                                       READY   STATUS    RESTARTS   AGE   IP                NODE     NOMINATED NODE   READINESS GATES
ascend-device-plugin-daemonset-910-85p9v   1/1     Running   0          19h   192.168.185.251   ubuntu   <none>           <none>
...
kubectl logs -n kube-system <devicePlugin组件的Pod名字>
如果出现如下内容表示组件正常。
root@ubuntu:~# kubectl logs -n kube-system ascend-device-plugin-daemonset-910-85p9v
[INFO] 2022/11/21 11:20:04.534992 1 hwlog@v0.0.0/api.go:96 devicePlugin.log's logger init success
[INFO] 2022/11/21 11:20:04.535750 1 main.go:127 ascend device plugin starting and the version is v5.0.RC1_linux-x86_64
[INFO] 2022/11/21 11:20:05.992823 1 K8stool@v0.0.0/self_K8s_client.go:116 start to decrypt cfg
[INFO] 2022/11/21 11:20:06.002773 1 K8stool@v0.0.0/self_K8s_client.go:125 Config loaded from file: ****tc/mindx-dl/device-plugin/.config/config6
[INFO] 2022/11/21 11:20:06.003751 1 main.go:153 init kube client success
[INFO] 2022/11/21 11:20:06.003923 1 device/ascendcommon.go:104 Found Huawei Ascend, deviceType: Ascend910, deviceName: Ascend910-4
[INFO] 2022/11/21 11:20:06.003970 1 main.go:160 init device manager success
[INFO] 2022/11/21 11:20:06.004157 21 device/manager.go:125 starting the listen device
[INFO] 2022/11/21 11:20:06.004285 7 device/manager.go:206 Serve start
[INFO] 2022/11/21 11:20:06.004970 7 server/server.go:88 device plugin (Ascend910) start serving.
[INFO] 2022/11/21 11:20:06.007285 7 server/server.go:36 register Ascend910 to kubelet success.
[INFO] 2022/11/21 11:20:06.007521 7 server/pod_resource.go:44 pod resource client init success.
[INFO] 2022/11/21 11:20:06.007755 35 server/plugin.go:87 ListAndWatch resp devices: Ascend910-4 Healthy    # 上报K8s的芯片,请以实际为准
[INFO] 2022/11/21 11:20:11.063218 21 kubeclient/client_server.go:123 reset annotation success
...
kubectl describe node <K8s中的节点名>
root@ubuntu:~# kubectl describe node ubuntu
Name:               ubuntu
Roles:              worker
Labels:             accelerator=huawei-Ascend910
                    beta.kubernetes.io/arch=amd64
                    ...
CreationTimestamp:  Wed, 22 Dec 2021 20:10:04 +0800
Taints:             <none>
Unschedulable:      false
...
Capacity:
  cpu:                   72
  ephemeral-storage:     479567536Ki
  huawei.com/Ascend910:  8            # K8s已感知到该节点总共有8个NPU
  ...
Allocatable:
  cpu:                   72
  ephemeral-storage:     441969440446
  huawei.com/Ascend910:  8            # K8s已感知到该节点可供分配的NPU总个数为8
  ...
root@ubuntu:~# kubectl describe node ubuntu
Name:               ubuntu
Roles:              worker
Labels:             accelerator=huawei-Ascend310
                    beta.kubernetes.io/arch=amd64
                    ...
CreationTimestamp:  Wed, 22 Dec 2021 20:10:04 +0800
Taints:             <none>
Unschedulable:      false
...
Capacity:
  cpu:                   72
  ephemeral-storage:     163760Mi
  huawei.com/Ascend310:  4
  ...
Allocatable:
  cpu:                   72
  ephemeral-storage:     154543324929
  huawei.com/Ascend310:  4
  ...
root@ubuntu:~# kubectl describe node ubuntu
Name:               ubuntu
Roles:              worker
Labels:             accelerator=huawei-Ascend310
                    beta.kubernetes.io/arch=amd64
                    ...
CreationTimestamp:  Wed, 22 Dec 2021 20:10:04 +0800
Taints:             <none>
Unschedulable:      false
...
Capacity:
  cpu:                    96
  ephemeral-storage:      95596964Ki
  huawei.com/Ascend310P:  3
  ...
Allocatable:
  cpu:                    96
  ephemeral-storage:      88102161877
  huawei.com/Ascend310P:  3
  ...
root@ubuntu:~# kubectl describe node ubuntu
Name:               ubuntu
Roles:              worker
Labels:             accelerator=huawei-Ascend310
                    beta.kubernetes.io/arch=amd64
                    ...
CreationTimestamp:  Wed, 22 Dec 2021 20:10:04 +0800
Taints:             <none>
Unschedulable:      false
...
Capacity:
  cpu:                         96
  ephemeral-storage:           95596964Ki
  huawei.com/Ascend310P-IPro:  1
  huawei.com/Ascend310P-V:     1
  huawei.com/Ascend310P-VPro:  1
  ...
Allocatable:
  cpu:                         96
  ephemeral-storage:           88102161877
  huawei.com/Ascend310P-IPro:  1
  huawei.com/Ascend310P-V:     1
  huawei.com/Ascend310P-VPro:  1
  ...