K8s集成Containerd场景下,用户需要安装Ascend Docker Runtime,并参照本章节完成相应修改。使用任务yaml下发训练或推理任务时,NPU芯片的分配由Volcano和Ascend Device Plugin组件自动完成;NPU芯片及相关文件目录的挂载由Ascend Docker Runtime组件自动完成。
[plugins."io.containerd.grpc.v1.cri".containerd.runtimes]
  [plugins."io.containerd.grpc.v1.cri".containerd.runtimes.runc]
    runtime_type = "io.containerd.runtime.v1.linux"
    runtime_engine = ""
    runtime_root = ""
    privileged_without_host_devices = false
    base_runtime_spec = ""
    [plugins."io.containerd.grpc.v1.cri".containerd.runtimes.runc.options]
[plugins."io.containerd.grpc.v1.cri".cni]
  bin_dir = "/opt/cni/bin"
  conf_dir = "/etc/cni/net.d"
  max_conf_num = 1
  conf_template = ""
[plugins."io.containerd.grpc.v1.cri".registry]
  [plugins."io.containerd.grpc.v1.cri".registry.mirrors]
    [plugins."io.containerd.grpc.v1.cri".registry.mirrors."docker.io"]
      endpoint = ["https://registry-1.docker.io"]
[plugins."io.containerd.grpc.v1.cri".image_decryption]
  key_model = ""
...
[plugins."io.containerd.monitor.v1.cgroups"]
  no_prometheus = false
[plugins."io.containerd.runtime.v1.linux"]
  shim = "containerd-shim"
  runtime = "/usr/local/Ascend/Ascend-Docker-Runtime/ascend-docker-runtime"
  runtime_root = ""
  no_shim = false
  shim_debug = false
[plugins."io.containerd.runtime.v2.task"]
  platforms = ["linux/amd64"]
...
vi /var/lib/kubelet/kubeadm-flags.env
在KUBELET_KUBEADM_ARGS中增加如下--container-runtime和--container-runtime-endpoint两个参数,其他参数已用“...”省略。
KUBELET_KUBEADM_ARGS="... --container-runtime=remote --container-runtime-endpoint=/run/containerd/containerd.sock ..."
systemctl stop docker isula
systemctl daemon-reload && systemctl restart containerd kubelet