昇腾社区首页
中文
注册

平台适配

MindCluster Volcano上报故障后,会在ConfigMap中记录节点故障信息。MindCluster Volcano获取节点故障信息的代码示例如下;若用户需要获取节点故障信息以进行其他业务逻辑处理,可参考该示例实现相应的代码。
import "volcano.sh/volcano/pkg/scheduler/plugins/ascend-volcano-plugin/plugin"
// setNodeHealthyByNodeD records on fNode whether nodeD is enabled for node and,
// when it is, flags fNode as a faulty, unhealthy node if the heartbeat check fails.
//
// Nothing beyond the nodeD flag is touched when nodeD is disabled.
func (fNode *FaultNode) setNodeHealthyByNodeD(node *plugin.NPUNode) {
	nodeDOn := fNode.isNodeDEnabled(node)
	if !nodeDOn {
		klog.V(util.LogInfoLev).Infof("node %s nodeD not enabled", node.Name)
	}
	fNode.setNodeDValue(nodeDOn)

	// The heartbeat is only consulted when nodeD is enabled; a passing check
	// leaves the current health state untouched.
	if !nodeDOn || fNode.isNodeHealthyByHeartbeat() {
		return
	}

	// Heartbeat check failed (per the original note: the time since the last
	// heartbeat update exceeds maxInterval), so mark the node as faulty.
	fNode.setIsFaultNodeValue(true)
	fNode.setNodeHealthStateValue(NodeUnhealthy)
	klog.V(util.LogInfoLev).Infof("Node %s health state set %s for wrong heartbeat", node.Name, NodeUnhealthy)
}