昇腾社区首页
中文
注册

样例参考

本节给出实现RPing功能的完整代码样例。

该样例实现了一个单机8卡组网场景的RPing功能。

代码样例

样例代码文件包含“rping_test.cc”与“rping_test.h”文件。

  • 头文件“rping_test.h”定义了各Device的IP地址,以单机8卡为例,具体如下:
     1
     2
     3
     4
     5
     6
     7
     8
     9
    10
    11
    12
    13
    14
    15
    16
    17
    18
    19
    20
    21
    22
    23
    #include <stdio.h>
    #include <stdint.h>
    #include <string.h>
    #include <stdlib.h>
    #include <unistd.h>
    #include "acl/acl.h"
    #include "acl/acl_prof.h"
    #include "hccl/hccn_rping.h"
    
    // Example configuration for a single server with 8 devices. The IP
    // addresses below are placeholders only; replace them with the real ones.

    // Capacity in bytes of one IP string buffer, including the terminating NUL.
    constexpr int ipLen = 16;

    // IP address of each device; the sample indexes this table by device ID.
    // The table is const (and therefore has internal linkage) so that including
    // this header from more than one translation unit cannot cause
    // duplicate-symbol link errors.
    const char deviceIp[8][ipLen] =
    {
        "192.168.99.127",
        "192.168.99.128",
        "192.168.99.129",
        "192.168.99.130",
        "192.168.99.131",
        "192.168.99.132",
        "192.168.99.133",
        "192.168.99.134"
    };
    
  • “rping_test.cc”文件具体实现了RPing功能,代码示例如下:
      1
      2
      3
      4
      5
      6
      7
      8
      9
     10
     11
     12
     13
     14
     15
     16
     17
     18
     19
     20
     21
     22
     23
     24
     25
     26
     27
     28
     29
     30
     31
     32
     33
     34
     35
     36
     37
     38
     39
     40
     41
     42
     43
     44
     45
     46
     47
     48
     49
     50
     51
     52
     53
     54
     55
     56
     57
     58
     59
     60
     61
     62
     63
     64
     65
     66
     67
     68
     69
     70
     71
     72
     73
     74
     75
     76
     77
     78
     79
     80
     81
     82
     83
     84
     85
     86
     87
     88
     89
     90
     91
     92
     93
     94
     95
     96
     97
     98
     99
    100
    101
    102
    103
    104
    105
    106
    107
    108
    109
    110
    111
    112
    113
    114
    115
    116
    117
    118
    119
    120
    121
    122
    123
    124
    125
    126
    127
    128
    129
    130
    131
    132
    133
    134
    135
    136
    137
    138
    139
    140
    141
    142
    143
    144
    145
    146
    147
    148
    149
    150
    151
    152
    153
    154
    155
    156
    157
    158
    159
    160
    161
    162
    163
    164
    165
    166
    167
    168
    169
    170
    171
    172
    173
    174
    175
    176
    177
    178
    179
    180
    181
    182
    183
    184
    185
    186
    187
    188
    189
    190
    191
    192
    193
    194
    195
    #include <chrono>
    #include <vector>
    #include <thread>
    #include <atomic>
    #include "rping_test.h"
    
    int singleDevAllProc(int devId, std::vector<int> devices, int devClientId, std::atomic<bool> *isStop)
    {
        // 初始化device
        HccnRpingInitAttr *initAttr = new HccnRpingInitAttr();
        initAttr->mode = HCCN_RPING_MODE_ROCE;
        initAttr->port = 13886;
        initAttr->npuNum = 128;
        initAttr->bufferSize = 4096 * 50; // 必须大于这个值:pktnum * 2048 * targetNum
        initAttr->ipAddr = new char[ipLen];
        strcpy(initAttr->ipAddr, deviceIp[devId]);
        HccnRpingCtx rpingCtx = nullptr;
        aclrtSetDevice(devId);
        HccnResult ret = HccnRpingInit(devId, initAttr, &rpingCtx);
        if (ret != HCCN_SUCCESS) {
            printf("device init failed.\n");
            return -1;
        }
        printf("rpingCtx [%p]", rpingCtx);
        printf("device[%d] init success!\n", devId);
        if (devId != devClientId) {
            sleep(20); // target启动后需要一直保持等待探测请求的状态
            while (isStop->load() == false) {
                sleep(1);
                continue;
            }
            HccnRpingDeinit(rpingCtx);
            delete[] initAttr->ipAddr;
            delete initAttr;
            return 0;
        }
        // 添加target
        int targetNum = devices.size() - 1;
        HccnRpingTargetInfo *target = new HccnRpingTargetInfo[targetNum];
        for (int i = 0; i < devices.size() - 1; i++) {
            int devTargetId = devices[i];
            target[i].srcPort = 0;
    
            target[i].sl = 4;
            target[i].tc = (33 & 0x3f) << 2;
            target[i].port = 13886;
            target[i].payloadLen = 12;
            target[i].srcIp = new char[ipLen];
            target[i].dstIp = new char[ipLen];
            char payload[12] = "hellotarget";
            strcpy(target[i].payload, payload);
            strcpy(target[i].srcIp, deviceIp[devClientId]);
            strcpy(target[i].dstIp, deviceIp[devTargetId]);
        }
    
        ret = HccnRpingAddTarget(rpingCtx, targetNum, target);
        if (ret != HCCN_SUCCESS) {
            delete[] target;
            printf("device add target failed.\n");
            return -1;
        }
        printf("device[%d] add target success!\n", devId);
        // 发起请求
        uint32_t pktNum = 10;   // 发给每个target的报文数量
        uint32_t interval = 1;  // ms
        uint32_t timeout = 100; // ms
        ret = HccnRpingBatchPingStart(rpingCtx, pktNum, interval, timeout);
        if (ret != HCCN_SUCCESS) {
            delete[] target;
            printf("device start ping failed.\n");
            return -1;
        }
        printf("device[%d] start ping!\n", devId);
        // 获取结果
        HccnRpingResultInfo *result = new HccnRpingResultInfo[targetNum];
        HccnResult hccnRet = HCCN_E_AGAIN;
        while(hccnRet == HCCN_E_AGAIN) {
            sleep(1);
            hccnRet =  HccnRpingGetResult(rpingCtx, targetNum, target, result);
        }
        if (ret != HCCN_SUCCESS) {
            delete[] target;
            delete[] result;
            printf("device get result failed.\n");
            return -1;
        }
        for (int i = 0; i < targetNum; i++) {
            printf("txPkt[%u] rxPkt[%u] minRTT[%u] maxRTT[%u] avgRTT[%u] state[%u]\n",
                result[i].txPkt,
                result[i].rxPkt,
                result[i].minRTT,
                result[i].maxRTT,
                result[i].avgRTT,
                result[i].state);
        }
    
        unsigned int payloadLenOutput = 0;
        void* payloadOutput;
        HccnRpingGetPayload(rpingCtx, &payloadOutput, &payloadLenOutput);
        int payloadNum = payloadLenOutput / 2048;
        for (int i = 0; i < payloadNum; i++) {
            HccnRpingPayloadHead *head = static_cast<HccnRpingPayloadHead*>(payloadOutput);
            printf("[%dth] srcIp:%s, dstIp:%s, payloadLen:%d, t1:%llu %llu, t2:%llu %llu, t3:%llu %llu, t4:%llu %llu, task id:%u\n",
    			i,
                            head->srcIp, head->dstIp,
                            head->payloadLen,
    			head->t1.sec, head->t1.usec,
    			head->t2.sec, head->t2.usec,
    			head->t3.sec, head->t3.usec,
    			head->t4.sec, head->t4.usec,
    			head->rpingBatchId);
            char* ptrTmp = static_cast<char*>(payloadOutput);
    	ptrTmp += 2048;
    	payloadOutput = ptrTmp;
        }
        
        ret = HccnRpingBatchPingStop(rpingCtx);
        if (ret != HCCN_SUCCESS) {
            delete[] target;
            delete[] result;
            printf("device stop ping failed.\n");
            return -1;
        }
        printf("device[%d] stop ping!\n", devId);
    
        HccnRpingRemoveTarget(rpingCtx, targetNum, target);
        if (ret != HCCN_SUCCESS) {
            delete[] target;
            delete[] result;
            printf("device remove target failed.\n");
            return -1;
        }
        printf("device[%d] remove target success!\n", devId);
    
        // 释放内存
        ret = HccnRpingDeinit(rpingCtx);
        if (ret != HCCN_SUCCESS) {
            printf("device deinit failed.\n");
            return -1;
        }
        printf("device[%d] deinit success!\n", devId);
        for (int i = 0; i < targetNum; i++) {
            delete[] target[i].srcIp;
            delete[] target[i].dstIp;
        }
        delete[] result;
        delete[] target;
        printf("rpingCtx test success!!!\n");
        return 0;
    }
    // Parses a non-negative decimal integer command-line argument into *value.
    // Returns false when text is empty, has trailing characters, is negative,
    // or is greater than 0x7fffffff.
    static bool parseNonNegativeInt(const char *text, int *value)
    {
        char *end = nullptr;
        const long parsed = strtol(text, &end, 10);
        if (end == text || *end != '\0' || parsed < 0 || parsed > 0x7fffffffL) {
            return false;
        }
        *value = static_cast<int>(parsed);
        return true;
    }

    // Entry point.
    // Usage: rping_test <Number of cycles> <client NPU devlogicId> <target NPU devlogicId>...
    // For every cycle one thread per distinct device runs singleDevAllProc.
    // The client thread is joined first; only then are the targets told to stop.
    // Returns 0 after all cycles have run, -1 on invalid arguments.
    int main(int argc, char *argv[])
    {
        if (argc < 4) {
            printf("usage: %s <Number of cycles> <client NPU devlogicId> <target NPU devlogicId>...\n",
                argc > 0 ? argv[0] : "rping_test");
            return -1;
        }

        // Number of full init-to-deinit cycles to run.
        int loop = 0;
        if (!parseNonNegativeInt(argv[1], &loop)) {
            printf("invalid number of cycles: %s\n", argv[1]);
            return -1;
        }

        // Collect the distinct device IDs. argv[2] is the client, so it always
        // ends up as devices[0].
        std::vector<int> devices;
        for (int i = 2; i < argc; i++) {
            int dev = 0;
            if (!parseNonNegativeInt(argv[i], &dev)) {
                printf("invalid device id: %s\n", argv[i]);
                return -1;
            }
            bool isRepeat = false;
            for (size_t j = 0; j < devices.size(); j++) {
                if (dev == devices[j]) {
                    isRepeat = true;
                    break;
                }
            }
            if (isRepeat) {
                printf("%d is repeat!\n", dev);
                continue;
            }
            printf("dev: %d, isrepeat: %d\n", dev, isRepeat);
            devices.push_back(dev);
        }
        if (devices.size() < 2) {
            printf("at least one target device different from the client is required.\n");
            return -1;
        }
        const int devClientId = devices[0];

        std::atomic<bool> isStop{false};
        for (int i = 0; i < loop; i++) {
            // Targets from a previous cycle saw "true"; start each cycle clean.
            isStop.store(false);
            std::vector<std::thread> test_threads;
            printf("\n*************分割线**************\n\n");
            printf("%dth process start!!\n", i+1);
            for (size_t j = 0; j < devices.size(); j++) {
                test_threads.push_back(std::thread(singleDevAllProc, devices[j], devices, devClientId, &isStop));
                printf("device[%d] start running!!\n", devices[j]);
            }

            // Wait for the client (thread 0) before releasing the targets, so
            // no target shuts down while it may still be probed.
            if (test_threads[0].joinable()) {
                test_threads[0].join();
                printf("device[%d] stop running!!\n", devices[0]);
            }
            isStop.store(true);
            // Iterate over the threads actually created; this can be fewer
            // than argc - 2 when duplicate IDs were dropped.
            for (size_t j = 1; j < test_threads.size(); j++) {
                if (test_threads[j].joinable()) {
                    test_threads[j].join();
                    printf("device[%d] stop running!!\n", devices[j]);
                }
            }
        }
        return 0;
    }
    

Makefile文件参考

编译需要的Makefile文件如下:

#
# Build script for the rping_test sample.
# Requires ASCEND_DIR to point at the CANN toolkit installation.
#
#loading path
#--------------------------------------------------------------------------------------------------------------------------------------------------
# -pthread is required because the sample uses std::thread.
CXXFLAGS := -std=c++11\
        -Werror\
        -pthread\
        -fstack-protector-strong\
        -fPIE -pie\
        -O2\
        -g\
        -s\
        -Wl,-z,relro\
        -Wl,-z,now\
        -Wl,-z,noexecstack\
        -Wl,--copy-dt-needed-entries
HCCL_INC_DIR = ${ASCEND_DIR}/include
HCCL_LIB_DIR = ${ASCEND_DIR}/lib64
ACL_INC_DIR = ${ASCEND_DIR}/include
ACL_LIB_DIR = ${ASCEND_DIR}/lib64
# Name of the executable produced by the build.
LIST = rping_test
#
#library flags
#--------------------------------------------------------------------------------------------------------------------------------------------------
LIBS = -L$(HCCL_LIB_DIR) -lhccl_plf\
        -L$(ACL_LIB_DIR) -lascendcl
INCLUDEDIRS = -I$(HCCL_INC_DIR)\
                -I$(ACL_INC_DIR)
#
#make
#--------------------------------------------------------------------------------------------------------------------------------------------------
# Neither target names a real file; declare both phony so a stray file called
# "all" or "clean" cannot suppress the recipe.
.PHONY: all clean
all:
	@mkdir -p bin
	g++ $(CXXFLAGS) rping_test.cc $(INCLUDEDIRS) -o $(LIST) $(LIBS)
	@printf "\nRPing_test compile completed\n" 
	mv $(LIST) ./bin
clean:
	rm -rf ./bin/*_test

依赖环境变量

编译本节所示样例代码前需配置如下环境变量。

source /usr/local/Ascend/ascend-toolkit/set_env.sh
export ASCEND_DIR=/usr/local/Ascend/ascend-toolkit/latest

其中“/usr/local/Ascend”为CANN软件使用root用户安装的默认安装路径,如CANN软件使用普通用户安装或指定路径安装,请自行替换。

编译执行样例

  1. 执行“make”命令,会在bin目录下面编译生成“rping_test”可执行文件。
    make
  2. 执行如下命令,运行“rping_test”可执行文件。
    ./bin/rping_test <Number of cycles> <client NPU devlogicId> <target NPU devlogicId>
    • <Number of cycles>:从RPing功能初始化到释放RPing资源的全流程执行次数。
    • <client NPU devlogicId>:client NPU的Device逻辑ID。
    • <target NPU devlogicId>:target NPU的Device逻辑ID。若有多个target NPU,各逻辑ID之间使用空格分隔即可。

    命令示例:

    ./bin/rping_test 1 0 1 2 3 4 5 6 7

    代表RPing功能执行一次,client NPU为Device 0,target NPU为Device 1到Device 7共7个NPU。