Maintainability and Testability

AscendSiPBoost provides the following maintainability and testability capabilities:

Return Value

The following table lists the return values of the AscendSiPBoost APIs.

Status Code Name

Status Code Value

Error Code Description

Fault Locating Method

ACL_SUCCESS

0

Execution succeeded.

-

ACL_ERROR_INVALID_PARAM

100000

Parameter verification failed.

Check whether the input parameter value of the API is correct.

ACL_ERROR_OP_INPUT_NOT_MATCH

100021

Single-operator input mismatch.

Check whether the operator input is correct.

ACL_ERROR_OP_OUTPUT_NOT_MATCH

100022

Single-operator output mismatch.

Check whether the operator output is correct.

ACL_ERROR_UNSUPPORTED_DATA_TYPE

100026

Data type not supported.

Check whether the data type exists and is supported.

ACL_ERROR_FORMAT_NOT_MATCH

100027

Format mismatch.

Check whether the format is correct.

ACL_ERROR_API_NOT_SUPPORT

200001

API not supported.

Check whether the called API is supported.

ACL_ERROR_INTERNAL_ERROR

500000

Unknown internal error.

-

Log System

The log system of AscendSiPBoost allows you to classify logs by level and to output them either to standard output or to log files.

  • Log classification
    Logs are classified into four levels from high to low: ERROR, WARN, INFO, and DEBUG, as shown in Table 1. The log level is controlled by the environment variable ASCEND_GLOBAL_LOG_LEVEL. The default value is INFO.
    Table 1 Log levels

    Level

    Description

    ERROR

    Error information. Error and exception information is printed at this level.

    WARN

    Warning information, which indicates that a potential error may occur.

    INFO (default)

    Data information, which is related to the operator and the entire graph. You can learn the running status of the entire graph or a single operator by viewing the INFO log.

    DEBUG

    Debug information, which records details about the acceleration library code. Developers can debug the framework code by viewing debug logs.

  • Log storage
    1. Log files are stored in [LOG_PATH]/log/asdsip.

      [LOG_PATH] is controlled by the environment variable ASCEND_PROCESS_LOG_PATH (see Environment Variables). The default value is ~/ascend.

    2. The log file name format is asdsip_[PID]_[Year][Month][Day][Hour][Minute][Second].log.

      [PID] indicates the process ID. Example: asdsip_253440_20231102065052.log.

  • Space management
    1. The maximum size of a log file is 20 MB, and a maximum of 50 log files can be stored. If the number of log files (stored in the standard naming format) in the current directory reaches the maximum, the earliest log files will be deleted based on the timestamps.
    2. Before generating a log file, the system checks the available space in the log storage directory. If less than 1 GB is available, no more log files will be generated.

DumpTensor

The dump tensor function of AscendSiPBoost enables you to print or save intermediate computation data, as well as the inputs and outputs of operators. It is applicable to the following scenario:

Using AscendSiPBoost operators and customizing the computing process

When using the AscendSiPBoost operators, you can print or save the inputs or outputs of the operators to help you analyze or check whether the calculation results are correct.

  • Call the AscendSiPBoost operators and customize the computing process in C++.

    When calling the operators from C++, you can print or save data by using standard C++ facilities. Example:

    #include <cmath>
    #include <complex>
    #include <cstdint>
    #include <cstdio>
    #include <cstdlib>
    #include <fstream>
    #include <iostream>
    #include <random>
    #include <vector>
    #include "asdsip.h"
    #include "acl/acl.h"
    #include "acl_meta.h"
    using namespace AsdSip;
    // Evaluates `err` exactly once into an AsdSip::AspbStatus.
    // If the status differs from AsdSip::NO_ERROR, prints "Execute failed." to
    // std::cout and terminates the process with exit(-1); otherwise prints
    // "Execute successfully.". The do { ... } while (0) wrapper makes the macro
    // usable as a single statement (for example inside an unbraced if/else).
    #define ASD_STATUS_CHECK(err)                                                \
        do {                                                                     \
            AsdSip::AspbStatus err_ = (err);                                     \
            if (err_ != AsdSip::NO_ERROR) {                                      \
                std::cout << "Execute failed." << std::endl; \
                exit(-1);                                                        \
            } else {                                                             \
                std::cout << "Execute successfully." << std::endl;               \
            }                                                                    \
        } while (0)
    // Prints the first `tensorSize` elements of `tensorData` to std::cout on one
    // line, each element followed by a single space, then ends (and flushes) the line.
    // Nothing but the line terminator is printed when `tensorSize` is zero or negative.
    void printTensor(const std::complex<float> *tensorData, int64_t tensorSize)
    {
        int64_t idx = 0;
        while (idx < tensorSize) {
            std::cout << tensorData[idx] << " ";
            ++idx;
        }
        std::cout << std::endl;
    }
    // Executes `return_expr` when `cond` evaluates to false.
    // `return_expr` is pasted verbatim, so it may consist of several statements
    // separated by ';' - the callers in this sample pass a LOG_PRINT call followed
    // by `return ret`. Wrapped in do { ... } while (0) to behave as one statement.
    #define CHECK_RET(cond, return_expr) \
        do {                             \
            if (!(cond)) {               \
                return_expr;             \
            }                            \
        } while (0)
    // Forwards a printf-style format string `message` and its optional arguments
    // to printf. `##__VA_ARGS__` removes the preceding comma when no variadic
    // arguments are supplied; this is a GNU-style preprocessor extension rather
    // than standard C++17, so it relies on compiler support.
    #define LOG_PRINT(message, ...)         \
        do {                                \
            printf(message, ##__VA_ARGS__); \
        } while (0)
    // Returns the number of elements described by `shape`, i.e. the product of
    // all of its dimensions. An empty shape yields 1.
    int64_t GetShapeSize(const std::vector<int64_t> &shape)
    {
        int64_t elementCount = 1;
        for (auto dim = shape.begin(); dim != shape.end(); ++dim) {
            elementCount = elementCount * (*dim);
        }
        return elementCount;
    }
    int Init(int32_t deviceId, aclrtStream *stream)
    {
        // Initialize ACL. This code is written in a fixed format.
        auto ret = aclInit(nullptr);
        CHECK_RET(ret == ACL_SUCCESS, LOG_PRINT("aclInit failed. ERROR: %d\n", ret); return ret);
        ret = aclrtSetDevice(deviceId);
        CHECK_RET(ret == ACL_SUCCESS, LOG_PRINT("aclrtSetDevice failed. ERROR: %d\n", ret); return ret);
        ret = aclrtCreateStream(stream);
        CHECK_RET(ret == ACL_SUCCESS, LOG_PRINT("aclrtCreateStream failed. ERROR: %d\n", ret); return ret);
        return 0;
    }
    // Allocates device memory for a tensor of the given `shape`, copies `hostData`
    // into it, and creates an ND-format aclTensor that refers to that memory.
    //
    // hostData    host-side elements; GetShapeSize(shape) * sizeof(T) bytes are read
    //             from hostData.data(), so the vector must hold at least that many
    //             elements.
    // shape       tensor dimensions; also passed as the storage shape.
    // deviceAddr  out: receives the device buffer from aclrtMalloc. The caller owns it
    //             and releases it (the sample's main uses aclrtFree).
    // dataType    ACL element type passed through to aclCreateTensor.
    // tensor      out: receives the result of aclCreateTensor.
    //
    // Returns 0 on success, or the error code of the failing aclrtMalloc/aclrtMemcpy
    // call after logging it.
    // NOTE(review): the pointer returned by aclCreateTensor is not validated here.
    template <typename T>
    int CreateAclTensor(const std::vector<T> &hostData, const std::vector<int64_t> &shape, void **deviceAddr,
        aclDataType dataType, aclTensor **tensor)
    {
        auto size = GetShapeSize(shape) * sizeof(T);
        // Call aclrtMalloc to allocate memory on the device.
        auto ret = aclrtMalloc(deviceAddr, size, ACL_MEM_MALLOC_HUGE_FIRST);
        CHECK_RET(ret == ACL_SUCCESS, LOG_PRINT("aclrtMalloc failed. ERROR: %d\n", ret); return ret);
        // Call aclrtMemcpy to copy the data on the host to the memory on the device.
        ret = aclrtMemcpy(*deviceAddr, size, hostData.data(), size, ACL_MEMCPY_HOST_TO_DEVICE);
        CHECK_RET(ret == ACL_SUCCESS, LOG_PRINT("aclrtMemcpy failed. ERROR: %d\n", ret); return ret);
        // Compute the strides of the contiguous tensor (innermost stride is 1).
        std::vector<int64_t> strides(shape.size(), 1);
        // Convert the rank to a signed value BEFORE subtracting: `shape.size() - 2`
        // is unsigned arithmetic and wraps around for shapes of rank 0 or 1.
        const int64_t rank = static_cast<int64_t>(shape.size());
        for (int64_t i = rank - 2; i >= 0; i--) {
            strides[i] = shape[i + 1] * strides[i + 1];
        }
        // Call the aclCreateTensor API to create an ACL tensor.
        *tensor = aclCreateTensor(shape.data(),
            shape.size(),
            dataType,
            strides.data(),
            0,
            aclFormat::ACL_FORMAT_ND,
            shape.data(),
            shape.size(),
            *deviceAddr);
        return 0;
    }
    // Prints up to `tensorSize` elements of `tensorData` to std::cout on one line,
    // each element followed by a single space, then ends (and flushes) the line.
    // The vector is taken by const reference to avoid copying it on every call, and
    // the element count is clamped to tensorData.size() so that an oversized
    // `tensorSize` can never read past the end of the vector.
    void printTensor(const std::vector<std::complex<float>> &tensorData, int64_t tensorSize)
    {
        const int64_t available = static_cast<int64_t>(tensorData.size());
        const int64_t count = tensorSize < available ? tensorSize : available;
        for (int64_t i = 0; i < count; i++) {
            std::cout << tensorData[i] << " ";
        }
        std::cout << std::endl;
    }
    // Sample entry point: builds two complex input vectors on the host, uploads them
    // to the device, runs asdBlasCdotu on them (presumably the unconjugated complex
    // dot product, going by the BLAS naming - confirm against the API reference),
    // copies the single-element result back, prints it and saves it to result.bin.
    // Returns 0 on success or the failing ACL error code.
    // NOTE: the early-return error paths skip the cleanup at the end of the function.
    int main(int argc, char **argv)
    {
        int deviceId = 0;
        aclrtStream stream;
        auto ret = Init(deviceId, &stream);
        CHECK_RET(ret == ACL_SUCCESS, LOG_PRINT("Init acl failed. ERROR: %d\n", ret); return ret);
        int64_t n = 8;
        int64_t xSize = 8;
        int64_t ySize = 8;
        // The host vectors are size-constructed rather than reserve()d: reserve() only
        // allocates capacity, so indexing elements of (or copying through data() of)
        // a vector whose size() is still 0 is undefined behavior.
        std::vector<std::complex<float>> tensorInXData(xSize);
        for (int64_t i = 0; i < xSize; i++) {
            tensorInXData[i] = {2.0, static_cast<float>(1.0 + i)};
        }
        std::vector<std::complex<float>> tensorInYData(ySize, std::complex<float>(3.0f, 4.0f));
        int64_t resultSize = 1;
        // Zero-initialized host buffer that receives the result from the device.
        std::vector<std::complex<float>> resultData(resultSize);
        std::cout << "------- input TensorInX -------" << std::endl;
        printTensor(tensorInXData.data(), xSize);
        std::cout << "------- input TensorInY -------" << std::endl;
        printTensor(tensorInYData.data(), ySize);
        std::vector<int64_t> xShape = {xSize};
        std::vector<int64_t> yShape = {ySize};
        std::vector<int64_t> resultShape = {resultSize};
        aclTensor *inputX = nullptr;
        aclTensor *inputY = nullptr;
        aclTensor *result = nullptr;
        void *inputXDeviceAddr = nullptr;
        void *inputYDeviceAddr = nullptr;
        void *resultDeviceAddr = nullptr;
        // Upload the inputs (and the zeroed result buffer) and wrap them in ACL tensors.
        ret = CreateAclTensor(tensorInXData, xShape, &inputXDeviceAddr, aclDataType::ACL_COMPLEX64, &inputX);
        CHECK_RET(ret == ACL_SUCCESS, return ret);
        ret = CreateAclTensor(tensorInYData, yShape, &inputYDeviceAddr, aclDataType::ACL_COMPLEX64, &inputY);
        CHECK_RET(ret == ACL_SUCCESS, return ret);
        ret = CreateAclTensor(resultData, resultShape, &resultDeviceAddr, aclDataType::ACL_COMPLEX64, &result);
        CHECK_RET(ret == ACL_SUCCESS, return ret);
        // Create the BLAS handle, build the dot plan and query the workspace it needs.
        asdBlasHandle handle;
        asdBlasCreate(handle);
        size_t lwork = 0;
        void *buffer = nullptr;
        asdBlasMakeDotPlan(handle);
        asdBlasGetWorkspaceSize(handle, &lwork);
        std::cout << "lwork = " << lwork << std::endl;
        // A device workspace is only allocated when the plan reports a non-zero size.
        if (lwork > 0) {
            ret = aclrtMalloc(&buffer, static_cast<int64_t>(lwork), ACL_MEM_MALLOC_HUGE_FIRST);
            CHECK_RET(ret == ACL_SUCCESS, LOG_PRINT("allocate workspace failed. ERROR: %d\n", ret); return ret);
        }
        asdBlasSetWorkspace(handle, buffer);
        asdBlasSetStream(handle, stream);
        ASD_STATUS_CHECK(asdBlasCdotu(handle, n, inputX, 1, inputY, 1, result));
        asdBlasSynchronize(handle);
        asdBlasDestroy(handle);
        // Copy the result back to the host so that it can be printed and saved.
        ret = aclrtMemcpy(resultData.data(),
            resultSize * sizeof(std::complex<float>),
            resultDeviceAddr,
            resultSize * sizeof(std::complex<float>),
            ACL_MEMCPY_DEVICE_TO_HOST);
        CHECK_RET(ret == ACL_SUCCESS, LOG_PRINT("copy result from device to host failed. ERROR: %d\n", ret); return ret);
        std::cout << "------- result -------" << std::endl;
        printTensor(resultData.data(), resultSize);
        // Dump the raw result bytes to result.bin; report success only if the
        // stream is still in a good state after writing and closing.
        std::ofstream file("result.bin", std::ios::binary | std::ios::out);
        file.write(reinterpret_cast<const char *>(resultData.data()), sizeof(std::complex<float>) * resultSize);
        file.close();
        if (file) {
            std::cout << "result.bin saved." << std::endl;
        } else {
            std::cout << "Failed to save result.bin." << std::endl;
        }
        // Release tensors, device buffers, the optional workspace, then the runtime.
        aclDestroyTensor(inputX);
        aclDestroyTensor(inputY);
        aclDestroyTensor(result);
        aclrtFree(inputXDeviceAddr);
        aclrtFree(inputYDeviceAddr);
        aclrtFree(resultDeviceAddr);
        if (lwork > 0) {
            aclrtFree(buffer);
        }
        aclrtDestroyStream(stream);
        aclrtResetDevice(deviceId);
        aclFinalize();
        return 0;
    }