开发者
资源

随机数生成(Random)

除了可以下发Kernel执行任务外,Runtime还提供下发Reduce和随机数生成等内置系统任务的功能(系统任务与Kernel任务的区别在于无需用户提供执行代码)。系统任务可以下发到某条Stream上异步执行,同样遵循同一条Stream上任务保序执行的规则。

通过aclrtReduceAsync接口可以下发执行Reduce操作任务,调用代码示例如下:

 1
 2
 3
 4
 5
 6
 7
 8
 9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
    aclInit(NULL);
    aclrtSetDevice(0);
    aclrtStream stream;
    aclrtCreateStream(&stream);
    // 准备 Host 数据
    const int count = 4;
    float hostInput[4] = {1.0, 2.0, 3.0, 4.0};
    float hostOutput[4] = {0, 0, 0, 0};
    size_t size = count * sizeof(float);
    // 申请 Device 内存
    void *devInput = NULL;
    void *devOutput = NULL;
    aclrtMalloc(&devInput, size, ACL_MEM_MALLOC_HUGE_FIRST);
    aclrtMalloc(&devOutput, size, ACL_MEM_MALLOC_HUGE_FIRST);
    // 拷贝数据到 Device
    aclrtMemcpy(devInput, size, hostInput, size, ACL_MEMCPY_HOST_TO_DEVICE);
    aclrtMemcpy(devOutput, size, hostInput, size, ACL_MEMCPY_HOST_TO_DEVICE);
    // 调用 aclrtReduceAsync
    aclrtReduceAsync(devOutput,
        devInput,
        size,
        ACL_RT_MEMCPY_SDMA_AUTOMATIC_SUM,  // 归约类型
        ACL_FLOAT,                         // 数据类型
        stream,
        NULL);
    // 同步 stream
    aclrtSynchronizeStream(stream);
    // 拷回结果
    aclrtMemcpy(hostOutput, size, devOutput, size, ACL_MEMCPY_DEVICE_TO_HOST);
    for (int i = 0; i < count; i++) {
        printf("Reduce SUM result[%d] = %f\n", i, hostOutput[i]);
    }
    /* 预期如下结果
    Reduce SUM result[0] = 2.000000
    Reduce SUM result[1] = 4.000000
    Reduce SUM result[2] = 6.000000
    Reduce SUM result[3] = 8.000000
    */
    // 释放资源
    aclrtFree(devInput);
    aclrtFree(devOutput);
    aclrtDestroyStream(stream);
    aclrtResetDeviceForce(0);
    aclFinalize();

通过aclrtRandomNumAsync执行随机数生成任务,调用代码示例如下:

 1
 2
 3
 4
 5
 6
 7
 8
 9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
/*
 * Fill in an aclrtRandomNumTaskInfo for a float, normal-distribution random
 * number task and submit it with aclrtRandomNumAsync on `stream`.
 *
 * mean, stddev    - distribution parameters, copied by value into the task info
 * seed            - random seed, copied by value into the task info
 * num             - count value stored in the task's randomNum field
 * counterDevAddr  - address stored as the task's random counter address
 * devOutput       - address stored as the task's result address
 * stream          - stream the task is submitted to
 *
 * Returns whatever aclrtRandomNumAsync returns.
 */
aclError NormalFloatAsync(
    float mean, float stddev, uint64_t seed, uint64_t num, void *counterDevAddr, void *devOutput, aclrtStream stream)
{
    aclrtRandomNumTaskInfo taskInfo = {};

    // Element type and the addresses the task refers to.
    taskInfo.dataType = ACL_FLOAT;
    taskInfo.randomParaAddr = NULL;
    taskInfo.randomCounterAddr = counterDevAddr;
    taskInfo.randomResultAddr = devOutput;

    // Seed and count: raw bytes copied in, with isAddr left at 0
    // (presumably meaning "inline value, not an address" - confirm in the API docs).
    taskInfo.randomSeed.isAddr = 0;
    taskInfo.randomSeed.size = sizeof(seed);
    memcpy(taskInfo.randomSeed.valueOrAddr, &seed, sizeof(seed));

    taskInfo.randomNum.isAddr = 0;
    taskInfo.randomNum.size = sizeof(num);
    memcpy(taskInfo.randomNum.valueOrAddr, &num, sizeof(num));

    // Select the normal-distribution function and pass mean / stddev the same way.
    taskInfo.randomNumFuncParaInfo.funcType = ACL_RT_RANDOM_NUM_FUNC_TYPE_NORMAL_DIS;

    taskInfo.randomNumFuncParaInfo.paramInfo.normalDisInfo.mean.isAddr = 0;
    taskInfo.randomNumFuncParaInfo.paramInfo.normalDisInfo.mean.size = sizeof(mean);
    memcpy(taskInfo.randomNumFuncParaInfo.paramInfo.normalDisInfo.mean.valueOrAddr, &mean, sizeof(mean));

    taskInfo.randomNumFuncParaInfo.paramInfo.normalDisInfo.stddev.isAddr = 0;
    taskInfo.randomNumFuncParaInfo.paramInfo.normalDisInfo.stddev.size = sizeof(stddev);
    memcpy(taskInfo.randomNumFuncParaInfo.paramInfo.normalDisInfo.stddev.valueOrAddr, &stddev, sizeof(stddev));

    return aclrtRandomNumAsync(&taskInfo, stream, NULL);
}
/*
 * Sample entry point: generates 128 normally distributed floats (mean 3.0,
 * stddev 2.0) on device 0 via NormalFloatAsync and copies them back to the host.
 *
 * Every runtime call is checked; on the first failure the resources acquired
 * so far are released in reverse order and the program exits with status 1.
 * Returns 0 when the whole sequence succeeds.
 */
int main()
{
    // All locals are declared up front so the cleanup gotos never jump over
    // an initialization.
    int exitCode = 1;
    aclError ret;
    aclrtStream stream;
    void *devOutput = NULL;
    void *hostOutput = NULL;
    void *counterAddr = NULL;
    uint64_t num = 128;
    size_t size = num * sizeof(uint64_t);  // deliberately larger than num floats
    float mean = 3.0;
    float stddev = 2.0;

    // Initialize ACL. A return value of 0 means success (ACL_SUCCESS).
    ret = aclInit(NULL);
    if (ret != 0) {
        return exitCode;
    }
    ret = aclrtSetDevice(0);
    if (ret != 0) {
        goto finalize;
    }
    ret = aclrtCreateStream(&stream);
    if (ret != 0) {
        goto reset_device;
    }

    // Device buffer that receives the generated numbers.
    ret = aclrtMalloc(&devOutput, size, ACL_MEM_MALLOC_NORMAL_ONLY);
    if (ret != 0) {
        goto destroy_stream;
    }

    // Host buffer the result is copied into.
    hostOutput = malloc(size);
    if (hostOutput == NULL) {
        goto free_dev_output;
    }

    // Device memory holding the random-number state counter (16 bytes required).
    ret = aclrtMalloc(&counterAddr, 16, ACL_MEM_MALLOC_NORMAL_ONLY);
    if (ret != 0) {
        goto free_host_output;
    }

    ret = NormalFloatAsync(mean, stddev, 0, num, counterAddr, devOutput, stream);
    if (ret != 0) {
        goto free_counter;
    }

    // Wait for the stream to finish before touching the result.
    ret = aclrtSynchronizeStream(stream);
    if (ret != 0) {
        goto free_counter;
    }

    // Copy the result back to the host.
    ret = aclrtMemcpy(hostOutput, size, devOutput, size, ACL_MEMCPY_DEVICE_TO_HOST);
    if (ret != 0) {
        goto free_counter;
    }

    exitCode = 0;

    // Release resources in reverse order of acquisition.
free_counter:
    aclrtFree(counterAddr);
free_host_output:
    free(hostOutput);
free_dev_output:
    aclrtFree(devOutput);
destroy_stream:
    aclrtDestroyStream(stream);
reset_device:
    aclrtResetDeviceForce(0);
finalize:
    aclFinalize();
    return exitCode;
}