scatter_vnchwconv
功能说明
这是将NCHW格式转换为NC1HWC0格式的指令。在此指令中,MASK 寄存器无效。
对于b32类型:在源向量中,从每个32B块中提取4B,然后将它们组合成新的块。
for (e=0; e<16; e=e+2)
    for (k=0; k<8; k++) {
        dst[e][k] = src[k][e/2]
        dst[e+1][k] = src[k+8][e/2]
    }
对于b16类型:在源向量中,从每个32B块中提取2B,然后将它们组合成新的块。
for (e=0; e<16; e++)
    for (k=0; k<16; k++)
        dst[e][k] = src[k][e]
对于b8类型:在源向量中,从每个32B块中提取1B,然后将它们组合成新的块。可以通过srcHighHalf参数指定从源块的高半段还是低半段读取,通过dstHighHalf参数指定写入目的块的高半段还是低半段。
dstHighHalf = 0, srcHighHalf = 0:
for (e=0; e<16; e++)
    for (k=0; k<16; k++)
        dst[e][k].B = src[k][e].B
dstHighHalf = 1, srcHighHalf = 0:
for (e=0; e<16; e++)
    for (k=0; k<16; k++)
        dst[e][k+16].B = src[k][e].B
dstHighHalf = 0, srcHighHalf = 1:
for (e=0; e<16; e++)
    for (k=0; k<16; k++)
        dst[e][k].B = src[k][e+16].B
dstHighHalf = 1, srcHighHalf = 1:
for (e=0; e<16; e++)
    for (k=0; k<16; k++)
        dst[e][k+16].B = src[k][e+16].B
接口原型
void scatter_vnchwconv_b32(ub_addr8_t dst, ub_addr8_t src, uint8_t repeat, uint16_t dstStride, uint16_t srcStride);
void scatter_vnchwconv_b8(ub_addr8_t dst, ub_addr8_t src, uint8_t repeat, uint16_t dstStride, uint16_t srcStride, bool dstHighHalf, bool srcHighHalf);
void scatter_vnchwconv_b8(ub_addr8_t dst, ub_addr8_t src, uint64_t config, bool dstHighHalf, bool srcHighHalf);
void scatter_vnchwconv_b16(ub_addr8_t dst, ub_addr8_t src, uint8_t repeat, uint16_t dstStride, uint16_t srcStride);
流水类型
PIPE_V
父主题: UB内搬移