img2colv2_cbuf_to_ca
功能说明
该指令将数据从L1搬运到L0A上。直观来说，img2col是指将在特征图上滑动的三维卷积核窗口内的数据展开为一行数据。卷积核窗口在水平方向上滑动Wo步，在竖直方向上滑动Ho步，共生成Wo*Ho行。
假设原始特征图为Ci*Hi*Wi的三维格式，则搬运后生成的二维矩阵在水平方向的大小为(Hk*Wk*Ci)，在竖直方向的大小为(Wo*Ho)。其中：
- Co表示卷积核个数,也就是输出通道数;
- Wk表示卷积核的水平尺寸;
- Hk表示卷积核的竖直尺寸;
- Ci表示输入通道数;
- Hi表示输入特征图竖直尺寸;
- Wi表示输入特征图水平尺寸;
- Ho表示输出特征图竖直尺寸;
- Wo表示输出特征图水平尺寸;
相邻卷积核窗口之间的重叠数据会被重复加载到生成的特征图矩阵中，因此该矩阵看起来包含许多冗余数据，但这种展开方式将三维数据格式的卷积计算转化为了简单的矩阵乘法。
接口原型
void img2colv2_cbuf_to_ca(__ca__ half *dst, __cbuf__ half *src, uint16_t stepK, uint16_t stepM, uint16_t posK, uint16_t posM, uint8_t strideW, uint8_t strideH, uint8_t Wk, uint8_t Hk, uint8_t dilationW, uint8_t dilationH, bool filterW, bool filterH, bool transpose, bool fmatrixCtrl, uint16_t sizeChannel);
void img2colv2_cbuf_to_ca(__ca__ uint32_t *dst, __cbuf__ uint32_t *src, uint16_t stepK, uint16_t stepM, uint16_t posK, uint16_t posM, uint8_t strideW, uint8_t strideH, uint8_t Wk, uint8_t Hk, uint8_t dilationW, uint8_t dilationH, bool filterW, bool filterH, bool transpose, bool fmatrixCtrl, uint16_t sizeChannel);
void img2colv2_cbuf_to_ca(__ca__ int32_t *dst, __cbuf__ int32_t *src, uint16_t stepK, uint16_t stepM, uint16_t posK, uint16_t posM, uint8_t strideW, uint8_t strideH, uint8_t Wk, uint8_t Hk, uint8_t dilationW, uint8_t dilationH, bool filterW, bool filterH, bool transpose, bool fmatrixCtrl, uint16_t sizeChannel);
void img2colv2_cbuf_to_ca_s4(__ca__ void *dst, __cbuf__ void *src, uint16_t stepK, uint16_t stepM, uint16_t posK, uint16_t posM, uint8_t strideW, uint8_t strideH, uint8_t Wk, uint8_t Hk, uint8_t dilationW, uint8_t dilationH, bool filterW, bool filterH, bool transpose, bool fmatrixCtrl, uint16_t sizeChannel);
void img2colv2_cbuf_to_ca(__ca__ float *dst, __cbuf__ float *src, uint16_t stepK, uint16_t stepM, uint16_t posK, uint16_t posM, uint8_t strideW, uint8_t strideH, uint8_t Wk, uint8_t Hk, uint8_t dilationW, uint8_t dilationH, bool filterW, bool filterH, bool transpose, bool fmatrixCtrl, uint16_t sizeChannel);
void img2colv2_cbuf_to_ca(__ca__ uint8_t *dst, __cbuf__ uint8_t *src, uint16_t stepK, uint16_t stepM, uint16_t posK, uint16_t posM, uint8_t strideW, uint8_t strideH, uint8_t Wk, uint8_t Hk, uint8_t dilationW, uint8_t dilationH, bool filterW, bool filterH, bool transpose, bool fmatrixCtrl, uint16_t sizeChannel);
void img2colv2_cbuf_to_ca(__ca__ int8_t *dst, __cbuf__ int8_t *src, uint16_t stepK, uint16_t stepM, uint16_t posK, uint16_t posM, uint8_t strideW, uint8_t strideH, uint8_t Wk, uint8_t Hk, uint8_t dilationW, uint8_t dilationH, bool filterW, bool filterH, bool transpose, bool fmatrixCtrl, uint16_t sizeChannel);
void img2colv2_cbuf_to_ca(__ca__ bfloat16_t *dst, __cbuf__ bfloat16_t *src, uint16_t stepK, uint16_t stepM, uint16_t posK, uint16_t posM, uint8_t strideW, uint8_t strideH, uint8_t Wk, uint8_t Hk, uint8_t dilationW, uint8_t dilationH, bool filterW, bool filterH, bool transpose, bool fmatrixCtrl, uint16_t sizeChannel);
流水类型
PIPE_MTE1
父主题: 卷积输入搬运