sample_actlass_basic_matmul.py文件
import numpy as np
import mskpp
from mskpp.actlass import MatmulCoord, RowMajor


def get_kernel():
    """Generate the launch source for the BasicMatmul kernel and compile it.

    Returns:
        The object returned by ``mskpp.compile``; ``basic_matmul`` launches
        it with the ``kernel[blockdim](...)`` syntax.
    """
    src_path = "basic_matmul.cpp"  # kernel implementation file
    kernel_name = "BasicMatmul"  # name of the kernel to invoke
    build_script = "make.sh"  # kernel build script
    config = mskpp.ActlassConfig(src_path, kernel_name)
    gen_file = mskpp.Launcher(config).code_gen()
    return mskpp.compile(build_script=build_script, launch_src_file=gen_file)


@mskpp.autotune(
    configs=[
        {'L1TileShape': 'MatmulShape<64, 64, 64>', 'L0Shape': 'MatmulShape<128, 256, 64>'},
        {'L1TileShape': 'MatmulShape<64, 64, 128>', 'L0Shape': 'MatmulShape<128, 256, 64>'},
        {'L1TileShape': 'MatmulShape<64, 128, 128>', 'L0Shape': 'MatmulShape<128, 256, 64>'},
        {'L1TileShape': 'MatmulShape<64, 128, 128>', 'L0Shape': 'MatmulShape<64, 256, 64>'},
        {'L1TileShape': 'MatmulShape<128, 128, 128>', 'L0Shape': 'MatmulShape<128, 256, 64>'},
    ],
    warmup=500,
    repeat=10,
    device_ids=[0],
)
def basic_matmul(problem_shape, a, layout_a, b, layout_b, c, layout_c):
    """Build the kernel and launch it on the given operands.

    Args:
        problem_shape: Matmul problem size (a ``MatmulCoord`` in the driver).
        a: Left-hand input matrix.
        layout_a: Layout descriptor for ``a``.
        b: Right-hand input matrix.
        layout_b: Layout descriptor for ``b``.
        c: Output matrix; the driver compares it against the NumPy result
            after the launch, so it is presumably written by the kernel.
        layout_c: Layout descriptor for ``c``.

    Returns:
        Whatever the kernel launch returns.
    """
    kernel = get_kernel()
    # NOTE(review): hard-coded launch dimension; presumably has to match the
    # core count of the target hardware -- confirm.
    blockdim = 20
    # NOTE(review): the launch uses device_id=1 while the autotune decorator
    # lists device_ids=[0]; confirm which device is intended.
    # Operator launch (the `<<<>>>`-style kernel invocation).
    return kernel[blockdim](
        problem_shape, a, layout_a, b, layout_b, c, layout_c, device_id=1
    )


def data_compare(a, b, c):
    """Print whether ``c`` exactly equals ``np.matmul(a, b)``.

    Args:
        a: Left-hand input matrix.
        b: Right-hand input matrix.
        c: Result matrix to validate.

    Returns:
        True when ``c`` and the NumPy reference are element-wise identical.
    """
    golden = np.matmul(a, b)
    is_equal = np.array_equal(c, golden)
    print(f"compare result: {is_equal}")
    return is_equal


if __name__ == "__main__":
    m = 1024
    n = 768
    k = 1024
    problem_shape = MatmulCoord(m, n, k)
    layout_a = RowMajor(m, k)
    layout_b = RowMajor(k, n)
    layout_c = RowMajor(m, n)
    # randint(1, 2) has an exclusive upper bound, so both inputs are all
    # ones and every product element equals k.
    a = np.random.randint(1, 2, [m, k]).astype(np.half)
    b = np.random.randint(1, 2, [k, n]).astype(np.half)
    c = np.zeros([m, n]).astype(np.half)
    basic_matmul(problem_shape, a, layout_a, b, layout_b, c, layout_c)
    data_compare(a, b, c)
父主题: 附录