if __name__ == '__main__':
    # Build the session config that is handed to init_resource(). Only options
    # supported by initialize_system may be set through this config.
    session_config = tf.ConfigProto(allow_soft_placement=True)
    custom_op = session_config.graph_options.rewrite_options.custom_optimizers.add()
    custom_op.name = "NpuOptimizer"
    # Enable profiling data collection.
    custom_op.parameter_map["profiling_mode"].b = True
    # Collect task trace data only.
    custom_op.parameter_map["profiling_options"].s = tf.compat.as_bytes(
        '{"output":"/home/HwHiAiUser/output","task_trace":"on"}')
    # Collect both task trace and iteration (training) trace data. Start with
    # task trace only; if that is not enough to locate the problem, switch to
    # the line below to collect iteration trace data as well.
    # custom_op.parameter_map["profiling_options"].s = tf.compat.as_bytes('{"output":"/home/HwHiAiUser/output","task_trace":"on","training_trace":"on","aicpu":"on","fp_point":"","bp_point":"","aic_metrics":"PipeUtilization"}')

    (npu_sess, npu_shutdown) = init_resource(config=session_config)
    try:
        tf.app.run()
    finally:
        # tf.app.run() finishes by calling sys.exit(), so statements placed
        # after it are never reached. Releasing the NPU resources in `finally`
        # guarantees the cleanup runs while SystemExit (or any error) unwinds.
        shutdown_resource(npu_sess, npu_shutdown)
        close_session(npu_sess)
需要注意,仅initialize_system中支持的配置项可在init_resource函数的config中进行配置,若需配置其他功能,请在npu_config_proto函数的config_proto中进行配置。
# Session-level profiling configuration passed through npu_config_proto().
session_config = tf.ConfigProto(allow_soft_placement=True)
custom_op = session_config.graph_options.rewrite_options.custom_optimizers.add()
custom_op.name = 'NpuOptimizer'
# Enable profiling data collection.
custom_op.parameter_map["profiling_mode"].b = True
# Collect task trace data only.
custom_op.parameter_map["profiling_options"].s = tf.compat.as_bytes(
    '{"output":"/home/HwHiAiUser/output","task_trace":"on"}')
# Collect both task trace and iteration (training) trace data. Start with task
# trace only; if that is not enough to locate the problem, switch to the line
# below to collect iteration trace data as well.
# custom_op.parameter_map["profiling_options"].s = tf.compat.as_bytes('{"output":"/home/HwHiAiUser/output","task_trace":"on","training_trace":"on","aicpu":"on","fp_point":"","bp_point":"","aic_metrics":"PipeUtilization"}')

config = npu_config_proto(config_proto=session_config)
with tf.Session(config=config) as sess:
    sess.run(tf.global_variables_initializer())
    # `interaction_table` is defined by the surrounding model code (not shown
    # here); its initializer runs in the default session opened above.
    interaction_table.init.run()
您可以尝试先开启task_trace任务轨迹数据采集:
# Profiling with task trace collection only.
# NOTE(review): `config` is assumed to be a tf.ConfigProto created earlier and
# `RewriterConfig` to be imported by the surrounding script - neither is shown
# in this snippet.
custom_op = config.graph_options.rewrite_options.custom_optimizers.add()
custom_op.name = "NpuOptimizer"
custom_op.parameter_map["use_off_line"].b = True
# Enable profiling data collection.
custom_op.parameter_map["profiling_mode"].b = True
# Write results to the given output directory and collect task trace data.
custom_op.parameter_map["profiling_options"].s = tf.compat.as_bytes(
    '{"output":"/home/HwHiAiUser/output","task_trace":"on"}')
# Explicitly switch off the remapping and memory-optimization rewrite passes.
config.graph_options.rewrite_options.remapping = RewriterConfig.OFF
config.graph_options.rewrite_options.memory_optimization = RewriterConfig.OFF

with tf.Session(config=config) as sess:
    # Placeholder from the original sample: pass your own fetches (and
    # feed_dict) here - sess.run() needs at least a `fetches` argument.
    sess.run()
# Profiling with both task trace and iteration (training) trace collection.
# NOTE(review): `config` is assumed to be a tf.ConfigProto created earlier and
# `RewriterConfig` to be imported by the surrounding script - neither is shown
# in this snippet.
custom_op = config.graph_options.rewrite_options.custom_optimizers.add()
custom_op.name = "NpuOptimizer"
custom_op.parameter_map["use_off_line"].b = True
# Enable profiling data collection.
custom_op.parameter_map["profiling_mode"].b = True
# fp_point / bp_point are left empty here; the accompanying documentation says
# the system then determines them automatically.
custom_op.parameter_map["profiling_options"].s = tf.compat.as_bytes(
    '{"output":"/home/HwHiAiUser/output","task_trace":"on","training_trace":"on","aicpu":"on","fp_point":"","bp_point":"","aic_metrics":"PipeUtilization"}')
# Explicitly switch off the remapping and memory-optimization rewrite passes.
config.graph_options.rewrite_options.remapping = RewriterConfig.OFF
config.graph_options.rewrite_options.memory_optimization = RewriterConfig.OFF

with tf.Session(config=config) as sess:
    # Placeholder from the original sample: pass your own fetches (and
    # feed_dict) here - sess.run() needs at least a `fetches` argument.
    sess.run()
需要注意的是,采集迭代轨迹数据需要配置fp_point(训练网络迭代轨迹正向算子的开始位置)和bp_point(反向算子的结束位置)。这两个参数可直接配置为空,由系统自动获取;也可参考“如何获取fp_point与bp_point”手动配置。
相关接口详细介绍请参考Profiling。