Samples
Sample Code
To perform online inference, the model must be built and optimized when sess.run() is executed for the first time, which makes that first call significantly slower. Therefore, initialize the model as few times as possible over the app's lifetime. In this sample, the inference workflow is encapsulated in a Classifier object so that the app can manage the inference process by controlling the lifetime of that object.
See the infer_from_pb.py sample code as follows.
# Load an already-trained .pb model to perform inference.
import tensorflow as tf
import os
import argparse
from tensorflow.core.protobuf.rewriter_config_pb2 import RewriterConfig
import npu_bridge
import time
import numpy as np


def parse_args():
    '''
    Set the model path, input, and output.

    Returns:
        argparse.Namespace with batchsize, model_path, image_path,
        label_file, input_tensor_name and output_tensor_name.
    Raises:
        ValueError: if unknown command line arguments are supplied.
    '''
    parser = argparse.ArgumentParser(
        formatter_class=argparse.ArgumentDefaultsHelpFormatter)
    parser.add_argument('--batchsize', default=1,
                        help="""batchsize""")
    parser.add_argument('--model_path', default='pb/resnet50HC.pb',
                        help="""pb path""")
    parser.add_argument('--image_path', default='image-50000',
                        help="""the data path""")
    parser.add_argument('--label_file', default='val_label.txt',
                        help="""label file""")
    parser.add_argument('--input_tensor_name', default='input_data:0',
                        help="""input_tensor_name""")
    parser.add_argument('--output_tensor_name',
                        default='resnet_model/final_dense:0',
                        help="""output_tensor_name""")
    args, unknown_args = parser.parse_known_args()
    if len(unknown_args) > 0:
        for bad_arg in unknown_args:
            print("ERROR: Unknown command line arg: %s" % bad_arg)
        raise ValueError("Invalid command line arg(s)")
    return args


def read_file(image_name, path):
    '''
    Read image information from the tag file.

    Args:
        image_name: image file name (without extension) to look up.
        path: label file path; each line contains "<file name> <label>".
    Returns:
        The label of the image as an int, shifted by +1.
        # NOTE(review): the +1 presumably maps 0-based labels to the
        # model's 1-based class indices — confirm against the label file.
    Raises:
        ValueError: if image_name does not appear in the label file.
    '''
    with open(path, 'r') as cs:
        rs_list = cs.readlines()
    for name in rs_list:
        if image_name in str(name):
            num = str(name).split(" ")[1]
            return int(num) + 1
    # Fix: the original raised a confusing NameError (unbound 'num') when
    # the image was missing from the label file; fail with a clear message.
    raise ValueError("image '%s' not found in label file '%s'" % (image_name, path))


def normalize(inputs):
    '''
    Normalize input images.

    Subtracts the per-channel mean and divides by the per-channel std.

    Args:
        inputs: image tensor with 3 channels in the last dimension,
            pixel values in [0, 255].
    Returns:
        The normalized image tensor.
    '''
    mean = [121.0, 115.0, 100.0]
    std = [70.0, 68.0, 71.0]
    # Expand the constants to shape (1, 1, 3) so they broadcast over H and W.
    mean = tf.expand_dims(tf.expand_dims(mean, 0), 0)
    std = tf.expand_dims(tf.expand_dims(std, 0), 0)
    inputs = inputs - mean
    inputs = inputs * (1.0 / std)
    return inputs


def image_process(image_path, label_file):
    '''
    Preprocess input images.

    Decodes every image under image_path, crops the central region and
    resizes it to 224x224, normalizes it, and casts it to float16; also
    reads each image's label from label_file.

    Args:
        image_path: directory containing the JPEG images.
        label_file: label file path, see read_file().
    Returns:
        (images, labels, images_count): numpy array of preprocessed
        images, numpy array of labels, and the number of images.
    '''
    imagelist = []
    labellist = []
    images_count = 0
    for file in os.listdir(image_path):
        # NOTE(review): a fresh session/graph is built for every image,
        # which is slow; kept as-is since this sample favors simplicity.
        with tf.Session().as_default():
            image_file = os.path.join(image_path, file)
            image_name = image_file.split('/')[-1].split('.')[0]
            # images preprocessing
            image = tf.gfile.FastGFile(image_file, 'rb').read()
            img = tf.image.decode_jpeg(image, channels=3)
            # Crop the central 80% of the image and resize to the 224x224
            # input resolution expected by ResNet-50.
            bbox = tf.constant([0.1, 0.1, 0.9, 0.9])
            img = tf.image.crop_and_resize(img[None, :, :, :], bbox[None, :], [0], [224, 224])[0]
            img = tf.clip_by_value(img, 0., 255.)
            img = normalize(img)
            # The inference graph runs in fp16 (precision_mode force_fp16).
            img = tf.cast(img, tf.float16)
            images_count = images_count + 1
            img = img.eval()
            imagelist.append(img)
            tf.reset_default_graph()
            # read image label from label_file
            label = read_file(image_name, label_file)
            labellist.append(label)
    return np.array(imagelist), np.array(labellist), images_count


class Classifier(object):
    '''
    Encapsulates graph loading, session lifetime and batched inference so
    that the expensive first-run model build/compilation happens only once
    per object lifetime.
    '''
    # set batchsize:
    args = parse_args()
    batch_size = int(args.batchsize)

    def __init__(self):
        # Configurations of model compilation and tuning on the Ascend AI Processor
        config = tf.ConfigProto()
        custom_op = config.graph_options.rewrite_options.custom_optimizers.add()
        custom_op.name = "NpuOptimizer"
        # Configuration 1: Schedule the inference job to the Ascend AI Processor.
        custom_op.parameter_map["use_off_line"].b = True
        # Configuration 2: In the online inference scenario, retain the
        # default precision selection force_fp16 for better performance.
        custom_op.parameter_map["precision_mode"].s = tf.compat.as_bytes("force_fp16")
        # Configuration 3: Graph run mode — 0 for inference, 1 (default) for training.
        custom_op.parameter_map["graph_run_mode"].i = 0
        # Configuration 4: Disable remapping and MemoryOptimizer.
        config.graph_options.rewrite_options.remapping = RewriterConfig.OFF
        config.graph_options.rewrite_options.memory_optimization = RewriterConfig.OFF
        # Load the model and set the input and output nodes of the model.
        args = parse_args()
        self.graph = self.__load_model(args.model_path)
        self.input_tensor = self.graph.get_tensor_by_name(args.input_tensor_name)
        self.output_tensor = self.graph.get_tensor_by_name(args.output_tensor_name)
        # Model compilation is triggered the first time sess.run() is
        # called, which takes a long time, so the session is tied to the
        # object's lifetime instead of being recreated per request.
        self.sess = tf.Session(config=config, graph=self.graph)

    def __load_model(self, model_file):
        """
        Load a static graph.

        Args:
            model_file: path of the frozen .pb model file.
        Returns:
            A tf.Graph with the model's GraphDef imported under no name scope.
        """
        with tf.gfile.GFile(model_file, "rb") as gf:
            graph_def = tf.GraphDef()
            graph_def.ParseFromString(gf.read())
        with tf.Graph().as_default() as graph:
            tf.import_graph_def(graph_def, name="")
        return graph

    def do_infer(self, batch_data):
        """
        Start inference.

        Args:
            batch_data: list of input batches to feed to the model.
        Returns:
            (outputs, total_time): stacked per-batch model outputs, and
            the elapsed wall-clock inference time excluding the first
            batch (which also pays the one-off compilation cost).
        """
        out_list = []
        total_time = 0
        i = 0
        for data in batch_data:
            t = time.time()
            out = self.sess.run(self.output_tensor, feed_dict={self.input_tensor: data})
            # Skip the first run when timing: it includes model compilation.
            if i > 0:
                total_time = total_time + time.time() - t
            i = i + 1
            out_list.append(out)
        return np.array(out_list), total_time

    def batch_process(self, image_data, label_data):
        """
        Batch processing.

        Obtains the batch information of the current input data and
        automatically adjusts the data to fixed-size batches, padding
        the tail with zeros when needed.

        Args:
            image_data: numpy array of shape (N, 224, 224, 3).
            label_data: numpy array of N labels.
        Returns:
            (mini_batch, mini_label): lists of image batches and the
            corresponding label batches, each of length batch_size.
        """
        n_dim = image_data.shape[0]
        batch_size = self.batch_size
        # If the data is insufficient for the entire batch, pad the data.
        m = n_dim % batch_size
        if m < batch_size and m > 0:
            # The part without data is padded with 0s.
            # Fix: pad with the images' own dtype (float16 after
            # preprocessing); a float32 pad would silently upcast the
            # whole array during concatenation.
            pad = np.zeros((batch_size - m, 224, 224, 3)).astype(image_data.dtype)
            image_data = np.concatenate((image_data, pad), axis=0)
            # Fix: pad the labels as well so the last label batch has the
            # same length as its image batch (padded entries are never
            # used by the accuracy computation in main()).
            label_pad = np.zeros((batch_size - m,)).astype(label_data.dtype)
            label_data = np.concatenate((label_data, label_pad), axis=0)
        # Define the minimum batch that can be divided.
        mini_batch = []
        mini_label = []
        i = 0
        while i < n_dim:
            # Split the padded data into fixed-size mini batches.
            mini_batch.append(image_data[i: i + batch_size, :, :, :])
            mini_label.append(label_data[i: i + batch_size])
            i += batch_size
        return mini_batch, mini_label


def main():
    args = parse_args()
    top1_count = 0
    top5_count = 0
    # Data preprocessing
    tf.reset_default_graph()
    print("########NOW Start Preprocess!!!#########")
    images, labels, images_count = image_process(args.image_path, args.label_file)
    # Batch processing
    print("########NOW Start Batch!!!#########")
    classifier = Classifier()
    batch_images, batch_labels = classifier.batch_process(images, labels)
    # Start inference.
    print("########NOW Start inference!!!#########")
    batch_logits, total_time = classifier.do_infer(batch_images)
    # Compute the accuracy over the full batches only; a padded partial
    # tail batch (if any) is excluded by total_step.
    batchsize = int(args.batchsize)
    total_step = int(images_count / batchsize)
    print("########NOW Start Compute Accuracy!!!#########")
    for i in range(total_step):
        top1acc = tf.reduce_sum(tf.cast(tf.equal(tf.argmax(batch_logits[i], 1), batch_labels[i]), tf.float32))
        top5acc = tf.reduce_sum(tf.cast(tf.nn.in_top_k(batch_logits[i], batch_labels[i], 5), tf.float32))
        with tf.Session().as_default():
            tf.reset_default_graph()
            top1_count += top1acc.eval()
            top5_count += top5acc.eval()
    print('+----------------------------------------+')
    print('the correct num is {}, total num is {}.'.format(top1_count, total_step * batchsize))
    print('Top1 accuracy:', top1_count / (total_step * batchsize) * 100)
    print('Top5 accuracy:', top5_count / (total_step * batchsize) * 100)
    print('images number = ', total_step * batchsize)
    print('images/sec = ', (total_step * batchsize) / total_time)
    print('+----------------------------------------+')


if __name__ == '__main__':
    main()
Instructions
The following uses the ResNet-50 model as an example to describe how to perform online inference.
- Download a pre-trained model.
- Download the trained original model file resnet50_tensorflow_1.7.pb from the resnet50_for_TensorFlow directory in the ModelZoo-TensorFlow repository of the Ascend community at Gitee. For details, refer to the corresponding README.
- Prepare the sample dataset ImageNet 2012. Download it from the official website at https://www.image-net.org/.
The downloaded pre-trained model can run inference only under batch size 1. To enable a larger batch size, freeze the .ckpt files generated at training time into a .pb model file with a user-defined batch size.
- Edit the inference script.
Create a model script file infer_from_pb.py and write code by referring to Sample Code.
- Configure environment variables on which the online inference process depends.
# Set one of the following environment variables for the installation path of the
# infrastructure software on which training depends. The following assumes that
# the installation user is HwHiAiUser:
# Method 1: Install Ascend-CANN-Toolkit for training on an Ascend AI device,
# which serves as the development environment.
. /home/HwHiAiUser/Ascend/ascend-toolkit/set_env.sh
# Method 2: Install Ascend-CANN-NNAE on an Ascend AI device.
. /home/HwHiAiUser/Ascend/nnae/set_env.sh

# TF Adapter Python library. ${TFPLUGIN_INSTALL_PATH} indicates the
# installation path of the TF Adapter package.
export PYTHONPATH=${TFPLUGIN_INSTALL_PATH}:$PYTHONPATH

# If multiple Python 3 versions exist in the operating environment, specify
# your Python installation path in the environment variables. The following
# takes a Python 3.7.5 installation as an example.
export PATH=/usr/local/python3.7.5/bin:$PATH
export LD_LIBRARY_PATH=/usr/local/python3.7.5/lib:$LD_LIBRARY_PATH

# Add the path of the current script to PYTHONPATH. For example:
export PYTHONPATH="$PYTHONPATH:/root/models"

# Job ID: user-defined; only letters, digits, hyphens (-), and underscores (_)
# are supported. You are advised not to use a number starting with 0.
export JOB_ID=10087
If you need to upgrade GCC in OSs such as CentOS, Debian, and BC-Linux, add ${install_path}/lib64 to the LD_LIBRARY_PATH dynamic library search path variable, where ${install_path} is the GCC installation path. For details, see the GCC upgrade instructions in the installation guide.
- Run inference.
python3 infer_from_pb.py --model_path=./resnet50_tensorflow_1.7.pb --image_path=/data/dataset/imagenet2012/val --label_file=/data/dataset/imagenet2012/val_label.txt --input_tensor_name=Placeholder:0 --output_tensor_name=fp32_vars/dense/BiasAdd:0
The preceding command is an example only. Modify the arguments as needed. For details about how to determine the input/output node names of a .pb model, see Reading Node Names from the PB Model File.
When evaluating online inference performance, note that the first epoch additionally includes the time required for operator and graph compilation; exclude this overhead by starting the timing from the second epoch.