Skip to content

Commit

Permalink
create Int8 Calibrating
Browse files Browse the repository at this point in the history
  • Loading branch information
lzmisscc committed Jan 30, 2022
1 parent 585d302 commit d8615d3
Showing 1 changed file with 274 additions and 0 deletions.
274 changes: 274 additions & 0 deletions notebooks/build_engine.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,274 @@
#
# Copyright (c) 2021, NVIDIA CORPORATION. All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#

import os
import sys

from numpy.core.fromnumeric import trace
sys.path.append("./")

import logging
import argparse

import numpy as np
import tensorrt as trt
import pycuda.driver as cuda
import pycuda.autoinit
import traceback

from yolort.v5.utils.datasets import IMG_FORMATS, VID_FORMATS, LoadImages

# Route log records to the root handler at INFO level and keep a module-wide
# logger named "EngineBuilder" that every class below writes to.
logging.basicConfig(level=logging.INFO)
log = logging.getLogger("EngineBuilder")
log.setLevel(logging.INFO)


# Script-wide parameters. `img_source`, `img_size` and `stride` are read by
# ImageBatcher when it opens the calibration dataset; the remaining values are
# not referenced elsewhere in this file.
img_size = [320, 320]
stride = 32
score_thresh = 0.35
iou_thresh = 0.45
detections_per_img = 100
half = False
img_source = "val2017/"


class ImageBatcher:
    """
    Wraps the image dataset that feeds INT8 calibration.

    Images are loaded with ``LoadImages`` from the module-level ``img_source`` using the module-level
    ``img_size`` and ``stride``. One image is served per calibration batch (``batch_size`` is fixed to 1).
    """

    def __init__(self, calib_shape=None, calib_dtype=None) -> None:
        """
        :param calib_shape: The shape of the calibration input. Only the two trailing entries
            (height, width) are used, so both ``(H, W)`` and a full ``(N, C, H, W)`` shape are accepted.
            When omitted, the module-level ``img_size`` is used.
        :param calib_dtype: The numpy dtype of the calibration input.
        :raises ValueError: If ``calib_shape`` has fewer than two dimensions.
        """
        self.dataset = LoadImages(img_source, img_size=img_size, stride=stride, auto=False)
        self.dtype = calib_dtype
        self.batch_size = 1

        # Batch and channel dimensions are fixed here, so only the spatial part of the requested shape
        # matters. Prepending them to a full NCHW shape would describe a 6-D tensor and make the
        # calibrator allocate a far larger device buffer than one image needs.
        spatial = list(img_size if calib_shape is None else calib_shape)[-2:]
        if len(spatial) != 2:
            raise ValueError("calib_shape must provide at least (height, width), got {}".format(calib_shape))
        self.shape = (self.batch_size, 3, *spatial)

        self.num_images = len(self.dataset)
        self.image_index = 0

    def get_batch(self):
        """
        :return: An iterator over the underlying dataset.
        """
        return iter(self.dataset)


class EngineCalibrator(trt.IInt8EntropyCalibrator2):
    """
    Implements the INT8 Entropy Calibrator 2.
    """

    def __init__(self, cache_file):
        """
        :param cache_file: The location of the cache file.
        """
        super().__init__()
        self.cache_file = cache_file
        self.image_batcher: ImageBatcher = None
        self.batch_allocation = None
        # Size in bytes of `batch_allocation`; used to guard host-to-device copies.
        self.batch_nbytes = 0
        self.batch_generator = None

    def set_image_batcher(self, image_batcher: ImageBatcher):
        """
        Define the image batcher to use, if any. If using only the cache file, an image batcher doesn't need
        to be defined.
        :param image_batcher: The ImageBatcher object
        """
        self.image_batcher = image_batcher
        self.batch_nbytes = int(np.dtype(self.image_batcher.dtype).itemsize * np.prod(self.image_batcher.shape))
        self.batch_allocation = cuda.mem_alloc(self.batch_nbytes)
        self.batch_generator = self.image_batcher.get_batch()

    def get_batch_size(self):
        """
        Overrides from trt.IInt8EntropyCalibrator2.
        Get the batch size to use for calibration.
        :return: Batch size.
        """
        if self.image_batcher:
            return self.image_batcher.batch_size
        return 1

    def get_batch(self, names):
        """
        Overrides from trt.IInt8EntropyCalibrator2.
        Get the next batch to use for calibration, as a list of device memory pointers.
        :param names: The names of the inputs, if useful to define the order of inputs.
        :return: A list of int-casted memory pointers, or None when there are no more batches, when no
            image batcher is set, or when preparing the batch failed.
        """
        if not self.image_batcher:
            return None

        log.info("Calibrating image ...")
        try:
            # The dataset yields a tuple whose second element is the preprocessed image.
            _, image, *_ = next(self.batch_generator)

            target_dtype = np.dtype(self.image_batcher.dtype)
            data = np.asarray(image)
            if np.issubdtype(target_dtype, np.floating) and not np.issubdtype(data.dtype, np.floating):
                # NOTE(review): integer pixel data is scaled to [0, 1] on the assumption that the exported
                # network expects normalized input (the usual YOLOv5 convention) -- confirm against the
                # preprocessing used at inference time.
                data = data.astype(target_dtype) / target_dtype.type(255)
            # Add the batch axis and make sure the host buffer has the dtype the device buffer was sized for.
            data = np.ascontiguousarray(data[np.newaxis], dtype=target_dtype)

            if data.nbytes > self.batch_nbytes:
                # Copying would write past the end of the device allocation.
                log.error("Calibration batch of shape {} ({} bytes) does not fit the {} byte allocation".format(
                    data.shape, data.nbytes, self.batch_nbytes))
                return None

            self.image_batcher.image_index += 1
            log.info("Calibrating image {} / {}".format(self.image_batcher.image_index, self.image_batcher.num_images))
            # Copy the image itself to the device buffer handed to TensorRT.
            cuda.memcpy_htod(self.batch_allocation, data)
            return [int(self.batch_allocation)]
        except StopIteration:
            log.info("Finished calibration batches")
            return None
        except Exception:
            # Best effort: report the failure and end calibration rather than raising into TensorRT.
            log.exception("Failed to prepare calibration batch")
            return None

    def read_calibration_cache(self):
        """
        Overrides from trt.IInt8EntropyCalibrator2.
        Read the calibration cache file stored on disk, if it exists.
        :return: The contents of the cache file, if any, otherwise None.
        """
        if self.cache_file and os.path.exists(self.cache_file):
            with open(self.cache_file, "rb") as f:
                log.info("Using calibration cache file: {}".format(self.cache_file))
                return f.read()
        return None

    def write_calibration_cache(self, cache):
        """
        Overrides from trt.IInt8EntropyCalibrator2.
        Store the calibration cache to a file on disk.
        :param cache: The contents of the calibration cache to store.
        """
        if not self.cache_file:
            log.warning("No calibration cache file configured, calibration cache is not saved")
            return
        with open(self.cache_file, "wb") as f:
            log.info("Writing calibration cache data to: {}".format(self.cache_file))
            f.write(cache)


class EngineBuilder:
    """
    Parses an ONNX graph and builds a TensorRT engine from it.
    """

    def __init__(self, verbose=False):
        """
        :param verbose: If enabled, a higher verbosity level will be set on the TensorRT logger.
        """
        self.trt_logger = trt.Logger(trt.Logger.INFO)
        if verbose:
            self.trt_logger.min_severity = trt.Logger.Severity.VERBOSE

        trt.init_libnvinfer_plugins(self.trt_logger, namespace="")

        self.builder = trt.Builder(self.trt_logger)
        self.config = self.builder.create_builder_config()
        self.config.max_workspace_size = 8 * (2 ** 30)  # 8 GB

        # Populated by create_network().
        self.batch_size = None
        self.network = None
        self.parser = None

    def create_network(self, onnx_path):
        """
        Parse the ONNX graph and create the corresponding TensorRT network definition.
        Exits the process with status 1 if the ONNX file cannot be parsed or if no positive batch size
        can be read from the network inputs.
        :param onnx_path: The path to the ONNX graph to load.
        """
        network_flags = (1 << int(trt.NetworkDefinitionCreationFlag.EXPLICIT_BATCH))

        self.network = self.builder.create_network(network_flags)
        self.parser = trt.OnnxParser(self.network, self.trt_logger)

        onnx_path = os.path.realpath(onnx_path)
        with open(onnx_path, "rb") as f:
            parsed = self.parser.parse(f.read())
        if not parsed:
            log.error("Failed to load ONNX file: {}".format(onnx_path))
            for error in range(self.parser.num_errors):
                log.error(self.parser.get_error(error))
            sys.exit(1)

        inputs = [self.network.get_input(i) for i in range(self.network.num_inputs)]
        outputs = [self.network.get_output(i) for i in range(self.network.num_outputs)]

        log.info("Network Description")
        for network_input in inputs:
            # The batch size of the last input wins when the network has several inputs.
            self.batch_size = network_input.shape[0]
            log.info("Input '{}' with shape {} and dtype {}".format(
                network_input.name, network_input.shape, network_input.dtype))
        for network_output in outputs:
            log.info("Output '{}' with shape {} and dtype {}".format(
                network_output.name, network_output.shape, network_output.dtype))

        # Explicit check instead of `assert`, which is stripped when Python runs with -O.
        if self.batch_size is None or self.batch_size <= 0:
            log.error("The network must have at least one input with a positive batch size, got: {}".format(
                self.batch_size))
            sys.exit(1)
        self.builder.max_batch_size = self.batch_size

    def create_engine(self, engine_path, precision, calib_input=None, calib_cache=None, calib_num_images=25000,
                      calib_batch_size=8, calib_preprocessor=None):
        """
        Build the TensorRT engine and serialize it to disk.
        Exits the process with status 1 if TensorRT fails to build the engine.
        :param engine_path: The path where to serialize the engine to.
        :param precision: The datatype to use for the engine, either 'fp32', 'fp16' or 'int8'.
        :param calib_input: The path to a directory holding the calibration images. Currently unused: the
            ImageBatcher reads from the module-level `img_source`.
        :param calib_cache: The path where to write the calibration cache to, or if it already exists, load it from.
            Required when precision is 'int8'.
        :param calib_num_images: The maximum number of images to use for calibration. Currently unused.
        :param calib_batch_size: The batch size to use for the calibration process. Currently unused: the
            ImageBatcher serves one image per batch.
        :param calib_preprocessor: The ImageBatcher preprocessor algorithm to use. Currently unused.
        :raises ValueError: If precision is 'int8', INT8 is supported, and no calib_cache path is given.
        """
        engine_path = os.path.realpath(engine_path)
        engine_dir = os.path.dirname(engine_path)
        os.makedirs(engine_dir, exist_ok=True)
        log.info("Building {} Engine in {}".format(precision, engine_path))

        inputs = [self.network.get_input(i) for i in range(self.network.num_inputs)]

        if precision == "fp16":
            if not self.builder.platform_has_fast_fp16:
                log.warning("FP16 is not supported natively on this platform/device")
            else:
                self.config.set_flag(trt.BuilderFlag.FP16)
        elif precision == "int8":
            if not self.builder.platform_has_fast_int8:
                log.warning("INT8 is not supported natively on this platform/device")
            else:
                if calib_cache is None:
                    raise ValueError("calib_cache is required when building in int8 precision")
                self.config.set_flag(trt.BuilderFlag.INT8)
                calibrator = EngineCalibrator(calib_cache)
                self.config.int8_calibrator = calibrator
                if not os.path.exists(calib_cache):
                    # No cache yet, so calibration images are needed. ImageBatcher adds the batch and
                    # channel dimensions itself, so only the spatial (H, W) part of the input is passed.
                    calib_shape = list(inputs[0].shape[2:])
                    calib_dtype = trt.nptype(inputs[0].dtype)
                    calibrator.set_image_batcher(ImageBatcher(calib_shape, calib_dtype))

        # build_engine() returns None when the build fails; entering a `with` on None would only raise
        # an unrelated AttributeError, so report the failure explicitly.
        engine = self.builder.build_engine(self.network, self.config)
        if engine is None:
            log.error("Failed to build the TensorRT engine")
            sys.exit(1)

        with engine, open(engine_path, "wb") as f:
            log.info("Serializing engine to file: {:}".format(engine_path))
            f.write(engine.serialize())


def main(args):
    """
    Build a TensorRT engine from the parsed command line arguments.
    :param args: The argparse namespace providing verbose, onnx, engine, precision and the calib_* options.
    """
    engine_builder = EngineBuilder(args.verbose)
    engine_builder.create_network(args.onnx)
    engine_builder.create_engine(
        engine_path=args.engine,
        precision=args.precision,
        calib_input=args.calib_input,
        calib_cache=args.calib_cache,
        calib_num_images=args.calib_num_images,
        calib_batch_size=args.calib_batch_size,
        calib_preprocessor=args.calib_preprocessor,
    )


if __name__ == "__main__":
    # Command line entry point: parse the options, validate the required ones and build the engine.
    parser = argparse.ArgumentParser()
    parser.add_argument("-o", "--onnx", help="The input ONNX model file to load")
    parser.add_argument("-e", "--engine", help="The output path for the TRT engine")
    parser.add_argument("-p", "--precision", default="fp16", choices=["fp32", "fp16", "int8"],
                        help="The precision mode to build in, either 'fp32', 'fp16' or 'int8', default: 'fp16'")
    parser.add_argument("-v", "--verbose", action="store_true", help="Enable more verbose log output")
    parser.add_argument("--calib_input", help="The directory holding images to use for calibration")
    parser.add_argument("--calib_cache", default="./calibration.cache",
                        help="The file path for INT8 calibration cache to use, default: ./calibration.cache")
    # The help text previously advertised a default of 25000 while the actual default is 10.
    parser.add_argument("--calib_num_images", default=10, type=int,
                        help="The maximum number of images to use for calibration, default: 10")
    parser.add_argument("--calib_batch_size", default=1, type=int,
                        help="The batch size for the calibration process, default: 1")
    parser.add_argument("--calib_preprocessor", default="V2", choices=["V1", "V1MS", "V2"],
                        help="Set the calibration image preprocessor to use, either 'V2', 'V1' or 'V1MS', default: V2")
    args = parser.parse_args()
    if not all([args.onnx, args.engine]):
        parser.print_help()
        log.error("These arguments are required: --onnx and --engine")
        sys.exit(1)
    if args.precision == "int8" and not any([args.calib_input, args.calib_cache]):
        parser.print_help()
        log.error("When building in int8 precision, either --calib_input or --calib_cache are required")
        sys.exit(1)
    main(args)

0 comments on commit d8615d3

Please sign in to comment.