This article collects typical usage examples of the tensorrt.Runtime method in Python. If you have been wondering what tensorrt.Runtime does, how to call it, or what real code using it looks like, the curated examples below may help. You can also explore further usage examples from the containing module, tensorrt.
The following presents 12 code examples of tensorrt.Runtime, sorted by popularity by default.
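Nearly all of the examples below share the same core pattern: create a trt.Logger, wrap it in a trt.Runtime, and call deserialize_cuda_engine on the bytes of a previously serialized engine file. Here is a minimal sketch of that pattern on its own; the engine path is a placeholder and the snippet assumes a standard TensorRT Python installation.

import tensorrt as trt

TRT_LOGGER = trt.Logger(trt.Logger.WARNING)

def load_engine(engine_path):
    # Deserialize an engine that was previously built and saved to disk.
    with open(engine_path, 'rb') as f, trt.Runtime(TRT_LOGGER) as runtime:
        return runtime.deserialize_cuda_engine(f.read())

# engine = load_engine('model.engine')  # 'model.engine' is a hypothetical path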
Example 1: __init__
# Required import: import tensorrt [as alias]
# Or: from tensorrt import Runtime [as alias]
def __init__(self, engine_path, input_names=None, output_names=None, final_shapes=None):
    # load engine
    self.logger = trt.Logger()
    self.runtime = trt.Runtime(self.logger)
    with open(engine_path, 'rb') as f:
        self.engine = self.runtime.deserialize_cuda_engine(f.read())
    self.context = self.engine.create_execution_context()

    if input_names is None:
        self.input_names = self._trt_input_names()
    else:
        self.input_names = input_names

    if output_names is None:
        self.output_names = self._trt_output_names()
    else:
        self.output_names = output_names

    self.final_shapes = final_shapes
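The constructor above presumably belongs to a small wrapper class around a serialized engine (the class name is not shown in the snippet). Assuming a hypothetical class name TRTModel, typical usage would look like this:

# Hypothetical usage; the class name and engine path are placeholders.
model = TRTModel('resnet50.engine')
# model.input_names / model.output_names now describe the engine bindings,
# and model.context can be used to run inference.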
Example 2: _load_from_state_dict
# Required import: import tensorrt [as alias]
# Or: from tensorrt import Runtime [as alias]
def _load_from_state_dict(
    self,
    state_dict,
    prefix,
    local_metadata,
    strict,
    missing_keys,
    unexpected_keys,
    error_msgs,
):
    engine_bytes = state_dict[prefix + "engine"]

    with trt.Logger() as logger, trt.Runtime(logger) as runtime:
        self.engine = runtime.deserialize_cuda_engine(engine_bytes)
        self.context = self.engine.create_execution_context()

    self.input_names = state_dict[prefix + "input_names"]
    self.output_names = state_dict[prefix + "output_names"]
Example 3: get_engine
# Required import: import tensorrt [as alias]
# Or: from tensorrt import Runtime [as alias]
def get_engine(onnx_file_path, engine_file_path=""):
    """Attempts to load a serialized engine if available, otherwise builds a new TensorRT engine and saves it."""
    def build_engine():
        """Takes an ONNX file and creates a TensorRT engine to run inference with"""
        with trt.Builder(TRT_LOGGER) as builder, builder.create_network() as network, trt.OnnxParser(network, TRT_LOGGER) as parser:
            builder.max_workspace_size = 1 << 30  # 1GB
            builder.max_batch_size = 1
            # Parse model file
            if not os.path.exists(onnx_file_path):
                print('ONNX file {} not found, please run yolov3_to_onnx.py first to generate it.'.format(onnx_file_path))
                exit(0)
            print('Loading ONNX file from path {}...'.format(onnx_file_path))
            with open(onnx_file_path, 'rb') as model:
                print('Beginning ONNX file parsing')
                parser.parse(model.read())
            print('Completed parsing of ONNX file')
            print('Building an engine from file {}; this may take a while...'.format(onnx_file_path))
            engine = builder.build_cuda_engine(network)
            print("Completed creating Engine")
            with open(engine_file_path, "wb") as f:
                f.write(engine.serialize())
            return engine

    if os.path.exists(engine_file_path):
        # If a serialized engine exists, use it instead of building a new one.
        print("Reading engine from file {}".format(engine_file_path))
        with open(engine_file_path, "rb") as f, trt.Runtime(TRT_LOGGER) as runtime:
            return runtime.deserialize_cuda_engine(f.read())
    else:
        return build_engine()
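Note that max_workspace_size, max_batch_size and build_cuda_engine belong to the older TensorRT 5/6 builder API. On TensorRT 7.x/8.x the ONNX parser requires an explicit-batch network and the workspace limit moves to a builder config; a rough sketch of the equivalent build step, not verified against every release, might look like this:

def build_engine_v2(onnx_file_path):
    # Explicit-batch networks are required by the ONNX parser on newer TensorRT.
    explicit_batch = 1 << int(trt.NetworkDefinitionCreationFlag.EXPLICIT_BATCH)
    with trt.Builder(TRT_LOGGER) as builder, \
         builder.create_network(explicit_batch) as network, \
         trt.OnnxParser(network, TRT_LOGGER) as parser, \
         builder.create_builder_config() as config:
        config.max_workspace_size = 1 << 30  # 1 GiB
        with open(onnx_file_path, 'rb') as model:
            if not parser.parse(model.read()):
                for i in range(parser.num_errors):
                    print(parser.get_error(i))
                return None
        return builder.build_engine(network, config)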
Example 4: get_engine
# Required import: import tensorrt [as alias]
# Or: from tensorrt import Runtime [as alias]
def get_engine(deploy_file, model_file, engine_path):
    try:
        with open(engine_path, "rb") as f, trt.Runtime(TRT_LOGGER) as runtime:
            # Note that we have to provide the plugin factory when deserializing
            # an engine built with an IPlugin or IPluginExt.
            return runtime.deserialize_cuda_engine(f.read(), fc_factory)
    except:
        # Fall back to building an engine if it cannot be loaded for any reason.
        engine = build_engine(deploy_file, model_file)
        with open(engine_path, "wb") as f:
            f.write(engine.serialize())
        return engine
# Loads a test case into the provided pagelocked_buffer.
Example 5: get_engine
# Required import: import tensorrt [as alias]
# Or: from tensorrt import Runtime [as alias]
def get_engine(onnx_file_path, engine_file_path=""):
    """Attempts to load a serialized engine if available, otherwise builds a new TensorRT engine and saves it."""
    def build_engine():
        """Takes an ONNX file and creates a TensorRT engine to run inference with"""
        with trt.Builder(TRT_LOGGER) as builder, builder.create_network() as network, trt.OnnxParser(network, TRT_LOGGER) as parser:
            builder.max_workspace_size = 1 << 28  # 256MiB
            builder.max_batch_size = 1
            # Parse model file
            if not os.path.exists(onnx_file_path):
                print('ONNX file {} not found, please run yolov3_to_onnx.py first to generate it.'.format(onnx_file_path))
                exit(0)
            print('Loading ONNX file from path {}...'.format(onnx_file_path))
            with open(onnx_file_path, 'rb') as model:
                print('Beginning ONNX file parsing')
                parser.parse(model.read())
            print('Completed parsing of ONNX file')
            print('Building an engine from file {}; this may take a while...'.format(onnx_file_path))
            engine = builder.build_cuda_engine(network)
            print("Completed creating Engine")
            with open(engine_file_path, "wb") as f:
                f.write(engine.serialize())
            return engine

    if os.path.exists(engine_file_path):
        # If a serialized engine exists, use it instead of building a new one.
        print("Reading engine from file {}".format(engine_file_path))
        with open(engine_file_path, "rb") as f, trt.Runtime(TRT_LOGGER) as runtime:
            return runtime.deserialize_cuda_engine(f.read())
    else:
        return build_engine()
Example 6: load_engine
# Required import: import tensorrt [as alias]
# Or: from tensorrt import Runtime [as alias]
def load_engine(filename: str):
    # Load serialized engine file into memory
    with open(filename, "rb") as f, trt.Runtime(TRT_LOGGER) as runtime:
        return runtime.deserialize_cuda_engine(f.read())
Example 7: _load_engine
# Required import: import tensorrt [as alias]
# Or: from tensorrt import Runtime [as alias]
def _load_engine(self):
    TRTbin = 'models/ssd_mobilenet/TRT_ssd_mobilenet_v2_coco.bin'
    with open(TRTbin, 'rb') as f, trt.Runtime(self.trt_logger) as runtime:
        return runtime.deserialize_cuda_engine(f.read())
Example 8: _load_engine
# Required import: import tensorrt [as alias]
# Or: from tensorrt import Runtime [as alias]
def _load_engine(self):
    TRTbin = 'ssd/TRT_%s.bin' % self.model
    with open(TRTbin, 'rb') as f, trt.Runtime(self.trt_logger) as runtime:
        return runtime.deserialize_cuda_engine(f.read())
Example 9: _load_engine
# Required import: import tensorrt [as alias]
# Or: from tensorrt import Runtime [as alias]
def _load_engine(self):
    TRTbin = 'yolov3_onnx/%s.trt' % self.model
    with open(TRTbin, 'rb') as f, trt.Runtime(self.trt_logger) as runtime:
        return runtime.deserialize_cuda_engine(f.read())
Example 10: get_engine
# Required import: import tensorrt [as alias]
# Or: from tensorrt import Runtime [as alias]
def get_engine(onnx_file_path, engine_file_path=""):
    """Attempts to load a serialized engine if available, otherwise builds a new TensorRT engine and saves it."""
    def build_engine():
        """Takes an ONNX file and creates a TensorRT engine to run inference with"""
        with trt.Builder(TRT_LOGGER) as builder, builder.create_network() as network, trt.OnnxParser(network, TRT_LOGGER) as parser:
            builder.max_workspace_size = 1 << 30  # 1GB
            builder.max_batch_size = 1
            builder.fp16_mode = True
            # Parse model file
            if not os.path.exists(onnx_file_path):
                print('ONNX file {} not found, please run yolov3_to_onnx.py first to generate it.'.format(onnx_file_path))
                exit(0)
            print('Loading ONNX file from path {}...'.format(onnx_file_path))
            with open(onnx_file_path, 'rb') as model:
                print('Beginning ONNX file parsing')
                parser.parse(model.read())
            print('Completed parsing of ONNX file')
            print('Building an engine from file {}; this may take a while...'.format(onnx_file_path))
            engine = builder.build_cuda_engine(network)
            print("Completed creating Engine")
            with open(engine_file_path, "wb") as f:
                f.write(engine.serialize())
            return engine

    if os.path.exists(engine_file_path):
        # If a serialized engine exists, use it instead of building a new one.
        print("Reading engine from file {}".format(engine_file_path))
        with open(engine_file_path, "rb") as f, trt.Runtime(TRT_LOGGER) as runtime:
            return runtime.deserialize_cuda_engine(f.read())
    else:
        return build_engine()
Example 11: infer_with_trt
# Required import: import tensorrt [as alias]
# Or: from tensorrt import Runtime [as alias]
def infer_with_trt(img, model):
    """Run inference on the image with a TensorRT engine."""
    import pycuda.autoinit
    import pycuda.driver as cuda
    import tensorrt as trt

    TRT_LOGGER = trt.Logger(trt.Logger.INFO)
    with open(model, 'rb') as f, trt.Runtime(TRT_LOGGER) as runtime:
        engine = runtime.deserialize_cuda_engine(f.read())
    assert len(engine) == 2, 'ERROR: bad number of bindings'
    host_input, cuda_input, host_output, cuda_output = init_trt_buffers(
        cuda, trt, engine)
    stream = cuda.Stream()
    context = engine.create_execution_context()
    context.set_binding_shape(0, (1, 224, 224, 3))
    np.copyto(host_input, img.ravel())
    cuda.memcpy_htod_async(cuda_input, host_input, stream)
    if trt.__version__[0] >= '7':
        context.execute_async_v2(bindings=[int(cuda_input), int(cuda_output)],
                                 stream_handle=stream.handle)
    else:
        context.execute_async(bindings=[int(cuda_input), int(cuda_output)],
                              stream_handle=stream.handle)
    cuda.memcpy_dtoh_async(host_output, cuda_output, stream)
    stream.synchronize()
    return host_output
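init_trt_buffers is not defined in this example. A sketch of what such a helper might look like, assuming one input binding and one output binding and using only standard pycuda/TensorRT calls, is shown below; it illustrates the expected return values rather than the original implementation.

def init_trt_buffers(cuda, trt, engine):
    """Allocate page-locked host buffers and device buffers for a
    1-input/1-output engine (sketch only)."""
    host_bufs, dev_bufs = [], []
    for binding in engine:
        # abs() treats a single dynamic dimension (-1) as 1, which matches
        # the (1, 224, 224, 3) shape set by the caller above.
        size = abs(trt.volume(engine.get_binding_shape(binding)))
        dtype = trt.nptype(engine.get_binding_dtype(binding))
        host_bufs.append(cuda.pagelocked_empty(size, dtype))
        dev_bufs.append(cuda.mem_alloc(host_bufs[-1].nbytes))
    return host_bufs[0], dev_bufs[0], host_bufs[1], dev_bufs[1]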
Example 12: __init__
# Required import: import tensorrt [as alias]
# Or: from tensorrt import Runtime [as alias]
def __init__(self, trt_engine_path, uff_model_path, trt_engine_datatype=trt.DataType.FLOAT, batch_size=1):
    """Initializes TensorRT objects needed for model inference.

    Args:
        trt_engine_path (str): path where the TensorRT engine should be stored
        uff_model_path (str): path of the .uff model
        trt_engine_datatype (trt.DataType): requested precision of the TensorRT engine used for inference
        batch_size (int): batch size for which the engine should be optimized
    """
    # We first load all custom plugins shipped with TensorRT;
    # some of them will be needed during inference.
    trt.init_libnvinfer_plugins(TRT_LOGGER, '')

    # Initialize the runtime needed for loading the TensorRT engine from file
    self.trt_runtime = trt.Runtime(TRT_LOGGER)
    # TRT engine placeholder
    self.trt_engine = None

    # Display requested engine settings to stdout
    print("TensorRT inference engine settings:")
    print("  * Inference precision - {}".format(trt_engine_datatype))
    print("  * Max batch size - {}\n".format(batch_size))

    # If the engine is not cached, we need to build it
    if not os.path.exists(trt_engine_path):
        # This function uses the supplied .uff file together with the
        # UffParser to build a TensorRT engine. For more details, check the implementation.
        self.trt_engine = engine_utils.build_engine(
            uff_model_path, TRT_LOGGER,
            trt_engine_datatype=trt_engine_datatype,
            batch_size=batch_size)
        # Save the engine to file
        engine_utils.save_engine(self.trt_engine, trt_engine_path)

    # If we get here, the file with the engine exists, so we can load it
    if not self.trt_engine:
        print("Loading cached TensorRT engine from {}".format(trt_engine_path))
        self.trt_engine = engine_utils.load_engine(
            self.trt_runtime, trt_engine_path)

    # This allocates memory for network inputs/outputs on both CPU and GPU
    self.inputs, self.outputs, self.bindings, self.stream = \
        engine_utils.allocate_buffers(self.trt_engine)

    # Execution context is needed for inference
    self.context = self.trt_engine.create_execution_context()

    # Allocate memory for multiple usage [e.g. multiple batch inference]
    input_volume = trt.volume(model_utils.ModelData.INPUT_SHAPE)
    self.numpy_array = np.zeros((self.trt_engine.max_batch_size, input_volume))
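The buffers allocated above are typically consumed by an inference method elsewhere in the same class. Assuming engine_utils.allocate_buffers returns host/device buffer pairs in the style of the TensorRT samples' common.py (objects with .host and .device attributes) and that pycuda.driver is imported as cuda, a hypothetical inference step could look like this:

# Hypothetical sketch; 'img' is a preprocessed numpy array matching INPUT_SHAPE.
np.copyto(self.inputs[0].host, img.ravel())
cuda.memcpy_htod_async(self.inputs[0].device, self.inputs[0].host, self.stream)
self.context.execute_async(batch_size=1, bindings=self.bindings,
                           stream_handle=self.stream.handle)
for out in self.outputs:
    cuda.memcpy_dtoh_async(out.host, out.device, self.stream)
self.stream.synchronize()
detections = [out.host for out in self.outputs]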