本文整理匯總了Python中tensorrt.nptype方法的典型用法代碼示例。如果您正苦於以下問題:Python tensorrt.nptype方法的具體用法?Python tensorrt.nptype怎麼用?Python tensorrt.nptype使用的例子?那麼,這裡精選的方法代碼示例或許可以為您提供幫助。您也可以進一步了解該方法所在類tensorrt的用法示例。
在下文中一共展示了tensorrt.nptype方法的15個代碼示例,這些例子默認根據受歡迎程度排序。您可以為喜歡或者感覺有用的代碼點讚,您的評價將有助於系統推薦出更棒的Python代碼示例。
示例1: allocate_buffers
# 需要導入模塊: import tensorrt [as 別名]
# 或者: from tensorrt import nptype [as 別名]
def allocate_buffers(engine):
    """Create one host/device buffer pair per binding of ``engine``.

    For each binding the element count is the volume of the binding shape
    multiplied by ``engine.max_batch_size``; the element type comes from
    ``trt.nptype`` applied to the binding dtype.

    Returns:
        A tuple ``(inputs, outputs, bindings, stream)`` where ``inputs`` and
        ``outputs`` are lists of ``HostDeviceMem`` objects, ``bindings`` is
        the list of device allocations converted with ``int()`` (in engine
        iteration order), and ``stream`` is a new ``cuda.Stream``.
    """
    host_inputs, host_outputs, device_ptrs = [], [], []
    stream = cuda.Stream()
    for name in engine:
        n_elems = trt.volume(engine.get_binding_shape(name)) * engine.max_batch_size
        np_dtype = trt.nptype(engine.get_binding_dtype(name))
        # Host side comes from cuda.pagelocked_empty; the device side is
        # sized from the host array's byte count.
        host_buf = cuda.pagelocked_empty(n_elems, np_dtype)
        dev_buf = cuda.mem_alloc(host_buf.nbytes)
        # Every binding contributes its device address, input or output.
        device_ptrs.append(int(dev_buf))
        # Route the pair to the matching list.
        target = host_inputs if engine.binding_is_input(name) else host_outputs
        target.append(HostDeviceMem(host_buf, dev_buf))
    return host_inputs, host_outputs, device_ptrs, stream
# This function is generalized for multiple inputs/outputs.
# inputs and outputs are expected to be lists of HostDeviceMem objects.
示例2: __allocate_buffers
# 需要導入模塊: import tensorrt [as 別名]
# 或者: from tensorrt import nptype [as 別名]
def __allocate_buffers(self, engine):
    """Allocate host and device memory for every binding of ``engine``.

    Args:
        engine: Iterable of bindings that also exposes ``get_binding_shape``,
            ``get_binding_dtype``, ``binding_is_input`` and
            ``max_batch_size`` (presumably a TensorRT engine; confirm with
            the caller).

    Returns:
        ``(inputs, outputs, bindings)``: two lists of ``HostDeviceMem``
        objects and the list of ``int()``-converted device allocations.
    """
    inputs, outputs = [], []
    bindings = []
    for binding in engine:
        binding_shape = engine.get_binding_shape(binding)
        element_count = trt.volume(binding_shape) * engine.max_batch_size
        element_type = trt.nptype(engine.get_binding_dtype(binding))

        # Host array first; its nbytes determines the device allocation size.
        host_array = cuda.pagelocked_empty(element_count, element_type)
        device_alloc = cuda.mem_alloc(host_array.nbytes)
        bindings.append(int(device_alloc))

        # File the pair under inputs or outputs.
        is_input = engine.binding_is_input(binding)
        pair = HostDeviceMem(host_array, device_alloc)
        (inputs if is_input else outputs).append(pair)
    return inputs, outputs, bindings
示例3: allocate_buffers
# 需要導入模塊: import tensorrt [as 別名]
# 或者: from tensorrt import nptype [as 別名]
def allocate_buffers(engine):
    """Build the input/output buffers, binding list and stream for an engine.

    Each binding gets a host array from ``cuda.pagelocked_empty`` and a
    device allocation of the same byte size from ``cuda.mem_alloc``.

    Returns:
        ``(inputs, outputs, bindings, stream)``; ``inputs``/``outputs`` hold
        ``HostDeviceMem`` objects, ``bindings`` holds the device allocations
        as ints, and ``stream`` is a freshly created ``cuda.Stream``.
    """
    inputs, outputs, bindings = [], [], []
    stream = cuda.Stream()
    for binding in engine:
        # Elements per binding = shape volume scaled by the max batch size.
        per_sample = trt.volume(engine.get_binding_shape(binding))
        size = per_sample * engine.max_batch_size
        dtype = trt.nptype(engine.get_binding_dtype(binding))

        host_mem = cuda.pagelocked_empty(size, dtype)
        device_mem = cuda.mem_alloc(host_mem.nbytes)
        bindings.append(int(device_mem))

        if engine.binding_is_input(binding):
            destination = inputs
        else:
            destination = outputs
        destination.append(HostDeviceMem(host_mem, device_mem))
    return inputs, outputs, bindings, stream
示例4: get_input_metadata
# 需要導入模塊: import tensorrt [as 別名]
# 或者: from tensorrt import nptype [as 別名]
def get_input_metadata(self):
    """Collect dtype and shape of the inputs of the active profile.

    The engine's bindings are split evenly across its optimization profiles;
    only the slice belonging to ``self.context.active_optimization_profile``
    is inspected.

    Returns:
        An ``OrderedDict`` mapping ``self.engine[binding]`` (the binding
        looked up by index) to a ``(numpy dtype, shape list)`` tuple for
        every input binding in that slice.
    """
    metadata = OrderedDict()
    active_profile = self.context.active_optimization_profile
    num_profiles = self.engine.num_optimization_profiles
    bindings_per_profile = len(self.engine) // num_profiles
    logging.debug(
        "Total # of Profiles: {:}, Bindings Per Profile: {:}, Active Profile: {:}".format(
            self.engine.num_optimization_profiles, bindings_per_profile, active_profile
        )
    )

    # Index range of the bindings owned by the active profile.
    first = bindings_per_profile * active_profile
    last = first + bindings_per_profile
    logging.info("Start Binding: {:}, End Binding: {:}".format(first, last))

    for idx in range(first, last):
        if not self.engine.binding_is_input(idx):
            continue
        np_dtype = trt.nptype(self.engine.get_binding_dtype(idx))
        dims = list(self.engine.get_binding_shape(idx))
        metadata[self.engine[idx]] = (np_dtype, dims)
    return metadata
示例5: allocate_buffers
# 需要導入模塊: import tensorrt [as 別名]
# 或者: from tensorrt import nptype [as 別名]
def allocate_buffers(engine):
    """Allocate shaped host buffers and matching device memory per binding.

    The host array is reshaped to ``[max_batch_size] + binding shape`` and
    each ``HostDeviceMem`` also records the binding and its position in the
    engine's iteration order.

    Returns:
        ``(inputs, outputs, bindings)``: lists of ``HostDeviceMem`` objects
        for inputs and outputs, and the ``int()``-converted device
        allocations for all bindings.
    """
    inputs, outputs, bindings = [], [], []
    for index, binding in enumerate(engine):
        dims = engine.get_binding_shape(binding)
        size = trt.volume(dims) * engine.max_batch_size
        dtype = trt.nptype(engine.get_binding_dtype(binding))
        full_shape = [engine.max_batch_size] + list(dims)

        # Flat host allocation viewed with the batch dimension prepended.
        host_mem = cuda.pagelocked_empty(size, dtype).reshape(full_shape)
        device_mem = cuda.mem_alloc(host_mem.nbytes)
        bindings.append(int(device_mem))

        bucket = inputs if engine.binding_is_input(binding) else outputs
        bucket.append(HostDeviceMem(host_mem, device_mem, binding, index))
    return inputs, outputs, bindings
示例6: allocate_buffers_torch
# 需要導入模塊: import tensorrt [as 別名]
# 或者: from tensorrt import nptype [as 別名]
def allocate_buffers_torch(engine: trt.ICudaEngine, device):
    """Allocate host buffers plus torch tensors on ``device`` per binding.

    Device-side storage is a ``torch.empty`` tensor whose dtype is looked up
    from the host array's dtype via ``np_to_torch_dtype_map()``; its
    ``data_ptr()`` is what ends up in the bindings list.

    Args:
        engine: Engine whose bindings are iterated.
        device: Passed through as ``device=`` to ``torch.empty``.

    Returns:
        ``(inputs, outputs, bindings)``: lists of ``HostDeviceMem`` objects
        and the list of tensor data pointers.
    """
    import torch

    inputs = []
    outputs = []
    bindings = []
    np_to_torch = np_to_torch_dtype_map()
    for position, binding in enumerate(engine):
        binding_dims = engine.get_binding_shape(binding)
        n_elems = trt.volume(binding_dims) * engine.max_batch_size
        np_dtype = trt.nptype(engine.get_binding_dtype(binding))
        batched_shape = [engine.max_batch_size] + list(binding_dims)

        # Host array, reshaped to include the batch dimension.
        host_mem = cuda.pagelocked_empty(n_elems, np_dtype).reshape(batched_shape)
        # Torch tensor mirrors the host array's shape and (mapped) dtype.
        torch_dtype = np_to_torch[host_mem.dtype]
        device_mem = torch.empty(*host_mem.shape, device=device, dtype=torch_dtype)
        bindings.append(device_mem.data_ptr())

        record = HostDeviceMem
        if engine.binding_is_input(binding):
            inputs.append(record(host_mem, device_mem, binding, position))
        else:
            outputs.append(record(host_mem, device_mem, binding, position))
    return inputs, outputs, bindings
示例7: allocate_buffers
# 需要導入模塊: import tensorrt [as 別名]
# 或者: from tensorrt import nptype [as 別名]
def allocate_buffers(engine):
    """Allocate buffers for an engine using binding 0 and binding 1.

    Binding 0 is used for the input buffer and binding 1 for the output
    buffer; both host arrays use ``trt.nptype(ModelData.DTYPE)``.

    Returns:
        ``(h_input, d_input, h_output, d_output, stream)``.
    """
    np_dtype = trt.nptype(ModelData.DTYPE)
    input_volume = trt.volume(engine.get_binding_shape(0))
    output_volume = trt.volume(engine.get_binding_shape(1))

    # Page-locked host arrays (i.e. won't be swapped to disk).
    h_input = cuda.pagelocked_empty(input_volume, dtype=np_dtype)
    h_output = cuda.pagelocked_empty(output_volume, dtype=np_dtype)

    # Device allocations sized from the host arrays.
    d_input = cuda.mem_alloc(h_input.nbytes)
    d_output = cuda.mem_alloc(h_output.nbytes)

    # Stream used by the caller for copies and inference.
    stream = cuda.Stream()
    return h_input, d_input, h_output, d_output, stream
示例8: load_normalized_test_case
# 需要導入模塊: import tensorrt [as 別名]
# 或者: from tensorrt import nptype [as 別名]
def load_normalized_test_case(test_image, pagelocked_buffer):
    """Load an image, normalize it and copy it into ``pagelocked_buffer``.

    Args:
        test_image: Passed to ``Image.open``.
        pagelocked_buffer: Destination array for ``np.copyto``; it must be
            compatible with the flattened C*H*W image.

    Returns:
        ``test_image`` unchanged, so callers can report which case was loaded.
    """
    def normalize_image(image):
        """Convert ``image`` to a flat, mean-normalized CHW array."""
        # Only height and width are needed for the resize.
        _, h, w = ModelData.INPUT_SHAPE
        # Image.ANTIALIAS was removed in Pillow 10; Image.LANCZOS is the
        # same filter and exists in every Pillow release since 2.7.
        resized = image.resize((w, h), Image.LANCZOS)
        # Assumes the decoded image is HWC with a channel axis; TODO confirm
        # that inputs are never grayscale.
        image_arr = np.asarray(resized).transpose([2, 0, 1]).astype(trt.nptype(ModelData.DTYPE)).ravel()
        # This particular ResNet50 model requires some preprocessing, specifically, mean normalization.
        return (image_arr / 255.0 - 0.45) / 0.225

    # Normalize the image and copy to pagelocked memory.
    np.copyto(pagelocked_buffer, normalize_image(Image.open(test_image)))
    return test_image
示例9: load_normalized_test_case
# 需要導入模塊: import tensorrt [as 別名]
# 或者: from tensorrt import nptype [as 別名]
def load_normalized_test_case(test_image, pagelocked_buffer):
    """Load an image, convert it to flat CHW and copy it into the buffer.

    Args:
        test_image: Passed to ``Image.open``.
        pagelocked_buffer: Destination array for ``np.copyto``.

    Returns:
        ``test_image`` unchanged.
    """
    def normalize_image(image):
        """Resize ``image`` and return it as a flat CHW array."""
        # Only height and width are needed for the resize.
        _, h, w = ModelData.INPUT_SHAPE
        # Image.ANTIALIAS was removed in Pillow 10; Image.LANCZOS is the
        # same filter and exists in every Pillow release since 2.7.
        resized = image.resize((w, h), Image.LANCZOS)
        # Assumes the decoded image is HWC with a channel axis; TODO confirm.
        return np.asarray(resized).transpose([2, 0, 1]).astype(trt.nptype(ModelData.DTYPE)).ravel()

    # Normalize the image and copy to pagelocked memory.
    np.copyto(pagelocked_buffer, normalize_image(Image.open(test_image)))
    return test_image
示例10: allocate_buffers
# 需要導入模塊: import tensorrt [as 別名]
# 或者: from tensorrt import nptype [as 別名]
def allocate_buffers(engine):
    """Allocate host and device buffers for bindings 0 and 1 of ``engine``.

    Host arrays use ``trt.nptype(DTYPE)`` and are sized by the volume of the
    respective binding shape; device allocations match their byte sizes.

    Returns:
        ``(h_input, d_input, h_output, d_output)``.
    """
    # Host arrays for binding 0 (input) and binding 1 (output), in order.
    h_input, h_output = (
        cuda.pagelocked_empty(
            trt.volume(engine.get_binding_shape(binding_index)),
            dtype=trt.nptype(DTYPE),
        )
        for binding_index in (0, 1)
    )
    # Device allocations follow, input first.
    d_input, d_output = (
        cuda.mem_alloc(host_array.nbytes) for host_array in (h_input, h_output)
    )
    return h_input, d_input, h_output, d_output
示例11: load_input
# 需要導入模塊: import tensorrt [as 別名]
# 或者: from tensorrt import nptype [as 別名]
def load_input(input_size, host_buffer):
    """Fill ``host_buffer`` with a random image of the requested size.

    Args:
        input_size: Sequence of four values unpacked as (b, c, h, w); only
            c, h and w are used to shape the random data.
        host_buffer: Destination array for ``np.copyto``.
    """
    assert len(input_size) == 4
    _, channels, height, width = input_size
    np_dtype = trt.nptype(DTYPE)
    # Standard-normal samples, cast to the model dtype and flattened.
    random_image = np.random.randn(channels, height, width)
    flat_image = random_image.astype(np_dtype).ravel()
    np.copyto(host_buffer, flat_image)
示例12: alloc_buf
# 需要導入模塊: import tensorrt [as 別名]
# 或者: from tensorrt import nptype [as 別名]
def alloc_buf(engine):
    """Allocate CPU/GPU buffers for bindings 0 and 1 plus a CUDA stream.

    Sizes and dtypes are read from the engine for each of the two bindings.

    Returns:
        ``(in_cpu, out_cpu, in_gpu, out_gpu, stream)``.
    """
    # Host (CPU) arrays: binding 0 is treated as input, binding 1 as output.
    in_cpu = cuda.pagelocked_empty(
        trt.volume(engine.get_binding_shape(0)),
        trt.nptype(engine.get_binding_dtype(0)),
    )
    out_cpu = cuda.pagelocked_empty(
        trt.volume(engine.get_binding_shape(1)),
        trt.nptype(engine.get_binding_dtype(1)),
    )

    # Device (GPU) allocations sized from the host arrays.
    in_gpu = cuda.mem_alloc(in_cpu.nbytes)
    out_gpu = cuda.mem_alloc(out_cpu.nbytes)

    stream = cuda.Stream()
    return in_cpu, out_cpu, in_gpu, out_gpu, stream
示例13: get_input_metadata_from_profile
# 需要導入模塊: import tensorrt [as 別名]
# 或者: from tensorrt import nptype [as 別名]
def get_input_metadata_from_profile(profile, network):
    """Derive calibration input metadata from an optimization profile.

    Args:
        profile: Object providing ``get_shape(name)`` and
            ``get_shape_input(name)``. Assumed to be a TensorRT
            optimization profile whose results are ordered
            ``[min, opt, max]`` (confirm against the caller).
        network: Object providing ``num_inputs`` and ``get_input(index)``.

    Returns:
        An ``OrderedDict`` mapping each input tensor name to
        ``(numpy dtype, opt shape)``.
    """
    input_metadata = OrderedDict()
    for index in range(network.num_inputs):
        tensor = network.get_input(index)
        if tensor.is_shape_tensor:
            shapes = profile.get_shape_input(tensor.name)
        else:
            shapes = profile.get_shape(tensor.name)
        # Compare min (index 0) against max (index 2) so that the condition
        # matches the warning text; the original compared min against opt.
        if tuple(shapes[0]) != tuple(shapes[2]):
            logging.warning("In profile 0, min != max, using opt shapes for calibration")
        # Always use opt shape
        input_metadata[tensor.name] = (trt.nptype(tensor.dtype), shapes[1])
    return input_metadata
示例14: from_engine
# 需要導入模塊: import tensorrt [as 別名]
# 或者: from tensorrt import nptype [as 別名]
def from_engine(engine):
    """Create a ``Buffers`` object with one entry per binding of ``engine``.

    Every binding gets a ``DeviceBuffer``; bindings that are not inputs also
    get a ``HostBuffer``. Both use the numpy dtype of the binding.

    Returns:
        The populated ``Buffers`` instance.
    """
    result = Buffers()
    for name in engine:
        np_dtype = trt.nptype(engine.get_binding_dtype(name))
        result.device_buffers[name] = DeviceBuffer(dtype=np_dtype)
        if engine.binding_is_input(name):
            # Inputs only need the device-side buffer.
            continue
        result.host_outputs[name] = HostBuffer(dtype=np_dtype)
    return result
示例15: run_speed_eval
# 需要導入模塊: import tensorrt [as 別名]
# 或者: from tensorrt import nptype [as 別名]
def run_speed_eval(self, warm_run_loops=10, real_run_loops=100):
    """Time host/device copies and inference over repeated runs.

    Buffers are allocated for ``self.engine``, a number of untimed warm-up
    iterations are executed, then ``real_run_loops`` timed iterations. Copy
    time and ``self.executor.execute`` time are accumulated separately and a
    summary is logged. The contents of the input buffers are never written
    here, so whatever the allocation returned is what gets copied.

    Args:
        warm_run_loops: Number of untimed warm-up iterations.
        real_run_loops: Number of timed iterations.
    """
    def allocate_buffers(engine):
        """Allocate a host/device buffer pair for every engine binding."""
        inputs = []
        outputs = []
        bindings = []
        for binding in engine:
            size = trt.volume(engine.get_binding_shape(binding)) * engine.max_batch_size
            dtype = trt.nptype(engine.get_binding_dtype(binding))
            # Allocate host and device buffers
            host_mem = cuda.pagelocked_empty(size, dtype)
            device_mem = cuda.mem_alloc(host_mem.nbytes)
            # Append the device buffer to device bindings.
            bindings.append(int(device_mem))
            # Append to the appropriate list.
            if engine.binding_is_input(binding):
                inputs.append(HostDeviceMem(host_mem, device_mem))
            else:
                outputs.append(HostDeviceMem(host_mem, device_mem))
        return inputs, outputs, bindings

    inputs, outputs, bindings = allocate_buffers(self.engine)

    # warm run
    for _ in range(warm_run_loops):
        for inp in inputs:
            cuda.memcpy_htod(inp.device, inp.host)
        self.executor.execute(batch_size=self.max_batch_size, bindings=bindings)
        for out in outputs:
            cuda.memcpy_dtoh(out.host, out.device)

    # real run
    logging.info('Start real run loop.')
    sum_time_data_copy = 0.
    sum_time_inference_only = 0.
    for _ in range(real_run_loops):
        time_start = time.time()
        for inp in inputs:
            cuda.memcpy_htod(inp.device, inp.host)
        sum_time_data_copy += time.time() - time_start

        time_start = time.time()
        self.executor.execute(batch_size=self.max_batch_size, bindings=bindings)
        sum_time_inference_only += time.time() - time_start

        time_start = time.time()
        for out in outputs:
            cuda.memcpy_dtoh(out.host, out.device)
        sum_time_data_copy += time.time() - time_start

    total_seconds = sum_time_data_copy + sum_time_inference_only
    num_images = real_run_loops * self.max_batch_size
    # Guard the ratios so zero loops (or a zero elapsed time) produce a
    # summary line instead of a ZeroDivisionError.
    ms_per_image = total_seconds * 1000 / num_images if num_images > 0 else 0.
    images_per_second = num_images / total_seconds if total_seconds > 0 else 0.
    logging.info('Total time (data transfer & inference) elapsed: %.02f ms. [%.02f ms] for each image (%.02f FPS)',
                 total_seconds * 1000, ms_per_image, images_per_second)