本文整理匯總了Python中pycuda.driver.memcpy_htod_async方法的典型用法代碼示例。如果您正苦於以下問題：Python driver.memcpy_htod_async方法的具體用法？Python driver.memcpy_htod_async怎麽用？Python driver.memcpy_htod_async使用的例子？那麽，這裏精選的方法代碼示例或許可以為您提供幫助。您也可以進一步了解該方法所在類pycuda.driver的用法示例。
在下文中一共展示了driver.memcpy_htod_async方法的15個代碼示例,這些例子默認根據受歡迎程度排序。您可以為喜歡或者感覺有用的代碼點讚,您的評價將有助於係統推薦出更棒的Python代碼示例。
示例1: infer
# 需要導入模塊: from pycuda import driver [as 別名]
# 或者: from pycuda.driver import memcpy_htod_async [as 別名]
def infer(context, input_img, output_size, batch_size):
    """Run one inference pass through ``context`` and return the host result.

    Args:
        context: Execution context exposing ``get_engine()`` and ``enqueue()``
            (presumably a legacy TensorRT context -- confirm against caller).
        input_img: NumPy array holding the input; it is cast to float32
            before being copied to the device.
        output_size: Shape argument handed to ``np.empty`` for the result.
        batch_size: Forwarded to ``context.enqueue`` and used to scale both
            device allocations.

    Returns:
        The float32 host array filled by the device-to-host copy.
    """
    # The binding list built below assumes exactly two bindings.
    assert context.get_engine().get_nb_bindings() == 2

    host_in = input_img.astype(np.float32)
    host_out = np.empty(output_size, dtype=np.float32)

    # Device buffers, each scaled by the batch size.
    in_bytes = batch_size * host_in.size * host_in.dtype.itemsize
    dev_in = cuda.mem_alloc(in_bytes)
    out_bytes = batch_size * host_out.size * host_out.dtype.itemsize
    dev_out = cuda.mem_alloc(out_bytes)
    binding_ptrs = [int(dev_in), int(dev_out)]

    # Everything below is queued on one stream, so it runs in order.
    work = cuda.Stream()
    cuda.memcpy_htod_async(dev_in, host_in, work)
    context.enqueue(batch_size, binding_ptrs, work.handle, None)
    cuda.memcpy_dtoh_async(host_out, dev_out, work)

    # Wait for the queued copies and execution before exposing the result.
    work.synchronize()
    return host_out
示例2: inference_image
# 需要導入模塊: from pycuda import driver [as 別名]
# 或者: from pycuda.driver import memcpy_htod_async [as 別名]
def inference_image(context, input_img, batch_size, output_size=1000):
    """Run ``input_img`` through ``context`` and return the raw output vector.

    Args:
        context: Execution context exposing ``get_engine()`` and ``enqueue()``
            (presumably a legacy TensorRT context -- confirm against caller).
        input_img: NumPy array with the input data. It is copied to the
            device as-is; no dtype conversion is performed here.
        batch_size: Forwarded to ``context.enqueue`` and used to scale both
            device allocations.
        output_size: Number of float32 elements in the host output buffer.
            Defaults to 1000, the value that used to be hard-coded.

    Returns:
        A float32 NumPy array of ``output_size`` elements filled by the
        device-to-host copy.
    """
    # The binding list built below assumes exactly two bindings.
    engine = context.get_engine()
    assert engine.get_nb_bindings() == 2

    # Host buffer that receives the result.
    output = np.empty(output_size, dtype=np.float32)

    # Device buffers, each scaled by the batch size.
    d_input = cuda.mem_alloc(batch_size * input_img.size * input_img.dtype.itemsize)
    d_output = cuda.mem_alloc(batch_size * output.size * output.dtype.itemsize)
    bindings = [int(d_input), int(d_output)]

    stream = cuda.Stream()
    # Queue upload, execution and download on the same stream.
    cuda.memcpy_htod_async(d_input, input_img, stream)
    context.enqueue(batch_size, bindings, stream.handle, None)
    cuda.memcpy_dtoh_async(output, d_output, stream)

    # Wait for the queued work so ``output`` is fully populated.
    stream.synchronize()
    return output
示例3: prediction
# 需要導入模塊: from pycuda import driver [as 別名]
# 或者: from pycuda.driver import memcpy_htod_async [as 別名]
def prediction(self, img):
    """Run the model on ``img`` and return the first host output buffer.

    The image is preprocessed with ``_preprocess_trt`` to ``self.input_shape``,
    flattened into ``self.host_inputs[0]``, uploaded, executed with a batch
    size of 1, and both output buffers are downloaded before the stream is
    synchronized.

    Args:
        img: Input image accepted by ``_preprocess_trt``.

    Returns:
        ``self.host_outputs[0]`` after the stream has been synchronized.
    """
    flat = _preprocess_trt(img, self.input_shape).ravel()
    host_in = self.host_inputs[0]
    np.copyto(host_in, flat)

    cuda.memcpy_htod_async(self.cuda_inputs[0], host_in, self.stream)
    self.context.execute_async(
        batch_size=1,
        bindings=self.bindings,
        stream_handle=self.stream.handle,
    )

    # Download output 1 first, then output 0 (same order as before).
    for idx in (1, 0):
        cuda.memcpy_dtoh_async(
            self.host_outputs[idx], self.cuda_outputs[idx], self.stream)

    self.stream.synchronize()
    return self.host_outputs[0]
示例4: detect
# 需要導入模塊: from pycuda import driver [as 別名]
# 或者: from pycuda.driver import memcpy_htod_async [as 別名]
def detect(self, img, conf_th=0.3):
    """Run detection on ``img`` and return the post-processed result.

    Args:
        img: Input image; passed to ``_preprocess_trt`` and, unchanged, to
            ``_postprocess_trt``.
        conf_th: Confidence threshold forwarded to ``_postprocess_trt``.

    Returns:
        Whatever ``_postprocess_trt`` returns for the first output buffer.
    """
    flat = _preprocess_trt(img, self.input_shape).ravel()
    host_in = self.host_inputs[0]
    np.copyto(host_in, flat)

    # Upload, execute with batch size 1, then download both outputs.
    cuda.memcpy_htod_async(self.cuda_inputs[0], host_in, self.stream)
    self.context.execute_async(
        batch_size=1,
        bindings=self.bindings,
        stream_handle=self.stream.handle,
    )
    for idx in (1, 0):
        cuda.memcpy_dtoh_async(
            self.host_outputs[idx], self.cuda_outputs[idx], self.stream)

    # Host buffers are only safe to read once the stream has drained.
    self.stream.synchronize()

    raw = self.host_outputs[0]
    return _postprocess_trt(img, raw, conf_th, self.output_layout)
示例5: do_inference
# 需要導入模塊: from pycuda import driver [as 別名]
# 或者: from pycuda.driver import memcpy_htod_async [as 別名]
def do_inference(context, bindings, inputs, outputs, stream, batch_size=1):
    """do_inference (for TensorRT 6.x or lower)

    This function is generalized for multiple inputs/outputs.
    Inputs and outputs are expected to be lists of HostDeviceMem objects,
    i.e. objects exposing ``.host`` and ``.device`` attributes.

    Args:
        context: Object providing ``execute_async(batch_size, bindings,
            stream_handle)``.
        bindings: Binding list forwarded unchanged to ``execute_async``.
        inputs: Iterable of buffers whose ``.host`` is copied to ``.device``.
        outputs: Iterable of buffers whose ``.device`` is copied to ``.host``.
        stream: Stream on which all work is queued; must expose ``.handle``
            and ``synchronize()``.
        batch_size: Batch size forwarded to ``execute_async``.

    Returns:
        A list with the ``.host`` attribute of every entry in ``outputs``.
    """
    # Transfer input data to the GPU. Plain loops are used because the
    # copies are side effects, not values to collect.
    for inp in inputs:
        cuda.memcpy_htod_async(inp.device, inp.host, stream)

    # Run inference.
    context.execute_async(batch_size=batch_size,
                          bindings=bindings,
                          stream_handle=stream.handle)

    # Transfer predictions back from the GPU.
    for out in outputs:
        cuda.memcpy_dtoh_async(out.host, out.device, stream)

    # Wait for the queued work before handing out the host buffers.
    stream.synchronize()

    # Return only the host outputs.
    return [out.host for out in outputs]
示例6: do_inference_v2
# 需要導入模塊: from pycuda import driver [as 別名]
# 或者: from pycuda.driver import memcpy_htod_async [as 別名]
def do_inference_v2(context, bindings, inputs, outputs, stream):
    """do_inference_v2 (for TensorRT 7.0+)

    This function is generalized for multiple inputs/outputs for full
    dimension networks.
    Inputs and outputs are expected to be lists of HostDeviceMem objects,
    i.e. objects exposing ``.host`` and ``.device`` attributes.

    Args:
        context: Object providing ``execute_async_v2(bindings,
            stream_handle)``.
        bindings: Binding list forwarded unchanged to ``execute_async_v2``.
        inputs: Iterable of buffers whose ``.host`` is copied to ``.device``.
        outputs: Iterable of buffers whose ``.device`` is copied to ``.host``.
        stream: Stream on which all work is queued; must expose ``.handle``
            and ``synchronize()``.

    Returns:
        A list with the ``.host`` attribute of every entry in ``outputs``.
    """
    # Transfer input data to the GPU. Plain loops are used because the
    # copies are side effects, not values to collect.
    for inp in inputs:
        cuda.memcpy_htod_async(inp.device, inp.host, stream)

    # Run inference.
    context.execute_async_v2(bindings=bindings, stream_handle=stream.handle)

    # Transfer predictions back from the GPU.
    for out in outputs:
        cuda.memcpy_dtoh_async(out.host, out.device, stream)

    # Wait for the queued work before handing out the host buffers.
    stream.synchronize()

    # Return only the host outputs.
    return [out.host for out in outputs]
示例7: execute_async
# 需要導入模塊: from pycuda import driver [as 別名]
# 或者: from pycuda.driver import memcpy_htod_async [as 別名]
def execute_async(self, batch_size):
    """Upload inputs, run the context, and return the host outputs by name.

    Args:
        batch_size: Number of leading entries of every host buffer to
            transfer; also forwarded to ``self.context.execute_async``.

    Returns:
        A dict mapping each key of ``self.output_dict`` to the first
        ``batch_size`` entries of that buffer's ``.host``.
    """
    # Upload only the inputs explicitly flagged ``device_input is False``;
    # the identity check is kept so other falsy values are not uploaded.
    for inp in self.inputs:
        if inp.device_input is False:
            cuda.memcpy_htod_async(inp.device, inp.host[:batch_size],
                                   self.stream)

    self.context.execute_async(
        batch_size=batch_size,
        bindings=self.bindings,
        stream_handle=self.stream.handle)

    # Download the leading ``batch_size`` entries of every output.
    for out in self.outputs:
        cuda.memcpy_dtoh_async(out.host[:batch_size], out.device,
                               self.stream)

    # Wait for the queued work before the host buffers are read.
    self.stream.synchronize()
    return {n: v.host[:batch_size] for n, v in self.output_dict.items()}
示例8: infer
# 需要導入模塊: from pycuda import driver [as 別名]
# 或者: from pycuda.driver import memcpy_htod_async [as 別名]
def infer(context, input_img, output_size, batch_size):
    """Execute ``context`` on ``input_img`` and return the predictions.

    Args:
        context: Execution context exposing ``get_engine()`` and ``enqueue()``
            (presumably a legacy TensorRT context -- confirm against caller).
        input_img: NumPy array with the input; cast to float32 before upload.
        output_size: Shape argument for the float32 host result buffer.
        batch_size: Forwarded to ``context.enqueue``; also scales the device
            allocations.

    Returns:
        The float32 host array holding the downloaded predictions.
    """
    engine = context.get_engine()
    # Two bindings are required because the binding list has two entries.
    assert engine.get_nb_bindings() == 2

    img_f32 = input_img.astype(np.float32)
    predictions = np.empty(output_size, dtype=np.float32)

    # One device buffer per host array, each scaled by the batch size.
    gpu_in = cuda.mem_alloc(batch_size * img_f32.size * img_f32.dtype.itemsize)
    gpu_out = cuda.mem_alloc(
        batch_size * predictions.size * predictions.dtype.itemsize)
    pointers = [int(gpu_in), int(gpu_out)]

    queue = cuda.Stream()
    cuda.memcpy_htod_async(gpu_in, img_f32, queue)      # upload
    context.enqueue(batch_size, pointers, queue.handle, None)  # execute
    cuda.memcpy_dtoh_async(predictions, gpu_out, queue)  # download

    # Drain the stream so the predictions are complete before returning.
    queue.synchronize()
    return predictions
示例9: infer
# 需要導入模塊: from pycuda import driver [as 別名]
# 或者: from pycuda.driver import memcpy_htod_async [as 別名]
def infer(engine, input_img, batch_size):
    """Create an execution context from ``engine`` and run one inference.

    Args:
        engine: Engine exposing ``create_execution_context()``,
            ``get_nb_bindings()`` and ``get_binding_dimensions()``
            (presumably a legacy TensorRT engine -- confirm against caller).
        input_img: NumPy array with the input data. It is uploaded as-is;
            no dtype conversion is performed here.
        batch_size: Forwarded to ``context.enqueue``; also scales the output
            element count and the input device allocation.

    Returns:
        A page-locked float32 array of ``C * H * W * batch_size`` elements
        holding the downloaded predictions.
    """
    context = engine.create_execution_context()
    # The binding list built below assumes exactly two bindings.
    assert engine.get_nb_bindings() == 2

    # Size the host result from the dimensions of binding 1.
    dims = engine.get_binding_dimensions(1).to_DimsCHW()
    elt_count = dims.C() * dims.H() * dims.W() * batch_size

    # Page-locked host memory for the result.
    output = cuda.pagelocked_empty(elt_count, dtype=np.float32)

    # Device buffers. ``output`` already contains the batch factor, so its
    # byte size is used directly instead of multiplying by batch_size again.
    d_input = cuda.mem_alloc(batch_size * input_img.size * input_img.dtype.itemsize)
    d_output = cuda.mem_alloc(output.nbytes)
    bindings = [int(d_input), int(d_output)]

    stream = cuda.Stream()
    cuda.memcpy_htod_async(d_input, input_img, stream)
    context.enqueue(batch_size, bindings, stream.handle, None)
    cuda.memcpy_dtoh_async(output, d_output, stream)

    # The download is asynchronous: wait for the stream to finish so the
    # caller never sees a partially written buffer.
    stream.synchronize()
    return output
示例10: infer
# 需要導入模塊: from pycuda import driver [as 別名]
# 或者: from pycuda.driver import memcpy_htod_async [as 別名]
def infer(context, input_img, batch_size):
    """Run one inference pass through ``context`` and return the predictions.

    Args:
        context: Execution context exposing ``get_engine()`` and ``enqueue()``
            (presumably a legacy TensorRT context -- confirm against caller).
        input_img: NumPy array with the input; cast to float32 before upload.
        batch_size: Forwarded to ``context.enqueue``; also scales the output
            element count and the input device allocation.

    Returns:
        A page-locked float32 array of ``C * H * W * batch_size`` elements
        holding the downloaded predictions.
    """
    engine = context.get_engine()
    # The binding list built below assumes exactly two bindings.
    assert engine.get_nb_bindings() == 2

    # Size the host result from the dimensions of binding 1.
    dims = engine.get_binding_dimensions(1).to_DimsCHW()
    elt_count = dims.C() * dims.H() * dims.W() * batch_size

    input_img = input_img.astype(np.float32)

    # Page-locked host memory for the result.
    output = cuda.pagelocked_empty(elt_count, dtype=np.float32)

    # Device buffers. ``output`` already contains the batch factor, so its
    # byte size is used directly instead of multiplying by batch_size again.
    d_input = cuda.mem_alloc(batch_size * input_img.size * input_img.dtype.itemsize)
    d_output = cuda.mem_alloc(output.nbytes)
    bindings = [int(d_input), int(d_output)]

    stream = cuda.Stream()
    cuda.memcpy_htod_async(d_input, input_img, stream)
    context.enqueue(batch_size, bindings, stream.handle, None)
    cuda.memcpy_dtoh_async(output, d_output, stream)

    # The download is asynchronous: wait for the stream to finish so the
    # caller never sees a partially written buffer.
    stream.synchronize()
    return output
示例11: infer
# 需要導入模塊: from pycuda import driver [as 別名]
# 或者: from pycuda.driver import memcpy_htod_async [as 別名]
def infer(engine, input_img, batch_size):
    """Create an execution context from ``engine`` and run one inference.

    Args:
        engine: Engine exposing ``create_execution_context()``,
            ``get_nb_bindings()`` and ``get_binding_dimensions()``
            (presumably a legacy TensorRT engine -- confirm against caller).
        input_img: NumPy array with the input data. It is uploaded as-is;
            no dtype conversion is performed here.
        batch_size: Forwarded to ``context.enqueue``; also scales the output
            element count and the input device allocation.

    Returns:
        A page-locked float32 array of ``C * H * W * batch_size`` elements
        holding the downloaded predictions.
    """
    context = engine.create_execution_context()
    # The binding list built below assumes exactly two bindings.
    assert engine.get_nb_bindings() == 2

    # Size the host result from the dimensions of binding 1.
    dims = engine.get_binding_dimensions(1).to_DimsCHW()
    elt_count = dims.C() * dims.H() * dims.W() * batch_size

    # Page-locked host memory for the result.
    output = cuda.pagelocked_empty(elt_count, dtype=np.float32)

    # Device buffers. ``output`` already contains the batch factor, so its
    # byte size is used directly instead of multiplying by batch_size again.
    d_input = cuda.mem_alloc(batch_size * input_img.size * input_img.dtype.itemsize)
    d_output = cuda.mem_alloc(output.nbytes)
    bindings = [int(d_input), int(d_output)]

    stream = cuda.Stream()
    cuda.memcpy_htod_async(d_input, input_img, stream)
    context.enqueue(batch_size, bindings, stream.handle, None)
    cuda.memcpy_dtoh_async(output, d_output, stream)

    # The download is asynchronous: wait for the stream to finish so the
    # caller never sees a partially written buffer.
    stream.synchronize()
    return output
示例12: infer
# 需要導入模塊: from pycuda import driver [as 別名]
# 或者: from pycuda.driver import memcpy_htod_async [as 別名]
def infer(context, input_img, output_size, batch_size):
    """Run one inference pass through ``context`` and return the predictions.

    Args:
        context: Execution context exposing ``get_engine()`` and ``enqueue()``
            (presumably a legacy TensorRT context -- confirm against caller).
        input_img: NumPy array with the input; cast to float32 before upload.
        output_size: Shape argument for the float32 host result buffer.
        batch_size: Forwarded to ``context.enqueue``; also scales the device
            allocations.

    Returns:
        The float32 host array holding the downloaded predictions.
    """
    engine = context.get_engine()
    # The binding list built below assumes exactly two bindings.
    assert engine.get_nb_bindings() == 2

    input_img = input_img.astype(np.float32)
    output = np.empty(output_size, dtype=np.float32)

    # Device buffers, each scaled by the batch size.
    d_input = cuda.mem_alloc(batch_size * input_img.size * input_img.dtype.itemsize)
    d_output = cuda.mem_alloc(batch_size * output.size * output.dtype.itemsize)
    bindings = [int(d_input), int(d_output)]

    stream = cuda.Stream()
    cuda.memcpy_htod_async(d_input, input_img, stream)
    context.enqueue(batch_size, bindings, stream.handle, None)
    cuda.memcpy_dtoh_async(output, d_output, stream)

    # All work above was only queued: wait for the stream to finish so the
    # caller never reads ``output`` before the download has completed.
    stream.synchronize()
    return output
示例13: infer
# 需要導入模塊: from pycuda import driver [as 別名]
# 或者: from pycuda.driver import memcpy_htod_async [as 別名]
def infer(context, input_img, output_size, batch_size):
    """Execute ``context`` on ``input_img`` and return the predictions.

    Args:
        context: Execution context exposing ``get_engine()`` and ``enqueue()``
            (presumably a legacy TensorRT context -- confirm against caller).
        input_img: NumPy array with the input; cast to float32 before upload.
        output_size: Shape argument for the float32 host result buffer.
        batch_size: Forwarded to ``context.enqueue``; also scales the device
            allocations.

    Returns:
        The float32 host array holding the downloaded predictions.
    """
    engine = context.get_engine()
    # The binding list built below assumes exactly two bindings.
    assert engine.get_nb_bindings() == 2

    input_img = input_img.astype(np.float32)
    output = np.empty(output_size, dtype=np.float32)

    # Device buffers, each scaled by the batch size.
    d_input = cuda.mem_alloc(batch_size * input_img.size * input_img.dtype.itemsize)
    d_output = cuda.mem_alloc(batch_size * output.size * output.dtype.itemsize)
    bindings = [int(d_input), int(d_output)]

    stream = cuda.Stream()
    cuda.memcpy_htod_async(d_input, input_img, stream)
    context.enqueue(batch_size, bindings, stream.handle, None)
    cuda.memcpy_dtoh_async(output, d_output, stream)

    # All work above was only queued: wait for the stream to finish so the
    # caller never reads ``output`` before the download has completed.
    stream.synchronize()
    return output
示例14: infer
# 需要導入模塊: from pycuda import driver [as 別名]
# 或者: from pycuda.driver import memcpy_htod_async [as 別名]
def infer(engine, input_img, batch_size):
    """Create an execution context from ``engine`` and run one inference.

    Args:
        engine: Engine exposing ``create_execution_context()``,
            ``get_nb_bindings()`` and ``get_binding_dimensions()``
            (presumably a legacy TensorRT engine -- confirm against caller).
        input_img: NumPy array with the input data. It is uploaded as-is;
            no dtype conversion is performed here.
        batch_size: Forwarded to ``context.enqueue``; also scales the output
            element count and the input device allocation.

    Returns:
        A page-locked float32 array of ``C * H * W * batch_size`` elements
        holding the downloaded predictions.
    """
    context = engine.create_execution_context()
    # The binding list built below assumes exactly two bindings.
    assert engine.get_nb_bindings() == 2

    # Size the host result from the dimensions of binding 1.
    dims = engine.get_binding_dimensions(1).to_DimsCHW()
    elt_count = dims.C() * dims.H() * dims.W() * batch_size

    # Page-locked host memory for the result.
    output = cuda.pagelocked_empty(elt_count, dtype=np.float32)

    # Device buffers. ``output`` already contains the batch factor, so its
    # byte size is used directly instead of multiplying by batch_size again.
    d_input = cuda.mem_alloc(batch_size * input_img.size * input_img.dtype.itemsize)
    d_output = cuda.mem_alloc(output.nbytes)
    bindings = [int(d_input), int(d_output)]

    stream = cuda.Stream()
    cuda.memcpy_htod_async(d_input, input_img, stream)
    context.enqueue(batch_size, bindings, stream.handle, None)
    cuda.memcpy_dtoh_async(output, d_output, stream)

    # The download is asynchronous: wait for the stream to finish so the
    # caller never sees a partially written buffer.
    stream.synchronize()
    return output
示例15: do_inference
# 需要導入模塊: from pycuda import driver [as 別名]
# 或者: from pycuda.driver import memcpy_htod_async [as 別名]
def do_inference(context, h_input, d_input, h_output, d_output, stream):
    """Upload ``h_input``, execute ``context``, and download into ``h_output``.

    Args:
        context: Object providing ``execute_async(bindings, stream_handle)``.
        h_input: Host buffer copied to ``d_input``.
        d_input: Device buffer for the input; its integer value is the first
            binding.
        h_output: Host buffer that receives the contents of ``d_output``.
        d_output: Device buffer for the output; its integer value is the
            second binding.
        stream: Stream on which all work is queued; it is synchronized
            before the function finishes.

    No value is returned in the visible code; the result is delivered through
    ``h_output``.
    """
    # Host -> device.
    cuda.memcpy_htod_async(d_input, h_input, stream)

    # Execute with the two device buffers as bindings.
    device_ptrs = [int(d_input), int(d_output)]
    context.execute_async(bindings=device_ptrs, stream_handle=stream.handle)

    # Device -> host.
    cuda.memcpy_dtoh_async(h_output, d_output, stream)

    # Wait until everything queued on the stream has completed.
    stream.synchronize()
# The Onnx path is used for Onnx models.