本文整理匯總了Python中pycuda.driver.Stream方法的典型用法代碼示例。如果您正苦於以下問題:Python driver.Stream方法的具體用法?Python driver.Stream怎麼用?Python driver.Stream使用的例子?那麼,這裏精選的方法代碼示例或許可以為您提供幫助。您也可以進一步了解該方法所在類pycuda.driver的用法示例。
在下文中一共展示了driver.Stream方法的15個代碼示例,這些例子默認根據受歡迎程度排序。您可以為喜歡或者感覺有用的代碼點讚,您的評價將有助於系統推薦出更棒的Python代碼示例。
示例1: infer
# 需要導入模塊: from pycuda import driver [as 別名]
# 或者: from pycuda.driver import Stream [as 別名]
def infer(context, input_img, output_size, batch_size):
    """Run one inference pass on the GPU and return the predictions.

    Parameters
    ----------
    context
        Execution context providing ``get_engine()`` and ``enqueue()``.
        NOTE(review): looks like the legacy TensorRT Python API -- confirm.
    input_img
        Array with an ``astype`` method; it is cast to ``float32`` before
        being uploaded.
    output_size
        Shape (or element count) passed to ``np.empty`` for the result buffer.
    batch_size
        Batch size forwarded to ``enqueue``; it also scales both device
        allocations.

    Returns
    -------
    numpy.ndarray
        ``float32`` host array copied back from the second binding.
    """
    # This helper only supports exactly one input and one output binding.
    assert context.get_engine().get_nb_bindings() == 2

    host_in = input_img.astype(np.float32)
    host_out = np.empty(output_size, dtype=np.float32)

    # Device buffers: batch_size times the byte size of each host array.
    dev_in = cuda.mem_alloc(batch_size * host_in.nbytes)
    dev_out = cuda.mem_alloc(batch_size * host_out.nbytes)
    device_ptrs = [int(dev_in), int(dev_out)]

    # Upload, execute and download are all queued on the same stream.
    work_stream = cuda.Stream()
    cuda.memcpy_htod_async(dev_in, host_in, work_stream)
    context.enqueue(batch_size, device_ptrs, work_stream.handle, None)
    cuda.memcpy_dtoh_async(host_out, dev_out, work_stream)

    # Block until the queued work is done so host_out is safe to read.
    work_stream.synchronize()
    return host_out
示例2: inference_image
# 需要導入模塊: from pycuda import driver [as 別名]
# 或者: from pycuda.driver import Stream [as 別名]
def inference_image(context, input_img, batch_size, output_size=1000):
    """Run one inference pass on the GPU and return the output vector.

    Parameters
    ----------
    context
        Execution context providing ``get_engine()`` and ``enqueue()``.
        NOTE(review): looks like the legacy TensorRT Python API -- confirm.
    input_img
        Host array exposing ``size`` and ``dtype``; it is uploaded as-is.
        NOTE(review): no dtype conversion happens here, so the caller
        presumably supplies data in the dtype the engine expects -- confirm.
    batch_size
        Batch size forwarded to ``enqueue``; it also scales both device
        allocations.
    output_size
        Number of ``float32`` elements in the returned vector. Defaults to
        1000, the value that used to be hard-coded in the body.

    Returns
    -------
    numpy.ndarray
        ``float32`` host array of ``output_size`` elements copied back from
        the second binding.
    """
    engine = context.get_engine()
    # This helper only supports exactly one input and one output binding.
    assert engine.get_nb_bindings() == 2

    # Host buffer that receives the predictions.
    output = np.empty(output_size, dtype=np.float32)

    # Device buffers for the input and output bindings.
    d_input = cuda.mem_alloc(batch_size * input_img.size * input_img.dtype.itemsize)
    d_output = cuda.mem_alloc(batch_size * output.size * output.dtype.itemsize)
    bindings = [int(d_input), int(d_output)]

    # Upload, execute and download are all queued on the same stream.
    stream = cuda.Stream()
    cuda.memcpy_htod_async(d_input, input_img, stream)
    context.enqueue(batch_size, bindings, stream.handle, None)
    cuda.memcpy_dtoh_async(output, d_output, stream)

    # Block until the queued work is done so `output` is safe to read.
    stream.synchronize()
    return output
示例3: allocate_buffers
# 需要導入模塊: from pycuda import driver [as 別名]
# 或者: from pycuda.driver import Stream [as 別名]
def allocate_buffers(engine):
    """Create a host/device buffer pair for every binding of ``engine``.

    Parameters
    ----------
    engine
        Iterable of bindings that also provides ``get_binding_shape``,
        ``get_binding_dtype``, ``binding_is_input`` and ``max_batch_size``.

    Returns
    -------
    tuple
        ``(inputs, outputs, bindings, stream)``: two lists of
        ``HostDeviceMem`` pairs, the device addresses as ints in iteration
        order, and a newly created ``cuda.Stream``.
    """
    inputs, outputs, bindings = [], [], []
    stream = cuda.Stream()
    for entry in engine:
        # Element count of the binding, scaled by engine.max_batch_size.
        n_elems = trt.volume(engine.get_binding_shape(entry)) * engine.max_batch_size
        np_dtype = trt.nptype(engine.get_binding_dtype(entry))
        # Page-locked host buffer plus a device buffer of the same byte size.
        host_buf = cuda.pagelocked_empty(n_elems, np_dtype)
        dev_buf = cuda.mem_alloc(host_buf.nbytes)
        bindings.append(int(dev_buf))
        # File the pair under inputs or outputs depending on the binding.
        dest = inputs if engine.binding_is_input(entry) else outputs
        dest.append(HostDeviceMem(host_buf, dev_buf))
    return inputs, outputs, bindings, stream
# This function is generalized for multiple inputs/outputs.
# inputs and outputs are expected to be lists of HostDeviceMem objects.
示例4: __init__
# 需要導入模塊: from pycuda import driver [as 別名]
# 或者: from pycuda.driver import Stream [as 別名]
def __init__(self, model, input_shape, output_layout=7):
    """Set up the TensorRT plugins, engine and execution context.

    Parameters
    ----------
    model
        Stored as ``self.model``.
        NOTE(review): presumably identifies the engine that
        ``_load_engine`` reads -- confirm.
    input_shape
        Stored as ``self.input_shape``.
    output_layout
        Stored as ``self.output_layout``; defaults to 7.
    """
    self.model, self.input_shape, self.output_layout = (
        model,
        input_shape,
        output_layout,
    )
    self.trt_logger = trt.Logger(trt.Logger.INFO)

    # Plugins are loaded before the engine, in the original order.
    self._load_plugins()
    self.engine = self._load_engine()

    # Empty buffer bookkeeping and the stream are created before the context.
    self.host_inputs, self.cuda_inputs = [], []
    self.host_outputs, self.cuda_outputs = [], []
    self.bindings = []
    self.stream = cuda.Stream()
    self.context = self._create_context()
示例5: allocate_buffers
# 需要導入模塊: from pycuda import driver [as 別名]
# 或者: from pycuda.driver import Stream [as 別名]
def allocate_buffers(engine):
    """Allocate host and device buffers for each binding of an engine.

    Parameters
    ----------
    engine
        Iterable of bindings that also provides ``get_binding_shape``,
        ``get_binding_dtype``, ``binding_is_input`` and ``max_batch_size``.

    Returns
    -------
    tuple
        ``(inputs, outputs, bindings, stream)`` where ``inputs`` and
        ``outputs`` hold ``HostDeviceMem`` pairs, ``bindings`` holds the
        device addresses as ints, and ``stream`` is a new ``cuda.Stream``.
    """
    in_bufs = []
    out_bufs = []
    device_ptrs = []
    work_stream = cuda.Stream()

    for item in engine:
        shape = engine.get_binding_shape(item)
        count = trt.volume(shape) * engine.max_batch_size
        elem_type = trt.nptype(engine.get_binding_dtype(item))

        # Page-locked host array and a device allocation of equal byte size.
        pinned = cuda.pagelocked_empty(count, elem_type)
        on_device = cuda.mem_alloc(pinned.nbytes)
        device_ptrs.append(int(on_device))

        # Route the pair to the input or output list.
        bucket = in_bufs if engine.binding_is_input(item) else out_bufs
        bucket.append(HostDeviceMem(pinned, on_device))

    return in_bufs, out_bufs, device_ptrs, work_stream
示例6: __init__
# 需要導入模塊: from pycuda import driver [as 別名]
# 或者: from pycuda.driver import Stream [as 別名]
def __init__(self, layers=None, delta=None, stream = None, max_batch_size=32, max_streams=10, epochs = 10):
    """Initialise an empty network and optionally add the given layers.

    Parameters
    ----------
    layers
        Optional iterable of layers; each one is passed to ``add_layer``.
    delta
        Stored as ``self.delta``; ``None`` is replaced by ``0.0001``.
    stream
        Stream to store as ``self.stream``; when ``None`` a new
        ``drv.Stream`` is created.
    max_batch_size, max_streams, epochs
        Stored unchanged on the instance.
    """
    self.network, self.network_summary, self.network_mem = [], [], []

    # Reuse the caller's stream when one is given, else create a new one.
    self.stream = stream if stream is not None else drv.Stream()
    self.delta = 0.0001 if delta is None else delta

    self.max_batch_size = max_batch_size
    self.max_streams = max_streams
    self.epochs = epochs

    if layers is None:
        return
    for spec in layers:
        # NOTE(review): this calls a free function named add_layer; if
        # add_layer is actually a method of this class, this should be
        # self.add_layer(spec) -- confirm.
        add_layer(self, spec)
開發者ID:PacktPublishing,項目名稱:Hands-On-GPU-Programming-with-Python-and-CUDA,代碼行數:27,代碼來源:deep_neural_network.py
示例7: __init__
# 需要導入模塊: from pycuda import driver [as 別名]
# 或者: from pycuda.driver import Stream [as 別名]
def __init__(self, context: trt.IExecutionContext, stream=None, device=None, cuda_device=None, cuda_context=None):
    """Bind an execution context to freshly allocated torch buffers.

    Parameters
    ----------
    context
        Execution context; its ``engine`` attribute is stored as
        ``self.engine`` and used to allocate the buffers.
    stream
        Stream to use for this instance. When ``None`` a new
        ``cuda.Stream`` is created.
    device
        Torch device for the buffers; defaults to ``cuda:0`` when ``None``.
    cuda_device, cuda_context
        Stored unchanged on the instance.
    """
    self.engine = context.engine
    self.torch_device = torch.device("cuda:0") if device is None else device

    inputs, outputs, bindings = allocate_buffers_torch(self.engine, self.torch_device)
    self.context = context
    self.inputs = inputs
    self.outputs = outputs
    self.bindings = bindings
    # Name-indexed views of the same buffer objects.
    self.input_dict = {mem.name: mem for mem in inputs}
    self.output_dict = {mem.name: mem for mem in outputs}

    # Bug fix: self.stream used to be assigned only when no stream was
    # passed, so a caller-supplied stream left the attribute undefined.
    self.stream = cuda.Stream() if stream is None else stream

    self._batch_size = None
    self.cuda_device = cuda_device
    self.cuda_context = cuda_context
示例8: get_async
# 需要導入模塊: from pycuda import driver [as 別名]
# 或者: from pycuda.driver import Stream [as 別名]
def get_async(self, stream):
    """Start a device-to-host copy on ``stream`` and return the host buffer.

    Parameters
    ----------
    stream : pycuda.driver.Stream
        The CUDA stream passed to the device buffer's ``get_async``.

    Returns
    -------
    numpy.array
        ``self.host_buffer``, the destination handed to the copy.
        NOTE(review): nothing here synchronizes the stream, so the caller
        presumably must do so before reading the data -- confirm.
    """
    device_side = self.device_buffer
    host_side = self.host_buffer
    # The device buffer performs the copy into the host buffer.
    device_side.get_async(stream, host_side)
    return host_side
示例9: infer
# 需要導入模塊: from pycuda import driver [as 別名]
# 或者: from pycuda.driver import Stream [as 別名]
def infer(context, input_img, output_size, batch_size):
    """Execute the model once on the GPU and return its predictions.

    Parameters
    ----------
    context
        Execution context providing ``get_engine()`` and ``enqueue()``.
        NOTE(review): looks like the legacy TensorRT Python API -- confirm.
    input_img
        Array with an ``astype`` method; converted to ``float32`` before
        the upload.
    output_size
        Shape (or element count) passed to ``np.empty`` for the result.
    batch_size
        Batch size given to ``enqueue``; also multiplies both device
        allocation sizes.

    Returns
    -------
    numpy.ndarray
        ``float32`` host array copied back from the output binding.
    """
    trt_engine = context.get_engine()
    # Only engines with exactly two bindings are supported.
    assert trt_engine.get_nb_bindings() == 2

    img32 = input_img.astype(np.float32)
    result = np.empty(output_size, dtype=np.float32)

    # Allocate device memory for the input and the output.
    gpu_in = cuda.mem_alloc(batch_size * img32.nbytes)
    gpu_out = cuda.mem_alloc(batch_size * result.nbytes)
    ptrs = [int(gpu_in), int(gpu_out)]

    queue = cuda.Stream()
    # Host -> device, run the model, device -> host: all on one stream.
    cuda.memcpy_htod_async(gpu_in, img32, queue)
    context.enqueue(batch_size, ptrs, queue.handle, None)
    cuda.memcpy_dtoh_async(result, gpu_out, queue)
    # Wait for the stream to drain before handing back the result.
    queue.synchronize()
    return result
示例10: infer
# 需要導入模塊: from pycuda import driver [as 別名]
# 或者: from pycuda.driver import Stream [as 別名]
def infer(engine, input_img, batch_size):
    """Create an execution context, run one inference and return the output.

    Parameters
    ----------
    engine
        Engine providing ``create_execution_context()``,
        ``get_nb_bindings()`` and ``get_binding_dimensions()``.
        NOTE(review): looks like the legacy TensorRT Python API -- confirm.
    input_img
        Host array exposing ``size`` and ``dtype``; it is uploaded as-is.
        NOTE(review): no dtype conversion happens here, so the caller
        presumably supplies data in the dtype the engine expects -- confirm.
    batch_size
        Batch size forwarded to ``enqueue``; it also scales the buffers.

    Returns
    -------
    numpy.ndarray
        Page-locked ``float32`` array of ``C * H * W * batch_size`` elements,
        where the dimensions come from binding 1.
    """
    context = engine.create_execution_context()
    # This helper only supports exactly one input and one output binding.
    assert engine.get_nb_bindings() == 2

    # Output element count from the CHW dimensions of binding 1.
    dims = engine.get_binding_dimensions(1).to_DimsCHW()
    elt_count = dims.C() * dims.H() * dims.W() * batch_size

    # Page-locked host memory that receives the predictions.
    output = cuda.pagelocked_empty(elt_count, dtype=np.float32)

    d_input = cuda.mem_alloc(batch_size * input_img.size * input_img.dtype.itemsize)
    # `output` already covers the whole batch, so its byte size is all the
    # device buffer needs; the old size multiplied by batch_size again.
    d_output = cuda.mem_alloc(output.nbytes)
    bindings = [int(d_input), int(d_output)]

    # Upload, execute and download are all queued on the same stream.
    stream = cuda.Stream()
    cuda.memcpy_htod_async(d_input, input_img, stream)
    context.enqueue(batch_size, bindings, stream.handle, None)
    cuda.memcpy_dtoh_async(output, d_output, stream)

    # Bug fix: wait for the queued work to finish. Without this the caller
    # could read `output` before the asynchronous copy has completed.
    stream.synchronize()
    return output
示例11: infer
# 需要導入模塊: from pycuda import driver [as 別名]
# 或者: from pycuda.driver import Stream [as 別名]
def infer(context, input_img, batch_size):
    """Run one inference pass on the GPU and return the output.

    Parameters
    ----------
    context
        Execution context providing ``get_engine()`` and ``enqueue()``.
        NOTE(review): looks like the legacy TensorRT Python API -- confirm.
    input_img
        Array with an ``astype`` method; it is cast to ``float32`` before
        being uploaded.
    batch_size
        Batch size forwarded to ``enqueue``; it also scales the buffers.

    Returns
    -------
    numpy.ndarray
        Page-locked ``float32`` array of ``C * H * W * batch_size`` elements,
        where the dimensions come from binding 1.
    """
    engine = context.get_engine()
    # This helper only supports exactly one input and one output binding.
    assert engine.get_nb_bindings() == 2

    # Output element count from the CHW dimensions of binding 1.
    dims = engine.get_binding_dimensions(1).to_DimsCHW()
    elt_count = dims.C() * dims.H() * dims.W() * batch_size

    input_img = input_img.astype(np.float32)

    # Page-locked host memory that receives the predictions.
    output = cuda.pagelocked_empty(elt_count, dtype=np.float32)

    d_input = cuda.mem_alloc(batch_size * input_img.size * input_img.dtype.itemsize)
    # `output` already covers the whole batch, so its byte size is all the
    # device buffer needs; the old size multiplied by batch_size again.
    d_output = cuda.mem_alloc(output.nbytes)
    bindings = [int(d_input), int(d_output)]

    # Upload, execute and download are all queued on the same stream.
    stream = cuda.Stream()
    cuda.memcpy_htod_async(d_input, input_img, stream)
    context.enqueue(batch_size, bindings, stream.handle, None)
    cuda.memcpy_dtoh_async(output, d_output, stream)

    # Bug fix: wait for the queued work to finish. Without this the caller
    # could read `output` before the asynchronous copy has completed.
    stream.synchronize()
    return output
示例12: infer
# 需要導入模塊: from pycuda import driver [as 別名]
# 或者: from pycuda.driver import Stream [as 別名]
def infer(engine, input_img, batch_size):
    """Create an execution context, run one inference and return the output.

    Parameters
    ----------
    engine
        Engine providing ``create_execution_context()``,
        ``get_nb_bindings()`` and ``get_binding_dimensions()``.
        NOTE(review): looks like the legacy TensorRT Python API -- confirm.
    input_img
        Host array exposing ``size`` and ``dtype``; it is uploaded as-is.
        NOTE(review): no dtype conversion happens here, so the caller
        presumably supplies data in the dtype the engine expects -- confirm.
    batch_size
        Batch size forwarded to ``enqueue``; it also scales the buffers.

    Returns
    -------
    numpy.ndarray
        Page-locked ``float32`` array of ``C * H * W * batch_size`` elements,
        where the dimensions come from binding 1.
    """
    context = engine.create_execution_context()
    # This helper only supports exactly one input and one output binding.
    assert engine.get_nb_bindings() == 2

    # Output element count from the CHW dimensions of binding 1.
    dims = engine.get_binding_dimensions(1).to_DimsCHW()
    elt_count = dims.C() * dims.H() * dims.W() * batch_size

    # Page-locked host memory that receives the predictions.
    output = cuda.pagelocked_empty(elt_count, dtype=np.float32)

    d_input = cuda.mem_alloc(batch_size * input_img.size * input_img.dtype.itemsize)
    # `output` already covers the whole batch, so its byte size is all the
    # device buffer needs; the old size multiplied by batch_size again.
    d_output = cuda.mem_alloc(output.nbytes)
    bindings = [int(d_input), int(d_output)]

    # Upload, execute and download are all queued on the same stream.
    stream = cuda.Stream()
    cuda.memcpy_htod_async(d_input, input_img, stream)
    context.enqueue(batch_size, bindings, stream.handle, None)
    cuda.memcpy_dtoh_async(output, d_output, stream)

    # Bug fix: wait for the queued work to finish. Without this the caller
    # could read `output` before the asynchronous copy has completed.
    stream.synchronize()
    return output
示例13: infer
# 需要導入模塊: from pycuda import driver [as 別名]
# 或者: from pycuda.driver import Stream [as 別名]
def infer(context, input_img, output_size, batch_size):
    """Run one inference pass on the GPU and return the predictions.

    Parameters
    ----------
    context
        Execution context providing ``get_engine()`` and ``enqueue()``.
        NOTE(review): looks like the legacy TensorRT Python API -- confirm.
    input_img
        Array with an ``astype`` method; it is cast to ``float32`` before
        being uploaded.
    output_size
        Shape (or element count) passed to ``np.empty`` for the result buffer.
    batch_size
        Batch size forwarded to ``enqueue``; it also scales both device
        allocations.

    Returns
    -------
    numpy.ndarray
        ``float32`` host array copied back from the second binding.
    """
    engine = context.get_engine()
    # This helper only supports exactly one input and one output binding.
    assert engine.get_nb_bindings() == 2

    input_img = input_img.astype(np.float32)
    # Host buffer that receives the predictions.
    output = np.empty(output_size, dtype=np.float32)

    # Device buffers for the input and output bindings.
    d_input = cuda.mem_alloc(batch_size * input_img.size * input_img.dtype.itemsize)
    d_output = cuda.mem_alloc(batch_size * output.size * output.dtype.itemsize)
    bindings = [int(d_input), int(d_output)]

    # Upload, execute and download are all queued on the same stream.
    stream = cuda.Stream()
    cuda.memcpy_htod_async(d_input, input_img, stream)
    context.enqueue(batch_size, bindings, stream.handle, None)
    cuda.memcpy_dtoh_async(output, d_output, stream)

    # Bug fix: wait explicitly for the queued work to finish, so the
    # returned buffer never depends on the copy happening to be complete.
    stream.synchronize()
    return output
示例14: infer
# 需要導入模塊: from pycuda import driver [as 別名]
# 或者: from pycuda.driver import Stream [as 別名]
def infer(context, input_img, output_size, batch_size):
    """Execute the model once on the GPU and return its predictions.

    Parameters
    ----------
    context
        Execution context providing ``get_engine()`` and ``enqueue()``.
        NOTE(review): looks like the legacy TensorRT Python API -- confirm.
    input_img
        Array with an ``astype`` method; converted to ``float32`` before
        the upload.
    output_size
        Shape (or element count) passed to ``np.empty`` for the result.
    batch_size
        Batch size given to ``enqueue``; also multiplies both device
        allocation sizes.

    Returns
    -------
    numpy.ndarray
        ``float32`` host array copied back from the output binding.
    """
    engine = context.get_engine()
    # Only engines with exactly two bindings are supported.
    assert engine.get_nb_bindings() == 2

    input_img = input_img.astype(np.float32)
    # Host array that receives the predictions.
    output = np.empty(output_size, dtype=np.float32)

    # Allocate device memory for the input and the output.
    d_input = cuda.mem_alloc(batch_size * input_img.size * input_img.dtype.itemsize)
    d_output = cuda.mem_alloc(batch_size * output.size * output.dtype.itemsize)
    bindings = [int(d_input), int(d_output)]

    # Host -> device, run the model, device -> host: all on one stream.
    stream = cuda.Stream()
    cuda.memcpy_htod_async(d_input, input_img, stream)
    context.enqueue(batch_size, bindings, stream.handle, None)
    cuda.memcpy_dtoh_async(output, d_output, stream)

    # Bug fix: wait explicitly for the stream to drain, so the returned
    # buffer never depends on the copy happening to be complete.
    stream.synchronize()
    return output
示例15: infer
# 需要導入模塊: from pycuda import driver [as 別名]
# 或者: from pycuda.driver import Stream [as 別名]
def infer(engine, input_img, batch_size):
    """Create an execution context, run one inference and return the output.

    Parameters
    ----------
    engine
        Engine providing ``create_execution_context()``,
        ``get_nb_bindings()`` and ``get_binding_dimensions()``.
        NOTE(review): looks like the legacy TensorRT Python API -- confirm.
    input_img
        Host array exposing ``size`` and ``dtype``; it is uploaded as-is.
        NOTE(review): no dtype conversion happens here, so the caller
        presumably supplies data in the dtype the engine expects -- confirm.
    batch_size
        Batch size forwarded to ``enqueue``; it also scales the buffers.

    Returns
    -------
    numpy.ndarray
        Page-locked ``float32`` array of ``C * H * W * batch_size`` elements,
        where the dimensions come from binding 1.
    """
    context = engine.create_execution_context()
    # This helper only supports exactly one input and one output binding.
    assert engine.get_nb_bindings() == 2

    # Output element count from the CHW dimensions of binding 1.
    dims = engine.get_binding_dimensions(1).to_DimsCHW()
    elt_count = dims.C() * dims.H() * dims.W() * batch_size

    # Page-locked host memory that receives the predictions.
    output = cuda.pagelocked_empty(elt_count, dtype=np.float32)

    d_input = cuda.mem_alloc(batch_size * input_img.size * input_img.dtype.itemsize)
    # `output` already covers the whole batch, so its byte size is all the
    # device buffer needs; the old size multiplied by batch_size again.
    d_output = cuda.mem_alloc(output.nbytes)
    bindings = [int(d_input), int(d_output)]

    # Upload, execute and download are all queued on the same stream.
    stream = cuda.Stream()
    cuda.memcpy_htod_async(d_input, input_img, stream)
    context.enqueue(batch_size, bindings, stream.handle, None)
    cuda.memcpy_dtoh_async(output, d_output, stream)

    # Bug fix: wait for the queued work to finish. Without this the caller
    # could read `output` before the asynchronous copy has completed.
    stream.synchronize()
    return output