本文整理匯總了Python中pycuda.driver.pagelocked_empty方法的典型用法代碼示例。如果您正苦於以下問題:Python driver.pagelocked_empty方法的具體用法?Python driver.pagelocked_empty怎麽用?Python driver.pagelocked_empty使用的例子?那麽, 這裏精選的方法代碼示例或許可以為您提供幫助。您也可以進一步了解該方法所在類pycuda.driver
的用法示例。
在下文中一共展示了driver.pagelocked_empty方法的15個代碼示例,這些例子默認根據受歡迎程度排序。您可以為喜歡或者感覺有用的代碼點讚,您的評價將有助於係統推薦出更棒的Python代碼示例。
示例1: allocate_buffers
# 需要導入模塊: from pycuda import driver [as 別名]
# 或者: from pycuda.driver import pagelocked_empty [as 別名]
def allocate_buffers(engine):
    """Allocate pinned host buffers, device buffers and a CUDA stream for every
    binding of a TensorRT engine.

    Returns (inputs, outputs, bindings, stream) where inputs/outputs are lists
    of HostDeviceMem pairs and bindings is the list of device pointers (ints)
    in engine-binding order.
    """
    inputs, outputs, bindings = [], [], []
    stream = cuda.Stream()
    for binding in engine:
        # Element count must cover the largest batch the engine accepts.
        elem_count = trt.volume(engine.get_binding_shape(binding)) * engine.max_batch_size
        np_dtype = trt.nptype(engine.get_binding_dtype(binding))
        # Page-locked host memory enables fast async host<->device copies.
        host_buf = cuda.pagelocked_empty(elem_count, np_dtype)
        dev_buf = cuda.mem_alloc(host_buf.nbytes)
        # The binding table holds raw device pointers as ints.
        bindings.append(int(dev_buf))
        bucket = inputs if engine.binding_is_input(binding) else outputs
        bucket.append(HostDeviceMem(host_buf, dev_buf))
    return inputs, outputs, bindings, stream
# This function is generalized for multiple inputs/outputs.
# inputs and outputs are expected to be lists of HostDeviceMem objects.
示例2: __allocate_buffers
# 需要導入模塊: from pycuda import driver [as 別名]
# 或者: from pycuda.driver import pagelocked_empty [as 別名]
def __allocate_buffers(self, engine):
    """Allocate a pinned host buffer and a device buffer for each engine binding.

    Returns (inputs, outputs, bindings); no stream is created here.
    """
    inputs, outputs, bindings = [], [], []
    for binding in engine:
        n_elems = trt.volume(engine.get_binding_shape(binding)) * engine.max_batch_size
        np_type = trt.nptype(engine.get_binding_dtype(binding))
        # Pinned host memory + matching device allocation of the same byte size.
        pinned = cuda.pagelocked_empty(n_elems, np_type)
        gpu_mem = cuda.mem_alloc(pinned.nbytes)
        bindings.append(int(gpu_mem))
        if engine.binding_is_input(binding):
            inputs.append(HostDeviceMem(pinned, gpu_mem))
        else:
            outputs.append(HostDeviceMem(pinned, gpu_mem))
    return inputs, outputs, bindings
示例3: allocate_buffers
# 需要導入模塊: from pycuda import driver [as 別名]
# 或者: from pycuda.driver import pagelocked_empty [as 別名]
def allocate_buffers(engine):
    """Allocates all host/device in/out buffers required for an engine."""
    inputs = []
    outputs = []
    bindings = []
    stream = cuda.Stream()
    for binding in engine:
        shape = engine.get_binding_shape(binding)
        # Size is per-sample volume times the maximum supported batch.
        count = trt.volume(shape) * engine.max_batch_size
        dtype = trt.nptype(engine.get_binding_dtype(binding))
        # Host side is page-locked so async memcpy can be used with the stream.
        host_mem = cuda.pagelocked_empty(count, dtype)
        device_mem = cuda.mem_alloc(host_mem.nbytes)
        bindings.append(int(device_mem))
        pair = HostDeviceMem(host_mem, device_mem)
        if engine.binding_is_input(binding):
            inputs.append(pair)
        else:
            outputs.append(pair)
    return inputs, outputs, bindings, stream
示例4: allocate_buffers
# 需要導入模塊: from pycuda import driver [as 別名]
# 或者: from pycuda.driver import pagelocked_empty [as 別名]
def allocate_buffers(engine):
    """Allocate per-binding buffers, reshaping each pinned host buffer to
    [max_batch_size, *binding_shape] and recording the binding name and index.

    Returns (inputs, outputs, bindings).
    """
    inputs, outputs, bindings = [], [], []
    # enumerate() supplies the running binding index the original tracked by hand.
    for index, binding in enumerate(engine):
        n = trt.volume(engine.get_binding_shape(binding)) * engine.max_batch_size
        np_dtype = trt.nptype(engine.get_binding_dtype(binding))
        full_shape = [engine.max_batch_size] + list(engine.get_binding_shape(binding))
        # Flat pinned allocation viewed with the batch-major shape.
        host_mem = cuda.pagelocked_empty(n, np_dtype).reshape(full_shape)
        device_mem = cuda.mem_alloc(host_mem.nbytes)
        bindings.append(int(device_mem))
        entry = HostDeviceMem(host_mem, device_mem, binding, index)
        if engine.binding_is_input(binding):
            inputs.append(entry)
        else:
            outputs.append(entry)
    return inputs, outputs, bindings
示例5: allocate_buffers_torch
# 需要導入模塊: from pycuda import driver [as 別名]
# 或者: from pycuda.driver import pagelocked_empty [as 別名]
def allocate_buffers_torch(engine: trt.ICudaEngine, device):
    """Allocate pinned host buffers paired with torch tensors as device buffers.

    The torch tensor's data_ptr() serves as the device binding pointer, letting
    TensorRT write directly into torch-managed GPU memory.

    Returns (inputs, outputs, bindings).
    """
    import torch
    inputs, outputs, bindings = [], [], []
    dtype_map = np_to_torch_dtype_map()
    for index, binding in enumerate(engine):
        n = trt.volume(engine.get_binding_shape(binding)) * engine.max_batch_size
        np_dtype = trt.nptype(engine.get_binding_dtype(binding))
        full_shape = [engine.max_batch_size] + list(engine.get_binding_shape(binding))
        host_mem = cuda.pagelocked_empty(n, np_dtype).reshape(full_shape)
        # Device side lives in a torch tensor with the numpy dtype translated.
        device_mem = torch.empty(*host_mem.shape, device=device,
                                 dtype=dtype_map[host_mem.dtype])
        bindings.append(device_mem.data_ptr())
        entry = HostDeviceMem(host_mem, device_mem, binding, index)
        if engine.binding_is_input(binding):
            inputs.append(entry)
        else:
            outputs.append(entry)
    return inputs, outputs, bindings
示例6: infer
# 需要導入模塊: from pycuda import driver [as 別名]
# 或者: from pycuda.driver import pagelocked_empty [as 別名]
def infer(engine, input_img, batch_size):
    """Run one inference pass on `engine` and return the host output array.

    Args:
        engine: legacy TensorRT engine exposing create_execution_context(),
            get_nb_bindings() and get_binding_dimensions().
        input_img: numpy array holding the (already preprocessed) input batch.
        batch_size: number of samples in the batch.

    Returns:
        Page-locked numpy float32 array with the flattened network output.
    """
    # Load engine; this sample assumes exactly one input and one output binding.
    context = engine.create_execution_context()
    assert engine.get_nb_bindings() == 2
    # Output element count derived from the output binding's CHW dimensions.
    dims = engine.get_binding_dimensions(1).to_DimsCHW()
    elt_count = dims.C() * dims.H() * dims.W() * batch_size
    # Page-locked host memory so async copies can be used.
    output = cuda.pagelocked_empty(elt_count, dtype=np.float32)
    # Allocate device memory for input and output.
    d_input = cuda.mem_alloc(batch_size * input_img.size * input_img.dtype.itemsize)
    d_output = cuda.mem_alloc(batch_size * output.size * output.dtype.itemsize)
    bindings = [int(d_input), int(d_output)]
    stream = cuda.Stream()
    # Transfer input data to device.
    cuda.memcpy_htod_async(d_input, input_img, stream)
    # Execute model.
    context.enqueue(batch_size, bindings, stream.handle, None)
    # Transfer predictions back.
    cuda.memcpy_dtoh_async(output, d_output, stream)
    # FIX: memcpy_dtoh_async is asynchronous — without synchronizing the stream
    # the function could return before the copy finishes, yielding garbage.
    stream.synchronize()
    return output
示例7: normalize
# 需要導入模塊: from pycuda import driver [as 別名]
# 或者: from pycuda.driver import pagelocked_empty [as 別名]
def normalize(data):
    """Map 8-bit pixel values into inverted [0, 1] floats in pinned memory,
    printing a 28-column ASCII-art preview of the image as a side effect.
    """
    # Allocate page-locked memory matching the input's shape.
    norm_data = cuda.pagelocked_empty(data.shape, np.float32)
    print("\n\n\n---------------------------", "\n")
    for i, pixel in enumerate(data):
        # Pick a density glyph per pixel; newline every 28 glyphs (one image row).
        glyph = " .:-=+*#%@"[pixel // 26]
        row_break = "\n" if (i + 1) % 28 == 0 else ""
        print(glyph + row_break, end="")
        # Invert and scale: 0 (black) -> 1.0, 255 (white) -> 0.0.
        norm_data[i] = 1.0 - pixel / 255.0
    print("\n")
    return norm_data
示例8: infer
# 需要導入模塊: from pycuda import driver [as 別名]
# 或者: from pycuda.driver import pagelocked_empty [as 別名]
def infer(context, input_img, batch_size):
    """Run one inference pass via an existing execution `context`.

    Args:
        context: legacy TensorRT execution context exposing get_engine()
            and enqueue().
        input_img: numpy array with the input batch (converted to float32 here).
        batch_size: number of samples in the batch.

    Returns:
        Page-locked numpy float32 array with the flattened network output.
    """
    # Load engine; this sample assumes exactly one input and one output binding.
    engine = context.get_engine()
    assert engine.get_nb_bindings() == 2
    # Output element count derived from the output binding's CHW dimensions.
    dims = engine.get_binding_dimensions(1).to_DimsCHW()
    elt_count = dims.C() * dims.H() * dims.W() * batch_size
    # Convert input data to float32 as the network expects.
    input_img = input_img.astype(np.float32)
    # Page-locked host memory so async copies can be used.
    output = cuda.pagelocked_empty(elt_count, dtype=np.float32)
    # Allocate device memory for input and output.
    d_input = cuda.mem_alloc(batch_size * input_img.size * input_img.dtype.itemsize)
    d_output = cuda.mem_alloc(batch_size * output.size * output.dtype.itemsize)
    bindings = [int(d_input), int(d_output)]
    stream = cuda.Stream()
    # Transfer input data to device.
    cuda.memcpy_htod_async(d_input, input_img, stream)
    # Execute model.
    context.enqueue(batch_size, bindings, stream.handle, None)
    # Transfer predictions back.
    cuda.memcpy_dtoh_async(output, d_output, stream)
    # FIX: memcpy_dtoh_async is asynchronous — without synchronizing the stream
    # the function could return before the copy finishes, yielding garbage.
    stream.synchronize()
    return output
示例9: infer
# 需要導入模塊: from pycuda import driver [as 別名]
# 或者: from pycuda.driver import pagelocked_empty [as 別名]
def infer(engine, input_img, batch_size):
    """Run one inference pass on `engine` and return the host output array.

    Args:
        engine: legacy TensorRT engine exposing create_execution_context(),
            get_nb_bindings() and get_binding_dimensions().
        input_img: numpy array holding the (already preprocessed) input batch.
        batch_size: number of samples in the batch.

    Returns:
        Page-locked numpy float32 array with the flattened network output.
    """
    # Load engine; this sample assumes exactly one input and one output binding.
    context = engine.create_execution_context()
    assert engine.get_nb_bindings() == 2
    # Output element count derived from the output binding's CHW dimensions.
    dims = engine.get_binding_dimensions(1).to_DimsCHW()
    elt_count = dims.C() * dims.H() * dims.W() * batch_size
    # Page-locked host memory so async copies can be used.
    output = cuda.pagelocked_empty(elt_count, dtype=np.float32)
    # Allocate device memory for input and output.
    d_input = cuda.mem_alloc(batch_size * input_img.size * input_img.dtype.itemsize)
    d_output = cuda.mem_alloc(batch_size * output.size * output.dtype.itemsize)
    bindings = [int(d_input), int(d_output)]
    stream = cuda.Stream()
    # Transfer input data to device.
    cuda.memcpy_htod_async(d_input, input_img, stream)
    # Execute model.
    context.enqueue(batch_size, bindings, stream.handle, None)
    # Transfer predictions back.
    cuda.memcpy_dtoh_async(output, d_output, stream)
    # FIX: memcpy_dtoh_async is asynchronous — without synchronizing the stream
    # the function could return before the copy finishes, yielding garbage.
    stream.synchronize()
    return output
示例10: allocate_buffers
# 需要導入模塊: from pycuda import driver [as 別名]
# 或者: from pycuda.driver import pagelocked_empty [as 別名]
def allocate_buffers(engine):
    """Allocate host/device buffer pairs and a stream for a fixed two-binding
    engine (binding 0 = input, binding 1 = output).

    Returns (h_input, d_input, h_output, d_output, stream).
    """
    # Page-locked host buffers (won't be swapped to disk) sized from the
    # binding shapes, typed from the model's declared dtype.
    in_elems = trt.volume(engine.get_binding_shape(0))
    out_elems = trt.volume(engine.get_binding_shape(1))
    h_input = cuda.pagelocked_empty(in_elems, dtype=trt.nptype(ModelData.DTYPE))
    h_output = cuda.pagelocked_empty(out_elems, dtype=trt.nptype(ModelData.DTYPE))
    # Device allocations mirror the host buffers byte-for-byte.
    d_input = cuda.mem_alloc(h_input.nbytes)
    d_output = cuda.mem_alloc(h_output.nbytes)
    # Stream used to overlap copies with inference.
    stream = cuda.Stream()
    return h_input, d_input, h_output, d_output, stream
示例11: infer
# 需要導入模塊: from pycuda import driver [as 別名]
# 或者: from pycuda.driver import pagelocked_empty [as 別名]
def infer(context, input_img, batch_size):
    """Run one inference pass via an existing execution `context`.

    Args:
        context: legacy TensorRT execution context exposing get_engine()
            and enqueue().
        input_img: numpy array with the input batch (converted to float32 here).
        batch_size: number of samples in the batch.

    Returns:
        Page-locked numpy float32 array with the flattened network output.
    """
    # Load engine; this sample assumes exactly one input and one output binding.
    engine = context.get_engine()
    assert engine.get_nb_bindings() == 2
    # Output element count derived from the output binding's CHW dimensions.
    dims = engine.get_binding_dimensions(1).to_DimsCHW()
    elt_count = dims.C() * dims.H() * dims.W() * batch_size
    # Convert input data to float32 as the network expects.
    input_img = input_img.astype(np.float32)
    # Page-locked host memory so async copies can be used.
    output = cuda.pagelocked_empty(elt_count, dtype=np.float32)
    # Allocate device memory for input and output.
    d_input = cuda.mem_alloc(batch_size * input_img.size * input_img.dtype.itemsize)
    d_output = cuda.mem_alloc(batch_size * output.size * output.dtype.itemsize)
    bindings = [int(d_input), int(d_output)]
    stream = cuda.Stream()
    # Transfer input data to device.
    cuda.memcpy_htod_async(d_input, input_img, stream)
    # Execute model.
    context.enqueue(batch_size, bindings, stream.handle, None)
    # Transfer predictions back.
    cuda.memcpy_dtoh_async(output, d_output, stream)
    # FIX: memcpy_dtoh_async is asynchronous — without synchronizing the stream
    # the function could return before the copy finishes, yielding garbage.
    stream.synchronize()
    return output
示例12: _create_context
# 需要導入模塊: from pycuda import driver [as 別名]
# 或者: from pycuda.driver import pagelocked_empty [as 別名]
def _create_context(self):
    """Allocate float32 host/device buffers for every binding of self.engine,
    record them on the instance, and return a new execution context.
    """
    for binding in self.engine:
        # Per-binding element count covering the maximum batch size.
        num_elems = (trt.volume(self.engine.get_binding_shape(binding))
                     * self.engine.max_batch_size)
        # NOTE(review): dtype is hard-wired to float32 here rather than taken
        # from the binding — matches the original; confirm engine is FP32.
        pinned = cuda.pagelocked_empty(num_elems, np.float32)
        dev_ptr = cuda.mem_alloc(pinned.nbytes)
        self.bindings.append(int(dev_ptr))
        if self.engine.binding_is_input(binding):
            self.host_inputs.append(pinned)
            self.cuda_inputs.append(dev_ptr)
        else:
            self.host_outputs.append(pinned)
            self.cuda_outputs.append(dev_ptr)
    return self.engine.create_execution_context()
示例13: allocate
# 需要導入模塊: from pycuda import driver [as 別名]
# 或者: from pycuda.driver import pagelocked_empty [as 別名]
def allocate(n, dtype=numpy.float32):
    """ allocate context-portable pinned host memory """
    # PORTABLE makes the pinned allocation usable from any CUDA context.
    flags = drv.host_alloc_flags.PORTABLE
    return drv.pagelocked_empty(int(n), dtype, order='C', mem_flags=flags)
示例14: allocate
# 需要導入模塊: from pycuda import driver [as 別名]
# 或者: from pycuda.driver import pagelocked_empty [as 別名]
def allocate(n, dtype=numpy.float32):
    """ allocate context-portable device mapped host memory """
    # PORTABLE: usable from any context; DEVICEMAP: mappable into device
    # address space (zero-copy access from kernels).
    flags = drv.host_alloc_flags.PORTABLE | drv.host_alloc_flags.DEVICEMAP
    return drv.pagelocked_empty(int(n), dtype, order='C', mem_flags=flags)
示例15: allocate_buffers
# 需要導入模塊: from pycuda import driver [as 別名]
# 或者: from pycuda.driver import pagelocked_empty [as 別名]
def allocate_buffers(engine):
    """Allocate pinned host and raw device buffers for a two-binding engine
    (binding 0 = input, binding 1 = output); no stream is created.

    Returns (h_input, d_input, h_output, d_output).
    """
    np_dtype = trt.nptype(DTYPE)
    # Page-locked host buffers sized from each binding's volume.
    h_input = cuda.pagelocked_empty(trt.volume(engine.get_binding_shape(0)), dtype=np_dtype)
    h_output = cuda.pagelocked_empty(trt.volume(engine.get_binding_shape(1)), dtype=np_dtype)
    # Device buffers mirror the host buffers byte-for-byte.
    d_input = cuda.mem_alloc(h_input.nbytes)
    d_output = cuda.mem_alloc(h_output.nbytes)
    return h_input, d_input, h_output, d_output