

Python driver.pagelocked_empty Method Code Examples

This article collects typical usage examples of the Python method pycuda.driver.pagelocked_empty. If you are unsure what exactly driver.pagelocked_empty does, how to call it, or what it looks like in practice, the curated code examples below should help. You can also explore further usage examples from the pycuda.driver module.


The following presents 15 code examples of driver.pagelocked_empty, sorted by popularity by default.
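Before the examples, here is a minimal, self-contained sketch of the pattern they all follow: allocate a pinned (page-locked) host buffer with pagelocked_empty, allocate matching device memory, and use a stream for asynchronous copies. This sketch is not taken from the examples below; pycuda.autoinit is used here only to create a CUDA context, and the buffer names are illustrative.

import numpy as np
import pycuda.autoinit  # creates a default CUDA context
import pycuda.driver as cuda

# Pinned host memory allows truly asynchronous host<->device copies.
host_buf = cuda.pagelocked_empty(1024, dtype=np.float32)
host_buf[:] = np.arange(1024, dtype=np.float32)

dev_buf = cuda.mem_alloc(host_buf.nbytes)  # device allocation of the same size
stream = cuda.Stream()

cuda.memcpy_htod_async(dev_buf, host_buf, stream)  # host -> device
cuda.memcpy_dtoh_async(host_buf, dev_buf, stream)  # device -> host
stream.synchronize()  # wait for both copies to finish before reading host_buf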

Example 1: allocate_buffers

# Required import: from pycuda import driver [as alias]
# Or: from pycuda.driver import pagelocked_empty [as alias]
def allocate_buffers(engine):
    inputs = []
    outputs = []
    bindings = []
    stream = cuda.Stream()
    for binding in engine:
        size = trt.volume(engine.get_binding_shape(binding)) * engine.max_batch_size
        dtype = trt.nptype(engine.get_binding_dtype(binding))
        # Allocate host and device buffers
        host_mem = cuda.pagelocked_empty(size, dtype)
        device_mem = cuda.mem_alloc(host_mem.nbytes)
        # Append the device buffer to device bindings.
        bindings.append(int(device_mem))
        # Append to the appropriate list.
        if engine.binding_is_input(binding):
            inputs.append(HostDeviceMem(host_mem, device_mem))
        else:
            outputs.append(HostDeviceMem(host_mem, device_mem))
    return inputs, outputs, bindings, stream

# This function is generalized for multiple inputs/outputs.
# inputs and outputs are expected to be lists of HostDeviceMem objects. 
Developer: aimuch, Project: iAI, Source: common.py
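Several of these examples (including the one above) assume a small helper class named HostDeviceMem that simply pairs a pinned host array with its device allocation, plus a generalized inference routine. A minimal sketch of both is given below, modeled on the TensorRT sample code these snippets were adapted from; the exact definitions may differ per project.

import pycuda.driver as cuda

class HostDeviceMem(object):
    """Pairs a page-locked host buffer with its device buffer."""
    def __init__(self, host_mem, device_mem):
        self.host = host_mem
        self.device = device_mem

    def __repr__(self):
        return "Host:\n" + str(self.host) + "\nDevice:\n" + str(self.device)

def do_inference(context, bindings, inputs, outputs, stream, batch_size=1):
    # Copy input data to the device asynchronously.
    for inp in inputs:
        cuda.memcpy_htod_async(inp.device, inp.host, stream)
    # Run inference on the same stream.
    context.execute_async(batch_size=batch_size, bindings=bindings, stream_handle=stream.handle)
    # Copy results back into the pinned host buffers.
    for out in outputs:
        cuda.memcpy_dtoh_async(out.host, out.device, stream)
    # Wait for all work on the stream to finish before reading the outputs.
    stream.synchronize()
    return [out.host for out in outputs]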

Example 2: __allocate_buffers

# Required import: from pycuda import driver [as alias]
# Or: from pycuda.driver import pagelocked_empty [as alias]
def __allocate_buffers(self, engine):
    inputs = []
    outputs = []
    bindings = []
    for binding in engine:
        size = trt.volume(engine.get_binding_shape(binding)) * engine.max_batch_size
        dtype = trt.nptype(engine.get_binding_dtype(binding))
        # Allocate host and device buffers
        host_mem = cuda.pagelocked_empty(size, dtype)
        device_mem = cuda.mem_alloc(host_mem.nbytes)
        # Append the device buffer to device bindings.
        bindings.append(int(device_mem))
        # Append to the appropriate list.
        if engine.binding_is_input(binding):
            inputs.append(HostDeviceMem(host_mem, device_mem))
        else:
            outputs.append(HostDeviceMem(host_mem, device_mem))
    return inputs, outputs, bindings
Developer: becauseofAI, Project: lffd-pytorch, Source: predict_tensorrt.py

Example 3: allocate_buffers

# Required import: from pycuda import driver [as alias]
# Or: from pycuda.driver import pagelocked_empty [as alias]
def allocate_buffers(engine):
    """Allocates all host/device in/out buffers required for an engine."""
    inputs = []
    outputs = []
    bindings = []
    stream = cuda.Stream()
    for binding in engine:
        size = trt.volume(engine.get_binding_shape(binding)) * \
               engine.max_batch_size
        dtype = trt.nptype(engine.get_binding_dtype(binding))
        # Allocate host and device buffers
        host_mem = cuda.pagelocked_empty(size, dtype)
        device_mem = cuda.mem_alloc(host_mem.nbytes)
        # Append the device buffer to device bindings.
        bindings.append(int(device_mem))
        # Append to the appropriate list.
        if engine.binding_is_input(binding):
            inputs.append(HostDeviceMem(host_mem, device_mem))
        else:
            outputs.append(HostDeviceMem(host_mem, device_mem))
    return inputs, outputs, bindings, stream 
Developer: jkjung-avt, Project: tensorrt_demos, Source: yolov3.py

Example 4: allocate_buffers

# Required import: from pycuda import driver [as alias]
# Or: from pycuda.driver import pagelocked_empty [as alias]
def allocate_buffers(engine):
    inputs = []
    outputs = []
    bindings = []
    index = 0
    for binding in engine:
        size = trt.volume(engine.get_binding_shape(binding)) * engine.max_batch_size
        dtype = trt.nptype(engine.get_binding_dtype(binding))
        shape = [engine.max_batch_size] + list(engine.get_binding_shape(binding))
        # Allocate host and device buffers
        host_mem = cuda.pagelocked_empty(size, dtype).reshape(shape)
        device_mem = cuda.mem_alloc(host_mem.nbytes)
        # Append the device buffer to device bindings.
        bindings.append(int(device_mem))
        # Append to the appropriate list.
        if engine.binding_is_input(binding):
            inputs.append(HostDeviceMem(host_mem, device_mem, binding, index))
        else:
            outputs.append(HostDeviceMem(host_mem, device_mem, binding, index))
        index += 1
    return inputs, outputs, bindings 
Developer: traveller59, Project: torch2trt, Source: common.py

Example 5: allocate_buffers_torch

# Required import: from pycuda import driver [as alias]
# Or: from pycuda.driver import pagelocked_empty [as alias]
def allocate_buffers_torch(engine: trt.ICudaEngine, device):
    import torch
    inputs = []
    outputs = []
    bindings = []
    index = 0
    dtype_map = np_to_torch_dtype_map()
    for binding in engine:
        size = trt.volume(engine.get_binding_shape(binding)) * engine.max_batch_size
        dtype = trt.nptype(engine.get_binding_dtype(binding))
        shape = [engine.max_batch_size] + list(engine.get_binding_shape(binding))
        # Allocate host and device buffers
        host_mem = cuda.pagelocked_empty(size, dtype).reshape(shape)
        device_mem = torch.empty(*host_mem.shape, device=device, dtype=dtype_map[host_mem.dtype])
        # Append the device buffer to device bindings.
        bindings.append(device_mem.data_ptr())
        # Append to the appropriate list.
        if engine.binding_is_input(binding):
            inputs.append(HostDeviceMem(host_mem, device_mem, binding, index))
        else:
            outputs.append(HostDeviceMem(host_mem, device_mem, binding, index))
        index += 1
    return inputs, outputs, bindings 
Developer: traveller59, Project: torch2trt, Source: common.py
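In this variant the device buffer is a torch CUDA tensor rather than a raw PyCUDA allocation, and np_to_torch_dtype_map() is assumed to be defined elsewhere in that project, mapping numpy dtypes to torch dtypes. One possible way to move data through such a buffer pair is sketched below; host_to_device and device_to_host are hypothetical helpers, and whether the copies are truly asynchronous depends on torch recognizing the externally pinned numpy array.

import torch

def host_to_device(host_mem, device_mem):
    # Wrap the pinned numpy array as a CPU tensor (no copy), then copy it to the GPU tensor.
    device_mem.copy_(torch.from_numpy(host_mem), non_blocking=True)

def device_to_host(host_mem, device_mem):
    # Copy the GPU tensor back into the pinned numpy array via a CPU view of it.
    torch.from_numpy(host_mem).copy_(device_mem, non_blocking=True)
    torch.cuda.synchronize()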

Example 6: infer

# Required import: from pycuda import driver [as alias]
# Or: from pycuda.driver import pagelocked_empty [as alias]
def infer(engine, input_img, batch_size):
    # load engine
    context = engine.create_execution_context()
    assert(engine.get_nb_bindings() == 2)
    # create output array to receive data
    dims = engine.get_binding_dimensions(1).to_DimsCHW()
    elt_count = dims.C() * dims.H() * dims.W() * batch_size
    # allocate page-locked host memory for the output
    output = cuda.pagelocked_empty(elt_count, dtype=np.float32)

    # allocate device memory
    d_input = cuda.mem_alloc(batch_size * input_img.size * input_img.dtype.itemsize)
    d_output = cuda.mem_alloc(batch_size * output.size * output.dtype.itemsize)

    bindings = [int(d_input), int(d_output)]

    stream = cuda.Stream()

    # transfer input data to the device
    cuda.memcpy_htod_async(d_input, input_img, stream)
    # execute the model (legacy TensorRT Python API)
    context.enqueue(batch_size, bindings, stream.handle, None)
    # transfer predictions back to the pinned host buffer
    cuda.memcpy_dtoh_async(output, d_output, stream)
    # wait for the asynchronous copy to finish before reading the output
    stream.synchronize()

    # return predictions
    return output
Developer: aimuch, Project: iAI, Source: uff_mnist.py

Example 7: normalize

# Required import: from pycuda import driver [as alias]
# Or: from pycuda.driver import pagelocked_empty [as alias]
def normalize(data):
    # allocate page-locked memory for the normalized image
    norm_data = cuda.pagelocked_empty(data.shape, np.float32)
    print("\n\n\n---------------------------", "\n")
    for i in range(len(data)):
        # print the raw MNIST digit as ASCII art while normalizing each pixel
        print(" .:-=+*#%@"[data[i] // 26] + ("\n" if ((i + 1) % 28 == 0) else ""), end="")
        norm_data[i] = 1.0 - data[i] / 255.0
    print("\n")
    return norm_data
Developer: aimuch, Project: iAI, Source: uff_mnist.py

Example 8: infer

# Required import: from pycuda import driver [as alias]
# Or: from pycuda.driver import pagelocked_empty [as alias]
def infer(context, input_img, batch_size):
    # load engine
    engine = context.get_engine()
    assert(engine.get_nb_bindings() == 2)
    # create output array to receive data
    dims = engine.get_binding_dimensions(1).to_DimsCHW()
    elt_count = dims.C() * dims.H() * dims.W() * batch_size
    # convert input data to float32
    input_img = input_img.astype(np.float32)
    # allocate page-locked host memory for the output
    output = cuda.pagelocked_empty(elt_count, dtype=np.float32)

    # allocate device memory
    d_input = cuda.mem_alloc(batch_size * input_img.size * input_img.dtype.itemsize)
    d_output = cuda.mem_alloc(batch_size * output.size * output.dtype.itemsize)

    bindings = [int(d_input), int(d_output)]

    stream = cuda.Stream()

    # transfer input data to the device
    cuda.memcpy_htod_async(d_input, input_img, stream)
    # execute the model (legacy TensorRT Python API)
    context.enqueue(batch_size, bindings, stream.handle, None)
    # transfer predictions back to the pinned host buffer
    cuda.memcpy_dtoh_async(output, d_output, stream)
    # wait for the asynchronous copy to finish before reading the output
    stream.synchronize()

    # return predictions
    return output
Developer: aimuch, Project: iAI, Source: tf_to_trt.py

Example 9: infer

# Required import: from pycuda import driver [as alias]
# Or: from pycuda.driver import pagelocked_empty [as alias]
def infer(engine, input_img, batch_size):
    # load engine
    context = engine.create_execution_context()
    assert(engine.get_nb_bindings() == 2)

    # create output array to receive data
    dims = engine.get_binding_dimensions(1).to_DimsCHW()
    elt_count = dims.C() * dims.H() * dims.W() * batch_size

    # allocate page-locked host memory for the output
    output = cuda.pagelocked_empty(elt_count, dtype=np.float32)

    # allocate device memory
    d_input = cuda.mem_alloc(batch_size * input_img.size * input_img.dtype.itemsize)
    d_output = cuda.mem_alloc(batch_size * output.size * output.dtype.itemsize)

    bindings = [int(d_input), int(d_output)]

    stream = cuda.Stream()

    # transfer input data to the device
    cuda.memcpy_htod_async(d_input, input_img, stream)
    # execute the model (legacy TensorRT Python API)
    context.enqueue(batch_size, bindings, stream.handle, None)
    # transfer predictions back to the pinned host buffer
    cuda.memcpy_dtoh_async(output, d_output, stream)
    # wait for the asynchronous copy to finish before reading the output
    stream.synchronize()

    # return predictions
    return output
Developer: aimuch, Project: iAI, Source: onnx_mnist.py

Example 10: allocate_buffers

# Required import: from pycuda import driver [as alias]
# Or: from pycuda.driver import pagelocked_empty [as alias]
def allocate_buffers(engine):
    # Determine dimensions and create page-locked memory buffers (i.e. won't be swapped to disk) to hold host inputs/outputs.
    h_input = cuda.pagelocked_empty(trt.volume(engine.get_binding_shape(0)), dtype=trt.nptype(ModelData.DTYPE))
    h_output = cuda.pagelocked_empty(trt.volume(engine.get_binding_shape(1)), dtype=trt.nptype(ModelData.DTYPE))
    # Allocate device memory for inputs and outputs.
    d_input = cuda.mem_alloc(h_input.nbytes)
    d_output = cuda.mem_alloc(h_output.nbytes)
    # Create a stream in which to copy inputs/outputs and run inference.
    stream = cuda.Stream()
    return h_input, d_input, h_output, d_output, stream 
Developer: aimuch, Project: iAI, Source: onnx_resnet50.py
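A matching inference step for this single-input/single-output buffer layout, sketched after the corresponding TensorRT sample; the function name do_inference is taken from that sample, so treat this as an illustration rather than the exact source:

def do_inference(context, h_input, d_input, h_output, d_output, stream):
    # Transfer the input to the GPU.
    cuda.memcpy_htod_async(d_input, h_input, stream)
    # Run inference asynchronously on the same stream.
    context.execute_async(bindings=[int(d_input), int(d_output)], stream_handle=stream.handle)
    # Transfer predictions back into the pinned host buffer.
    cuda.memcpy_dtoh_async(h_output, d_output, stream)
    # Synchronize so h_output is safe to read.
    stream.synchronize()
    return h_output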

Example 11: infer

# Required import: from pycuda import driver [as alias]
# Or: from pycuda.driver import pagelocked_empty [as alias]
def infer(context, input_img, batch_size):
    # load engine
    engine = context.get_engine()
    assert(engine.get_nb_bindings() == 2)

    # create output array to receive data
    dims = engine.get_binding_dimensions(1).to_DimsCHW()
    elt_count = dims.C() * dims.H() * dims.W() * batch_size

    # convert input data to float32
    input_img = input_img.astype(np.float32)

    # allocate page-locked host memory for the output
    output = cuda.pagelocked_empty(elt_count, dtype=np.float32)

    # allocate device memory
    d_input = cuda.mem_alloc(batch_size * input_img.size * input_img.dtype.itemsize)
    d_output = cuda.mem_alloc(batch_size * output.size * output.dtype.itemsize)

    bindings = [int(d_input), int(d_output)]

    stream = cuda.Stream()

    # transfer input data to the device
    cuda.memcpy_htod_async(d_input, input_img, stream)
    # execute the model (legacy TensorRT Python API)
    context.enqueue(batch_size, bindings, stream.handle, None)
    # transfer predictions back to the pinned host buffer
    cuda.memcpy_dtoh_async(output, d_output, stream)
    # wait for the asynchronous copy to finish before reading the output
    stream.synchronize()

    # return predictions
    return output
Developer: aimuch, Project: iAI, Source: tf_to_trt.py

Example 12: _create_context

# Required import: from pycuda import driver [as alias]
# Or: from pycuda.driver import pagelocked_empty [as alias]
def _create_context(self):
    for binding in self.engine:
        size = trt.volume(self.engine.get_binding_shape(binding)) * \
               self.engine.max_batch_size
        host_mem = cuda.pagelocked_empty(size, np.float32)
        cuda_mem = cuda.mem_alloc(host_mem.nbytes)
        self.bindings.append(int(cuda_mem))
        if self.engine.binding_is_input(binding):
            self.host_inputs.append(host_mem)
            self.cuda_inputs.append(cuda_mem)
        else:
            self.host_outputs.append(host_mem)
            self.cuda_outputs.append(cuda_mem)
    return self.engine.create_execution_context()
Developer: cristianpb, Project: object-detection, Source: ssd_trt_detection.py

Example 13: allocate

# Required import: from pycuda import driver [as alias]
# Or: from pycuda.driver import pagelocked_empty [as alias]
def allocate(n, dtype=numpy.float32):
    """ allocate context-portable pinned host memory """
    return drv.pagelocked_empty(int(n), dtype, order='C', mem_flags=drv.host_alloc_flags.PORTABLE) 
Developer: benvanwerkhoven, Project: kernel_tuner, Source: convolution_streams.py

Example 14: allocate

# Required import: from pycuda import driver [as alias]
# Or: from pycuda.driver import pagelocked_empty [as alias]
def allocate(n, dtype=numpy.float32):
    """ allocate context-portable device mapped host memory """
    return drv.pagelocked_empty(int(n), dtype, order='C', mem_flags=drv.host_alloc_flags.PORTABLE|drv.host_alloc_flags.DEVICEMAP) 
Developer: benvanwerkhoven, Project: kernel_tuner, Source: pnpoly.py
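With the DEVICEMAP flag the pinned allocation is also mapped into the GPU's address space, so a kernel can access it directly without an explicit memcpy. A sketch of obtaining the mapped device pointer, assuming (as documented for PyCUDA's PagelockedHostAllocation) that the returned array's base attribute exposes get_device_pointer():

import numpy
import pycuda.autoinit
import pycuda.driver as drv

mapped = drv.pagelocked_empty(1024, numpy.float32, order='C',
                              mem_flags=drv.host_alloc_flags.DEVICEMAP)
# Device-side address of the same physical memory; pass this to a kernel
# instead of a pointer obtained from drv.mem_alloc.
d_ptr = numpy.intp(mapped.base.get_device_pointer())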

Example 15: allocate_buffers

# Required import: from pycuda import driver [as alias]
# Or: from pycuda.driver import pagelocked_empty [as alias]
def allocate_buffers(engine):
    h_input = cuda.pagelocked_empty(trt.volume(engine.get_binding_shape(0)), dtype=trt.nptype(DTYPE))
    h_output = cuda.pagelocked_empty(trt.volume(engine.get_binding_shape(1)), dtype=trt.nptype(DTYPE))
    d_input = cuda.mem_alloc(h_input.nbytes)
    d_output = cuda.mem_alloc(h_output.nbytes)
    return h_input, d_input, h_output, d_output
Developer: TAMU-VITA, Project: FasterSeg, Source: darts_utils.py


Note: The pycuda.driver.pagelocked_empty examples in this article were compiled by 純淨天空 from open-source code and documentation platforms such as GitHub and MSDocs. The snippets were selected from open-source projects contributed by their respective developers, who retain copyright of the source code; consult each project's license before using or redistributing the code, and do not reproduce this article without permission.