當前位置: 首頁>>代碼示例>>Python>>正文


Python driver.mem_alloc方法代碼示例

本文整理匯總了Python中pycuda.driver.mem_alloc方法的典型用法代碼示例。如果您正苦於以下問題:Python driver.mem_alloc方法的具體用法?Python driver.mem_alloc怎麼用?Python driver.mem_alloc使用的例子?那麼, 這裡精選的方法代碼示例或許可以為您提供幫助。您也可以進一步了解該方法所在pycuda.driver的用法示例。


在下文中一共展示了driver.mem_alloc方法的15個代碼示例,這些例子默認根據受歡迎程度排序。您可以為喜歡或者感覺有用的代碼點讚,您的評價將有助於系統推薦出更棒的Python代碼示例。

示例1: infer

# 需要導入模塊: from pycuda import driver [as 別名]
# 或者: from pycuda.driver import mem_alloc [as 別名]
def infer(context, input_img, output_size, batch_size):
    """Run one inference pass through ``context`` and return the host output.

    The input is cast to float32, copied to a freshly allocated device
    buffer, the model is enqueued on a new CUDA stream, and the result is
    copied back into a float32 NumPy array of ``output_size`` elements.

    :param context: execution context exposing ``get_engine()`` and
        ``enqueue()`` (presumably a TensorRT execution context -- confirm
        against the caller).
    :param input_img: NumPy array holding the input data.
    :param output_size: shape/length passed to ``np.empty`` for the result.
    :param batch_size: batch size forwarded to ``context.enqueue`` and used
        to scale both device allocations.
    :returns: float32 NumPy array with the copied-back predictions.
    """
    # The two-element binding list below only makes sense for an engine
    # with exactly one input and one output binding.
    assert context.get_engine().get_nb_bindings() == 2

    input_img = input_img.astype(np.float32)
    output = np.empty(output_size, dtype=np.float32)

    # Device buffers are sized as batch_size * element count * element size.
    input_nbytes = batch_size * input_img.size * input_img.dtype.itemsize
    output_nbytes = batch_size * output.size * output.dtype.itemsize
    d_input = cuda.mem_alloc(input_nbytes)
    d_output = cuda.mem_alloc(output_nbytes)
    device_ptrs = [int(d_input), int(d_output)]

    # Upload, execute and download are all queued on the same stream.
    stream = cuda.Stream()
    cuda.memcpy_htod_async(d_input, input_img, stream)
    context.enqueue(batch_size, device_ptrs, stream.handle, None)
    cuda.memcpy_dtoh_async(output, d_output, stream)

    # Block until the queued work has finished so ``output`` is populated.
    stream.synchronize()
    return output
開發者ID:aimuch,項目名稱:iAI,代碼行數:26,代碼來源:mnist_api.py

示例2: allocate_buffers

# 需要導入模塊: from pycuda import driver [as 別名]
# 或者: from pycuda.driver import mem_alloc [as 別名]
def allocate_buffers(engine):
    """Allocate a host/device buffer pair for every binding of ``engine``.

    For each binding the element count is ``trt.volume(shape) *
    engine.max_batch_size``; a page-locked host array of that size is
    created and a device allocation of the same byte size is made.

    :param engine: iterable engine object providing ``get_binding_shape``,
        ``get_binding_dtype``, ``binding_is_input`` and ``max_batch_size``.
    :returns: tuple ``(inputs, outputs, bindings, stream)`` where ``inputs``
        and ``outputs`` are lists of ``HostDeviceMem``, ``bindings`` is the
        list of device addresses (as ints) in iteration order, and
        ``stream`` is a new ``cuda.Stream``.
    """
    inputs, outputs, bindings = [], [], []
    stream = cuda.Stream()

    for binding in engine:
        n_elements = trt.volume(engine.get_binding_shape(binding)) * engine.max_batch_size
        np_dtype = trt.nptype(engine.get_binding_dtype(binding))

        host_buf = cuda.pagelocked_empty(n_elements, np_dtype)
        dev_buf = cuda.mem_alloc(host_buf.nbytes)

        # The raw device address is what gets handed over as a binding.
        bindings.append(int(dev_buf))

        # File the pair under inputs or outputs depending on the binding.
        target = inputs if engine.binding_is_input(binding) else outputs
        target.append(HostDeviceMem(host_buf, dev_buf))

    return inputs, outputs, bindings, stream

# This function is generalized for multiple inputs/outputs.
# inputs and outputs are expected to be lists of HostDeviceMem objects. 
開發者ID:aimuch,項目名稱:iAI,代碼行數:24,代碼來源:common.py

示例3: setup_binding_shapes

# 需要導入模塊: from pycuda import driver [as 別名]
# 或者: from pycuda.driver import mem_alloc [as 別名]
def setup_binding_shapes(
    engine: trt.ICudaEngine,
    context: trt.IExecutionContext,
    host_inputs: List[np.ndarray],
    input_binding_idxs: List[int],
    output_binding_idxs: List[int],
):
    """Fix the input binding shapes and allocate matching output buffers.

    Each input binding is given the shape of its corresponding host array,
    after which the context can report the output shapes. For every output
    binding a float32 host array of that shape and a device allocation of
    the same byte size are created.

    Args:
        engine: Not used by this function.
        context: Execution context whose binding shapes are set and queried.
        host_inputs: Host input arrays, paired positionally with
            ``input_binding_idxs``.
        input_binding_idxs: Binding indices of the inputs.
        output_binding_idxs: Binding indices of the outputs.

    Returns:
        Tuple ``(host_outputs, device_outputs)`` of two lists in the order
        of ``output_binding_idxs``.
    """
    # Input shapes must be set explicitly before output shapes are queried.
    for array, idx in zip(host_inputs, input_binding_idxs):
        context.set_binding_shape(idx, array.shape)

    assert context.all_binding_shapes_specified

    host_outputs, device_outputs = [], []
    for idx in output_binding_idxs:
        # Host-side array that will receive the result copied from device.
        host_buf = np.empty(context.get_binding_shape(idx), dtype=np.float32)
        host_outputs.append(host_buf)
        # Device-side buffer of identical byte size.
        device_outputs.append(cuda.mem_alloc(host_buf.nbytes))

    return host_outputs, device_outputs
開發者ID:rmccorm4,項目名稱:tensorrt-utils,代碼行數:27,代碼來源:infer.py

示例4: __init__

# 需要導入模塊: from pycuda import driver [as 別名]
# 或者: from pycuda.driver import mem_alloc [as 別名]
def __init__(self, calibration_files=None, batch_size=32, input_shape=(3, 224, 224),
                 cache_file="calibration.cache", preprocess_func=None):
        """Set up calibration state, the device input buffer and the batches.

        :param calibration_files: sequence of calibration file paths. It is
            copied, so the caller's list is never modified. ``None`` is
            treated as an empty list.
        :param batch_size: number of samples per calibration batch.
        :param input_shape: per-sample input shape; the host batch array has
            shape ``(batch_size, *input_shape)``.
        :param cache_file: value stored as ``self.cache_file``.
        :param preprocess_func: callable stored as ``self.preprocess_func``;
            required. If it is ``None`` an error is logged and the process
            exits with status 1.
        """
        import itertools

        super().__init__()

        # Validate up front: fail before allocating device memory or
        # building batches, and make sure self.preprocess_func exists by the
        # time load_batches() is called.
        if preprocess_func is None:
            logger.error("No preprocess_func defined! Please provide one to the constructor.")
            sys.exit(1)
        self.preprocess_func = preprocess_func

        self.input_shape = input_shape
        self.cache_file = cache_file
        self.batch_size = batch_size
        self.batch = np.zeros((self.batch_size, *self.input_shape), dtype=np.float32)
        self.device_input = cuda.mem_alloc(self.batch.nbytes)

        # Work on a private copy so padding never mutates the caller's list
        # (or a shared default argument).
        self.files = list(calibration_files) if calibration_files is not None else []

        # Pad the list so it is a multiple of batch_size
        remainder = len(self.files) % self.batch_size
        if remainder != 0:
            logger.info("Padding # calibration files to be a multiple of batch_size {:}".format(self.batch_size))
            missing = self.batch_size - remainder
            # Take entries remainder..batch_size-1, wrapping around when the
            # list is shorter than one batch. With at least batch_size files
            # this is the slice files[remainder:batch_size].
            padding = list(itertools.islice(itertools.cycle(self.files),
                                            remainder, remainder + missing))
            self.files.extend(padding)

        self.batches = self.load_batches()
開發者ID:rmccorm4,項目名稱:tensorrt-utils,代碼行數:24,代碼來源:ImagenetCalibrator.py

示例5: ready_argument_list

# 需要導入模塊: from pycuda import driver [as 別名]
# 或者: from pycuda.driver import mem_alloc [as 別名]
def ready_argument_list(self, arguments):
        """Prepare kernel arguments, moving every numpy array onto the GPU.

        Each ``numpy.ndarray`` in ``arguments`` gets a device allocation of
        ``arg.nbytes`` bytes, its contents are copied host-to-device, and the
        allocation is recorded in ``self.allocations``. Every other value is
        forwarded unchanged.

        :param arguments: List of arguments to be passed to the kernel, in
            the order expected by the kernel.
        :type arguments: list(numpy objects)

        :returns: A list of the same length in which arrays are replaced by
            their device allocations.
        :rtype: list( pycuda.driver.DeviceAllocation, numpy.int32, ... )
        """
        prepared = []
        for item in arguments:
            if not isinstance(item, numpy.ndarray):
                # Non-array values go to the kernel as they are.
                prepared.append(item)
                continue
            # Arrays: allocate on the device, keep a reference to the
            # allocation on self, then upload the data.
            device_buf = drv.mem_alloc(item.nbytes)
            self.allocations.append(device_buf)
            prepared.append(device_buf)
            drv.memcpy_htod(device_buf, item)
        return prepared
開發者ID:benvanwerkhoven,項目名稱:kernel_tuner,代碼行數:24,代碼來源:cuda.py

示例6: create_memory

# 需要導入模塊: from pycuda import driver [as 別名]
# 或者: from pycuda.driver import mem_alloc [as 別名]
def create_memory(engine, name,  buf, mem, batchsize, inp, inp_idx):
    """Allocate and fill the device buffer for the binding called ``name``.

    :param engine: engine used to resolve the binding index, its dimensions
        and whether it is an input.
    :param name: binding name to look up.
    :param buf: list of device addresses; the new address is inserted at the
        binding's index.
    :param mem: list that receives the device allocation object.
    :param batchsize: multiplier applied to C*H*W to get the element count.
    :param inp: sequence of host input arrays.
    :param inp_idx: index of the next unused entry of ``inp``.
    :returns: ``inp_idx``, advanced by one if this binding was an input.
    :raises AttributeError: if the engine reports index -1 for ``name``.
    """
    slot = engine.get_binding_index(name)
    if slot == -1:
        raise AttributeError("Not a valid binding")
    print("Binding: name={}, bindingIndex={}".format(name, str(slot)))

    chw = engine.get_binding_dimensions(slot).to_DimsCHW()
    element_count = chw.C() * chw.H() * chw.W() * batchsize

    if not engine.binding_is_input(slot):
        # Non-input bindings are seeded with random float32 values.
        host_data = np.random.uniform(0.0, 255.0, element_count).astype(np.dtype('f4'))
    else:
        # Input bindings consume the next host array from ``inp``.
        host_data = inp[inp_idx]
        inp_idx += 1

    # 4 bytes are reserved per element.
    device_buffer = cuda.mem_alloc(element_count * 4)
    cuda.memcpy_htod(device_buffer, host_data)

    buf.insert(slot, int(device_buffer))
    mem.append(device_buffer)
    return inp_idx


#Run inference on device 
開發者ID:CUHKSZ-TQL,項目名稱:EverybodyDanceNow_reproduce_pytorch,代碼行數:24,代碼來源:run_engine.py

示例7: __allocate_buffers

# 需要導入模塊: from pycuda import driver [as 別名]
# 或者: from pycuda.driver import mem_alloc [as 別名]
def __allocate_buffers(self, engine):
        """Create host and device buffers for each binding of ``engine``.

        Every binding gets a page-locked host array holding
        ``trt.volume(shape) * engine.max_batch_size`` elements and a device
        allocation with the same number of bytes.

        :param engine: iterable engine object providing
            ``get_binding_shape``, ``get_binding_dtype``,
            ``binding_is_input`` and ``max_batch_size``.
        :returns: tuple ``(inputs, outputs, bindings)``; the first two are
            lists of ``HostDeviceMem`` and ``bindings`` lists the device
            addresses as ints in iteration order.
        """
        inputs, outputs, bindings = [], [], []

        for binding in engine:
            count = trt.volume(engine.get_binding_shape(binding)) * engine.max_batch_size
            elem_type = trt.nptype(engine.get_binding_dtype(binding))

            pinned = cuda.pagelocked_empty(count, elem_type)
            on_device = cuda.mem_alloc(pinned.nbytes)

            # Record the device address for the bindings list.
            bindings.append(int(on_device))

            # Sort the pair into inputs or outputs.
            destination = inputs if engine.binding_is_input(binding) else outputs
            destination.append(HostDeviceMem(pinned, on_device))

        return inputs, outputs, bindings
開發者ID:becauseofAI,項目名稱:lffd-pytorch,代碼行數:20,代碼來源:predict_tensorrt.py

示例8: allocate_buffers

# 需要導入模塊: from pycuda import driver [as 別名]
# 或者: from pycuda.driver import mem_alloc [as 別名]
def allocate_buffers(engine):
    """Allocate all host/device input and output buffers for an engine.

    Returns ``(inputs, outputs, bindings, stream)``: two lists of
    ``HostDeviceMem`` pairs, the list of device addresses (ints) in binding
    iteration order, and a newly created ``cuda.Stream``.
    """
    stream = cuda.Stream()
    bindings = []
    grouped = {True: [], False: []}  # keyed by "binding is an input"

    for binding in engine:
        binding_shape = engine.get_binding_shape(binding)
        n_items = trt.volume(binding_shape) * engine.max_batch_size
        item_dtype = trt.nptype(engine.get_binding_dtype(binding))

        # Page-locked host array plus a device buffer of the same byte size.
        host_side = cuda.pagelocked_empty(n_items, item_dtype)
        device_side = cuda.mem_alloc(host_side.nbytes)
        bindings.append(int(device_side))

        is_input = bool(engine.binding_is_input(binding))
        grouped[is_input].append(HostDeviceMem(host_side, device_side))

    return grouped[True], grouped[False], bindings, stream
開發者ID:jkjung-avt,項目名稱:tensorrt_demos,代碼行數:23,代碼來源:yolov3.py

示例9: allocate_buffers

# 需要導入模塊: from pycuda import driver [as 別名]
# 或者: from pycuda.driver import mem_alloc [as 別名]
def allocate_buffers(engine):
    """Allocate shaped host buffers and device buffers for every binding.

    For each binding a page-locked host array is created and reshaped to
    ``[engine.max_batch_size] + binding shape``, together with a device
    allocation of the same byte size.

    :param engine: iterable engine object providing ``get_binding_shape``,
        ``get_binding_dtype``, ``binding_is_input`` and ``max_batch_size``.
    :returns: tuple ``(inputs, outputs, bindings)``. ``inputs`` and
        ``outputs`` are lists of ``HostDeviceMem`` built from
        ``(host_mem, device_mem, binding, index)``, where ``index`` is the
        position of the binding in iteration order; ``bindings`` lists the
        device addresses as ints.
    """
    inputs = []
    outputs = []
    bindings = []
    # enumerate() replaces the hand-maintained counter.
    for index, binding in enumerate(engine):
        # Query the binding shape once and reuse it for both the element
        # count and the reshaped host array.
        binding_shape = engine.get_binding_shape(binding)
        size = trt.volume(binding_shape) * engine.max_batch_size
        dtype = trt.nptype(engine.get_binding_dtype(binding))
        shape = [engine.max_batch_size] + list(binding_shape)
        # Allocate host and device buffers
        host_mem = cuda.pagelocked_empty(size, dtype).reshape(shape)
        device_mem = cuda.mem_alloc(host_mem.nbytes)
        # Append the device buffer to device bindings.
        bindings.append(int(device_mem))
        # Append to the appropriate list.
        target = inputs if engine.binding_is_input(binding) else outputs
        target.append(HostDeviceMem(host_mem, device_mem, binding, index))
    return inputs, outputs, bindings
開發者ID:traveller59,項目名稱:torch2trt,代碼行數:23,代碼來源:common.py

示例10: _set_rand_state_dev

# 需要導入模塊: from pycuda import driver [as 別名]
# 或者: from pycuda.driver import mem_alloc [as 別名]
def _set_rand_state_dev(self, state=None):
        """
        Upload RNG state to the device buffer of the current context.

        The buffer is looked up in ``self.context_rand_state_map`` by the
        current driver context; if the context has no buffer yet, one of
        ``state.nbytes`` bytes is allocated and stored in the map. The state
        is then copied host-to-device and the context is flagged in
        ``self.context_rand_state_alive``.

        Arguments:
            state (np.array or None): an array of uint32 values used to
                                      set the state of the on device LFSRs.
                                      When None, a state is obtained from
                                      ``self._gen_dev_randstate()``.
        """
        current = drv.Context.get_current()
        if state is None:
            state = self._gen_dev_randstate()

        state_map = self.context_rand_state_map
        if current not in state_map:
            # First use for this context: create and remember its buffer.
            device_state = drv.mem_alloc(state.nbytes)
            state_map[current] = device_state
        else:
            device_state = state_map[current]

        drv.memcpy_htod(device_state, state)
        self.context_rand_state_alive[current] = True
開發者ID:NervanaSystems,項目名稱:neon,代碼行數:23,代碼來源:nervanagpu.py

示例11: empty_like

# 需要導入模塊: from pycuda import driver [as 別名]
# 或者: from pycuda.driver import mem_alloc [as 別名]
def empty_like(self, other_ary, name=None):
        """
        Instantiate a new GPUTensor whose shape and dtype are taken from
        other_ary.

        Arguments:
            other_ary (tensor object or numpy array): Object providing the
                                                      ``shape`` and ``dtype``
                                                      of the new tensor.
            name (optional): Forwarded to the GPUTensor constructor as
                             ``name``.

        Returns:
            GPUTensor: the newly constructed tensor.
        """
        # A plain numpy array has neither ``persist_values`` nor
        # ``allocator``; fall back to True and drv.mem_alloc respectively.
        persist = getattr(other_ary, 'persist_values', True)
        alloc_fn = getattr(other_ary, 'allocator', drv.mem_alloc)
        return GPUTensor(
            self,
            other_ary.shape,
            dtype=other_ary.dtype,
            name=name,
            persist_values=persist,
            allocator=alloc_fn,
            rounding=self.round_mode,
        )
開發者ID:NervanaSystems,項目名稱:neon,代碼行數:22,代碼來源:nervanagpu.py

示例12: time_inference

# 需要導入模塊: from pycuda import driver [as 別名]
# 或者: from pycuda.driver import mem_alloc [as 別名]
def time_inference(engine, batch_size):
    """Execute the engine repeatedly on a zeroed input with a profiler set.

    Device buffers for the single input and single output binding are
    allocated at 4 bytes per element, the input is zero-filled, and
    ``context.execute`` is called ``TIMING_INTERATIONS`` times with
    ``G_PROFILER`` attached to the context.

    :param engine: engine with exactly two bindings, named by
        ``INPUT_LAYERS[0]`` and ``OUTPUT_LAYERS[0]``.
    :param batch_size: batch size used for buffer sizing and execution.
    :returns: None.
    """
    assert(engine.get_nb_bindings() == 2)

    input_index = engine.get_binding_index(INPUT_LAYERS[0])
    output_index = engine.get_binding_index(OUTPUT_LAYERS[0])

    input_dim = engine.get_binding_dimensions(input_index).to_DimsCHW()
    output_dim = engine.get_binding_dimensions(output_index).to_DimsCHW()

    # Buffer sizes in bytes: batch * C * H * W elements, 4 bytes each.
    insize = batch_size * input_dim.C() * input_dim.H() * input_dim.W() * 4
    outsize = batch_size * output_dim.C() * output_dim.H() * output_dim.W() * 4

    d_input = cuda.mem_alloc(insize)
    d_output = cuda.mem_alloc(outsize)

    bindings = [int(d_input), int(d_output)]

    context = engine.create_execution_context()
    try:
        context.set_profiler(G_PROFILER)

        # memset_d32 counts 32-bit words, hence the division by 4.
        cuda.memset_d32(d_input, 0, insize // 4)

        for _ in range(TIMING_INTERATIONS):
            context.execute(batch_size, bindings)
    finally:
        # Release the execution context even if a run raises.
        context.destroy()
開發者ID:aimuch,項目名稱:iAI,代碼行數:29,代碼來源:googlenet.py

示例13: infer

# 需要導入模塊: from pycuda import driver [as 別名]
# 或者: from pycuda.driver import mem_alloc [as 別名]
def infer(context, input_img, output_size, batch_size):
    """Execute the model behind ``context`` on ``input_img``.

    Args:
        context: execution context providing ``get_engine()`` and
            ``enqueue()``.
        input_img: NumPy array with the input; it is cast to float32.
        output_size: shape/length given to ``np.empty`` for the result array.
        batch_size: batch size passed to ``enqueue`` and used as a factor in
            both device allocation sizes.

    Returns:
        float32 NumPy array filled with the data copied back from the device.
    """
    # Exactly one input and one output binding are expected.
    engine = context.get_engine()
    assert engine.get_nb_bindings() == 2

    # Host side: float32 input and an uninitialised float32 result array.
    host_in = input_img.astype(np.float32)
    host_out = np.empty(output_size, dtype=np.float32)

    # Device side: one buffer per binding.
    dev_in = cuda.mem_alloc(batch_size * host_in.size * host_in.dtype.itemsize)
    dev_out = cuda.mem_alloc(batch_size * host_out.size * host_out.dtype.itemsize)
    bindings = [int(dev_in), int(dev_out)]

    # Copy in, run, copy out -- all queued on one stream.
    stream = cuda.Stream()
    cuda.memcpy_htod_async(dev_in, host_in, stream)
    context.enqueue(batch_size, bindings, stream.handle, None)
    cuda.memcpy_dtoh_async(host_out, dev_out, stream)

    # Wait for the stream to drain before handing the result back.
    stream.synchronize()
    return host_out
開發者ID:aimuch,項目名稱:iAI,代碼行數:31,代碼來源:caffe_mnist.py

示例14: infer

# 需要導入模塊: from pycuda import driver [as 別名]
# 或者: from pycuda.driver import mem_alloc [as 別名]
def infer(engine, input_img, batch_size):
    """Run ``input_img`` through ``engine`` and return the predictions.

    A new execution context is created from the engine, the input is copied
    to the device, the model is enqueued on a fresh CUDA stream and the
    result is copied back into page-locked host memory.

    :param engine: engine with exactly two bindings; binding index 1 is used
        to size the output (presumably the output binding -- confirm).
    :param input_img: NumPy array with the input data, uploaded as-is.
    :param batch_size: batch size used for buffer sizing and ``enqueue``.
    :returns: page-locked float32 array of ``C * H * W * batch_size``
        elements holding the copied-back output.
    """
    # Create the execution context
    context = engine.create_execution_context()
    assert(engine.get_nb_bindings() == 2)
    # Size the output from the dimensions of binding 1
    dims = engine.get_binding_dimensions(1).to_DimsCHW()
    elt_count = dims.C() * dims.H() * dims.W() * batch_size
    # Allocate pagelocked memory
    output = cuda.pagelocked_empty(elt_count, dtype = np.float32)

    # Allocate device memory
    d_input = cuda.mem_alloc(batch_size * input_img.size * input_img.dtype.itemsize)
    d_output = cuda.mem_alloc(batch_size * output.size * output.dtype.itemsize)

    bindings = [int(d_input), int(d_output)]

    stream = cuda.Stream()

    # Transfer input data to device
    cuda.memcpy_htod_async(d_input, input_img, stream)
    # Execute model
    context.enqueue(batch_size, bindings, stream.handle, None)
    # Transfer predictions back
    cuda.memcpy_dtoh_async(output, d_output, stream)

    # The copies and the execution above are asynchronous; wait for the
    # stream to finish so the caller never reads a partially written buffer.
    stream.synchronize()

    # Return predictions
    return output
開發者ID:aimuch,項目名稱:iAI,代碼行數:29,代碼來源:uff_mnist.py

示例15: infer

# 需要導入模塊: from pycuda import driver [as 別名]
# 或者: from pycuda.driver import mem_alloc [as 別名]
def infer(context, input_img, batch_size):
    """Run ``input_img`` through the engine behind ``context``.

    The input is cast to float32 and copied to the device, the model is
    enqueued on a fresh CUDA stream and the result is copied back into
    page-locked host memory.

    :param context: execution context providing ``get_engine()`` and
        ``enqueue()``; its engine must have exactly two bindings, and
        binding index 1 is used to size the output (presumably the output
        binding -- confirm).
    :param input_img: NumPy array with the input data.
    :param batch_size: batch size used for buffer sizing and ``enqueue``.
    :returns: page-locked float32 array of ``C * H * W * batch_size``
        elements holding the copied-back output.
    """
    # Fetch the engine behind the context
    engine = context.get_engine()
    assert(engine.get_nb_bindings() == 2)
    # Size the output from the dimensions of binding 1
    dims = engine.get_binding_dimensions(1).to_DimsCHW()
    elt_count = dims.C() * dims.H() * dims.W() * batch_size
    # Convert input data to float32
    input_img = input_img.astype(np.float32)
    # Allocate pagelocked memory
    output = cuda.pagelocked_empty(elt_count, dtype=np.float32)

    # Allocate device memory
    d_input = cuda.mem_alloc(batch_size * input_img.size * input_img.dtype.itemsize)
    d_output = cuda.mem_alloc(batch_size * output.size * output.dtype.itemsize)

    bindings = [int(d_input), int(d_output)]

    stream = cuda.Stream()

    # Transfer input data to device
    cuda.memcpy_htod_async(d_input, input_img, stream)
    # Execute model
    context.enqueue(batch_size, bindings, stream.handle, None)
    # Transfer predictions back
    cuda.memcpy_dtoh_async(output, d_output, stream)

    # The copies and the execution above are asynchronous; wait for the
    # stream to finish so the caller never reads a partially written buffer.
    stream.synchronize()

    # Return predictions
    return output
開發者ID:aimuch,項目名稱:iAI,代碼行數:31,代碼來源:tf_to_trt.py


注:本文中的pycuda.driver.mem_alloc方法示例由純淨天空整理自Github/MSDocs等開源代碼及文檔管理平台,相關代碼片段篩選自各路編程大神貢獻的開源項目,源碼版權歸原作者所有,傳播和使用請參考對應項目的License;未經允許,請勿轉載。