This article collects typical usage examples of the Python method tensorrt.volume. If you are unsure what tensorrt.volume does, how to call it, or what real-world uses look like, the curated code examples below may help. You can also explore further usage examples from the tensorrt module itself.
The following presents 14 code examples of the tensorrt.volume method, ordered by popularity by default.
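Before the examples, a minimal sketch of what trt.volume itself does: it returns the number of elements implied by a shape (the product of its dimensions), which is why the examples below multiply it by max_batch_size to size their host and device buffers. The shape used here is an illustrative assumption, not taken from any example:

import tensorrt as trt

# Product of all dimensions in the shape: 1 * 3 * 224 * 224 = 150528 elements.
n_elements = trt.volume((1, 3, 224, 224))
# A buffer size in bytes is then n_elements * itemsize of the binding dtype,
# e.g. for an FP32 binding:
n_bytes = n_elements * trt.float32.itemsize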
Example 1: __allocate_buffers
# Required import: import tensorrt as trt
# Alternatively: from tensorrt import volume
def __allocate_buffers(self, engine):
    inputs = []
    outputs = []
    bindings = []
    for binding in engine:
        size = trt.volume(engine.get_binding_shape(binding)) * engine.max_batch_size
        dtype = trt.nptype(engine.get_binding_dtype(binding))
        # Allocate host and device buffers
        host_mem = cuda.pagelocked_empty(size, dtype)
        device_mem = cuda.mem_alloc(host_mem.nbytes)
        # Append the device buffer to device bindings.
        bindings.append(int(device_mem))
        # Append to the appropriate list.
        if engine.binding_is_input(binding):
            inputs.append(HostDeviceMem(host_mem, device_mem))
        else:
            outputs.append(HostDeviceMem(host_mem, device_mem))
    return inputs, outputs, bindings
Example 2: allocate_buffers
# Required import: import tensorrt as trt
# Alternatively: from tensorrt import volume
def allocate_buffers(engine):
    """Allocates all host/device in/out buffers required for an engine."""
    inputs = []
    outputs = []
    bindings = []
    stream = cuda.Stream()
    for binding in engine:
        size = trt.volume(engine.get_binding_shape(binding)) * engine.max_batch_size
        dtype = trt.nptype(engine.get_binding_dtype(binding))
        # Allocate host and device buffers
        host_mem = cuda.pagelocked_empty(size, dtype)
        device_mem = cuda.mem_alloc(host_mem.nbytes)
        # Append the device buffer to device bindings.
        bindings.append(int(device_mem))
        # Append to the appropriate list.
        if engine.binding_is_input(binding):
            inputs.append(HostDeviceMem(host_mem, device_mem))
        else:
            outputs.append(HostDeviceMem(host_mem, device_mem))
    return inputs, outputs, bindings, stream
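The buffers returned by this pattern are typically consumed by a companion inference helper. A minimal sketch, assuming the classic pre-TensorRT-8 batch execution API; the helper name do_inference and the context argument (an execution context created from the same engine) are assumptions, not part of the example above:

def do_inference(context, bindings, inputs, outputs, stream, batch_size=1):
    # Transfer input data to the GPU asynchronously on the stream.
    [cuda.memcpy_htod_async(inp.device, inp.host, stream) for inp in inputs]
    # Run inference; bindings are the device pointers collected above.
    context.execute_async(batch_size=batch_size, bindings=bindings, stream_handle=stream.handle)
    # Transfer predictions back to the page-locked host buffers.
    [cuda.memcpy_dtoh_async(out.host, out.device, stream) for out in outputs]
    # Synchronize the stream before reading the results.
    stream.synchronize()
    # Return only the host outputs.
    return [out.host for out in outputs]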
Example 3: allocate_buffers
# Required import: import tensorrt as trt
# Alternatively: from tensorrt import volume
def allocate_buffers(engine):
    inputs = []
    outputs = []
    bindings = []
    stream = cuda.Stream()
    for binding in engine:
        size = trt.volume(engine.get_binding_shape(binding)) * engine.max_batch_size
        dtype = trt.nptype(engine.get_binding_dtype(binding))
        # Allocate host and device buffers
        host_mem = cuda.pagelocked_empty(size, dtype)
        device_mem = cuda.mem_alloc(host_mem.nbytes)
        # Append the device buffer to device bindings.
        bindings.append(int(device_mem))
        # Append to the appropriate list.
        if engine.binding_is_input(binding):
            inputs.append(HostDeviceMem(host_mem, device_mem))
        else:
            outputs.append(HostDeviceMem(host_mem, device_mem))
    return inputs, outputs, bindings, stream

# This function is generalized for multiple inputs/outputs.
# inputs and outputs are expected to be lists of HostDeviceMem objects.
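HostDeviceMem is used throughout these examples but never defined in the snippets; it originates in the TensorRT samples' common.py. A minimal sketch of such a helper, assuming the two-argument form (the extra binding-name and index fields seen in Examples 4 and 5 would be optional additions):

class HostDeviceMem(object):
    """Pairs a page-locked host buffer with its matching device allocation."""

    def __init__(self, host_mem, device_mem):
        self.host = host_mem
        self.device = device_mem

    def __repr__(self):
        return "Host:\n" + str(self.host) + "\nDevice:\n" + str(self.device)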
Example 4: allocate_buffers
# Required import: import tensorrt as trt
# Alternatively: from tensorrt import volume
def allocate_buffers(engine):
    inputs = []
    outputs = []
    bindings = []
    index = 0
    for binding in engine:
        size = trt.volume(engine.get_binding_shape(binding)) * engine.max_batch_size
        dtype = trt.nptype(engine.get_binding_dtype(binding))
        shape = [engine.max_batch_size] + list(engine.get_binding_shape(binding))
        # Allocate host and device buffers
        host_mem = cuda.pagelocked_empty(size, dtype).reshape(shape)
        device_mem = cuda.mem_alloc(host_mem.nbytes)
        # Append the device buffer to device bindings.
        bindings.append(int(device_mem))
        # Append to the appropriate list.
        if engine.binding_is_input(binding):
            inputs.append(HostDeviceMem(host_mem, device_mem, binding, index))
        else:
            outputs.append(HostDeviceMem(host_mem, device_mem, binding, index))
        index += 1
    return inputs, outputs, bindings
Example 5: allocate_buffers_torch
# Required import: import tensorrt as trt
# Alternatively: from tensorrt import volume
def allocate_buffers_torch(engine: trt.ICudaEngine, device):
    import torch
    inputs = []
    outputs = []
    bindings = []
    index = 0
    dtype_map = np_to_torch_dtype_map()
    for binding in engine:
        size = trt.volume(engine.get_binding_shape(binding)) * engine.max_batch_size
        dtype = trt.nptype(engine.get_binding_dtype(binding))
        shape = [engine.max_batch_size] + list(engine.get_binding_shape(binding))
        # Allocate host and device buffers
        host_mem = cuda.pagelocked_empty(size, dtype).reshape(shape)
        device_mem = torch.empty(*host_mem.shape, device=device, dtype=dtype_map[host_mem.dtype])
        # Append the device buffer to device bindings.
        bindings.append(device_mem.data_ptr())
        # Append to the appropriate list.
        if engine.binding_is_input(binding):
            inputs.append(HostDeviceMem(host_mem, device_mem, binding, index))
        else:
            outputs.append(HostDeviceMem(host_mem, device_mem, binding, index))
        index += 1
    return inputs, outputs, bindings
Example 6: allocate_buffers
# Required import: import tensorrt as trt
# Alternatively: from tensorrt import volume
def allocate_buffers(engine):
    # Determine dimensions and create page-locked memory buffers
    # (i.e. won't be swapped to disk) to hold host inputs/outputs.
    h_input = cuda.pagelocked_empty(trt.volume(engine.get_binding_shape(0)), dtype=trt.nptype(ModelData.DTYPE))
    h_output = cuda.pagelocked_empty(trt.volume(engine.get_binding_shape(1)), dtype=trt.nptype(ModelData.DTYPE))
    # Allocate device memory for inputs and outputs.
    d_input = cuda.mem_alloc(h_input.nbytes)
    d_output = cuda.mem_alloc(h_output.nbytes)
    # Create a stream in which to copy inputs/outputs and run inference.
    stream = cuda.Stream()
    return h_input, d_input, h_output, d_output, stream
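A hedged usage sketch for the single-input/single-output buffers returned above, to contrast with the list-based helper after Example 2; the context argument (an execution context from the same engine) and the name do_inference_single are assumptions, not part of the example:

def do_inference_single(context, h_input, d_input, h_output, d_output, stream):
    # Copy the preprocessed input from the page-locked host buffer to the GPU.
    cuda.memcpy_htod_async(d_input, h_input, stream)
    # Execute the engine; bindings are device pointers in binding-index order.
    context.execute_async(bindings=[int(d_input), int(d_output)], stream_handle=stream.handle)
    # Copy the result back and wait for the stream to finish.
    cuda.memcpy_dtoh_async(h_output, d_output, stream)
    stream.synchronize()
    return h_output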
Example 7: _create_context
# Required import: import tensorrt as trt
# Alternatively: from tensorrt import volume
def _create_context(self):
    for binding in self.engine:
        size = trt.volume(self.engine.get_binding_shape(binding)) * self.engine.max_batch_size
        host_mem = cuda.pagelocked_empty(size, np.float32)
        cuda_mem = cuda.mem_alloc(host_mem.nbytes)
        self.bindings.append(int(cuda_mem))
        if self.engine.binding_is_input(binding):
            self.host_inputs.append(host_mem)
            self.cuda_inputs.append(cuda_mem)
        else:
            self.host_outputs.append(host_mem)
            self.cuda_outputs.append(cuda_mem)
    return self.engine.create_execution_context()
Example 8: allocate_buffers
# Required import: import tensorrt as trt
# Alternatively: from tensorrt import volume
def allocate_buffers(engine):
    # Page-locked host buffers sized from the input/output binding shapes.
    h_input = cuda.pagelocked_empty(trt.volume(engine.get_binding_shape(0)), dtype=trt.nptype(DTYPE))
    h_output = cuda.pagelocked_empty(trt.volume(engine.get_binding_shape(1)), dtype=trt.nptype(DTYPE))
    # Matching device allocations for the input and output.
    d_input = cuda.mem_alloc(h_input.nbytes)
    d_output = cuda.mem_alloc(h_output.nbytes)
    return h_input, d_input, h_output, d_output
Example 9: alloc_buf
# Required import: import tensorrt as trt
# Alternatively: from tensorrt import volume
def alloc_buf(engine):
    # host cpu mem
    h_in_size = trt.volume(engine.get_binding_shape(0))
    h_out_size = trt.volume(engine.get_binding_shape(1))
    h_in_dtype = trt.nptype(engine.get_binding_dtype(0))
    h_out_dtype = trt.nptype(engine.get_binding_dtype(1))
    in_cpu = cuda.pagelocked_empty(h_in_size, h_in_dtype)
    out_cpu = cuda.pagelocked_empty(h_out_size, h_out_dtype)
    # allocate gpu mem
    in_gpu = cuda.mem_alloc(in_cpu.nbytes)
    out_gpu = cuda.mem_alloc(out_cpu.nbytes)
    stream = cuda.Stream()
    return in_cpu, out_cpu, in_gpu, out_gpu, stream
Example 10: __init__
# Required import: import tensorrt as trt
# Alternatively: from tensorrt import volume
def __init__(self, model_path, calibration_files, batch_size, h, w, means, stds):
    trt.IInt8EntropyCalibrator2.__init__(self)
    self.batch_size = batch_size
    self.h = h
    self.w = w
    self.input_size = [h, w]
    self.means = np.array(means, dtype=np.float32)
    self.stds = np.array(stds, dtype=np.float32)
    assert isinstance(calibration_files, list)
    self.calib_image_paths = calibration_files
    assert os.path.exists(model_path)
    self.cache_file = os.path.join(model_path, "model.trt.int8calib")
    self.shape = [self.batch_size, 3] + self.input_size
    # Device buffer for one calibration batch: element count * FP32 item size.
    self.device_input = cuda.mem_alloc(
        trt.volume(self.shape) * trt.float32.itemsize)
    self.indices = np.arange(len(self.calib_image_paths))
    np.random.shuffle(self.indices)

    def load_batches():
        for i in range(0, len(self.calib_image_paths) - self.batch_size + 1, self.batch_size):
            indexs = self.indices[i:i + self.batch_size]
            paths = [self.calib_image_paths[idx] for idx in indexs]
            files = self.read_batch_file(paths)
            yield files

    self.batches = load_batches()
Example 11: init_trt_buffers
# Required import: import tensorrt as trt
# Alternatively: from tensorrt import volume
def init_trt_buffers(cuda, trt, engine):
    """Initialize host buffers and cuda buffers for the engine."""
    assert engine[0] == 'input_1:0'
    assert engine.get_binding_shape(0)[1:] == (224, 224, 3)
    size = trt.volume((1, 224, 224, 3)) * engine.max_batch_size
    host_input = cuda.pagelocked_empty(size, np.float32)
    cuda_input = cuda.mem_alloc(host_input.nbytes)
    assert engine[1] == 'Logits/Softmax:0'
    assert engine.get_binding_shape(1)[1:] == (1000,)
    size = trt.volume((1, 1000)) * engine.max_batch_size
    host_output = cuda.pagelocked_empty(size, np.float32)
    cuda_output = cuda.mem_alloc(host_output.nbytes)
    return host_input, cuda_input, host_output, cuda_output
Example 12: allocate_buffers
# Required import: import tensorrt as trt
# Alternatively: from tensorrt import volume
def allocate_buffers(engine):
    """Allocates host and device buffers for TRT engine inference.

    This function is similar to the one in ../../common.py, but
    converts network outputs (which are np.float32) appropriately
    before writing them to the Python buffer. This is needed, since
    TensorRT plugins don't support output type description, and
    in our particular case we use the NMS plugin as the network output.

    Args:
        engine (trt.ICudaEngine): TensorRT engine

    Returns:
        inputs [HostDeviceMem]: engine input memory
        outputs [HostDeviceMem]: engine output memory
        bindings [int]: buffer to device bindings
        stream (cuda.Stream): cuda stream for engine inference synchronization
    """
    inputs = []
    outputs = []
    bindings = []
    stream = cuda.Stream()
    # The current NMS implementation in TRT only supports DataType.FLOAT, but
    # it may change in the future, which could break this sample here
    # when using lower precision [e.g. the NMS output would not be np.float32
    # anymore, even though this is assumed in binding_to_type].
    binding_to_type = {"Input": np.float32, "NMS": np.float32, "NMS_1": np.int32}
    for binding in engine:
        size = trt.volume(engine.get_binding_shape(binding)) * engine.max_batch_size
        dtype = binding_to_type[str(binding)]
        # Allocate host and device buffers
        host_mem = cuda.pagelocked_empty(size, dtype)
        device_mem = cuda.mem_alloc(host_mem.nbytes)
        # Append the device buffer to device bindings.
        bindings.append(int(device_mem))
        # Append to the appropriate list.
        if engine.binding_is_input(binding):
            inputs.append(HostDeviceMem(host_mem, device_mem))
        else:
            outputs.append(HostDeviceMem(host_mem, device_mem))
    return inputs, outputs, bindings, stream
Example 13: __init__
# Required import: import tensorrt as trt
# Alternatively: from tensorrt import volume
def __init__(self, trt_engine_path, uff_model_path, trt_engine_datatype=trt.DataType.FLOAT, batch_size=1):
    """Initializes TensorRT objects needed for model inference.

    Args:
        trt_engine_path (str): path where the TensorRT engine should be stored
        uff_model_path (str): path of the .uff model
        trt_engine_datatype (trt.DataType):
            requested precision of the TensorRT engine used for inference
        batch_size (int): batch size for which the engine should be optimized
    """
    # We first load all custom plugins shipped with TensorRT;
    # some of them will be needed during inference.
    trt.init_libnvinfer_plugins(TRT_LOGGER, '')
    # Initialize the runtime needed for loading a TensorRT engine from file.
    self.trt_runtime = trt.Runtime(TRT_LOGGER)
    # TRT engine placeholder
    self.trt_engine = None
    # Display requested engine settings on stdout.
    print("TensorRT inference engine settings:")
    print(" * Inference precision - {}".format(trt_engine_datatype))
    print(" * Max batch size - {}\n".format(batch_size))
    # If the engine is not cached, we need to build it.
    if not os.path.exists(trt_engine_path):
        # This function uses the supplied .uff file along with the UffParser
        # to build the TensorRT engine. For more details, check the implementation.
        self.trt_engine = engine_utils.build_engine(
            uff_model_path, TRT_LOGGER,
            trt_engine_datatype=trt_engine_datatype,
            batch_size=batch_size)
        # Save the engine to file.
        engine_utils.save_engine(self.trt_engine, trt_engine_path)
    # If the engine was not built above, the cached file exists, so load it.
    if not self.trt_engine:
        print("Loading cached TensorRT engine from {}".format(trt_engine_path))
        self.trt_engine = engine_utils.load_engine(
            self.trt_runtime, trt_engine_path)
    # This allocates memory for network inputs/outputs on both CPU and GPU.
    self.inputs, self.outputs, self.bindings, self.stream = \
        engine_utils.allocate_buffers(self.trt_engine)
    # An execution context is needed for inference.
    self.context = self.trt_engine.create_execution_context()
    # Allocate memory for multiple usage [e.g. multiple batch inference].
    input_volume = trt.volume(model_utils.ModelData.INPUT_SHAPE)
    self.numpy_array = np.zeros((self.trt_engine.max_batch_size, input_volume))
Example 14: run_speed_eval
# Required import: import tensorrt as trt
# Alternatively: from tensorrt import volume
def run_speed_eval(self, warm_run_loops=10, real_run_loops=100):

    def allocate_buffers(engine):
        inputs = []
        outputs = []
        bindings = []
        for binding in engine:
            size = trt.volume(engine.get_binding_shape(binding)) * engine.max_batch_size
            dtype = trt.nptype(engine.get_binding_dtype(binding))
            # Allocate host and device buffers
            host_mem = cuda.pagelocked_empty(size, dtype)
            device_mem = cuda.mem_alloc(host_mem.nbytes)
            # Append the device buffer to device bindings.
            bindings.append(int(device_mem))
            # Append to the appropriate list.
            if engine.binding_is_input(binding):
                inputs.append(HostDeviceMem(host_mem, device_mem))
            else:
                outputs.append(HostDeviceMem(host_mem, device_mem))
        return inputs, outputs, bindings

    inputs, outputs, bindings = allocate_buffers(self.engine)
    # warm-up run
    for i in range(warm_run_loops):
        [cuda.memcpy_htod(inp.device, inp.host) for inp in inputs]
        self.executor.execute(batch_size=self.max_batch_size, bindings=bindings)
        [cuda.memcpy_dtoh(out.host, out.device) for out in outputs]
    # real run
    logging.info('Start real run loop.')
    sum_time_data_copy = 0.
    sum_time_inference_only = 0.
    for i in range(real_run_loops):
        time_start = time.time()
        [cuda.memcpy_htod(inp.device, inp.host) for inp in inputs]
        sum_time_data_copy += time.time() - time_start
        time_start = time.time()
        self.executor.execute(batch_size=self.max_batch_size, bindings=bindings)
        sum_time_inference_only += time.time() - time_start
        time_start = time.time()
        [cuda.memcpy_dtoh(out.host, out.device) for out in outputs]
        sum_time_data_copy += time.time() - time_start
    logging.info('Total time (data transfer & inference) elapsed: %.02f ms. [%.02f ms] for each image (%.02f FPS)'
                 % ((sum_time_data_copy + sum_time_inference_only) * 1000,
                    (sum_time_data_copy + sum_time_inference_only) * 1000 / real_run_loops / self.max_batch_size,
                    real_run_loops * self.max_batch_size / (sum_time_data_copy + sum_time_inference_only)))