Python tensorrt.volume方法代碼示例

本文整理匯總了Python中tensorrt.volume方法的典型用法代碼示例。如果您正苦於以下問題：Python tensorrt.volume方法的具體用法？Python tensorrt.volume怎麼用？Python tensorrt.volume使用的例子？那麼，這裡精選的方法代碼示例或許可以為您提供幫助。您也可以進一步了解該方法所在模塊tensorrt的用法示例。

在下文中一共展示了tensorrt.volume方法的14個代碼示例，這些例子默認根據受歡迎程度排序。您可以為喜歡或者感覺有用的代碼點贊，您的評價將有助於系統推薦出更棒的Python代碼示例。

示例1: __allocate_buffers

# 需要導入模塊: import tensorrt [as 別名]
# 或者: from tensorrt import volume [as 別名]
def __allocate_buffers(self, engine):
        """Allocate one host/device buffer pair for every binding of ``engine``.

        Each buffer holds ``trt.volume(binding shape) * engine.max_batch_size``
        elements of the dtype reported by the engine for that binding.

        Args:
            engine: iterable engine object exposing ``get_binding_shape``,
                ``get_binding_dtype``, ``binding_is_input`` and
                ``max_batch_size``.

        Returns:
            ``(inputs, outputs, bindings)`` where ``inputs``/``outputs`` are
            lists of ``HostDeviceMem`` and ``bindings`` holds the device
            allocations converted with ``int()``, in engine iteration order.
        """
        inputs, outputs, bindings = [], [], []
        for item in engine:
            element_count = trt.volume(engine.get_binding_shape(item)) * engine.max_batch_size
            np_dtype = trt.nptype(engine.get_binding_dtype(item))
            # Page-locked host array first, then a device allocation of equal byte size.
            host_buf = cuda.pagelocked_empty(element_count, np_dtype)
            dev_buf = cuda.mem_alloc(host_buf.nbytes)
            bindings.append(int(dev_buf))
            # Route the pair to the list matching the binding's direction.
            destination = inputs if engine.binding_is_input(item) else outputs
            destination.append(HostDeviceMem(host_buf, dev_buf))
        return inputs, outputs, bindings

開發者ID:becauseofAI，項目名稱:lffd-pytorch，代碼行數:20，代碼來源:predict_tensorrt.py

示例2: allocate_buffers

# 需要導入模塊: import tensorrt [as 別名]
# 或者: from tensorrt import volume [as 別名]
def allocate_buffers(engine):
    """Create the host/device buffers and the CUDA stream used by an engine.

    For every binding, a page-locked host array and a device allocation of
    the same byte size are created; the element count is the binding volume
    multiplied by ``engine.max_batch_size``.

    Returns:
        ``(inputs, outputs, bindings, stream)``: two lists of
        ``HostDeviceMem``, the list of ``int()``-converted device
        allocations, and a new ``cuda.Stream``.
    """
    inputs, outputs, bindings = [], [], []
    stream = cuda.Stream()
    for name in engine:
        per_sample = trt.volume(engine.get_binding_shape(name))
        n_elems = per_sample * engine.max_batch_size
        np_dtype = trt.nptype(engine.get_binding_dtype(name))
        host_buf = cuda.pagelocked_empty(n_elems, np_dtype)
        dev_buf = cuda.mem_alloc(host_buf.nbytes)
        bindings.append(int(dev_buf))
        # Decide the direction before wrapping the pair.
        goes_to_inputs = engine.binding_is_input(name)
        pair = HostDeviceMem(host_buf, dev_buf)
        if goes_to_inputs:
            inputs.append(pair)
        else:
            outputs.append(pair)
    return inputs, outputs, bindings, stream

開發者ID:jkjung-avt，項目名稱:tensorrt_demos，代碼行數:23，代碼來源:yolov3.py

示例3: allocate_buffers

# 需要導入模塊: import tensorrt [as 別名]
# 或者: from tensorrt import volume [as 別名]
def allocate_buffers(engine):
    """Allocate host and device memory for each engine binding plus a stream.

    Buffer length per binding is ``trt.volume(shape) * engine.max_batch_size``
    elements of the binding's numpy dtype.

    Returns:
        ``(inputs, outputs, bindings, stream)``; ``inputs`` and ``outputs``
        contain ``HostDeviceMem`` objects and ``bindings`` contains the
        device allocations converted with ``int()``.
    """
    inputs = []
    outputs = []
    bindings = []
    stream = cuda.Stream()
    for tensor in engine:
        count = trt.volume(engine.get_binding_shape(tensor)) * engine.max_batch_size
        host = cuda.pagelocked_empty(count, trt.nptype(engine.get_binding_dtype(tensor)))
        device = cuda.mem_alloc(host.nbytes)
        bindings.append(int(device))
        if not engine.binding_is_input(tensor):
            outputs.append(HostDeviceMem(host, device))
            continue
        inputs.append(HostDeviceMem(host, device))
    return inputs, outputs, bindings, stream

# This function is generalized for multiple inputs/outputs.
# inputs and outputs are expected to be lists of HostDeviceMem objects.

開發者ID:xuwanqi，項目名稱:yolov3-tensorrt，代碼行數:24，代碼來源:common.py

示例4: allocate_buffers

# 需要導入模塊: import tensorrt [as 別名]
# 或者: from tensorrt import volume [as 別名]
def allocate_buffers(engine):
    """Allocate shaped host buffers and device buffers for every binding.

    The host array is allocated flat and then reshaped to
    ``[engine.max_batch_size] + binding shape``. Each ``HostDeviceMem`` also
    records the binding and its position in engine iteration order.

    Returns:
        ``(inputs, outputs, bindings)``: two lists of ``HostDeviceMem`` and
        the list of ``int()``-converted device allocations.
    """
    inputs, outputs, bindings = [], [], []
    for position, binding in enumerate(engine):
        flat_len = trt.volume(engine.get_binding_shape(binding)) * engine.max_batch_size
        np_dtype = trt.nptype(engine.get_binding_dtype(binding))
        # Prepend the batch dimension to the per-sample binding shape.
        batched_shape = [engine.max_batch_size] + list(engine.get_binding_shape(binding))
        host_buf = cuda.pagelocked_empty(flat_len, np_dtype).reshape(batched_shape)
        dev_buf = cuda.mem_alloc(host_buf.nbytes)
        bindings.append(int(dev_buf))
        target = inputs if engine.binding_is_input(binding) else outputs
        target.append(HostDeviceMem(host_buf, dev_buf, binding, position))
    return inputs, outputs, bindings

開發者ID:traveller59，項目名稱:torch2trt，代碼行數:23，代碼來源:common.py

示例5: allocate_buffers_torch

# 需要導入模塊: import tensorrt [as 別名]
# 或者: from tensorrt import volume [as 別名]
def allocate_buffers_torch(engine: trt.ICudaEngine, device):
    """Allocate host buffers with PyCUDA and device buffers as torch tensors.

    For each binding the host side is a page-locked array reshaped to
    ``[engine.max_batch_size] + binding shape``; the device side is a
    ``torch.empty`` tensor of the same shape on ``device``, with its dtype
    looked up from the host dtype via ``np_to_torch_dtype_map()``.

    Returns:
        ``(inputs, outputs, bindings)``: two lists of ``HostDeviceMem`` and
        the list of device tensor ``data_ptr()`` values.
    """
    import torch

    inputs, outputs, bindings = [], [], []
    np_to_torch = np_to_torch_dtype_map()
    for position, binding in enumerate(engine):
        flat_len = trt.volume(engine.get_binding_shape(binding)) * engine.max_batch_size
        np_dtype = trt.nptype(engine.get_binding_dtype(binding))
        batched_shape = [engine.max_batch_size] + list(engine.get_binding_shape(binding))
        host_buf = cuda.pagelocked_empty(flat_len, np_dtype).reshape(batched_shape)
        dev_tensor = torch.empty(
            *host_buf.shape,
            device=device,
            dtype=np_to_torch[host_buf.dtype],
        )
        # The raw tensor address is what gets handed over as the binding.
        bindings.append(dev_tensor.data_ptr())
        target = inputs if engine.binding_is_input(binding) else outputs
        target.append(HostDeviceMem(host_buf, dev_tensor, binding, position))
    return inputs, outputs, bindings

開發者ID:traveller59，項目名稱:torch2trt，代碼行數:25，代碼來源:common.py

示例6: allocate_buffers

# 需要導入模塊: import tensorrt [as 別名]
# 或者: from tensorrt import volume [as 別名]
def allocate_buffers(engine):
    """Allocate buffers for an engine whose bindings 0 and 1 are used as input and output.

    Host buffers are page-locked arrays sized by the binding volume and typed
    by ``ModelData.DTYPE``; device buffers match the host buffers' byte size.

    Returns:
        ``(h_input, d_input, h_output, d_output, stream)``.
    """
    # Host side: binding 0 first, then binding 1.
    input_len = trt.volume(engine.get_binding_shape(0))
    input_dtype = trt.nptype(ModelData.DTYPE)
    h_input = cuda.pagelocked_empty(input_len, dtype=input_dtype)

    output_len = trt.volume(engine.get_binding_shape(1))
    output_dtype = trt.nptype(ModelData.DTYPE)
    h_output = cuda.pagelocked_empty(output_len, dtype=output_dtype)

    # Device side: same number of bytes as the corresponding host array.
    d_input = cuda.mem_alloc(h_input.nbytes)
    d_output = cuda.mem_alloc(h_output.nbytes)

    # Stream for the caller's copies and inference.
    stream = cuda.Stream()
    return h_input, d_input, h_output, d_output, stream

開發者ID:aimuch，項目名稱:iAI，代碼行數:12，代碼來源:onnx_resnet50.py

示例7: _create_context

# 需要導入模塊: import tensorrt [as 別名]
# 或者: from tensorrt import volume [as 別名]
def _create_context(self):
        """Allocate float32 buffers for every binding and create an execution context.

        For each binding of ``self.engine`` a page-locked host array and a
        device allocation are created and appended to ``self.bindings`` and to
        the input or output host/device lists on ``self``.

        Returns:
            The result of ``self.engine.create_execution_context()``.
        """
        for item in self.engine:
            n_elems = trt.volume(self.engine.get_binding_shape(item)) * self.engine.max_batch_size
            # Buffers are always float32 here, regardless of the binding dtype.
            host_buf = cuda.pagelocked_empty(n_elems, np.float32)
            dev_buf = cuda.mem_alloc(host_buf.nbytes)
            self.bindings.append(int(dev_buf))
            if self.engine.binding_is_input(item):
                host_list, dev_list = self.host_inputs, self.cuda_inputs
            else:
                host_list, dev_list = self.host_outputs, self.cuda_outputs
            host_list.append(host_buf)
            dev_list.append(dev_buf)
        return self.engine.create_execution_context()

開發者ID:cristianpb，項目名稱:object-detection，代碼行數:16，代碼來源:ssd_trt_detection.py

示例8: allocate_buffers

# 需要導入模塊: import tensorrt [as 別名]
# 或者: from tensorrt import volume [as 別名]
def allocate_buffers(engine):
        """Allocate host and device buffers for bindings 0 and 1 of ``engine``.

        Host arrays are page-locked, sized by the binding volume and typed by
        the module-level ``DTYPE``; device allocations match their byte size.

        Returns:
            ``(h_input, d_input, h_output, d_output)``.
        """
        input_len = trt.volume(engine.get_binding_shape(0))
        h_input = cuda.pagelocked_empty(input_len, dtype=trt.nptype(DTYPE))

        output_len = trt.volume(engine.get_binding_shape(1))
        h_output = cuda.pagelocked_empty(output_len, dtype=trt.nptype(DTYPE))

        d_input, d_output = (cuda.mem_alloc(buf.nbytes) for buf in (h_input, h_output))
        return h_input, d_input, h_output, d_output

開發者ID:TAMU-VITA，項目名稱:FasterSeg，代碼行數:8，代碼來源:darts_utils.py

示例9: alloc_buf

# 需要導入模塊: import tensorrt [as 別名]
# 或者: from tensorrt import volume [as 別名]
def alloc_buf(engine):
    """Allocate host/device buffers for bindings 0 and 1 and create a stream.

    Sizes come from the binding volumes and dtypes from the engine's binding
    dtypes; device allocations match the host arrays' byte size.

    Returns:
        ``(in_cpu, out_cpu, in_gpu, out_gpu, stream)``.
    """
    binding_ids = (0, 1)
    # Host (CPU) side: sizes first, then dtypes, then the page-locked arrays.
    sizes = [trt.volume(engine.get_binding_shape(idx)) for idx in binding_ids]
    dtypes = [trt.nptype(engine.get_binding_dtype(idx)) for idx in binding_ids]
    in_cpu, out_cpu = (
        cuda.pagelocked_empty(size, dtype) for size, dtype in zip(sizes, dtypes)
    )
    # Device (GPU) side.
    in_gpu, out_gpu = (cuda.mem_alloc(buf.nbytes) for buf in (in_cpu, out_cpu))
    return in_cpu, out_cpu, in_gpu, out_gpu, cuda.Stream()

開發者ID:ahmetgunduz，項目名稱:Real-time-GesRec，代碼行數:15，代碼來源:speed_gpu.py

示例10: __init__

# 需要導入模塊: import tensorrt [as 別名]
# 或者: from tensorrt import volume [as 別名]
def __init__(self, model_path, calibration_files, batch_size, h, w, means, stds):
    """Set up an INT8 entropy calibrator that feeds batches of calibration images.

    Args:
        model_path: existing path; the calibration cache file name
            ``model.trt.int8calib`` is joined onto it.
        calibration_files: list of calibration image paths.
        batch_size: number of images per calibration batch.
        h: input height.
        w: input width.
        means: normalization means, stored as a float32 array.
        stds: normalization standard deviations, stored as a float32 array.

    Raises:
        AssertionError: if ``calibration_files`` is not a list or
            ``model_path`` does not exist.
    """
    trt.IInt8EntropyCalibrator2.__init__(self)
    self.batch_size = batch_size
    self.h = h
    self.w = w
    self.input_size = [h, w]
    # Fix: the means were previously built from `stds`, silently discarding
    # the `means` argument.
    self.means = np.array(means, dtype=np.float32)
    self.stds = np.array(stds, dtype=np.float32)
    assert isinstance(calibration_files, list)
    self.calib_image_paths = calibration_files
    assert os.path.exists(model_path)
    self.cache_file = os.path.join(model_path, "model.trt.int8calib")
    # Batch shape is [batch, 3, h, w]; the channel count is fixed at 3.
    self.shape = [self.batch_size, 3] + self.input_size
    # Device buffer sized for one float32 batch.
    self.device_input = cuda.mem_alloc(
        trt.volume(self.shape) * trt.float32.itemsize)
    # Visit the calibration images in a random order.
    self.indices = np.arange(len(self.calib_image_paths))
    np.random.shuffle(self.indices)

    def load_batches():
        """Yield one loaded batch at a time; a trailing partial batch is skipped."""
        last_start = len(self.calib_image_paths) - self.batch_size + 1
        for start in range(0, last_start, self.batch_size):
            batch_indices = self.indices[start:start + self.batch_size]
            paths = [self.calib_image_paths[idx] for idx in batch_indices]
            yield self.read_batch_file(paths)

    self.batches = load_batches()

開發者ID:PRBonn，項目名稱:bonnetal，代碼行數:28，代碼來源:trtCalibINT8.py

示例11: init_trt_buffers

# 需要導入模塊: import tensorrt [as 別名]
# 或者: from tensorrt import volume [as 別名]
def init_trt_buffers(cuda, trt, engine):
    """Create the float32 host and device buffers for the engine's two bindings.

    Binding 0 must be named ``'input_1:0'`` with trailing shape
    ``(224, 224, 3)``; binding 1 must be named ``'Logits/Softmax:0'`` with
    trailing shape ``(1000,)``. Each buffer holds one sample volume times
    ``engine.max_batch_size`` elements.

    Returns:
        ``(host_input, cuda_input, host_output, cuda_output)``.

    Raises:
        AssertionError: if a binding name or shape does not match.
    """
    # Input binding checks and buffers.
    assert engine[0] == 'input_1:0'
    assert engine.get_binding_shape(0)[1:] == (224, 224, 3)
    input_sample_volume = trt.volume((1, 224, 224, 3))
    host_input = cuda.pagelocked_empty(
        input_sample_volume * engine.max_batch_size, np.float32)
    cuda_input = cuda.mem_alloc(host_input.nbytes)

    # Output binding checks and buffers.
    assert engine[1] == 'Logits/Softmax:0'
    assert engine.get_binding_shape(1)[1:] == (1000,)
    output_sample_volume = trt.volume((1, 1000))
    host_output = cuda.pagelocked_empty(
        output_sample_volume * engine.max_batch_size, np.float32)
    cuda_output = cuda.mem_alloc(host_output.nbytes)

    return host_input, cuda_input, host_output, cuda_output

開發者ID:jkjung-avt，項目名稱:keras_imagenet，代碼行數:15，代碼來源:predict_image.py

示例12: allocate_buffers

# 需要導入模塊: import tensorrt [as 別名]
# 或者: from tensorrt import volume [as 別名]
def allocate_buffers(engine):
    """Allocate host and device buffers for TRT engine inference.

    Unlike the generic allocator, the numpy dtype of every buffer is taken
    from a fixed per-binding table instead of being queried from the engine.
    The original sample explains this by noting that TensorRT plugins do not
    describe their output type and that the network output is an NMS plugin.

    Args:
        engine (trt.ICudaEngine): TensorRT engine

    Returns:
        inputs [HostDeviceMem]: engine input memory
        outputs [HostDeviceMem]: engine output memory
        bindings [int]: device allocations converted with ``int()``
        stream (cuda.Stream): cuda stream for engine inference synchronization
    """
    inputs, outputs, bindings = [], [], []
    stream = cuda.Stream()

    # The table assumes the NMS output is np.float32; per the original note
    # the TRT NMS implementation only supports DataType.FLOAT for now, so
    # this may break if lower-precision NMS output is ever introduced.
    binding_to_type = {
        "Input": np.float32,
        "NMS": np.float32,
        "NMS_1": np.int32,
    }

    for item in engine:
        n_elems = trt.volume(engine.get_binding_shape(item)) * engine.max_batch_size
        np_dtype = binding_to_type[str(item)]
        host_buf = cuda.pagelocked_empty(n_elems, np_dtype)
        dev_buf = cuda.mem_alloc(host_buf.nbytes)
        bindings.append(int(dev_buf))
        target = inputs if engine.binding_is_input(item) else outputs
        target.append(HostDeviceMem(host_buf, dev_buf))
    return inputs, outputs, bindings, stream

開發者ID:aimuch，項目名稱:iAI，代碼行數:45，代碼來源:engine.py

示例13: __init__

# 需要導入模塊: import tensorrt [as 別名]
# 或者: from tensorrt import volume [as 別名]
def __init__(self, trt_engine_path, uff_model_path, trt_engine_datatype=trt.DataType.FLOAT, batch_size=1):
        """Prepare the TensorRT runtime, engine, buffers and context for inference.

        The engine is built from the .uff model and saved when no file exists
        at ``trt_engine_path``; otherwise (or if no engine is held after that
        step) it is loaded from ``trt_engine_path``.

        Args:
            trt_engine_path (str): path where TensorRT engine should be stored
            uff_model_path (str): path of .uff model
            trt_engine_datatype (trt.DataType):
                requested precision of TensorRT engine used for inference
            batch_size (int): batch size the engine should be optimized for
        """

        # Custom plugins shipped with TensorRT are registered up front; the
        # original sample notes that some of them are needed during inference.
        trt.init_libnvinfer_plugins(TRT_LOGGER, '')

        # Runtime used to load a serialized engine; the engine starts unset.
        self.trt_runtime = trt.Runtime(TRT_LOGGER)
        self.trt_engine = None

        # Report the requested engine settings on stdout.
        settings_report = (
            "TensorRT inference engine settings:",
            "  * Inference precision - {}".format(trt_engine_datatype),
            "  * Max batch size - {}\n".format(batch_size),
        )
        for report_line in settings_report:
            print(report_line)

        # No cached engine file: build from the .uff model, then persist it.
        engine_file_exists = os.path.exists(trt_engine_path)
        if not engine_file_exists:
            self.trt_engine = engine_utils.build_engine(
                uff_model_path,
                TRT_LOGGER,
                trt_engine_datatype=trt_engine_datatype,
                batch_size=batch_size,
            )
            engine_utils.save_engine(self.trt_engine, trt_engine_path)

        # Still no engine in memory: load it from the engine file.
        if not self.trt_engine:
            print("Loading cached TensorRT engine from {}".format(trt_engine_path))
            self.trt_engine = engine_utils.load_engine(self.trt_runtime, trt_engine_path)

        # Input/output buffers and the stream.
        buffers = engine_utils.allocate_buffers(self.trt_engine)
        self.inputs, self.outputs, self.bindings, self.stream = buffers

        # Execution context is needed for inference.
        self.context = self.trt_engine.create_execution_context()

        # Reusable array of shape (max batch size, flattened input volume).
        flat_input_len = trt.volume(model_utils.ModelData.INPUT_SHAPE)
        self.numpy_array = np.zeros((self.trt_engine.max_batch_size, flat_input_len))

開發者ID:aimuch，項目名稱:iAI，代碼行數:57，代碼來源:inference.py

示例14: run_speed_eval

# 需要導入模塊: import tensorrt [as 別名]
# 或者: from tensorrt import volume [as 別名]
def run_speed_eval(self, warm_run_loops=10, real_run_loops=100):
        """Time host/device copies and engine execution, then log the totals.

        Runs ``warm_run_loops`` untimed iterations followed by
        ``real_run_loops`` timed iterations. Each iteration copies every input
        to the device, executes with ``batch_size=self.max_batch_size`` and
        copies every output back. Copy time and execution time are
        accumulated separately and reported together.

        Args:
            warm_run_loops: number of untimed warm-up iterations.
            real_run_loops: number of timed iterations; must be non-zero
                because the per-image time divides by it.
        """

        def allocate_buffers(engine):
            """Allocate a host/device buffer pair for every binding of ``engine``.

            Returns ``(inputs, outputs, bindings)``: two lists of
            ``HostDeviceMem`` and the ``int()``-converted device allocations.
            """
            inputs = []
            outputs = []
            bindings = []
            for binding in engine:
                size = trt.volume(engine.get_binding_shape(binding)) * engine.max_batch_size
                dtype = trt.nptype(engine.get_binding_dtype(binding))
                # Allocate host and device buffers
                host_mem = cuda.pagelocked_empty(size, dtype)
                device_mem = cuda.mem_alloc(host_mem.nbytes)
                # Append the device buffer to device bindings.
                bindings.append(int(device_mem))
                # Append to the appropriate list.
                if engine.binding_is_input(binding):
                    inputs.append(HostDeviceMem(host_mem, device_mem))
                else:
                    outputs.append(HostDeviceMem(host_mem, device_mem))
            return inputs, outputs, bindings

        inputs, outputs, bindings = allocate_buffers(self.engine)

        # Warm run: same work as the timed loop, without measurements.
        # Plain loops replace list comprehensions that were used only for
        # their side effects.
        for _ in range(warm_run_loops):
            for inp in inputs:
                cuda.memcpy_htod(inp.device, inp.host)
            self.executor.execute(batch_size=self.max_batch_size, bindings=bindings)
            for out in outputs:
                cuda.memcpy_dtoh(out.host, out.device)

        # Real run
        logging.info('Start real run loop.')
        sum_time_data_copy = 0.
        sum_time_inference_only = 0.
        for _ in range(real_run_loops):
            time_start = time.time()
            for inp in inputs:
                cuda.memcpy_htod(inp.device, inp.host)
            sum_time_data_copy += time.time() - time_start

            time_start = time.time()
            self.executor.execute(batch_size=self.max_batch_size, bindings=bindings)
            sum_time_inference_only += time.time() - time_start

            time_start = time.time()
            for out in outputs:
                cuda.memcpy_dtoh(out.host, out.device)
            sum_time_data_copy += time.time() - time_start

        total_seconds = sum_time_data_copy + sum_time_inference_only
        # Fix: the unit label in this message was misspelled "PFS".
        logging.info('Total time (data transfer & inference) elapsed: %.02f ms. [%.02f ms] for each image (%.02f FPS)',
                     total_seconds * 1000,
                     total_seconds * 1000 / real_run_loops / self.max_batch_size,
                     real_run_loops * self.max_batch_size / total_seconds)

開發者ID:becauseofAI，項目名稱:lffd-pytorch，代碼行數:51，代碼來源:inference_speed_eval_with_tensorrt_cudnn.py

注：本文中的tensorrt.volume方法示例由純淨天空整理自Github/MSDocs等開源代碼及文檔管理平台，相關代碼片段篩選自各路編程大神貢獻的開源項目，源碼版權歸原作者所有，傳播和使用請參考對應項目的License；未經允許，請勿轉載。