

Python tensorrt.nptype Method Code Examples

This article collects typical usage examples of the tensorrt.nptype method in Python. If you are wondering what exactly tensorrt.nptype does, how to call it, or what real-world usage looks like, the curated code examples below should help. You can also explore further usage examples from the tensorrt module.


The following 15 code examples of the tensorrt.nptype method are presented, ordered by popularity. tensorrt.nptype converts a tensorrt.DataType value into the corresponding numpy dtype, which is how the examples below allocate correctly typed host buffers.
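Before the examples, here is a minimal self-contained sketch of what trt.nptype does, together with the imports all of the examples below assume (tensorrt as trt, pycuda.driver as cuda); the specific assertions are based on the TensorRT 7.x Python bindings:

import numpy as np
import tensorrt as trt
import pycuda.driver as cuda
import pycuda.autoinit  # creates a default CUDA context for the examples

# trt.nptype maps a tensorrt.DataType to the matching numpy type.
assert trt.nptype(trt.DataType.FLOAT) == np.float32
assert trt.nptype(trt.DataType.INT8) == np.int8

# Typical use: allocate a host buffer with the dtype of an engine binding.
host = np.empty(16, dtype=trt.nptype(trt.DataType.FLOAT))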

Example 1: allocate_buffers

# Required import: import tensorrt [as alias]
# Or: from tensorrt import nptype [as alias]
def allocate_buffers(engine):
    inputs = []
    outputs = []
    bindings = []
    stream = cuda.Stream()
    for binding in engine:
        size = trt.volume(engine.get_binding_shape(binding)) * engine.max_batch_size
        dtype = trt.nptype(engine.get_binding_dtype(binding))
        # Allocate host and device buffers
        host_mem = cuda.pagelocked_empty(size, dtype)
        device_mem = cuda.mem_alloc(host_mem.nbytes)
        # Append the device buffer to device bindings.
        bindings.append(int(device_mem))
        # Append to the appropriate list.
        if engine.binding_is_input(binding):
            inputs.append(HostDeviceMem(host_mem, device_mem))
        else:
            outputs.append(HostDeviceMem(host_mem, device_mem))
    return inputs, outputs, bindings, stream

# This function is generalized for multiple inputs/outputs.
# inputs and outputs are expected to be lists of HostDeviceMem objects. 
Author: aimuch | Project: iAI | Lines: 24 | Source: common.py
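The examples reference a HostDeviceMem helper that this page never defines. Below is a minimal sketch modeled on the pairing used in NVIDIA's sample common.py; the binding/index fields are optional extras used only by examples 5 and 6:

class HostDeviceMem(object):
    """Pairs a page-locked host buffer with its device counterpart."""
    def __init__(self, host_mem, device_mem, binding=None, index=None):
        self.host = host_mem      # numpy array in page-locked memory
        self.device = device_mem  # pycuda DeviceAllocation (or torch tensor)
        self.binding = binding    # optional binding name (examples 5/6)
        self.index = index        # optional binding index (examples 5/6)

    def __repr__(self):
        return "Host:\n{}\nDevice:\n{}".format(self.host, self.device)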

Example 2: __allocate_buffers

# Required import: import tensorrt [as alias]
# Or: from tensorrt import nptype [as alias]
def __allocate_buffers(self, engine):
        inputs = []
        outputs = []
        bindings = []
        for binding in engine:
            size = trt.volume(engine.get_binding_shape(binding)) * engine.max_batch_size
            dtype = trt.nptype(engine.get_binding_dtype(binding))
            # Allocate host and device buffers
            host_mem = cuda.pagelocked_empty(size, dtype)
            device_mem = cuda.mem_alloc(host_mem.nbytes)
            # Append the device buffer to device bindings.
            bindings.append(int(device_mem))
            # Append to the appropriate list.
            if engine.binding_is_input(binding):
                inputs.append(HostDeviceMem(host_mem, device_mem))
            else:
                outputs.append(HostDeviceMem(host_mem, device_mem))
        return inputs, outputs, bindings 
Author: becauseofAI | Project: lffd-pytorch | Lines: 20 | Source: predict_tensorrt.py

Example 3: allocate_buffers

# Required import: import tensorrt [as alias]
# Or: from tensorrt import nptype [as alias]
def allocate_buffers(engine):
    """Allocates all host/device in/out buffers required for an engine."""
    inputs = []
    outputs = []
    bindings = []
    stream = cuda.Stream()
    for binding in engine:
        size = trt.volume(engine.get_binding_shape(binding)) * \
               engine.max_batch_size
        dtype = trt.nptype(engine.get_binding_dtype(binding))
        # Allocate host and device buffers
        host_mem = cuda.pagelocked_empty(size, dtype)
        device_mem = cuda.mem_alloc(host_mem.nbytes)
        # Append the device buffer to device bindings.
        bindings.append(int(device_mem))
        # Append to the appropriate list.
        if engine.binding_is_input(binding):
            inputs.append(HostDeviceMem(host_mem, device_mem))
        else:
            outputs.append(HostDeviceMem(host_mem, device_mem))
    return inputs, outputs, bindings, stream 
Author: jkjung-avt | Project: tensorrt_demos | Lines: 23 | Source: yolov3.py
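These allocate_buffers variants are normally paired with a do_inference helper that stages inputs, runs the engine, and fetches outputs. Here is a sketch following the convention of NVIDIA's samples (the name do_inference is the conventional one, not taken from the code above):

def do_inference(context, bindings, inputs, outputs, stream, batch_size=1):
    # Transfer input data to the GPU.
    [cuda.memcpy_htod_async(inp.device, inp.host, stream) for inp in inputs]
    # Run inference.
    context.execute_async(batch_size=batch_size, bindings=bindings,
                          stream_handle=stream.handle)
    # Transfer predictions back from the GPU.
    [cuda.memcpy_dtoh_async(out.host, out.device, stream) for out in outputs]
    # Synchronize the stream before reading the host buffers.
    stream.synchronize()
    return [out.host for out in outputs]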

Example 4: get_input_metadata

# Required import: import tensorrt [as alias]
# Or: from tensorrt import nptype [as alias]
def get_input_metadata(self):
        inputs = OrderedDict()
        active_profile = self.context.active_optimization_profile
        bindings_per_profile = len(self.engine) // self.engine.num_optimization_profiles
        logging.debug(
            "Total # of Profiles: {:}, Bindings Per Profile: {:}, Active Profile: {:}".format(
                self.engine.num_optimization_profiles, bindings_per_profile, active_profile
            )
        )

        start_binding = bindings_per_profile * active_profile
        end_binding = start_binding + bindings_per_profile
        logging.info("Start Binding: {:}, End Binding: {:}".format(start_binding, end_binding))

        for binding in range(start_binding, end_binding):
            if self.engine.binding_is_input(binding):
                inputs[self.engine[binding]] = (
                    trt.nptype(self.engine.get_binding_dtype(binding)),
                    list(self.engine.get_binding_shape(binding)),
                )
        return inputs 
Author: NVIDIA | Project: NeMo | Lines: 23 | Source: tensorrt_runner.py
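A hypothetical way to consume the returned mapping, e.g. to build a random feed dict for smoke testing (the runner variable and the choice of 1 for dynamic dimensions are assumptions, not from the NeMo source):

feed_dict = {}
for name, (dtype, shape) in runner.get_input_metadata().items():
    # Dynamic dimensions are reported as -1; substitute a concrete size.
    concrete = [dim if dim >= 0 else 1 for dim in shape]
    feed_dict[name] = np.random.rand(*concrete).astype(dtype)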

Example 5: allocate_buffers

# Required import: import tensorrt [as alias]
# Or: from tensorrt import nptype [as alias]
def allocate_buffers(engine):
    inputs = []
    outputs = []
    bindings = []
    index = 0
    for binding in engine:
        size = trt.volume(engine.get_binding_shape(binding)) * engine.max_batch_size
        dtype = trt.nptype(engine.get_binding_dtype(binding))
        shape = [engine.max_batch_size] + list(engine.get_binding_shape(binding))
        # Allocate host and device buffers
        host_mem = cuda.pagelocked_empty(size, dtype).reshape(shape)
        device_mem = cuda.mem_alloc(host_mem.nbytes)
        # Append the device buffer to device bindings.
        bindings.append(int(device_mem))
        # Append to the appropriate list.
        if engine.binding_is_input(binding):
            inputs.append(HostDeviceMem(host_mem, device_mem, binding, index))
        else:
            outputs.append(HostDeviceMem(host_mem, device_mem, binding, index))
        index += 1
    return inputs, outputs, bindings 
Author: traveller59 | Project: torch2trt | Lines: 23 | Source: common.py

Example 6: allocate_buffers_torch

# Required import: import tensorrt [as alias]
# Or: from tensorrt import nptype [as alias]
def allocate_buffers_torch(engine: trt.ICudaEngine, device):
    import torch
    inputs = []
    outputs = []
    bindings = []
    index = 0
    dtype_map = np_to_torch_dtype_map()
    for binding in engine:
        size = trt.volume(engine.get_binding_shape(binding)) * engine.max_batch_size
        dtype = trt.nptype(engine.get_binding_dtype(binding))
        shape = [engine.max_batch_size] + list(engine.get_binding_shape(binding))
        # Allocate host and device buffers
        host_mem = cuda.pagelocked_empty(size, dtype).reshape(shape)
        device_mem = torch.empty(*host_mem.shape, device=device, dtype=dtype_map[host_mem.dtype])
        # Append the device buffer to device bindings.
        bindings.append(device_mem.data_ptr())
        # Append to the appropriate list.
        if engine.binding_is_input(binding):
            inputs.append(HostDeviceMem(host_mem, device_mem, binding, index))
        else:
            outputs.append(HostDeviceMem(host_mem, device_mem, binding, index))
        index += 1
    return inputs, outputs, bindings 
Author: traveller59 | Project: torch2trt | Lines: 25 | Source: common.py
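allocate_buffers_torch relies on a np_to_torch_dtype_map helper that is not reproduced on this page. A plausible sketch follows (the exact table in torch2trt may differ):

def np_to_torch_dtype_map():
    import numpy as np
    import torch
    # Keys are np.dtype objects, since host_mem.dtype is an np.dtype.
    return {
        np.dtype(np.float32): torch.float32,
        np.dtype(np.float16): torch.float16,
        np.dtype(np.int32): torch.int32,
        np.dtype(np.int8): torch.int8,
    }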

Example 7: allocate_buffers

# Required import: import tensorrt [as alias]
# Or: from tensorrt import nptype [as alias]
def allocate_buffers(engine):
    # Determine dimensions and create page-locked memory buffers (i.e. won't be swapped to disk) to hold host inputs/outputs.
    h_input = cuda.pagelocked_empty(trt.volume(engine.get_binding_shape(0)), dtype=trt.nptype(ModelData.DTYPE))
    h_output = cuda.pagelocked_empty(trt.volume(engine.get_binding_shape(1)), dtype=trt.nptype(ModelData.DTYPE))
    # Allocate device memory for inputs and outputs.
    d_input = cuda.mem_alloc(h_input.nbytes)
    d_output = cuda.mem_alloc(h_output.nbytes)
    # Create a stream in which to copy inputs/outputs and run inference.
    stream = cuda.Stream()
    return h_input, d_input, h_output, d_output, stream 
Author: aimuch | Project: iAI | Lines: 12 | Source: onnx_resnet50.py
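Examples 7-9 reference a ModelData class defined elsewhere in the sample files. A minimal stand-in with values conventional for the ResNet50 samples (both constants are assumptions; the real values live in onnx_resnet50.py / uff_resnet50.py):

class ModelData(object):
    INPUT_SHAPE = (3, 224, 224)  # CHW; assumed ResNet50 input size
    DTYPE = trt.float32          # the dtype handed to trt.nptype above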

Example 8: load_normalized_test_case

# Required import: import tensorrt [as alias]
# Or: from tensorrt import nptype [as alias]
def load_normalized_test_case(test_image, pagelocked_buffer):
    # Converts the input image to a CHW Numpy array
    def normalize_image(image):
        # Resize, antialias and transpose the image to CHW.
        c, h, w = ModelData.INPUT_SHAPE
        image_arr = (np.asarray(image.resize((w, h), Image.ANTIALIAS))
                     .transpose([2, 0, 1])
                     .astype(trt.nptype(ModelData.DTYPE))
                     .ravel())
        # This particular ResNet50 model requires some preprocessing, specifically, mean normalization.
        return (image_arr / 255.0 - 0.45) / 0.225

    # Normalize the image and copy to pagelocked memory.
    np.copyto(pagelocked_buffer, normalize_image(Image.open(test_image)))
    return test_image 
Author: aimuch | Project: iAI | Lines: 14 | Source: onnx_resnet50.py

Example 9: load_normalized_test_case

# Required import: import tensorrt [as alias]
# Or: from tensorrt import nptype [as alias]
def load_normalized_test_case(test_image, pagelocked_buffer):
    # Converts the input image to a CHW Numpy array
    def normalize_image(image):
        # Resize, antialias and transpose the image to CHW.
        c, h, w = ModelData.INPUT_SHAPE
        return (np.asarray(image.resize((w, h), Image.ANTIALIAS))
                .transpose([2, 0, 1])
                .astype(trt.nptype(ModelData.DTYPE))
                .ravel())

    # Normalize the image and copy to pagelocked memory.
    np.copyto(pagelocked_buffer, normalize_image(Image.open(test_image)))
    return test_image 
Author: aimuch | Project: iAI | Lines: 12 | Source: uff_resnet50.py

Example 10: allocate_buffers

# Required import: import tensorrt [as alias]
# Or: from tensorrt import nptype [as alias]
def allocate_buffers(engine):
        h_input = cuda.pagelocked_empty(trt.volume(engine.get_binding_shape(0)), dtype=trt.nptype(DTYPE))
        h_output = cuda.pagelocked_empty(trt.volume(engine.get_binding_shape(1)), dtype=trt.nptype(DTYPE))
        d_input = cuda.mem_alloc(h_input.nbytes)
        d_output = cuda.mem_alloc(h_output.nbytes)
        return h_input, d_input, h_output, d_output 
Author: TAMU-VITA | Project: FasterSeg | Lines: 8 | Source: darts_utils.py

Example 11: load_input

# Required import: import tensorrt [as alias]
# Or: from tensorrt import nptype [as alias]
def load_input(input_size, host_buffer):
        assert len(input_size) == 4
        b, c, h, w = input_size
        dtype = trt.nptype(DTYPE)
        img_array = np.random.randn(c, h, w).astype(dtype).ravel()
        np.copyto(host_buffer, img_array) 
Author: TAMU-VITA | Project: FasterSeg | Lines: 8 | Source: darts_utils.py
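Examples 10 and 11 assume a module-level DTYPE constant. A hypothetical end-to-end use of the two helpers (DTYPE, the input shape, and the synchronous execute call are all assumptions, not from the FasterSeg source):

DTYPE = trt.float32  # assumed

h_input, d_input, h_output, d_output = allocate_buffers(engine)
load_input((1, 3, 512, 512), h_input)  # batch, channels, height, width
with engine.create_execution_context() as context:
    cuda.memcpy_htod(d_input, h_input)
    context.execute(batch_size=1, bindings=[int(d_input), int(d_output)])
    cuda.memcpy_dtoh(h_output, d_output)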

Example 12: alloc_buf

# Required import: import tensorrt [as alias]
# Or: from tensorrt import nptype [as alias]
def alloc_buf(engine):
    # host cpu mem
    h_in_size = trt.volume(engine.get_binding_shape(0))
    h_out_size = trt.volume(engine.get_binding_shape(1))
    h_in_dtype = trt.nptype(engine.get_binding_dtype(0))
    h_out_dtype = trt.nptype(engine.get_binding_dtype(1))
    in_cpu = cuda.pagelocked_empty(h_in_size, h_in_dtype)
    out_cpu = cuda.pagelocked_empty(h_out_size, h_out_dtype)
    # allocate gpu mem
    in_gpu = cuda.mem_alloc(in_cpu.nbytes)
    out_gpu = cuda.mem_alloc(out_cpu.nbytes)
    stream = cuda.Stream()
    return in_cpu, out_cpu, in_gpu, out_gpu, stream 
Author: ahmetgunduz | Project: Real-time-GesRec | Lines: 15 | Source: speed_gpu.py
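A hypothetical asynchronous inference pass built on alloc_buf's return values (it mirrors the copy/execute/copy pattern of example 15, but on a stream; the random input is only for illustration):

in_cpu, out_cpu, in_gpu, out_gpu, stream = alloc_buf(engine)
in_cpu[:] = np.random.randn(in_cpu.size).astype(in_cpu.dtype)
with engine.create_execution_context() as context:
    cuda.memcpy_htod_async(in_gpu, in_cpu, stream)
    context.execute_async(batch_size=1, bindings=[int(in_gpu), int(out_gpu)],
                          stream_handle=stream.handle)
    cuda.memcpy_dtoh_async(out_cpu, out_gpu, stream)
    stream.synchronize()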

Example 13: get_input_metadata_from_profile

# Required import: import tensorrt [as alias]
# Or: from tensorrt import nptype [as alias]
def get_input_metadata_from_profile(profile, network):
    input_metadata = OrderedDict()
    for index in range(network.num_inputs):
        tensor = network.get_input(index)
        if tensor.is_shape_tensor:
            shapes = profile.get_shape_input(tensor.name)
        else:
            shapes = profile.get_shape(tensor.name)
        if tuple(shapes[0]) != tuple(shapes[1]):
            logging.warning("In profile 0, min != max, using opt shapes for calibration")
        # Always use opt shape
        input_metadata[tensor.name] = (trt.nptype(tensor.dtype), shapes[1])
    return input_metadata 
Author: NVIDIA | Project: NeMo | Lines: 15 | Source: tensorrt_runner.py

Example 14: from_engine

# Required import: import tensorrt [as alias]
# Or: from tensorrt import nptype [as alias]
def from_engine(engine):
        buffers = Buffers()
        for binding in engine:
            dtype = trt.nptype(engine.get_binding_dtype(binding))
            buffers.device_buffers[binding] = DeviceBuffer(dtype=dtype)
            if not engine.binding_is_input(binding):
                buffers.host_outputs[binding] = HostBuffer(dtype=dtype)
        return buffers 
Author: NVIDIA | Project: NeMo | Lines: 10 | Source: tensorrt_runner.py
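from_engine uses Buffers, DeviceBuffer, and HostBuffer classes from NeMo's tensorrt_runner.py that this page does not reproduce. Hypothetical minimal stand-ins that would satisfy this snippet (the real classes also grow their allocations to fit dynamic shapes):

class HostBuffer(object):
    def __init__(self, dtype):
        self.dtype = dtype
        self.arr = np.empty(0, dtype=dtype)  # resized once shapes are known

class DeviceBuffer(object):
    def __init__(self, dtype):
        self.dtype = dtype
        self.ptr = None  # cuda.mem_alloc'd lazily once the size is known

class Buffers(object):
    def __init__(self):
        self.device_buffers = OrderedDict()
        self.host_outputs = OrderedDict()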

Example 15: run_speed_eval

# Required import: import tensorrt [as alias]
# Or: from tensorrt import nptype [as alias]
def run_speed_eval(self, warm_run_loops=10, real_run_loops=100):

        def allocate_buffers(engine):
            inputs = []
            outputs = []
            bindings = []
            for binding in engine:
                size = trt.volume(engine.get_binding_shape(binding)) * engine.max_batch_size
                dtype = trt.nptype(engine.get_binding_dtype(binding))
                # Allocate host and device buffers
                host_mem = cuda.pagelocked_empty(size, dtype)
                device_mem = cuda.mem_alloc(host_mem.nbytes)
                # Append the device buffer to device bindings.
                bindings.append(int(device_mem))
                # Append to the appropriate list.
                if engine.binding_is_input(binding):
                    inputs.append(HostDeviceMem(host_mem, device_mem))
                else:
                    outputs.append(HostDeviceMem(host_mem, device_mem))
            return inputs, outputs, bindings

        inputs, outputs, bindings = allocate_buffers(self.engine)
        # warm run
        for i in range(warm_run_loops):
            [cuda.memcpy_htod(inp.device, inp.host) for inp in inputs]
            self.executor.execute(batch_size=self.max_batch_size, bindings=bindings)
            [cuda.memcpy_dtoh(out.host, out.device) for out in outputs]

        # real run
        logging.info('Start real run loop.')
        sum_time_data_copy = 0.
        sum_time_inference_only = 0.
        for i in range(real_run_loops):
            time_start = time.time()
            [cuda.memcpy_htod(inp.device, inp.host) for inp in inputs]
            sum_time_data_copy += time.time() - time_start

            time_start = time.time()
            self.executor.execute(batch_size=self.max_batch_size, bindings=bindings)
            sum_time_inference_only += time.time() - time_start

            time_start = time.time()
            [cuda.memcpy_dtoh(out.host, out.device) for out in outputs]
            sum_time_data_copy += time.time() - time_start

        logging.info('Total time (data transfer & inference) elapsed: %.02f ms. [%.02f ms] for each image (%.02f FPS)'
                     % ((sum_time_data_copy + sum_time_inference_only) * 1000,
                        (sum_time_data_copy + sum_time_inference_only) * 1000 / real_run_loops / self.max_batch_size,
                        real_run_loops * self.max_batch_size / (sum_time_data_copy + sum_time_inference_only))) 
Author: becauseofAI | Project: lffd-pytorch | Lines: 51 | Source: inference_speed_eval_with_tensorrt_cudnn.py


Note: The tensorrt.nptype examples in this article were compiled by 纯净天空 from open source code and documentation platforms such as GitHub and MSDocs. The snippets were selected from open source projects contributed by their respective developers; copyright in the source code remains with the original authors. Refer to each project's license before redistributing or reusing the code; do not reproduce without permission.