

Python tensorrt.nptype Method Code Examples

This article collects typical usage examples of the tensorrt.nptype method in Python. If you are wondering what tensorrt.nptype does, how to call it, or what it looks like in real code, the curated examples below should help. You can also explore further usage examples from the tensorrt module.


Below are 15 code examples of the tensorrt.nptype method, sorted by popularity by default. You can upvote the examples you like or find useful; your votes help the system recommend better Python code examples.
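Before diving into the examples, a quick orientation: tensorrt.nptype maps a TensorRT DataType enum value to the corresponding NumPy type, which is what lets the examples below allocate host buffers with the right element type for each engine binding. A minimal illustrative sketch (assuming TensorRT and NumPy are installed):

import numpy as np
import tensorrt as trt

# trt.nptype converts a tensorrt.DataType value to a NumPy type object.
print(trt.nptype(trt.float32))  # <class 'numpy.float32'>
print(trt.nptype(trt.int32))    # <class 'numpy.int32'>

# Typical use: allocate a host buffer whose dtype matches a binding.
dtype = trt.nptype(trt.float16)
host_buf = np.empty(1000, dtype=dtype)  # element type is numpy.float16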

Example 1: allocate_buffers

# Required import: import tensorrt [as alias]
# Alternatively: from tensorrt import nptype [as alias]
def allocate_buffers(engine):
    inputs = []
    outputs = []
    bindings = []
    stream = cuda.Stream()
    for binding in engine:
        size = trt.volume(engine.get_binding_shape(binding)) * engine.max_batch_size
        dtype = trt.nptype(engine.get_binding_dtype(binding))
        # Allocate host and device buffers
        host_mem = cuda.pagelocked_empty(size, dtype)
        device_mem = cuda.mem_alloc(host_mem.nbytes)
        # Append the device buffer to device bindings.
        bindings.append(int(device_mem))
        # Append to the appropriate list.
        if engine.binding_is_input(binding):
            inputs.append(HostDeviceMem(host_mem, device_mem))
        else:
            outputs.append(HostDeviceMem(host_mem, device_mem))
    return inputs, outputs, bindings, stream

# This function is generalized for multiple inputs/outputs.
# inputs and outputs are expected to be lists of HostDeviceMem objects. 
Developer: aimuch, Project: iAI, Lines: 24, Source: common.py
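The allocate_buffers variants on this page return HostDeviceMem objects, a helper that is not shown here. In NVIDIA's TensorRT Python samples it is a simple pairing of a page-locked host array with its device allocation; a minimal sketch along those lines follows (field names match the samples, but treat this as an assumption about each project's actual definition):

import pycuda.driver as cuda
import pycuda.autoinit  # creates a CUDA context on import

class HostDeviceMem(object):
    # Pairs a page-locked host buffer with its CUDA device allocation.
    def __init__(self, host_mem, device_mem):
        self.host = host_mem      # numpy array in page-locked memory
        self.device = device_mem  # pycuda.driver.DeviceAllocation

    def __str__(self):
        return "Host:\n" + str(self.host) + "\nDevice:\n" + str(self.device)

    def __repr__(self):
        return self.__str__()

Some projects extend this class with extra fields such as the binding name and index; Examples 5 and 6 below pass four constructor arguments for exactly that reason.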

Example 2: __allocate_buffers

# Required import: import tensorrt [as alias]
# Alternatively: from tensorrt import nptype [as alias]
def __allocate_buffers(self, engine):
        inputs = []
        outputs = []
        bindings = []
        for binding in engine:
            size = trt.volume(engine.get_binding_shape(binding)) * engine.max_batch_size
            dtype = trt.nptype(engine.get_binding_dtype(binding))
            # Allocate host and device buffers
            host_mem = cuda.pagelocked_empty(size, dtype)
            device_mem = cuda.mem_alloc(host_mem.nbytes)
            # Append the device buffer to device bindings.
            bindings.append(int(device_mem))
            # Append to the appropriate list.
            if engine.binding_is_input(binding):
                inputs.append(HostDeviceMem(host_mem, device_mem))
            else:
                outputs.append(HostDeviceMem(host_mem, device_mem))
        return inputs, outputs, bindings 
Developer: becauseofAI, Project: lffd-pytorch, Lines: 20, Source: predict_tensorrt.py

Example 3: allocate_buffers

# Required import: import tensorrt [as alias]
# Alternatively: from tensorrt import nptype [as alias]
def allocate_buffers(engine):
    """Allocates all host/device in/out buffers required for an engine."""
    inputs = []
    outputs = []
    bindings = []
    stream = cuda.Stream()
    for binding in engine:
        size = trt.volume(engine.get_binding_shape(binding)) * \
               engine.max_batch_size
        dtype = trt.nptype(engine.get_binding_dtype(binding))
        # Allocate host and device buffers
        host_mem = cuda.pagelocked_empty(size, dtype)
        device_mem = cuda.mem_alloc(host_mem.nbytes)
        # Append the device buffer to device bindings.
        bindings.append(int(device_mem))
        # Append to the appropriate list.
        if engine.binding_is_input(binding):
            inputs.append(HostDeviceMem(host_mem, device_mem))
        else:
            outputs.append(HostDeviceMem(host_mem, device_mem))
    return inputs, outputs, bindings, stream 
Developer: jkjung-avt, Project: tensorrt_demos, Lines: 23, Source: yolov3.py
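Buffers allocated by Example 3 are normally driven by a companion do_inference helper that copies inputs to the device, launches the engine, and copies results back on the same stream. A hedged sketch modeled on NVIDIA's sample common.py (this function is not part of the example above; the name and signature are assumptions):

def do_inference(context, bindings, inputs, outputs, stream, batch_size=1):
    # Transfer input data to the GPU asynchronously.
    [cuda.memcpy_htod_async(inp.device, inp.host, stream) for inp in inputs]
    # Run inference on the implicit-batch engine.
    context.execute_async(batch_size=batch_size, bindings=bindings,
                          stream_handle=stream.handle)
    # Transfer predictions back from the GPU.
    [cuda.memcpy_dtoh_async(out.host, out.device, stream) for out in outputs]
    # Wait for all work on the stream to finish, then return host outputs.
    stream.synchronize()
    return [out.host for out in outputs]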

Example 4: get_input_metadata

# Required import: import tensorrt [as alias]
# Alternatively: from tensorrt import nptype [as alias]
def get_input_metadata(self):
        inputs = OrderedDict()
        active_profile = self.context.active_optimization_profile
        bindings_per_profile = len(self.engine) // self.engine.num_optimization_profiles
        logging.debug(
            "Total # of Profiles: {:}, Bindings Per Profile: {:}, Active Profile: {:}".format(
                self.engine.num_optimization_profiles, bindings_per_profile, active_profile
            )
        )

        start_binding = bindings_per_profile * active_profile
        end_binding = start_binding + bindings_per_profile
        logging.info("Start Binding: {:}, End Binding: {:}".format(start_binding, end_binding))

        for binding in range(start_binding, end_binding):
            if self.engine.binding_is_input(binding):
                inputs[self.engine[binding]] = (
                    trt.nptype(self.engine.get_binding_dtype(binding)),
                    list(self.engine.get_binding_shape(binding)),
                )
        return inputs 
Developer: NVIDIA, Project: NeMo, Lines: 23, Source: tensorrt_runner.py

Example 5: allocate_buffers

# Required import: import tensorrt [as alias]
# Alternatively: from tensorrt import nptype [as alias]
def allocate_buffers(engine):
    inputs = []
    outputs = []
    bindings = []
    index = 0
    for binding in engine:
        size = trt.volume(engine.get_binding_shape(binding)) * engine.max_batch_size
        dtype = trt.nptype(engine.get_binding_dtype(binding))
        shape = [engine.max_batch_size] + list(engine.get_binding_shape(binding))
        # Allocate host and device buffers
        host_mem = cuda.pagelocked_empty(size, dtype).reshape(shape)
        device_mem = cuda.mem_alloc(host_mem.nbytes)
        # Append the device buffer to device bindings.
        bindings.append(int(device_mem))
        # Append to the appropriate list.
        if engine.binding_is_input(binding):
            inputs.append(HostDeviceMem(host_mem, device_mem, binding, index))
        else:
            outputs.append(HostDeviceMem(host_mem, device_mem, binding, index))
        index += 1
    return inputs, outputs, bindings 
Developer: traveller59, Project: torch2trt, Lines: 23, Source: common.py

Example 6: allocate_buffers_torch

# Required import: import tensorrt [as alias]
# Alternatively: from tensorrt import nptype [as alias]
def allocate_buffers_torch(engine: trt.ICudaEngine, device):
    import torch
    inputs = []
    outputs = []
    bindings = []
    index = 0
    dtype_map = np_to_torch_dtype_map()
    for binding in engine:
        size = trt.volume(engine.get_binding_shape(binding)) * engine.max_batch_size
        dtype = trt.nptype(engine.get_binding_dtype(binding))
        shape = [engine.max_batch_size] + list(engine.get_binding_shape(binding))
        # Allocate host and device buffers
        host_mem = cuda.pagelocked_empty(size, dtype).reshape(shape)
        device_mem = torch.empty(*host_mem.shape, device=device, dtype=dtype_map[host_mem.dtype])
        # Append the device buffer to device bindings.
        bindings.append(device_mem.data_ptr())
        # Append to the appropriate list.
        if engine.binding_is_input(binding):
            inputs.append(HostDeviceMem(host_mem, device_mem, binding, index))
        else:
            outputs.append(HostDeviceMem(host_mem, device_mem, binding, index))
        index += 1
    return inputs, outputs, bindings 
Developer: traveller59, Project: torch2trt, Lines: 25, Source: common.py
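Example 6 calls np_to_torch_dtype_map, which is defined elsewhere in the torch2trt project. A plausible minimal version is sketched below; the exact mapping is an assumption, covering the NumPy dtypes trt.nptype commonly returns. Note that host_mem.dtype is a numpy.dtype instance, so the keys are built with np.dtype(...):

import numpy as np
import torch

def np_to_torch_dtype_map():
    # Map NumPy dtypes to the equivalent torch dtypes (assumed mapping).
    return {
        np.dtype(np.float32): torch.float32,
        np.dtype(np.float16): torch.float16,
        np.dtype(np.int32): torch.int32,
        np.dtype(np.int8): torch.int8,
        np.dtype(np.bool_): torch.bool,
    }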

Example 7: allocate_buffers

# Required import: import tensorrt [as alias]
# Alternatively: from tensorrt import nptype [as alias]
def allocate_buffers(engine):
    # Determine dimensions and create page-locked memory buffers (i.e. won't be swapped to disk) to hold host inputs/outputs.
    h_input = cuda.pagelocked_empty(trt.volume(engine.get_binding_shape(0)), dtype=trt.nptype(ModelData.DTYPE))
    h_output = cuda.pagelocked_empty(trt.volume(engine.get_binding_shape(1)), dtype=trt.nptype(ModelData.DTYPE))
    # Allocate device memory for inputs and outputs.
    d_input = cuda.mem_alloc(h_input.nbytes)
    d_output = cuda.mem_alloc(h_output.nbytes)
    # Create a stream in which to copy inputs/outputs and run inference.
    stream = cuda.Stream()
    return h_input, d_input, h_output, d_output, stream 
Developer: aimuch, Project: iAI, Lines: 12, Source: onnx_resnet50.py
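Example 7 above and Examples 8 and 9 below read constants from a ModelData class defined elsewhere in the sample. A hedged sketch of the relevant fields, with values assumed from NVIDIA's ResNet50 samples rather than taken from this page:

import tensorrt as trt

class ModelData(object):
    # Input layout is CHW; 224x224 RGB is assumed for ResNet50.
    INPUT_SHAPE = (3, 224, 224)
    # A TensorRT DataType; trt.nptype(ModelData.DTYPE) yields numpy.float32.
    DTYPE = trt.float32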

Example 8: load_normalized_test_case

# Required import: import tensorrt [as alias]
# Alternatively: from tensorrt import nptype [as alias]
def load_normalized_test_case(test_image, pagelocked_buffer):
    # Converts the input image to a CHW Numpy array
    def normalize_image(image):
        # Resize, antialias and transpose the image to CHW.
        c, h, w = ModelData.INPUT_SHAPE
        image_arr = (np.asarray(image.resize((w, h), Image.ANTIALIAS))
                     .transpose([2, 0, 1])
                     .astype(trt.nptype(ModelData.DTYPE))
                     .ravel())
        # This particular ResNet50 model requires some preprocessing, specifically, mean normalization.
        return (image_arr / 255.0 - 0.45) / 0.225

    # Normalize the image and copy to pagelocked memory.
    np.copyto(pagelocked_buffer, normalize_image(Image.open(test_image)))
    return test_image 
Developer: aimuch, Project: iAI, Lines: 14, Source: onnx_resnet50.py

Example 9: load_normalized_test_case

# Required import: import tensorrt [as alias]
# Alternatively: from tensorrt import nptype [as alias]
def load_normalized_test_case(test_image, pagelocked_buffer):
    # Converts the input image to a CHW Numpy array
    def normalize_image(image):
        # Resize, antialias and transpose the image to CHW.
        c, h, w = ModelData.INPUT_SHAPE
        return (np.asarray(image.resize((w, h), Image.ANTIALIAS))
                .transpose([2, 0, 1])
                .astype(trt.nptype(ModelData.DTYPE))
                .ravel())

    # Normalize the image and copy to pagelocked memory.
    np.copyto(pagelocked_buffer, normalize_image(Image.open(test_image)))
    return test_image 
Developer: aimuch, Project: iAI, Lines: 12, Source: uff_resnet50.py

Example 10: allocate_buffers

# Required import: import tensorrt [as alias]
# Alternatively: from tensorrt import nptype [as alias]
def allocate_buffers(engine):
        h_input = cuda.pagelocked_empty(trt.volume(engine.get_binding_shape(0)), dtype=trt.nptype(DTYPE))
        h_output = cuda.pagelocked_empty(trt.volume(engine.get_binding_shape(1)), dtype=trt.nptype(DTYPE))
        d_input = cuda.mem_alloc(h_input.nbytes)
        d_output = cuda.mem_alloc(h_output.nbytes)
        return h_input, d_input, h_output, d_output 
Developer: TAMU-VITA, Project: FasterSeg, Lines: 8, Source: darts_utils.py
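Example 10 above and Example 11 below reference a module-level DTYPE constant that is not shown in the snippets. In this pattern it is a TensorRT data type; a one-line assumption consistent with the code:

import tensorrt as trt

# Assumed module-level constant used by Examples 10 and 11.
DTYPE = trt.float32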

Example 11: load_input

# Required import: import tensorrt [as alias]
# Alternatively: from tensorrt import nptype [as alias]
def load_input(input_size, host_buffer):
        assert len(input_size) == 4
        b, c, h, w = input_size
        dtype = trt.nptype(DTYPE)
        img_array = np.random.randn(c, h, w).astype(dtype).ravel()
        np.copyto(host_buffer, img_array) 
Developer: TAMU-VITA, Project: FasterSeg, Lines: 8, Source: darts_utils.py

Example 12: alloc_buf

# Required import: import tensorrt [as alias]
# Alternatively: from tensorrt import nptype [as alias]
def alloc_buf(engine):
    # host cpu mem
    h_in_size = trt.volume(engine.get_binding_shape(0))
    h_out_size = trt.volume(engine.get_binding_shape(1))
    h_in_dtype = trt.nptype(engine.get_binding_dtype(0))
    h_out_dtype = trt.nptype(engine.get_binding_dtype(1))
    in_cpu = cuda.pagelocked_empty(h_in_size, h_in_dtype)
    out_cpu = cuda.pagelocked_empty(h_out_size, h_out_dtype)
    # allocate gpu mem
    in_gpu = cuda.mem_alloc(in_cpu.nbytes)
    out_gpu = cuda.mem_alloc(out_cpu.nbytes)
    stream = cuda.Stream()
    return in_cpu, out_cpu, in_gpu, out_gpu, stream 
Developer: ahmetgunduz, Project: Real-time-GesRec, Lines: 15, Source: speed_gpu.py
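A short usage sketch for Example 12, showing how the returned buffers would typically drive one synchronous inference pass. Everything here except alloc_buf is an assumption: engine is a deserialized trt.ICudaEngine and img is a NumPy array matching binding 0's shape and dtype:

# Hypothetical usage of alloc_buf (engine and img are assumed inputs).
in_cpu, out_cpu, in_gpu, out_gpu, stream = alloc_buf(engine)
np.copyto(in_cpu, img.ravel())  # stage the input in page-locked memory
with engine.create_execution_context() as context:
    cuda.memcpy_htod(in_gpu, in_cpu)  # host -> device
    context.execute(batch_size=1,
                    bindings=[int(in_gpu), int(out_gpu)])
    cuda.memcpy_dtoh(out_cpu, out_gpu)  # device -> host
print(out_cpu)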

Example 13: get_input_metadata_from_profile

# Required import: import tensorrt [as alias]
# Alternatively: from tensorrt import nptype [as alias]
def get_input_metadata_from_profile(profile, network):
    input_metadata = OrderedDict()
    for index in range(network.num_inputs):
        tensor = network.get_input(index)
        if tensor.is_shape_tensor:
            shapes = profile.get_shape_input(tensor.name)
        else:
            shapes = profile.get_shape(tensor.name)
        if tuple(shapes[0]) != tuple(shapes[1]):
            logging.warning("In profile 0, min != max, using opt shapes for calibration")
        # Always use opt shape
        input_metadata[tensor.name] = (trt.nptype(tensor.dtype), shapes[1])
    return input_metadata 
Developer: NVIDIA, Project: NeMo, Lines: 15, Source: tensorrt_runner.py

Example 14: from_engine

# Required import: import tensorrt [as alias]
# Alternatively: from tensorrt import nptype [as alias]
def from_engine(engine):
        buffers = Buffers()
        for binding in engine:
            dtype = trt.nptype(engine.get_binding_dtype(binding))
            buffers.device_buffers[binding] = DeviceBuffer(dtype=dtype)
            if not engine.binding_is_input(binding):
                buffers.host_outputs[binding] = HostBuffer(dtype=dtype)
        return buffers 
Developer: NVIDIA, Project: NeMo, Lines: 10, Source: tensorrt_runner.py

Example 15: run_speed_eval

# Required import: import tensorrt [as alias]
# Alternatively: from tensorrt import nptype [as alias]
def run_speed_eval(self, warm_run_loops=10, real_run_loops=100):

        def allocate_buffers(engine):
            inputs = []
            outputs = []
            bindings = []
            for binding in engine:
                size = trt.volume(engine.get_binding_shape(binding)) * engine.max_batch_size
                dtype = trt.nptype(engine.get_binding_dtype(binding))
                # Allocate host and device buffers
                host_mem = cuda.pagelocked_empty(size, dtype)
                device_mem = cuda.mem_alloc(host_mem.nbytes)
                # Append the device buffer to device bindings.
                bindings.append(int(device_mem))
                # Append to the appropriate list.
                if engine.binding_is_input(binding):
                    inputs.append(HostDeviceMem(host_mem, device_mem))
                else:
                    outputs.append(HostDeviceMem(host_mem, device_mem))
            return inputs, outputs, bindings

        inputs, outputs, bindings = allocate_buffers(self.engine)
        # warm run
        for i in range(warm_run_loops):
            [cuda.memcpy_htod(inp.device, inp.host) for inp in inputs]
            self.executor.execute(batch_size=self.max_batch_size, bindings=bindings)
            [cuda.memcpy_dtoh(out.host, out.device) for out in outputs]

        # real run
        logging.info('Start real run loop.')
        sum_time_data_copy = 0.
        sum_time_inference_only = 0.
        for i in range(real_run_loops):
            time_start = time.time()
            [cuda.memcpy_htod(inp.device, inp.host) for inp in inputs]
            sum_time_data_copy += time.time() - time_start

            time_start = time.time()
            self.executor.execute(batch_size=self.max_batch_size, bindings=bindings)
            sum_time_inference_only += time.time() - time_start

            time_start = time.time()
            [cuda.memcpy_dtoh(out.host, out.device) for out in outputs]
            sum_time_data_copy += time.time() - time_start

        logging.info('Total time (data transfer & inference) elapsed: %.02f ms. [%.02f ms] per image (%.02f FPS)'
                     % ((sum_time_data_copy + sum_time_inference_only) * 1000,
                        (sum_time_data_copy + sum_time_inference_only) * 1000 / real_run_loops / self.max_batch_size,
                        real_run_loops * self.max_batch_size / (sum_time_data_copy + sum_time_inference_only))) 
Developer: becauseofAI, Project: lffd-pytorch, Lines: 51, Source: inference_speed_eval_with_tensorrt_cudnn.py


Note: The tensorrt.nptype method examples in this article were compiled by 純淨天空 from open-source code and documentation platforms such as GitHub and MSDocs. The snippets were selected from open-source projects contributed by many developers; copyright of the source code remains with the original authors. For distribution and use, please follow the corresponding project's license. Do not reproduce without permission.