当前位置: 首页>>代码示例>>Python>>正文


Python driver.memcpy_dtoh_async方法代码示例

本文整理汇总了Python中pycuda.driver.memcpy_dtoh_async方法的典型用法代码示例。如果您正苦于以下问题:Python driver.memcpy_dtoh_async方法的具体用法?Python driver.memcpy_dtoh_async怎么用?Python driver.memcpy_dtoh_async使用的例子?那么恭喜您,这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在模块pycuda.driver的用法示例。


在下文中一共展示了driver.memcpy_dtoh_async方法的15个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Python代码示例。

示例1: infer

# 需要导入模块: from pycuda import driver [as 别名]
# 或者: from pycuda.driver import memcpy_dtoh_async [as 别名]
def infer(context, input_img, output_size, batch_size):
    """Run one inference pass and return the predictions as a host array.

    Args:
        context: Execution context exposing ``get_engine()`` and
            ``enqueue()``; its engine must report exactly two bindings.
        input_img: Array holding the input data; it is cast to float32
            before being uploaded.
        output_size: Shape (or element count) of the host result buffer.
        batch_size: Batch size forwarded to ``context.enqueue`` and used to
            scale both device allocations.

    Returns:
        A float32 ``numpy`` array created with ``np.empty(output_size)`` and
        filled by the device-to-host copy.
    """
    trt_engine = context.get_engine()
    # Two bindings are passed to enqueue() below: one input, one output.
    assert(trt_engine.get_nb_bindings() == 2)

    host_in = input_img.astype(np.float32)
    host_out = np.empty(output_size, dtype=np.float32)

    # Device buffers are scaled by the batch size.
    dev_in = cuda.mem_alloc(batch_size * host_in.nbytes)
    dev_out = cuda.mem_alloc(batch_size * host_out.nbytes)
    queue = cuda.Stream()

    # Upload, execute and download are all queued on the same stream ...
    cuda.memcpy_htod_async(dev_in, host_in, queue)
    context.enqueue(batch_size, [int(dev_in), int(dev_out)], queue.handle, None)
    cuda.memcpy_dtoh_async(host_out, dev_out, queue)
    # ... and the stream is drained before the result is handed back.
    queue.synchronize()
    return host_out
开发者ID:aimuch,项目名称:iAI,代码行数:26,代码来源:mnist_api.py

示例2: inference_image

# 需要导入模块: from pycuda import driver [as 别名]
# 或者: from pycuda.driver import memcpy_dtoh_async [as 别名]
def inference_image(context, input_img, batch_size, output_size=1000):
    """Run one inference pass on ``input_img`` and return the raw output.

    Args:
        context: Execution context exposing ``get_engine()`` and
            ``enqueue()``; its engine must report exactly two bindings.
        input_img: Array uploaded to the device as-is (no dtype conversion
            is performed here); its ``size`` and ``dtype`` determine the
            input allocation.
        batch_size: Batch size forwarded to ``context.enqueue`` and used to
            scale both device allocations.
        output_size: Number of float32 elements in the host result buffer.
            Defaults to 1000.

    Returns:
        A float32 ``numpy`` array with ``output_size`` elements, returned
        after the stream has been synchronized.
    """
    engine = context.get_engine()
    # Two bindings are passed to enqueue() below: one input, one output.
    assert(engine.get_nb_bindings() == 2)

    # Host buffer that receives the predictions.
    output = np.empty(output_size, dtype=np.float32)

    # Device buffers, scaled by the batch size.
    d_input = cuda.mem_alloc(batch_size * input_img.size * input_img.dtype.itemsize)
    d_output = cuda.mem_alloc(batch_size * output.size * output.dtype.itemsize)

    # Binding list is passed as raw device addresses.
    bindings = [int(d_input), int(d_output)]
    stream = cuda.Stream()

    # Upload, execute and download are queued on the same stream.
    cuda.memcpy_htod_async(d_input, input_img, stream)
    context.enqueue(batch_size, bindings, stream.handle, None)
    cuda.memcpy_dtoh_async(output, d_output, stream)
    # Wait for all queued work so `output` is complete before returning.
    stream.synchronize()
    return output
开发者ID:aimuch,项目名称:iAI,代码行数:27,代码来源:sample_onnx.py

示例3: prediction

# 需要导入模块: from pycuda import driver [as 别名]
# 或者: from pycuda.driver import memcpy_dtoh_async [as 别名]
def prediction(self, img):
    """Run the engine on ``img`` and return the first host output buffer.

    The image is preprocessed with ``_preprocess_trt`` for
    ``self.input_shape``, flattened into ``self.host_inputs[0]``, uploaded,
    executed with a batch size of 1, and both output buffers are copied
    back before the stream is synchronized.

    Returns:
        ``self.host_outputs[0]`` (the buffer object itself, not a copy).
    """
    flat_input = _preprocess_trt(img, self.input_shape).ravel()
    np.copyto(self.host_inputs[0], flat_input)

    cuda.memcpy_htod_async(self.cuda_inputs[0], self.host_inputs[0], self.stream)
    self.context.execute_async(
        batch_size=1, bindings=self.bindings, stream_handle=self.stream.handle)
    # Download output 1 first, then output 0.
    for idx in (1, 0):
        cuda.memcpy_dtoh_async(
            self.host_outputs[idx], self.cuda_outputs[idx], self.stream)
    self.stream.synchronize()

    return self.host_outputs[0]
开发者ID:cristianpb,项目名称:object-detection,代码行数:20,代码来源:ssd_trt_detection.py

示例4: detect

# 需要导入模块: from pycuda import driver [as 别名]
# 或者: from pycuda.driver import memcpy_dtoh_async [as 别名]
def detect(self, img, conf_th=0.3):
    """Detect objects in the input image.

    The image is preprocessed with ``_preprocess_trt`` for
    ``self.input_shape``, flattened into ``self.host_inputs[0]``, uploaded,
    executed with a batch size of 1, and both output buffers are copied
    back before the stream is synchronized.

    Args:
        img: Input image handed to ``_preprocess_trt`` and
            ``_postprocess_trt``.
        conf_th: Threshold forwarded to ``_postprocess_trt``.

    Returns:
        Whatever ``_postprocess_trt`` returns for ``self.host_outputs[0]``.
    """
    flat_input = _preprocess_trt(img, self.input_shape).ravel()
    np.copyto(self.host_inputs[0], flat_input)

    cuda.memcpy_htod_async(self.cuda_inputs[0], self.host_inputs[0], self.stream)
    self.context.execute_async(
        batch_size=1, bindings=self.bindings, stream_handle=self.stream.handle)
    # Download output 1 first, then output 0.
    for idx in (1, 0):
        cuda.memcpy_dtoh_async(
            self.host_outputs[idx], self.cuda_outputs[idx], self.stream)
    self.stream.synchronize()

    raw_output = self.host_outputs[0]
    return _postprocess_trt(img, raw_output, conf_th, self.output_layout)
开发者ID:jkjung-avt,项目名称:tensorrt_demos,代码行数:21,代码来源:ssd.py

示例5: do_inference

# 需要导入模块: from pycuda import driver [as 别名]
# 或者: from pycuda.driver import memcpy_dtoh_async [as 别名]
def do_inference(context, bindings, inputs, outputs, stream, batch_size=1):
    """do_inference (for TensorRT 6.x or lower)

    Generalized for multiple inputs/outputs. ``inputs`` and ``outputs`` are
    expected to be lists of HostDeviceMem objects (each providing ``host``
    and ``device``).

    Args:
        context: Execution context providing ``execute_async``.
        bindings: Binding list forwarded to ``execute_async``.
        inputs: Buffers whose ``host`` data is uploaded to ``device``.
        outputs: Buffers whose ``device`` data is downloaded to ``host``.
        stream: Stream on which every copy and the execution are queued.
        batch_size: Batch size forwarded to ``execute_async``.

    Returns:
        A list with the ``host`` member of every output, in order.
    """
    # Host -> device for every input buffer.
    for buf in inputs:
        cuda.memcpy_htod_async(buf.device, buf.host, stream)

    # Queue the inference itself.
    context.execute_async(
        batch_size=batch_size, bindings=bindings, stream_handle=stream.handle)

    # Device -> host for every output buffer.
    for buf in outputs:
        cuda.memcpy_dtoh_async(buf.host, buf.device, stream)

    # Block until everything queued above has finished.
    stream.synchronize()
    return [buf.host for buf in outputs]
开发者ID:jkjung-avt,项目名称:tensorrt_demos,代码行数:20,代码来源:yolov3.py

示例6: do_inference_v2

# 需要导入模块: from pycuda import driver [as 别名]
# 或者: from pycuda.driver import memcpy_dtoh_async [as 别名]
def do_inference_v2(context, bindings, inputs, outputs, stream):
    """do_inference_v2 (for TensorRT 7.0+)

    Generalized for multiple inputs/outputs for full dimension networks.
    ``inputs`` and ``outputs`` are expected to be lists of HostDeviceMem
    objects (each providing ``host`` and ``device``).

    Args:
        context: Execution context providing ``execute_async_v2``.
        bindings: Binding list forwarded to ``execute_async_v2``.
        inputs: Buffers whose ``host`` data is uploaded to ``device``.
        outputs: Buffers whose ``device`` data is downloaded to ``host``.
        stream: Stream on which every copy and the execution are queued.

    Returns:
        A list with the ``host`` member of every output, in order.
    """
    # Host -> device for every input buffer.
    for buf in inputs:
        cuda.memcpy_htod_async(buf.device, buf.host, stream)

    # Queue the inference itself.
    context.execute_async_v2(bindings=bindings, stream_handle=stream.handle)

    # Device -> host for every output buffer.
    for buf in outputs:
        cuda.memcpy_dtoh_async(buf.host, buf.device, stream)

    # Block until everything queued above has finished.
    stream.synchronize()
    return [buf.host for buf in outputs]
开发者ID:jkjung-avt,项目名称:tensorrt_demos,代码行数:19,代码来源:yolov3.py

示例7: execute_async

# 需要导入模块: from pycuda import driver [as 别名]
# 或者: from pycuda.driver import memcpy_dtoh_async [as 别名]
def execute_async(self, batch_size):
    """Upload inputs, run the context, download outputs and synchronize.

    Args:
        batch_size: Number of leading entries of every host buffer to
            transfer; also forwarded to ``self.context.execute_async``.

    Returns:
        A dict mapping each key of ``self.output_dict`` to the first
        ``batch_size`` entries of that buffer's ``host`` member.
    """
    # Only inputs flagged with `device_input is False` are uploaded.
    for inp in self.inputs:
        if inp.device_input is False:
            cuda.memcpy_htod_async(
                inp.device, inp.host[:batch_size], self.stream)

    self.context.execute_async(
        batch_size=batch_size,
        bindings=self.bindings,
        stream_handle=self.stream.handle)

    for out in self.outputs:
        cuda.memcpy_dtoh_async(
            out.host[:batch_size], out.device, self.stream)

    # Block until every queued copy and the execution have finished.
    self.stream.synchronize()

    results = {}
    for name, buf in self.output_dict.items():
        results[name] = buf.host[:batch_size]
    return results
开发者ID:traveller59,项目名称:torch2trt,代码行数:18,代码来源:inference.py

示例8: get

# 需要导入模块: from pycuda import driver [as 别名]
# 或者: from pycuda.driver import memcpy_dtoh_async [as 别名]
def get(self, stream=None):
    """
    Copy device array to host.

    Arguments:
        stream: Stream forwarded to ``drv.memcpy_dtoh_async``
            (default ``None``).

    Returns:
        numpy.ndarray: A host numpy array with this array's shape and dtype

    NOTE(review): the copy is issued through the async API and no
    synchronization happens here -- confirm that callers passing a stream
    synchronize it before reading the result.
    """
    if self.is_contiguous:
        source = self.gpudata
    else:
        # Non-contiguous data is first copied into a fresh device array
        # obtained from the backend, and that array is downloaded instead.
        staging = self.backend.empty(self.shape, self.dtype)
        staging.copy(self)
        source = staging.gpudata

    host_ary = np.empty(self.shape, self.dtype)
    drv.memcpy_dtoh_async(host_ary, source, stream)
    return host_ary
开发者ID:NervanaSystems,项目名称:neon,代码行数:20,代码来源:nervanagpu.py

示例9: infer

# 需要导入模块: from pycuda import driver [as 别名]
# 或者: from pycuda.driver import memcpy_dtoh_async [as 别名]
def infer(context, input_img, output_size, batch_size):
    """Execute the model once and hand back the predictions.

    Args:
        context: Execution context exposing ``get_engine()`` and
            ``enqueue()``; its engine must report exactly two bindings.
        input_img: Input array; cast to float32 before the upload.
        output_size: Shape (or element count) used for the host result
            buffer.
        batch_size: Batch size forwarded to ``context.enqueue`` and used to
            scale both device allocations.

    Returns:
        A float32 ``numpy`` array created with ``np.empty(output_size)`` and
        filled by the device-to-host copy.
    """
    model = context.get_engine()
    # Two bindings are passed to enqueue() below: one input, one output.
    assert(model.get_nb_bindings() == 2)

    # Host-side buffers: float32 input and an uninitialized result array.
    src = input_img.astype(np.float32)
    predictions = np.empty(output_size, dtype=np.float32)

    # Device-side buffers, scaled by the batch size.
    src_dev = cuda.mem_alloc(batch_size * src.nbytes)
    dst_dev = cuda.mem_alloc(batch_size * predictions.nbytes)
    device_ptrs = [int(src_dev), int(dst_dev)]

    work_stream = cuda.Stream()
    cuda.memcpy_htod_async(src_dev, src, work_stream)
    context.enqueue(batch_size, device_ptrs, work_stream.handle, None)
    cuda.memcpy_dtoh_async(predictions, dst_dev, work_stream)

    # Wait for the queued work before exposing the result.
    work_stream.synchronize()
    return predictions
开发者ID:aimuch,项目名称:iAI,代码行数:31,代码来源:caffe_mnist.py

示例10: infer

# 需要导入模块: from pycuda import driver [as 别名]
# 或者: from pycuda.driver import memcpy_dtoh_async [as 别名]
def infer(engine, input_img, batch_size):
    """Run one inference pass on ``engine`` and return the host output.

    Args:
        engine: Engine object; must report exactly two bindings and provide
            ``create_execution_context()`` and ``get_binding_dimensions()``.
        input_img: Array uploaded to the device as-is (no dtype conversion
            is performed here).
        batch_size: Batch size forwarded to ``context.enqueue``.

    Returns:
        A page-locked float32 array of ``C * H * W * batch_size`` elements
        (dimensions taken from binding 1), returned only after the stream
        has been synchronized.
    """
    # Create the execution context that will run the model.
    context = engine.create_execution_context()
    assert(engine.get_nb_bindings() == 2)

    # Size the result buffer from the dimensions of binding 1.
    dims = engine.get_binding_dimensions(1).to_DimsCHW()
    elt_count = dims.C() * dims.H() * dims.W() * batch_size
    # Allocate page-locked host memory for the result.
    output = cuda.pagelocked_empty(elt_count, dtype=np.float32)

    # Allocate device memory.
    d_input = cuda.mem_alloc(batch_size * input_img.size * input_img.dtype.itemsize)
    # elt_count already includes batch_size, so the host buffer's byte size
    # matches the copy-back below; multiplying again would over-allocate.
    d_output = cuda.mem_alloc(output.nbytes)

    bindings = [int(d_input), int(d_output)]
    stream = cuda.Stream()

    # Transfer input data to the device.
    cuda.memcpy_htod_async(d_input, input_img, stream)
    # Execute the model.
    context.enqueue(batch_size, bindings, stream.handle, None)
    # Transfer predictions back.
    cuda.memcpy_dtoh_async(output, d_output, stream)

    # The copy above is only queued on the stream; wait for it to finish so
    # the caller never sees a partially filled buffer.
    stream.synchronize()

    return output
开发者ID:aimuch,项目名称:iAI,代码行数:29,代码来源:uff_mnist.py

示例11: infer

# 需要导入模块: from pycuda import driver [as 别名]
# 或者: from pycuda.driver import memcpy_dtoh_async [as 别名]
def infer(context, input_img, batch_size):
    """Run one inference pass through ``context`` and return the host output.

    Args:
        context: Execution context exposing ``get_engine()`` and
            ``enqueue()``; its engine must report exactly two bindings.
        input_img: Input array; cast to float32 before the upload.
        batch_size: Batch size forwarded to ``context.enqueue``.

    Returns:
        A page-locked float32 array of ``C * H * W * batch_size`` elements
        (dimensions taken from binding 1), returned only after the stream
        has been synchronized.
    """
    engine = context.get_engine()
    assert(engine.get_nb_bindings() == 2)

    # Size the result buffer from the dimensions of binding 1.
    dims = engine.get_binding_dimensions(1).to_DimsCHW()
    elt_count = dims.C() * dims.H() * dims.W() * batch_size
    # Convert input data to float32.
    input_img = input_img.astype(np.float32)
    # Allocate page-locked host memory for the result.
    output = cuda.pagelocked_empty(elt_count, dtype=np.float32)

    # Allocate device memory.
    d_input = cuda.mem_alloc(batch_size * input_img.size * input_img.dtype.itemsize)
    # elt_count already includes batch_size, so the host buffer's byte size
    # matches the copy-back below; multiplying again would over-allocate.
    d_output = cuda.mem_alloc(output.nbytes)

    bindings = [int(d_input), int(d_output)]
    stream = cuda.Stream()

    # Transfer input data to the device.
    cuda.memcpy_htod_async(d_input, input_img, stream)
    # Execute the model.
    context.enqueue(batch_size, bindings, stream.handle, None)
    # Transfer predictions back.
    cuda.memcpy_dtoh_async(output, d_output, stream)

    # The copy above is only queued on the stream; wait for it to finish so
    # the caller never sees a partially filled buffer.
    stream.synchronize()

    return output
开发者ID:aimuch,项目名称:iAI,代码行数:31,代码来源:tf_to_trt.py

示例12: infer

# 需要导入模块: from pycuda import driver [as 别名]
# 或者: from pycuda.driver import memcpy_dtoh_async [as 别名]
def infer(engine, input_img, batch_size):
    """Run one inference pass on ``engine`` and return the host output.

    Args:
        engine: Engine object; must report exactly two bindings and provide
            ``create_execution_context()`` and ``get_binding_dimensions()``.
        input_img: Array uploaded to the device as-is (no dtype conversion
            is performed here).
        batch_size: Batch size forwarded to ``context.enqueue``.

    Returns:
        A page-locked float32 array of ``C * H * W * batch_size`` elements
        (dimensions taken from binding 1), returned only after the stream
        has been synchronized.
    """
    # Create the execution context that will run the model.
    context = engine.create_execution_context()
    assert(engine.get_nb_bindings() == 2)

    # Size the result buffer from the dimensions of binding 1.
    dims = engine.get_binding_dimensions(1).to_DimsCHW()
    elt_count = dims.C() * dims.H() * dims.W() * batch_size

    # Allocate page-locked host memory for the result.
    output = cuda.pagelocked_empty(elt_count, dtype=np.float32)

    # Allocate device memory.
    d_input = cuda.mem_alloc(batch_size * input_img.size * input_img.dtype.itemsize)
    # elt_count already includes batch_size, so the host buffer's byte size
    # matches the copy-back below; multiplying again would over-allocate.
    d_output = cuda.mem_alloc(output.nbytes)

    bindings = [int(d_input), int(d_output)]
    stream = cuda.Stream()

    # Transfer input data to the device.
    cuda.memcpy_htod_async(d_input, input_img, stream)
    # Execute the model.
    context.enqueue(batch_size, bindings, stream.handle, None)
    # Transfer predictions back.
    cuda.memcpy_dtoh_async(output, d_output, stream)

    # The copy above is only queued on the stream; wait for it to finish so
    # the caller never sees a partially filled buffer.
    stream.synchronize()

    return output
开发者ID:aimuch,项目名称:iAI,代码行数:31,代码来源:onnx_mnist.py

示例13: infer

# 需要导入模块: from pycuda import driver [as 别名]
# 或者: from pycuda.driver import memcpy_dtoh_async [as 别名]
def infer(context, input_img, output_size, batch_size):
    """Run one inference pass and return the predictions as a host array.

    Args:
        context: Execution context exposing ``get_engine()`` and
            ``enqueue()``; its engine must report exactly two bindings.
        input_img: Input array; cast to float32 before the upload.
        output_size: Shape (or element count) of the host result buffer.
        batch_size: Batch size forwarded to ``context.enqueue`` and used to
            scale both device allocations.

    Returns:
        A float32 ``numpy`` array created with ``np.empty(output_size)``,
        returned only after the stream has been synchronized.
    """
    # Load engine.
    engine = context.get_engine()
    assert(engine.get_nb_bindings() == 2)
    # Convert input data to float32.
    input_img = input_img.astype(np.float32)
    # Create output array to receive data.
    output = np.empty(output_size, dtype=np.float32)

    # Allocate device memory.
    d_input = cuda.mem_alloc(batch_size * input_img.size * input_img.dtype.itemsize)
    d_output = cuda.mem_alloc(batch_size * output.size * output.dtype.itemsize)

    bindings = [int(d_input), int(d_output)]
    stream = cuda.Stream()

    # Transfer input data to the device.
    cuda.memcpy_htod_async(d_input, input_img, stream)
    # Execute the model.
    context.enqueue(batch_size, bindings, stream.handle, None)
    # Transfer predictions back.
    cuda.memcpy_dtoh_async(output, d_output, stream)

    # Everything above was issued through asynchronous APIs; wait for the
    # stream so `output` is guaranteed to be complete before it is returned.
    stream.synchronize()

    return output
开发者ID:aimuch,项目名称:iAI,代码行数:28,代码来源:custom_layers.py

示例14: infer

# 需要导入模块: from pycuda import driver [as 别名]
# 或者: from pycuda.driver import memcpy_dtoh_async [as 别名]
def infer(context, input_img, output_size, batch_size):
    """Execute the model once and hand back the predictions.

    Args:
        context: Execution context exposing ``get_engine()`` and
            ``enqueue()``; its engine must report exactly two bindings.
        input_img: Input array; cast to float32 before the upload.
        output_size: Shape (or element count) of the host result buffer.
        batch_size: Batch size forwarded to ``context.enqueue`` and used to
            scale both device allocations.

    Returns:
        A float32 ``numpy`` array created with ``np.empty(output_size)``,
        returned only after the stream has been synchronized.
    """
    # Load engine.
    engine = context.get_engine()
    assert(engine.get_nb_bindings() == 2)
    # Convert input data to float32.
    input_img = input_img.astype(np.float32)
    # Create output array to receive data.
    output = np.empty(output_size, dtype=np.float32)

    # Allocate device memory.
    d_input = cuda.mem_alloc(batch_size * input_img.size * input_img.dtype.itemsize)
    d_output = cuda.mem_alloc(batch_size * output.size * output.dtype.itemsize)

    bindings = [int(d_input), int(d_output)]
    stream = cuda.Stream()

    # Transfer input data to the device.
    cuda.memcpy_htod_async(d_input, input_img, stream)
    # Execute the model.
    context.enqueue(batch_size, bindings, stream.handle, None)
    # Transfer predictions back.
    cuda.memcpy_dtoh_async(output, d_output, stream)

    # Everything above was issued through asynchronous APIs; wait for the
    # stream so `output` is guaranteed to be complete before it is returned.
    stream.synchronize()

    return output
开发者ID:aimuch,项目名称:iAI,代码行数:28,代码来源:caffe_mnist.py

示例15: infer

# 需要导入模块: from pycuda import driver [as 别名]
# 或者: from pycuda.driver import memcpy_dtoh_async [as 别名]
def infer(engine, input_img, batch_size):
    """Run one inference pass on ``engine`` and return the host output.

    Args:
        engine: Engine object; must report exactly two bindings and provide
            ``create_execution_context()`` and ``get_binding_dimensions()``.
        input_img: Array uploaded to the device as-is (no dtype conversion
            is performed here).
        batch_size: Batch size forwarded to ``context.enqueue``.

    Returns:
        A page-locked float32 array of ``C * H * W * batch_size`` elements
        (dimensions taken from binding 1), returned only after the stream
        has been synchronized.
    """
    # Create the execution context that will run the model.
    context = engine.create_execution_context()
    assert(engine.get_nb_bindings() == 2)

    # Size the result buffer from the dimensions of binding 1.
    dims = engine.get_binding_dimensions(1).to_DimsCHW()
    elt_count = dims.C() * dims.H() * dims.W() * batch_size
    # Allocate page-locked host memory for the result.
    output = cuda.pagelocked_empty(elt_count, dtype=np.float32)

    # Allocate device memory.
    d_input = cuda.mem_alloc(batch_size * input_img.size * input_img.dtype.itemsize)
    # elt_count already includes batch_size, so the host buffer's byte size
    # matches the copy-back below; multiplying again would over-allocate.
    d_output = cuda.mem_alloc(output.nbytes)

    bindings = [int(d_input), int(d_output)]
    stream = cuda.Stream()

    # Transfer input data to the device.
    cuda.memcpy_htod_async(d_input, input_img, stream)
    # Execute the model.
    context.enqueue(batch_size, bindings, stream.handle, None)
    # Transfer predictions back.
    cuda.memcpy_dtoh_async(output, d_output, stream)

    # The copy above is only queued on the stream; wait for it to finish so
    # the caller never sees a partially filled buffer.
    stream.synchronize()

    return output
开发者ID:aimuch,项目名称:iAI,代码行数:29,代码来源:uff_mnist.py


注:本文中的pycuda.driver.memcpy_dtoh_async方法示例由纯净天空整理自Github/MSDocs等开源代码及文档管理平台,相关代码片段筛选自各路编程大神贡献的开源项目,源码版权归原作者所有,传播和使用请参考对应项目的License;未经允许,请勿转载。