

Python driver.memcpy_dtoh_async Method Code Examples

This article collects typical usage examples of the pycuda.driver.memcpy_dtoh_async method in Python. If you are wondering what driver.memcpy_dtoh_async does, how to call it, or what real-world usage looks like, the curated examples below should help. You can also explore other usage examples from the pycuda.driver module.


The sections below present 15 code examples of the driver.memcpy_dtoh_async method, sorted by popularity.
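
Before the examples, here is a minimal, self-contained sketch of the basic memcpy_dtoh_async round trip; the buffer names and the size n are illustrative and not taken from the examples below:

import numpy as np
import pycuda.autoinit  # creates a CUDA context on import
import pycuda.driver as cuda

n = 1024
# Truly asynchronous copies require page-locked (pinned) host memory
h_src = cuda.pagelocked_empty(n, dtype=np.float32)
h_dst = cuda.pagelocked_empty(n, dtype=np.float32)
h_src[:] = np.arange(n, dtype=np.float32)

d_buf = cuda.mem_alloc(h_src.nbytes)
stream = cuda.Stream()

cuda.memcpy_htod_async(d_buf, h_src, stream)    # host -> device, queued on the stream
cuda.memcpy_dtoh_async(h_dst, d_buf, stream)    # device -> host, queued on the stream
stream.synchronize()                            # block until both copies complete

assert np.array_equal(h_src, h_dst)

Note that pycuda also accepts ordinary pageable arrays (such as those from np.empty), but the copy is then not guaranteed to overlap with other work, which is why several examples below allocate their output with cuda.pagelocked_empty.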

Example 1: infer

# Required import: from pycuda import driver [as alias]
# Or: from pycuda.driver import memcpy_dtoh_async [as alias]
def infer(context, input_img, output_size, batch_size):
    # Load engine
    engine = context.get_engine()
    assert(engine.get_nb_bindings() == 2)
    # Convert input data to float32
    input_img = input_img.astype(np.float32)
    # Create host buffer to receive data
    output = np.empty(output_size, dtype=np.float32)
    # Allocate device memory
    d_input = cuda.mem_alloc(batch_size * input_img.size * input_img.dtype.itemsize)
    d_output = cuda.mem_alloc(batch_size * output.size * output.dtype.itemsize)

    bindings = [int(d_input), int(d_output)]
    stream = cuda.Stream()
    # Transfer input data to device
    cuda.memcpy_htod_async(d_input, input_img, stream)
    # Execute model
    context.enqueue(batch_size, bindings, stream.handle, None)
    # Transfer predictions back
    cuda.memcpy_dtoh_async(output, d_output, stream)
    # Wait for the stream to finish
    stream.synchronize()
    # Return predictions
    return output 
Developer: aimuch, Project: iAI, Lines: 26, Source: mnist_api.py
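
A hypothetical call, assuming a TensorRT execution context built for a 10-class MNIST engine and a preprocessed input array img (names here are illustrative, not from the source project):

# Hypothetical usage of Example 1 -- context and img are assumed to exist
probs = infer(context, img, output_size=10, batch_size=1)
print("Predicted digit:", np.argmax(probs))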

Example 2: inference_image

# Required import: from pycuda import driver [as alias]
# Or: from pycuda.driver import memcpy_dtoh_async [as alias]
def inference_image(context, input_img, batch_size):
    # load engine
    engine = context.get_engine()
    assert(engine.get_nb_bindings() == 2)
    inp_dims = engine.get_binding_dimensions(0).to_DimsCHW()
    out_dims = engine.get_binding_dimensions(1).to_DimsCHW()
    # output vector size
    output_size = 1000
    # create output array
    output = np.empty(output_size, dtype=np.float32)
    # allocate device memory
    d_input = cuda.mem_alloc(batch_size * input_img.size * input_img.dtype.itemsize)
    d_output = cuda.mem_alloc(batch_size * output.size * output.dtype.itemsize)
    # create input/output bindings
    bindings = [int(d_input), int(d_output)]
    stream = cuda.Stream()
    # transfer input data to device
    cuda.memcpy_htod_async(d_input, input_img, stream)
    # execute model
    context.enqueue(batch_size, bindings, stream.handle, None)
    # transfer predictions
    cuda.memcpy_dtoh_async(output, d_output, stream)
    # wait for the stream to finish
    stream.synchronize()
    return output 
Developer: aimuch, Project: iAI, Lines: 27, Source: sample_onnx.py

Example 3: prediction

# Required import: from pycuda import driver [as alias]
# Or: from pycuda.driver import memcpy_dtoh_async [as alias]
def prediction(self, img):
        img_resized = _preprocess_trt(img, self.input_shape)
        np.copyto(self.host_inputs[0], img_resized.ravel())

        cuda.memcpy_htod_async(
            self.cuda_inputs[0], self.host_inputs[0], self.stream)
        self.context.execute_async(
            batch_size=1,
            bindings=self.bindings,
            stream_handle=self.stream.handle)
        cuda.memcpy_dtoh_async(
            self.host_outputs[1], self.cuda_outputs[1], self.stream)
        cuda.memcpy_dtoh_async(
            self.host_outputs[0], self.cuda_outputs[0], self.stream)
        self.stream.synchronize()

        output = self.host_outputs[0]
        return output 
Developer: cristianpb, Project: object-detection, Lines: 20, Source: ssd_trt_detection.py

Example 4: detect

# Required import: from pycuda import driver [as alias]
# Or: from pycuda.driver import memcpy_dtoh_async [as alias]
def detect(self, img, conf_th=0.3):
        """Detect objects in the input image."""
        img_resized = _preprocess_trt(img, self.input_shape)
        np.copyto(self.host_inputs[0], img_resized.ravel())

        cuda.memcpy_htod_async(
            self.cuda_inputs[0], self.host_inputs[0], self.stream)
        self.context.execute_async(
            batch_size=1,
            bindings=self.bindings,
            stream_handle=self.stream.handle)
        cuda.memcpy_dtoh_async(
            self.host_outputs[1], self.cuda_outputs[1], self.stream)
        cuda.memcpy_dtoh_async(
            self.host_outputs[0], self.cuda_outputs[0], self.stream)
        self.stream.synchronize()

        output = self.host_outputs[0]
        return _postprocess_trt(img, output, conf_th, self.output_layout) 
Developer: jkjung-avt, Project: tensorrt_demos, Lines: 21, Source: ssd.py

Example 5: do_inference

# Required import: from pycuda import driver [as alias]
# Or: from pycuda.driver import memcpy_dtoh_async [as alias]
def do_inference(context, bindings, inputs, outputs, stream, batch_size=1):
    """do_inference (for TensorRT 6.x or lower)

    This function is generalized for multiple inputs/outputs.
    Inputs and outputs are expected to be lists of HostDeviceMem objects.
    """
    # Transfer input data to the GPU.
    [cuda.memcpy_htod_async(inp.device, inp.host, stream) for inp in inputs]
    # Run inference.
    context.execute_async(batch_size=batch_size,
                          bindings=bindings,
                          stream_handle=stream.handle)
    # Transfer predictions back from the GPU.
    [cuda.memcpy_dtoh_async(out.host, out.device, stream) for out in outputs]
    # Synchronize the stream
    stream.synchronize()
    # Return only the host outputs.
    return [out.host for out in outputs] 
Developer: jkjung-avt, Project: tensorrt_demos, Lines: 20, Source: yolov3.py

Example 6: do_inference_v2

# Required import: from pycuda import driver [as alias]
# Or: from pycuda.driver import memcpy_dtoh_async [as alias]
def do_inference_v2(context, bindings, inputs, outputs, stream):
    """do_inference_v2 (for TensorRT 7.0+)

    This function is generalized for multiple inputs/outputs for full
    dimension networks.
    Inputs and outputs are expected to be lists of HostDeviceMem objects.
    """
    # Transfer input data to the GPU.
    [cuda.memcpy_htod_async(inp.device, inp.host, stream) for inp in inputs]
    # Run inference.
    context.execute_async_v2(bindings=bindings, stream_handle=stream.handle)
    # Transfer predictions back from the GPU.
    [cuda.memcpy_dtoh_async(out.host, out.device, stream) for out in outputs]
    # Synchronize the stream
    stream.synchronize()
    # Return only the host outputs.
    return [out.host for out in outputs] 
Developer: jkjung-avt, Project: tensorrt_demos, Lines: 19, Source: yolov3.py
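
Examples 5 and 6 expect inputs and outputs to be lists of HostDeviceMem objects, which the excerpts do not define. Below is a minimal sketch following the pattern used in NVIDIA's TensorRT Python samples; the allocate_buffers helper is an illustrative assumption (TensorRT 6/7-era API), not code from the excerpted projects:

import pycuda.driver as cuda
import tensorrt as trt

class HostDeviceMem(object):
    """Pairs a page-locked host buffer with its device counterpart (sketch)."""
    def __init__(self, host_mem, device_mem):
        self.host = host_mem
        self.device = device_mem

def allocate_buffers(engine):
    # Illustrative helper (assumed): one HostDeviceMem pair per engine
    # binding, plus the bindings list and a CUDA stream.
    inputs, outputs, bindings = [], [], []
    stream = cuda.Stream()
    for binding in engine:  # iterating an engine yields its binding names
        size = trt.volume(engine.get_binding_shape(binding)) * engine.max_batch_size
        dtype = trt.nptype(engine.get_binding_dtype(binding))
        host_mem = cuda.pagelocked_empty(size, dtype)   # page-locked host buffer
        device_mem = cuda.mem_alloc(host_mem.nbytes)    # matching device buffer
        bindings.append(int(device_mem))
        if engine.binding_is_input(binding):
            inputs.append(HostDeviceMem(host_mem, device_mem))
        else:
            outputs.append(HostDeviceMem(host_mem, device_mem))
    return inputs, outputs, bindings, stream

With buffers allocated this way, Example 5 can be called as do_inference(context, bindings, inputs, outputs, stream).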

Example 7: execute_async

# Required import: from pycuda import driver [as alias]
# Or: from pycuda.driver import memcpy_dtoh_async [as alias]
def execute_async(self, batch_size):
        [
            cuda.memcpy_htod_async(inp.device, inp.host[:batch_size],
                                   self.stream) for inp in self.inputs
            if inp.device_input is False
        ]
        self.context.execute_async(
            batch_size=batch_size,
            bindings=self.bindings,
            stream_handle=self.stream.handle)
        [
            cuda.memcpy_dtoh_async(out.host[:batch_size], out.device,
                                   self.stream) for out in self.outputs
        ]
        self.stream.synchronize()
        return {n: v.host[:batch_size] for n, v in self.output_dict.items()} 
Developer: traveller59, Project: torch2trt, Lines: 18, Source: inference.py

Example 8: get

# Required import: from pycuda import driver [as alias]
# Or: from pycuda.driver import memcpy_dtoh_async [as alias]
def get(self, stream=None):
        """
        Copy device array to host.

        Returns:
            numpy.ndarray: A host numpy array
        """

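        # Note: the copies below are only queued; for real asynchrony the
        # destination must be page-locked, and a caller passing a non-default
        # stream must synchronize it before reading the returned array.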
        if self.is_contiguous:
            ary = np.empty(self.shape, self.dtype)
            drv.memcpy_dtoh_async(ary, self.gpudata, stream)
        else:
            # if it is not contiguous, need to copy it over to new device mem
            ary_d = self.backend.empty(self.shape, self.dtype)
            ary_d.copy(self)
            ary = np.empty(self.shape, self.dtype)
            drv.memcpy_dtoh_async(ary, ary_d.gpudata, stream)
        return ary 
Developer: NervanaSystems, Project: neon, Lines: 20, Source: nervanagpu.py

Example 9: infer

# Required import: from pycuda import driver [as alias]
# Or: from pycuda.driver import memcpy_dtoh_async [as alias]
def infer(context, input_img, output_size, batch_size):
    #load engine
    engine = context.get_engine()
    assert(engine.get_nb_bindings() == 2)
    #convert input data to float32
    input_img = input_img.astype(np.float32)
    #create output array to receive data
    output = np.empty(output_size, dtype=np.float32)

    #allocate device memory
    d_input = cuda.mem_alloc(batch_size * input_img.size * input_img.dtype.itemsize)
    d_output = cuda.mem_alloc(batch_size * output.size * output.dtype.itemsize)

    bindings = [int(d_input), int(d_output)]

    stream = cuda.Stream()

    #transfer input data to device
    cuda.memcpy_htod_async(d_input, input_img, stream)
    #execute model
    context.enqueue(batch_size, bindings, stream.handle, None)
    #transfer predictions back
    cuda.memcpy_dtoh_async(output, d_output, stream)

    #wait for the stream to finish
    stream.synchronize()

    #return predictions
    return output 
Developer: aimuch, Project: iAI, Lines: 31, Source: caffe_mnist.py

Example 10: infer

# Required import: from pycuda import driver [as alias]
# Or: from pycuda.driver import memcpy_dtoh_async [as alias]
def infer(engine, input_img, batch_size):
    #load engine
    context = engine.create_execution_context()
    assert(engine.get_nb_bindings() == 2)
    #create output array to receive data
    dims = engine.get_binding_dimensions(1).to_DimsCHW()
    elt_count = dims.C() * dims.H() * dims.W() * batch_size
    #allocate pagelocked host memory
    output = cuda.pagelocked_empty(elt_count, dtype=np.float32)

    #allocate device memory
    d_input = cuda.mem_alloc(batch_size * input_img.size * input_img.dtype.itemsize)
    d_output = cuda.mem_alloc(batch_size * output.size * output.dtype.itemsize)

    bindings = [int(d_input), int(d_output)]

    stream = cuda.Stream()

    #transfer input data to device
    cuda.memcpy_htod_async(d_input, input_img, stream)
    #execute model
    context.enqueue(batch_size, bindings, stream.handle, None)
    #transfer predictions back
    cuda.memcpy_dtoh_async(output, d_output, stream)

    #wait for the async copy to finish before reading the output
    stream.synchronize()

    #return predictions
    return output
Developer: aimuch, Project: iAI, Lines: 29, Source: uff_mnist.py

Example 11: infer

# Required import: from pycuda import driver [as alias]
# Or: from pycuda.driver import memcpy_dtoh_async [as alias]
def infer(context, input_img, batch_size):
    #load engine
    engine = context.get_engine()
    assert(engine.get_nb_bindings() == 2)
    #create output array to receive data
    dims = engine.get_binding_dimensions(1).to_DimsCHW()
    elt_count = dims.C() * dims.H() * dims.W() * batch_size
    #convert input data to float32
    input_img = input_img.astype(np.float32)
    #allocate pagelocked host memory
    output = cuda.pagelocked_empty(elt_count, dtype=np.float32)

    #allocate device memory
    d_input = cuda.mem_alloc(batch_size * input_img.size * input_img.dtype.itemsize)
    d_output = cuda.mem_alloc(batch_size * output.size * output.dtype.itemsize)

    bindings = [int(d_input), int(d_output)]

    stream = cuda.Stream()

    #transfer input data to device
    cuda.memcpy_htod_async(d_input, input_img, stream)
    #execute model
    context.enqueue(batch_size, bindings, stream.handle, None)
    #transfer predictions back
    cuda.memcpy_dtoh_async(output, d_output, stream)

    #wait for the async copy to finish
    stream.synchronize()

    #return predictions
    return output
Developer: aimuch, Project: iAI, Lines: 31, Source: tf_to_trt.py

Example 12: infer

# Required import: from pycuda import driver [as alias]
# Or: from pycuda.driver import memcpy_dtoh_async [as alias]
def infer(engine, input_img, batch_size):
    #load engine
    context = engine.create_execution_context()
    assert(engine.get_nb_bindings() == 2)

    #create output array to receive data
    dims = engine.get_binding_dimensions(1).to_DimsCHW()
    elt_count = dims.C() * dims.H() * dims.W() * batch_size

    #allocate pagelocked host memory
    output = cuda.pagelocked_empty(elt_count, dtype=np.float32)

    #allocate device memory
    d_input = cuda.mem_alloc(batch_size * input_img.size * input_img.dtype.itemsize)
    d_output = cuda.mem_alloc(batch_size * output.size * output.dtype.itemsize)

    bindings = [int(d_input), int(d_output)]

    stream = cuda.Stream()

    #transfer input data to device
    cuda.memcpy_htod_async(d_input, input_img, stream)
    #execute model
    context.enqueue(batch_size, bindings, stream.handle, None)
    #transfer predictions back
    cuda.memcpy_dtoh_async(output, d_output, stream)

    #wait for the async copy to finish before reading the output
    stream.synchronize()

    #return predictions
    return output
Developer: aimuch, Project: iAI, Lines: 31, Source: onnx_mnist.py

Example 13: infer

# Required import: from pycuda import driver [as alias]
# Or: from pycuda.driver import memcpy_dtoh_async [as alias]
def infer(context, input_img, output_size, batch_size):
    # Load engine
    engine = context.get_engine()
    assert(engine.get_nb_bindings() == 2)
    # Convert input data to float32
    input_img = input_img.astype(np.float32)
    # Create output array to receive data
    output = np.empty(output_size, dtype=np.float32)

    # Allocate device memory
    d_input = cuda.mem_alloc(batch_size * input_img.size * input_img.dtype.itemsize)
    d_output = cuda.mem_alloc(batch_size * output.size * output.dtype.itemsize)

    bindings = [int(d_input), int(d_output)]

    stream = cuda.Stream()

    # Transfer input data to device
    cuda.memcpy_htod_async(d_input, input_img, stream)
    # Execute model
    context.enqueue(batch_size, bindings, stream.handle, None)
    # Transfer predictions back
    cuda.memcpy_dtoh_async(output, d_output, stream)

    # Wait for the async copy to complete
    stream.synchronize()

    # Return predictions
    return output
Developer: aimuch, Project: iAI, Lines: 28, Source: custom_layers.py

Example 14: infer

# Required import: from pycuda import driver [as alias]
# Or: from pycuda.driver import memcpy_dtoh_async [as alias]
def infer(context, input_img, output_size, batch_size):
    #load engine
    engine = context.get_engine()
    assert(engine.get_nb_bindings() == 2)
    #convert input data to float32
    input_img = input_img.astype(np.float32)
    #create output array to receive data
    output = np.empty(output_size, dtype=np.float32)

    #allocate device memory
    d_input = cuda.mem_alloc(batch_size * input_img.size * input_img.dtype.itemsize)
    d_output = cuda.mem_alloc(batch_size * output.size * output.dtype.itemsize)

    bindings = [int(d_input), int(d_output)]

    stream = cuda.Stream()

    #transfer input data to device
    cuda.memcpy_htod_async(d_input, input_img, stream)
    #execute model 
    context.enqueue(batch_size, bindings, stream.handle, None)
    #transfer predictions back
    cuda.memcpy_dtoh_async(output, d_output, stream)

    #wait for the async copy to finish
    stream.synchronize()

    #return predictions
    return output
Developer: aimuch, Project: iAI, Lines: 28, Source: caffe_mnist.py

Example 15: infer

# Required import: from pycuda import driver [as alias]
# Or: from pycuda.driver import memcpy_dtoh_async [as alias]
def infer(engine, input_img, batch_size):
    #load engine
    context = engine.create_execution_context()
    assert(engine.get_nb_bindings() == 2)
    #create output array to receive data
    dims = engine.get_binding_dimensions(1).to_DimsCHW()
    elt_count = dims.C() * dims.H() * dims.W() * batch_size
    #allocate pagelocked host memory
    output = cuda.pagelocked_empty(elt_count, dtype=np.float32)

    #allocate device memory
    d_input = cuda.mem_alloc(batch_size * input_img.size * input_img.dtype.itemsize)
    d_output = cuda.mem_alloc(batch_size * output.size * output.dtype.itemsize)

    bindings = [int(d_input), int(d_output)]

    stream = cuda.Stream()

    #transfer input data to device
    cuda.memcpy_htod_async(d_input, input_img, stream)
    #execute model 
    context.enqueue(batch_size, bindings, stream.handle, None)
    #transfer predictions back
    cuda.memcpy_dtoh_async(output, d_output, stream)

    #wait for the async copy to finish before reading the output
    stream.synchronize()

    #return predictions
    return output
Developer: aimuch, Project: iAI, Lines: 29, Source: uff_mnist.py


Note: The pycuda.driver.memcpy_dtoh_async examples in this article were compiled by 純淨天空 from open-source code and documentation platforms such as GitHub and MSDocs. The snippets were selected from open-source projects; copyright remains with the original authors, and any distribution or use should follow the corresponding project's license. Do not repost without permission.