

Python driver.Stream Method Code Examples

This article collects typical usage examples of the pycuda.driver.Stream method in Python. If you are wondering how driver.Stream is used in practice, the curated examples below may help. You can also explore further usage examples from the enclosing pycuda.driver module.


The sections below present 15 code examples of driver.Stream, sorted by popularity by default. You can upvote the examples you like or find useful; your feedback helps the system recommend better Python examples.
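Before diving in, here is a minimal, self-contained sketch (not taken from any of the projects below) of the basic pycuda.driver.Stream pattern that all 15 examples share: create a stream, enqueue asynchronous host-to-device and device-to-host copies on it, and synchronize before reading the result. The buffer size is an arbitrary placeholder.

import numpy as np
import pycuda.autoinit  # creates a default CUDA context
import pycuda.driver as cuda

# Page-locked (pinned) host memory is required for truly asynchronous copies
host_in = cuda.pagelocked_empty(1024, dtype=np.float32)
host_in[:] = np.random.rand(1024).astype(np.float32)
host_out = cuda.pagelocked_empty(1024, dtype=np.float32)

# Device buffer large enough for the data
d_buf = cuda.mem_alloc(host_in.nbytes)

stream = cuda.Stream()
cuda.memcpy_htod_async(d_buf, host_in, stream)   # enqueue H2D copy
# ... kernels launched with stream=stream would go here ...
cuda.memcpy_dtoh_async(host_out, d_buf, stream)  # enqueue D2H copy
stream.synchronize()                             # wait for all queued work
assert np.allclose(host_in, host_out)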

Example 1: infer

# Required import: from pycuda import driver [as alias]
# Or: from pycuda.driver import Stream [as alias]
def infer(context, input_img, output_size, batch_size):
    # Load engine
    engine = context.get_engine()
    assert(engine.get_nb_bindings() == 2)
    # Convert input data to float32
    input_img = input_img.astype(np.float32)
    # Create host buffer to receive data
    output = np.empty(output_size, dtype = np.float32)
    # Allocate device memory
    d_input = cuda.mem_alloc(batch_size * input_img.size * input_img.dtype.itemsize)
    d_output = cuda.mem_alloc(batch_size * output.size * output.dtype.itemsize)

    bindings = [int(d_input), int(d_output)]
    stream = cuda.Stream()
    # Transfer input data to device
    cuda.memcpy_htod_async(d_input, input_img, stream)
    # Execute model
    context.enqueue(batch_size, bindings, stream.handle, None)
    # Transfer predictions back
    cuda.memcpy_dtoh_async(output, d_output, stream)
    # Wait for the stream to finish the async transfers
    stream.synchronize()
    # Return predictions
    return output 
Author: aimuch, Project: iAI, Lines: 26, Source: mnist_api.py

Example 2: inference_image

# Required import: from pycuda import driver [as alias]
# Or: from pycuda.driver import Stream [as alias]
def inference_image(context, input_img, batch_size):
    # load engine
    engine = context.get_engine()
    assert(engine.get_nb_bindings() == 2)
    inp_dims = engine.get_binding_dimensions(0).to_DimsCHW()  # unused below
    out_dims = engine.get_binding_dimensions(1).to_DimsCHW()  # unused below
    # hard-coded output vector size
    output_size = 1000
    # create output array
    output = np.empty(output_size, dtype=np.float32)
    # allocate device memory
    d_input = cuda.mem_alloc(batch_size * input_img.size * input_img.dtype.itemsize)
    d_output = cuda.mem_alloc(batch_size * output.size * output.dtype.itemsize)
    # create input/output bindings
    bindings = [int(d_input), int(d_output)]
    stream = cuda.Stream()
    # transfer input data to device
    cuda.memcpy_htod_async(d_input, input_img, stream)
    # execute model
    context.enqueue(batch_size, bindings, stream.handle, None)
    # transfer predictions
    cuda.memcpy_dtoh_async(output, d_output, stream)
    # wait for the stream to finish the async transfers
    stream.synchronize()
    return output 
Author: aimuch, Project: iAI, Lines: 27, Source: sample_onnx.py

Example 3: allocate_buffers

# Required import: from pycuda import driver [as alias]
# Or: from pycuda.driver import Stream [as alias]
def allocate_buffers(engine):
    inputs = []
    outputs = []
    bindings = []
    stream = cuda.Stream()
    for binding in engine:
        size = trt.volume(engine.get_binding_shape(binding)) * engine.max_batch_size
        dtype = trt.nptype(engine.get_binding_dtype(binding))
        # Allocate host and device buffers
        host_mem = cuda.pagelocked_empty(size, dtype)
        device_mem = cuda.mem_alloc(host_mem.nbytes)
        # Append the device buffer to device bindings.
        bindings.append(int(device_mem))
        # Append to the appropriate list.
        if engine.binding_is_input(binding):
            inputs.append(HostDeviceMem(host_mem, device_mem))
        else:
            outputs.append(HostDeviceMem(host_mem, device_mem))
    return inputs, outputs, bindings, stream

# This function is generalized for multiple inputs/outputs.
# inputs and outputs are expected to be lists of HostDeviceMem objects. 
Author: aimuch, Project: iAI, Lines: 24, Source: common.py
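The HostDeviceMem helper used above is not defined on this page; in NVIDIA's TensorRT Python samples it is a small class pairing a pagelocked host buffer with its device allocation. A minimal sketch along those lines (attribute names assumed from those samples):

class HostDeviceMem(object):
    """Pairs a pagelocked host buffer with its device allocation."""
    def __init__(self, host_mem, device_mem):
        self.host = host_mem
        self.device = device_mem

    def __repr__(self):
        return "Host:\n" + str(self.host) + "\nDevice:\n" + str(self.device)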

Example 4: __init__

# Required import: from pycuda import driver [as alias]
# Or: from pycuda.driver import Stream [as alias]
def __init__(self, model, input_shape, output_layout=7):
    """Initialize TensorRT plugins, engine and context."""
    self.model = model
    self.input_shape = input_shape
    self.output_layout = output_layout
    self.trt_logger = trt.Logger(trt.Logger.INFO)
    self._load_plugins()
    self.engine = self._load_engine()

    self.host_inputs = []
    self.cuda_inputs = []
    self.host_outputs = []
    self.cuda_outputs = []
    self.bindings = []
    self.stream = cuda.Stream()
    self.context = self._create_context()
Author: jkjung-avt, Project: tensorrt_demos, Lines: 18, Source: ssd.py

Example 5: allocate_buffers

# Required import: from pycuda import driver [as alias]
# Or: from pycuda.driver import Stream [as alias]
def allocate_buffers(engine):
    """Allocates all host/device in/out buffers required for an engine."""
    inputs = []
    outputs = []
    bindings = []
    stream = cuda.Stream()
    for binding in engine:
        size = trt.volume(engine.get_binding_shape(binding)) * \
               engine.max_batch_size
        dtype = trt.nptype(engine.get_binding_dtype(binding))
        # Allocate host and device buffers
        host_mem = cuda.pagelocked_empty(size, dtype)
        device_mem = cuda.mem_alloc(host_mem.nbytes)
        # Append the device buffer to device bindings.
        bindings.append(int(device_mem))
        # Append to the appropriate list.
        if engine.binding_is_input(binding):
            inputs.append(HostDeviceMem(host_mem, device_mem))
        else:
            outputs.append(HostDeviceMem(host_mem, device_mem))
    return inputs, outputs, bindings, stream 
Author: jkjung-avt, Project: tensorrt_demos, Lines: 23, Source: yolov3.py
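For context, the (inputs, outputs, bindings, stream) tuple returned by allocate_buffers is typically consumed by a do_inference helper in the same sample code. A sketch of that pattern, assuming pycuda.driver is imported as cuda and the batch-oriented execute_async API used elsewhere on this page:

def do_inference(context, bindings, inputs, outputs, stream, batch_size=1):
    # Transfer input data to the GPU on the stream.
    for inp in inputs:
        cuda.memcpy_htod_async(inp.device, inp.host, stream)
    # Run inference asynchronously on the same stream.
    context.execute_async(batch_size=batch_size, bindings=bindings,
                          stream_handle=stream.handle)
    # Transfer predictions back to pagelocked host memory.
    for out in outputs:
        cuda.memcpy_dtoh_async(out.host, out.device, stream)
    # Synchronize the stream before the host reads the outputs.
    stream.synchronize()
    return [out.host for out in outputs]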

Example 6: __init__

# Required import: from pycuda import driver [as alias]
# Or: from pycuda.driver import Stream [as alias]
def __init__(self, layers=None, delta=None, stream=None, max_batch_size=32, max_streams=10, epochs=10):
    self.network = []
    self.network_summary = []
    self.network_mem = []

    if stream is not None:
        self.stream = stream
    else:
        self.stream = drv.Stream()

    if delta is None:
        delta = 0.0001

    self.delta = delta
    self.max_batch_size = max_batch_size
    self.max_streams = max_streams
    self.epochs = epochs

    if layers is not None:
        for layer in layers:
            self.add_layer(layer)  # assumes add_layer is a method of this class
Author: PacktPublishing, Project: Hands-On-GPU-Programming-with-Python-and-CUDA, Lines: 27, Source: deep_neural_network.py
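The max_streams argument above hints at the multi-stream batching used later in that project. As a self-contained illustration (independent of the book's code; buffer sizes are placeholders), several streams can carry independent async copies that may overlap on the device:

import numpy as np
import pycuda.autoinit
import pycuda.driver as drv

streams = [drv.Stream() for _ in range(10)]
host_bufs = [drv.pagelocked_empty(256, dtype=np.float32) for _ in streams]
dev_bufs = [drv.mem_alloc(buf.nbytes) for buf in host_bufs]

# Enqueue an independent copy on each stream.
for stream, host, dev in zip(streams, host_bufs, dev_bufs):
    host[:] = np.random.rand(256).astype(np.float32)
    drv.memcpy_htod_async(dev, host, stream)

# Drain every stream before touching the buffers again.
for stream in streams:
    stream.synchronize()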

Example 7: __init__

# Required import: from pycuda import driver [as alias]
# Or: from pycuda.driver import Stream [as alias]
def __init__(self, context: trt.IExecutionContext, stream=None, device=None, cuda_device=None, cuda_context=None):
    self.engine = context.engine
    if device is None:
        self.torch_device = torch.device("cuda:0")
    else:
        self.torch_device = device
    inputs, outputs, bindings = allocate_buffers_torch(self.engine, self.torch_device)
    self.context = context
    self.inputs = inputs
    self.outputs = outputs
    self.bindings = bindings
    self.input_dict = {mem.name: mem for mem in inputs}
    self.output_dict = {mem.name: mem for mem in outputs}
    if stream is None:
        self.stream = cuda.Stream()
    else:
        self.stream = stream  # reuse the caller-provided stream
    self._batch_size = None
    self.cuda_device = cuda_device
    self.cuda_context = cuda_context
Author: traveller59, Project: torch2trt, Lines: 20, Source: inference.py

Example 8: get_async

# Required import: from pycuda import driver [as alias]
# Or: from pycuda.driver import Stream [as alias]
def get_async(self, stream):
    """Copy the device buffer into the host buffer and return it.

    Parameters
    ----------
    stream : pycuda.driver.Stream
        The CUDA stream used for the copy.

    Returns
    -------
    numpy.array
        The numpy array holding the data.

    """
    src = self.device_buffer
    dst = self.host_buffer
    src.get_async(stream, dst)
    return dst
Author: seetaresearch, Project: dragon, Lines: 20, Source: engine.py
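A typical call site enqueues work on a stream, requests the asynchronous copy, and synchronizes before reading. A hypothetical usage sketch (the buf object stands in for an instance of the class this method belongs to):

stream = cuda.Stream()
# ... enqueue inference that writes into buf.device_buffer on `stream` ...
result = buf.get_async(stream)  # enqueues the device-to-host copy
stream.synchronize()            # ensure the copy has completed
print(result)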

Example 9: infer

# Required import: from pycuda import driver [as alias]
# Or: from pycuda.driver import Stream [as alias]
def infer(context, input_img, output_size, batch_size):
    #load engine
    engine = context.get_engine()
    assert(engine.get_nb_bindings() == 2)
    #convert input data to Float32
    input_img = input_img.astype(np.float32)
    #create output array to receive data
    output = np.empty(output_size, dtype = np.float32)

    #allocate device memory
    d_input = cuda.mem_alloc(batch_size * input_img.size * input_img.dtype.itemsize)
    d_output = cuda.mem_alloc(batch_size * output.size * output.dtype.itemsize)

    bindings = [int(d_input), int(d_output)]

    stream = cuda.Stream()

    #transfer input data to device
    cuda.memcpy_htod_async(d_input, input_img, stream)
    #execute model
    context.enqueue(batch_size, bindings, stream.handle, None)
    #transfer predictions back
    cuda.memcpy_dtoh_async(output, d_output, stream)

    #wait for the stream to finish the async transfers
    stream.synchronize()

    #return predictions
    return output 
Author: aimuch, Project: iAI, Lines: 31, Source: caffe_mnist.py

Example 10: infer

# Required import: from pycuda import driver [as alias]
# Or: from pycuda.driver import Stream [as alias]
def infer(engine, input_img, batch_size):
    #load engine
    context = engine.create_execution_context()
    assert(engine.get_nb_bindings() == 2)
    #create output array to receive data
    dims = engine.get_binding_dimensions(1).to_DimsCHW()
    elt_count = dims.C() * dims.H() * dims.W() * batch_size
    #Allocate pagelocked memory
    output = cuda.pagelocked_empty(elt_count, dtype = np.float32)

    #allocate device memory
    d_input = cuda.mem_alloc(batch_size * input_img.size * input_img.dtype.itemsize)
    d_output = cuda.mem_alloc(batch_size * output.size * output.dtype.itemsize)

    bindings = [int(d_input), int(d_output)]

    stream = cuda.Stream()

    #transfer input data to device
    cuda.memcpy_htod_async(d_input, input_img, stream)
    #execute model
    context.enqueue(batch_size, bindings, stream.handle, None)
    #transfer predictions back
    cuda.memcpy_dtoh_async(output, d_output, stream)

    #synchronize the stream so the async copy has finished
    stream.synchronize()

    #return predictions
    return output
Author: aimuch, Project: iAI, Lines: 29, Source: uff_mnist.py

Example 11: infer

# Required import: from pycuda import driver [as alias]
# Or: from pycuda.driver import Stream [as alias]
def infer(context, input_img, batch_size):
    #load engine
    engine = context.get_engine()
    assert(engine.get_nb_bindings() == 2)
    #create output array to receive data
    dims = engine.get_binding_dimensions(1).to_DimsCHW()
    elt_count = dims.C() * dims.H() * dims.W() * batch_size
    #convert input data to Float32
    input_img = input_img.astype(np.float32)
    #Allocate pagelocked memory
    output = cuda.pagelocked_empty(elt_count, dtype=np.float32)

    #allocate device memory
    d_input = cuda.mem_alloc(batch_size * input_img.size * input_img.dtype.itemsize)
    d_output = cuda.mem_alloc(batch_size * output.size * output.dtype.itemsize)

    bindings = [int(d_input), int(d_output)]

    stream = cuda.Stream()

    #transfer input data to device
    cuda.memcpy_htod_async(d_input, input_img, stream)
    #execute model
    context.enqueue(batch_size, bindings, stream.handle, None)
    #transfer predictions back
    cuda.memcpy_dtoh_async(output, d_output, stream)

    #synchronize the stream so the async copy has finished
    stream.synchronize()

    #return predictions
    return output
Author: aimuch, Project: iAI, Lines: 31, Source: tf_to_trt.py

Example 12: infer

# Required import: from pycuda import driver [as alias]
# Or: from pycuda.driver import Stream [as alias]
def infer(engine, input_img, batch_size):
    #load engine
    context = engine.create_execution_context()
    assert(engine.get_nb_bindings() == 2)

    #create output array to receive data
    dims = engine.get_binding_dimensions(1).to_DimsCHW()
    elt_count = dims.C() * dims.H() * dims.W() * batch_size

    #Allocate pagelocked memory
    output = cuda.pagelocked_empty(elt_count, dtype = np.float32)

    #allocate device memory
    d_input = cuda.mem_alloc(batch_size * input_img.size * input_img.dtype.itemsize)
    d_output = cuda.mem_alloc(batch_size * output.size * output.dtype.itemsize)

    bindings = [int(d_input), int(d_output)]

    stream = cuda.Stream()

    #transfer input data to device
    cuda.memcpy_htod_async(d_input, input_img, stream)
    #execute model
    context.enqueue(batch_size, bindings, stream.handle, None)
    #transfer predictions back
    cuda.memcpy_dtoh_async(output, d_output, stream)

    #synchronize the stream so the async copy has finished
    stream.synchronize()

    #return predictions
    return output
Author: aimuch, Project: iAI, Lines: 31, Source: onnx_mnist.py

Example 13: infer

# Required import: from pycuda import driver [as alias]
# Or: from pycuda.driver import Stream [as alias]
def infer(context, input_img, output_size, batch_size):
    # Load engine
    engine = context.get_engine()
    assert(engine.get_nb_bindings() == 2)
    # Convert input data to Float32
    input_img = input_img.astype(np.float32)
    # Create output array to receive data
    output = np.empty(output_size, dtype = np.float32)

    # Allocate device memory
    d_input = cuda.mem_alloc(batch_size * input_img.size * input_img.dtype.itemsize)
    d_output = cuda.mem_alloc(batch_size * output.size * output.dtype.itemsize)

    bindings = [int(d_input), int(d_output)]

    stream = cuda.Stream()

    # Transfer input data to device
    cuda.memcpy_htod_async(d_input, input_img, stream)
    # Execute model
    context.enqueue(batch_size, bindings, stream.handle, None)
    # Transfer predictions back
    cuda.memcpy_dtoh_async(output, d_output, stream)

    # Synchronize the stream so the async copy has finished
    stream.synchronize()

    # Return predictions
    return output
Author: aimuch, Project: iAI, Lines: 28, Source: custom_layers.py

Example 14: infer

# Required import: from pycuda import driver [as alias]
# Or: from pycuda.driver import Stream [as alias]
def infer(context, input_img, output_size, batch_size):
    #load engine
    engine = context.get_engine()
    assert(engine.get_nb_bindings() == 2)
    #convert input data to Float32
    input_img = input_img.astype(np.float32)
    #create output array to receive data 
    output = np.empty(output_size, dtype = np.float32)

    #allocate device memory
    d_input = cuda.mem_alloc(batch_size * input_img.size * input_img.dtype.itemsize)
    d_output = cuda.mem_alloc(batch_size * output.size * output.dtype.itemsize)

    bindings = [int(d_input), int(d_output)]

    stream = cuda.Stream()

    #transfer input data to device
    cuda.memcpy_htod_async(d_input, input_img, stream)
    #execute model 
    context.enqueue(batch_size, bindings, stream.handle, None)
    #transfer predictions back
    cuda.memcpy_dtoh_async(output, d_output, stream)

    #synchronize the stream so the async copy has finished
    stream.synchronize()

    #return predictions
    return output
Author: aimuch, Project: iAI, Lines: 28, Source: caffe_mnist.py

Example 15: infer

# Required import: from pycuda import driver [as alias]
# Or: from pycuda.driver import Stream [as alias]
def infer(engine, input_img, batch_size):
    #load engine
    context = engine.create_execution_context()
    assert(engine.get_nb_bindings() == 2)
    #create output array to receive data
    dims = engine.get_binding_dimensions(1).to_DimsCHW()
    elt_count = dims.C() * dims.H() * dims.W() * batch_size
    #Allocate pagelocked memory
    output = cuda.pagelocked_empty(elt_count, dtype = np.float32)

    #allocate device memory
    d_input = cuda.mem_alloc(batch_size * input_img.size * input_img.dtype.itemsize)
    d_output = cuda.mem_alloc(batch_size * output.size * output.dtype.itemsize)

    bindings = [int(d_input), int(d_output)]

    stream = cuda.Stream()

    #transfer input data to device
    cuda.memcpy_htod_async(d_input, input_img, stream)
    #execute model 
    context.enqueue(batch_size, bindings, stream.handle, None)
    #transfer predictions back
    cuda.memcpy_dtoh_async(output, d_output, stream)

    #synchronize the stream so the async copy has finished
    stream.synchronize()

    #return predictions
    return output
Author: aimuch, Project: iAI, Lines: 29, Source: uff_mnist.py


Note: The pycuda.driver.Stream method examples in this article were compiled by 純淨天空 from open-source code and documentation platforms such as GitHub and MSDocs. The code snippets are drawn from open-source projects contributed by many developers, and copyright in the source code remains with the original authors. Please consult each project's License before distributing or using the code; do not reproduce this article without permission.