

Python driver.Stream Method Code Examples

This article collects typical usage examples of the Python method pycuda.driver.Stream. If you are wondering what driver.Stream does, how to call it, or what it looks like in real code, the curated examples below should help. You can also explore further usage examples from the pycuda.driver module.


The following presents 15 code examples of the driver.Stream method, sorted by popularity by default.
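Before the examples, here is a minimal, self-contained sketch of the pattern nearly all of them share: create a Stream, enqueue asynchronous copies (and any kernel or engine work) on it, and synchronize before touching the results on the host. The buffer size and the device-to-device copy standing in for real work are illustrative assumptions, not taken from any of the projects below.

import numpy as np
import pycuda.autoinit  # noqa: F401 -- creates a CUDA context on import
import pycuda.driver as cuda

# Pagelocked host memory is required for truly asynchronous copies
host_in = cuda.pagelocked_empty(1024, dtype=np.float32)
host_out = cuda.pagelocked_empty(1024, dtype=np.float32)
host_in[:] = np.arange(1024, dtype=np.float32)

# Device buffers
d_in = cuda.mem_alloc(host_in.nbytes)
d_out = cuda.mem_alloc(host_out.nbytes)

stream = cuda.Stream()
cuda.memcpy_htod_async(d_in, host_in, stream)                # enqueue H2D copy
cuda.memcpy_dtod_async(d_out, d_in, host_in.nbytes, stream)  # stand-in for real work
cuda.memcpy_dtoh_async(host_out, d_out, stream)              # enqueue D2H copy
stream.synchronize()  # block until everything queued on the stream is done
assert np.allclose(host_in, host_out)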

Example 1: infer

# Required import: from pycuda import driver [as alias]
# Or: from pycuda.driver import Stream [as alias]
def infer(context, input_img, output_size, batch_size):
    # Load engine
    engine = context.get_engine()
    assert(engine.get_nb_bindings() == 2)
    # Convert input data to float32
    input_img = input_img.astype(np.float32)
    # Create host buffer to receive data
    output = np.empty(output_size, dtype = np.float32)
    # Allocate device memory
    d_input = cuda.mem_alloc(batch_size * input_img.size * input_img.dtype.itemsize)
    d_output = cuda.mem_alloc(batch_size * output.size * output.dtype.itemsize)

    bindings = [int(d_input), int(d_output)]
    stream = cuda.Stream()
    # Transfer input data to device
    cuda.memcpy_htod_async(d_input, input_img, stream)
    # Execute model
    context.enqueue(batch_size, bindings, stream.handle, None)
    # Transfer predictions back
    cuda.memcpy_dtoh_async(output, d_output, stream)
    # Wait for all work queued on the stream to finish
    stream.synchronize()
    # Return predictions
    return output 
Author: aimuch, Project: iAI, Lines: 26, Source: mnist_api.py

Example 2: inference_image

# Required import: from pycuda import driver [as alias]
# Or: from pycuda.driver import Stream [as alias]
def inference_image(context, input_img, batch_size):
    # load engine
    engine = context.get_engine()
    assert(engine.get_nb_bindings() == 2)
    inp_dims = engine.get_binding_dimensions(0).to_DimsCHW()
    out_dims = engine.get_binding_dimensions(1).to_DimsCHW()
    # output vector size (hardcoded; inp_dims/out_dims above are not used here)
    output_size = 1000
    # create output array
    output = np.empty(output_size, dtype=np.float32)
    # allocate device memory
    d_input = cuda.mem_alloc(batch_size * input_img.size * input_img.dtype.itemsize)
    d_output = cuda.mem_alloc(batch_size * output.size * output.dtype.itemsize)
    # create input/output bindings
    bindings = [int(d_input), int(d_output)]
    stream = cuda.Stream()
    # transfer input data to device
    cuda.memcpy_htod_async(d_input, input_img, stream)
    # execute model
    context.enqueue(batch_size, bindings, stream.handle, None)
    # transfer predictions
    cuda.memcpy_dtoh_async(output, d_output, stream)
    # wait for the stream to finish so output is valid
    stream.synchronize()
    return output 
Author: aimuch, Project: iAI, Lines: 27, Source: sample_onnx.py

Example 3: allocate_buffers

# Required import: from pycuda import driver [as alias]
# Or: from pycuda.driver import Stream [as alias]
def allocate_buffers(engine):
    inputs = []
    outputs = []
    bindings = []
    stream = cuda.Stream()
    for binding in engine:
        size = trt.volume(engine.get_binding_shape(binding)) * engine.max_batch_size
        dtype = trt.nptype(engine.get_binding_dtype(binding))
        # Allocate host and device buffers
        host_mem = cuda.pagelocked_empty(size, dtype)
        device_mem = cuda.mem_alloc(host_mem.nbytes)
        # Append the device buffer to device bindings.
        bindings.append(int(device_mem))
        # Append to the appropriate list.
        if engine.binding_is_input(binding):
            inputs.append(HostDeviceMem(host_mem, device_mem))
        else:
            outputs.append(HostDeviceMem(host_mem, device_mem))
    return inputs, outputs, bindings, stream

# This function is generalized for multiple inputs/outputs.
# inputs and outputs are expected to be lists of HostDeviceMem objects. 
Author: aimuch, Project: iAI, Lines: 24, Source: common.py
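allocate_buffers returns HostDeviceMem objects that are not defined in this excerpt. Below is a minimal sketch of that helper, consistent with how it is used above, plus the generalized inference routine the closing comment alludes to. Treat both as assumptions about the surrounding common.py rather than verbatim code from it.

import pycuda.driver as cuda  # assumed imported as in the snippets above

class HostDeviceMem(object):
    """Pairs a pagelocked host buffer with its device allocation."""
    def __init__(self, host_mem, device_mem):
        self.host = host_mem
        self.device = device_mem

def do_inference(context, bindings, inputs, outputs, stream, batch_size=1):
    # Enqueue H2D copies, execution, and D2H copies on one stream, then wait.
    for inp in inputs:
        cuda.memcpy_htod_async(inp.device, inp.host, stream)
    context.execute_async(batch_size=batch_size, bindings=bindings,
                          stream_handle=stream.handle)
    for out in outputs:
        cuda.memcpy_dtoh_async(out.host, out.device, stream)
    stream.synchronize()
    return [out.host for out in outputs]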

Example 4: __init__

# Required import: from pycuda import driver [as alias]
# Or: from pycuda.driver import Stream [as alias]
def __init__(self, model, input_shape, output_layout=7):
        """Initialize TensorRT plugins, engine and conetxt."""
        self.model = model
        self.input_shape = input_shape
        self.output_layout = output_layout
        self.trt_logger = trt.Logger(trt.Logger.INFO)
        self._load_plugins()
        self.engine = self._load_engine()

        self.host_inputs = []
        self.cuda_inputs = []
        self.host_outputs = []
        self.cuda_outputs = []
        self.bindings = []
        self.stream = cuda.Stream()
        self.context = self._create_context() 
Author: jkjung-avt, Project: tensorrt_demos, Lines: 18, Source: ssd.py
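For context, a plausible sketch of the helper methods this constructor calls. The engine-file path scheme here is a made-up assumption; only init_libnvinfer_plugins, deserialize_cuda_engine, and create_execution_context are standard TensorRT API calls.

def _load_plugins(self):
    # SSD ops ship as TensorRT plugins; this registers them with the runtime
    trt.init_libnvinfer_plugins(self.trt_logger, '')

def _load_engine(self):
    # Hypothetical file name derived from self.model
    with open('%s.trt' % self.model, 'rb') as f, \
            trt.Runtime(self.trt_logger) as runtime:
        return runtime.deserialize_cuda_engine(f.read())

def _create_context(self):
    return self.engine.create_execution_context()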

Example 5: allocate_buffers

# Required import: from pycuda import driver [as alias]
# Or: from pycuda.driver import Stream [as alias]
def allocate_buffers(engine):
    """Allocates all host/device in/out buffers required for an engine."""
    inputs = []
    outputs = []
    bindings = []
    stream = cuda.Stream()
    for binding in engine:
        size = trt.volume(engine.get_binding_shape(binding)) * \
               engine.max_batch_size
        dtype = trt.nptype(engine.get_binding_dtype(binding))
        # Allocate host and device buffers
        host_mem = cuda.pagelocked_empty(size, dtype)
        device_mem = cuda.mem_alloc(host_mem.nbytes)
        # Append the device buffer to device bindings.
        bindings.append(int(device_mem))
        # Append to the appropriate list.
        if engine.binding_is_input(binding):
            inputs.append(HostDeviceMem(host_mem, device_mem))
        else:
            outputs.append(HostDeviceMem(host_mem, device_mem))
    return inputs, outputs, bindings, stream 
Author: jkjung-avt, Project: tensorrt_demos, Lines: 23, Source: yolov3.py

Example 6: __init__

# Required import: from pycuda import driver [as alias]
# Or: from pycuda.driver import Stream [as alias]
def __init__(self, layers=None, delta=None, stream=None, max_batch_size=32, max_streams=10, epochs=10):

        self.network = []
        self.network_summary = []
        self.network_mem = []

        # Reuse the caller's stream when given, otherwise create a fresh one
        if stream is not None:
            self.stream = stream
        else:
            self.stream = drv.Stream()

        if delta is None:
            delta = 0.0001

        self.delta = delta
        self.max_batch_size = max_batch_size
        self.max_streams = max_streams
        self.epochs = epochs

        if layers is not None:
            for layer in layers:
                add_layer(self, layer)
Author: PacktPublishing, Project: Hands-On-GPU-Programming-with-Python-and-CUDA, Lines: 27, Source: deep_neural_network.py
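The constructor stores max_streams, which hints that the network fans per-sample work out across several concurrent streams. A minimal sketch of that pattern (an assumption about usage, not code from the book):

import pycuda.autoinit
import pycuda.driver as drv

streams = [drv.Stream() for _ in range(10)]  # e.g. max_streams = 10
# Copies and kernels issued on different streams may overlap on the GPU;
# each stream must be synchronized before its results are read on the host.
for s in streams:
    s.synchronize()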

Example 7: __init__

# Required import: from pycuda import driver [as alias]
# Or: from pycuda.driver import Stream [as alias]
def __init__(self, context: trt.IExecutionContext, stream=None, device=None, cuda_device=None, cuda_context=None):
        self.engine = context.engine
        if device is None:
            self.torch_device = torch.device("cuda:0")
        else:
            self.torch_device = device
        inputs, outputs, bindings = allocate_buffers_torch(self.engine, self.torch_device)
        self.context = context
        self.inputs = inputs
        self.outputs = outputs
        self.bindings = bindings
        self.input_dict = {mem.name: mem for mem in inputs}
        self.output_dict = {mem.name: mem for mem in outputs}
        # Use the supplied stream if given; otherwise create a dedicated one
        if stream is None:
            stream = cuda.Stream()
        self.stream = stream
        self._batch_size = None
        self.cuda_device = cuda_device
        self.cuda_context = cuda_context 
Author: traveller59, Project: torch2trt, Lines: 20, Source: inference.py

Example 8: get_async

# Required import: from pycuda import driver [as alias]
# Or: from pycuda.driver import Stream [as alias]
def get_async(self, stream):
        """Copy and return the host buffer data.

        Parameters
        ----------
        stream : pycuda.driver.Stream
            The cuda stream to copy data.

        Returns
        -------
        numpy.array
            The numpy array taking the data.

        """
        src = self.device_buffer
        dst = self.host_buffer
        src.get_async(stream, dst)
        return dst 
Author: seetaresearch, Project: dragon, Lines: 20, Source: engine.py
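A minimal usage sketch for a method like this, assuming device_buffer is a pycuda.gpuarray.GPUArray and host_buffer a pagelocked numpy array; the names are illustrative, not taken from engine.py.

import numpy as np
import pycuda.autoinit
import pycuda.driver as cuda
import pycuda.gpuarray as gpuarray

stream = cuda.Stream()
device_buffer = gpuarray.to_gpu(np.arange(8, dtype=np.float32))
host_buffer = cuda.pagelocked_empty(8, np.float32)
device_buffer.get_async(stream, host_buffer)  # enqueue the async D2H copy
stream.synchronize()  # host_buffer is only valid after this returns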

Example 9: infer

# Required import: from pycuda import driver [as alias]
# Or: from pycuda.driver import Stream [as alias]
def infer(context, input_img, output_size, batch_size):
    #load engine
    engine = context.get_engine()
    assert(engine.get_nb_bindings() == 2)
    #convert input data to Float32
    input_img = input_img.astype(np.float32)
    #create output array to receive data
    output = np.empty(output_size, dtype = np.float32)

    #allocate device memory
    d_input = cuda.mem_alloc(batch_size * input_img.size * input_img.dtype.itemsize)
    d_output = cuda.mem_alloc(batch_size * output.size * output.dtype.itemsize)

    bindings = [int(d_input), int(d_output)]

    stream = cuda.Stream()

    #transfer input data to device
    cuda.memcpy_htod_async(d_input, input_img, stream)
    #execute model
    context.enqueue(batch_size, bindings, stream.handle, None)
    #transfer predictions back
    cuda.memcpy_dtoh_async(output, d_output, stream)

    #wait for the stream to finish so the output is valid
    stream.synchronize()

    #return predictions
    return output 
Author: aimuch, Project: iAI, Lines: 31, Source: caffe_mnist.py

Example 10: infer

# Required import: from pycuda import driver [as alias]
# Or: from pycuda.driver import Stream [as alias]
def infer(engine, input_img, batch_size):
    #load engine
    context = engine.create_execution_context()
    assert(engine.get_nb_bindings() == 2)
    #create output array to receive data
    dims = engine.get_binding_dimensions(1).to_DimsCHW()
    elt_count = dims.C() * dims.H() * dims.W() * batch_size
    #Allocate pagelocked memory
    output = cuda.pagelocked_empty(elt_count, dtype = np.float32)

    #allocate device memory
    d_input = cuda.mem_alloc(batch_size * input_img.size * input_img.dtype.itemsize)
    d_output = cuda.mem_alloc(batch_size * output.size * output.dtype.itemsize)

    bindings = [int(d_input), int(d_output)]

    stream = cuda.Stream()

    #transfer input data to device
    cuda.memcpy_htod_async(d_input, input_img, stream)
    #execute model
    context.enqueue(batch_size, bindings, stream.handle, None)
    #transfer predictions back
    cuda.memcpy_dtoh_async(output, d_output, stream)
    #wait for the async copy to finish before reading the host buffer
    stream.synchronize()

    #return predictions
    return output
Author: aimuch, Project: iAI, Lines: 29, Source: uff_mnist.py

Example 11: infer

# Required import: from pycuda import driver [as alias]
# Or: from pycuda.driver import Stream [as alias]
def infer(context, input_img, batch_size):
    #load engine
    engine = context.get_engine()
    assert(engine.get_nb_bindings() == 2)
    #create output array to receive data
    dims = engine.get_binding_dimensions(1).to_DimsCHW()
    elt_count = dims.C() * dims.H() * dims.W() * batch_size
    #convert input data to Float32
    input_img = input_img.astype(np.float32)
    #Allocate pagelocked memory
    output = cuda.pagelocked_empty(elt_count, dtype=np.float32)

    #allocate device memory
    d_input = cuda.mem_alloc(batch_size * input_img.size * input_img.dtype.itemsize)
    d_output = cuda.mem_alloc(batch_size * output.size * output.dtype.itemsize)

    bindings = [int(d_input), int(d_output)]

    stream = cuda.Stream()

    #transfer input data to device
    cuda.memcpy_htod_async(d_input, input_img, stream)
    #execute model
    context.enqueue(batch_size, bindings, stream.handle, None)
    #transfer predictions back
    cuda.memcpy_dtoh_async(output, d_output, stream)
    #wait for the async copy to finish before reading the host buffer
    stream.synchronize()

    #return predictions
    return output
Author: aimuch, Project: iAI, Lines: 31, Source: tf_to_trt.py

Example 12: infer

# Required import: from pycuda import driver [as alias]
# Or: from pycuda.driver import Stream [as alias]
def infer(engine, input_img, batch_size):
    #load engine
    context = engine.create_execution_context()
    assert(engine.get_nb_bindings() == 2)

    #create output array to receive data
    dims = engine.get_binding_dimensions(1).to_DimsCHW()
    elt_count = dims.C() * dims.H() * dims.W() * batch_size

    #Allocate pagelocked memory
    output = cuda.pagelocked_empty(elt_count, dtype = np.float32)

    #allocate device memory
    d_input = cuda.mem_alloc(batch_size * input_img.size * input_img.dtype.itemsize)
    d_output = cuda.mem_alloc(batch_size * output.size * output.dtype.itemsize)

    bindings = [int(d_input), int(d_output)]

    stream = cuda.Stream()

    #transfer input data to device
    cuda.memcpy_htod_async(d_input, input_img, stream)
    #execute model
    context.enqueue(batch_size, bindings, stream.handle, None)
    #transfer predictions back
    cuda.memcpy_dtoh_async(output, d_output, stream)
    #wait for the async copy to finish before reading the host buffer
    stream.synchronize()

    #return predictions
    return output
Author: aimuch, Project: iAI, Lines: 31, Source: onnx_mnist.py

Example 13: infer

# Required import: from pycuda import driver [as alias]
# Or: from pycuda.driver import Stream [as alias]
def infer(context, input_img, output_size, batch_size):
    # Load engine
    engine = context.get_engine()
    assert(engine.get_nb_bindings() == 2)
    # Convert input data to Float32
    input_img = input_img.astype(np.float32)
    # Create output array to receive data
    output = np.empty(output_size, dtype = np.float32)

    # Allocate device memory
    d_input = cuda.mem_alloc(batch_size * input_img.size * input_img.dtype.itemsize)
    d_output = cuda.mem_alloc(batch_size * output.size * output.dtype.itemsize)

    bindings = [int(d_input), int(d_output)]

    stream = cuda.Stream()

    # Transfer input data to device
    cuda.memcpy_htod_async(d_input, input_img, stream)
    # Execute model
    context.enqueue(batch_size, bindings, stream.handle, None)
    # Transfer predictions back
    cuda.memcpy_dtoh_async(output, d_output, stream)
    # Wait for the async copy to finish before reading the host buffer
    stream.synchronize()

    # Return predictions
    return output
Author: aimuch, Project: iAI, Lines: 28, Source: custom_layers.py

Example 14: infer

# Required import: from pycuda import driver [as alias]
# Or: from pycuda.driver import Stream [as alias]
def infer(context, input_img, output_size, batch_size):
    #load engine
    engine = context.get_engine()
    assert(engine.get_nb_bindings() == 2)
    #convert input data to Float32
    input_img = input_img.astype(np.float32)
    #create output array to receive data 
    output = np.empty(output_size, dtype = np.float32)

    #allocate device memory
    d_input = cuda.mem_alloc(batch_size * input_img.size * input_img.dtype.itemsize)
    d_output = cuda.mem_alloc(batch_size * output.size * output.dtype.itemsize)

    bindings = [int(d_input), int(d_output)]

    stream = cuda.Stream()

    #transfer input data to device
    cuda.memcpy_htod_async(d_input, input_img, stream)
    #execute model 
    context.enqueue(batch_size, bindings, stream.handle, None)
    #transfer predictions back
    cuda.memcpy_dtoh_async(output, d_output, stream)
    #wait for the async copy to finish before reading the host buffer
    stream.synchronize()

    #return predictions
    return output
Author: aimuch, Project: iAI, Lines: 28, Source: caffe_mnist.py

Example 15: infer

# Required import: from pycuda import driver [as alias]
# Or: from pycuda.driver import Stream [as alias]
def infer(engine, input_img, batch_size):
    #load engine
    context = engine.create_execution_context()
    assert(engine.get_nb_bindings() == 2)
    #create output array to receive data
    dims = engine.get_binding_dimensions(1).to_DimsCHW()
    elt_count = dims.C() * dims.H() * dims.W() * batch_size
    #Allocate pagelocked memory
    output = cuda.pagelocked_empty(elt_count, dtype = np.float32)

    #allocate device memory
    d_input = cuda.mem_alloc(batch_size * input_img.size * input_img.dtype.itemsize)
    d_output = cuda.mem_alloc(batch_size * output.size * output.dtype.itemsize)

    bindings = [int(d_input), int(d_output)]

    stream = cuda.Stream()

    #transfer input data to device
    cuda.memcpy_htod_async(d_input, input_img, stream)
    #execute model 
    context.enqueue(batch_size, bindings, stream.handle, None)
    #transfer predictions back
    cuda.memcpy_dtoh_async(output, d_output, stream)
    #wait for the async copy to finish before reading the host buffer
    stream.synchronize()

    #return predictions
    return output
Author: aimuch, Project: iAI, Lines: 29, Source: uff_mnist.py


Note: The pycuda.driver.Stream examples in this article were collected by 纯净天空 from GitHub, MSDocs, and other open-source code and documentation platforms. The snippets were selected from open-source projects contributed by their developers; copyright belongs to the original authors, and any use or redistribution should follow the corresponding project's license. Do not reproduce without permission.