

Python driver.init Method Code Examples

This article collects typical code examples of the pycuda.driver.init method in Python. If you are wondering how exactly to use driver.init, how to call it, or what a real usage example looks like, the curated examples below may help. You can also explore further usage examples from the pycuda.driver module.


Eleven code examples of the driver.init method are shown below, sorted by popularity by default.
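Before turning to the examples, here is a minimal standalone sketch (written for this article, not taken from any of the projects below) of the usual pycuda.driver.init workflow: initialize the driver, query the available devices, create a context on one of them, and release the context when finished. It assumes at least one CUDA-capable GPU (device 0) is present.

import pycuda.driver as drv

drv.init()                                   # must be called before any other driver-level API
print("CUDA devices found:", drv.Device.count())

dev = drv.Device(0)                          # assumes device 0 exists
ctx = dev.make_context()                     # create and push a context on that device
try:
    print("Using {}, compute capability {}".format(dev.name(), dev.compute_capability()))
finally:
    ctx.pop()                                # always release the context when done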

Example 1: main

# Required import: from pycuda import driver [as alias]
# Or: from pycuda.driver import init [as alias]
def main():
    args = parse_args()
    cam = Camera(args)
    cam.open()
    if not cam.is_opened:
        sys.exit('Failed to open camera!')

    cls_dict = get_cls_dict(args.model.split('_')[-1])

    cuda.init()  # init pycuda driver

    cam.start()  # let camera start grabbing frames
    open_window(WINDOW_NAME, args.image_width, args.image_height,
                'Camera TensorRT SSD Demo for Jetson Nano')
    vis = BBoxVisualization(cls_dict)
    condition = threading.Condition()
    trt_thread = TrtThread(condition, cam, args.model, conf_th=0.3)
    trt_thread.start()  # start the child thread
    loop_and_display(condition, vis)
    trt_thread.stop()   # stop the child thread

    cam.stop()
    cam.release()
    cv2.destroyAllWindows() 
Author: jkjung-avt | Project: tensorrt_demos | Lines of code: 26 | Source file: trt_ssd_async.py

Example 2: test_vector_add

# Required import: from pycuda import driver [as alias]
# Or: from pycuda.driver import init [as alias]
def test_vector_add():
    #Check pycuda is installed and if a CUDA capable device is present, if not skip the test
    try:
        import pycuda.driver as drv
        drv.init()
    except (ImportError, Exception):
        pytest.skip("PyCuda not installed or no CUDA device detected")

    kernel_string = """
    __global__ void vector_add(float *c, float *a, float *b, int n) {
        int i = blockIdx.x * block_size_x + threadIdx.x;
        if (i<n) {
            c[i] = a[i] + b[i];
        }
    }
    """

    size = 10000000
    problem_size = (size, 1)

    a = numpy.random.randn(size).astype(numpy.float32)
    b = numpy.random.randn(size).astype(numpy.float32)
    c = numpy.zeros_like(b)
    n = numpy.int32(size)

    args = [c, a, b, n]
    params = {"block_size_x": 512}

    answer = run_kernel("vector_add", kernel_string, problem_size, args, params)

    assert numpy.allclose(answer[0], a+b, atol=1e-8) 
Author: benvanwerkhoven | Project: kernel_tuner | Lines of code: 33 | Source file: test_vector_add.py

Example 3: __init__

# Required import: from pycuda import driver [as alias]
# Or: from pycuda.driver import init [as alias]
def __init__(self, device_id=None, enable_winograd=True, deterministic=True,
                 scratch_size=0):

        try:
            drv.init()
        except drv.LogicError:
            sys.exit(PYCUDA_LOGIC_ERROR_CODE)

        self.device_id = int(device_id) if device_id is not None else 0
        # check compute capability
        self.compute_capability = drv.Device(self.device_id).compute_capability()
        if self.compute_capability[0] < 3:
            raise RuntimeError("Unsupported GPU")

        # context
        self.ctx = drv.Device(self.device_id).make_context()
        # attributes
        self.stream = None
        self.warmup = False
        self.scratch_size = scratch_size
        self.scratch_offset = 0

        # Fall back to CUDA C kernels on older (pre-Maxwell) GPU generations
        if self.compute_capability[0] < 5:
            # TODO: this is not fully supported in graph yet
            self.use_cudac_kernels = True
        else:
            self.use_cudac_kernels = False

        # TODO
        # self.cublas_handle = cublas.cublasCreate()

        self.enable_winograd = enable_winograd
        self.deterministic = deterministic 
Author: NervanaSystems | Project: ngraph-python | Lines of code: 36 | Source file: gputransform.py

Example 4: gpuWordCount

# Required import: from pycuda import driver [as alias]
# Or: from pycuda.driver import init [as alias]
def gpuWordCount(self):
    def gpuFunc(iterator):
        # 1. Data preparation
        iterator = iter(iterator)
        cpu_data = list(iterator)
        cpu_dataset = " ".join(cpu_data)
        ascii_data = np.asarray([ord(x) for x in cpu_dataset], dtype=np.uint8)

        # 2. Driver initialization and data transfer
        cuda.init()
        dev = cuda.Device(0)
        contx = dev.make_context()
        gpu_dataset = gpuarray.to_gpu(ascii_data)

        # 3. GPU kernel.
        # The kernel's algorithm counts the words by keeping
        # track of the space between them
        countkrnl = reduction.ReductionKernel(long, neutral="0",
                        map_expr="(a[i] == 32)*(b[i] != 32)",
                        reduce_expr="a + b", arguments="char *a, char *b")

        results = countkrnl(gpu_dataset[:-1], gpu_dataset[1:]).get()
        yield results

        # Release GPU context resources
        contx.pop()
        del gpu_dataset
        del contx

        gc.collect()

    vals = self.rdd.mapPartitions(gpuFunc)
    return vals
Author: adobe-research | Project: spark-gpu | Lines of code: 35 | Source file: wordcount_mapp.py

Example 5: _init_gpu

# Required import: from pycuda import driver [as alias]
# Or: from pycuda.driver import init [as alias]
def _init_gpu(comm):
    """ Chooses a gpu and creates a context on it. """
    # Find out how many GPUs are available to us on this node.
    driver.init()
    num_gpus = driver.Device.count()

    # Figure out the names of the other hosts.
    rank = comm.Get_rank() # Find out which process I am.
    name = MPI.Get_processor_name() # The name of my node.
    hosts = comm.allgather(name) # Get the names of all the other hosts

    # Find out which GPU to take (by precedence).
    gpu_id = hosts[0:rank].count(name)
    if gpu_id >= num_gpus:
        raise TypeError('No GPU available.')

    
    # Create a context on the appropriate device.
    for k in range(num_gpus):
        try:
            device = driver.Device((gpu_id + k) % num_gpus)
            context = device.make_context()
        except:
            continue
        else:
#             print "On %s: process %d taking gpu %d of %d.\n" % \
#                 (name, rank, gpu_id+k, num_gpus)
            break

    return device, context # Return device and context.

# Global variable for the global space.
# The leading double underscore should prevent outside modules from accessing
# this variable. 
Author: stanfordnqp | Project: maxwell-b | Lines of code: 36 | Source file: space.py

Example 6: get_num_gpus

# Required import: from pycuda import driver [as alias]
# Or: from pycuda.driver import init [as alias]
def get_num_gpus():
    """Returns the number of GPUs available"""
    logging.debug("Determining number of GPUs...")
    from pycuda import driver 
    driver.init()
    num_gpus = driver.Device.count()
    logging.debug("Number of GPUs: {}".format(num_gpus))
    return num_gpus 
Author: vlimant | Project: mpi_learn | Lines of code: 10 | Source file: utils.py

Example 7: get_device_count

# Required import: from pycuda import driver [as alias]
# Or: from pycuda.driver import init [as alias]
def get_device_count(verbose=False):
    """
    Query device count through PyCuda.

    Arguments:
        verbose (bool): prints verbose logging if True, default False.

    Returns:
        int: Number of GPUs available.
    """
    try:
        import pycuda
        import pycuda.driver as drv
    except ImportError:
        if verbose:
            neon_logger.display("PyCUDA module not found")
        return 0
    try:
        drv.init()
    except pycuda._driver.RuntimeError as e:
        neon_logger.display("PyCUDA Runtime error: {0}".format(str(e)))
        return 0

    count = drv.Device.count()

    if verbose:
        neon_logger.display("Found {} GPU(s)".format(count))

    return count 
Author: NervanaSystems | Project: neon | Lines of code: 31 | Source file: check_gpu.py

Example 8: detect_check_gpus

# Required import: from pycuda import driver [as alias]
# Or: from pycuda.driver import init [as alias]
def detect_check_gpus(deviceIDs):
    """Get information about Nvidia GPU(s).

    Args:
        deviceIDs (list): List of integers of device IDs.

    Returns:
        gpus (list): Detected GPU(s) object(s).
    """

    try:
        import pycuda.driver as drv
    except ImportError:
        raise ImportError('To use gprMax in GPU mode the pycuda package must be installed, and you must have a NVIDIA CUDA-Enabled GPU (https://developer.nvidia.com/cuda-gpus).')
    drv.init()

    # Check and list any CUDA-Enabled GPUs
    if drv.Device.count() == 0:
        raise GeneralError('No NVIDIA CUDA-Enabled GPUs detected (https://developer.nvidia.com/cuda-gpus)')
    elif 'CUDA_VISIBLE_DEVICES' in os.environ:
        deviceIDsavail = os.environ.get('CUDA_VISIBLE_DEVICES')
        deviceIDsavail = [int(s) for s in deviceIDsavail.split(',')]
    else:
        deviceIDsavail = range(drv.Device.count())

    # If no device ID is given use default of 0
    if not deviceIDs:
        deviceIDs = [0]

    # Check if requested device ID(s) exist
    for ID in deviceIDs:
        if ID not in deviceIDsavail:
            raise GeneralError('GPU with device ID {} does not exist'.format(ID))

    # Gather information about selected/detected GPUs
    gpus = []
    allgpustext = []
    for ID in deviceIDsavail:
        gpu = GPU(deviceID=ID)
        gpu.get_gpu_info(drv)
        if ID in deviceIDs:
            gpus.append(gpu)
        allgpustext.append('{} - {}, {}'.format(gpu.deviceID, gpu.name, human_size(gpu.totalmem, a_kilobyte_is_1024_bytes=True)))

    return gpus, allgpustext 
Author: gprMax | Project: gprMax | Lines of code: 47 | Source file: utilities.py

Example 9: fun_load

# Required import: from pycuda import driver [as alias]
# Or: from pycuda.driver import init [as alias]
def fun_load(config, sock_data=5000):

    send_queue = config['queue_l2t']
    recv_queue = config['queue_t2l']
    # recv_queue and send_queue are multiprocessing.Queue
    # recv_queue is only for receiving
    # send_queue is only for sending

    # if need to do random crop and mirror
    flag_batch = config['batch_crop_mirror']

    drv.init()
    dev = drv.Device(int(config['gpu'][-1]))
    ctx = dev.make_context()
    sock = zmq.Context().socket(zmq.PAIR)
    sock.bind('tcp://*:{0}'.format(sock_data))

    shape, dtype, h = sock.recv_pyobj()
    print 'shared_x information received'

    gpu_data_remote = gpuarray.GPUArray(shape, dtype,
                                        gpudata=drv.IPCMemoryHandle(h))
    gpu_data = gpuarray.GPUArray(shape, dtype)

    img_mean = recv_queue.get()
    print 'img_mean received'

    # The first time, do the set ups and other stuff

    # receive information for loading

    while True:
        # getting the hkl file name to load
        hkl_name = recv_queue.get()

        # print hkl_name
        data = hkl.load(hkl_name) - img_mean
        # print 'load ', time.time() - bgn_time

        param_rand = recv_queue.get()

        data = crop_and_mirror(data, param_rand, flag_batch=flag_batch)

        gpu_data.set(data)

        # wait for computation on last minibatch to finish
        msg = recv_queue.get()
        assert msg == 'calc_finished'

        drv.memcpy_peer(gpu_data_remote.ptr,
                        gpu_data.ptr,
                        gpu_data.dtype.itemsize *
                        gpu_data.size,
                        ctx, ctx)

        ctx.synchronize()

        send_queue.put('copy_finished') 
Author: uoguelph-mlrg | Project: theano_alexnet | Lines of code: 60 | Source file: proc_load.py

Example 10: get_compute_capability

# Required import: from pycuda import driver [as alias]
# Or: from pycuda.driver import init [as alias]
def get_compute_capability(device_id=None, verbose=False):
    """
    Query compute capability through PyCuda and check it's 5.0 (Maxwell) or
    greater.
    5.0 (GTX750 Ti) only fp32 support
    5.2 (GTX9xx series) required for fp16
    By default, check all devices and return the highest compute capability.

    Arguments:
        device_id (int): CUDA device id. Default to None, will iterate over
                         all devices if None.
        verbose (bool): prints verbose logging if True, default False.

    Returns:
        float: Zero if no GPU is found, otherwise highest compute capability.
    """
    try:
        import pycuda
        import pycuda.driver as drv
    except ImportError:
        if verbose:
            neon_logger.display("PyCUDA module not found")
        return 0
    try:
        drv.init()
    except pycuda._driver.RuntimeError as e:
        neon_logger.display("PyCUDA Runtime error: {0}".format(str(e)))
        return 0

    major_string = pycuda._driver.device_attribute.COMPUTE_CAPABILITY_MAJOR
    minor_string = pycuda._driver.device_attribute.COMPUTE_CAPABILITY_MINOR
    full_version = []
    if device_id is None:
        device_id = list(range(drv.Device.count()))
    elif isinstance(device_id, int):
        device_id = [device_id]

    for i in device_id:
        major = drv.Device(i).get_attribute(major_string)
        minor = drv.Device(i).get_attribute(minor_string)
        full_version += [major + minor / 10.]

    if verbose:
        neon_logger.display("Found GPU(s) with compute capability: {}".format(full_version))

    return max(full_version) 
Author: NervanaSystems | Project: neon | Lines of code: 48 | Source file: check_gpu.py

Example 11: init_device

# Required import: from pycuda import driver [as alias]
# Or: from pycuda.driver import init [as alias]
def init_device(device='gpu0'):
  
    if device.startswith('cuda'):
        
        import os
        if 'THEANO_FLAGS' in os.environ:
            raise ValueError('Use theanorc to set the theano config')
        
        os.environ['THEANO_FLAGS'] = 'device={0}'.format(device)
        import theano.gpuarray
        # This is a bit of black magic that may stop working in future
        # theano releases
        ctx = theano.gpuarray.type.get_context(None)
        drv = None
        
    elif device.startswith('gpu'):
        
        gpuid = int(device[-1])

        import pycuda.driver as drv
        drv.init()
        dev = drv.Device(gpuid)
        ctx = dev.make_context()
        import theano.sandbox.cuda
        theano.sandbox.cuda.use(device)
        import theano
    else:
        drv=None
        ctx=None
        import theano.sandbox.cuda
        theano.sandbox.cuda.use(device)
        import theano
        
    from theano import function, config, shared, sandbox, tensor

    vlen = 10 * 30 * 768  # 10 x #cores x # threads per core
    iters = 1000

    rng = np.random.RandomState(22)
    arr = rng.rand(vlen)

    shared_x = theano.shared(np.asarray(arr, config.floatX))
    shared_xx = theano.shared(np.asarray(arr, config.floatX))
    
    x=tensor.fvector("x")
    # compile a function so that shared_x will be set to part of a computing graph on GPU (CUDAndarray)
    f = function([], tensor.exp(x), givens=[(x,shared_x)]) 
    
    
    if np.any([isinstance(x.op, tensor.Elemwise) and
                  ('Gpu' not in type(x.op).__name__)
                  for x in f.maker.fgraph.toposort()]):
        print('Used the cpu')
    else:
        print('Used the gpu')

    # if np.any([isinstance(x.op, tensor.Elemwise) for x in f.maker.fgraph.toposort()]) and device!='cpu':
    #     raise TypeError('graph not compiled on GPU') 

    return drv,ctx, arr, shared_x, shared_xx 
Author: uoguelph-mlrg | Project: Theano-MPI | Lines of code: 62 | Source file: test_exchanger.py


Note: The pycuda.driver.init examples in this article were compiled by 纯净天空 from open-source code and documentation platforms such as GitHub and MSDocs. The code snippets are selected from open-source projects contributed by the community, and copyright remains with the original authors. For redistribution and use, please refer to the corresponding project's license; do not reproduce without permission.