本文整理匯總了Python中pycuda.driver.Device方法的典型用法代碼示例。如果您正苦於以下問題:Python driver.Device方法的具體用法?Python driver.Device怎麼用?Python driver.Device使用的例子?那麼,這裏精選的方法代碼示例或許可以為您提供幫助。您也可以進一步了解該方法所在類pycuda.driver的用法示例。
在下文中一共展示了driver.Device方法的13個代碼示例,這些例子默認根據受歡迎程度排序。您可以為喜歡或者感覺有用的代碼點讚,您的評價將有助於系統推薦出更棒的Python代碼示例。
示例1: run
# 需要導入模塊: from pycuda import driver [as 別名]
# 或者: from pycuda.driver import Device [as 別名]
def run(self):
    """Run detection until the 'running' flag is set to False by main thread.

    Each iteration reads a frame from ``self.cam``, runs
    ``self.trt_ssd.detect`` on it with ``self.conf_th`` and publishes the
    frame and the detections through the module-level globals ``s_img``,
    ``s_boxes``, ``s_confs`` and ``s_clss`` while holding
    ``self.condition``; one waiter is notified per frame.

    NOTE: CUDA context is created here, i.e. inside the thread which
    calls CUDA kernels.  In other words, creating CUDA context in
    __init__() doesn't work.

    The engine and the context are released in a ``finally`` clause, so an
    exception raised while loading the engine or processing a frame no
    longer leaves the context pushed on this thread.
    """
    global s_img, s_boxes, s_confs, s_clss

    print('TrtThread: loading the TRT SSD engine...')
    self.cuda_ctx = cuda.Device(0).make_context()  # GPU 0
    try:
        self.trt_ssd = TrtSSD(self.model, INPUT_HW)
        print('TrtThread: start running...')
        self.running = True
        while self.running:
            img = self.cam.read()
            boxes, confs, clss = self.trt_ssd.detect(img, self.conf_th)
            with self.condition:
                s_img, s_boxes, s_confs, s_clss = img, boxes, confs, clss
                self.condition.notify()
    finally:
        # Same release order as before: engine first, then the context.
        try:
            del self.trt_ssd
        except AttributeError:
            # Engine construction failed, so there is nothing to drop.
            pass
        self.cuda_ctx.pop()
        del self.cuda_ctx
    print('TrtThread: stopped...')
示例2: __init__
# 需要導入模塊: from pycuda import driver [as 別名]
# 或者: from pycuda.driver import Device [as 別名]
def __init__(self, device_id=None, enable_winograd=True, deterministic=True,
             scratch_size=0):
    """Initialise the CUDA driver and create a context on one device.

    Args:
        device_id: Device ordinal; converted with ``int``.  ``None``
            selects device 0.
        enable_winograd: Stored as ``self.enable_winograd``.
        deterministic: Stored as ``self.deterministic``.
        scratch_size: Stored as ``self.scratch_size``.

    Raises:
        RuntimeError: If the device's major compute capability is below 3.

    The process exits with ``PYCUDA_LOGIC_ERROR_CODE`` when ``drv.init()``
    raises ``drv.LogicError``.
    """
    try:
        drv.init()
    except drv.LogicError:
        sys.exit(PYCUDA_LOGIC_ERROR_CODE)

    if device_id is None:
        self.device_id = 0
    else:
        self.device_id = int(device_id)

    # Refuse devices whose major compute capability is below 3.
    self.compute_capability = drv.Device(self.device_id).compute_capability()
    if self.compute_capability[0] < 3:
        raise RuntimeError("Unsupported GPU")

    # Context on the selected device.
    self.ctx = drv.Device(self.device_id).make_context()

    # Bookkeeping attributes.
    self.stream = None
    self.warmup = False
    self.scratch_size = scratch_size
    self.scratch_offset = 0

    # Fall back to CUDA C kernels on older (pre-Maxwell) GPU generations.
    # TODO: this is not fully supported in graph yet
    self.use_cudac_kernels = self.compute_capability[0] < 5

    # TODO
    # self.cublas_handle = cublas.cublasCreate()

    self.enable_winograd = enable_winograd
    self.deterministic = deterministic
示例3: __init__
# 需要導入模塊: from pycuda import driver [as 別名]
# 或者: from pycuda.driver import Device [as 別名]
def __init__(self, deviceID):
    """Create an empty GPU record for one device.

    Args:
        deviceID (int): Device ID for GPU.
    """
    self.deviceID = deviceID
    # Descriptive fields start out unset; get_gpu_info() assigns them.
    self.name = self.pcibusID = self.constmem = self.totalmem = None
示例4: get_gpu_info
# 需要導入模塊: from pycuda import driver [as 別名]
# 或者: from pycuda.driver import Device [as 別名]
def get_gpu_info(self, drv):
    """Fill in this record's fields from the device ``self.deviceID``.

    Args:
        drv (object): PyCuda driver.
    """
    device = drv.Device(self.deviceID)
    self.name = device.name()
    self.pcibusID = device.pci_bus_id()
    # Read as an attribute, not called, unlike the other three queries.
    self.constmem = device.total_constant_memory
    self.totalmem = device.total_memory()
示例5: gpuWordCount
# 需要導入模塊: from pycuda import driver [as 別名]
# 或者: from pycuda.driver import Device [as 別名]
def gpuWordCount(self):
    """Count word starts in every partition of ``self.rdd`` on GPU 0.

    Returns:
        The result of ``self.rdd.mapPartitions`` applied to the
        per-partition generator defined below; each partition yields a
        single value, the reduction result fetched from the GPU.
    """
    def gpuFunc(iterator):
        """Yield the number of space -> non-space transitions in a partition.

        NOTE: a word at the very start of the joined text is not preceded
        by a space and is therefore not counted by this expression.
        """
        # 1. Data preparation: join the partition's strings and turn them
        #    into a byte array (assumes the text is ASCII - TODO confirm).
        cpu_dataset = " ".join(iterator)
        ascii_data = np.asarray([ord(x) for x in cpu_dataset], dtype=np.uint8)

        # 2. Driver initialization and data transfer
        cuda.init()
        contx = cuda.Device(0).make_context()
        gpu_dataset = None
        try:
            gpu_dataset = gpuarray.to_gpu(ascii_data)

            # 3. GPU kernel.
            # The kernel's algorithm counts the words by keeping
            # track of the space between them.  np.int64 replaces the
            # Python 2-only ``long`` type as the output dtype.
            countkrnl = reduction.ReductionKernel(
                np.int64, neutral="0",
                map_expr="(a[i] == 32)*(b[i] != 32)",
                reduce_expr="a + b", arguments="char *a, char *b")
            results = countkrnl(gpu_dataset[:-1], gpu_dataset[1:]).get()
            yield results
        finally:
            # Release GPU context resources even when the kernel fails or
            # the consumer stops iterating early.
            contx.pop()
            del gpu_dataset
            del contx
            gc.collect()

    vals = self.rdd.mapPartitions(gpuFunc)
    return vals
示例6: _init_gpu
# 需要導入模塊: from pycuda import driver [as 別名]
# 或者: from pycuda.driver import Device [as 別名]
def _init_gpu(comm):
    """Choose a GPU for this MPI process and create a context on it.

    The preferred GPU index is the number of lower-ranked processes that
    report the same processor name as this one.  Starting from that index,
    each device is tried in turn (wrapping around) until a context can be
    created.

    Args:
        comm: MPI communicator; ``Get_rank`` and ``allgather`` are used.

    Returns:
        tuple: ``(device, context)`` for the first device that accepted a
        context.

    Raises:
        TypeError: If the preferred index is not below the number of GPUs,
            or if no device accepted a context.
    """
    # Find out how many GPUs are available to us on this node.
    driver.init()
    num_gpus = driver.Device.count()

    # Figure out the names of the other hosts.
    rank = comm.Get_rank()  # Find out which process I am.
    name = MPI.Get_processor_name()  # The name of my node.
    hosts = comm.allgather(name)  # Get the names of all the other hosts

    # Find out which GPU to take (by precedence).
    gpu_id = hosts[:rank].count(name)
    if gpu_id >= num_gpus:
        raise TypeError('No GPU available.')

    # Create a context on the first device that accepts one.
    last_error = None
    for k in range(num_gpus):
        try:
            device = driver.Device((gpu_id + k) % num_gpus)
            context = device.make_context()
        except Exception as err:
            # Best effort: remember why this device failed, try the next.
            last_error = err
            continue
        return device, context  # Return device and context.

    # Every device failed; previously this fell through to an
    # UnboundLocalError at the return statement.
    raise TypeError('No GPU available: could not create a context on any '
                    'device (%s)' % (last_error,))
# Global variable for the global space.
# The leading double underscore should prevent outside modules from accessing
# this variable.
示例7: run
# 需要導入模塊: from pycuda import driver [as 別名]
# 或者: from pycuda.driver import Device [as 別名]
def run(self):
    """Compile the kernel, run it on ``self.input_array`` and store the result.

    A context is created on device 0 for the duration of the call.  The
    module built from ``kernel_code`` supplies the ``mult_ker`` function,
    which is launched on one block of 64 threads; the array is then copied
    back into ``self.output_array``.

    The context is popped in a ``finally`` clause so a failure while
    compiling, launching or copying does not leave it pushed.
    """
    self.dev = drv.Device(0)
    self.context = self.dev.make_context()
    try:
        self.ker = SourceModule(kernel_code)
        self.mult_ker = self.ker.get_function('mult_ker')

        self.array_gpu = gpuarray.to_gpu(self.input_array)
        self.mult_ker(self.array_gpu, np.int32(array_len),
                      block=(64, 1, 1), grid=(1, 1, 1))
        self.output_array = self.array_gpu.get()
    finally:
        self.context.pop()
開發者ID:PacktPublishing,項目名稱:Hands-On-GPU-Programming-with-Python-and-CUDA,代碼行數:17,代碼來源:multi-kernel_multi-thread.py
示例8: ensure_gpu_capability
# 需要導入模塊: from pycuda import driver [as 別名]
# 或者: from pycuda.driver import Device [as 別名]
def ensure_gpu_capability(device_id):
    """Raise unless the device reports compute capability 3.0 or greater.

    Args:
        device_id: Passed to ``get_compute_capability`` and quoted in the
            error message.

    Raises:
        RuntimeError: If the reported capability is below 3.0.
    """
    # A plain truth test instead of ``is False``: the identity check only
    # fires for the exact ``False`` singleton and would silently pass for
    # any other falsy comparison result.
    if not (get_compute_capability(device_id) >= 3.0):
        raise RuntimeError("Device " + str(device_id) +
                           " does not have CUDA compute capability 3.0 or greater")
示例9: get_device_count
# 需要導入模塊: from pycuda import driver [as 別名]
# 或者: from pycuda.driver import Device [as 別名]
def get_device_count(verbose=False):
    """
    Report how many CUDA devices PyCuda can see.

    Arguments:
        verbose (bool): prints verbose logging if True, default False.

    Returns:
        int: Number of GPUs available; 0 when PyCUDA cannot be imported
            or the driver fails to initialise.
    """
    try:
        import pycuda
        import pycuda.driver as drv
    except ImportError:
        if verbose:
            neon_logger.display("PyCUDA module not found")
        return 0

    try:
        drv.init()
    except pycuda._driver.RuntimeError as err:
        # Reported regardless of ``verbose``.
        neon_logger.display("PyCUDA Runtime error: {0}".format(str(err)))
        return 0
    else:
        num_devices = drv.Device.count()

    if verbose:
        neon_logger.display("Found {} GPU(s)".format(num_devices))
    return num_devices
示例10: detect_check_gpus
# 需要導入模塊: from pycuda import driver [as 別名]
# 或者: from pycuda.driver import Device [as 別名]
def detect_check_gpus(deviceIDs):
    """Detect Nvidia GPU(s) and validate the requested device IDs.

    Args:
        deviceIDs (list): List of integers of device IDs.  An empty or
            missing list selects device 0.

    Returns:
        tuple: ``(gpus, allgpustext)`` where ``gpus`` is the list of GPU
        objects for the requested IDs and ``allgpustext`` is a list of
        one description string per available device.

    Raises:
        ImportError: If pycuda cannot be imported.
        GeneralError: If no device is detected or a requested ID is not
            among the available ones.

    NOTE(review): the available IDs are taken verbatim from
    CUDA_VISIBLE_DEVICES when it is set and then used as device ordinals;
    confirm this matches how CUDA numbers the visible devices.
    """
    try:
        import pycuda.driver as drv
    except ImportError:
        raise ImportError('To use gprMax in GPU mode the pycuda package must be installed, and you must have a NVIDIA CUDA-Enabled GPU (https://developer.nvidia.com/cuda-gpus).')
    drv.init()

    # Work out which device IDs are available.
    num_devices = drv.Device.count()
    if num_devices == 0:
        raise GeneralError('No NVIDIA CUDA-Enabled GPUs detected (https://developer.nvidia.com/cuda-gpus)')
    visible = os.environ.get('CUDA_VISIBLE_DEVICES')
    if visible is not None:
        available = [int(token) for token in visible.split(',')]
    else:
        available = range(num_devices)

    # Default to device 0 when nothing was requested.
    requested = deviceIDs if deviceIDs else [0]

    # Every requested ID must be one of the available ones.
    for wanted in requested:
        if wanted not in available:
            raise GeneralError('GPU with device ID {} does not exist'.format(wanted))

    # Describe every available device; keep the objects that were requested.
    gpus = []
    allgpustext = []
    for dev_id in available:
        gpu = GPU(deviceID=dev_id)
        gpu.get_gpu_info(drv)
        if dev_id in requested:
            gpus.append(gpu)
        size_text = human_size(gpu.totalmem, a_kilobyte_is_1024_bytes=True)
        allgpustext.append('{} - {}, {}'.format(gpu.deviceID, gpu.name, size_text))

    return gpus, allgpustext
示例11: fun_load
# 需要導入模塊: from pycuda import driver [as 別名]
# 或者: from pycuda.driver import Device [as 別名]
def fun_load(config, sock_data=5000):
    """Loader loop: read batches from disk and copy them into a shared GPU buffer.

    After setup the function loops forever: it receives a file name and
    crop/mirror parameters from ``recv_queue``, loads and preprocesses the
    data, uploads it to a local GPU array, waits for a 'calc_finished'
    message, copies the array into the buffer opened from the IPC handle
    and replies with 'copy_finished'.

    Args:
        config (dict): Reads the keys 'queue_l2t' (queue used for
            sending), 'queue_t2l' (queue used for receiving),
            'batch_crop_mirror' and 'gpu'.  The trailing digits of
            ``config['gpu']`` select the device (presumably a name such
            as 'gpu0' - verify against caller).
        sock_data (int): TCP port the ZeroMQ PAIR socket binds to.

    This function does not return.
    """
    send_queue = config['queue_l2t']
    recv_queue = config['queue_t2l']
    # recv_queue and send_queue are multiprocessing.Queue
    # recv_queue is only for receiving
    # send_queue is only for sending

    # if need to do random crop and mirror
    flag_batch = config['batch_crop_mirror']

    drv.init()
    # Use every trailing digit so that e.g. 'gpu10' selects device 10
    # instead of device 0.
    gpu_name = config['gpu']
    gpu_index = int(gpu_name[len(gpu_name.rstrip('0123456789')):])
    dev = drv.Device(gpu_index)
    ctx = dev.make_context()

    sock = zmq.Context().socket(zmq.PAIR)
    sock.bind('tcp://*:{0}'.format(sock_data))

    # Shape, dtype and IPC handle of the buffer owned by the peer.
    shape, dtype, h = sock.recv_pyobj()
    print('shared_x information received')

    gpu_data_remote = gpuarray.GPUArray(shape, dtype,
                                        gpudata=drv.IPCMemoryHandle(h))
    gpu_data = gpuarray.GPUArray(shape, dtype)

    img_mean = recv_queue.get()
    print('img_mean received')

    # The first time, do the set ups and other stuff
    # receive information for loading
    while True:
        # getting the hkl file name to load
        hkl_name = recv_queue.get()
        data = hkl.load(hkl_name) - img_mean

        param_rand = recv_queue.get()
        data = crop_and_mirror(data, param_rand, flag_batch=flag_batch)
        gpu_data.set(data)

        # wait for computation on last minibatch to finish
        msg = recv_queue.get()
        assert msg == 'calc_finished'

        drv.memcpy_peer(gpu_data_remote.ptr,
                        gpu_data.ptr,
                        gpu_data.dtype.itemsize * gpu_data.size,
                        ctx, ctx)
        ctx.synchronize()

        send_queue.put('copy_finished')
示例12: get_compute_capability
# 需要導入模塊: from pycuda import driver [as 別名]
# 或者: from pycuda.driver import Device [as 別名]
def get_compute_capability(device_id=None, verbose=False):
    """
    Query compute capability through PyCuda.

    By default, check all devices and return the highest compute capability.
    The value is only reported; callers decide what minimum they require.

    Arguments:
        device_id (int): CUDA device id, or an iterable of ids. Default to
            None, will iterate over all devices if None.
        verbose (bool): prints verbose logging if True, default False.

    Returns:
        float: Zero if PyCUDA is unavailable, the driver fails to
            initialise or no device is queried, otherwise the highest
            compute capability expressed as ``major + minor / 10``.
    """
    try:
        import pycuda
        import pycuda.driver as drv
    except ImportError:
        if verbose:
            neon_logger.display("PyCUDA module not found")
        return 0

    try:
        drv.init()
    except pycuda._driver.RuntimeError as e:
        neon_logger.display("PyCUDA Runtime error: {0}".format(str(e)))
        return 0

    major_string = pycuda._driver.device_attribute.COMPUTE_CAPABILITY_MAJOR
    minor_string = pycuda._driver.device_attribute.COMPUTE_CAPABILITY_MINOR

    if device_id is None:
        device_id = list(range(drv.Device.count()))
    elif isinstance(device_id, int):
        device_id = [device_id]

    full_version = []
    for i in device_id:
        device = drv.Device(i)
        major = device.get_attribute(major_string)
        minor = device.get_attribute(minor_string)
        full_version.append(major + minor / 10.)

    if verbose:
        neon_logger.display("Found GPU(s) with compute capability: {}".format(full_version))

    # max() of an empty list raises ValueError; the contract is to return
    # zero when there is no GPU to report.
    return max(full_version) if full_version else 0
示例13: init_device
# 需要導入模塊: from pycuda import driver [as 別名]
# 或者: from pycuda.driver import Device [as 別名]
def init_device(device='gpu0'):
    """Bind Theano to ``device`` and build a pair of test shared variables.

    Three cases are distinguished by the prefix of ``device``:

    * ``'cuda...'``: sets ``THEANO_FLAGS`` and takes the context from
      ``theano.gpuarray``; no PyCUDA driver is used.
    * ``'gpu...'``: creates a PyCUDA context on the device whose index is
      given by the trailing digits, then calls
      ``theano.sandbox.cuda.use``.
    * anything else: only calls ``theano.sandbox.cuda.use``.

    A small function is compiled afterwards and a message is printed
    saying whether its graph used the CPU or the GPU.

    Args:
        device (str): Device name, default 'gpu0'.

    Returns:
        tuple: ``(drv, ctx, arr, shared_x, shared_xx)``; ``drv`` and
        ``ctx`` are ``None`` where the branch does not create them.

    Raises:
        ValueError: For a 'cuda' device when ``THEANO_FLAGS`` is already
            set in the environment.
    """
    if device.startswith('cuda'):
        import os
        if 'THEANO_FLAGS' in os.environ:
            raise ValueError('Use theanorc to set the theano config')
        os.environ['THEANO_FLAGS'] = 'device={0}'.format(device)
        import theano.gpuarray
        # This is a bit of black magic that may stop working in future
        # theano releases
        ctx = theano.gpuarray.type.get_context(None)
        drv = None
    elif device.startswith('gpu'):
        # Use every trailing digit so that 'gpu10' selects device 10
        # instead of device 0.
        gpuid = int(device[len(device.rstrip('0123456789')):])
        import pycuda.driver as drv
        drv.init()
        dev = drv.Device(gpuid)
        ctx = dev.make_context()
        import theano.sandbox.cuda
        theano.sandbox.cuda.use(device)
        import theano
    else:
        drv = None
        ctx = None
        import theano.sandbox.cuda
        theano.sandbox.cuda.use(device)
        import theano

    from theano import function, config, tensor

    vlen = 10 * 30 * 768  # 10 x #cores x # threads per core
    rng = np.random.RandomState(22)
    arr = rng.rand(vlen)

    shared_x = theano.shared(np.asarray(arr, config.floatX))
    shared_xx = theano.shared(np.asarray(arr, config.floatX))

    x = tensor.fvector("x")
    # compile a function so that shared_x will be set to part of a
    # computing graph on GPU (CUDAndarray)
    f = function([], tensor.exp(x), givens=[(x, shared_x)])

    # An Elemwise op whose type name lacks 'Gpu' means the graph ran on CPU.
    if np.any([isinstance(node.op, tensor.Elemwise) and
               ('Gpu' not in type(node.op).__name__)
               for node in f.maker.fgraph.toposort()]):
        print('Used the cpu')
    else:
        print('Used the gpu')

    return drv, ctx, arr, shared_x, shared_xx