本文整理汇总了 Python 中 pycuda.driver.init 函数的典型用法代码示例。如果您正苦于以下问题:Python init 函数的具体用法?Python init 怎么用?Python init 使用的例子?那么恭喜您,这里精选的函数代码示例或许可以为您提供帮助。
在下文中一共展示了init函数的15个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Python代码示例。
示例1: reconstruct
def reconstruct(opts_path):
    """Reconstruct a 3-D image from channel data.

    Loads the options at ``opts_path``, reads the channel data of the first
    selected experiment from the destination directory, runs
    ``reconstruction_3d`` inside a freshly created CUDA context on device 0
    and hands the result to ``save_reconstructed_image``.

    Args:
        opts_path: Path passed to ``loadOptions``.
    """
    opts = loadOptions(opts_path)
    # normalize paths according to the platform
    for key in ('src_dir', 'dest_dir'):
        opts['extra'][key] = os.path.expanduser(
            os.path.normpath(opts['extra'][key]))
    # load data from hdf5 files
    ind = opts['load']['EXP_START']
    exp_end = opts['load']['EXP_END']
    if exp_end != -1 and exp_end != ind:
        notifyCli('WARNING: multiple experiments selected. '
                  'Only the first dataset will be processed')
    _, chn_data_3d = load_hdf5_data(opts['extra']['dest_dir'], ind)
    if opts['unpack']['Show_Image'] != 0:
        notifyCli('Currently only Show_Image = 0 is supported.')
    # initialize pyCuda environment
    cuda.init()
    ctx = cuda.Device(0).make_context()
    try:
        reImg = reconstruction_3d(chn_data_3d, opts['recon'])
    finally:
        # Always release the context, even when the reconstruction fails,
        # so it does not stay on this thread's context stack.
        ctx.pop()
        del ctx
    save_reconstructed_image(reImg, opts['extra']['dest_dir'],
                             ind, 'tiff', '_3d')
示例2: gpuFunc
def gpuFunc(iterator):
    """Run a word-boundary count over the strings of ``iterator`` on the GPU.

    The strings are joined with single spaces, converted to their character
    codes and copied to device 0. A reduction kernel then counts every
    position where a space (code 32) is directly followed by a non-space.

    This is a generator that yields exactly one value: the kernel result
    fetched back to the host.

    Args:
        iterator: Iterable of strings.
    """
    # 1. Data preparation
    cpu_dataset = " ".join(iterator)
    ascii_data = np.asarray([ord(x) for x in cpu_dataset], dtype=np.uint8)
    # 2. Driver initialization and data transfer
    cuda.init()
    contx = cuda.Device(0).make_context()
    gpu_dataset = None
    try:
        gpu_dataset = gpuarray.to_gpu(ascii_data)
        # 3. GPU kernel.
        # The kernel's algorithm counts the words by keeping
        # track of the space between them
        # np.int64 replaces the Python-2-only ``long`` builtin.
        countkrnl = reduction.ReductionKernel(
            np.int64, neutral="0",
            map_expr="(a[i] == 32)*(b[i] != 32)",
            reduce_expr="a + b", arguments="char *a, char *b")
        results = countkrnl(gpu_dataset[:-1], gpu_dataset[1:]).get()
        yield results
    finally:
        # Release GPU resources even if the kernel fails or the consumer
        # closes the generator before exhausting it.
        del gpu_dataset
        contx.pop()
        del contx
        gc.collect()
示例3: __init__
def __init__(self, device_num=0, sync_calls=False):
    """Create a CUDA context on ``device_num`` and cache device attributes.

    Args:
        device_num: Index handed to ``cuda.Device``.
        sync_calls: Stored unchanged in ``self._sync_calls``.
    """
    cuda.init()
    self.device = cuda.Device(device_num)
    self.context = self.device.make_context()
    self.stream = cuda.Stream()

    # Short aliases for the repeated attribute queries below.
    attribute = cuda.device_attribute
    query = self.device.get_attribute

    self.max_block_size = query(attribute.MAX_BLOCK_DIM_X)
    self.max_grid_size_x = query(attribute.MAX_GRID_DIM_X)
    self.max_grid_size_y = query(attribute.MAX_GRID_DIM_Y)
    self.max_grid_size_x_pow2 = 2 ** log2(self.max_grid_size_x)
    self.max_registers = query(attribute.MAX_REGISTERS_PER_BLOCK)
    self.warp_size = query(attribute.WARP_SIZE)

    self.gpu = True
    self.cuda = True
    self._sync_calls = sync_calls
    self.allocated = 0
示例4: __init__
def __init__(self, device_number=0, thread_per_block=512, **kwargs):
    """Set up an NVIDIA GPU platform object with an active CUDA context.

    Args:
        device_number: Index of the CUDA device to use.
        thread_per_block: Stored unchanged in ``self.thread_per_block``.
        **kwargs: Accepted but not used here.

    Raises:
        SystemExit: With exit status 1 when PyCUDA cannot be initialized,
            no CUDA device is present, or ``device_number`` is not smaller
            than the number of devices.
    """
    self.device_number = device_number
    self.thread_per_block = thread_per_block
    self.device_type = 'nvidia_gpu'
    self.language = 'cuda'
    self.code_type = 'cu'

    try:
        import pycuda.driver as cuda
        cuda.init()
    except Exception:
        logger.error("Error: CUDA initialization error", exc_info=True)
        # A bare ``raise SystemExit`` exits with status 0, which would
        # report success to the calling shell.
        raise SystemExit(1)

    max_devices = cuda.Device.count()
    if max_devices == 0:
        logger.error("Error: There is no CUDA device (NVIDIA GPU).")
        raise SystemExit(1)
    if device_number >= max_devices:
        logger.error("Error: The given device_number(%d) is bigger than physical GPU devices(%d)."%(device_number, max_devices))
        raise SystemExit(1)

    device = cuda.Device(device_number)
    context = device.make_context()
    # Pop the context at interpreter shutdown.
    import atexit
    atexit.register(context.pop)

    self.cuda = cuda
    self.device = device
    self.context = context
示例5: test_vector_add
def test_vector_add():
    """Check that ``run_kernel`` adds two float32 vectors correctly.

    The test is skipped when PyCUDA cannot be imported or initialized.
    """
    # Skip unless pycuda is installed and a CUDA capable device is present.
    try:
        import pycuda.driver as drv
        drv.init()
    except Exception:
        pytest.skip("PyCuda not installed or no CUDA device detected")

    kernel_string = """
__global__ void vector_add(float *c, float *a, float *b, int n) {
int i = blockIdx.x * block_size_x + threadIdx.x;
if (i<n) {
c[i] = a[i] + b[i];
}
}
"""

    size = 10000000
    lhs = numpy.random.randn(size).astype(numpy.float32)
    rhs = numpy.random.randn(size).astype(numpy.float32)
    out = numpy.zeros_like(rhs)
    kernel_args = [out, lhs, rhs, numpy.int32(size)]
    tune_params = {"block_size_x": 512}

    answer = run_kernel("vector_add", kernel_string, (size, 1),
                        kernel_args, tune_params)

    # The first returned entry corresponds to the output argument ``c``.
    assert numpy.allclose(answer[0], lhs + rhs, atol=1e-8)
示例6: __init__
def __init__(self, shape, dtype=numpy.float32, stream=None, allocator=drv.mem_alloc, cuda_device=0):
    """Allocate a device array of the given shape and dtype.

    Args:
        shape: Array shape; its product gives the element count.
        dtype: Anything accepted by ``numpy.dtype``.
        stream: Stored unchanged in ``self.stream``.
        allocator: Callable taking a size in bytes and returning the
            device allocation.
        cuda_device: Index of the CUDA device the context is created on.
    """
    try:
        drv.init()
        # Create the context on the requested device; the original always
        # used device 0 and ignored ``cuda_device``.
        # NOTE(review): a new context is pushed on every construction --
        # confirm whether a shared context is intended.
        drv.Device(cuda_device).make_context()
    except RuntimeError:
        # device is already initialized! so we ignore this
        # ugly, but works for now
        pass
    # which device are we working on
    self.cuda_device = cuda_device
    # internal shape
    self.shape = shape
    # internal type
    self.dtype = numpy.dtype(dtype)
    from pytools import product
    # internal size
    self.size = product(shape)
    self.allocator = allocator
    if self.size:
        self.gpudata = self.allocator(self.size * self.dtype.itemsize)
    else:
        # Nothing to allocate for an empty array.
        self.gpudata = None
    self.stream = stream
    self._update_kernel_kwargs()
示例7: init_device
def init_device(device='gpu0'):
    """Bind Theano (and optionally PyCUDA) to ``device`` and run a smoke test.

    Three device name families are handled:

    * ``cuda*``: sets ``THEANO_FLAGS`` and fetches the context from
      ``theano.gpuarray``; no PyCUDA driver is used.
    * ``gpu<N>``: creates a PyCUDA context on device ``N`` and calls
      ``theano.sandbox.cuda.use``.
    * anything else: calls ``theano.sandbox.cuda.use`` without a PyCUDA
      context.

    A small ``exp`` graph is then compiled and the function prints whether
    its element-wise ops ended up on the CPU or the GPU.

    Args:
        device: Device name such as ``'gpu0'`` or ``'cuda0'``.

    Returns:
        Tuple ``(drv, ctx, arr, shared_x, shared_xx)``; ``drv`` and/or
        ``ctx`` are ``None`` for the branches that do not create them.

    Raises:
        ValueError: If ``device`` starts with ``'cuda'`` while
            ``THEANO_FLAGS`` is already set, or if the text after
            ``'gpu'`` is not an integer.
    """
    if device.startswith('cuda'):
        import os
        if 'THEANO_FLAGS' in os.environ:
            raise ValueError('Use theanorc to set the theano config')
        os.environ['THEANO_FLAGS'] = 'device={0}'.format(device)
        import theano.gpuarray
        # This is a bit of black magic that may stop working in future
        # theano releases
        ctx = theano.gpuarray.type.get_context(None)
        drv = None
    elif device.startswith('gpu'):
        # Parse everything after the 'gpu' prefix so that multi-digit ids
        # such as 'gpu10' are not truncated to their last digit.
        gpuid = int(device[3:])
        import pycuda.driver as drv
        drv.init()
        dev = drv.Device(gpuid)
        ctx = dev.make_context()
        import theano.sandbox.cuda
        theano.sandbox.cuda.use(device)
    else:
        drv = None
        ctx = None
        import theano.sandbox.cuda
        theano.sandbox.cuda.use(device)
    import theano
    from theano import function, config, shared, sandbox, tensor

    vlen = 10 * 30 * 768  # 10 x #cores x # threads per core
    rng = np.random.RandomState(22)
    arr = rng.rand(vlen)
    shared_x = theano.shared(np.asarray(arr, config.floatX))
    shared_xx = theano.shared(np.asarray(arr, config.floatX))
    x = tensor.fvector("x")
    # compile a function so that shared_x will be set to part of a
    # computing graph on GPU (CUDAndarray)
    f = function([], tensor.exp(x), givens=[(x, shared_x)])
    # ``node`` avoids shadowing the symbolic variable ``x`` above.
    ran_on_cpu = any(
        isinstance(node.op, tensor.Elemwise)
        and 'Gpu' not in type(node.op).__name__
        for node in f.maker.fgraph.toposort())
    if ran_on_cpu:
        print('Used the cpu')
    else:
        print('Used the gpu')
    return drv, ctx, arr, shared_x, shared_xx
示例8: n_blocks
def n_blocks(self):
    """Return the number of GPU blocks, deriving a default when unset.

    If ``self.opts`` has no ``'n_blocks'`` entry, the value is computed as
    ``ceil(len(self.param_values) / threads_per_block)``. The thread count
    is 32 when PyCUDA is unavailable (module-level ``cuda`` is ``None``);
    otherwise it is additionally capped by the device's shared memory per
    block and its maximum threads per block.

    The result is also assigned to ``self.n_blocks``.

    Returns:
        The number of blocks.
    """
    n_blocks = self.opts.get('n_blocks')
    if n_blocks is None:
        default_threads_per_block = 32
        bytes_per_float = 4
        memory_per_thread = (self._len_species + 1) * bytes_per_float
        if cuda is None:
            threads_per_block = default_threads_per_block
        else:
            cuda.init()
            device = cuda.Device(self.gpu[0])
            attrs = device.get_attributes()
            shared_memory_per_block = attrs[
                cuda.device_attribute.MAX_SHARED_MEMORY_PER_BLOCK]
            upper_limit_threads_per_block = attrs[
                cuda.device_attribute.MAX_THREADS_PER_BLOCK]
            # Floor division keeps the thread count an integer on
            # Python 3 as well as Python 2.
            max_threads_per_block = min(
                shared_memory_per_block // memory_per_thread,
                upper_limit_threads_per_block)
            # Never drop below one thread, which would divide by zero
            # in the block-count computation below.
            threads_per_block = max(
                1, min(max_threads_per_block, default_threads_per_block))
        n_blocks = int(
            np.ceil(1. * len(self.param_values) / threads_per_block))
        self._logger.debug('n_blocks set to {} (used pycuda: {})'.format(
            n_blocks, cuda is not None
        ))
    self.n_blocks = n_blocks
    return n_blocks
示例9: _init_gpu
def _init_gpu(self):
    """
    Initialize GPU device.

    If ``self.device`` is not a valid index for the detected devices, a
    randomly chosen device index replaces it. A failure to create the
    context is logged and otherwise ignored.

    Notes
    -----
    Must be called from within the `run()` method, not from within
    `__init__()`.
    """
    if self.device is None:
        self.log_info('no GPU specified - not initializing ')
        return

    # Import pycuda.driver here so as to facilitate the
    # subclassing of Module to create pure Python LPUs that don't use GPUs:
    import pycuda.driver as drv
    drv.init()

    N_gpu = drv.Device.count()
    if not self.device < N_gpu:
        new_device = randint(0, N_gpu - 1)
        # list(...) prints the same device list on Python 2 and 3.
        self.log_warning("GPU device device %d not in GPU devices %s" % (self.device, str(list(range(0, N_gpu)))))
        self.log_warning("Setting device = %d" % new_device)
        self.device = new_device

    try:
        self.gpu_ctx = drv.Device(self.device).make_context()
    except Exception as e:
        # str(e) works on Python 2 and 3; ``e.message`` raises
        # AttributeError on Python 3 and would hide the real error.
        self.log_info('_init_gpu exception: ' + str(e))
    else:
        atexit.register(self.gpu_ctx.pop)
        self.log_info('GPU %s initialized' % self.device)
示例10: get_device_count
def get_device_count(verbose=False):
    """
    Query device count through PyCuda.

    Arguments:
        verbose (bool): prints verbose logging if True, default False.

    Returns:
        int: Number of GPUs available. 0 is returned when PyCUDA cannot be
        imported or the driver fails to initialize.
    """
    try:
        import pycuda
        import pycuda.driver as drv
    except ImportError:
        if verbose:
            print("PyCUDA module not found")
        return 0
    try:
        drv.init()
    except pycuda._driver.RuntimeError as e:
        print("PyCUDA Runtime error: {0}".format(str(e)))
        return 0
    count = drv.Device.count()
    if verbose:
        # Apply the format; the old print statement emitted the raw
        # "%d" template and is a syntax error on Python 3.
        print("Found %d GPU(s)" % count)
    return count
示例11: _init_gpu
def _init_gpu(self):
    """
    Initialize GPU device.

    A failure to create the context is logged and otherwise ignored.

    Notes
    -----
    Must be called from within the `run()` method, not from within
    `__init__()`.
    """
    if self.device is None:
        self.log_info('no GPU specified - not initializing ')
        return

    # Import pycuda.driver here so as to facilitate the
    # subclassing of Module to create pure Python LPUs that don't use GPUs:
    import pycuda.driver as drv
    drv.init()
    try:
        self.gpu_ctx = drv.Device(self.device).make_context()
    except Exception as e:
        # str(e) works on Python 2 and 3; ``e.message`` raises
        # AttributeError on Python 3 and would hide the real error.
        self.log_info('_init_gpu exception: ' + str(e))
    else:
        atexit.register(self.gpu_ctx.pop)
        self.log_info('GPU initialized')
示例12: choose_gpu
def choose_gpu():
    """Pick a GPU for this MPI process and create a context on it.

    Each process takes the GPU whose index equals the number of
    lower-ranked processes running on the same host.

    Returns:
        The context created by ``make_context()`` on the chosen device.

    Raises:
        TypeError: If the gathered host list is inconsistent with the
            communicator, or if this node has fewer GPUs than processes
            ranked up to and including this one.
    """
    # Find out how many GPUs are available to us on this node.
    drv.init()
    num_gpus = drv.Device.count()

    # Figure out the names of the other hosts.
    rank = MPI.COMM_WORLD.Get_rank()  # Find out which process I am.
    name = MPI.Get_processor_name()  # The name of my node.
    hosts = MPI.COMM_WORLD.allgather(name)  # Names of all the hosts.

    # Make sure the number of hosts and processes are equal.
    num_processes = MPI.COMM_WORLD.Get_size()
    # Compare by value: ``is not`` on ints tests object identity and is
    # only accidentally correct for small cached integers.
    if len(hosts) != num_processes:
        raise TypeError('Number of hosts and number of processes do not match.')

    # Make sure the name of my node matches.
    if name != hosts[rank]:
        raise TypeError('Hostname does not match.')

    # Find out which GPU to take: our precedence on this node.
    gpu_id = hosts[0:rank].count(name)
    if gpu_id >= num_gpus:
        raise TypeError('No GPU available.')

    # Make and return a context on the device.
    return drv.Device(gpu_id).make_context()
示例13: worker
def worker():
    """Run a small element-wise kernel on a GPU chosen from the MPI rank.

    The process connects to its parent communicator, selects device
    ``rank % number_of_gpus``, scales a random two-element vector by 2 on
    that device, prints the result and disconnects from the parent.

    Raises:
        RuntimeError: If no CUDA device is visible to this process.
    """
    comm = MPI.Comm.Get_parent()
    size = comm.Get_size()
    rank = comm.Get_rank()
    name = MPI.Get_processor_name()

    import pycuda.driver as drv
    drv.init()

    # Find maximum number of available GPUs:
    max_gpus = drv.Device.count()
    if max_gpus == 0:
        # Fail with a clear message instead of a ZeroDivisionError from
        # the modulo below.
        raise RuntimeError('No CUDA device available on %s' % name)

    # Use modular arithmetic to avoid assigning a nonexistent GPU:
    n = rank % max_gpus
    dev = drv.Device(n)
    ctx = dev.make_context()
    atexit.register(ctx.pop)

    # Execute a kernel:
    import pycuda.gpuarray as gpuarray
    from pycuda.elementwise import ElementwiseKernel

    kernel = ElementwiseKernel('double *y, double *x, double a',
                               'y[i] = a*x[i]')
    x_gpu = gpuarray.to_gpu(np.random.rand(2))
    y_gpu = gpuarray.empty_like(x_gpu)
    kernel(y_gpu, x_gpu, np.double(2.0))

    # A single pre-formatted argument prints identically under the
    # Python 2 print statement and the Python 3 print function.
    print('I am process %d of %d on CPU %s using GPU %s of %s [x_gpu=%s, y_gpu=%s]' %
          (rank, size, name, n, max_gpus, str(x_gpu.get()), str(y_gpu.get())))
    comm.Disconnect()
示例14: fun_load
def fun_load(config, sock_data=5000):
    """Loader loop: read batches from disk and copy them to a remote GPU buffer.

    After creating a CUDA context, the function binds a ZeroMQ PAIR socket
    on ``sock_data``, receives ``(shape, dtype, handle)`` and opens the
    handle as an IPC memory buffer. It then loops forever: fetch a file
    name from the receive queue, load it, subtract the image mean,
    optionally crop/mirror, wait for ``'calc_finished'``, copy the data
    into the remote buffer and answer with ``'copy_finished'``.

    Args:
        config: Dict providing ``'queue_l2t'``, ``'queue_t2l'``,
            ``'use_data_layer'``, ``'batch_crop_mirror'`` and ``'gpu'``.
        sock_data: TCP port the ZeroMQ socket binds to.
    """
    # recv_queue and send_queue are multiprocessing.Queue
    # recv_queue is only for receiving
    # send_queue is only for sending
    send_queue = config['queue_l2t']
    recv_queue = config['queue_t2l']

    # if need to do random crop and mirror
    flag_randproc = not config['use_data_layer']
    flag_batch = config['batch_crop_mirror']

    drv.init()
    # NOTE(review): only the last character of config['gpu'] is used, so
    # device ids above 9 would be misread -- confirm the expected format.
    dev = drv.Device(int(config['gpu'][-1]))
    ctx = dev.make_context()

    sock = zmq.Context().socket(zmq.PAIR)
    sock.bind('tcp://*:{0}'.format(sock_data))

    shape, dtype, h = sock.recv_pyobj()
    # Pre-formatted strings print identically on Python 2 and 3.
    print('shared_x information received {0} {1}'.format(shape, dtype))
    shape = (3, 255, 255, 256)  # TODO remove fix

    gpu_data_remote = gpuarray.GPUArray(shape, dtype,
                                        gpudata=drv.IPCMemoryHandle(h))
    gpu_data = gpuarray.GPUArray(shape, dtype)

    img_mean = recv_queue.get()
    print('img_mean received')

    # The first time, do the set ups and other stuff
    # receive information for loading
    while True:
        # getting the hkl file name to load
        hkl_name = recv_queue.get()
        data = hkl.load(hkl_name) - img_mean

        if flag_randproc:
            param_rand = recv_queue.get()
            data = crop_and_mirror(data, param_rand, flag_batch=flag_batch)

        gpu_data.set(data)

        # wait for computation on last minibatch to finish
        msg = recv_queue.get()
        assert msg == 'calc_finished'

        drv.memcpy_peer(gpu_data_remote.ptr,
                        gpu_data.ptr,
                        gpu_data.dtype.itemsize *
                        gpu_data.size,
                        ctx, ctx)
        ctx.synchronize()
        send_queue.put('copy_finished')
示例15: _init_gpu
def _init_gpu(comm):
    """Chooses a gpu and creates a context on it.

    The preferred device index is the number of lower-ranked processes on
    the same host. If a context cannot be created there, the remaining
    devices are tried in round-robin order.

    Args:
        comm: MPI communicator providing ``Get_rank`` and ``allgather``.

    Returns:
        Tuple ``(device, context)``.

    Raises:
        TypeError: If the preferred index is not below the number of GPUs.
        RuntimeError: If a context could not be created on any device.
    """
    # Find out how many GPUs are available to us on this node.
    driver.init()
    num_gpus = driver.Device.count()

    # Figure out the names of the other hosts.
    rank = comm.Get_rank()  # Find out which process I am.
    name = MPI.Get_processor_name()  # The name of my node.
    hosts = comm.allgather(name)  # Get the names of all the other hosts

    # Find out which GPU to take (by precedence).
    gpu_id = hosts[0:rank].count(name)
    if gpu_id >= num_gpus:
        raise TypeError("No GPU available.")

    # Create a context on the appropriate device.
    last_error = None
    for k in range(num_gpus):
        try:
            device = driver.Device((gpu_id + k) % num_gpus)
            context = device.make_context()
        except Exception as err:
            # Remember why this device failed and try the next one; a bare
            # ``except`` would also swallow KeyboardInterrupt/SystemExit.
            last_error = err
            continue
        return device, context  # Return device and context.

    # Every device failed: report it instead of hitting an unbound local.
    raise RuntimeError(
        "Could not create a context on any of the %d GPU(s): %s"
        % (num_gpus, last_error))