本文整理汇总了Python中pycuda.driver.Context类的典型用法代码示例。如果您正苦于以下问题:Python Context类的具体用法?Python Context怎么用?Python Context使用的例子?那么恭喜您,这里精选的类代码示例或许可以为您提供帮助。
在下文中一共展示了Context类的15个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Python代码示例。
示例1: calcV
def calcV(I_shape, I_cu, V_cu):
    """Launch the ``k_calcV`` kernel and block until it has finished.

    Works out the launch grid and the shift/mask/divisor constants from
    ``I_shape``, hands them to ``k_calcV`` through ``call_cu_kernel``,
    synchronizes the CUDA context and records a ``timecheck`` checkpoint.

    Args:
        I_shape: Indexable whose entries 0-3 are read as ``Ci``, ``iH``,
            ``iW`` and ``N``.
        I_cu: Buffer passed to the kernel as its second argument.
        V_cu: Buffer passed to the kernel as its first argument.
    """
    Ci, iH, iW, N = I_shape[0], I_shape[1], I_shape[2], I_shape[3]
    padH = padW = 1

    # Adapted from winograd_conv.py.  The batch shift is pinned to 5 here
    # instead of being selected from N.
    shlN = 5

    # Row index is shlN; columns are
    # (shlY, shlX, maskY, shrY, maskX, shrX, maskN, supY, supX).
    tile_params = (
        (4, 5, 0x18, 3, 0x07, 0, 0x00, 0x203, 0x300),  # 4x8  yyxxx
        (4, 4, 0x18, 3, 0x06, 1, 0x01, 0x203, 0x201),  # 4x4  yyxxn
        (3, 4, 0x10, 4, 0x0c, 2, 0x03, 0x104, 0x202),  # 2x4  yxxnn
        (2, 4, 0x00, 0, 0x18, 3, 0x07, 0x000, 0x203),  # 1x4  xxnnn
        (2, 3, 0x00, 0, 0x10, 4, 0x0f, 0x000, 0x104),  # 1x2  xnnnn
        (2, 2, 0x00, 0, 0x00, 0, 0x1f, 0x000, 0x000),  # 1x1  nnnnn
    )
    (shlY, shlX, maskY, shrY, maskX, shrX,
     maskN, _supY, _supX) = tile_params[shlN]

    # The output extent equals the input extent (oH == iH, oW == iW).
    GYS = ceil_div(iH, 1 << shlY)
    GXS = ceil_div(iW, 1 << shlX)
    GN = ceil_div(N, 1 << shlN)
    GYS2 = GYS // 2
    GXS2 = GXS * 2

    GY_GX = GXS * GYS
    div_GXS2 = get_div_mul_shift_32(GY_GX, GXS2)
    div_GXS = get_div_mul_shift_32(GY_GX, GXS)

    print('div_GXS', div_GXS)
    print('GYS', GYS, 'GXS', GXS, 'GN', GN, 'Ci', Ci, 'GY_GX', GY_GX)

    grid = (GN, GY_GX, Ci)
    block = (32, 1, 1)

    row_elems = iW * N
    ci_stride = Ci * 1152
    kernel_args = (
        V_cu, I_cu,
        iH, iW, N, padH, padW,
        GXS, GYS2, GXS2,
        div_GXS2[0], div_GXS2[1], div_GXS[0], div_GXS[1],
        shlY, shlX, maskY, shrY, maskX, shrX, shlN, maskN,
        iH * row_elems, row_elems,
        GY_GX * ci_stride, GXS * ci_stride, ci_stride,
        GXS, GY_GX, GN, Ci,
    )
    call_cu_kernel(k_calcV, grid, block, *kernel_args)

    Context.synchronize()
    timecheck('calced V_cu')
示例2: init_the_device_if_needed
def init_the_device_if_needed(do_it_anyway=False):
    """Import ``pycuda.autoinit`` unless a current device can be queried.

    Args:
        do_it_anyway: When true, skip the probe and import
            ``pycuda.autoinit`` unconditionally.
    """
    if not do_it_anyway:
        try:
            Context.get_device()
        except Exception:
            # Presumably the probe failed with something like
            # "LogicError: cuCtxGetDevice failed: not initialized",
            # so fall through and initialize the device.  ``Exception``
            # (not a bare except) lets KeyboardInterrupt/SystemExit through.
            pass
        else:
            # A device is already current; nothing to do.
            return

    # Parenthesised single-argument form prints the same text on
    # Python 2 and Python 3.
    print('import pycuda.autoinit')
    import pycuda.autoinit  # noqa: F401 -- imported for its side effect
示例3: mem_alloc
def mem_alloc(nbytes):
    """Allocate ``nbytes`` of device memory from the current device's pool.

    A ``drv.DeviceMemoryPool`` is created for the current device the first
    time one is needed and is then kept in ``_pools`` for later calls.

    Args:
        nbytes (int): Size of the allocation in bytes.

    Returns:
        pycuda.tools.PooledDeviceAllocation: The allocation, tagged with an
        extra ``device`` attribute recording the device that was current
        when it was made.
    """
    current = Context.get_device()

    pool = _pools.get(current)
    if pool is None:
        pool = _pools[current] = drv.DeviceMemoryPool()

    chunk = pool.allocate(nbytes)
    chunk.device = current
    return chunk
示例4: get_device
def get_device(arg=None):
    """Resolve ``arg`` to a device object.

    Args:
        arg: Value that identifies a GPU device.

    Returns:
        The device selected by ``arg`` according to this table.

        ==================================== =====================================
         Type of ``arg``                      Return value
        ==================================== =====================================
         ``None``                             Current device
         ``int``                              Device of ID ``arg``
         :class:`~pycuda.driver.Device`       ``arg``
         :class:`~pycuda.gpuarray.GPUArray`   Device given array was allocated on
         :class:`~numpy.ndarray`              ``None``
        ==================================== =====================================

    """
    if arg is None:
        return Context.get_device()
    if isinstance(arg, Device):
        return arg
    if isinstance(arg, numpy.ndarray):
        return None
    if not isinstance(arg, GPUArray):
        # Anything else is handed to the Device constructor as-is.
        return drv.Device(arg)

    # Follow ``base`` links until reaching an array whose buffer carries
    # a ``device`` attribute.
    array = arg
    while not hasattr(array.gpudata, 'device'):
        array = array.base
    return array.gpudata.device
示例5: compile
def compile(source, nvcc="nvcc", options=None, keep=False,
        no_extern_c=False, arch=None, code=None, cache_dir=None,
        include_dirs=[], target="cubin"):
    """Assemble nvcc options for ``source`` and delegate to ``compile_plain``.

    Args:
        source: CUDA source text.  It is wrapped in an ``extern "C"`` block
            unless ``no_extern_c`` is true.
        nvcc: Forwarded to ``compile_plain``.
        options: List of nvcc options; ``None`` means a copy of
            ``DEFAULT_NVCC_FLAGS``.  The caller's list is never mutated.
        keep: Forwarded to ``compile_plain``; forced to ``True`` when
            ``CUDA_DEBUGGING`` is set.
        no_extern_c: Skip the ``extern "C"`` wrapping when true.
        arch: Value for ``-arch``.  When ``None`` it is derived from the
            current device; if that query raises ``pycuda.driver.Error`` no
            ``-arch`` option is added.
        code: Value for ``-code``; omitted when ``None``.
        cache_dir: Cache directory.  ``None`` selects (and creates) a
            per-user directory obtained from ``appdirs``; it is set to
            ``False`` when ``CUDA_DEBUGGING`` is set.
        include_dirs: Extra include directories.  The default list is only
            read, never modified.
        target: One of ``"cubin"``, ``"ptx"`` or ``"fatbin"``.

    Returns:
        Whatever ``compile_plain`` returns.
    """
    assert target in ["cubin", "ptx", "fatbin"]

    if not no_extern_c:
        source = 'extern "C" {\n%s\n}\n' % source

    if options is None:
        options = DEFAULT_NVCC_FLAGS
    # Work on a copy so neither the caller's list nor the defaults change.
    options = options[:]

    if arch is None:
        from pycuda.driver import Error
        try:
            from pycuda.driver import Context
            arch = "sm_%d%d" % Context.get_device().compute_capability()
        except Error:
            # No usable device/context: let nvcc pick its own default.
            pass

    from pycuda.driver import CUDA_DEBUGGING
    if CUDA_DEBUGGING:
        cache_dir = False
        keep = True
        options.extend(["-g", "-G"])

    if cache_dir is None:
        from os.path import join
        import appdirs
        cache_dir = join(appdirs.user_cache_dir("pycuda", "pycuda"),
                "compiler-cache-v1")

        from os import makedirs
        try:
            makedirs(cache_dir)
        except OSError as e:
            # An already existing directory is fine; anything else is not.
            from errno import EEXIST
            if e.errno != EEXIST:
                raise

    if arch is not None:
        options.extend(["-arch", arch])

    if code is not None:
        options.extend(["-code", code])

    # sys.maxsize is available on Python 2.6+ and 3.x; sys.maxint was
    # removed in Python 3 and made the darwin check raise AttributeError.
    if 'darwin' in sys.platform and sys.maxsize == 9223372036854775807:
        options.append('-m64')
    elif 'win32' in sys.platform and sys.maxsize == 9223372036854775807:
        options.append('-m64')
    elif 'win32' in sys.platform and sys.maxsize == 2147483647:
        options.append('-m32')

    # Builds a new list, so the (shared) default argument stays untouched.
    include_dirs = include_dirs + [_find_pycuda_include_path()]
    for i in include_dirs:
        options.append("-I"+i)

    return compile_plain(source, options, keep, nvcc, cache_dir, target)
示例6: compile
def compile(source, nvcc="nvcc", options=[], keep=False,
        no_extern_c=False, arch=None, code=None, cache_dir=None,
        include_dirs=[]):
    """Prepare the source, nvcc options and cache directory for a build.

    NOTE(review): the visible code ends once the cache directory exists and
    returns nothing; confirm whether further steps belong after it.

    Args:
        source: CUDA source text.  It is wrapped in an ``extern "C"`` block
            unless ``no_extern_c`` is true.
        nvcc: Not used by the visible code.
        options: List of nvcc options.  It is copied before being extended,
            so the default list is never mutated.
        keep: Forced to ``True`` when ``CUDA_DEBUGGING`` is set.
        no_extern_c: Skip the ``extern "C"`` wrapping when true.
        arch: When ``None`` it is derived from the current device; a
            ``RuntimeError`` during that query is ignored.
        code: Not used by the visible code.
        cache_dir: ``None`` selects (and creates) a per-user directory under
            the system temporary directory; it is set to ``False`` when
            ``CUDA_DEBUGGING`` is set.
        include_dirs: Not used by the visible code.
    """
    if not no_extern_c:
        source = 'extern "C" {\n%s\n}\n' % source

    # Copy first so neither the caller's list nor the default is modified.
    options = options[:]

    if arch is None:
        try:
            from pycuda.driver import Context
            arch = "sm_%d%d" % Context.get_device().compute_capability()
        except RuntimeError:
            pass

    from pycuda.driver import CUDA_DEBUGGING
    if CUDA_DEBUGGING:
        cache_dir = False
        keep = True
        options.extend(["-g", "-G"])

    if cache_dir is None:
        from os.path import join
        from tempfile import gettempdir
        cache_dir = join(gettempdir(),
                "pycuda-compiler-cache-v1-%s" % _get_per_user_string())

        from os import mkdir
        try:
            mkdir(cache_dir)
        except OSError as e:
            # "except OSError, e" only parses on Python 2; the "as" form
            # works on Python 2.6+ and 3.x.  An existing directory is fine.
            from errno import EEXIST
            if e.errno != EEXIST:
                raise
示例7: calcO
def calcO(O_cu, M_shape, M_cu):
    """Launch the ``k_calcO`` kernel and block until it has finished.

    Args:
        O_cu: Buffer passed to the kernel as its first argument.
        M_shape: Indexable whose entries 2, 0 and 4 are read as ``GK``,
            ``GN`` and ``tiles``.
        M_cu: Buffer passed to the kernel as its second argument.
    """
    gk = M_shape[2]
    gn = M_shape[0]
    tiles = M_shape[4]

    # Total work items; one launch block of 32 threads per 32 of them.
    total = gk * 32 * gn * 32 * tiles * tiles
    launch_grid = (ceil_div(total, 32), 1, 1)
    launch_block = (32, 1, 1)

    call_cu_kernel(k_calcO, launch_grid, launch_block, O_cu, M_cu, total)

    Context.synchronize()
    timecheck('calced O_cu')
示例8: calcM
def calcM(N, Co, M_cu, U_shape, U_cu, V_shape, V_cu):
    """Launch the ``k_calcM`` kernel and block until it has finished.

    Args:
        N: Only used in the diagnostic print.
        Co: Ignored; the value is recomputed from ``U_shape``.
        M_cu: Buffer passed to the kernel as its first argument.
        U_shape: Indexable; entries 2, 4 and 3 are read.
        U_cu: Buffer passed to the kernel as its second argument.
        V_shape: Indexable; entries 4 and 2 are read.
        V_cu: Buffer passed to the kernel as its third argument.
    """
    # The incoming Co is replaced by the value implied by U_shape.
    Co = (U_shape[2] - 1) * 32 + U_shape[4]
    Ci = U_shape[3]
    GK = ceil_div(Co, 32)

    tiles = V_shape[4]
    GN = V_shape[2]
    print('GK', GK, 'GN', GN, 'tiles', tiles, 'Co', Co, 'Ci', Ci, 'N', N)

    launch_grid = (tiles * tiles, 1, 1)
    launch_block = (32, 16, 1)  # 16 for intel...

    kernel_args = (M_cu, U_cu, V_cu, Ci, 1, tiles, GN, GK)
    call_cu_kernel(k_calcM, launch_grid, launch_block, *kernel_args)

    Context.synchronize()
    timecheck('calced M_cu')
示例9: ensure_pycuda_context
def ensure_pycuda_context():
    """Return the attached PyCUDA context, attaching it on first use.

    On the first call the context is obtained with ``Context.attach()`` and
    its ``detach`` method is registered with ``atexit``; later calls return
    the stored context.

    Returns:
        The value stored in the module-level ``pycuda_context``.

    Raises:
        RuntimeError: If ``Context`` is ``None`` on the first call.
    """
    global pycuda_context, pycuda_initialized

    if pycuda_initialized:
        return pycuda_context

    if Context is None:
        raise RuntimeError("PyCUDA not found or too old.")

    pycuda_context = Context.attach()
    import atexit
    atexit.register(pycuda_context.detach)

    pycuda_initialized = True
    return pycuda_context
示例10: _check_arch
def _check_arch(self, arch):
if arch is None: return
try:
from pycuda.driver import Context
capability = Context.get_device().compute_capability()
if tuple(map(int, tuple(arch.split("_")[1]))) > capability:
from warnings import warn
warn("trying to compile for a compute capability "
"higher than selected GPU")
except:
pass
示例11: init
def init(device=None):
    """Set up the per-process CUDA global state.

    Chainer keeps a CUDA context, a CUBLAS context, a random number
    generator and a device memory pool per GPU device and per process (the
    main process or one forked by :mod:`multiprocessing`).  The first call
    in a process creates that state; later calls in the same process
    return immediately.

    .. warning::

        This function also initializes PyCUDA and scikits.cuda.  Those
        packages do not support forking after initialization, so do not
        call this function before forking the process.

    :func:`shutdown` is registered with :mod:`atexit`, and the random
    number generator is seeded; a fixed seed can be supplied through the
    ``CHAINER_SEED`` environment variable.

    Args:
        device (``int`` or :class:`~pycuda.driver.Device` or ``None``):
            Device ID to initialize on; ``None`` uses the default device.

    Raises:
        RuntimeError: If the CUDA environment is not available.
    """
    global _contexts, _cublas_handles, _generators, _pid, _pools

    if not available:
        raise RuntimeError(
            'CUDA environment is not correctly set up. '
            'The original import error said: ' + str(_import_error))

    current_pid = os.getpid()
    if _pid == current_pid:
        # This process has already been initialized.
        return

    drv.init()

    if device is not None:
        device = Device(device)
        context = device.make_context()
    else:
        # Let PyCUDA choose, then ask which device it settled on.
        context = cutools.make_default_context()
        device = Context.get_device()

    _contexts = {device: context}
    _generators = {}
    _pools = {}
    _cublas_handles = {}
    cumisc.init(mem_alloc)

    seed(os.environ.get('CHAINER_SEED'))

    _pid = current_pid  # from here on this process counts as initialized
    atexit.register(shutdown)
示例12: calcU
def calcU(W_shape, W_cu, U_cu):
    """Launch the ``k_calcU`` kernel and block until it has finished.

    Args:
        W_shape: Indexable whose entries 0-3 are read as ``Ci``, ``kH``,
            ``kW`` and ``Co``.
        W_cu: Buffer passed to the kernel as its second argument.
        U_cu: Buffer passed to the kernel as its first argument.
    """
    Ci, kH, kW, Co = W_shape[0], W_shape[1], W_shape[2], W_shape[3]

    # Adapted from neon's winograd_conv.py.
    GK = ceil_div(Co, 32)

    launch_grid = (GK, Ci, 1)
    launch_block = (32, 1, 1)

    row = kW * Co
    kernel_args = (
        U_cu, W_cu,
        kH * row, row, row * 2, Co, Ci * 1152,
        Ci, GK,
    )
    call_cu_kernel(k_calcU, launch_grid, launch_block, *kernel_args)

    Context.synchronize()
    timecheck('calced U_cu')
示例13: get_cublas_handle
def get_cublas_handle():
    """Return the CUBLAS handle of the current device, creating it if needed.

    Handles are kept in ``_cublas_handles`` keyed by device, so each device
    gets ``cublas.cublasCreate()`` called for it at most once.

    Returns:
        CUBLAS handle.
    """
    current = Context.get_device()
    try:
        return _cublas_handles[current]
    except KeyError:
        pass

    handle = _cublas_handles[current] = cublas.cublasCreate()
    return handle
示例14: __init__
def __init__(self, nvcc='nvcc', link_options=None, keep=False,
        no_extern_c=False, arch=None, code=None, cache_dir=None,
        include_dirs=[], message_handler=None, log_verbose=False,
        cuda_libdir=None):
    """Create the linker and record the build settings on the instance.

    Args:
        nvcc: Stored as ``self.nvcc``.
        link_options: Passed to ``pycuda.driver.Linker``.
        keep: Stored as ``self.keep``.
        no_extern_c: Stored as ``self.no_extern_c``.
        arch: Checked with ``self._check_arch`` and stored as ``self.arch``.
        code: Stored as ``self.code``.
        cache_dir: Stored as ``self.cache_dir``.
        include_dirs: Include directories; a copy is stored as
            ``self.include_dirs``.
        message_handler: Passed to ``pycuda.driver.Linker``.
        log_verbose: Passed to ``pycuda.driver.Linker``.
        cuda_libdir: Stored as ``self.cuda_libdir``.

    Raises:
        Exception: If the current device's compute capability is below 3.5.
    """
    from pycuda.driver import Context
    compute_capability = Context.get_device().compute_capability()
    if compute_capability < (3,5):
        raise Exception('Minimum compute capability for dynamic parallelism is 3.5 (found: %u.%u)!' %
            (compute_capability[0], compute_capability[1]))

    from pycuda.driver import Linker
    self.linker = Linker(message_handler, link_options, log_verbose)
    self._check_arch(arch)
    self.nvcc = nvcc
    self.keep = keep
    self.no_extern_c = no_extern_c
    self.arch = arch
    self.code = code
    self.cache_dir = cache_dir
    # Store a copy: the default [] is one shared object, so keeping a
    # reference would let a mutation on one instance leak into every
    # instance created afterwards.
    self.include_dirs = list(include_dirs)
    self.cuda_libdir = cuda_libdir
    self.libdir, self.libptn = None, None
    self.module = None
示例15: compile
def compile(source, nvcc="nvcc", options=None, keep=False,
        no_extern_c=False, arch=None, code=None, cache_dir=None,
        include_dirs=[]):
    """Prepare the source, nvcc options and cache directory for a build.

    NOTE(review): the visible code ends once the cache directory exists and
    returns nothing; confirm whether further steps belong after it.

    Args:
        source: CUDA source text.  It is wrapped in an ``extern "C"`` block
            unless ``no_extern_c`` is true.
        nvcc: Not used by the visible code.
        options: List of nvcc options; ``None`` means a copy of
            ``DEFAULT_NVCC_FLAGS``.  The caller's list is never mutated.
        keep: Forced to ``True`` when ``CUDA_DEBUGGING`` is set.
        no_extern_c: Skip the ``extern "C"`` wrapping when true.
        arch: When ``None`` it is derived from the current device; a
            ``RuntimeError`` during that query is ignored.
        code: Not used by the visible code.
        cache_dir: ``None`` selects (and creates) a per-user directory
            obtained from ``appdirs``; it is set to ``False`` when
            ``CUDA_DEBUGGING`` is set.
        include_dirs: Not used by the visible code.
    """
    if not no_extern_c:
        source = 'extern "C" {\n%s\n}\n' % source

    if options is None:
        options = DEFAULT_NVCC_FLAGS
    # Work on a copy so neither the caller's list nor the defaults change.
    options = options[:]

    if arch is None:
        try:
            from pycuda.driver import Context
            arch = "sm_%d%d" % Context.get_device().compute_capability()
        except RuntimeError:
            pass

    from pycuda.driver import CUDA_DEBUGGING
    if CUDA_DEBUGGING:
        cache_dir = False
        keep = True
        options.extend(["-g", "-G"])

    if cache_dir is None:
        from os.path import join
        import appdirs
        # Use the locally imported join rather than relying on a
        # module-level ``os`` being in scope.
        cache_dir = join(appdirs.user_cache_dir("pycuda", "pycuda"),
                "compiler-cache-v1")

        from os import makedirs
        try:
            makedirs(cache_dir)
        except OSError as e:
            # "except OSError, e" only parses on Python 2; the "as" form
            # works on Python 2.6+ and 3.x.  An existing directory is fine.
            from errno import EEXIST
            if e.errno != EEXIST:
                raise