本文整理汇总了Python中pycuda.driver.mem_get_info函数的典型用法代码示例。如果您正苦于以下问题:Python mem_get_info函数的具体用法?Python mem_get_info怎么用?Python mem_get_info使用的例子?那么恭喜您, 这里精选的函数代码示例或许可以为您提供帮助。
在下文中一共展示了mem_get_info函数的15个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Python代码示例。
示例1: allocate
def allocate(self, size):
    """Allocate ``size`` bytes from the pool, logging the request first.

    Before delegating to ``DeviceMemoryPool.allocate`` a report is written
    to ``self.logfile``: the requested size, the free-memory figure stored
    after the previous allocation, the current ``cuda.mem_get_info()``
    values, the pool's held/active block counts, the call-site description
    and a histogram of the blocks tracked in ``self.blocks``.  When
    ``self.interactive`` is true the call then waits for the user to press
    Enter.

    The body avoids Python 2-only constructs (``print >>``, ``itervalues``,
    ``iteritems``) so that it runs unchanged on Python 2 and 3.

    :param size: number of bytes to allocate.
    :returns: the value returned by ``DeviceMemoryPool.allocate``.
    """
    import sys
    from traceback import extract_stack
    from pytools import common_prefix

    # Index 2 of a traceback stack entry is the function name.
    stack = tuple(frm[2] for frm in extract_stack())
    description = self.describe(stack, size)

    # Count the tracked blocks per (size, description) pair.
    histogram = {}
    for bsize, descr in self.blocks.values():
        key = (bsize, descr)
        histogram[key] = histogram.get(key, 0) + 1

    cpfx = common_prefix(descr for bsize, descr in histogram)

    # ``print >> None`` used to fall back to stdout; keep that behaviour.
    out = self.logfile if self.logfile is not None else sys.stdout

    header = (
        "\n Allocation of size %d occurring "
        "(mem: last_free:%d, free: %d, total:%d) (pool: held:%d, active:%d):"
        "\n at: %s" % (
            (size, self.last_free)
            + cuda.mem_get_info()
            + (self.held_blocks, self.active_blocks,
               description)))
    out.write(header + "\n")

    for (bsize, descr), count in sorted(histogram.items()):
        # Strip the shared prefix so that only the distinguishing part shows.
        line = " %s (%d bytes): %dx" % (descr[len(cpfx):], bsize, count)
        out.write(line + "\n")

    if self.interactive:
        try:
            wait_for_enter = raw_input  # Python 2
        except NameError:
            wait_for_enter = input  # Python 3
        wait_for_enter(" [Enter]")

    result = DeviceMemoryPool.allocate(self, size)
    self.blocks[result] = size, description
    # Remember the free memory right after this allocation for the next report.
    self.last_free, _ = cuda.mem_get_info()
    return result
示例2: test_memleak
def test_memleak():
    """Run the encoder test 100 times and log how much GPU memory was lost.

    Free memory on device 0 is sampled before each encoder run and once
    more after the last one; the difference between the first and the final
    sample is logged in MB.  Every sample uses its own short-lived context,
    which is released even if the memory query raises.
    """
    log.info("test_memleak()")
    from pycuda import driver

    def query_memory():
        # use the first device for this test
        device = driver.Device(0)
        context = device.make_context(flags=driver.ctx_flags.SCHED_AUTO | driver.ctx_flags.MAP_HOST)
        try:
            return driver.mem_get_info()
        finally:
            # never leave the temporary context on the stack
            context.pop()
            context.detach()

    start_free_memory = None
    for i in range(100):
        free_memory, total_memory = query_memory()
        if start_free_memory is None:
            start_free_memory = free_memory
        log.info("%s%% free_memory: %s MB, total_memory: %s MB", str(i).rjust(3), free_memory/1024/1024, total_memory/1024/1024)
        # random picture size (multiple of 8) and frame count for this round
        w = random.randint(16, 128)*8
        h = random.randint(16, 128)*8
        n = random.randint(2, 10)
        test_encoder(encoder_module, options={}, dimensions=[(w, h)], n_images=n)

    end_free_memory, _ = query_memory()
    log.info("memory lost: %s MB", (start_free_memory-end_free_memory)/1024/1024)
示例3: __init__
def __init__(self, init_data, n_generators):
    """Create a CUDA context, compile the kernels and allocate device buffers.

    :param init_data: array copied to the device; ``shape[0]`` is stored as
        ``self.width_mat`` and ``nbytes`` sizes the device allocation.
        NOTE(review): presumably a square float32 NumPy matrix -- confirm
        against the caller.
    :param n_generators: number of random generator states to allocate and
        to initialise with the ``init_rng`` kernel.
    """
    self.ctx = curr_gpu.make_context()
    self.module = pycuda.compiler.SourceModule(kernels_cuda_src, no_extern_c=True)
    # Report the free device memory before anything is allocated.
    (free, total) = cuda.mem_get_info()
    print(("Global memory occupancy:%f%% free" % (free * 100 / total)))
    print(("Global free memory :%i Mo free" % (free / 10 ** 6)))
    ################################################################################################################
    # Device copy of the input data.
    self.width_mat = np.int32(init_data.shape[0])
    # self.gpu_init_data = ga.to_gpu(init_data)
    self.gpu_init_data = cuda.mem_alloc(init_data.nbytes)
    cuda.memcpy_htod(self.gpu_init_data, init_data)
    # Zero-filled float32 host buffer with the shape of the input, plus its
    # device counterpart.
    self.cpu_new_data = np.zeros_like(init_data, dtype=np.float32)
    print("size new data = ", self.cpu_new_data.nbytes / 10 ** 6)
    # Report the free device memory again after the first allocation.
    (free, total) = cuda.mem_get_info()
    print(("Global memory occupancy:%f%% free" % (free * 100 / total)))
    print(("Global free memory :%i Mo free" % (free / 10 ** 6)))
    self.gpu_new_data = cuda.mem_alloc(self.cpu_new_data.nbytes)
    cuda.memcpy_htod(self.gpu_new_data, self.cpu_new_data)
    # self.gpu_new_data = ga.to_gpu(self.cpu_new_data)
    # One float32 accumulator per row/column of the matrix.
    self.cpu_vect_sum = np.zeros((self.width_mat,), dtype=np.float32)
    self.gpu_vect_sum = cuda.mem_alloc(self.cpu_vect_sum.nbytes)
    cuda.memcpy_htod(self.gpu_vect_sum, self.cpu_vect_sum)
    # self.gpu_vect_sum = ga.to_gpu(self.cpu_vect_sum)
    ################################################################################################################
    # Handles to the kernels of the compiled module.
    self.init_rng = self.module.get_function("init_rng")
    self.gen_rand_mat = self.module.get_function("gen_rand_mat")
    self.sum_along_axis = self.module.get_function("sum_along_axis")
    self.norm_along_axis = self.module.get_function("norm_along_axis")
    self.init_vect_sum = self.module.get_function("init_vect_sum")
    self.copy_mat = self.module.get_function("copy_mat")
    ################################################################################################################
    # One curandStateXORWOW per generator, seeded with a fixed seed of 1.
    self.n_generators = n_generators
    seed = 1
    self.rng_states = cuda.mem_alloc(
        n_generators
        * characterize.sizeof("curandStateXORWOW", "#include <curand_kernel.h>")
    )
    # 64 threads per block; the "+ 1" guarantees at least n_generators threads.
    self.init_rng(
        np.int32(n_generators),
        self.rng_states,
        np.uint64(seed),
        np.uint64(0),
        block=(64, 1, 1),
        grid=(n_generators // 64 + 1, 1),
    )
    # NOTE(review): the result of this query is not used below.
    (free, total) = cuda.mem_get_info()
    # 32x32 thread blocks; the grid is sized to cover a width_mat x width_mat
    # area (the "+ 1" rounds up and may add one spare block per axis).
    size_block_x = 32
    size_block_y = 32
    n_blocks_x = int(self.width_mat) // (size_block_x) + 1
    n_blocks_y = int(self.width_mat) // (size_block_y) + 1
    self.grid = (n_blocks_x, n_blocks_y, 1)
    self.block = (size_block_x, size_block_y, 1)
示例4: swap_out_to_CPU
def swap_out_to_CPU(elem):
    """Move one data package from GPU memory to host memory.

    :param elem: ``(u, ss, sp)`` key identifying the package in
        ``data_list``; it is moved from ``gpu_list`` to ``cpu_list``.
    :returns: ``True`` when the package was swapped out, ``False`` when the
        host does not have enough free memory or the host buffer could not
        be allocated.
    """
    u, ss, sp = elem
    dp = data_list[u][ss][sp]
    nbytes = dp.data_bytes

    # Swapping out to the CPU needs that much free host memory.
    MemFree = cpu_mem_check()
    if log_type in ['memory']:
        log_str = "CPU MEM CEHCK Before swap out: %s Free, %s Maximum, %s Want to use"%(print_bytes(MemFree),'-',print_bytes(nbytes))
        log(log_str,'memory',log_type)

    if nbytes > MemFree:
        # not enough memory for swap out to CPU
        return False

    # The check above is only a hint: another process may allocate memory
    # in the meantime, so the allocation itself can still fail.
    try:
        buf = numpy.empty((dp.data_memory_shape), dtype=dp.data_contents_memory_dtype)
    except Exception:
        # host allocation failed; unlike a bare ``except`` this lets
        # KeyboardInterrupt / SystemExit propagate
        return False

    # Copy device -> host, then release the device allocation.
    #cuda.memcpy_dtoh_async(buf, dp.devptr, stream=stream[1])
    cuda.memcpy_dtoh(buf, dp.devptr)
    ctx.synchronize()
    dp.devptr.free()

    dp.devptr = None
    dp.data = buf
    dp.data_dtype = numpy.ndarray
    dp.memory_type = 'memory'

    gpu_list.remove(elem)
    cpu_list.append(elem)

    if log_type in ['memory']:
        fm,tm = cuda.mem_get_info()
        log_str = "GPU MEM CEHCK After swap out: %s Free, %s Maximum, %s Want to use"%(print_bytes(fm),print_bytes(tm),print_bytes(nbytes))
        log(log_str,'memory',log_type)
    return True
示例5: show_GPU_mem
def show_GPU_mem():
    """Print the free and the used GPU memory in Mbytes and as a percentage.

    The driver is queried once so that all printed figures describe the
    same snapshot.  Each ``print`` receives a single argument, which
    produces the same output on Python 2 and Python 3.
    """
    import pycuda.driver as cuda

    free, total = cuda.mem_get_info()
    mem_free = float(free)
    mem_free_per = mem_free/float(total)
    mem_used = float(total - free)
    mem_used_per = mem_used/float(total)
    print('\nGPU memory available {0} Mbytes, {1} % of total \n'.format(
        mem_free/1024**2, 100*mem_free_per))
    print('GPU memory used {0} Mbytes, {1} % of total \n'.format(
        mem_used/1024**2, 100*mem_used_per))
示例6: swap_out_to_hard_disk
def swap_out_to_hard_disk(elem):
    """Move one data package from host memory to a swap file on disk.

    The package data is written to a file named
    ``'temp_data, <rank>, <u>, <ss>, <sp>'`` in the current directory and
    the package is moved from ``cpu_list`` to ``hard_list``.

    Free disk space is read with ``os.statvfs`` (in bytes).  The previous
    implementation parsed the output of ``df .``, which reports 1K blocks,
    left a ``<rank>_temp`` file behind and overwrote ``ss`` with the parsed
    tokens before ``ss`` was used in the swap file name.

    :param elem: ``(u, ss, sp)`` key identifying the package in ``data_list``.
    :returns: ``True`` when the package was written to disk, ``False`` when
        the file system does not have enough free space.
    """
    u, ss, sp = elem
    dp = data_list[u][ss][sp]
    nbytes = dp.data_bytes

    # Space available to this process on the file system that holds the
    # current directory, in bytes.
    fs_stat = os.statvfs('.')
    avail = fs_stat.f_bavail * fs_stat.f_frsize

    if log_type in ['memory']:
        log_str = "HARD disk MEM CEHCK Before swap out: %s Free, %s Maximum, %s Want to use"%(print_bytes(avail),'-',print_bytes(nbytes))
        log(log_str,'memory',log_type)

    if nbytes > avail:
        # not enough room for the swap file
        return False

    # temp file name, "temp_data, rank, u, ss, sp"
    file_name = 'temp_data, %s, %s, %s, %s'%(rank, u, ss, sp)
    with open(file_name,'wb') as f:
        f.write(dp.data)

    dp.data = None
    dp.hard_disk = file_name
    dp.memory_type = 'hard_disk'

    cpu_list.remove(elem)
    hard_list.append(elem)

    if log_type in ['memory']:
        # NOTE(review): the message is labelled "CPU" but the figures come
        # from the GPU query; kept as in the original.
        fm,tm = cuda.mem_get_info()
        log_str = "CPU MEM CEHCK After swap out: %s Free, %s Maximum, %s Want to use"%(print_bytes(fm),print_bytes(tm),print_bytes(nbytes))
        log(log_str,'memory',log_type)
    return True
示例7: run
def run(self):
    """Transform ``self.input_cpu`` on device ``self.number`` and queue the result.

    A context is created on the selected device, the input is uploaded, the
    plan is executed from ``gpu_a`` into ``gpu_b`` and the downloaded result
    is put on ``self.output_cpu``.  ``p`` and ``m`` are module-level values.
    """
    drv.init()
    plan_template = numpy.zeros((p,), dtype=numpy.complex64)

    self.dev = drv.Device(self.number)
    self.ctx = self.dev.make_context()

    # Shows whether all the memory was freed before the next allocation
    # (this does not happen in multithreading).
    mem_info = drv.mem_get_info()
    print(mem_info)

    sample_count = self.input_cpu.size
    self.gpu_a = garray.empty((sample_count,), dtype=numpy.complex64)
    self.gpu_b = garray.zeros_like(self.gpu_a)
    self.gpu_a = garray.to_gpu(self.input_cpu)

    fft_plan = Plan(plan_template.shape, context=self.ctx)
    fft_plan.execute(self.gpu_a, self.gpu_b, batch=p/m)

    self.temp = self.gpu_b.get()
    print(output_cpu._closed)
    self.output_cpu.put(self.temp)
示例8: init_module
def init_module():
    """Create the module-wide CUDA context and pre-compile all kernels.

    Does nothing when ``context_wrapper`` is already set.  Otherwise a
    device is selected, a context is created on it and stored in the
    ``context`` / ``context_wrapper`` globals, the kernel sources are
    generated into ``KERNELS_MAP`` and every kernel is compiled.  The
    context is popped again before returning, including when kernel
    generation or compilation raises.
    """
    global context, context_wrapper
    if context_wrapper is not None:
        return
    log_sys_info()
    device_id, device = select_device()
    context = device.make_context(flags=driver.ctx_flags.SCHED_YIELD | driver.ctx_flags.MAP_HOST)
    try:
        debug("testing with context=%s", context)
        debug("api version=%s", context.get_api_version())
        free, total = driver.mem_get_info()
        debug("using device %s", device_info(device))
        debug("memory: free=%sMB, total=%sMB", int(free/1024/1024), int(total/1024/1024))
        context_wrapper = CudaContextWrapper(context)

        #generate kernel sources:
        for rgb_format, yuv_formats in COLORSPACES_MAP.items():
            m = gen_rgb_to_yuv_kernels(rgb_format, yuv_formats)
            KERNELS_MAP.update(m)
        _kernel_names_ = sorted(set(x[0] for x in KERNELS_MAP.values()))
        log.info("%s csc_nvcuda kernels: %s", len(_kernel_names_), ", ".join(_kernel_names_))

        #now, pre-compile the kernels:
        for src_format, dst_format in KERNELS_MAP.keys():
            get_CUDA_kernel(device_id, src_format, dst_format)
    finally:
        # do not leave the context pushed if anything above failed
        context.pop()
示例9: filter
def filter(self, video_input):
    """
    Apply all receptive-field filters to the input video.

    ``video_input`` must have either 2 dimensions with
    ``shape[1] == self.size`` or 3 dimensions with
    ``shape[1] * shape[2] == self.size``; it is resized IN PLACE to
    ``(shape[0], self.size)``.  The filters are generated and applied in
    batches of ``self.ONE_TIME_FILTERS``, a number derived from the free
    GPU memory reported by ``cuda.mem_get_info()``.

    Returns ``d_output.T()``, where ``d_output`` was allocated with shape
    ``(self.num_neurons, video_input.shape[0])``.
    """
    if len(video_input.shape) == 2:
        # if input has 2 dimensions
        assert video_input.shape[1] == self.size
    else:
        # if input has 3 dimensions
        assert (video_input.shape[1]*video_input.shape[2] ==
                self.size)
    # rasterizing inputs: one row of self.size values per frame
    video_input.resize((video_input.shape[0], self.size))
    d_video = parray.to_gpu(video_input)
    d_output = parray.empty((self.num_neurons, video_input.shape[0]),
                            self.dtype)
    free, total = cuda.mem_get_info()
    # Number of filters (self.size elements each) that fit in 3/4 of the
    # free memory, rounded down to an even number and capped at num_neurons.
    # NOTE(review): a result of 0 or 1 becomes 0 after the rounding, and a
    # zero step makes np.arange below fail -- confirm this cannot happen.
    self.ONE_TIME_FILTERS = ((free // self.dtype.itemsize)
                             * 3 // 4 // self.size)
    self.ONE_TIME_FILTERS -= self.ONE_TIME_FILTERS % 2
    self.ONE_TIME_FILTERS = min(self.ONE_TIME_FILTERS, self.num_neurons)
    handle = la.cublashandle()
    for i in np.arange(0, self.num_neurons, self.ONE_TIME_FILTERS):
        # the last batch may be smaller than ONE_TIME_FILTERS
        Nfilters = min(self.ONE_TIME_FILTERS, self.num_neurons - i)
        self.generate_filters(startbias=i, N_filters=Nfilters)
        # the product is written into rows i .. i+Nfilters of d_output
        la.dot(self.filters, d_video, opb='t',
               C=d_output[i: i+Nfilters],
               handle=handle)
    # NOTE(review): the original indentation was lost; this statement may
    # have been the last line of the loop body -- confirm against upstream.
    del self.filters
    return d_output.T()
示例10: filter
def filter(self, V):
    """
    Filter a video V
    Must set up parameters of CS RF first

    Parameters
    ----------
    V : 3D ndarray, with shape (num_frames, Px, Py)

    Returns
    -------
    the filtered output by the gabor filters specified in self
    output is a PitchArray with shape (num_neurons, num_frames),
    jth row of which is the output of jth gabor filter
    """
    d_output = parray.empty((self.num_neurons, V.shape[0]), self.dtype)
    d_video = parray.to_gpu(V.reshape(V.shape[0], V.shape[1]*V.shape[2]))

    free, total = cuda.mem_get_info()
    # Number of filters that fit in 3/4 of the free memory.  Floor division
    # keeps the batch size an integer on Python 3 as well; with true
    # division it would become a float there.
    self.ONE_TIME_FILTERS = (free // self.dtype.itemsize) * 3 // 4 // self.Pxall // self.Pyall

    handle = la.cublashandle()
    for i in np.arange(0, self.num_neurons, self.ONE_TIME_FILTERS):
        # the last batch may be smaller than ONE_TIME_FILTERS
        Nfilters = min(self.ONE_TIME_FILTERS, self.num_neurons - i)
        self.generate_visual_receptive_fields(startbias=i, N_filters=Nfilters)
        # The output pointer is advanced by i rows of d_output (ld elements
        # of itemsize bytes each) so every batch writes its own rows.
        output_ptr = int(int(d_output.gpudata)+int(d_output.ld*i*d_output.dtype.itemsize))
        cublasDgemm(handle.handle, 't', 'n',
                    V.shape[0], int(Nfilters), self.Pxall*self.Pyall,
                    self.dx*self.dy,
                    d_video.gpudata, d_video.ld,
                    self.filters.gpudata, self.filters.ld,
                    0, output_ptr, d_output.ld)
    return d_output.T()
示例11: init_cuda
def init_cuda():
    """Initialize CUDA functionality

    This function attempts to load the necessary interfaces
    (hardware connectivity) to run CUDA-based filtering. This
    function should only need to be run once per session.

    If the config var (set via mne.set_config or in ENV)
    MNE_USE_CUDA == 'true', this function will be executed when
    importing mne. If this variable is not set, this function can
    be manually executed.

    On success the module globals ``cuda_capable`` (set to True) and the
    three element-wise kernels are populated; on any failure a warning is
    logged and ``cuda_capable`` stays False.
    """
    global cuda_capable
    global cuda_multiply_inplace_c128
    global cuda_halve_c128
    global cuda_real_c128
    if cuda_capable is True:
        logger.info("CUDA previously enabled, currently %s available memory" % sizeof_fmt(mem_get_info()[0]))
        return

    # Triage possible errors for informative messaging
    cuda_capable = False
    try:
        import pycuda.gpuarray
        import pycuda.driver
    except ImportError:
        logger.warning("module pycuda not found, CUDA not enabled")
        return
    try:
        # Initialize CUDA; happens with importing autoinit
        import pycuda.autoinit  # noqa, analysis:ignore
    except ImportError:
        logger.warning("pycuda.autoinit could not be imported, likely a hardware error, CUDA not enabled")
        return

    # Make sure scikits.cuda is installed
    try:
        from scikits.cuda import fft as cudafft
    except ImportError:
        logger.warning("module scikits.cuda not found, CUDA not enabled")
        return

    # Make our multiply inplace kernel
    from pycuda.elementwise import ElementwiseKernel

    # let's construct our own CUDA multiply in-place function
    cuda_multiply_inplace_c128 = ElementwiseKernel(
        "pycuda::complex<double> *a, pycuda::complex<double> *b", "b[i] *= a[i]", "multiply_inplace"
    )
    cuda_halve_c128 = ElementwiseKernel("pycuda::complex<double> *a", "a[i] /= 2.0", "halve_value")
    cuda_real_c128 = ElementwiseKernel("pycuda::complex<double> *a", "a[i] = real(a[i])", "real_value")

    # Make sure we can use 64-bit FFTs
    try:
        cudafft.Plan(16, np.float64, np.complex128)  # will get auto-GC'ed
    except Exception:
        # any failure of the probe means no 64-bit FFT support; unlike a bare
        # ``except`` this lets KeyboardInterrupt / SystemExit propagate
        logger.warning("Device does not support 64-bit FFTs, CUDA not enabled")
        return
    cuda_capable = True
    # Figure out limit for CUDA FFT calculations
    logger.info("Enabling CUDA with %s available memory" % sizeof_fmt(mem_get_info()[0]))
示例12: is_memory_enough
def is_memory_enough(a):
    """Return True when twice ``sys.getsizeof(a)`` is below the free GPU memory.

    :param a: object whose size is compared against the free device memory.
    :returns: ``True`` or ``False`` (the original returned ``None`` instead
        of ``False``, which is still falsy for existing callers).
    """
    try:
        rest, total = driver.mem_get_info()
    except driver.LogicError:  # child thread cannot use context from the main thread...
        # the following does not work yet
        from pycuda import tools
        import skcuda
        driver.init()
        context = tools.make_default_context()  # try to make as new context, but cannot deactivate the old context stack
        device = context.get_device()
        skcuda.misc.init_context(device)
        rest, total = driver.mem_get_info()
    return (sys.getsizeof(a) * 2) < rest
示例13: is_gpu_memory_enough
def is_gpu_memory_enough(self, a):
    """Return True when ``a`` is considered to fit into the free GPU memory.

    Without CUDA (module flag ``CUDA`` is false) the answer is always True.
    Otherwise twice ``sys.getsizeof(a)`` must be below the free device
    memory reported by ``driver.mem_get_info()``.

    :param a: object whose size is checked.
    :returns: ``True`` or ``False`` (the original returned ``None`` instead
        of ``False``, which is still falsy for existing callers).
    """
    if not CUDA:
        return True
    rest, total = driver.mem_get_info()
    return (sys.getsizeof(a) * 2) < rest
示例14: init_all_devices
def init_all_devices():
    """Initialise the CUDA driver and probe every device once.

    The result is cached in the ``DEVICES`` global: a list with the index of
    every device on which a context could be created and queried.
    ``DEVICE_INFO`` maps each probed index to its ``device_info()`` value.
    Devices that cannot map host memory are skipped, and an error on one
    device is logged without aborting the scan.

    Returns the ``DEVICES`` list (empty when no device was found).
    """
    global DEVICES, DEVICE_INFO
    if DEVICES is not None:
        # already probed: return the cached list
        return DEVICES
    log.info("CUDA initialization (this may take a few seconds)")
    driver.init()
    DEVICES = []
    DEVICE_INFO = {}
    log("CUDA driver version=%s", driver.get_driver_version())
    ngpus = driver.Device.count()
    if ngpus==0:
        log.info("CUDA %s / PyCUDA %s, no devices found", ".".join([str(x) for x in driver.get_version()]), pycuda.VERSION_TEXT)
        return DEVICES
    da = driver.device_attribute
    cf = driver.ctx_flags
    for i in range(ngpus):
        device = None
        context = None
        # fallback label, used in the error message if Device(i) itself fails
        devinfo = "gpu %i" % i
        try:
            device = driver.Device(i)
            devinfo = device_info(device)
            log(" + testing device %s: %s", i, devinfo)
            DEVICE_INFO[i] = devinfo
            host_mem = device.get_attribute(da.CAN_MAP_HOST_MEMORY)
            if not host_mem:
                log.warn("skipping device %s (cannot map host memory)", devinfo)
                continue
            context = device.make_context(flags=cf.SCHED_YIELD | cf.MAP_HOST)
            try:
                log(" created context=%s", context)
                log(" api version=%s", context.get_api_version())
                free, total = driver.mem_get_info()
                log(" memory: free=%sMB, total=%sMB", int(free/1024/1024), int(total/1024/1024))
                log(" multi-processors: %s, clock rate: %s", device.get_attribute(da.MULTIPROCESSOR_COUNT), device.get_attribute(da.CLOCK_RATE))
                log(" max block sizes: (%s, %s, %s)", device.get_attribute(da.MAX_BLOCK_DIM_X), device.get_attribute(da.MAX_BLOCK_DIM_Y), device.get_attribute(da.MAX_BLOCK_DIM_Z))
                log(" max grid sizes: (%s, %s, %s)", device.get_attribute(da.MAX_GRID_DIM_X), device.get_attribute(da.MAX_GRID_DIM_Y), device.get_attribute(da.MAX_GRID_DIM_Z))
                max_width = device.get_attribute(da.MAXIMUM_TEXTURE2D_WIDTH)
                max_height = device.get_attribute(da.MAXIMUM_TEXTURE2D_HEIGHT)
                log(" maximum texture size: %sx%s", max_width, max_height)
                log(" max pitch: %s", device.get_attribute(da.MAX_PITCH))
                SMmajor, SMminor = device.compute_capability()
                # pack major/minor into one number (major in the high nibble)
                compute = (SMmajor<<4) + SMminor
                log(" compute capability: %#x (%s.%s)", compute, SMmajor, SMminor)
                if i==0:
                    #we print the list info "header" from inside the loop
                    #so that the log output is bunched up together
                    # NOTE(review): the header is skipped when device 0 was
                    # skipped or failed above -- confirm this is acceptable.
                    log.info("CUDA %s / PyCUDA %s, found %s device%s:",
                             ".".join([str(x) for x in driver.get_version()]), pycuda.VERSION_TEXT, ngpus, engs(ngpus))
                DEVICES.append(i)
                log.info(" + %s (memory: %s%% free, compute: %s.%s)", device_info(device), 100*free/total, SMmajor, SMminor)
            finally:
                # the probing context must not stay on the context stack
                context.pop()
        except Exception as e:
            log.error("error on device %s: %s", devinfo, e)
    return DEVICES
示例15: select_device
def select_device(preferred_device_id=-1, preferred_device_name=None, min_compute=0):
    """Pick the CUDA device to use.

    Devices whose ``DEVICE_STATE`` entry is ``True`` (or missing) are tried
    first, the remaining ones only afterwards.  Within a list, a device
    matching ``preferred_device_id`` or ``preferred_device_name`` is
    returned as soon as it is found; otherwise the device with the highest
    percentage of free memory wins.  Devices below ``min_compute`` are
    ignored.

    :param preferred_device_id: device index to prefer; a negative value
        means "use the ``device-id`` preference if it is set".
    :param preferred_device_name: substring of the device info to prefer;
        ``None`` means "use the ``device-name`` preference".
    :param min_compute: minimum compute capability, encoded as
        ``(major << 4) + minor``.
    :returns: ``(device_id, device)``, or ``(-1, None)`` when no device
        qualifies.
    """
    global DEVICE_STATE
    if preferred_device_name is None:
        preferred_device_name = get_pref("device-name")
    if preferred_device_id<0:
        device_id = get_pref("device-id")
        # an unset preference must not be compared with an int
        if device_id is not None and device_id>=0:
            preferred_device_id = device_id
    devices = init_all_devices()
    free_pct = 0
    cf = driver.ctx_flags
    #split device list according to device state:
    ok_devices = [device_id for device_id in devices if DEVICE_STATE.get(device_id, True) is True]
    nok_devices = [device_id for device_id in devices if DEVICE_STATE.get(device_id, True) is not True]
    # a tuple of pairs (not a dict) guarantees that the "OK" devices are
    # tried before the failing ones on every Python version
    for list_name, device_list in (("OK", ok_devices), ("failing", nok_devices)):
        selected_device_id = None
        selected_device = None
        log("will test %s device%s from %s list: %s", len(device_list), engs(device_list), list_name, device_list)
        for device_id in device_list:
            context = None
            try:
                device = driver.Device(device_id)
                log("select_device: testing device %s: %s", device_id, device_info(device))
                context = device.make_context(flags=cf.SCHED_YIELD | cf.MAP_HOST)
                log("created context=%s", context)
                free, total = driver.mem_get_info()
                log("memory: free=%sMB, total=%sMB", int(free/1024/1024), int(total/1024/1024))
                tpct = 100*free/total
                SMmajor, SMminor = device.compute_capability()
                compute = (SMmajor<<4) + SMminor
                if compute<min_compute:
                    log("ignoring device %s: compute capability %#x (minimum %#x required)", device_info(device), compute, min_compute)
                elif device_id==preferred_device_id:
                    logfn = log
                    if len(device_list)>1:
                        logfn = log.info
                    logfn("device matches preferred device id %s: %s", preferred_device_id, device_info(device))
                    return device_id, device
                elif preferred_device_name and device_info(device).find(preferred_device_name)>=0:
                    log("device matches preferred device name: %s", preferred_device_name)
                    return device_id, device
                elif tpct>free_pct:
                    # best candidate so far: most free memory
                    selected_device = device
                    selected_device_id = device_id
                    free_pct = tpct
            finally:
                # runs for the early returns above as well
                if context:
                    context.pop()
                    context.detach()
        if selected_device_id is not None and selected_device_id>=0 and selected_device:
            logfn = log
            if len(devices)>1:
                logfn = log.info
            # report the device that was selected, not the last one tested
            logfn("selected device %s: %s", selected_device_id, device_info(selected_device))
            return selected_device_id, selected_device
    return -1, None