本文整理汇总了 Python 中 pycuda.curandom.curand 函数的典型用法代码示例（示例中通常以 `from pycuda.curandom import rand as curand` 的方式导入）。如果您正苦于以下问题：Python curand 函数的具体用法？Python curand 怎么用？Python curand 使用的例子？那么恭喜您，这里精选的函数代码示例或许可以为您提供帮助。
在下文中一共展示了 curand 函数的 15 个代码示例，这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞，您的评价将有助于系统推荐出更棒的 Python 代码示例。
示例1: test_copy
def test_copy(self):
    """Check that strided slices of a GPU array copy back like NumPy slices.

    For several array shapes, a slice taken on the GPU array and then
    downloaded with ``.get()`` must match the same slice taken from the
    downloaded host copy.
    """
    from pycuda.curandom import rand as curand

    slice_specs = [(0, 3, 1), (1, 2, 1), (0, 3, 2), (0, 3, 3)]

    # 2-d arrays, slicing the leading axis only.
    for shape in [(3, 3), (3, 1)]:
        a_gpu = curand(shape)
        a = a_gpu.get()  # download once; reused as the reference below
        for start, stop, step in slice_specs:
            sl = slice(start, stop, step)
            assert np.allclose(a_gpu[sl].get(), a[sl])

    # 3-d array, slicing the two leading axes.
    a_gpu = curand((3, 3, 3))
    a = a_gpu.get()
    for start, stop, step in slice_specs:
        sl = slice(start, stop, step)
        assert np.allclose(a_gpu[sl, sl].get(), a[sl, sl])

    # Transposed 3-d array, slicing the first and last axes.
    a_gpu = curand((3, 3, 3)).transpose((1, 2, 0))
    a = a_gpu.get()
    for start, stop, step in slice_specs:
        sl = slice(start, stop, step)
        assert np.allclose(a_gpu[sl, :, sl].get(), a[sl, :, sl])

    # 4-d should work as long as only 2 axes are discontiguous
    a_gpu = curand((3, 3, 3, 3))
    a = a_gpu.get()
    for start, stop, step in [(0, 3, 1), (1, 2, 1), (0, 3, 3)]:
        sl = slice(start, stop, step)
        assert np.allclose(a_gpu[sl, :, sl].get(), a[sl, :, sl])
示例2: test_dot
def test_dot(self):
    """Compare ``gpuarray.dot`` with ``numpy.dot`` on two random vectors.

    The GPU result must agree with the host result to a relative error
    below 1e-4.
    """
    from pycuda.curandom import rand as curand

    length = 200000

    x_dev = curand((length,))
    x_host = x_dev.get()
    y_dev = curand((length,))
    y_host = y_dev.get()

    expected = numpy.dot(x_host, y_host)
    actual = gpuarray.dot(x_dev, y_dev).get()

    rel_err = abs(actual - expected) / abs(expected)
    assert rel_err < 1e-4
示例3: test_insert_columns
def test_insert_columns(self):
    """Run 20 randomized checks of ``insert_columns``.

    Each trial draws a random dtype, matrix size, column-block width and
    offset, calls ``insert_columns(block, target, offset)`` and asserts that
    columns ``offset:offset + width`` of the target equal the block.
    """
    for _trial in range(20):
        dtype = random.choice((np.float32, np.float64))
        n_rows = np.random.randint(100, 1000)
        n_cols = np.random.randint(100, 1000)
        width = np.random.randint(1, n_cols)
        offset = np.random.randint(0, n_cols - width)

        target = curand((n_rows, n_cols), dtype)
        block = curand((n_rows, width), dtype)
        insert_columns(block, target, offset)

        written = target.get()[:, offset:offset + width]
        self.assertTrue(np.all(written == block.get()))
示例4: main
def main():
    """Benchmark the GPU dot-product kernel and print a results table.

    For each output dtype (float32, float64) and each vector length
    ``2**15 .. 2**26``, two random vectors are generated, the dot kernel is
    warmed up, and ten timed launches are accumulated via CUDA events.
    One table row per configuration reports the size, the mean time per
    launch and the derived memory bandwidth.
    """
    from pytools import Table
    from pycuda.curandom import rand as curand
    from pycuda.reduction import get_dot_kernel

    tbl = Table()
    tbl.add_row(("type", "size [MiB]", "time [ms]", "mem.bw [GB/s]"))

    warmup_runs = 3
    cnt = 10  # number of timed launches per configuration

    for dtype_out in [numpy.float32, numpy.float64]:
        for ex in range(15, 27):
            sz = 1 << ex
            # Parenthesized single-argument print works on Python 2 and 3.
            print(sz)

            a_gpu = curand((sz,))
            b_gpu = curand((sz,))
            assert sz == a_gpu.shape[0]
            assert len(a_gpu.shape) == 1

            krnl = get_dot_kernel(dtype_out, a_gpu.dtype)

            # One-element list so the nested timer can accumulate into it
            # without needing ``nonlocal``.
            elapsed = [0]

            def wrap_with_timer(f):
                """Wrap *f* so each call adds its event-timed duration to ``elapsed``."""
                def result(*args, **kwargs):
                    start = cuda.Event()
                    stop = cuda.Event()
                    start.record()
                    f(*args, **kwargs)
                    stop.record()
                    stop.synchronize()
                    elapsed[0] += stop.time_since(start)
                return result

            # warm-up
            for _ in range(warmup_runs):
                krnl(a_gpu, b_gpu)

            for _ in range(cnt):
                krnl(a_gpu, b_gpu, kernel_wrapper=wrap_with_timer)

            # Both input vectors are read once per launch.
            total_bytes = a_gpu.nbytes * 2 * cnt
            # elapsed is treated as milliseconds (see the table header).
            secs = elapsed[0] * 1e-3

            tbl.add_row((
                str(dtype_out),
                a_gpu.nbytes / (1 << 20),
                elapsed[0] / cnt,
                total_bytes / secs / 1e9,
            ))

    print(tbl)
示例5: test_dot
def test_dot(self):
    """Compare ``gpuarray.dot`` with ``np.dot`` over a range of vector lengths.

    For every length the GPU result must match the host result to a
    relative error below 1e-4.
    """
    from pycuda.curandom import rand as curand

    lengths = [2, 3, 4, 5, 6, 7, 31, 32, 33, 127, 128, 129, 255, 256, 257,
               16384 - 993, 20000]

    for n in lengths:
        x_dev = curand((n,))
        x_host = x_dev.get()
        y_dev = curand((n,))
        y_host = y_dev.get()

        expected = np.dot(x_host, y_host)
        actual = gpuarray.dot(x_dev, y_dev).get()

        rel_err = abs(actual - expected) / abs(expected)
        assert rel_err < 1e-4
示例6: test_subset_minmax
def test_subset_minmax(self):
    """Check ``gpuarray.subset_min`` against NumPy on an index subset.

    The subset is index 0 followed by every index that is not a multiple
    of ``gran``. For each supported dtype, the minimum over that subset
    computed on the GPU must equal the minimum computed on the host.
    Only the minimum is checked.
    """
    from pycuda.curandom import rand as curand

    l_a = 200000
    gran = 5
    l_m = l_a - l_a // gran + 1

    if has_double_support():
        dtypes = [np.float64, np.float32, np.int32]
    else:
        dtypes = [np.float32, np.int32]

    # The index set does not depend on the dtype under test, so build it
    # once on the host: 0, then every index that is not a multiple of gran.
    candidates = np.arange(l_a, dtype=np.int32)
    keep = (candidates == 0) | (candidates % gran != 0)
    meaningful_indices = candidates[keep][:l_m]
    assert len(meaningful_indices) == l_m
    meaningful_indices_gpu = gpuarray.to_gpu(meaningful_indices)

    for dtype in dtypes:
        a_gpu = curand((l_a,), dtype)
        a = a_gpu.get()

        min_a = np.min(a[meaningful_indices])
        min_a_gpu = gpuarray.subset_min(meaningful_indices_gpu, a_gpu).get()

        assert min_a_gpu == min_a
示例7: test_elwise_kernel
def test_elwise_kernel(self):
    """Check a custom ``ElementwiseKernel`` against GPU array arithmetic.

    The kernel computes ``z = a*x + b*y``; its output must match the same
    expression written with GPU array operators to within a norm of 1e-5.
    """
    from pycuda.curandom import rand as curand
    from pycuda.elementwise import ElementwiseKernel

    x_dev = curand((50,))
    y_dev = curand((50,))

    lin_comb = ElementwiseKernel(
        "float a, float *x, float b, float *y, float *z",
        "z[i] = a*x[i] + b*y[i]",
        "linear_combination")

    out_dev = gpuarray.empty_like(x_dev)
    lin_comb(5, x_dev, 6, y_dev, out_dev)

    reference = 5 * x_dev + 6 * y_dev
    residual = (out_dev - reference).get()
    assert la.norm(residual) < 1e-5
示例8: test_transpose
def test_transpose(self):
    """Check that ``.T`` on a 3-d GPU array matches NumPy's ``.T``."""
    import pycuda.gpuarray as gpuarray
    from pycuda.curandom import rand as curand

    dev = curand((10, 20, 30))
    host = dev.get()

    # NOTE: a general axis permutation such as transpose((1, 2, 0)) is not
    # checked here; the original author marked that case "not contiguous".
    transposed_host = dev.T.get()
    assert np.allclose(transposed_host, host.T)
示例9: test_sum
def test_sum(self):
    """Compare ``gpuarray.sum`` with ``np.sum`` on a random vector.

    The relative error between the two results must be below 1e-4.
    """
    from pycuda.curandom import rand as curand

    dev = curand((200000,))
    host = dev.get()

    expected = np.sum(host)
    actual = gpuarray.sum(dev).get()

    rel_err = abs(actual - expected) / abs(expected)
    assert rel_err < 1e-4
示例10: test_if_positive
def test_if_positive(self):
    """Check ``gpuarray.maximum`` / ``gpuarray.minimum`` against NumPy.

    The elementwise GPU results must match ``np.maximum`` / ``np.minimum``
    exactly (difference norm of 0). The GPU maximum and the host maximum
    are also printed.
    """
    from pycuda.curandom import rand as curand

    size = 20
    x_dev = curand((size,))
    y_dev = curand((size,))
    x_host = x_dev.get()
    y_host = y_dev.get()

    import pycuda.gpuarray as gpuarray

    max_dev = gpuarray.maximum(x_dev, y_dev)
    min_dev = gpuarray.minimum(x_dev, y_dev)

    print(max_dev)
    print(np.maximum(x_host, y_host))

    max_diff = max_dev.get() - np.maximum(x_host, y_host)
    assert la.norm(max_diff) == 0
    min_diff = min_dev.get() - np.minimum(x_host, y_host)
    assert la.norm(min_diff) == 0
示例11: test_view_and_strides
def test_view_and_strides(self):
    """Check that ``view()`` of a sliced GPU array keeps shape, strides and data."""
    from pycuda.curandom import rand as curand

    full = curand((5, 10), dtype=np.float32)
    window = full[:3, :5]
    viewed = window.view()

    assert viewed.shape == window.shape
    assert viewed.strides == window.strides

    expected = full.get()[:3, :5]
    assert np.array_equal(viewed.get(), expected)
示例12: test_complex_bits
def test_complex_bits(self):
    """Check ``.real``, ``.imag`` and ``.conj()`` of complex GPU arrays.

    For each supported complex dtype, a random complex GPU array is built
    from two real random arrays, and the three accessors are compared
    against the same accessors on the downloaded host array; the
    difference norm must be exactly 0.
    """
    from pycuda.curandom import rand as curand
    from pytools import match_precision

    dtypes = [np.complex64]
    if has_double_support():
        dtypes.append(np.complex128)

    n = 20
    for tp in dtypes:
        dtype = np.dtype(tp)
        real_dtype = match_precision(np.dtype(np.float64), dtype)

        re_part = curand((n,), real_dtype).astype(dtype)
        im_part = curand((n,), real_dtype).astype(dtype)
        z = re_part + 1j * im_part

        real_diff = z.get().real - z.real.get()
        assert la.norm(real_diff) == 0
        imag_diff = z.get().imag - z.imag.get()
        assert la.norm(imag_diff) == 0
        conj_diff = z.get().conj() - z.conj().get()
        assert la.norm(conj_diff) == 0
示例13: test_sum
def test_sum(self):
    """Compare ``gpuarray.sum`` with ``numpy.sum`` on a random vector.

    The relative error between the two results must be below 1e-4.
    """
    from pycuda.curandom import rand as curand
    from pycuda.reduction import get_sum_kernel

    dev = curand((200000,))
    host = dev.get()

    expected = numpy.sum(host)
    actual = gpuarray.sum(dev).get()

    rel_err = abs(actual - expected) / abs(expected)
    assert rel_err < 1e-4
示例14: test_newaxis
def test_newaxis(self):
    """Check that ``np.newaxis`` indexing gives NumPy's shape and strides."""
    import pycuda.gpuarray as gpuarray
    from pycuda.curandom import rand as curand

    dev = curand((10, 20, 30))
    host = dev.get()

    expanded_dev = dev[:, np.newaxis]
    expanded_host = host[:, np.newaxis]

    assert expanded_dev.shape == expanded_host.shape
    assert expanded_dev.strides == expanded_host.strides
示例15: test_astype
def test_astype(self):
    """Check GPU ``astype`` conversions between float32 and float64.

    Returns without checking anything when ``has_double_support()`` is
    false. float32 -> float64 must match the host conversion exactly;
    float64 -> float32 must match to a relative error below 1e-7.
    """
    from pycuda.curandom import rand as curand

    if not has_double_support():
        return

    # float32 -> float64
    single_dev = curand((2000,), dtype=np.float32)
    expected = single_dev.get().astype(np.float64)
    actual = single_dev.astype(np.float64).get()
    assert actual.dtype == np.float64
    assert la.norm(expected - actual) == 0, (expected, actual)

    # float64 -> float32
    double_dev = curand((2000,), dtype=np.float64)
    expected = double_dev.get().astype(np.float32)
    actual = double_dev.astype(np.float32).get()
    assert actual.dtype == np.float32
    assert la.norm(expected - actual) / la.norm(expected) < 1e-7