本文整理汇总了Python中pycuda.gpuarray.zeros_like函数的典型用法代码示例。如果您正苦于以下问题:Python zeros_like函数的具体用法?Python zeros_like怎么用?Python zeros_like使用的例子?那么恭喜您,这里精选的函数代码示例或许可以为您提供帮助。
在下文中一共展示了zeros_like函数的15个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Python代码示例。
示例1: __init__
def __init__(self, name, input_shape, n_out, epsW=0.001, epsB=0.002,
             initW=0.01, initB=0.0, weight=None, bias=None):
    """Initialise a layer registered with the base class under type 'fc'.

    Stores the scalar settings, derives the weight shape from
    ``input_shape`` and ``n_out``, and places float32 weight and bias
    arrays on the GPU together with zero-filled gradient buffers.

    name         -- forwarded to ``Layer.__init__``.
    input_shape  -- two-element sequence unpacked as (inputSize, batchSize).
    n_out        -- stored as ``outputSize``; first dimension of the weights.
    epsW, epsB   -- stored unchanged on the instance.
    initW, initB -- factors multiplied into the standard-normal draws used
                    when ``weight`` / ``bias`` are not supplied.
    weight, bias -- optional initial values handed to ``gpuarray.to_gpu``.
    """
    Layer.__init__(self, name, 'fc')

    self.epsW, self.epsB = epsW, epsB
    self.initW, self.initB = initW, initB

    self.inputShape = input_shape
    self.inputSize, self.batchSize = input_shape
    self.outputSize = n_out
    self.weightShape = (self.outputSize, self.inputSize)

    # Weights: caller-provided values win, otherwise scaled Gaussian noise.
    initial_weight = weight
    if initial_weight is None:
        initial_weight = np.random.randn(*self.weightShape) * self.initW
    self.weight = gpuarray.to_gpu(initial_weight).astype(np.float32)

    # Biases: same rule, as an (outputSize, 1) column.
    initial_bias = bias
    if initial_bias is None:
        initial_bias = np.random.randn(self.outputSize, 1) * self.initB
    self.bias = gpuarray.to_gpu(initial_bias).astype(np.float32)

    # Gradient buffers mirror the parameter arrays and start at zero.
    self.weightGrad = gpuarray.zeros_like(self.weight)
    self.biasGrad = gpuarray.zeros_like(self.bias)
示例2: add_cld
def add_cld(
    self,
    name,
    proj_mats,
    offset_mats,
    cloud_xyz,
    kernel,
    scale_params,
    r_traj,
    r_traj_K,
    l_traj,
    l_traj_K,
    update_ptrs=False,
):
    """
    Perform the regular GPUContext.add_cld and also record trajectories.

    The four trajectory arguments are passed through ``gpu_pad`` and
    appended to the matching per-instance lists; a zero-filled array
    shaped like each padded r/l trajectory is appended to the ``*_traj_w``
    lists, and the first dimension of the unpadded ``l_traj`` / ``r_traj``
    is appended to the ``*_traj_dims`` lists.

    ``self.update_ptrs()`` is called at the end only when ``update_ptrs``
    is true.
    """
    # The base-class call is always told not to update pointers; any
    # update happens once below, after the trajectory lists are extended.
    GPUContext.add_cld(
        self, name, proj_mats, offset_mats, cloud_xyz, kernel, scale_params,
        update_ptrs=False,
    )

    traj_pad_shape = (MAX_TRAJ_LEN, DATA_DIM)
    traj_K_pad_shape = (MAX_TRAJ_LEN, MAX_CLD_SIZE)

    self.r_traj.append(gpu_pad(r_traj, traj_pad_shape))
    self.r_traj_K.append(gpu_pad(r_traj_K, traj_K_pad_shape))
    self.l_traj.append(gpu_pad(l_traj, traj_pad_shape))
    self.l_traj_K.append(gpu_pad(l_traj_K, traj_K_pad_shape))

    # Zero-filled companions for the entries that were just appended.
    self.r_traj_w.append(gpuarray.zeros_like(self.r_traj[-1]))
    self.l_traj_w.append(gpuarray.zeros_like(self.l_traj[-1]))

    # Lengths are taken from the unpadded inputs.
    self.l_traj_dims.append(l_traj.shape[0])
    self.r_traj_dims.append(r_traj.shape[0])

    if update_ptrs:
        self.update_ptrs()
示例3: __init__
def __init__(self, bend_coefs, N, QN, NON, NR, x_nd, K_nn, rot_coef,
             QN_gpu=None, WQN_gpu=None, NON_gpu=None, NHN_gpu=None):
    """Store the solver inputs and make sure their GPU buffers exist.

    Every entry of ``bend_coefs`` must be a key of ``NON``; otherwise the
    assertion below fails. Each ``*_gpu`` argument that is left as None is
    created here:

    QN_gpu  -- upload of ``QN``.
    WQN_gpu -- zero-filled array shaped like ``QN_gpu``.
    NON_gpu -- dict mapping each bending coefficient to the upload of
               ``NON[coef]``.
    NHN_gpu -- zero-filled array shaped like the ``NON_gpu`` entry of the
               first bending coefficient.

    Arguments that are supplied are stored as given.
    """
    for coef in bend_coefs:
        assert coef in NON, 'no solver found for bending coefficient {}'.format(coef)

    self.rot_coef = rot_coef
    self.n, self.d = x_nd.shape
    self.bend_coefs = bend_coefs
    self.N = N
    self.QN = QN
    self.NON = NON
    self.NR = NR
    self.x_nd = x_nd
    self.K_nn = K_nn

    ## set up GPU memory
    self.QN_gpu = gpuarray.to_gpu(self.QN) if QN_gpu is None else QN_gpu
    self.WQN_gpu = gpuarray.zeros_like(self.QN_gpu) if WQN_gpu is None else WQN_gpu

    if NON_gpu is None:
        NON_gpu = {coef: gpuarray.to_gpu(self.NON[coef]) for coef in bend_coefs}
    self.NON_gpu = NON_gpu

    if NHN_gpu is None:
        # Shaped after the entry for the first bending coefficient.
        NHN_gpu = gpuarray.zeros_like(self.NON_gpu[bend_coefs[0]])
    self.NHN_gpu = NHN_gpu

    self.valid = True
示例4: _init_weights
def _init_weights(self, weight_shape, bias_shape):
    """Create any missing parameter arrays and allocate gradient buffers.

    weight_shape -- expected shape of ``self.weight``.
    bias_shape   -- expected shape of ``self.bias``.

    When ``self.weight`` is None it is initialised to an identity matrix
    if the layer is named 'noise' (which requires a square shape), and to
    ``randn(weight_shape, np.float32) * self.initW`` otherwise. When
    ``self.bias`` is None it is filled with ``self.initB`` if that is
    positive, else with zeros. Shapes are verified with ``Assert.eq``.

    Increment buffers are created (if absent) and shape-checked only when
    ``self.momW`` is positive.
    """
    if self.weight is None:
        if self.name == 'noise':
            assert(weight_shape[0] == weight_shape[1])
            initial_weight = np.eye(weight_shape[0], dtype = np.float32)
        else:
            initial_weight = randn(weight_shape, np.float32) * self.initW
        self.weight = gpuarray.to_gpu(initial_weight)

    if self.bias is None:
        if self.initB > 0.0:
            constant_bias = np.ones(bias_shape, dtype=np.float32) * self.initB
            self.bias = gpuarray.to_gpu(constant_bias)
        else:
            self.bias = gpuarray.zeros(bias_shape, dtype=np.float32)

    Assert.eq(self.weight.shape, weight_shape)
    Assert.eq(self.bias.shape, bias_shape)

    self.weightGrad = gpuarray.zeros_like(self.weight)
    self.biasGrad = gpuarray.zeros_like(self.bias)

    # Nothing further to set up unless the weight momentum is positive.
    # NOTE(review): the bias increment is gated on momW as well, not momB
    # -- confirm this is intended.
    if not self.momW > 0.0:
        return

    if self.weightIncr is None:
        self.weightIncr = gpuarray.zeros_like(self.weight)
    if self.biasIncr is None:
        self.biasIncr = gpuarray.zeros_like(self.bias)
    Assert.eq(self.weightIncr.shape, weight_shape)
    Assert.eq(self.biasIncr.shape, bias_shape)
示例5: rfftn
def rfftn(self):
    """Run ``self.plan`` on ``self.arr`` and wrap the outputs in a CUDAArray.

    Three zero-filled arrays shaped like ``self.arr`` are allocated: one is
    passed as the second positional input of ``plan.execute`` and the other
    two receive ``data_out_re`` / ``data_out_im``.
    """
    # Note carried over from the original author: it seems one can simply
    # take half of the original fft in both output arrays to match what
    # was here before.
    zero_input, out_re, out_im = (
        gpuarray.zeros_like(self.arr) for _ in range(3)
    )
    self.plan.execute(self.arr, zero_input,
                      data_out_re=out_re, data_out_im=out_im)
    return CUDAArray(out_re, out_im)
示例6: same_reduce_multiview
def same_reduce_multiview(target, vec, num_view):
    """Launch ``_same_reduce_multiview_`` and return the summed result.

    The kernel runs in a single (1, 1) grid with one thread per element of
    ``target``. Its scratch output is reshaped to one row, reduced into a
    1x1 float32 array with ``add_row_sum_to_vec``, and the single value is
    fetched back to the host and returned.
    """
    scratch = gpuarray.zeros_like(target)
    ids = gpuarray.zeros_like(target)
    _same_reduce_multiview_(
        target, vec, scratch, ids, I(num_view),
        block=(target.size, 1, 1), grid=(1, 1),
    )

    as_row = scratch.reshape((1, scratch.size))
    total = gpuarray.to_gpu(np.zeros((1, 1)).astype(np.float32))
    add_row_sum_to_vec(total, as_row)
    return total.get()[0, 0]
示例7: __init__
def __init__(self, gpu_detector, ndaq=1):
    """Allocate the per-channel GPU buffers and load the 'daq.cu' module.

    gpu_detector -- object providing ``nchannels``, ``detector_gpu``,
                    ``solid_id_map`` and ``solid_id_to_channel_index_gpu``.
    ndaq         -- multiplier applied to ``nchannels`` when sizing the
                    buffers; stored as ``self.ndaq``.
    """
    n_slots = gpu_detector.nchannels * ndaq

    # The two earliest-time buffers are left uninitialised (ga.empty).
    self.earliest_time_gpu = ga.empty(n_slots, dtype=np.float32)
    self.earliest_time_int_gpu = ga.empty(n_slots, dtype=np.uint32)

    # The remaining buffers are zero-filled and sized after the uint32 buffer.
    template = self.earliest_time_int_gpu
    self.channel_history_gpu = ga.zeros_like(template)
    self.channel_q_int_gpu = ga.zeros_like(template)
    self.channel_q_gpu = ga.zeros(len(template), dtype=np.float32)

    # References borrowed from the detector object.
    self.detector_gpu = gpu_detector.detector_gpu
    self.solid_id_map_gpu = gpu_detector.solid_id_map
    self.solid_id_to_channel_index_gpu = gpu_detector.solid_id_to_channel_index_gpu

    self.module = get_cu_module(
        'daq.cu',
        options=cuda_options,
        include_source_directory=True,
    )
    self.gpu_funcs = GPUFuncs(self.module)

    self.ndaq = ndaq
    self.stride = gpu_detector.nchannels
示例8: ewsum
def ewsum(d_a, d_w):
    """
    Run ``ewsum_kernel`` followed by ``ewsum_sum_kernel`` and return the result.

    d_a -- GPU array; its first dimension is passed to the kernels as
           ``width`` and its element count as ``total_dim``.
    d_w -- GPU array; its first dimension is passed as ``num_w``.

    Returns a new float32 GPU array of shape
    ``(width // num_w,) + d_a.shape[1:]``.

    YORI NOTES
    This method is faster than CPU if num_w is large, and non_width is small:
    When num_w is large, the for loop is small
    When non_width is large, there are more threads necessary
    """
    width = d_a.shape[0]
    total_dim = d_a.size
    num_w = d_w.shape[0]

    # First pass writes into a scratch array shaped like the input.
    d_tmp_out = gpuarray.zeros_like(d_a)

    thread_size = min(d_a.size, MAX_BLOCK_SIZE)
    block_size = max(int(math.ceil(d_a.size / float(thread_size))), 1)
    ewsum_kernel(d_a, d_w, d_tmp_out,
                 numpy.int32(num_w), numpy.int32(width), numpy.int32(total_dim),
                 block=(thread_size,1,1), grid=(block_size,1,1))

    # TODO: There HAS to be a better way to do this
    # Floor division keeps the quotient an integer: it is used as an array
    # dimension, and a float (what "/" yields under true division) is not
    # a valid shape entry.
    x = width // num_w
    d_out = gpuarray.zeros((x,) + d_a.shape[1:], numpy.float32)

    thread_size = min(d_out.size, MAX_BLOCK_SIZE)
    block_size = max(int(math.ceil(d_out.size / float(thread_size))), 1)
    ewsum_sum_kernel(d_tmp_out, d_out,
                     numpy.int32(num_w), numpy.int32(width), numpy.int32(total_dim),
                     block=(thread_size,1,1), grid=(block_size,1,1))

    return d_out
示例9: test_cublasDcopy
def test_cublasDcopy(self):
    """cublasDcopy into a zeroed array must reproduce the source values."""
    host_values = np.random.rand(5).astype(np.float64)
    src_gpu = gpuarray.to_gpu(host_values)
    dst_gpu = gpuarray.zeros_like(src_gpu)

    # Both vectors are traversed with an increment of 1.
    cublas.cublasDcopy(
        self.cublas_handle, src_gpu.size,
        src_gpu.gpudata, 1,
        dst_gpu.gpudata, 1,
    )

    assert np.allclose(dst_gpu.get(), src_gpu.get())
示例10: execute
def execute(self):
    """Average the images from ``self.images_iterator`` on the GPU.

    The first image yielded becomes ``self.resulting_image`` and fixes the
    reference shape. Later images whose ``shape`` differs are skipped and
    not counted. The sum of the accepted images is divided by their count
    and written back into ``self.resulting_image.image``.

    Raises:
        ValueError: if the iterator yields no images. Previously this case
            reached the division with no accumulator and failed with an
            unrelated TypeError.
    """
    accumulator = None       # running sum on the GPU; None until the first image
    staging = None           # reusable GPU buffer for each subsequent image
    reference_shape = None
    img_cnt = 0

    for itr_img in self.images_iterator:
        if accumulator is None:
            # Copy the first image into a fresh host array before upload.
            nda = np.ndarray(shape=itr_img.image.shape,
                             dtype=itr_img.image.dtype)
            nda[:] = itr_img.image[:]
            self.resulting_image = itr_img
            accumulator = gpuarray.to_gpu(nda)
            staging = gpuarray.zeros_like(accumulator)
            reference_shape = itr_img.shape
            img_cnt = 1
            continue

        if reference_shape != itr_img.shape:
            # Mismatched images contribute neither to the sum nor the count.
            continue

        staging.set(itr_img.image)
        accumulator += staging
        img_cnt += 1

    if accumulator is None:
        raise ValueError('no images to average')

    accumulator /= img_cnt
    self.resulting_image.image[:] = accumulator.get()
示例11: softmax_back
def softmax_back(d_a, d_error, s):
    """Launch ``softmax_back_kernel`` and return its freshly allocated output.

    d_a, d_error -- GPU arrays handed to the kernel unchanged.
    s            -- scalar passed to the kernel as a float32.

    The output is zero-initialised with the shape of ``d_a``. The launch
    uses at most MAX_BLOCK_SIZE threads per block and enough blocks to
    cover every output element (never fewer than one).
    """
    d_out = gpuarray.zeros_like(d_a)

    n_elems = d_out.size
    threads_per_block = min(n_elems, MAX_BLOCK_SIZE)
    n_blocks = max(int(math.ceil(n_elems / float(threads_per_block))), 1)

    softmax_back_kernel(
        d_a, d_error, d_out, numpy.float32(s), numpy.int32(n_elems),
        block=(threads_per_block, 1, 1), grid=(n_blocks, 1, 1),
    )
    return d_out
示例12: map_elementwise_max
def map_elementwise_max(self, op, field_expr):
    """Evaluate ``field_expr`` and run the element-wise max kernel on it.

    op         -- not used by this method.
    field_expr -- expression evaluated through ``self.rec``.

    Returns a new GPU array, zero-initialised with the shape of the
    evaluated field, that the kernel was given as its output buffer.
    """
    source = self.rec(field_expr)
    result = gpuarray.zeros_like(source)

    # The kernel record is looked up by the dtype of the evaluated field.
    kernel_rec = self.executor.get_elwise_max_kernel(source.dtype)
    launch_grid = (kernel_rec.grid_dim, 1)
    kernel_rec.func.prepared_call(
        launch_grid, source.gpudata, result.gpudata, kernel_rec.mb_count)

    return result
示例13: rectify_back
def rectify_back(d_a, d_error, inplace=False):
    """Launch ``rectify_back_kernel`` and return the array it wrote to.

    d_a, d_error -- GPU arrays handed to the kernel unchanged.
    inplace      -- when true the kernel output is ``d_a`` itself; otherwise
                    a zero-initialised array shaped like ``d_a`` is allocated.

    The launch uses at most MAX_BLOCK_SIZE threads per block and enough
    blocks to cover every output element (never fewer than one).
    """
    d_out = d_a if inplace else gpuarray.zeros_like(d_a)

    n_elems = d_out.size
    threads_per_block = min(n_elems, MAX_BLOCK_SIZE)
    n_blocks = max(int(math.ceil(n_elems / float(threads_per_block))), 1)

    rectify_back_kernel(
        d_a, d_error, d_out, numpy.int32(n_elems),
        block=(threads_per_block, 1, 1), grid=(n_blocks, 1, 1),
    )
    return d_out
示例14: exp
def exp(d_a, mode=MathModes.ACC):
    """Return the exponential of ``d_a``, choosing the implementation by mode.

    With ``MathModes.ACC`` (the default) the call is delegated to
    ``cumath.exp``. Any other mode launches ``exp_fast_kernel`` into a
    zero-initialised array shaped like ``d_a``, using at most
    MAX_BLOCK_SIZE threads per block and at least one block.
    """
    if mode == MathModes.ACC:
        return cumath.exp(d_a)

    d_out = gpuarray.zeros_like(d_a)

    n_elems = d_a.size
    threads_per_block = min(n_elems, MAX_BLOCK_SIZE)
    n_blocks = max(int(math.ceil(n_elems / float(threads_per_block))), 1)

    exp_fast_kernel(
        d_a, d_out, numpy.int32(n_elems),
        block=(threads_per_block, 1, 1), grid=(n_blocks, 1, 1),
    )
    return d_out
示例15: __init__
def __init__(self, name, type, epsW, epsB, initW, initB, momW, momB, wc, weight, bias,
             weightIncr, biasIncr, weightShape, biasShape):
    """Initialise the layer's parameters, gradients and increment buffers.

    name, type   -- forwarded to ``Layer.__init__``.
    epsW, epsB, momW, momB, wc
                 -- scalars stored on the instance after conversion by ``F``.
    initW, initB -- stored unchanged; used to build initial values when
                    ``weight`` / ``bias`` are None.
    weight, bias -- optional initial values. A supplied ``weight`` is
                    uploaded as given; a supplied ``bias`` is additionally
                    cast to float32 on the GPU.
    weightIncr, biasIncr
                 -- optional increment arrays, uploaded as given. Only
                    consulted when ``momW`` is positive.
    weightShape, biasShape
                 -- shapes used when the corresponding array must be created.

    A short message is written to stderr whenever a supplied array is used
    instead of a fresh one. The messages are written with
    ``sys.stderr.write`` rather than the Python-2-only ``print >>`` form so
    the method runs on Python 2 and 3 with identical output.
    """
    Layer.__init__(self, name, type)

    self.epsW = F(epsW)
    self.epsB = F(epsB)
    self.initW = initW
    self.initB = initB
    self.momW = F(momW)
    self.momB = F(momB)
    self.wc = F(wc)

    if weight is None:
        self.weight = gpuarray.to_gpu(randn(weightShape, np.float32) * self.initW)
    else:
        sys.stderr.write('init weight from disk\n')
        # NOTE(review): unlike the bias, a supplied weight is not cast to
        # float32 here -- confirm callers always pass float32 data.
        self.weight = gpuarray.to_gpu(weight)

    if bias is None:
        if self.initB > 0.0:
            self.bias = gpuarray.to_gpu((np.ones(biasShape, dtype=np.float32) * self.initB))
        else:
            self.bias = gpuarray.zeros(biasShape, dtype=np.float32)
    else:
        sys.stderr.write('init bias from disk\n')
        self.bias = gpuarray.to_gpu(bias).astype(np.float32)

    self.weightGrad = gpuarray.zeros_like(self.weight)
    self.biasGrad = gpuarray.zeros_like(self.bias)

    # Both increment buffers are gated on the weight momentum, exactly as
    # in the original code.
    # NOTE(review): the bias increment may have been meant to depend on
    # momB instead -- confirm before changing.
    if self.momW > 0.0:
        if weightIncr is None:
            self.weightIncr = gpuarray.zeros_like(self.weight)
        else:
            sys.stderr.write('init weightIncr from disk\n')
            self.weightIncr = gpuarray.to_gpu(weightIncr)

        if biasIncr is None:
            self.biasIncr = gpuarray.zeros_like(self.bias)
        else:
            sys.stderr.write('init biasIncr from disk\n')
            self.biasIncr = gpuarray.to_gpu(biasIncr)