This article collects typical usage examples of the Python function pycuda.gpuarray.zeros. If you are wondering what zeros does, how it is called, or where it is useful in practice, the curated code examples below may help.
The following 15 code examples of the zeros function are ordered by popularity by default. They are shown as extracted from their original projects, so they rely on surrounding imports such as import numpy as np and import pycuda.gpuarray as gpuarray (aliased as garray or ga in some projects), as well as each project's own helper functions.
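Before the project examples, here is a minimal self-contained sketch of the basic call pattern. It is an illustration rather than part of the examples below, and it assumes pycuda is installed and a CUDA device is available, with pycuda.autoinit creating the context:

import numpy as np
import pycuda.autoinit                              # creates a CUDA context on import
import pycuda.gpuarray as gpuarray

a_gpu = gpuarray.zeros((4, 8), dtype=np.float32)    # device array filled with zeros
a_gpu += 1.0                                        # elementwise work stays on the GPU
a_host = a_gpu.get()                                # copy the result back to a numpy array
print(a_host.sum())                                 # -> 32.0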
Example 1: _initialize_gpu_ds
def _initialize_gpu_ds(self):
    """
    Setup GPU arrays.
    """
    self.synapse_state = garray.zeros(int(self.total_synapses) +
                                      len(self.input_neuron_list), np.float64)
    if self.my_num_gpot_neurons > 0:
        self.V = garray.zeros(int(self.my_num_gpot_neurons), np.float64)
    else:
        self.V = None

    if self.my_num_spike_neurons > 0:
        self.spike_state = garray.zeros(int(self.my_num_spike_neurons), np.int32)

    if len(self.public_gpot_list) > 0:
        self.public_gpot_list_g = garray.to_gpu(self.public_gpot_list)
        self.projection_gpot = garray.zeros(len(self.public_gpot_list), np.double)
        self._extract_gpot = self._extract_projection_gpot_func()

    if len(self.public_spike_list) > 0:
        self.public_spike_list_g = garray.to_gpu(
            (self.public_spike_list - self.spike_shift).astype(np.int32))
        self.projection_spike = garray.zeros(len(self.public_spike_list), np.int32)
        self._extract_spike = self._extract_projection_spike_func()
Example 2: get_next_batch
def get_next_batch(self, batch_size):
    if self._reader is None:
        self._start_read()
    if self._gpu_batch is None:
        self._fill_reserved_data()

    height, width = self._gpu_batch.data.shape
    gpu_data = self._gpu_batch.data
    gpu_labels = self._gpu_batch.labels

    if self.index + batch_size >= width:
        width = width - self.index
        labels = gpu_labels[self.index:self.index + batch_size]
        #data = gpu_data[:, self.index:self.index + batch_size]
        data = gpuarray.zeros((height, width), dtype=np.float32)
        gpu_partial_copy_to(gpu_data, data, 0, height, self.index, self.index + width)
        self.index = 0
        self._fill_reserved_data()
    else:
        labels = gpu_labels[self.index:self.index + batch_size]
        #data = gpu_data[:, self.index:self.index + batch_size]
        data = gpuarray.zeros((height, batch_size), dtype=np.float32)
        gpu_partial_copy_to(gpu_data, data, 0, height, self.index, self.index + batch_size)
        #labels = gpu_labels[self.index:self.index + batch_size]
        self.index += batch_size
    return BatchData(data, labels, self._gpu_batch.epoch)
Example 3: _initialize_gpu_ds
def _initialize_gpu_ds(self):
    """
    Setup GPU arrays.
    """
    self.synapse_state = garray.zeros(max(int(self.total_synapses) + len(self.input_neuron_list), 1), np.float64)

    if self.total_num_gpot_neurons > 0:
        self.V = garray.zeros(int(self.total_num_gpot_neurons), np.float64)
    else:
        self.V = None

    if self.total_num_spike_neurons > 0:
        self.spike_state = garray.zeros(int(self.total_num_spike_neurons), np.int32)

    self.block_extract = (256, 1, 1)
    if len(self.out_ports_ids_gpot) > 0:
        self.out_ports_ids_gpot_g = garray.to_gpu(self.out_ports_ids_gpot)
        self.sel_out_gpot_ids_g = garray.to_gpu(self.sel_out_gpot_ids)
        self._extract_gpot = self._extract_projection_gpot_func()

    if len(self.out_ports_ids_spk) > 0:
        self.out_ports_ids_spk_g = garray.to_gpu((self.out_ports_ids_spk - self.spike_shift).astype(np.int32))
        self.sel_out_spk_ids_g = garray.to_gpu(self.sel_out_spk_ids)
        self._extract_spike = self._extract_projection_spike_func()

    if self.ports_in_gpot_mem_ind is not None:
        inds = self.sel_in_gpot_ids
        self.inds_gpot = garray.to_gpu(inds)

    if self.ports_in_spk_mem_ind is not None:
        inds = self.sel_in_spk_ids
        self.inds_spike = garray.to_gpu(inds)
Example 4: riemanntheta_high_dim
def riemanntheta_high_dim(X, Yinv, T, z, g, rad, max_points=10000000):
    parRiemann = RiemannThetaCuda(1, 512)
    # initialize parRiemann
    parRiemann.compile(g)
    parRiemann.cache_omega_real(X)
    parRiemann.cache_omega_imag(Yinv, T)
    # compile the box_points program
    point_finder = func1()
    R = get_rad(T, rad)
    print(R)
    num_int_points = (2*R + 1)**g
    num_partitions = num_int_points // max_points
    num_final_partition = num_int_points - num_partitions*max_points
    osc_part = 0 + 0*1.j
    if num_partitions > 0:
        S = gpuarray.zeros(int(max_points * g), dtype=np.double)
        print("Required number of iterations")
        print(num_partitions)
        print()
        for p in range(num_partitions):
            print(p)
            print()
            S = box_points(point_finder, max_points*p, max_points*(p+1), g, R, S)
            parRiemann.cache_intpoints(S, gpu_already=True)
            osc_part += parRiemann.compute_v_without_derivs(np.array([z]))
    S = gpuarray.zeros(int((num_int_points - num_partitions*max_points)*g), dtype=np.double)
    print(num_partitions*max_points, num_int_points)
    S = box_points(point_finder, num_partitions*max_points, num_int_points, g, R, S)
    parRiemann.cache_intpoints(S, gpu_already=True)
    osc_part += parRiemann.compute_v_without_derivs(np.array([z]))
    print(osc_part)
    return osc_part
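Example 4 above walks the integer lattice in partitions of at most max_points and reuses the same kind of device buffer on every pass. The sketch below shows that reuse pattern in isolation; the arithmetic is only a stand-in for box_points and the theta-function kernels, which are specific to that project.

import numpy as np
import pycuda.autoinit
import pycuda.gpuarray as gpuarray

max_points, g = 1024, 3
S = gpuarray.zeros(max_points * g, dtype=np.double)      # scratch buffer, allocated once

total = 0.0
for p in range(4):                                       # one pass per partition
    S.fill(0.0)                                          # reset in place instead of reallocating
    S += float(p + 1)                                    # stand-in for filling S with lattice points
    total += float(gpuarray.sum(S).get())                # accumulate a partial result on the host
print(total)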
Example 5: __init__
def __init__(self, s_dict, synapse_state, dt, debug=False):
    self.debug = debug
    self.dt = dt
    self.num = len(s_dict['id'])

    self.pre = garray.to_gpu(np.asarray(s_dict['pre'], dtype=np.int32))
    self.ar = garray.to_gpu(np.asarray(s_dict['ar'], dtype=np.float64))
    self.ad = garray.to_gpu(np.asarray(s_dict['ad'], dtype=np.float64))
    self.gmax = garray.to_gpu(np.asarray(s_dict['gmax'], dtype=np.float64))
    self.a0 = garray.zeros((self.num,), dtype=np.float64)
    self.a1 = garray.zeros((self.num,), dtype=np.float64)
    self.a2 = garray.zeros((self.num,), dtype=np.float64)
    self.cond = synapse_state

    _num_dendrite_cond = np.asarray(
        [s_dict['num_dendrites_cond'][i] for i in s_dict['id']],
        dtype=np.int32).flatten()
    _num_dendrite = np.asarray(
        [s_dict['num_dendrites_I'][i] for i in s_dict['id']],
        dtype=np.int32).flatten()

    self._cum_num_dendrite = garray.to_gpu(_0_cumsum(_num_dendrite))
    self._cum_num_dendrite_cond = garray.to_gpu(_0_cumsum(_num_dendrite_cond))
    self._num_dendrite = garray.to_gpu(_num_dendrite)
    self._num_dendrite_cond = garray.to_gpu(_num_dendrite_cond)
    self._pre = garray.to_gpu(np.asarray(s_dict['I_pre'], dtype=np.int32))
    self._cond_pre = garray.to_gpu(np.asarray(s_dict['cond_pre'], dtype=np.int32))
    self._V_rev = garray.to_gpu(np.asarray(s_dict['reverse'], dtype=np.double))

    self.I = garray.zeros(self.num, np.double)
    #self._update_I_cond = self._get_update_I_cond_func()
    self._update_I_non_cond = self._get_update_I_non_cond_func()

    self.update = self._get_gpu_kernel()
Example 6: compute_v_without_derivs
def compute_v_without_derivs(self, Xs, Yinvs, Ts):
    # Turn the parts of omega into gpuarrays
    Xs = np.require(Xs, dtype=np.double, requirements=['A', 'W', 'O', 'C'])
    Yinvs = np.require(Yinvs, dtype=np.double, requirements=['A', 'W', 'O', 'C'])
    Ts = np.require(Ts, dtype=np.double, requirements=['A', 'W', 'O', 'C'])
    Xs_d = gpuarray.to_gpu(Xs)
    Yinvs_d = gpuarray.to_gpu(Yinvs)
    Ts_d = gpuarray.to_gpu(Ts)
    # Determine N = the number of integer points to sum over and
    #           K = the number of different omegas to compute the function at
    N = self.Sd.size // self.g
    K = Xs.size // (self.g**2)
    # Create room on the gpu for the real and imaginary finite sum calculations
    fsum_reald = gpuarray.zeros(N*K, dtype=np.double)
    fsum_imagd = gpuarray.zeros(N*K, dtype=np.double)
    # Turn all scalars into numpy data types
    Nd = np.int32(N)
    Kd = np.int32(K)
    gd = np.int32(self.g)
    blocksize = (self.tilewidth, self.tileheight, 1)
    gridsize = (N//self.tilewidth + 1, K//self.tileheight + 1, 1)
    self.finite_sum_without_derivs(fsum_reald, fsum_imagd, Xs_d, Yinvs_d, Ts_d,
                                   self.Sd, gd, Nd, Kd,
                                   block=blocksize,
                                   grid=gridsize)
    cuda.Context.synchronize()
    fsums_real = self.sum_reduction(fsum_reald, N, K, Kd, Nd)
    fsums_imag = self.sum_reduction(fsum_imagd, N, K, Kd, Nd)
    return fsums_real + 1.0j*fsums_imag
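Examples 6 and 10 allocate fsum_reald and fsum_imagd with gpuarray.zeros so that a custom CUDA kernel can accumulate partial results into them before a reduction. The sketch below shows the same allocate, launch, and read-back pattern with a toy kernel; the kernel body and launch sizes are illustrative and are not the Riemann theta code.

import numpy as np
import pycuda.autoinit
import pycuda.gpuarray as gpuarray
from pycuda.compiler import SourceModule

mod = SourceModule("""
__global__ void accumulate(double *out, const double *x, int n)
{
    int i = blockIdx.x * blockDim.x + threadIdx.x;
    if (i < n)
        out[i] += x[i] * x[i];    // add into the zero-initialized buffer
}
""")
accumulate = mod.get_function("accumulate")

n = 1 << 10
x_d = gpuarray.to_gpu(np.random.rand(n))                 # float64 input data
out_d = gpuarray.zeros(n, dtype=np.double)               # accumulation buffer starts at 0

block = (256, 1, 1)
grid = ((n + block[0] - 1) // block[0], 1, 1)
accumulate(out_d, x_d, np.int32(n), block=block, grid=grid)

print(out_d.get()[:4])                                   # partial results back on the host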
Example 7: prepare_for_train
def prepare_for_train(data, label):
    assert len(data.shape) == 4
    if data.shape[3] != self.batchSize:
        self.batchSize = data.shape[3]
        for l in self.layers:
            l.change_batch_size(self.batchSize)
        self.inputShapes = None
        self.imgShapes = None
        self.outputs = []
        self.grads = []
        self.local_outputs = []
        self.local_grads = []

        self.imgShapes = [(self.numColor, self.imgSize // 2, self.imgSize // 2, self.batchSize)]
        self.inputShapes = [(self.numColor * (self.imgSize ** 2) // 4, self.batchSize)]
        fc = False
        for layer in self.layers:
            outputShape = layer.get_output_shape()
            row = outputShape[0] * outputShape[1] * outputShape[2]
            col = outputShape[3]
            if layer.type == 'softmax':
                row *= comm.Get_size()
                outputShape = (outputShape[0] * comm.Get_size(), 1, 1, outputShape[3])
            self.inputShapes.append((row, col))
            self.imgShapes.append(outputShape)

            area = make_area(outputShape)
            self.outputs.append(virtual_array(rank, area=area))
            self.local_outputs.append(gpuarray.zeros((row, col), dtype=np.float32))
            inputShape = self.inputShapes[-2]
            #if layer.type == 'fc':
            #    inputShape = (inputShape[0] * comm.Get_size(), inputShape[1])
            #    self.local_grads.append(gpuarray.zeros(inputShape, dtype=np.float32))
            #    area = make_plain_area(inputShape)
            #else:
            #    self.local_grads.append(gpuarray.zeros(inputShape, dtype=np.float32))
            #    area = make_area(self.imgShapes[-2])
            #self.grads.append(virtual_array(rank, area=area))

    area = make_area((self.numColor, self.imgSize // 2, self.imgSize // 2, self.batchSize))
    self.data = virtual_array(rank,
                              local=gpuarray.to_gpu(data.__getitem__(area.to_slice())),
                              area=area)
    if not isinstance(label, GPUArray):
        self.label = gpuarray.to_gpu(label).astype(np.float32)
    else:
        self.label = label
    self.label = self.label.reshape((label.size, 1))
    self.numCase += data.shape[1]

    outputShape = self.inputShapes[-1]
    if self.output is None or self.output.shape != outputShape:
        self.output = gpuarray.zeros(outputShape, dtype=np.float32)
Example 8: logreg_cost
def logreg_cost(self, label, output):
    if self.cost.shape[0] != self.batchSize:
        self.cost = gpuarray.zeros((self.batchSize, 1), dtype=np.float32)
    maxid = gpuarray.zeros((self.batchSize, 1), dtype=np.float32)
    find_col_max_id(maxid, output)
    self.batchCorrect = same_reduce(label, maxid)
    logreg_cost_col_reduce(output, label, self.cost)
Example 9: update_ptrs
def update_ptrs(self):
    self.tps_param_ptrs = get_gpu_ptrs(self.tps_params)
    self.trans_d_ptrs = get_gpu_ptrs(self.trans_d)
    self.lin_dd_ptrs = get_gpu_ptrs(self.lin_dd)
    self.w_nd_ptrs = get_gpu_ptrs(self.w_nd)

    for b in self.bend_coefs:
        self.proj_mat_ptrs[b] = get_gpu_ptrs(self.proj_mats[b])
        self.offset_mat_ptrs[b] = get_gpu_ptrs(self.offset_mats[b])

    self.pt_ptrs = get_gpu_ptrs(self.pts)
    self.kernel_ptrs = get_gpu_ptrs(self.kernels)
    self.pt_w_ptrs = get_gpu_ptrs(self.pts_w)
    self.pt_t_ptrs = get_gpu_ptrs(self.pts_t)
    self.corr_cm_ptrs = get_gpu_ptrs(self.corr_cm)
    self.corr_rm_ptrs = get_gpu_ptrs(self.corr_rm)
    self.r_coef_ptrs = get_gpu_ptrs(self.r_coefs)
    self.c_coef_rn_ptrs = get_gpu_ptrs(self.c_coefs_rn)
    self.c_coef_cn_ptrs = get_gpu_ptrs(self.c_coefs_cn)

    # temporary space for warping cost computations
    self.warp_err = gpuarray.zeros((self.N, MAX_CLD_SIZE), np.float32)
    self.bend_res_mat = gpuarray.zeros((DATA_DIM * self.N, DATA_DIM), np.float32)
    self.bend_res = [self.bend_res_mat[i * DATA_DIM:(i + 1) * DATA_DIM] for i in range(self.N)]
    self.bend_res_ptrs = get_gpu_ptrs(self.bend_res)

    self.dims_gpu = gpuarray.to_gpu(np.array(self.dims, dtype=np.int32))
    self.ptrs_valid = True
Example 10: compute_v_without_derivs
def compute_v_without_derivs(self, Z):
    # Turn the numpy set Z into gpuarrays
    x = Z.real
    y = Z.imag
    x = np.require(x, dtype=np.double, requirements=['A', 'W', 'O', 'C'])
    y = np.require(y, dtype=np.double, requirements=['A', 'W', 'O', 'C'])
    xd = gpuarray.to_gpu(x)
    yd = gpuarray.to_gpu(y)
    self.yd = yd
    # Determine N = the number of integer points to sum over and
    #           K = the number of values to compute the function at
    N = self.Sd.size // self.g
    K = Z.size // self.g
    # Create room on the gpu for the real and imaginary finite sum calculations
    fsum_reald = gpuarray.zeros(N*K, dtype=np.double)
    fsum_imagd = gpuarray.zeros(N*K, dtype=np.double)
    # Make all scalars into numpy data types
    Nd = np.int32(N)
    Kd = np.int32(K)
    gd = np.int32(self.g)
    blocksize = (self.tilewidth, self.tileheight, 1)
    gridsize = (N//self.tilewidth + 1, K//self.tileheight + 1, 1)
    self.finite_sum_without_derivs(fsum_reald, fsum_imagd, xd, yd,
                                   self.Sd, gd, Nd, Kd,
                                   block=blocksize,
                                   grid=gridsize)
    cuda.Context.synchronize()
    fsums_real = self.sum_reduction(fsum_reald, N, K, Kd, Nd)
    fsums_imag = self.sum_reduction(fsum_imagd, N, K, Kd, Nd)
    return fsums_real + 1.0j*fsums_imag
Example 11: setup_pdf_eval
def setup_pdf_eval(self, event_hit, event_time, event_charge, min_twidth,
                   trange, min_qwidth, qrange, min_bin_content=10,
                   time_only=True):
    """Setup GPU arrays to compute PDF values for the given event.

    The pdf_eval calculation allows the PDF to be evaluated at a
    single point for each channel as the Monte Carlo is run. The
    effective bin size will be as small as (`min_twidth`,
    `min_qwidth`) around the point of interest, but will be large
    enough to ensure that `min_bin_content` Monte Carlo events
    fall into the bin.

    event_hit: ndarray
        Hit or not-hit status for each channel in the detector.
    event_time: ndarray
        Hit time for each channel in the detector. If channel
        not hit, the time will be ignored.
    event_charge: ndarray
        Integrated charge for each channel in the detector.
        If channel not hit, the charge will be ignored.
    min_twidth: float
        Minimum bin size in the time dimension
    trange: (float, float)
        Range of time dimension in PDF
    min_qwidth: float
        Minimum bin size in charge dimension
    qrange: (float, float)
        Range of charge dimension in PDF
    min_bin_content: int
        The bin will be expanded to include at least this many events
    time_only: bool
        If True, only the time observable will be used in the PDF.
    """
    self.event_nhit = count_nonzero(event_hit)

    # Define a mapping from an array of len(event_hit) to an array of length event_nhit
    self.map_hit_offset_to_channel_id = np.where(event_hit)[0].astype(np.uint32)
    self.map_hit_offset_to_channel_id_gpu = ga.to_gpu(self.map_hit_offset_to_channel_id)
    self.map_channel_id_to_hit_offset = np.maximum(0, event_hit.cumsum() - 1).astype(np.uint32)
    self.map_channel_id_to_hit_offset_gpu = ga.to_gpu(self.map_channel_id_to_hit_offset)

    self.event_hit_gpu = ga.to_gpu(event_hit.astype(np.uint32))
    self.event_time_gpu = ga.to_gpu(event_time.astype(np.float32))
    self.event_charge_gpu = ga.to_gpu(event_charge.astype(np.float32))

    self.eval_hitcount_gpu = ga.zeros(len(event_hit), dtype=np.uint32)
    self.eval_bincount_gpu = ga.zeros(len(event_hit), dtype=np.uint32)
    self.nearest_mc_gpu = ga.empty(shape=self.event_nhit * min_bin_content,
                                   dtype=np.float32)
    self.nearest_mc_gpu.fill(1e9)

    self.min_twidth = min_twidth
    self.trange = trange
    self.min_qwidth = min_qwidth
    self.qrange = qrange
    self.min_bin_content = min_bin_content

    assert time_only  # Only support time right now
    self.time_only = time_only
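Example 11 mixes two initialization styles: ga.zeros for the hit and bin counters, which must start at zero, and ga.empty followed by fill(1e9) for the nearest-Monte-Carlo distances, which need a large sentinel value instead. A short sketch of the difference, using the same ga alias:

import numpy as np
import pycuda.autoinit
import pycuda.gpuarray as ga

nchannels = 16
hitcount = ga.zeros(nchannels, dtype=np.uint32)    # zero-initialized counters
nearest = ga.empty(nchannels, dtype=np.float32)    # uninitialized device memory...
nearest.fill(1e9)                                  # ...set to a large sentinel distance
print(hitcount.get()[:4], nearest.get()[:4])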
Example 12: fprop
def fprop(self, input, output):
    max = gpuarray.zeros((1, self.batchSize), dtype=np.float32)
    col_max_reduce(max, input)
    add_vec_to_cols(input, max, output, alpha=-1)
    gpu_copy_to(cumath.exp(output), output)
    sum = gpuarray.zeros(max.shape, dtype=np.float32)
    add_col_sum_to_vec(sum, output, alpha=0)
    div_vec_to_cols(output, sum)
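Example 12 (and Example 15 below) implements a column-wise softmax: subtract each column's maximum before exponentiating so the exponentials cannot overflow, then divide by the column sums. For reference, the same computation in plain NumPy; this is a sketch of the math, not of the GPU kernels used above.

import numpy as np

def softmax_columns(x):
    # x has shape (num_classes, batch_size); each column is one sample
    shifted = x - x.max(axis=0, keepdims=True)     # subtract the column max for stability
    e = np.exp(shifted)
    return e / e.sum(axis=0, keepdims=True)

logits = np.random.randn(10, 4).astype(np.float32)
probs = softmax_columns(logits)
print(probs.sum(axis=0))                           # each column sums to 1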
Example 13: createHashTable
def createHashTable(kd, vd, capacity):
    table_capacity_gpu, _ = mod.get_global('table_capacity')
    cuda.memcpy_htod(table_capacity_gpu, np.array([capacity], dtype=np.uint32))
    # CUDA_SAFE_CALL(cudaMemcpyToSymbol(table_capacity,
    #                                   &capacity,
    #                                   sizeof(unsigned int)));

    table_vals_gpu, table_vals_size = mod.get_global('table_values')  # pointer-2-pointer
    values_gpu = gpuarray.zeros((capacity*vd, 1), dtype=np.float32)
    # values_gpu = gpuarray.zeros((capacity*vd,1), dtype=np.float32)
    # cuda.memset_d32(values_gpu.gpudata, 0, values_gpu.size)
    cuda.memcpy_dtod(table_vals_gpu, values_gpu.gpudata, table_vals_size)
    # float *values;
    # allocateCudaMemory((void**)&values, capacity*vd*sizeof(float));
    # CUDA_SAFE_CALL(cudaMemset((void *)values, 0, capacity*vd*sizeof(float)));
    # CUDA_SAFE_CALL(cudaMemcpyToSymbol(table_values,
    #                                   &values,
    #                                   sizeof(float *)));

    table_entries, table_entries_size = mod.get_global('table_entries')
    entries_gpu = gpuarray.empty((capacity*2, 1), dtype=np.int32)
    entries_gpu.fill(-1)
    # cuda.memset_d32(entries_gpu.gpudata, 1, entries_gpu.size)
    cuda.memcpy_dtod(table_entries, entries_gpu.gpudata, table_entries_size)
    # int *entries;
    # allocateCudaMemory((void **)&entries, capacity*2*sizeof(int));
    # CUDA_SAFE_CALL(cudaMemset((void *)entries, -1, capacity*2*sizeof(int)));
    # CUDA_SAFE_CALL(cudaMemcpyToSymbol(table_entries,
    #                                   &entries,
    #                                   sizeof(unsigned int *)));

    ########################################
    # Assuming LINEAR_D_MEMORY not defined #
    ########################################
    # #ifdef LINEAR_D_MEMORY
    # char *ranks;
    # allocateCudaMemory((void**)&ranks, capacity*sizeof(char));
    # CUDA_SAFE_CALL(cudaMemcpyToSymbol(table_rank,
    #                                   &ranks,
    #                                   sizeof(char *)));
    #
    # signed short *zeros;
    # allocateCudaMemory((void**)&zeros, capacity*sizeof(signed short));
    # CUDA_SAFE_CALL(cudaMemcpyToSymbol(table_zeros,
    #                                   &zeros,
    #                                   sizeof(char *)));
    #
    # #else
    table_keys_gpu, table_keys_size = mod.get_global('table_keys')
    keys_gpu = gpuarray.zeros((capacity*kd, 1), dtype=np.short)
    # keys_gpu = gpuarray.empty((capacity*kd,1), dtype=np.short)
    # cuda.memset_d32(keys_gpu.gpudata, 0, keys_gpu.size)
    cuda.memcpy_dtod(table_keys_gpu, keys_gpu.gpudata, table_keys_size)
Example 14: logreg_cost_multiview
def logreg_cost_multiview(self, label, output, num_view):
    unit = self.batch_size // num_view
    if self.cost.shape[0] != unit:
        self.cost = gpuarray.zeros((unit, 1), dtype=np.float32)
    maxid = gpuarray.zeros((self.batch_size, 1), dtype=np.float32)
    find_col_max_id(maxid, output)
    self.batchCorrect = same_reduce_multiview(label, maxid, num_view)
    tmp = gpuarray.zeros((output.shape[0], unit), dtype=np.float32)
    gpu_partial_copy_to(output, tmp, 0, output.shape[0], 0, unit)
    logreg_cost_col_reduce(tmp, label, self.cost)
Example 15: fprop
def fprop(self, input, output, train=TRAIN):
    max = gpuarray.zeros((1, self.batchSize), dtype=np.float32)
    col_max_reduce(max, input)
    add_vec_to_cols(input, max, output, alpha=-1)
    eltwise_exp(output)
    sum = gpuarray.zeros(max.shape, dtype=np.float32)
    add_col_sum_to_vec(sum, output, alpha=0)
    div_vec_to_cols(output, sum)
    if PFout:
        print_matrix(output, self.name)