本文整理汇总了Python中pycuda.driver.memcpy_dtod函数的典型用法代码示例。如果您正苦于以下问题:Python memcpy_dtod函数的具体用法?Python memcpy_dtod怎么用?Python memcpy_dtod使用的例子?那么恭喜您,这里精选的函数代码示例或许可以为您提供帮助。
在下文中一共展示了memcpy_dtod函数的15个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Python代码示例。
示例1: _read_external_input
def _read_external_input(self):
    """Feed one buffered input frame to the synapse state and refill the buffer.

    While the input file is not exhausted, or buffered frames remain unread,
    row ``self.frame_count`` of ``self.I_ext`` is copied device-to-device into
    ``self.synapse_state`` starting at element ``self.total_synapses``.
    After ``self._one_time_import`` frames have been consumed, the next chunk
    is read from ``self.input_h5file.root.array`` into ``self.I_ext``; a short
    final chunk is zero-padded up to ``self._one_time_import`` rows.

    NOTE(review): the listing this was taken from had lost its indentation;
    the nesting below is reconstructed from the control flow - confirm.
    """
    if not self.input_eof or self.frame_count < self.frames_in_buffer:
        state_itemsize = self.synapse_state.dtype.itemsize
        dst = int(int(self.synapse_state.gpudata) +
                  self.total_synapses * state_itemsize)
        src = int(int(self.I_ext.gpudata) +
                  self.frame_count * self.I_ext.ld * self.I_ext.dtype.itemsize)
        cuda.memcpy_dtod(dst, src, self.num_input * state_itemsize)
        self.frame_count += 1
    else:
        self.logger.info('Input end of file reached. Subsequent behaviour is undefined.')

    # Only refill once the whole buffer was consumed and the file has data left.
    if self.frame_count < self._one_time_import or self.input_eof:
        return

    h5_array = self.input_h5file.root.array
    input_ld = h5_array.shape[0]
    # Read a full chunk, or whatever is left when fewer rows remain.
    stop = min(self.file_pointer + self._one_time_import, input_ld)
    h_ext = h5_array.read(self.file_pointer, stop)

    if h_ext.shape[0] == self.I_ext.shape[0]:
        self.I_ext.set(h_ext)
        self.file_pointer += self._one_time_import
    else:
        # Short final chunk: remember how many rows are real, pad the rest.
        self.frames_in_buffer = h_ext.shape[0]
        pad_shape = list(h_ext.shape)
        pad_shape[0] = self._one_time_import - h_ext.shape[0]
        # Pad with the chunk's own dtype so concatenation does not upcast.
        padding = np.zeros(pad_shape, dtype=h_ext.dtype)
        self.I_ext.set(np.concatenate((h_ext, padding), axis=0))
        self.file_pointer = input_ld

    # Restart reading at row 0 of the freshly loaded buffer.  This must also
    # happen for the padded chunk: otherwise frame_count stays >=
    # frames_in_buffer and the last partial chunk is never consumed.
    self.frame_count = 0

    if self.file_pointer == input_ld:
        self.input_eof = True
示例2: _gpuarray_copy
def _gpuarray_copy(array):
    """Return a new GPUArray holding a device-side copy of ``array``.

    The result has the same shape and dtype as ``array`` and is allocated
    with ``array.allocator``.

    Raises:
        RuntimeError: if ``array.flags.forc`` is false.
    """
    if array.flags.forc:
        duplicate = GPUArray(array.shape, array.dtype, allocator=array.allocator)
        # Raw byte copy on the device; no host round trip.
        drv.memcpy_dtod(duplicate.gpudata, array.gpudata, array.nbytes)
        return duplicate
    raise RuntimeError('only contiguous arrays may copied.')
示例3: copy
def copy(self):
    """Return a new GPUArray with the same shape, dtype and device contents.

    Raises:
        RuntimeError: if ``self.flags.forc`` is false.
    """
    if self.flags.forc:
        clone = GPUArray(self.shape, self.dtype)
        # Raw byte copy on the device; no host round trip.
        drv.memcpy_dtod(clone.gpudata, self.gpudata, self.nbytes)
        return clone
    raise RuntimeError("only contiguous arrays may copied.")
示例4: _read_external_input
def _read_external_input(self):
    """Feed one buffered input frame to the synapse state and refill the buffer.

    While the input file is not exhausted, or buffered frames remain unread,
    row ``self.frame_count`` of ``self.I_ext`` is copied device-to-device into
    ``self.synapse_state`` starting at element ``self.total_synapses``.
    After ``self._one_time_import`` frames have been consumed, the next chunk
    is read from ``self.input_h5file.root.array`` into ``self.I_ext``; a short
    final chunk is zero-padded up to ``self._one_time_import`` rows.

    NOTE(review): the listing this was taken from had lost its indentation;
    the nesting below is reconstructed from the control flow and the
    original comments - confirm.
    """
    # if eof not reached or there are frames in buffer not read
    # copy the input from buffer to synapse state array
    if not self.input_eof or self.frame_count < self.frames_in_buffer:
        state_itemsize = self.synapse_state.dtype.itemsize
        dst = int(int(self.synapse_state.gpudata) +
                  self.total_synapses * state_itemsize)
        src = int(int(self.I_ext.gpudata) +
                  self.frame_count * self.I_ext.ld * self.I_ext.dtype.itemsize)
        cuda.memcpy_dtod(dst, src, self.num_input * state_itemsize)
        self.frame_count += 1
    else:
        self.log_info("Input end of file reached. "
                      "Subsequent behaviour is undefined.")

    # if all buffer frames were read, read from file
    if self.frame_count < self._one_time_import or self.input_eof:
        return

    h5_array = self.input_h5file.root.array
    input_ld = h5_array.shape[0]
    # Read a full chunk, or whatever is left when fewer rows remain.
    stop = min(self.file_pointer + self._one_time_import, input_ld)
    h_ext = h5_array.read(self.file_pointer, stop)

    if h_ext.shape[0] == self.I_ext.shape[0]:
        self.I_ext.set(h_ext)
        self.file_pointer += self._one_time_import
    else:
        # Short final chunk: remember how many rows are real, pad the rest.
        self.frames_in_buffer = h_ext.shape[0]
        pad_shape = list(h_ext.shape)
        pad_shape[0] = self._one_time_import - h_ext.shape[0]
        # Pad with the chunk's own dtype so concatenation does not upcast.
        padding = np.zeros(pad_shape, dtype=h_ext.dtype)
        self.I_ext.set(np.concatenate((h_ext, padding), axis=0))
        self.file_pointer = input_ld

    # Restart reading at row 0 of the freshly loaded buffer.  This must also
    # happen for the padded chunk: otherwise frame_count stays >=
    # frames_in_buffer and the last partial chunk is never consumed.
    self.frame_count = 0

    if self.file_pointer == input_ld:
        self.input_eof = True
示例5: swapHashTableValues
def swapHashTableValues(new_vals):
    """Replace the module global ``table_values`` and return its old contents.

    The current bytes of ``table_values`` are saved into a freshly allocated
    device buffer, then overwritten with the bytes behind
    ``new_vals.gpudata``.  The saved device buffer is returned.

    NOTE(review): the copy length is always the size of ``table_values``;
    presumably ``new_vals`` must hold at least that many bytes - not checked.
    """
    symbol = mod.get_global('table_values')  # (device_ptr, size_in_bytes)
    table_ptr, table_nbytes = symbol

    backup = cuda.mem_alloc(table_nbytes)
    cuda.memcpy_dtod(backup, table_ptr, table_nbytes)
    cuda.memcpy_dtod(table_ptr, new_vals.gpudata, table_nbytes)
    return backup
示例6: cache_z
def cache_z(self, z):
    """Upload the real and imaginary parts of ``z`` to ``self.xd`` / ``self.yd``.

    Both parts are first materialised as aligned, writeable, owned,
    C-contiguous ``float64`` host arrays, then copied straight to the device.

    NOTE(review): ``self.xd`` and ``self.yd`` are presumably device
    allocations large enough for ``z`` - not checked here.
    """
    requirements = ['A', 'W', 'O', 'C']
    x = np.require(z.real, dtype=np.double, requirements=requirements)
    y = np.require(z.imag, dtype=np.double, requirements=requirements)
    # Copy host -> device directly rather than staging the data in
    # temporary GPU arrays and copying device -> device afterwards.
    cuda.memcpy_htod(self.xd, x)
    cuda.memcpy_htod(self.yd, y)
示例7: matvec
def matvec(self, v):
# Apply the operator to the flat vector `v` and return a flat host array.
# `v` is reshaped to (D, D), uploaded to self.xG, passed once per entry of
# A1G/A2G through the eps_l_* (self.left) or eps_r_* kernels, and the scaled
# result is accumulated into self.xG, which is downloaded and flattened.
# NOTE(review): this listing has lost its indentation; the nesting of the
# loops and branches below has to be inferred - confirm against the origin.
x = v.reshape((self.D, self.D))
# Upload the reshaped input into the device array xG.
self.xG.set(x)
#self.out2.set(self.xG)
#self.out2[:] = self.xG
# Device-to-device copy of xG into out2 (replaces the two commented-out forms).
cd.memcpy_dtod(self.out2.gpudata, self.xG.gpudata, self.xG.nbytes)
# Each pair holds a buffer and its `_p` companion; presumably the `_p`
# objects are what the batched kernels expect - TODO confirm.
out = [self.out, self.out_p]
out2 = [self.out2, self.out2_p]
if self.left: #Multiplying from the left, but x is a col. vector, so use mat_dagger
for k in range(len(self.A1G)):
if self.use_batch:
eps_l_noop_batch(out2[1], self.A1G_p[k], self.A2G_p[k], out[0],
self.tmp_p, self.tmp2_p, self.tmp2, self.hdl)
else:
eps_l_noop_strm_dev(out2[0], self.A1G[k], self.A2G[k], out[0],
self.tmp, self.tmp2, self.ones, self.zeros,
self.streams, self.hdl)
# Swap the two buffer pairs; presumably done once per loop iteration so the
# buffer just written becomes the next input - TODO confirm.
out, out2 = out2, out
# After the swap, out2[0] is taken as the result of the loop.
Ehx = out2[0]
if self.pseudo:
QEQhx = Ehx - self.lG * m.adot(self.r, x)
#res = QEQhx.mul_add(-sp.exp(-1.j * self.p), self.xG, 1)
# presumably cuBLAS axpy (xG += alpha * QEQhx over D**2 elements), so the
# result is accumulated in place in xG - confirm `cb` is a cuBLAS binding.
cb.cublasZaxpy(self.hdl, self.D**2, -sp.exp(-1.j * self.p),
QEQhx.gpudata, 1, self.xG.gpudata, 1)
res = self.xG
else:
#res = Ehx.mul_add(-sp.exp(-1.j * self.p), self.xG, 1)
cb.cublasZaxpy(self.hdl, self.D**2, -sp.exp(-1.j * self.p),
Ehx.gpudata, 1, self.xG.gpudata, 1)
res = self.xG
else:
# Right-hand case: same scheme, iterating k from the last index down to 0
# and using exp(+1j * p) instead of exp(-1j * p).
for k in range(len(self.A2G) - 1, -1, -1):
if self.use_batch:
eps_r_noop_batch(out2[1], self.A1G_p[k], self.A2G_p[k], out[0],
self.tmp_p, self.tmp2_p, self.tmp2, self.hdl)
else:
eps_r_noop_strm_dev(out2[0], self.A1G[k], self.A2G[k], out[0],
self.tmp, self.tmp2, self.ones, self.zeros,
self.streams, self.hdl)
out, out2 = out2, out
Ex = out2[0]
if self.pseudo:
QEQx = Ex - self.rG * m.adot(self.l, x)
#res = QEQx.mul_add(-sp.exp(1.j * self.p), self.xG, 1)
cb.cublasZaxpy(self.hdl, self.D**2, -sp.exp(1.j * self.p),
QEQx.gpudata, 1, self.xG.gpudata, 1)
res = self.xG
else:
#res = Ex.mul_add(-sp.exp(1.j * self.p), self.xG, 1)
cb.cublasZaxpy(self.hdl, self.D**2, -sp.exp(1.j * self.p),
Ex.gpudata, 1, self.xG.gpudata, 1)
res = self.xG
# Every branch leaves res = self.xG; download it and flatten to 1-D.
return res.get().ravel()
示例8: set_data
def set_data(filenames, file_count,subb, config, count, cur, img_mean, gpu_data, gpu_data_remote, ctx, icomm,img_batch_empty):
# Load one file of image data, subtract `img_mean`, crop/mirror it into
# `subb` minibatches held in `gpu_data`, then - once the peer reports
# "calc_finished" - copy those minibatches device-to-device into
# `gpu_data_remote` and report "copy_finished".  Returns the updated `count`.
# NOTE(review): this listing has lost its indentation; the nesting of the
# branches below has to be inferred - confirm against the origin.
# NOTE(review): `batch_size`, `mode`, `rank` and `n_files` are not defined in
# this function; presumably they are module-level globals - verify.
load_time = time.time()
data=None
# aa = config['rank']+count/subb*size
# img_list = range(aa*config['file_batch_size'],(aa+1)*config['file_batch_size'],1)
#print rank, img_list
# config['data_source'] selects the loader: 'hkl', 'lmdb' or 'both'.
# With any other value `data` stays None and the mean subtraction below
# would presumably fail.
if config['data_source'] in ['hkl','both']:
data_hkl = hkl.load(str(filenames[file_count]))# c01b
data = data_hkl
if config['data_source'] in ['lmdb', 'both']:
data_lmdb = lmdb_load_cur(cur,config,img_batch_empty)
data = data_lmdb
# With 'both', rank 0 prints a small slice of the difference between loaders.
if config['data_source']=='both':
if config['rank']==0: print (rank,(data_hkl-data_lmdb)[1,0:3,1,1].tolist())
load_time = time.time()-load_time #)*
sub_time = time.time() #(
data = data -img_mean
sub_time = time.time()-sub_time
crop_time = time.time() #(
for minibatch_index in range(subb):
count+=1
# Slice one minibatch along the last axis.
# NOTE(review): the start uses config['batch_size'] but the stop uses the
# bare name `batch_size` - confirm both hold the same value.
batch_data = data[:,:,:,minibatch_index*config['batch_size']:(minibatch_index+1)*batch_size]
if mode == 'train':
rand_arr = get_rand3d(config['random'], count+(rank+1)*n_files*(subb))
else:
# Fixed parameters outside training.
rand_arr = np.float32([0.5, 0.5, 0])
batch_data = crop_and_mirror(batch_data, rand_arr, flag_batch=config['batch_crop_mirror'],cropsize=config['input_width'])
# Upload the processed minibatch into its preload array.
gpu_data[minibatch_index].set(batch_data)
crop_time = time.time() - crop_time #)
#print 'load_time: %f (load %f, sub %f, crop %f)' % (load_time+crop_time+sub_time, load_time,sub_time, crop_time)
# wait for computation on last file to finish
msg = icomm.recv(source=MPI.ANY_SOURCE,tag=35)
assert msg == "calc_finished"
for minibatch_index in range(subb):
# copy from preload area
drv.memcpy_dtod(gpu_data_remote[minibatch_index].ptr,
gpu_data[minibatch_index].ptr,
gpu_data[minibatch_index].dtype.itemsize *
gpu_data[minibatch_index].size
)
# Synchronize the context before announcing that the copies are done.
ctx.synchronize()
icomm.isend("copy_finished",dest=0,tag=55)
return count
示例9: copy
def copy(self):
    """
    Return a duplicate of self.

    The duplicate comes from ``self._new_like_me()``; when ``self.size`` is
    non-zero, ``mem_size * dtype.itemsize`` bytes are copied into it on the
    device.
    """
    duplicate = self._new_like_me()
    if not self.size:
        # Nothing to copy for an empty array.
        return duplicate
    nbytes = self.mem_size * self.dtype.itemsize
    cuda.memcpy_dtod(duplicate.gpudata, self.gpudata, nbytes)
    return duplicate
示例10: _loadInput
def _loadInput(self, stim):
    """Push a new stimulus frame into the device frame buffer and reset responses.

    Steps, in order:
      1. validate ``stim`` and convert it to ``np.ubyte``;
      2. shift the ``self.nrT`` frames in ``self.d_stimBuf`` back by one slot
         using the ``dev_memcpy_dtod`` kernel;
      3. write the new frame into the last slot via ``dev_split_gray``;
      4. copy the whole buffer into ``self.d_scalingStimBuf``;
      5. zero ``self.d_respV1c`` and ``self.d_resp``.

    Args:
        stim: non-empty 2-D ``numpy.ndarray``.

    Raises:
        AssertionError: if ``stim`` is not exactly a ``numpy.ndarray`` or is
            empty.
        ValueError: if ``stim`` is not 2-D (from unpacking its shape).
    """
    logging.debug('loadInput')

    # shortcuts
    nrXY = self.nrX * self.nrY
    nrXYD = nrXY * self.nrDirs

    # parse input
    assert type(stim).__module__ == "numpy", "stim must be numpy array"
    assert type(stim).__name__ == "ndarray", "stim must be numpy.ndarray"
    assert stim.size > 0, "stim cannot be []"
    stim = stim.astype(np.ubyte)
    rows, cols = stim.shape
    logging.debug("- stim shape={0}x{1}".format(rows, cols))

    # shift d_stimBuf in time by 1 frame, from frame i to frame i-1
    # write our own memcpy kernel... :-(
    gdim = (int(iDivUp(nrXY, 128)), 1)
    bdim = (128, 1, 1)
    # The buffer base address does not change inside the loop.
    buf_base = np.intp(self.d_stimBuf)
    # `range` instead of the Python-2-only `xrange`; works on both versions.
    for i in range(1, self.nrT):
        self.dev_memcpy_dtod(
            buf_base + self.szXY * (i - 1),
            buf_base + self.szXY * i,
            np.int32(nrXY),
            block=bdim, grid=gdim)

    # index into d_stimBuf array to place the new stim at the end
    # (newest frame at pos: nrT-1)
    d_stimBufPt = buf_base + self.szXY * (self.nrT - 1)

    # \TODO implement RGB support
    self.dev_split_gray(
        d_stimBufPt,
        cuda.In(stim),
        np.int32(stim.size),
        block=bdim, grid=gdim)

    # create working copy of d_stimBuf
    cuda.memcpy_dtod(self.d_scalingStimBuf, self.d_stimBuf,
                     self.szXY * self.nrT)

    # reset V1complex responses to 0
    # \FIXME not sure how to use memset...doesn't seem to give expected
    # result
    # Allocate the zero buffers directly as float32 (no float64 temporary).
    cuda.memcpy_htod(self.d_respV1c, np.zeros(nrXYD, dtype=np.float32))

    # d_resp holds one response per filter (nrFilters) and scale (nrScales)
    # for every pixel location (nrX*nrY); zero it as well.
    cuda.memcpy_htod(
        self.d_resp,
        np.zeros(nrXY * self.nrFilters * self.nrScales, dtype=np.float32))
示例11: _update_buffer
def _update_buffer(self):
    """Copy the current neuron states into their row of the history buffers.

    If there are graded-potential neurons, all of ``self.V`` is copied into
    row ``gpot_current`` of ``self.buffer.gpot_buffer``.  If there are
    spiking neurons, the first ``my_num_spike_neurons`` items of
    ``self.spike_state`` are copied into row ``spike_current`` of
    ``self.buffer.spike_buffer``.  Row offsets are ``ld * itemsize`` bytes.
    """
    if self.my_num_gpot_neurons > 0:
        gpot = self.buffer.gpot_buffer
        row_bytes = self.buffer.gpot_current * gpot.ld * gpot.dtype.itemsize
        cuda.memcpy_dtod(int(gpot.gpudata) + row_bytes,
                         self.V.gpudata,
                         self.V.nbytes)

    if self.my_num_spike_neurons > 0:
        spikes = self.buffer.spike_buffer
        row_bytes = self.buffer.spike_current * spikes.ld * spikes.dtype.itemsize
        nbytes = int(self.spike_state.dtype.itemsize * self.my_num_spike_neurons)
        cuda.memcpy_dtod(int(spikes.gpudata) + row_bytes,
                         self.spike_state.gpudata,
                         nbytes)
示例12: arrayp2g
def arrayp2g(pary):
    """Convert a PitchArray to a GPUArray of the same shape and dtype.

    An empty input yields an empty GPUArray.  When ``pary.M == 1`` the data
    is copied with one device-to-device memcpy; otherwise ``PitchTrans`` is
    used with the input's leading dimension ``pary.ld``.
    """
    from pycuda.gpuarray import GPUArray

    dense = GPUArray(pary.shape, pary.dtype)
    if not pary.size:
        return dense

    if pary.M == 1:
        nbytes = pary.mem_size * pary.dtype.itemsize
        cuda.memcpy_dtod(dense.gpudata, pary.gpudata, nbytes)
    else:
        PitchTrans(pary.shape, dense.gpudata, _pd(dense.shape),
                   pary.gpudata, pary.ld, pary.dtype)
    return dense
示例13: _set_state
def _set_state(self, k, v):
    """Initialise the state array ``self.states[k]``.

    If ``k`` names an entry of ``self.params_dict``, that entry's device
    data is copied into the state array.  Otherwise the array is filled
    with a constant: ``v`` itself when it is a ``float``, else the
    class-level ``states[v]`` (``v`` must be a key of it).  The constant is
    converted with ``self.floattype``.
    """
    if k in self.params_dict:
        param = self.params_dict[k]
        cuda.memcpy_dtod(self.states[k].gpudata, param.gpudata, param.nbytes)
        return

    if isinstance(v, float):
        fill_value = v
    else:
        class_states = type(self).states
        assert(v in class_states)
        fill_value = class_states[v]
    self.states[k].fill(self.floattype(fill_value))
示例14: update
def update(self):
    """Advance the solution by one step using four pre/update kernel stages.

    ``ul`` is first saved into ``ul_prev`` on the device.  Then, for each of
    the four coefficient pairs, ``update_pre`` and ``update_ul`` are launched
    with 256-thread blocks on a ``nn // 256 + 1`` grid.

    NOTE(review): the coefficients (0, .5, .5, 1) and (1/6, 1/3, 1/3, 1/6)
    look like the classical RK4 weights - presumably this is an RK4 step;
    confirm.  Both kernel calls are assumed to sit inside the loop (the
    listing had lost its indentation).
    """
    # Scalars converted to the fixed-width types passed to the kernels.
    nn, ne, nne = np.int32([self.nn, self.ne, self.nne])
    dt, de, vf = np.float64([self.dt, self.de, self.vf])

    block = (256, 1, 1)
    grid = (self.nn // 256 + 1, 1)

    ul = self.ul_gpu
    ul_prev = self.ul_prev_gpu
    ul_tmp = self.ul_tmp_gpu
    kl = self.kl_gpu
    el_sum = self.el_sum_gpu

    stage_tmp_coeffs = np.float32([0, 0.5, 0.5, 1])
    stage_ul_coeffs = np.float32([1./6, 1./3, 1./3, 1./6])

    # Keep the state from the start of the step.
    cuda.memcpy_dtod(ul_prev, ul, self.ul.nbytes)

    for c_ul_tmp, c_ul in zip(stage_tmp_coeffs, stage_ul_coeffs):
        self.update_pre(nn, nne, vf, c_ul_tmp, ul, ul_prev, ul_tmp, kl, el_sum,
                        block=block, grid=grid)
        self.update_ul(nn, ne, nne, dt, de, vf, c_ul, ul, ul_tmp, kl, el_sum,
                       block=block, grid=grid)
示例15: stepFunction
def stepFunction():
    """Render the current concentration field, then advance the simulation.

    The output concentration is copied into the plot buffer, scaled by
    ``0.5 / max``, converted to unsigned chars and pushed to the screen
    array.  In "float" precision mode ``oneIteration_tex`` then runs
    ``nIterationsPerPlot`` times.  When plotting is enabled, every 25th call
    records the maximum and the summed concentration and redraws the plot.
    The global call counter ``animIter`` is incremented on every call.

    NOTE(review): the listing had lost its indentation; ``animIter += 1`` is
    assumed to run unconditionally - confirm.
    """
    global animIter

    cuda.memcpy_dtod(plotDataFloat_d.ptr, concentrationOut_d.ptr,
                     concentrationOut_d.nbytes)
    maxVal = gpuarray.max(plotDataFloat_d).get()
    scale = cudaPre(0.5 / maxVal)
    multiplyByScalarReal(scale, plotDataFloat_d)
    floatToUchar(plotDataFloat_d, plotDataChars_d)
    copyToScreenArray()

    # Only the "float" path is active; an alternative branch using
    # oneIteration_sh was commented out in the original.
    if cudaP == "float":
        for _ in range(nIterationsPerPlot):
            oneIteration_tex()

    if plotting and animIter % 25 == 0:
        maxVals.append(maxVal)
        sumConc.append(gpuarray.sum(concentrationIn_d).get())
        plotData(maxVals, sumConc)

    animIter += 1