本文整理匯總了Python中cudamat.empty方法的典型用法代碼示例。如果您正苦於以下問題:Python cudamat.empty方法的具體用法?Python cudamat.empty怎麽用?Python cudamat.empty使用的例子?那麽,這裏精選的方法代碼示例或許可以為您提供幫助。您也可以進一步了解該方法所在模塊cudamat的用法示例。
在下文中一共展示了cudamat.empty方法的15個代碼示例,這些例子默認根據受歡迎程度排序。您可以為喜歡或者感覺有用的代碼點讚,您的評價將有助於係統推薦出更棒的Python代碼示例。
示例1: dot
# 需要導入模塊: import cudamat [as 別名]
# 或者: from cudamat import empty [as 別名]
def dot(a1, a2):
    """
    Dot product of two arrays.

    Both arguments are first converted with as_garray. Only the 2-d by 2-d
    case is handed to _cudamat.dot; every other combination of ranks is
    reduced to that case by reshaping and calling dot() again.

    Raises ValueError when the last axis of the first operand and the
    second-to-last axis of the second operand differ in length, and
    NotImplementedError when no branch handles the combination of ranks.
    """
    lhs = as_garray(a1)
    rhs = as_garray(a2)

    # A 0-d operand turns the product into plain scaling.
    if lhs.ndim == 0 or rhs.ndim == 0:
        return lhs * rhs

    # vector . vector -> scalar
    if lhs.ndim == 1 and rhs.ndim == 1:
        if lhs is rhs:
            return sum(lhs**2)
        as_row = lhs.reshape(1, lhs.size)
        as_column = rhs.reshape(rhs.size, 1)
        return dot(as_row, as_column).item()

    # matrix . vector: the vector is treated like a column vector
    if lhs.ndim == 2 and rhs.ndim == 1:
        return dot(lhs, rhs.reshape(rhs.size, 1)).ravel()

    # vector . matrix: the vector is treated like a row vector
    if lhs.ndim == 1 and rhs.ndim == 2:
        return dot(lhs._add_axes(2), rhs)[0]

    if lhs.shape[-1] != rhs.shape[-2]:
        raise ValueError('arrays not aligned for dot product. a dot product was requested of arrays with shapes %s and %s' % (lhs.shape, rhs.shape))

    # matrix . matrix: the only case that actually reaches cudamat
    if lhs.ndim == 2 and rhs.ndim == 2:
        out_shape = (lhs.shape[0], rhs.shape[1])
        if lhs.shape[1] == 0:
            return zeros(out_shape)  # cudamat bug workaround
        product = empty(out_shape)
        if product.size != 0:
            # NOTE(review): the operands are passed in swapped order; presumably
            # this compensates for a differing memory layout in cudamat - confirm.
            _cudamat.dot(rhs._base_as_2d(), lhs._base_as_2d(), product._base_as_2d())
        return product

    # Higher ranks: flatten both operands to 2-d, multiply, restore the shape.
    # This is not necessarily fast, because a 3+-d second operand needs a transpose.
    if lhs.ndim >= 2 and rhs.ndim >= 2:
        if lhs.ndim != 2:
            lhs_2d = lhs.reshape_2d(-1)
        else:
            lhs_2d = lhs
        if rhs.ndim != 2:
            # move the contracted (second-to-last) axis to the front
            axis_order = (rhs.ndim-2,) + tuple(xrange(rhs.ndim-2)) + (rhs.ndim-1,)
            rhs_2d = rhs.transpose(axis_order).reshape_2d(1)
        else:
            rhs_2d = rhs
        out_shape = _deleteT2(lhs.shape, -1) + _deleteT2(rhs.shape, -2)
        return dot(lhs_2d, rhs_2d).reshape(out_shape)

    raise NotImplementedError('dot with arguments of shapes %s and %s' % (lhs.shape, rhs.shape))
示例2: _sig
# 需要導入模塊: import cudamat [as 別名]
# 或者: from cudamat import empty [as 別名]
def _sig(self, x, u):
    """Multiply the matrix u by the covariance matrix of x.

    We are interested in situations where n_variables >> n_samples, so the
    product is formed as x.T.dot(x.dot(u.T)) without explicitly constructing
    the covariance matrix.

    When self.gpu is set, both matrix products run through cudamat and x is
    expected to already be a cudamat matrix; otherwise x and u are used as
    numpy arrays. The result has the same shape as u.
    """
    if not self.gpu:
        projected = x.dot(u.T)
        cross = x.T.dot(projected)
    else:
        projected = cm.empty((self.n_samples, self.m))
        u_dev = cm.CUDAMatrix(u)
        cm.dot(x, u_dev.T, target=projected)
        del u_dev
        back_projected = cm.empty((self.nv, self.m))
        cm.dot(x.T, projected, target=back_projected)
        cross = back_projected.asarray()
        # drop the device buffers as soon as the host copy exists
        del projected
        del back_projected
    # Blend the sample estimate with u itself, weighted by eps**2.
    return (1 - self.eps**2) * cross.T / self.n_samples + self.eps**2 * u
示例3: _new_cm
# 需要導入模塊: import cudamat [as 別名]
# 或者: from cudamat import empty [as 別名]
def _new_cm(sizeOrShape):
    """
    Internal.
    Returns a new CUDAMatrix object of the given size.
    This is the only proc that allocs gpu mem.

    sizeOrShape is either an element count or a 2-tuple shape. For a tuple,
    the matrix is allocated by element count and then reshaped to
    (sizeOrShape[1], sizeOrShape[0]).

    An abandoned matrix of the same size is re-used when one is available.
    Otherwise the request is accounted at 4 bytes per element against
    max_memory_usage, freeing the re-use cache first when needed.

    Raises MemoryError when the request would exceed max_memory_usage, or
    when cudamat itself fails to allocate.
    """
    global __memoryInUse
    if type(sizeOrShape) == tuple:
        if _prodT(sizeOrShape) == 0:
            return _new_cm(1)  # cudamat workaround: cudamat can't handle size 0 arrays
        return _new_cm(sizeOrShape[0]*sizeOrShape[1]).reshape((sizeOrShape[1], sizeOrShape[0]))

    size = sizeOrShape
    if size == 0:
        return _cudamat.empty((1, 1))  # cudamat workaround
    if len(_cmsForReuse[size]) != 0:
        return _cm_reshape(_cmsForReuse[size].pop(), (1, size))  # re-use an abandoned cm

    _init_gpu()
    # If we're somewhat close to the limit, then free what's easy to free,
    # and hope that there are contiguous blocks available.
    if __memoryInUse + size*4*5 > max_memory_usage:
        free_reuse_cache(False)
    # If we're (still) OVER the limit, then do whatever can be done to make
    # more mem available.
    if __memoryInUse + size*4 > max_memory_usage:
        free_reuse_cache(True)  # gc.collect can take quite some time
        if __memoryInUse + size*4 > max_memory_usage:
            raise MemoryError('Gnumpy ran out of memory. Currently in use are %s; the maximum allowed is %s; so the present request for %s is refused. Free some memory and try again.' % (_n_bytes_str(__memoryInUse), _n_bytes_str(max_memory_usage), _n_bytes_str(size*4)))

    try:
        ret = _cudamat.empty((size, 1))
        __memoryInUse += size*4  # do this only if the malloc succeeded
        return ret
    # The old "except X, e" spelling is a SyntaxError on Python 3 and the bound
    # exception was never used, so the clause is written without a target.
    except _cudamat.CUDAMatException:  # this means that malloc failed
        raise MemoryError('The GPU failed to allocate the requested %d bytes of memory. This doesn\'t mean that your program is using too much memory. It does, however, mean that you should reduce the value of gnumpy.max_memory_usage (currently %s), to always have some memory unused (which is necessary to find contiguous large blocks of memory to allocate). Failing to allocate enough memory makes the GPU feel very unwell, so you are advised to restart Python now, or expect to see incoherent error messages and risk causing more serious damage.' % (size*4, str(max_memory_usage)))
示例4: _rand__base
# 需要導入模塊: import cudamat [as 別名]
# 或者: from cudamat import empty [as 別名]
def _rand__base(shapeInfo, distribution, zero_d_means_scalar):
    """
    Shared implementation behind the random-array constructors.

    shapeInfo is the tuple of shape arguments; when it holds a single sequence,
    that sequence is used as the shape and a scalar result is never produced.
    distribution is 'uniform' or 'normal' and selects the cudamat fill routine.
    When the shape is empty and zero_d_means_scalar is true, the single value
    is returned via .item() instead of as an array.
    """
    if len(shapeInfo) == 1 and _isSequence(shapeInfo[0]):
        # the shape came in as one sequence rather than as separate numbers
        zero_d_means_scalar = False
        shapeInfo = shapeInfo[0]

    ret = empty(shapeInfo)
    fillers = {'uniform': _cmType.fill_with_rand, 'normal': _cmType.fill_with_randn}
    fillers[distribution](ret._base)

    if ret.size != 0 and _doExpensiveCheck():
        assert ret.sum() < 100 + 2*ret.size, 'numerical gpu error: rand() gave a result>100'

    wants_scalar = len(shapeInfo) == 0 and zero_d_means_scalar
    return ret.item() if wants_scalar else ret
示例5: tile
# 需要導入模塊: import cudamat [as 別名]
# 或者: from cudamat import empty [as 別名]
def tile(a, reps):
    """
    Repeat `a` along its axes; `reps` holds one repetition count per axis.

    `reps` may be a single number, a sequence, or a generator. When `a` is a
    plain number, a new array of shape `reps` is allocated and `a` is assigned
    to its underlying matrix. Otherwise `a` is converted with as_garray, and
    `a` and `reps` are brought to the same number of axes before replicating.

    Note: when _prodT(reps) equals 1 the converted input itself is returned,
    not a copy.
    """
    if type(reps) in _numberTypes: reps = (reps,)
    reps = tuple(reps) # for generator expressions
    if type(a) in _numberTypes:
        # scalar input: allocate the requested shape and assign the value to it
        ret = empty(reps)
        ret._base.assign(a)
        return ret
    a = as_garray(a)
    if len(reps) > a.ndim: a = a._add_axes(len(reps))
    if len(reps) < a.ndim: reps = _extend_shape(reps, a.ndim) # now len(reps)==a.ndim
    retShape = tuple([ a.shape[i] * reps[i] for i in tuple(xrange(len(reps)))])
    if _prodT(retShape)==0: return zeros(retShape)
    if _prodT(reps)==1: return a
    for i in range(a.ndim-1): # merge replication requests on adjacent axes, for efficiency.
        if reps[i]!=1 and reps[i+1]!=1 and a.shape[i]==1: return a.reshape(_deleteT2(a.shape, i)).tile(reps[:i]+(_prodT(reps[i:i+2]),)+reps[i+2:]).reshape(map(operator.mul, a.shape, reps))
    # dataIDone: axis i of nextA has been replicated; reshape accordingly and
    # recurse with reps[i] set to 1 to handle the remaining axes.
    def dataIDone(nextA, i): return nextA.reshape(_modifyT(a.shape, i, a.shape[i]*reps[i])).tile(_modifyT(reps, i, 1))
    if reps[0]!=1: # replicating rows is easy and efficient: just repeat the data a number of times.
        temp = empty((reps[0], a.size)) # shape doesn't matter because dataIDone changes it
        tempCm = temp._base_shaped(1)
        if reps[0]>=1:
            # write one copy of the data into row 0, then keep copying the rows
            # already filled, so the filled region at most doubles per pass
            _cm_row_slice_read(tempCm, 0, 1).assign(a._base_as_row())
            nCopiesDone = 1
            while nCopiesDone < reps[0]:
                nNow = __builtin__.min(nCopiesDone, reps[0]-nCopiesDone)
                _cm_row_slice_read(tempCm, nCopiesDone, nCopiesDone + nNow).assign(_cm_row_slice_read(tempCm, 0, nNow))
                nCopiesDone += nNow
        return dataIDone(temp, 0)
    # the general case is repeating a subset (aot the whole array) n times, before moving on to the next subset
    # using a transpose with the right shape, the subsets can become columns. those can be lengthened because that is replicating rows; a second transpose makes them now-lengthened subsets again
    axis = __builtin__.min( i for i in range(a.ndim) if reps[i]!=1) # first axis that actually needs replication
    return dataIDone(a.reshape_2d(axis).T.tile((reps[axis], 1)).T, axis)
示例6: empty
# 需要導入模塊: import cudamat [as 別名]
# 或者: from cudamat import empty [as 別名]
def empty(shape):
    """
    Return a new garray of the given shape without initialising its contents.

    shape may be a sequence or a generator of axis lengths; anything else is
    treated as the length of a single axis.
    """
    shape_is_iterable = _isSequence(shape) or type(shape) == types.GeneratorType
    dims = tuple(shape) if shape_is_iterable else (shape,)
    return garray(_new_cm(_prodT(dims)), dims, None)
示例7: __init__
# 需要導入模塊: import cudamat [as 別名]
# 或者: from cudamat import empty [as 別名]
def __init__(self, data, copy=True, ndmin=0):
    """
    Build a garray from `data`; `copy` and `ndmin` mean the same as in numpy.array().

    `data` is dispatched on its type:
      * a cudamat matrix (_cmType): internal use only. The three arguments are
        then, unlike their names suggest, the ._base, .shape and ._is_alias_of
        values of the new array.
      * another garray: copied when `copy` is true, otherwise aliased.
      * a generator: materialised as a tuple and handled as a sequence.
      * a sequence: converted through numpy unless it contains garrays, in
        which case the elements are concatenated.
      * anything else: converted with numpy.array and uploaded to the device.

    Raises TypeError when numpy turns `data` into an 'object' or '|S3' dtype.
    """
    # the public parameters are only type-checked when not on the internal (_cmType) path
    if type(data)!=_cmType: assert copy in (True, False) and type(ndmin) in _numberTypes, 'garray() parameters copy=%s, ndmin=%s are not of the right type' % (str(copy), str(ndmin))
    if type(data)==_cmType: # internal use only. the 3 arguments are, unlike their names suggest, the ._base, .shape, ._is_alias_of
        self._base = data
        self._set_shape_info(copy)
        self._is_alias_of = ndmin
        if self._is_alias_of==None and track_memory_usage:
            # record which line allocated this array: per line, a count and a byte total (4 bytes per element)
            self.allocating_line = _calling_line()
            tracked_arrays[id(self)] = self
            _memoryUsers[self.allocating_line] = (_memoryUsers[self.allocating_line][0]+1, _memoryUsers[self.allocating_line][1]+self.size*4)
    elif isinstance(data, garray):
        if ndmin>0: data = data._add_axes(ndmin)
        # re-enter through the internal path: fresh storage when copying, otherwise share data._base and remember the alias
        garray.__init__(self,
            ( _new_cm(data.size).assign(data._base_as_row()) if copy else data._base),
            data.shape,
            ( None if copy else data))
    elif type(data) == types.GeneratorType: garray.__init__(self, tuple(data), ndmin=ndmin)
    elif _isSequence(data):
        if len(data)==0 or not _any2_(data, is_garray): garray.__init__(self, numpy.array(data, ndmin=ndmin), copy=False)
        else: garray.__init__(self, concatenate( as_garray(element)[None] for element in data), ndmin=ndmin) # no need to copy, because concat copies.
    else: # remaining cases. essentially init from numpy array.
        npa = numpy.array(data, copy=False) # in case data was a number
        if str(npa.dtype) in ('object', '|S3'): raise TypeError('Cannot convert "%s" to a garray.' % data)
        # we're not using the cudamat constructor, because that always allocs gpu mem, and this way the mem may come from re-use.
        cm = _new_cm(npa.size)
        if not hasattr(cm, 'numpy_array'):
            #cm.copy_to_host() # if cm was created using cudamat.empty, this is needed to associate cm with a numpy array
            # follows an inlined version of the relevant portion of cm.copy_to_host(). This is quicker because it doesn't actually copy.
            cm.numpy_array = numpy.empty((cm.mat.size[0], cm.mat.size[1]), dtype=numpy.float32, order='F')
            cm.mat.data_host = cm.numpy_array.ctypes.data_as(_ctypes.POINTER(_ctypes.c_float))
            cm.mat.on_host = 1
        if npa.size!=0: cm.numpy_array[:] = npa.reshape((-1, 1), order='C') # no cudamat.reformat is needed, because that's only dtype and order change, which are handled by the assignment anyway
        cm.copy_to_device()
        garray.__init__(self, cm, _extend_shape(npa.shape, ndmin), None)
示例8: T
# 需要導入模塊: import cudamat [as 別名]
# 或者: from cudamat import empty [as 別名]
def T(self):
    """
    Return the transpose of this array as a new array.

    Arrays that are not 2-d are delegated to self.transpose(). For 2-d arrays
    the work is done by cudamat, with workarounds for empty arrays and for
    arrays with more than a million rows or columns, which are processed in
    slices of a million.
    """
    # NOTE(review): slices below are transposed via attribute access (".T"),
    # so this is presumably exposed as a property - confirm at the class level.
    if self.ndim != 2:
        return self.transpose()

    flipped = tuple(reversed(self.shape))
    if self.size == 0:
        return self.reshape(flipped)  # cudamat bug workaround

    step = 10**6
    if self.shape[1] > 1e6:
        # cudamat bug workaround. with 2m columns it fails
        n_pieces = (self.shape[1] + step - 1) // step
        pieces = [self[:, k*step : (k+1)*step].T for k in range(n_pieces)]
        return concatenate(pieces)

    if self.shape[0] > 1e6:
        # cudamat bug workaround. using concat is not an option, because that uses transpose.
        result = empty(flipped)
        n_pieces = (self.shape[0] + step - 1) // step
        for k in range(n_pieces):
            result[:, k*step : (k+1)*step] = self[k*step : (k+1)*step].T
        return result

    # _base case: let cudamat transpose into freshly allocated storage
    return garray(self._base_as_2d().transpose(_new_cm(flipped)), flipped, None)
示例9: transpose
# 需要導入模塊: import cudamat [as 別名]
# 或者: from cudamat import empty [as 別名]
def transpose(self, *axes):
    """
    like numpy.transpose, except that this doesn't return an alias, but rather a new array.

    `axes` may be given as separate numbers or as one iterable. When it equals
    _t0 (presumably the empty tuple - confirm), the axis order is reversed.
    Raises ValueError when `axes` is not a permutation of range(self.ndim).
    """
    # This is not really supported by cudamat, so it takes creativity. I handle a variety of cases differently.
    if len(axes)==1 and not type(axes[0]) in _numberTypes: axes = tuple(axes[0])
    if axes==_t0: axes = tuple(reversed(tuple(xrange(self.ndim))))
    if axes == tuple(xrange(self.ndim)): return self.copy() # identity permutation: still returns a new array
    if tuple(sorted(axes)) != tuple(xrange(self.ndim)): raise ValueError("%s is not a valid argument to transpose() of an array of %d axes" % (axes, self.ndim))
    for i in range(self.ndim-1):
        if axes[i+1]==axes[i]+1: return (self. # see if the task can be simplified by collapsing some axes that are kept adjacent
            reshape(self.shape[:axes[i]] + (_prodT(self.shape[axes[i]:axes[i]+2]),) + self.shape[axes[i]+2:]).
            transpose((originalAxisI-(originalAxisI>axes[i])) for originalAxisI in _deleteT2(axes, i+1)).
            reshape(self.shape[axisI] for axisI in axes))
    if self.ndim==3 and hasattr(_cudamat, '_cudamat') and cudamatHas('transpose3') and self.size!=0:
        # dedicated 3-d routine; the remaining permutation is mapped to the routine's reordering index
        reorderingI = {(0, 2, 1): 0, (1, 0, 2): 1, (2, 1, 0): 2}[axes]
        ret = empty(tuple( self.shape[axisI] for axisI in axes))
        gridX, gridY = (self.size+511)//512, 1
        while gridX>65535: gridY*=2; gridX = (gridX+1)//2; # keep gridX at or below 65535 by doubling gridY
        _cudamat._cudamat.transpose3.restype = _ctypes.c_int
        assert 0==_cudamat._cudamat.transpose3(_ctInt(gridX), _ctInt(gridY), self._base.p_mat, ret._base.p_mat, _ctInt(self.shape[0]), _ctInt(self.shape[1]), _ctInt(self.shape[2]), _ctInt(reorderingI))
        return ret
    # shiftAxesRight: rotate the axes with transpose_simple, then finish with a correspondingly renumbered permutation
    def shiftAxesRight(shiftN): return self.transpose_simple(-shiftN).transpose( (axisI+shiftN)%self.ndim for axisI in axes)
    for i in range(self.ndim-1): # see if the task can be simplified by rotating axes right by 1. if so, the loop before this one can simplify further
        if axes[i:i+2] == (self.ndim-1, 0): return shiftAxesRight(1)
    # no further simplifications can be done. we need to proceed with a loop over the first axis. First rotate the intended axis to position 0.
    if axes[0]!=0: return shiftAxesRight(-axes[0])
    ret = empty( self.shape[axisI] for axisI in axes)
    # axis 0 stays in place here, so each slice along it is transposed with the remaining axes renumbered
    for i in range(self.shape[0]): ret[i] = self[i].transpose( x-1 for x in axes[1:])
    return ret
示例10: __repr__
# 需要導入模塊: import cudamat [as 別名]
# 或者: from cudamat import empty [as 別名]
def __repr__(self):
    """
    Return the repr of the numpy copy of this array, relabelled as a garray.

    'array(' becomes 'garray(', continuation lines are shifted right by one
    space to stay aligned under the longer prefix, and float32/float64 dtype
    suffixes are removed (64 happens for empty arrays).
    """
    text = self.as_numpy_array().__repr__()
    # applied strictly in this order, exactly like a chain of .replace() calls
    substitutions = (
        ('array(', 'garray('),
        ('\n', '\n '),
        (', dtype=float32', ''),
        (', dtype=float64', ''),
    )
    for old, new in substitutions:
        text = text.replace(old, new)
    return text
示例11: compute_gamma_entropy
# 需要導入模塊: import cudamat [as 別名]
# 或者: from cudamat import empty [as 別名]
def compute_gamma_entropy(self, G):
    """
    Return the sum over all entries of G * (log(G) - 1).

    The product is evaluated through cudamat when self.gpu is set, otherwise
    with numpy. In both cases the element-wise result is passed through
    np.nan_to_num on the host before it is summed.
    """
    if self.gpu:
        out_dev = cm.empty(G.shape)
        # log is applied to a copy of G, so G itself is left untouched
        product = G.mult(cm.log(G.copy()).subtract(1), target=out_dev)
        host_product = product.asarray()
    else:
        host_product = G * (np.log(G) - 1)
    return np.nan_to_num(host_product).sum()
示例12: initParams
# 需要導入模塊: import cudamat [as 別名]
# 或者: from cudamat import empty [as 別名]
def initParams(self):
    """
    Initialize the layer parameters and pre-allocate the GPU work buffers.

    Each weight matrix is drawn uniformly from [-s, s) with
    s = sqrt(6)/sqrt(fanin+fanout); biases start at zero. The parameters are
    built with numpy and stored in self.stack as cudamat matrices at the end.
    """
    sizes = [self.inputDim] + self.layerSizes + [self.outputDim]

    host_stack = []
    for fan_in, fan_out in zip(sizes[:-1], sizes[1:]):
        bound = np.sqrt(6) / np.sqrt(fan_in + fan_out)
        weights = np.random.rand(fan_out, fan_in) * 2 * bound - bound
        biases = np.zeros((fan_out, 1))
        host_stack.append([weights, biases])
    self.stack = host_stack

    # one activation buffer per layer, the input layer included
    activations = []
    for width in sizes:
        activations.append(cm.empty((width, self.maxBatch)))
    self.hActs_M = activations

    # NOTE(review): the extent of this branch was reconstructed from a source
    # that lost its indentation - confirm it against the original file.
    if self.train:
        # Now assuming that all layers are the same size
        gradients = []
        for weights, biases in self.stack:
            gradients.append([cm.empty(weights.shape), cm.empty(biases.shape)])
        self.grad = gradients
        self.deltasC_M = cm.empty((self.outputDim, self.maxBatch))
        self.deltasOut_M = cm.empty((sizes[1], self.maxBatch))
        self.deltasIn_M = cm.empty((sizes[1], self.maxBatch))
        self.tmpGrad_M = cm.empty((self.layerSizes[0], self.maxBatch))

    # Allocate memory once here and reuse
    # Store probs
    self.probs_M = cm.empty((self.outputDim, self.maxBatch))
    # Store col max
    self.rowVec_M = cm.empty((1, self.maxBatch))

    # finally move the parameters themselves onto the device
    device_stack = []
    for weights, biases in self.stack:
        device_stack.append([cm.CUDAMatrix(weights), cm.CUDAMatrix(biases)])
    self.stack = device_stack
示例13: _norm
# 需要導入模塊: import cudamat [as 別名]
# 或者: from cudamat import empty [as 別名]
def _norm(self, x, ws):
    """Calculate uj so that we can normalize it.

    Projects x onto the rows of ws, sums the squared projections over samples
    and combines that with the squared row norms of ws, weighted by eps**2.
    Returns one value per row of ws.
    """
    if self.gpu:
        proj_dev = cm.empty((self.n_samples, self.m))
        w_dev = cm.CUDAMatrix(ws)
        cm.dot(x, w_dev.T, target=proj_dev)  # + noise, but it is included analytically
        proj = proj_dev.asarray()
        del proj_dev
        del w_dev
        # TODO: Should be able to do on gpu...
    else:
        proj = x.dot(ws.T)  # + noise / std Y_j^2, but it is included analytically
    squared_sum = np.einsum('lj,lj->j', proj, proj)
    return np.sqrt((1 - self.eps**2) * squared_sum / self.n_samples + self.eps**2 * np.sum(ws**2, axis=1))
示例14: _calculate_moments_syn
# 需要導入模塊: import cudamat [as 別名]
# 或者: from cudamat import empty [as 別名]
def _calculate_moments_syn(self, x, ws, quick=False):
    """Calculate moments based on the weights and samples.

    Returns a dictionary holding the cross moments "X_i Y_j", the covariance
    "cy" and correlation "ry" of y, "rho" and quantities derived from it, and
    the information measures "MI", "TCs", "additivity" and "TC".
    Note it is assumed that <X_i^2> = 1!

    The `quick` argument is accepted but not used in this function.
    """
    moments = {}  # Dictionary of moments

    if self.gpu:
        y_dev = cm.empty((self.n_samples, self.m))
        w_dev = cm.CUDAMatrix(ws)
        cm.dot(x, w_dev.T, target=y_dev)  # + noise, but it is included analytically
        del w_dev
        xy_dev = cm.empty((self.nv, self.m))
        cm.dot(x.T, y_dev, target=xy_dev)
        xy = xy_dev.asarray() / self.n_samples  # nv by m, <X_i Y_j>
        del y_dev
        del xy_dev
    else:
        y = x.dot(ws.T)  # + noise, but it is included analytically
        xy = x.T.dot(y) / self.n_samples
    moments["X_i Y_j"] = xy

    cov_y = ws.dot(xy) + self.yscale ** 2 * np.eye(self.m)  # cov(y.T), m by m
    moments["cy"] = cov_y
    var_y = np.diag(cov_y).copy()
    moments["Y_j^2"] = var_y
    std_y = np.sqrt(var_y)
    moments["ry"] = cov_y / (std_y * std_y[:, np.newaxis])

    rho = (xy / std_y).T
    moments["rho"] = rho
    inv_rho = 1. / (1. - rho**2)
    moments["invrho"] = inv_rho
    rho_inv_rho = rho * inv_rho
    moments["rhoinvrho"] = rho_inv_rho
    q_ij = np.dot(moments['ry'], rho_inv_rho)
    moments["Qij"] = q_ij
    moments["Qi"] = np.einsum('ki,ki->i', rho_inv_rho, q_ij)
    moments["Si"] = np.sum(rho * rho_inv_rho, axis=0)

    mutual_info = - 0.5 * np.log1p(-rho**2)
    moments["MI"] = mutual_info
    xz = np.linalg.solve(cov_y, xy.T).T
    moments["X_i Z_j"] = xz
    # clipped from below at 1e-6 so the logarithm taken next stays finite
    cond_var = (1. - np.einsum('ij,ij->i', xz, xy)).clip(1e-6)
    moments["X_i^2 | Y"] = cond_var

    mi_yj_x = 0.5 * np.log(var_y) - 0.5 * np.log(self.yscale ** 2)
    mi_xi_y = - 0.5 * np.log(cond_var)
    moments["TCs"] = mutual_info.sum(axis=1) - mi_yj_x
    moments["additivity"] = (mutual_info.sum(axis=0) - mi_xi_y).sum()
    moments["TC"] = np.sum(mi_xi_y) - np.sum(mi_yj_x)
    return moments
示例15: _reduction__base
# 需要導入模塊: import cudamat [as 別名]
# 或者: from cudamat import empty [as 別名]
def _reduction__base(self, operatorName, axis):
    """
    Reduce this array along `axis` with the operator named by `operatorName`.

    operatorName is 'sum' or 'max'. axis is an axis number (negative values
    count from the end) or None; with None the array is flattened first and
    the single resulting value is returned via .item().

    Raises TypeError when axis is neither None nor a number, and ValueError
    when it lies outside [-ndim, ndim).
    """
    if axis==None: return self.ravel()._reduction__base(operatorName, 0).item()
    if not type(axis) in _numberTypes: raise TypeError('the value %s is not appropriate for the "axis" parameter.' % str(axis))
    if axis < -self.ndim or axis>=self.ndim: raise ValueError('axis (%d) out of bounds for an array with %d axes.' % (axis, self.ndim))
    axis = int(axis) % self.ndim # normalise negative axes
    if self.size==0:
        # empty input: 0 for sum and -inf for max, without touching the gpu
        retShape = _deleteT2(self.shape, axis)
        if operatorName=='sum': return zeros(retShape)
        elif operatorName=='max': return tile(-inf, retShape)
        else: assert False
    if operatorName=='max' and axis==0 and cudamatHas('maxAxis0'): # my own fast implementation
        ret = empty(self.shape[1:])
        _ctInt = _cudamat.ct.c_int
        nThreadsPerBlock = 32
        gridX, gridY = ((ret.size+nThreadsPerBlock-1)//nThreadsPerBlock), 1
        while gridX>65535: gridY*=2; gridX = (gridX+1)//2; # keep gridX at or below 65535 by doubling gridY
        _cudamat._cudamat.maxAxis0.restype = _ctypes.c_int
        assert 0==_cudamat._cudamat.maxAxis0(_ctInt(gridX), _ctInt(gridY), _ctInt(nThreadsPerBlock), self._base.p_mat, ret._base.p_mat, _ctInt(self.shape[0]), _ctInt(ret.size))
        return ret
    if axis==0 and operatorName=='max': # max over rows is not yet supported in cudamat
        return self.reshape_2d(1).T.max(1).reshape(self.shape[1:])
    if axis==0 and self.ndim==1 and self.size>5000 and operatorName=='sum': # optimization. apparently, cudamat is not maximally efficient.
        # sum the largest n*n prefix as a square matrix (two reductions), then add the sum of the leftover tail
        n = int(numpy.sqrt(self.size-1))
        return self[:n*n].reshape((n, n))._reduction__base(operatorName, 0)._reduction__base(operatorName, 0) + self[n*n:]._reduction__base(operatorName, 0)
    if operatorName=='sum':
        chunkSize = 1024*256 # sum over longer dimensions fails in cudamat
        nChunks = (self.shape[axis] + chunkSize-1) // chunkSize
        if nChunks>1:
            # reduce each chunk along the axis separately, then add up the partial results
            return reduceAdd( self[(slice(None),) * axis + (slice(chunkI*chunkSize, __builtin__.min(self.shape[axis], (chunkI+1)*chunkSize)),)]._reduction__base(operatorName, axis)
                for chunkI in range(nChunks))
    if operatorName=='max' and self.isnan().any2(): # cudamat bug workaround
        return garray(self.asarray().max(axis))
    operatorInCm = {'sum': _cmType.sum, 'max': _cmType.max}[operatorName]
    if axis==0: return _check_number_types(garray(operatorInCm(self._base_shaped(1), 1, _new_cm(_prodT(self.shape[1:]))), self.shape[1:], None))
    if axis==self.ndim-1:
        if self.ndim!=2: return self.reshape_2d(-1)._reduction__base(operatorName, 1).reshape(self.shape[:-1])
        if self.ndim==2:
            chunkSize = 2**16-1
            nChunks = (len(self) + chunkSize-1) // chunkSize
            if nChunks>1: # cudamat chokes on big arrays, so break it in pieces for cudamat
                chunks = tuple([ self[chunkI*chunkSize : __builtin__.min((chunkI+1)*chunkSize, len(self))]
                    for chunkI in range(nChunks)])
                return concatenate([ chunk._reduction__base(operatorName, 1) for chunk in chunks])
            else: # small array
                return _check_number_types(garray(operatorInCm(self._base_shaped(1), 0, _new_cm((len(self), 1))), (len(self),), None))
    # a middle axis: rotate it to the front, reduce along axis 0, then rotate back
    return self.transpose_simple(axis)._reduction__base(operatorName, 0).transpose_simple(-axis)
# ------------------------------------------------------------------------------- external misc non-numerical