本文整理匯總了Python中cudamat.CUDAMatrix類的典型用法代碼示例。如果您正苦於以下問題:Python cudamat.CUDAMatrix類的具體用法?Python cudamat.CUDAMatrix怎麼用?Python cudamat.CUDAMatrix使用的例子?那麼,這裏精選的代碼示例或許可以為您提供幫助。您也可以進一步了解該類所在模塊cudamat的用法示例。
在下文中一共展示了cudamat.CUDAMatrix類的15個代碼示例,這些例子默認根據受歡迎程度排序。您可以為喜歡或者感覺有用的代碼點讚,您的評價將有助於系統推薦出更棒的Python代碼示例。
示例1: cudamat_to_cudandarray
# 需要導入模塊: import cudamat [as 別名]
# 或者: from cudamat import CUDAMatrix [as 別名]
def cudamat_to_cudandarray(x):
    """Wrap a cudamat.CUDAMatrix in a CudaNdarray that points at its memory.

    The device pointer held by ``x`` is passed to ``cuda.from_gpu_pointer``
    together with the shape of ``x`` and element strides derived from that
    shape. ``x`` itself is passed as the last argument (presumably so the
    result keeps the owner of the memory alive -- confirm against
    ``from_gpu_pointer``).

    Raises:
        ValueError: if ``x`` is not a ``cudamat.CUDAMatrix``.
    """
    if not isinstance(x, cudamat.CUDAMatrix):
        raise ValueError("We can transfer only cudamat.CUDAMatrix to CudaNdarray")

    # No dtype check is performed: cudamat matrices are always float32.

    # Build the strides back to front: the last axis gets stride 1 and every
    # earlier axis gets the product of the extents of the axes after it.
    reversed_strides = [1]
    for extent in x.shape[:0:-1]:
        reversed_strides.append(reversed_strides[-1] * extent)
    strides = tuple(reversed(reversed_strides))

    import ctypes
    device_address = ctypes.cast(x.mat.data_device, ctypes.c_void_p).value

    # The address is handed over as a Python 2 ``long``.
    return cuda.from_gpu_pointer(long(device_address), x.shape, strides, x)
示例2: return_NumpyArray
# 需要導入模塊: import cudamat [as 別名]
# 或者: from cudamat import CUDAMatrix [as 別名]
def return_NumpyArray(input_array):
    '''
    Return ``input_array`` as a numpy array.

    If input is a numpy array, it is returned unchanged (same object).
    If input is a CUDAMatrix, it is converted with ``asarray()``; a matrix
    with a single column is additionally flattened to a 1-d array.

    Raises:
        ValueError: if the input is neither a numpy array nor a CUDAMatrix.
    '''
    if isinstance(input_array, np.ndarray):
        return input_array

    if isinstance(input_array, cm.CUDAMatrix):
        host_array = input_array.asarray()
        # Single-column matrices come back as plain vectors.
        if input_array.shape[1] == 1:
            return host_array.flatten()
        return host_array

    raise ValueError('cannot handle input of type: %s'
                     % (type(input_array),))
示例3: _sig
# 需要導入模塊: import cudamat [as 別名]
# 或者: from cudamat import CUDAMatrix [as 別名]
def _sig(self, x, u):
    """Multiply the matrix u by the covariance matrix of x.

    The covariance matrix is never formed explicitly (the case of interest
    is n_variables >> n_samples); instead x is applied to u.T and then x.T
    is applied to that product. On the GPU path the intermediate device
    matrices are released as soon as they are no longer needed.

    Returns the blend
    ``(1 - eps**2) * (x.T x u.T).T / n_samples + eps**2 * u``.
    """
    if not self.gpu:
        projected = x.dot(u.T)
        tmp_dot = x.T.dot(projected)
    else:
        projected = cm.empty((self.n_samples, self.m))
        u_dev = cm.CUDAMatrix(u)
        cm.dot(x, u_dev.T, target=projected)
        del u_dev
        xt_projected = cm.empty((self.nv, self.m))
        cm.dot(x.T, projected, target=xt_projected)
        # Bring the result back to the host before dropping the buffers.
        tmp_dot = xt_projected.asarray()
        del projected
        del xt_projected

    eps_sq = self.eps**2
    # nv by m, <X_i Y_j> / std Y_j
    return (1 - eps_sq) * tmp_dot.T / self.n_samples + eps_sq * u
示例4: _init_gpu
# 需要導入模塊: import cudamat [as 別名]
# 或者: from cudamat import CUDAMatrix [as 別名]
def _init_gpu():
    """Pick a GPU board and claim it; runs its body at most once.

    The board is only claimed when ``_useGpu`` is ``'yes'`` (i.e. when the
    real cudamat backend is used rather than its CPU stand-in). The random
    number generator is seeded with 0 in either case, and a module-level
    ``__gpu_inited`` marker is set so later calls return immediately.

    Raises:
        GnumpyGpuUnavailableException: if no board is available.
    """
    global _boardId

    already_initialised = '__gpu_inited' in globals()
    if already_initialised:
        return

    if _useGpu == 'yes':
        # ``board_id_to_use`` may be either a fixed id or a callable that
        # selects one.
        if callable(board_id_to_use):
            _boardId = board_id_to_use()
        else:
            _boardId = board_id_to_use
        if _boardId == -1:
            raise GnumpyGpuUnavailableException('No gpu board is available. gnumpy will not function. Consider telling it to run on the CPU by setting environment variable GNUMPY_USE_GPU to "no".')
        _cudamat.cuda_set_device(_boardId)
        _cudamat.cublas_init()

    _cudamat.CUDAMatrix.init_random(0)
    globals()['__gpu_inited'] = None
示例5: _new_cm
# 需要導入模塊: import cudamat [as 別名]
# 或者: from cudamat import CUDAMatrix [as 別名]
def _new_cm(sizeOrShape):
    """
    Internal.
    Returns a new CUDAMatrix object of the given size.
    This is the only proc that allocs gpu mem.

    ``sizeOrShape`` is either an element count or a 2-tuple shape. A tuple
    is allocated as a flat buffer and then reshaped. Abandoned buffers of
    the same size are re-used before anything new is allocated.

    Raises:
        MemoryError: if the request would exceed ``max_memory_usage`` even
            after freeing the reuse cache, or if the GPU allocation fails.
    """
    global __memoryInUse

    if isinstance(sizeOrShape, tuple):
        if _prodT(sizeOrShape) == 0:
            # cudamat workaround: cudamat can't handle size 0 arrays
            return _new_cm(1)
        return _new_cm(sizeOrShape[0]*sizeOrShape[1]).reshape((sizeOrShape[1], sizeOrShape[0]))

    size = sizeOrShape
    if size == 0:
        return _cudamat.empty((1, 1))  # cudamat workaround
    if len(_cmsForReuse[size]) != 0:
        # re-use an abandoned cm
        return _cm_reshape(_cmsForReuse[size].pop(), (1, size))

    _init_gpu()

    # Sizes are counted in 4-byte elements.
    n_bytes = size*4
    if __memoryInUse + n_bytes*5 > max_memory_usage:
        # If we're somewhat close to the limit, then free what's easy to
        # free, and hope that there are contiguous blocks available.
        free_reuse_cache(False)
    if __memoryInUse + n_bytes > max_memory_usage:
        # If we're (still) OVER the limit, then do whatever can be done to
        # make more mem available. gc.collect can take quite some time.
        free_reuse_cache(True)
        if __memoryInUse + n_bytes > max_memory_usage:
            raise MemoryError('Gnumpy ran out of memory. Currently in use are %s; the maximum allowed is %s; so the present request for %s is refused. Free some memory and try again.' % (_n_bytes_str(__memoryInUse), _n_bytes_str(max_memory_usage), _n_bytes_str(size*4)))

    try:
        ret = _cudamat.empty((size, 1))
    except _cudamat.CUDAMatException:  # this means that malloc failed
        raise MemoryError('The GPU failed to allocate the requested %d bytes of memory. This doesn\'t mean that your program is using too much memory. It does, however, mean that you should reduce the value of gnumpy.max_memory_usage (currently %s), to always have some memory unused (which is necessary to find contiguous large blocks of memory to allocate). Failing to allocate enough memory makes the GPU feel very unwell, so you are advised to restart Python now, or expect to see incoherent error messages and risk causing more serious damage.' % (size*4, str(max_memory_usage)))

    # Account for the memory only once the malloc has succeeded.
    __memoryInUse += n_bytes
    return ret
示例6: seed_rand
# 需要導入模塊: import cudamat [as 別名]
# 或者: from cudamat import CUDAMatrix [as 別名]
def seed_rand(seed=None):
    """Seed the cudamat random number generator.

    Makes sure the GPU is initialised first. When ``seed`` is None the
    current time, truncated to whole seconds, is used as the seed.
    """
    _init_gpu()
    # Identity test: ``== None`` would invoke a custom __eq__ on the seed.
    if seed is None:
        seed = int(_time.time())
    _cudamat.CUDAMatrix.init_random(seed)
示例7: return_CUDAMatrix
# 需要導入模塊: import cudamat [as 別名]
# 或者: from cudamat import CUDAMatrix [as 別名]
def return_CUDAMatrix(input_array):
    '''
    Return ``input_array`` as a CUDAMatrix.

    A numpy array is copied into a new CUDAMatrix; a 1-d array is first
    given a trailing axis so that it becomes a single column.
    An input that is already a CUDAMatrix is returned as is.

    Raises:
        ValueError: if the input is neither a numpy array nor a CUDAMatrix.
    '''
    if isinstance(input_array, np.ndarray):
        is_vector = input_array.ndim == 1
        host_matrix = input_array[:, np.newaxis] if is_vector else input_array
        return cm.CUDAMatrix(host_matrix)

    if isinstance(input_array, cm.CUDAMatrix):
        return input_array

    raise ValueError('cannot handle input of type: %s'
                     % (type(input_array),))
示例8: initParams
# 需要導入模塊: import cudamat [as 別名]
# 或者: from cudamat import CUDAMatrix [as 別名]
def initParams(self):
    """
    Create the weight/bias stack and preallocate the GPU work buffers.

    Each weight matrix is drawn uniformly from [-s, s) with
    s = sqrt(6)/sqrt(fan_in + fan_out); biases start at zero. The
    parameters are built on the host and moved to the GPU at the end.
    Gradient and delta buffers are only allocated when ``self.train`` is
    set.
    """
    dims = [self.inputDim] + self.layerSizes + [self.outputDim]

    host_stack = []
    for fan_in, fan_out in zip(dims[:-1], dims[1:]):
        bound = np.sqrt(6)/np.sqrt(fan_in+fan_out)
        weights = np.random.rand(fan_out, fan_in)*2*bound-bound
        host_stack.append([weights, np.zeros((fan_out, 1))])
    self.stack = host_stack

    # One activation buffer per layer, inputs and outputs included.
    self.hActs_M = [cm.empty((dim, self.maxBatch)) for dim in dims]

    if self.train:
        # Now assuming that all layers are the same size
        self.grad = [[cm.empty(w.shape), cm.empty(b.shape)]
                     for w, b in self.stack]
        self.deltasC_M = cm.empty((self.outputDim, self.maxBatch))
        self.deltasOut_M = cm.empty((dims[1], self.maxBatch))
        self.deltasIn_M = cm.empty((dims[1], self.maxBatch))
        self.tmpGrad_M = cm.empty((self.layerSizes[0], self.maxBatch))

    # Allocate memory once here and reuse
    # Store probs
    self.probs_M = cm.empty((self.outputDim, self.maxBatch))
    # Store col max
    self.rowVec_M = cm.empty((1, self.maxBatch))

    # Replace the host-side parameters with their GPU copies.
    device_stack = []
    for w, b in self.stack:
        device_stack.append([cm.CUDAMatrix(w), cm.CUDAMatrix(b)])
    self.stack = device_stack
示例9: fromFile
# 需要導入模塊: import cudamat [as 別名]
# 或者: from cudamat import CUDAMatrix [as 別名]
def fromFile(self,fid):
    """Load the parameter stack from an open pickle file onto the GPU.

    ``fid`` is a file object containing a pickled sequence of
    ``(weights, bias)`` pairs; each array is copied into a CUDAMatrix and
    the result replaces ``self.stack``.
    """
    try:
        import cPickle as pickle  # Python 2: C-accelerated implementation
    except ImportError:
        import pickle  # Python 3: cPickle no longer exists
    # SECURITY: unpickling can execute arbitrary code. Only load files that
    # come from a trusted source.
    host_stack = pickle.load(fid)
    self.stack = [[cm.CUDAMatrix(w),cm.CUDAMatrix(b)]
                  for w,b in host_stack]
示例10: _norm
# 需要導入模塊: import cudamat [as 別名]
# 或者: from cudamat import CUDAMatrix [as 別名]
def _norm(self, x, ws):
    """Calculate uj so that we can normalize it.

    Projects x onto the rows of ws, sums the squared projections per
    column, and returns
    ``sqrt((1 - eps**2) * sum / n_samples + eps**2 * sum(ws**2, axis=1))``.
    The noise term is included analytically rather than sampled.
    """
    if self.gpu:
        projected_dev = cm.empty((self.n_samples, self.m))
        ws_dev = cm.CUDAMatrix(ws)
        cm.dot(x, ws_dev.T, target=projected_dev)  # + noise, but it is included analytically
        projected = projected_dev.asarray()
        del projected_dev
        del ws_dev
    else:
        projected = x.dot(ws.T)  # + noise / std Y_j^2, but it is included analytically

    # Column-wise sum of squares, computed on the host.
    # TODO: Should be able to do on gpu...
    sum_sq = np.einsum('lj,lj->j', projected, projected)

    eps_sq = self.eps**2
    return np.sqrt((1 - eps_sq) * sum_sq / self.n_samples + eps_sq * np.sum(ws**2, axis=1))
示例11: _calculate_moments_syn
# 需要導入模塊: import cudamat [as 別名]
# 或者: from cudamat import CUDAMatrix [as 別名]
def _calculate_moments_syn(self, x, ws, quick=False):
    """Calculate moments based on the weights and samples.

    Also calculates and stores MI, TC, additivity and the per-factor TCs.
    Note it is assumed that <X_i^2> = 1! The ``quick`` argument is accepted
    but not used here.

    Returns a dictionary of moments keyed by name.
    """
    n_samples = self.n_samples

    # <X_i Y_j>, nv by m. The noise on y is included analytically.
    if self.gpu:
        y_dev = cm.empty((n_samples, self.m))
        ws_dev = cm.CUDAMatrix(ws)
        cm.dot(x, ws_dev.T, target=y_dev)
        del ws_dev
        xy_dev = cm.empty((self.nv, self.m))
        cm.dot(x.T, y_dev, target=xy_dev)
        xy = xy_dev.asarray() / n_samples
        del y_dev
        del xy_dev
    else:
        y = x.dot(ws.T)
        xy = x.T.dot(y) / n_samples

    cov_y = ws.dot(xy) + self.yscale ** 2 * np.eye(self.m)  # cov(y.T), m by m
    var_y = np.diag(cov_y).copy()
    corr_y = cov_y / (np.sqrt(var_y) * np.sqrt(var_y[:, np.newaxis]))
    rho = (xy / np.sqrt(var_y)).T
    inv_rho = 1. / (1. - rho**2)
    rho_inv_rho = rho * inv_rho
    q_ij = np.dot(corr_y, rho_inv_rho)
    mutual_info = - 0.5 * np.log1p(-rho**2)
    xz = np.linalg.solve(cov_y, xy.T).T
    x_var_given_y = (1. - np.einsum('ij,ij->i', xz, xy)).clip(1e-6)

    mi_yj_x = 0.5 * np.log(var_y) - 0.5 * np.log(self.yscale ** 2)
    mi_xi_y = - 0.5 * np.log(x_var_given_y)

    moments = {}  # Dictionary of moments
    moments["X_i Y_j"] = xy
    moments["cy"] = cov_y
    moments["Y_j^2"] = var_y
    moments["ry"] = corr_y
    moments["rho"] = rho
    moments["invrho"] = inv_rho
    moments["rhoinvrho"] = rho_inv_rho
    moments["Qij"] = q_ij
    moments["Qi"] = np.einsum('ki,ki->i', rho_inv_rho, q_ij)
    moments["Si"] = np.sum(rho * rho_inv_rho, axis=0)
    moments["MI"] = mutual_info
    moments["X_i Z_j"] = xz
    moments["X_i^2 | Y"] = x_var_given_y
    moments["TCs"] = mutual_info.sum(axis=1) - mi_yj_x
    moments["additivity"] = (mutual_info.sum(axis=0) - mi_xi_y).sum()
    moments["TC"] = np.sum(mi_xi_y) - np.sum(mi_yj_x)
    return moments
示例12: preprocess
# 需要導入模塊: import cudamat [as 別名]
# 或者: from cudamat import CUDAMatrix [as 別名]
def preprocess(self, x, fit=False):
    """Transform each marginal to be as close to a standard Gaussian as possible.

    The transformation is selected by ``self.gaussianize``:
    'standard' subtracts the mean and scales by the std.
    'empirical' does an empirical gaussianization (but this cannot be inverted).
    'outliers' standardizes and then squeezes in the outliers.
    'none', or any other choice, skips the transformation.

    With ``fit=True`` the mean and std are re-estimated and stored in
    ``self.theta``; otherwise the stored values are applied. ``self.n_obs``
    is updated on every call. A GPU matrix is only returned when fitting
    with ``self.gpu`` set.
    """
    if self.missing_values is None:
        self.n_obs = len(x)
    else:
        x, self.n_obs = mean_impute(x, self.missing_values)  # Creates a copy

    mode = self.gaussianize
    if mode == 'standard':
        if fit:
            center = np.mean(x, axis=0)
            # Normalised by self.n_obs rather than via np.std(x, ddof=0).
            spread = np.sqrt(np.sum((x - center)**2, axis=0) / self.n_obs).clip(1e-10)
            self.theta = (center, spread)
        x = (x - self.theta[0]) / self.theta[1]
        if np.max(np.abs(x)) > 6 and self.verbose:
            print("Warning: outliers more than 6 stds away from mean. Consider using gaussianize='outliers'")
    elif mode == 'outliers':
        if fit:
            center = np.mean(x, axis=0)
            spread = np.std(x, axis=0, ddof=0).clip(1e-10)
            self.theta = (center, spread)
        standardized = (x - self.theta[0]) / self.theta[1]
        x = g(standardized)  # g truncates long tails
    elif mode == 'empirical':
        print("Warning: correct inversion/transform of empirical gauss transform not implemented.")
        # Rank-based transform applied to each column independently.
        columns = [norm.ppf((rankdata(x_i) - 0.5) / len(x_i)) for x_i in x.T]
        x = np.array(columns).T

    # Don't return GPU matrices when only transforming
    if self.gpu and fit:
        x = cm.CUDAMatrix(x)
    return x
示例13: initParams
# 需要導入模塊: import cudamat [as 別名]
# 或者: from cudamat import CUDAMatrix [as 別名]
def initParams(self):
    """
    Create the weight/bias stack and preallocate the GPU work buffers.

    Each weight matrix is drawn uniformly from [-s, s) with
    s = sqrt(6)/sqrt(fan_in + fan_out); biases start at zero. When
    ``self.temporalLayer`` is positive, one extra square weight matrix
    paired with a zero dummy bias is appended to the stack. Gradient and
    delta buffers are only allocated when ``self.train`` is set.
    """
    dims = [self.inputDim] + self.layerSizes + [self.outputDim]

    host_stack = []
    for fan_in, fan_out in zip(dims[:-1], dims[1:]):
        bound = np.sqrt(6)/np.sqrt(fan_in+fan_out)
        weights = np.random.rand(fan_out, fan_in)*2*bound-bound
        host_stack.append([weights, np.zeros((fan_out, 1))])
    self.stack = host_stack

    # One activation buffer per layer, inputs and outputs included.
    self.hActs_M = [cm.empty((dim, self.maxBatch)) for dim in dims]

    if self.train:
        # Now assuming that all layers are the same size
        self.grad = [[cm.empty(w.shape), cm.empty(b.shape)]
                     for w, b in self.stack]
        self.deltasC_M = cm.empty((self.outputDim, self.maxBatch))
        self.deltasOut_M = cm.empty((dims[1], self.maxBatch))
        self.deltasIn_M = cm.empty((dims[1], self.maxBatch))
        self.tmpGrad_M = cm.empty((self.layerSize, self.maxBatch))

    # Allocate memory once here and reuse
    # Store probs
    self.probs_M = cm.empty((self.outputDim, self.maxBatch))
    # Store col max
    self.rowVec_M = cm.empty((1, self.maxBatch))

    # Replace the host-side parameters with their GPU copies.
    device_stack = []
    for w, b in self.stack:
        device_stack.append([cm.CUDAMatrix(w), cm.CUDAMatrix(b)])
    self.stack = device_stack

    if self.temporalLayer > 0:
        # dummy bias used for temporal layer
        dummy = cm.empty((1, 1))
        dummy.assign(0.0)

        width = self.layerSize
        scale = np.sqrt(6)/np.sqrt(width*2)
        temporal_host = 2*scale*np.random.rand(width, width)-scale
        temporal_w = cm.CUDAMatrix(temporal_host)
        self.stack.append([temporal_w, dummy])

        if self.train:
            temporal_grad = cm.empty((width, width))
            self.grad.append([temporal_grad, dummy])
            self.deltaTemp_M = cm.empty((width, self.maxBatch))
示例14: costAndGrad
# 需要導入模塊: import cudamat [as 別名]
# 或者: from cudamat import CUDAMatrix [as 別名]
def costAndGrad(self,data,labels):
    """Run one forward/backward pass and return ``(cost, grad, skip)``.

    Forward: the input is copied to the GPU, every layer applies an affine
    map, and the first ``len(self.layerSizes)`` layers are followed by a
    hard ReLU. The last activations go through a softmax computed with the
    per-column maximum subtracted. The probabilities are copied to the host
    and passed to ``ctc.ctc_loss``, which supplies the cost, the output
    deltas and a ``skip`` flag.

    Backward: skipped entirely when ``skip`` is set (``self.grad`` is then
    returned as it currently stands); otherwise the gradients of every
    layer are written into ``self.grad`` in place.
    """
    self.setViews(data.shape[1])
    n_hidden = len(self.layerSizes)

    # forward prop
    self.hActs[0].assign(cm.CUDAMatrix(data))
    for layer, (w, b) in enumerate(self.stack, 1):
        cm.dot(w, self.hActs[layer-1], self.hActs[layer])
        self.hActs[layer].add_col_vec(b)
        if layer <= n_hidden:
            # hard relu
            self.hActs[layer].maximum(0.0)

    # Subtract max activation
    self.hActs[-1].max(axis=0, target=self.rowVec)
    self.hActs[-1].add_row_mult(self.rowVec, -1.0, target=self.probs)

    # Softmax: exponentiate in place, then scale by the inverse column sums.
    cm.exp(self.probs)
    self.probs.sum(axis=0, target=self.rowVec)
    cm.pow(self.rowVec, -1.0, target=self.rowVec)
    self.probs.mult_by_row(self.rowVec)

    # The loss is evaluated on the host from a float64 copy.
    self.probs.copy_to_host()
    host_probs = self.probs.numpy_array.astype(np.float64)
    cost, deltas, skip = ctc.ctc_loss(host_probs, labels, blank=0)
    self.deltasC.assign(cm.CUDAMatrix(deltas))

    if skip:
        return cost, self.grad, skip

    # back prop
    layer = n_hidden
    delta_src, delta_dst = self.deltasC, self.deltasOut
    for w, _ in reversed(self.stack):
        # compute gradient
        cm.dot(delta_src, self.hActs[layer].T, target=self.grad[layer][0])
        delta_src.sum(axis=1, target=self.grad[layer][1])

        # compute next layer deltas
        if layer > 0:
            self.hActs[layer].sign(target=self.tmpGrad)
            cm.dot(w.T, delta_src, target=delta_dst)
            delta_dst.mult(self.tmpGrad)

        # After the output layer, stop using the cost deltas as a buffer and
        # ping-pong between the two hidden-layer buffers instead.
        if layer == n_hidden:
            delta_src = self.deltasIn
        delta_src, delta_dst = delta_dst, delta_src
        layer -= 1

    return cost, self.grad, skip
示例15: initParams
# 需要導入模塊: import cudamat [as 別名]
# 或者: from cudamat import CUDAMatrix [as 別名]
def initParams(self):
    """
    Create the weight/bias stack and preallocate the GPU work buffers.

    Each weight matrix is drawn uniformly from [-s, s) with
    s = sqrt(6)/sqrt(fan_in + fan_out); biases start at zero. When
    ``self.temporalLayer`` is positive, two extra square weight matrices
    (forward and backward), each paired with a shared zero dummy bias, are
    appended to the stack along with their activation buffers. Gradient
    and delta buffers are only allocated when ``self.train`` is set.
    """
    dims = [self.inputDim] + self.layerSizes + [self.outputDim]

    host_stack = []
    for fan_in, fan_out in zip(dims[:-1], dims[1:]):
        bound = np.sqrt(6)/np.sqrt(fan_in+fan_out)
        weights = np.random.rand(fan_out, fan_in)*2*bound-bound
        host_stack.append([weights, np.zeros((fan_out, 1))])
    self.stack = host_stack

    # One activation buffer per layer, inputs and outputs included.
    self.hActs_M = [cm.empty((dim, self.maxBatch)) for dim in dims]

    if self.train:
        # Now assuming that all layers are the same size
        self.grad = [[cm.empty(w.shape), cm.empty(b.shape)]
                     for w, b in self.stack]
        self.deltasC_M = cm.empty((self.outputDim, self.maxBatch))
        self.deltasOut_M = cm.empty((dims[1], self.maxBatch))
        self.deltasIn_M = cm.empty((dims[1], self.maxBatch))
        self.tmpGrad_M = cm.empty((self.layerSize, self.maxBatch))

    # Allocate memory once here and reuse
    # Store probs
    self.probs_M = cm.empty((self.outputDim, self.maxBatch))
    # Store col max
    self.rowVec_M = cm.empty((1, self.maxBatch))

    # Replace the host-side parameters with their GPU copies.
    device_stack = []
    for w, b in self.stack:
        device_stack.append([cm.CUDAMatrix(w), cm.CUDAMatrix(b)])
    self.stack = device_stack

    if self.temporalLayer > 0:
        # dummy bias used for temporal layer
        dummy = cm.empty((1, 1))
        dummy.assign(0.0)

        width = self.layerSize
        batch_shape = (width, self.maxBatch)
        scale = np.sqrt(6)/np.sqrt(width*2)

        # The forward weights are drawn before the backward ones.
        forward_w = cm.CUDAMatrix(2*scale*np.random.rand(width, width)-scale)
        backward_w = cm.CUDAMatrix(2*scale*np.random.rand(width, width)-scale)
        self.stack.append([forward_w, dummy])
        self.stack.append([backward_w, dummy])

        # forward and backward activations for temporal layer
        self.hActsFor_M = cm.empty(batch_shape)
        self.hActsBack_M = cm.empty(batch_shape)

        if self.train:
            forward_grad = cm.empty(forward_w.shape)
            self.grad.append([forward_grad, dummy])
            backward_grad = cm.empty(backward_w.shape)
            self.grad.append([backward_grad, dummy])

            self.tmpGradBack_M = cm.empty(batch_shape)
            self.deltasFor_M = cm.empty(batch_shape)
            self.deltasBack_M = cm.empty(batch_shape)