當前位置: 首頁>>代碼示例>>Python>>正文


Python cudamat.empty方法代碼示例

本文整理匯總了Python中cudamat.empty方法的典型用法代碼示例。如果您正苦於以下問題:Python cudamat.empty方法的具體用法?Python cudamat.empty怎麼用?Python cudamat.empty使用的例子?那麼, 這裏精選的方法代碼示例或許可以為您提供幫助。您也可以進一步了解該方法所在cudamat的用法示例。


在下文中一共展示了cudamat.empty方法的15個代碼示例,這些例子默認根據受歡迎程度排序。您可以為喜歡或者感覺有用的代碼點讚,您的評價將有助於系統推薦出更棒的Python代碼示例。

示例1: dot

# 需要導入模塊: import cudamat [as 別名]
# 或者: from cudamat import empty [as 別名]
def dot(a1, a2):
 """
 Product of two array-likes; both arguments are first converted with as_garray().
 Only the 2-d by 2-d case is handed to cudamat directly; every other supported case is rewritten into that one.
 Raises ValueError when the last axis of a1 and the second-to-last axis of a2 differ in length,
 and NotImplementedError if none of the cases below applies.
 """
 a1 = as_garray(a1)
 a2 = as_garray(a2)
 if a1.ndim==0 or a2.ndim==0:
  return a1*a2 # a 0-d operand: plain multiplication
 if a1.ndim==a2.ndim==1:
  if a1 is a2:
   return sum(a1**2) # the very same object on both sides: sum of its squares
  # row vector times column vector, then pull the single value out
  return dot(a1.reshape(1, a1.size), a2.reshape(a2.size, 1)).item()
 if a1.ndim==2 and a2.ndim==1:
  return dot(a1, a2.reshape(a2.size, 1)).ravel() # a2 acts as a column vector
 if a1.ndim==1 and a2.ndim==2:
  return dot(a1._add_axes(2), a2)[0] # a1 acts as a row vector
 if a1.shape[-1] != a2.shape[-2]:
  raise ValueError('arrays not aligned for dot product. a dot product was requested of arrays with shapes %s and %s' % (a1.shape, a2.shape))
 if a1.ndim==a2.ndim==2:
  outShape = (a1.shape[0], a2.shape[1])
  if a1.shape[1]==0:
   return zeros(outShape) # cudamat bug workaround
  product = empty(outShape)
  if product.size!=0:
   # note the swapped operand order in the cudamat call
   _cudamat.dot(a2._base_as_2d(), a1._base_as_2d(), product._base_as_2d())
  return product
 if a1.ndim >= 2 and a2.ndim >= 2:
  # not necessarily fast: when a2.ndim>=3 a transpose is involved
  if a1.ndim!=2:
   left2d = a1.reshape_2d(-1)
  else:
   left2d = a1
  if a2.ndim!=2:
   # bring the second-to-last axis of a2 to the front before flattening the rest
   axesOrder = (a2.ndim-2,) + tuple(xrange(a2.ndim-2)) + (a2.ndim-1,)
   right2d = a2.transpose(axesOrder).reshape_2d(1)
  else:
   right2d = a2
  outShape = _deleteT2(a1.shape, -1) + _deleteT2(a2.shape, -2)
  return dot(left2d, right2d).reshape(outShape)
 raise NotImplementedError('dot with arguments of shapes %s and %s' % (a1.shape, a2.shape))
開發者ID:renmengye,項目名稱:imageqa-public,代碼行數:27,代碼來源:gnumpy.py

示例2: _sig

# 需要導入模塊: import cudamat [as 別名]
# 或者: from cudamat import empty [as 別名]
def _sig(self, x, u):
        """Apply x.T.dot(x.dot(u.T)) to u, scale it, and blend the result with u itself.

        The covariance matrix of x is never constructed explicitly, which matters when
        n_variables >> n_samples. Returns
        (1 - eps**2) * (x.T x u.T).T / n_samples + eps**2 * u, which has the same shape as u.
        """
        if not self.gpu:
            projected = x.dot(u.T)
            cov_times_u = x.T.dot(projected)
        else:
            projected = cm.empty((self.n_samples, self.m))
            u_gpu = cm.CUDAMatrix(u)
            cm.dot(x, u_gpu.T, target=projected)
            del u_gpu
            gpu_result = cm.empty((self.nv, self.m))
            cm.dot(x.T, projected, target=gpu_result)
            # bring the product back to the host before dropping the GPU buffers
            cov_times_u = gpu_result.asarray()
            del projected
            del gpu_result
        eps_sq = self.eps**2
        return (1 - eps_sq) * cov_times_u.T / self.n_samples + eps_sq * u
開發者ID:gregversteeg,項目名稱:LinearCorex,代碼行數:20,代碼來源:linearcorex.py

示例3: _new_cm

# 需要導入模塊: import cudamat [as 別名]
# 或者: from cudamat import empty [as 別名]
def _new_cm(sizeOrShape):
 """
 Internal.
 Returns a new CUDAMatrix object of the given size.
 This is the only proc that allocs gpu mem.

 sizeOrShape is either a number of elements or a tuple shape (its first two entries are used).
 Raises MemoryError when the request would push the tracked usage over max_memory_usage,
 or when cudamat itself fails to allocate.
 """
 global __memoryInUse
 if type(sizeOrShape) == tuple:
  if _prodT(sizeOrShape)==0:
   return _new_cm(1) # cudamat workaround: cudamat can't handle size 0 arrays
  # allocate flat, then reshape; note the shape entries are passed to reshape in swapped order
  return _new_cm(sizeOrShape[0]*sizeOrShape[1]).reshape((sizeOrShape[1], sizeOrShape[0]))
 size = sizeOrShape
 if size==0:
  return _cudamat.empty((1, 1)) # cudamat workaround
 if len(_cmsForReuse[size])!=0:
  return _cm_reshape(_cmsForReuse[size].pop(), (1, size)) # re-use an abandoned cm
 _init_gpu()
 # memory is accounted as size*4 throughout (presumably 4 bytes per element - confirm against cudamat's dtype)
 if __memoryInUse+size*4*5 > max_memory_usage:
  # if we're somewhat close to the limit, then free what's easy to free, and hope that there are contiguous blocks available.
  free_reuse_cache(False)
 if __memoryInUse+size*4 > max_memory_usage:
  # if we're (still) OVER the limit, then do whatever can be done to make more mem available
  free_reuse_cache(True) # gc.collect can take quite some time
  if __memoryInUse+size*4 > max_memory_usage:
   raise MemoryError('Gnumpy ran out of memory. Currently in use are %s; the maximum allowed is %s; so the present request for %s is refused. Free some memory and try again.' % (_n_bytes_str(__memoryInUse), _n_bytes_str(max_memory_usage), _n_bytes_str(size*4)))
 try:
  ret = _cudamat.empty((size, 1))
  __memoryInUse += size*4 # do this only if the malloc succeeded
  return ret
 except _cudamat.CUDAMatException: # this means that malloc failed
  # the exception object itself is not needed, so it is not bound; this form parses on both Python 2 and 3
  raise MemoryError('The GPU failed to allocate the requested %d bytes of memory. This doesn\'t mean that your program is using too much memory. It does, however, mean that you should reduce the value of gnumpy.max_memory_usage (currently %s), to always have some memory unused (which is necessary to find contiguous large blocks of memory to allocate). Failing to allocate enough memory makes the GPU feel very unwell, so you are advised to restart Python now, or expect to see incoherent error messages and risk causing more serious damage.' % (size*4, str(max_memory_usage)))
開發者ID:renmengye,項目名稱:imageqa-public,代碼行數:28,代碼來源:gnumpy.py

示例4: _rand__base

# 需要導入模塊: import cudamat [as 別名]
# 或者: from cudamat import empty [as 別名]
def _rand__base(shapeInfo, distribution, zero_d_means_scalar):
 """
 Shared implementation behind the random-array constructors.
 shapeInfo: the shape arguments as passed by the caller; a single sequence argument is taken to be the shape itself.
 distribution: 'uniform' or 'normal', selecting the cudamat fill routine.
 zero_d_means_scalar: if true and the shape is empty, a plain number (ret.item()) is returned instead of an array.
 """
 if len(shapeInfo)==1 and _isSequence(shapeInfo[0]):
  # the shape was given explicitly as one sequence, so a 0-d request is not turned into a scalar
  zero_d_means_scalar = False
  shapeInfo = shapeInfo[0]
 ret = empty(shapeInfo)
 fillers = {'uniform': _cmType.fill_with_rand, 'normal': _cmType.fill_with_randn}
 fill = fillers[distribution]
 fill(ret._base)
 if ret.size!=0 and _doExpensiveCheck():
  assert ret.sum() < 100 + 2*ret.size, 'numerical gpu error: rand() gave a result>100'
 wantsScalar = len(shapeInfo) == 0 and zero_d_means_scalar
 if wantsScalar:
  return ret.item()
 return ret
開發者ID:renmengye,項目名稱:imageqa-public,代碼行數:9,代碼來源:gnumpy.py

示例5: tile

# 需要導入模塊: import cudamat [as 別名]
# 或者: from cudamat import empty [as 別名]
def tile(a, reps):
 """
 Repeat `a` along each axis; the result has shape a.shape[i] * reps[i] on axis i.
 `reps` may be a single number or an iterable of numbers. `a` may be a number or anything as_garray() accepts.
 If `reps` and `a` have different numbers of axes, the shorter one is extended first.
 Note: when all repetition counts multiply to 1, the converted array is returned directly; no copy is made here.
 """
 if type(reps) in _numberTypes: reps = (reps,)
 reps = tuple(reps) # for generator expressions
 if type(a) in _numberTypes:
  # a plain number: allocate an array of shape reps and assign the number to its buffer
  ret = empty(reps)
  ret._base.assign(a)
  return ret
 a = as_garray(a)
 if len(reps) > a.ndim: a = a._add_axes(len(reps))
 if len(reps) < a.ndim: reps = _extend_shape(reps, a.ndim) # now len(reps)==a.ndim
 retShape = tuple([ a.shape[i] * reps[i] for i in tuple(xrange(len(reps)))])
 if _prodT(retShape)==0: return zeros(retShape)
 if _prodT(reps)==1: return a
 for i in range(a.ndim-1): # merge replication requests on adjacent axes, for efficiency.
  # when axis i has length 1, it is dropped and its repetition count is multiplied into the next axis
  if reps[i]!=1 and reps[i+1]!=1 and a.shape[i]==1: return a.reshape(_deleteT2(a.shape, i)).tile(reps[:i]+(_prodT(reps[i:i+2]),)+reps[i+2:]).reshape(map(operator.mul, a.shape, reps))
 # helper: axis i has been replicated in nextA; give it the right shape and recurse with reps[i] set to 1
 def dataIDone(nextA, i): return nextA.reshape(_modifyT(a.shape, i, a.shape[i]*reps[i])).tile(_modifyT(reps, i, 1))
 if reps[0]!=1: # replicating rows is easy and efficient: just repeat the data a number of times.
  temp = empty((reps[0], a.size)) # shape doesn't matter because dataIDone changes it
  tempCm = temp._base_shaped(1)
  if reps[0]>=1:
   # write one copy of the data into the first row, then keep copying the rows already
   # written onto the following rows, so the number of filled rows can double on every pass
   _cm_row_slice_read(tempCm, 0, 1).assign(a._base_as_row())
   nCopiesDone = 1
   while nCopiesDone < reps[0]:
    nNow = __builtin__.min(nCopiesDone, reps[0]-nCopiesDone)
    _cm_row_slice_read(tempCm, nCopiesDone, nCopiesDone + nNow).assign(_cm_row_slice_read(tempCm, 0, nNow))
    nCopiesDone += nNow
  return dataIDone(temp, 0)
 # the general case is repeating a subset (as opposed to the whole array) n times, before moving on to the next subset
 # using a transpose with the right shape, the subsets can become columns. those can be lengthened because that is replicating rows; a second transpose makes them now-lengthened subsets again
 # pick the first axis that actually needs replication
 axis = __builtin__.min( i for i in range(a.ndim) if reps[i]!=1)
 return dataIDone(a.reshape_2d(axis).T.tile((reps[axis], 1)).T, axis) 
開發者ID:renmengye,項目名稱:imageqa-public,代碼行數:33,代碼來源:gnumpy.py

示例6: empty

# 需要導入模塊: import cudamat [as 別名]
# 或者: from cudamat import empty [as 別名]
def empty(shape):
 """
 Allocate a garray of the given shape without initializing its contents.
 `shape` may be a sequence, a generator, or a single value (which becomes a 1-tuple shape).
 """
 shapeIsIterable = _isSequence(shape) or type(shape) == types.GeneratorType
 dims = tuple(shape) if shapeIsIterable else (shape,)
 # internal garray form: (cudamat matrix, shape, alias-of)
 return garray(_new_cm(_prodT(dims)), dims, None)
開發者ID:renmengye,項目名稱:imageqa-public,代碼行數:6,代碼來源:gnumpy.py

示例7: __init__

# 需要導入模塊: import cudamat [as 別名]
# 或者: from cudamat import empty [as 別名]
def __init__(self, data, copy=True, ndmin=0):
  """
  Build a garray from `data`; the parameters mean the same as in numpy.array().

  `data` may be another garray, a generator, a sequence, or anything numpy.array() can convert.
  Internal form: when `data` is a cudamat matrix (type _cmType), the three arguments are instead
  taken to be the ._base, the shape and the ._is_alias_of value of the new array.
  Raises TypeError if `data` converts to a numpy array whose dtype is 'object' or '|S3'.
  """
  # the parameter check is skipped for the internal form, where copy and ndmin carry other values
  if type(data)!=_cmType: assert copy in (True, False) and type(ndmin) in _numberTypes, 'garray() parameters copy=%s, ndmin=%s are not of the right type' % (str(copy), str(ndmin))
  if type(data)==_cmType: # internal use only. the 3 arguments are, unlike their names suggest, the ._base, .shape, ._is_alias_of
   self._base = data
   self._set_shape_info(copy)
   self._is_alias_of = ndmin
   # only arrays that are not aliases are registered for memory tracking
   if self._is_alias_of==None and track_memory_usage:
    self.allocating_line = _calling_line()
    tracked_arrays[id(self)] = self
    # per allocating line: (count + 1, accounted size + self.size*4)
    _memoryUsers[self.allocating_line] = (_memoryUsers[self.allocating_line][0]+1, _memoryUsers[self.allocating_line][1]+self.size*4)
  elif isinstance(data, garray):
   if ndmin>0: data = data._add_axes(ndmin)
   # copy=True: fresh buffer with the data assigned into it; copy=False: share data's buffer and record data as the alias source
   garray.__init__(self, 
    ( _new_cm(data.size).assign(data._base_as_row()) if copy else data._base),
    data.shape,
    ( None if copy else data))
  # generators are materialized into a tuple and handled as a sequence
  elif type(data) == types.GeneratorType: garray.__init__(self, tuple(data), ndmin=ndmin)
  elif _isSequence(data):
   # sequences without any garray element go through numpy; otherwise the elements are concatenated as garrays
   if len(data)==0 or not _any2_(data, is_garray): garray.__init__(self, numpy.array(data, ndmin=ndmin), copy=False)
   else: garray.__init__(self, concatenate( as_garray(element)[None] for element in data), ndmin=ndmin) # no need to copy, because concat copies.
  else: # remaining cases. essentially init from numpy array.
   npa = numpy.array(data, copy=False) # in case data was a number
   if str(npa.dtype) in ('object', '|S3'): raise TypeError('Cannot convert "%s" to a garray.' % data) 
   # we're not using the cudamat constructor, because that always allocs gpu mem, and this way the mem may come from re-use.
   cm = _new_cm(npa.size)
   if not hasattr(cm, 'numpy_array'):
    #cm.copy_to_host() # if cm was created using cudamat.empty, this is needed to associate cm with a numpy array
    # follows an inlined version of the relevant portion of cm.copy_to_host(). This is quicker because it doesn't actually copy.
    cm.numpy_array = numpy.empty((cm.mat.size[0], cm.mat.size[1]), dtype=numpy.float32, order='F')
    cm.mat.data_host = cm.numpy_array.ctypes.data_as(_ctypes.POINTER(_ctypes.c_float))
    cm.mat.on_host = 1
   # fill the host-side buffer, then push it to the device
   if npa.size!=0: cm.numpy_array[:] = npa.reshape((-1, 1), order='C') # no cudamat.reformat is needed, because that's only dtype and order change, which are handled by the assignment anyway
   cm.copy_to_device()
   garray.__init__(self, cm, _extend_shape(npa.shape, ndmin), None) 
開發者ID:renmengye,項目名稱:imageqa-public,代碼行數:37,代碼來源:gnumpy.py

示例8: T

# 需要導入模塊: import cudamat [as 別名]
# 或者: from cudamat import empty [as 別名]
def T(self):
  """
  Transposed version of this array.
  Arrays that are not 2-d are passed on to self.transpose(). 2-d arrays are transposed through cudamat,
  in pieces of 10**6 rows/columns when either dimension exceeds 1e6 (cudamat bug workaround).
  """
  if self.ndim!=2: return self.transpose()
  # _base case
  flipped = tuple(reversed(self.shape))
  piece = 10**6
  if self.size==0: return self.reshape(flipped) # cudamat bug workaround
  if self.shape[1]>1e6: # cudamat bug workaround. with 2m columns it fails
   nPieces = (self.shape[1]+piece-1)//piece
   return concatenate([ self[:, k*piece : (k+1)*piece].T for k in range(nPieces)])
  if self.shape[0]>1e6: # cudamat bug workaround. using concat is not an option, because that uses transpose.
   ret = empty(flipped)
   nPieces = (self.shape[0]+piece-1)//piece
   for k in range(nPieces):
    lo, hi = k*piece, (k+1)*piece
    ret[:, lo:hi] = self[lo:hi].T
   return ret
  return garray(self._base_as_2d().transpose(_new_cm(flipped)), flipped, None)
開發者ID:renmengye,項目名稱:imageqa-public,代碼行數:14,代碼來源:gnumpy.py

示例9: transpose

# 需要導入模塊: import cudamat [as 別名]
# 或者: from cudamat import empty [as 別名]
def transpose(self, *axes):
  """
  like numpy.transpose, except that this doesn't return an alias, but rather a new array.

  `axes` may be given as separate numbers or as one iterable. When `axes` equals _t0
  (presumably the empty tuple, i.e. no axes given - confirm) the axis order is reversed.
  Raises ValueError if `axes` is not a permutation of range(self.ndim).
  """
  # This is not really supported by cudamat, so it takes creativity. I handle a variety of cases differently.
  # a single non-number argument is taken to be the iterable of axes
  if len(axes)==1 and not type(axes[0]) in _numberTypes: axes = tuple(axes[0])
  if axes==_t0: axes = tuple(reversed(tuple(xrange(self.ndim))))
  # identity permutation: just copy
  if axes == tuple(xrange(self.ndim)): return self.copy()
  if tuple(sorted(axes)) != tuple(xrange(self.ndim)): raise ValueError("%s is not a valid argument to transpose() of an array of %d axes" % (axes, self.ndim))
  for i in range(self.ndim-1): 
   if axes[i+1]==axes[i]+1: return (self. # see if the task can be simplified by collapsing some axes that are kept adjacent
    reshape(self.shape[:axes[i]] + (_prodT(self.shape[axes[i]:axes[i]+2]),) + self.shape[axes[i]+2:]).
    transpose((originalAxisI-(originalAxisI>axes[i])) for originalAxisI in _deleteT2(axes, i+1)).
    reshape(self.shape[axisI] for axisI in axes))
  # 3-d arrays: use the transpose3 routine when this cudamat build provides it
  if self.ndim==3 and hasattr(_cudamat, '_cudamat') and cudamatHas('transpose3') and self.size!=0:
   # index passed to transpose3 to select which of the three remaining axis orders is wanted
   reorderingI = {(0, 2, 1): 0, (1, 0, 2): 1, (2, 1, 0): 2}[axes]
   ret = empty(tuple( self.shape[axisI] for axisI in axes))
   # gridX starts at ceil(size/512); while it exceeds 65535 it is halved and gridY doubled
   gridX, gridY = (self.size+511)//512, 1
   while gridX>65535: gridY*=2; gridX = (gridX+1)//2;
   _cudamat._cudamat.transpose3.restype = _ctypes.c_int
   # a non-zero return value from transpose3 trips the assert
   assert 0==_cudamat._cudamat.transpose3(_ctInt(gridX), _ctInt(gridY), self._base.p_mat, ret._base.p_mat, _ctInt(self.shape[0]), _ctInt(self.shape[1]), _ctInt(self.shape[2]), _ctInt(reorderingI))
   return ret
  # helper: rotate the axes by shiftN using transpose_simple, then transpose with the axes renumbered to match
  def shiftAxesRight(shiftN): return self.transpose_simple(-shiftN).transpose( (axisI+shiftN)%self.ndim for axisI in axes)
  for i in range(self.ndim-1): # see if the task can be simplified by rotating axes right by 1. if so, the loop before this one can simplify further
   if axes[i:i+2] == (self.ndim-1, 0): return shiftAxesRight(1)
  # no further simplifications can be done. we need to proceed with a loop over the first axis. First rotate the intended axis to position 0.
  if axes[0]!=0: return shiftAxesRight(-axes[0])
  # axes[0]==0 here: transpose each slice along the first axis separately, with the remaining axes renumbered
  ret = empty( self.shape[axisI] for axisI in axes)
  for i in range(self.shape[0]): ret[i] = self[i].transpose( x-1 for x in axes[1:])
  return ret 
開發者ID:renmengye,項目名稱:imageqa-public,代碼行數:30,代碼來源:gnumpy.py

示例10: __repr__

# 需要導入模塊: import cudamat [as 別名]
# 或者: from cudamat import empty [as 別名]
def __repr__(self):
  """
  Text form of the array: the repr of self.as_numpy_array() with 'array(' turned into 'garray(',
  continuation lines indented by one extra space, and float32/float64 dtype suffixes removed.
  """
  text = self.as_numpy_array().__repr__()
  text = text.replace('array(', 'garray(')
  text = text.replace('\n', '\n ')
  for dtypeSuffix in (', dtype=float32', ', dtype=float64'): # 64 happens for empty arrays
   text = text.replace(dtypeSuffix, '')
  return text
開發者ID:renmengye,項目名稱:imageqa-public,代碼行數:3,代碼來源:gnumpy.py

示例11: compute_gamma_entropy

# 需要導入模塊: import cudamat [as 別名]
# 或者: from cudamat import empty [as 別名]
def compute_gamma_entropy(self, G):
        """Return the sum over all entries of G * (log(G) - 1).

        NaN entries of the elementwise product (e.g. from zeros in G) are mapped
        through np.nan_to_num before summing. When self.gpu is set, G is handled
        through the cudamat API and the product is copied to the host for the sum.
        """
        if self.gpu:
            buf = cm.empty(G.shape)
            # log is taken on a copy of G, and 1 is subtracted from that copy
            log_minus_one = cm.log(G.copy()).subtract(1)
            buf = G.mult(log_minus_one, target=buf)
            host_vals = buf.asarray()
        else:
            host_vals = G * (np.log(G) - 1)
        return np.nan_to_num(host_vals).sum()
開發者ID:dmelis,項目名稱:otalign,代碼行數:11,代碼來源:gw_optim.py

示例12: initParams

# 需要導入模塊: import cudamat [as 別名]
# 或者: from cudamat import empty [as 別名]
def initParams(self):
        """
        Initialize the weights and preallocate the GPU work buffers.

        Each weight matrix is drawn uniformly from [-s, s) with
        s = sqrt(6)/sqrt(fanin+fanout); biases start at zero. The layer sizes are
        [inputDim] + layerSizes + [outputDim]. All buffers are allocated with
        maxBatch columns. On return self.stack holds [weight, bias] pairs as
        cm.CUDAMatrix objects.
        """
        sizes = [self.inputDim]+self.layerSizes+[self.outputDim]
        scales = [np.sqrt(6)/np.sqrt(n+m) for n,m in zip(sizes[:-1],sizes[1:])]
        # host-side initial values; moved to the GPU at the end of this method
        self.stack = [[np.random.rand(m,n)*2*s-s,np.zeros((m,1))]
                            for n,m,s in zip(sizes[:-1],sizes[1:],scales)]
        # one buffer per entry of sizes (input included)
        self.hActs_M = [cm.empty((s,self.maxBatch)) for s in sizes]

        if self.train:
            # Now assuming that all layers are the same size
            self.grad = [[cm.empty(w.shape),cm.empty(b.shape)] for w,b in self.stack]
            self.deltasC_M = cm.empty((self.outputDim,self.maxBatch))
            self.deltasOut_M = cm.empty((sizes[1],self.maxBatch))
            self.deltasIn_M = cm.empty((sizes[1],self.maxBatch))
            self.tmpGrad_M = cm.empty((self.layerSizes[0],self.maxBatch))

        # Allocate memory once here and reuse
        # Store probs
        self.probs_M = cm.empty((self.outputDim,self.maxBatch))
        # Store col max
        self.rowVec_M = cm.empty((1,self.maxBatch))

        self.stack = [[cm.CUDAMatrix(w),cm.CUDAMatrix(b)]
                    for w,b in self.stack]
開發者ID:amaas,項目名稱:stanford-ctc,代碼行數:28,代碼來源:nnet.py

示例13: _norm

# 需要導入模塊: import cudamat [as 別名]
# 或者: from cudamat import empty [as 別名]
def _norm(self, x, ws):
        """Return the per-row normalizer for ws (one value per row of ws).

        With y = x.dot(ws.T), the result is
        sqrt((1 - eps**2) * sum_l y[l, j]**2 / n_samples + eps**2 * sum_i ws[j, i]**2).
        The noise contribution is included analytically rather than added to y.
        """
        if self.gpu:
            proj_gpu = cm.empty((self.n_samples, self.m))
            ws_gpu = cm.CUDAMatrix(ws)
            cm.dot(x, ws_gpu.T, target=proj_gpu)
            proj = proj_gpu.asarray()
            del proj_gpu
            del ws_gpu
        else:
            proj = x.dot(ws.T)
        # column-wise sum of squares, done on the host in both cases
        sq_sum = np.einsum('lj,lj->j', proj, proj)
        signal_part = (1 - self.eps**2) * sq_sum / self.n_samples
        noise_part = self.eps**2 * np.sum(ws**2, axis=1)
        return np.sqrt(signal_part + noise_part)
開發者ID:gregversteeg,項目名稱:LinearCorex,代碼行數:16,代碼來源:linearcorex.py

示例14: _calculate_moments_syn

# 需要導入模塊: import cudamat [as 別名]
# 或者: from cudamat import empty [as 別名]
def _calculate_moments_syn(self, x, ws, quick=False):
        """Compute the dictionary of moments for weights ws and samples x.

        The returned dict also carries MI, TC, TCs and additivity. It is assumed
        that <X_i^2> = 1. The `quick` argument is accepted but not used here.
        """
        moments = {}
        if self.gpu:
            y = cm.empty((self.n_samples, self.m))
            ws_gpu = cm.CUDAMatrix(ws)
            cm.dot(x, ws_gpu.T, target=y)  # + noise, but it is included analytically
            del ws_gpu
            xy_gpu = cm.empty((self.nv, self.m))
            cm.dot(x.T, y, target=xy_gpu)
            moments["X_i Y_j"] = xy_gpu.asarray() / self.n_samples  # nv by m,  <X_i Y_j>
            del y
            del xy_gpu
        else:
            y = x.dot(ws.T)  # + noise, but it is included analytically
            moments["X_i Y_j"] = x.T.dot(y) / self.n_samples
        xy = moments["X_i Y_j"]

        cy = ws.dot(xy) + self.yscale ** 2 * np.eye(self.m)  # cov(y.T), m by m
        moments["cy"] = cy
        var_y = np.diag(cy).copy()
        moments["Y_j^2"] = var_y
        moments["ry"] = cy / (np.sqrt(var_y) * np.sqrt(var_y[:, np.newaxis]))

        rho = (xy / np.sqrt(var_y)).T
        moments["rho"] = rho
        invrho = 1. / (1. - rho**2)
        moments["invrho"] = invrho
        rhoinvrho = rho * invrho
        moments["rhoinvrho"] = rhoinvrho
        qij = np.dot(moments['ry'], rhoinvrho)
        moments["Qij"] = qij
        moments["Qi"] = np.einsum('ki,ki->i', rhoinvrho, qij)
        moments["Si"] = np.sum(rho * rhoinvrho, axis=0)

        mi = - 0.5 * np.log1p(-rho**2)
        moments["MI"] = mi
        xz = np.linalg.solve(cy, xy.T).T
        moments["X_i Z_j"] = xz
        # clipped from below at 1e-6 before the log is taken further down
        resid = (1. - np.einsum('ij,ij->i', xz, xy)).clip(1e-6)
        moments["X_i^2 | Y"] = resid
        mi_yj_x = 0.5 * np.log(var_y) - 0.5 * np.log(self.yscale ** 2)
        mi_xi_y = - 0.5 * np.log(resid)
        moments["TCs"] = mi.sum(axis=1) - mi_yj_x
        moments["additivity"] = (mi.sum(axis=0) - mi_xi_y).sum()
        moments["TC"] = np.sum(mi_xi_y) - np.sum(mi_yj_x)
        return moments
開發者ID:gregversteeg,項目名稱:LinearCorex,代碼行數:40,代碼來源:linearcorex.py

示例15: _reduction__base

# 需要導入模塊: import cudamat [as 別名]
# 或者: from cudamat import empty [as 別名]
def _reduction__base(self, operatorName, axis):
  """
  Shared implementation of the 'sum' and 'max' reductions along one axis.

  operatorName: 'sum' or 'max'.
  axis: an axis number (negative values count from the end), or None to reduce over the
  raveled array and return the single value via .item().
  Raises TypeError if axis is neither None nor a number, and ValueError if it is out of bounds.
  Most branches below are workarounds for cudamat limitations; their order matters.
  """
  if axis==None: return self.ravel()._reduction__base(operatorName, 0).item()
  if not type(axis) in _numberTypes: raise TypeError('the value %s is not appropriate for the "axis" parameter.' % str(axis))
  if axis < -self.ndim or axis>=self.ndim: raise ValueError('axis (%d) out of bounds for an array with %d axes.' % (axis, self.ndim))
  axis = int(axis) % self.ndim
  if self.size==0:
   # empty input: the result is zeros for 'sum' and -inf for 'max', with the reduced axis removed
   retShape = _deleteT2(self.shape, axis)
   if operatorName=='sum': return zeros(retShape)
   elif operatorName=='max': return tile(-inf, retShape)
   else: assert False
  if operatorName=='max' and axis==0 and cudamatHas('maxAxis0'): # my own fast implementation
   ret = empty(self.shape[1:])
   _ctInt = _cudamat.ct.c_int
   nThreadsPerBlock = 32
   # gridX starts at ceil(ret.size/nThreadsPerBlock); while it exceeds 65535 it is halved and gridY doubled
   gridX, gridY = ((ret.size+nThreadsPerBlock-1)//nThreadsPerBlock), 1
   while gridX>65535: gridY*=2; gridX = (gridX+1)//2;
   _cudamat._cudamat.maxAxis0.restype = _ctypes.c_int
   # a non-zero return value from maxAxis0 trips the assert
   assert 0==_cudamat._cudamat.maxAxis0(_ctInt(gridX), _ctInt(gridY), _ctInt(nThreadsPerBlock), self._base.p_mat, ret._base.p_mat, _ctInt(self.shape[0]), _ctInt(ret.size))
   return ret
  if axis==0 and operatorName=='max': # max over rows is not yet supported in cudamat
   return self.reshape_2d(1).T.max(1).reshape(self.shape[1:])
  if axis==0 and self.ndim==1 and self.size>5000 and operatorName=='sum': # optimization. apparently, cudamat is not maximally efficient.
   # sum the first n*n elements as an n-by-n matrix (two reductions), then add the sum of the remainder
   n = int(numpy.sqrt(self.size-1))
   return self[:n*n].reshape((n, n))._reduction__base(operatorName, 0)._reduction__base(operatorName, 0) + self[n*n:]._reduction__base(operatorName, 0)
  if operatorName=='sum':
   chunkSize = 1024*256 # sum over longer dimensions fails in cudamat
   nChunks = (self.shape[axis] + chunkSize-1) // chunkSize
   if nChunks>1:
    # reduce each chunk along the axis separately and add up the partial results
    return reduceAdd( self[(slice(None),) * axis + (slice(chunkI*chunkSize, __builtin__.min(self.shape[axis], (chunkI+1)*chunkSize)),)]._reduction__base(operatorName, axis)
                      for chunkI in range(nChunks))
  if operatorName=='max' and self.isnan().any2(): # cudamat bug workaround
   # NaNs present: compute the max on the host via asarray() instead
   return garray(self.asarray().max(axis))
  operatorInCm = {'sum': _cmType.sum, 'max': _cmType.max}[operatorName]
  if axis==0: return _check_number_types(garray(operatorInCm(self._base_shaped(1), 1, _new_cm(_prodT(self.shape[1:]))), self.shape[1:], None))
  if axis==self.ndim-1:
   # last axis: arrays that are not 2-d are first flattened to 2-d and the result reshaped back
   if self.ndim!=2: return self.reshape_2d(-1)._reduction__base(operatorName, 1).reshape(self.shape[:-1])
   if self.ndim==2:
    chunkSize = 2**16-1
    nChunks = (len(self) + chunkSize-1) // chunkSize
    if nChunks>1: # cudamat chokes on big arrays, so break it in pieces for cudamat
     chunks = tuple([ self[chunkI*chunkSize : __builtin__.min((chunkI+1)*chunkSize, len(self))]
                      for chunkI in range(nChunks)])
     return concatenate([ chunk._reduction__base(operatorName, 1) for chunk in chunks])
    else: # small array
     return _check_number_types(garray(operatorInCm(self._base_shaped(1), 0, _new_cm((len(self), 1))), (len(self),), None))
  # any other axis: move it to the front with transpose_simple, reduce along axis 0, then undo the axis move
  return self.transpose_simple(axis)._reduction__base(operatorName, 0).transpose_simple(-axis)
 

 
 # ------------------------------------------------------------------------------- external misc non-numerical 
開發者ID:renmengye,項目名稱:imageqa-public,代碼行數:52,代碼來源:gnumpy.py


注:本文中的cudamat.empty方法示例由純淨天空整理自Github/MSDocs等開源代碼及文檔管理平台,相關代碼片段篩選自各路編程大神貢獻的開源項目,源碼版權歸原作者所有,傳播和使用請參考對應項目的License;未經允許,請勿轉載。