

Python gpuarray.sum Function Code Examples

This article collects typical usage examples of the Python function pycuda.gpuarray.sum. If you are wondering what the sum function does, how to call it, or what it looks like in real code, the curated examples below should help.


Below are 15 code examples of the sum function, sorted by popularity by default. You can upvote the examples you like or find useful; your feedback helps the system recommend better Python code examples.
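
Before the project-specific examples, here is a minimal sketch of the pattern they all build on: copy a NumPy array to the device, reduce it with gpuarray.sum, and fetch the scalar result with .get(). This sketch is not taken from any of the projects below; it assumes a working CUDA device and a pycuda installation, and the array contents are illustrative.

import numpy as np
import pycuda.autoinit  # noqa: F401 -- creates a CUDA context on import
import pycuda.gpuarray as gpuarray

x = np.random.rand(1024).astype(np.float32)
x_gpu = gpuarray.to_gpu(x)          # host -> device copy
total = gpuarray.sum(x_gpu).get()   # device-side reduction; .get() copies the scalar back
print(total, x.sum())               # should agree up to float32 rounding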

Example 1: apply_mds_parallel2

	def apply_mds_parallel2(self):
		print("Applying parallel MDS via SMACOF...")
		current_time = time.perf_counter()
		graph_d = gpu.to_gpu(np.float32(self.graph))
		row_sum_d = gpu.to_gpu(np.float32(np.zeros(self.N)))
		score_current_d = gpu.to_gpu(np.float32(np.random.uniform(0, 10, size=self.N)))
		score_next_d = gpu.to_gpu(np.float32(np.zeros(self.N)))
		sigma_d = gpu.to_gpu(np.float32(np.zeros(self.N)))
		delta_d = gpu.to_gpu(np.float32(np.zeros(self.N)))
		mds2_kernel = cuda_compile(_kernel_source, 'mds2_kernel')
		stress = 1
		while (stress > 0.001):
			mds2_kernel(
				graph_d,
				row_sum_d,
				score_current_d,
				score_next_d,
				sigma_d,
				delta_d,
				np.int32(self.N),
				block=(1024, 1, 1), 
				grid=(int(self.N / 1024 + 1), int(1))
			)
			score_current_d = score_next_d
			score_next_d = gpu.to_gpu(np.float32(np.zeros(self.N)))
			stress = gpu.sum(sigma_d).get() / gpu.sum(delta_d).get()
		self.outdata = score_current_d.get()
		print "Time to apply parallel MDS: %6.2f s" % (time.clock() - current_time)
Developer: physicsistic, Project: recommend_songs, Lines: 28, Source: isomap_parallel.py

Example 2: psiDerivativecomputations

    def psiDerivativecomputations(self, dL_dpsi0, dL_dpsi1, dL_dpsi2, variance, lengthscale, Z, variational_posterior):
        ARD = (len(lengthscale)!=1)
        
        N,M,Q = self.get_dimensions(Z, variational_posterior)
        psi1_gpu = self.gpuCache['psi1_gpu']
        psi2n_gpu = self.gpuCache['psi2n_gpu']
        l_gpu = self.gpuCache['l_gpu']
        Z_gpu = self.gpuCache['Z_gpu']
        mu_gpu = self.gpuCache['mu_gpu']
        S_gpu = self.gpuCache['S_gpu']
        gamma_gpu = self.gpuCache['gamma_gpu']
        dvar_gpu = self.gpuCache['dvar_gpu']
        dl_gpu = self.gpuCache['dl_gpu']
        dZ_gpu = self.gpuCache['dZ_gpu']
        dmu_gpu = self.gpuCache['dmu_gpu']
        dS_gpu = self.gpuCache['dS_gpu']
        dgamma_gpu = self.gpuCache['dgamma_gpu']
        grad_l_gpu = self.gpuCache['grad_l_gpu']
        grad_mu_gpu = self.gpuCache['grad_mu_gpu']
        grad_S_gpu = self.gpuCache['grad_S_gpu']
        grad_gamma_gpu = self.gpuCache['grad_gamma_gpu']
        log_denom1_gpu = self.gpuCache['log_denom1_gpu']
        log_denom2_gpu = self.gpuCache['log_denom2_gpu']
        log_gamma_gpu = self.gpuCache['log_gamma_gpu']
        log_gamma1_gpu = self.gpuCache['log_gamma1_gpu']
        
        if self.GPU_direct:
            dL_dpsi1_gpu = dL_dpsi1
            dL_dpsi2_gpu = dL_dpsi2
            dL_dpsi0_sum = gpuarray.sum(dL_dpsi0).get()
        else:
            dL_dpsi1_gpu = self.gpuCache['dL_dpsi1_gpu']
            dL_dpsi2_gpu = self.gpuCache['dL_dpsi2_gpu']
            dL_dpsi1_gpu.set(np.asfortranarray(dL_dpsi1))
            dL_dpsi2_gpu.set(np.asfortranarray(dL_dpsi2))
            dL_dpsi0_sum = dL_dpsi0.sum()

        self.reset_derivative()
        # t=self.g_psi1compDer(dvar_gpu,dl_gpu,dZ_gpu,dmu_gpu,dS_gpu,dL_dpsi1_gpu,psi1_gpu, np.float64(variance),l_gpu,Z_gpu,mu_gpu,S_gpu, np.int32(N), np.int32(M), np.int32(Q), block=(self.threadnum,1,1), grid=(self.blocknum,1),time_kernel=True)
        # print 'g_psi1compDer '+str(t)
        # t=self.g_psi2compDer(dvar_gpu,dl_gpu,dZ_gpu,dmu_gpu,dS_gpu,dL_dpsi2_gpu,psi2n_gpu, np.float64(variance),l_gpu,Z_gpu,mu_gpu,S_gpu, np.int32(N), np.int32(M), np.int32(Q), block=(self.threadnum,1,1), grid=(self.blocknum,1),time_kernel=True)
        # print 'g_psi2compDer '+str(t)
        self.g_psi1compDer.prepared_call((self.blocknum,1),(self.threadnum,1,1),dvar_gpu.gpudata,dl_gpu.gpudata,dZ_gpu.gpudata,dmu_gpu.gpudata,dS_gpu.gpudata,dgamma_gpu.gpudata,dL_dpsi1_gpu.gpudata,psi1_gpu.gpudata, log_denom1_gpu.gpudata, log_gamma_gpu.gpudata, log_gamma1_gpu.gpudata, np.float64(variance),l_gpu.gpudata,Z_gpu.gpudata,mu_gpu.gpudata,S_gpu.gpudata,gamma_gpu.gpudata,np.int32(N), np.int32(M), np.int32(Q))
        self.g_psi2compDer.prepared_call((self.blocknum,1),(self.threadnum,1,1),dvar_gpu.gpudata,dl_gpu.gpudata,dZ_gpu.gpudata,dmu_gpu.gpudata,dS_gpu.gpudata,dgamma_gpu.gpudata,dL_dpsi2_gpu.gpudata,psi2n_gpu.gpudata, log_denom2_gpu.gpudata, log_gamma_gpu.gpudata, log_gamma1_gpu.gpudata, np.float64(variance),l_gpu.gpudata,Z_gpu.gpudata,mu_gpu.gpudata,S_gpu.gpudata,gamma_gpu.gpudata,np.int32(N), np.int32(M), np.int32(Q))

        dL_dvar = dL_dpsi0_sum + gpuarray.sum(dvar_gpu).get()
        sum_axis(grad_mu_gpu,dmu_gpu,N*Q,self.blocknum)
        dL_dmu = grad_mu_gpu.get()
        sum_axis(grad_S_gpu,dS_gpu,N*Q,self.blocknum)
        dL_dS = grad_S_gpu.get()
        sum_axis(grad_gamma_gpu,dgamma_gpu,N*Q,self.blocknum)
        dL_dgamma = grad_gamma_gpu.get()
        dL_dZ = dZ_gpu.get()
        if ARD:
            sum_axis(grad_l_gpu,dl_gpu,Q,self.blocknum)
            dL_dlengscale = grad_l_gpu.get()
        else:
            dL_dlengscale = gpuarray.sum(dl_gpu).get()
            
        return dL_dvar, dL_dlengscale, dL_dZ, dL_dmu, dL_dS, dL_dgamma
Developer: Arthurkorn, Project: GPy, Lines: 60, Source: ssrbf_psi_gpucomp.py

Example 3: _sum_axis

def _sum_axis(x_gpu, axis=None, out=None, calc_mean=False, ddof=0,
              keepdims=False):
    global _global_cublas_allocator
    assert isinstance(ddof, numbers.Integral)

    if axis is None or len(x_gpu.shape) <= 1:
        out_shape = (1,)*len(x_gpu.shape) if keepdims else ()
        if not calc_mean:
            return gpuarray.sum(x_gpu).reshape(out_shape)
        else:
            return gpuarray.sum(x_gpu).reshape(out_shape) / (x_gpu.dtype.type(x_gpu.size-ddof))

    if axis < 0:
        axis += 2
    if axis > 1:
        raise ValueError('invalid axis')

    if x_gpu.flags.c_contiguous:
        n, m = x_gpu.shape[1], x_gpu.shape[0]
        lda = x_gpu.shape[1]
        trans = "n" if axis == 0 else "t"
        sum_axis, out_axis = (m, n) if axis == 0 else (n, m)
    else:
        n, m = x_gpu.shape[0], x_gpu.shape[1]
        lda = x_gpu.shape[0]
        trans = "t" if axis == 0 else "n"
        sum_axis, out_axis = (n, m) if axis == 0 else (m, n)

    if calc_mean:
        alpha = (1.0 / (sum_axis-ddof))
    else:
        alpha = 1.0
    if (x_gpu.dtype == np.complex64):
        gemv = cublas.cublasCgemv
    elif (x_gpu.dtype == np.float32):
        gemv = cublas.cublasSgemv
    elif (x_gpu.dtype == np.complex128):
        gemv = cublas.cublasZgemv
    elif (x_gpu.dtype == np.float64):
        gemv = cublas.cublasDgemv
    else:
        raise ValueError('unsupported input dtype')

    alloc = _global_cublas_allocator
    ons = ones((sum_axis, ), x_gpu.dtype, allocator=alloc)

    if keepdims:
        out_shape = (1, out_axis) if axis == 0 else (out_axis, 1)
    else:
        out_shape = (out_axis,)

    if out is None:
        out = gpuarray.empty(out_shape, x_gpu.dtype, alloc)
    else:
        assert out.dtype == x_gpu.dtype
        assert out.size >= out_axis

    gemv(_global_cublas_handle, trans, n, m,
         alpha, x_gpu.gpudata, lda,
         ons.gpudata, 1, 0.0, out.gpudata, 1)
    return out
Developer: Eric89GXL, Project: scikits.cuda, Lines: 59, Source: misc.py
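
A note on the design choice above: summing along an axis is expressed as a cuBLAS GEMV with a vector of ones, so the whole reduction runs as one tuned matrix-vector product (y = A·ones gives row sums; the transpose flag gives column sums). A CPU-side NumPy sketch of the same identity, with illustrative names and shapes:

import numpy as np

A = np.arange(12, dtype=np.float64).reshape(3, 4)
assert np.allclose(A @ np.ones(4), A.sum(axis=1))    # row sums as a matrix-vector product
assert np.allclose(A.T @ np.ones(3), A.sum(axis=0))  # column sums via the transpose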

Example 4: Average_TotalProbabilityP

	def Average_TotalProbabilityP( self, Psi1_GPU, Psi2_GPU, Psi3_GPU, Psi4_GPU):
				
		temp =  gpuarray.sum(  Psi1_GPU*Psi1_GPU.conj()  ).get()
		temp += gpuarray.sum(  Psi2_GPU*Psi2_GPU.conj()  ).get()
		temp += gpuarray.sum(  Psi3_GPU*Psi3_GPU.conj()  ).get()
		temp += gpuarray.sum(  Psi4_GPU*Psi4_GPU.conj()  ).get()

		return temp * self.dPx*self.dPy
Developer: cabrer7, Project: PyWignerCUDA, Lines: 8, Source: GPU_DiracDaviau2D.py
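
Examples 4, 7, 9, 11, 13 and 14 all share one numerical idea: gpuarray.sum over a uniform grid, multiplied by the grid cell volume (here self.dPx*self.dPy), approximates an integral as a Riemann sum. A CPU-side sketch of the pattern on a 1-D grid; the grid and integrand are illustrative:

import numpy as np

x, dx = np.linspace(0.0, np.pi, 1000, retstep=True)
f = np.sin(x)
print(f.sum() * dx)  # ~2.0, the exact value of the integral of sin over [0, pi]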

Example 5: gpu_sharpen

def gpu_sharpen(kernel, orig_image):
	# allocate memory for input and output
	curr_im, next_im = np.array(orig_image, dtype=np.float64), np.array(orig_image, dtype=np.float64)
	
	
	# Get image data
	height, width = np.int32(orig_image.shape)
	N = height * width
	print "Processing %d x %d image" % (width, height)

	# Allocate device memory and copy host to device
	start_transfer = time.time()
	d_curr = gpu.to_gpu(curr_im)
	d_next = gpu.to_gpu(next_im)
	stop_transfer = time.time()
	host_to_device = stop_transfer - start_transfer
	print("host to device transfer time: " + str(host_to_device))

	# Block size (threads per block)
	b_size = (32, 32, 1)
	# Grid size (blocks per grid)
	g_size = (int(np.ceil(float(width)/float(b_size[0]))), int(np.ceil(float(height)/float(b_size[1]))))
	# Initialize the GPU event trackers for timing
	start_gpu_time = cu.Event()
	end_gpu_time = cu.Event()
	
	start_gpu_time.record()
	
	# Compute the image's initial mean and variance
	init_mean = np.float64(gpu.sum(d_curr).get())/N

	var = ReductionKernel(dtype_out=np.float64, neutral= "0", reduce_expr= "a+b", map_expr="(x[i]-mu)*(x[i]-mu)/size", arguments="double* x, double mu, double size")
	init_variance = var(d_curr, np.float64(init_mean), np.float64(N)).get()
	
	variance = 0
	total = 0
	# while variance is less than a 20% difference from the initial variance, continue to sharpen
	while variance < 1.2 * init_variance:

		kernel(d_curr, EPSILON, d_next, height, width, block=b_size, grid=g_size)

		# Swap references to the images, next_im => curr_im
		d_curr, d_next = d_next, d_curr
		
		# calculate mean and variance
		mean = np.float64(gpu.sum(d_curr).get())/N

		variance = var(d_curr, np.float64(mean), np.float64(N)).get()
		
		print "Mean = %f,  Variance = %f" % (mean, variance)
	end_gpu_time.record()
	end_gpu_time.synchronize()
	gpu_time = start_gpu_time.time_till(end_gpu_time) * 1e-3  # time_till returns milliseconds

	print "GPU Time: %f" % gpu_time

	return d_curr.get()
Developer: ealehman, Project: image_video_edit, Lines: 58, Source: sharpen.py
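
Example 5's variance computation shows why custom reductions complement gpuarray.sum: pycuda's ReductionKernel fuses an elementwise map expression with the reduction, so the intermediate array (x[i]-mu)*(x[i]-mu)/size is never materialized. A minimal standalone sketch of the same ReductionKernel pattern; the sum-of-squares expression is illustrative:

import numpy as np
import pycuda.autoinit  # noqa: F401
import pycuda.gpuarray as gpuarray
from pycuda.reduction import ReductionKernel

# One fused pass over the data: map x[i]*x[i], then reduce with +
sum_sq = ReductionKernel(np.float64, neutral="0",
                         reduce_expr="a+b", map_expr="x[i]*x[i]",
                         arguments="double *x")
d_x = gpuarray.to_gpu(np.random.rand(1 << 20))
print(sum_sq(d_x).get())       # GPU result
print((d_x.get() ** 2).sum())  # CPU check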

Example 6: gCOVAR

def gCOVAR(data1, data2):
	dA1 = gpuarray.to_gpu(data1.astype(np.float32))
	dA2 = gpuarray.to_gpu(data2.astype(np.float32))
	dM1 = gpuarray.sum(dA1)/len(data1)
	dM2 = gpuarray.sum(dA2)/len(data1)
		
	covar = np.float64(kn.kCOVAR(dA1, dA2, dM1, dM2).get()/len(data1))
	
	return covar
Developer: darien, Project: fatsheet, Lines: 9, Source: __init__.py

Example 7: Norm_P_GPU

	def Norm_P_GPU( self, Psi1, Psi2, Psi3, Psi4):
		norm  = gpuarray.sum( Psi1.__abs__()**2  ).get()
		norm += gpuarray.sum( Psi2.__abs__()**2  ).get()
		norm += gpuarray.sum( Psi3.__abs__()**2  ).get()
		norm += gpuarray.sum( Psi4.__abs__()**2  ).get()

		norm = np.sqrt(norm*self.dPx * self.dPy )	
		
		return norm
Developer: cabrer7, Project: PyWignerCUDA, Lines: 9, Source: GPU_DiracDaviau2D.py

Example 8: gCORREL

def gCORREL(data1, data2):
	dA1 = gpuarray.to_gpu(data1.astype(np.float32))
	dA2 = gpuarray.to_gpu(data2.astype(np.float32))
	dM1 = gpuarray.sum(dA1)/len(data1)
	dM2 = gpuarray.sum(dA2)/len(data1)
	
	correl = np.float64(kn.kCOVAR(dA1, dA2, dM1, dM2).get() / \
	(kn.kSTDEV(dA1, dM1).get() * kn.kSTDEV(dA2, dM2).get())**.5)
	
	return correl
Developer: darien, Project: fatsheet, Lines: 10, Source: __init__.py

Example 9: Norm_GPU

	def Norm_GPU( self, Psi1, Psi2, Psi3, Psi4):
		norm  = gpuarray.sum( Psi1.__abs__()**2  ).get()
		norm += gpuarray.sum( Psi2.__abs__()**2  ).get()
		norm += gpuarray.sum( Psi3.__abs__()**2  ).get()
		norm += gpuarray.sum( Psi4.__abs__()**2  ).get()

		norm = np.sqrt(norm*self.dX * self.dY * self.dZ )

		#print '               norm GPU = ', norm		
		
		return norm
Developer: cabrer7, Project: PyWignerCUDA, Lines: 11, Source: GPU_Dirac3D.py

Example 10: gen_summary_stats

def gen_summary_stats(data):
    lb = data[data < mask]
    ub = data[data > mask]

    prob_ub = ub / lb
    n_ub = ub.size
    n_lb = lb.size
    mean_ub = gpuarray.sum(ub) / n_ub
    mean_lb = gpuarray.sum(lb) / n_lb
    var_ub = (ub - mean_ub)**2 / n_ub
    var_lb = (lb - mean_lb)**2 / n_lb
Developer: twiecki, Project: sim_drift_gpu, Lines: 11, Source: sim_drift_gpu.py

Example 11: get_wigner_time

    def get_wigner_time(self, wigner_current, wigner_init, t):
        """
        Calculate the integral:

            int_{H(x, p, t) > -Ip} [wigner_current(x,p) - wigner_init(x,p)] dxdp

        :param wigner_current: gpuarray containing current Wigner function
        :param wigner_init: gpuarray containing initial Wigner function
        :param t: current time
        :return: float
        """
        # If kernel calculating the wigner time is not present, compile it
        try:
            wigner_time_mapper = self._wigner_time_mapper
        except AttributeError:
            # Allocate memory to map
            self._tmp_wigner_time = gpuarray.empty(self.rho.shape, np.float64)

            wigner_time_mapper = self._wigner_time_mapper = SourceModule(
                self.wigner_time_mapper_cuda_code.format(
                    cuda_consts=self.cuda_consts, K=self.K, V=self.V
                ),
            ).get_function("Kernel")

        wigner_time_mapper(self._tmp_wigner_time, wigner_current, wigner_init, t, **self.rho_mapper_params)

        return gpuarray.sum(self._tmp_wigner_time).get() * self.wigner_dxdp
Developer: andregcampos, Project: FastWigner, Lines: 27, Source: rho_vneumann_cuda_1d.py

Example 12: ERA_probe

 def ERA_probe(self, iters=1):
     exits2_gpu = self.thr.empty_like(self.exits_gpu)
     print('i, eMod, eSup')
     for i in range(iters):
         exits2_gpu = self.Pmod(self.exits_gpu)
         #
         self.error_mod.append(gpuarray.sum(abs(self.exits_gpu - exits2_gpu)**2).get()/self.diffNorm)
         #
         exits = exits2_gpu.get()
         self.Psup_probe(exits)
         #
         self.thr.to_device(makeExits2(self.sample, self.probe, self.coords, exits), dest=self.exits_gpu)
         #
         self.error_sup.append(gpuarray.sum(abs(self.exits_gpu - exits2_gpu)**2).get()/self.diffNorm)
         #
         update_progress(i / max(1.0, float(iters-1)), 'ERA probe', i, self.error_mod[-1], self.error_sup[-1])
Developer: andyofmelbourne, Project: Ptychography, Lines: 16, Source: Ptychography_2dsample_2dprobe_farfield_gpu.py

Example 13: bloch_single_step_propagation

    def bloch_single_step_propagation(self, dbeta):
        """
        Perform a single step propagation with respect to the inverse temperature via the Bloch equation.
        The final Wigner function is renormalized at the end of the step.
        :param dbeta: (float) the inverse temperature step size
        :return: self.wignerfunction
        """
        self.p2theta_transform()
        self.bloch_expV_bulk(self.wigner_theta_x, dbeta, **self.V_bulk_mapper_params)
        self.bloch_expV_boundary(self.wigner_theta_x, dbeta, **self.V_boundary_mapper_params)
        self.theta2p_transform()

        self.x2lambda_transform()
        self.bloch_expK_bulk(self.wigner_p_lambda, dbeta, **self.K_bulk_mapper_params)
        self.bloch_expK_boundary(self.wigner_p_lambda, dbeta, **self.K_boundary_mapper_params)
        self.lambda2x_transform()

        self.p2theta_transform()
        self.bloch_expV_bulk(self.wigner_theta_x, dbeta, **self.V_bulk_mapper_params)
        self.bloch_expV_boundary(self.wigner_theta_x, dbeta, **self.V_boundary_mapper_params)
        self.theta2p_transform()

        # normalize
        self.wignerfunction /= gpuarray.sum(self.wignerfunction).get() * self.dXdP

        return self.wignerfunction
Developer: andregcampos, Project: FastWigner, Lines: 26, Source: wigner_bloch_cuda_1d.py

Example 14: get_wigner

    def get_wigner(self):
        """
        Transform the density matrix saved in self.rho into the unnormalized Wigner function
        :return: self.wignerfunction
        """
        # Create the density matrix out of the wavefunction
        self.psi2rho(self.wavefunction, self.wignerfunction, **self.wigner_mapper_params)

        # Step 1: Rotate by +45 degrees
        # Shear X
        cufft.fft_Z2Z(self.wignerfunction, self.wignerfunction, self.plan_Z2Z_ax1)
        self.phase_shearX(self.wignerfunction, **self.wigner_mapper_params)
        cufft.ifft_Z2Z(self.wignerfunction, self.wignerfunction, self.plan_Z2Z_ax1)

        # Shear Y
        cufft.fft_Z2Z(self.wignerfunction, self.wignerfunction, self.plan_Z2Z_ax0)
        self.phase_shearY(self.wignerfunction, **self.wigner_mapper_params)
        cufft.ifft_Z2Z(self.wignerfunction, self.wignerfunction, self.plan_Z2Z_ax0)

        # Shear X
        cufft.fft_Z2Z(self.wignerfunction, self.wignerfunction, self.plan_Z2Z_ax1)
        self.phase_shearX(self.wignerfunction, **self.wigner_mapper_params)
        cufft.ifft_Z2Z(self.wignerfunction, self.wignerfunction, self.plan_Z2Z_ax1)

        # Step 2: FFT the Blokhintsev function
        self.sign_flip(self.wignerfunction, **self.wigner_mapper_params)
        cufft.ifft_Z2Z(self.wignerfunction, self.wignerfunction, self.plan_Z2Z_ax0)
        self.sign_flip(self.wignerfunction, **self.wigner_mapper_params)

        # normalize
        self.wignerfunction /= gpuarray.sum(self.wignerfunction).get().real * self.wigner_dXdP

        return self.wignerfunction
Developer: dibondar, Project: AccurateWigner, Lines: 33, Source: schrodinger_wigner_cuda_1d.py

Example 15: _sum_axis

def _sum_axis(x_gpu, axis=None, out=None, calc_mean=False):
    global _global_cublas_allocator

    if axis is None:
        if not calc_mean:
            return gpuarray.sum(x_gpu).get()
        else:
            return gpuarray.sum(x_gpu).get() / x_gpu.dtype.type(x_gpu.size)

    if axis < 0:
        axis += 2
    if axis > 1:
        raise ValueError('invalid axis')

    if x_gpu.flags.c_contiguous:
        n, m = x_gpu.shape[1], x_gpu.shape[0]
        lda = x_gpu.shape[1]
        trans = "n" if axis == 0 else "t"
        sum_axis, out_axis = (m, n) if axis == 0 else (n, m)
    else:
        n, m = x_gpu.shape[0], x_gpu.shape[1]
        lda = x_gpu.shape[0]
        trans = "t" if axis == 0 else "n"
        sum_axis, out_axis = (n, m) if axis == 0 else (m, n)

    alpha = (1.0 / sum_axis) if calc_mean else 1.0
    if (x_gpu.dtype == np.complex64):
        gemv = cublas.cublasCgemv
    elif (x_gpu.dtype == np.float32):
        gemv = cublas.cublasSgemv
    elif (x_gpu.dtype == np.complex128):
        gemv = cublas.cublasZgemv
    elif (x_gpu.dtype == np.float64):
        gemv = cublas.cublasDgemv
    else:
        raise ValueError('unsupported input dtype')

    alloc = _global_cublas_allocator
    ons = ones((sum_axis, ), x_gpu.dtype, alloc)
    if out is None:
        out = gpuarray.empty((out_axis, ), x_gpu.dtype, alloc)
    else:
        assert out.dtype == x_gpu.dtype
        assert out.size >= out_axis

    gemv(_global_cublas_handle, trans, n, m,
         alpha, x_gpu.gpudata, lda,
         ons.gpudata, 1, 0.0, out.gpudata, 1)
    return out
Developer: oursland, Project: scikits.cuda, Lines: 47, Source: misc.py


Note: The pycuda.gpuarray.sum examples in this article were compiled by 纯净天空 from open-source code and documentation platforms such as GitHub and MSDocs. The snippets were selected from open-source projects contributed by many developers; copyright of the source code belongs to the original authors, and any use or redistribution should follow the corresponding project's license. Please do not repost without permission.