當前位置: 首頁>>代碼示例>>Python>>正文


Python gpuarray.to_gpu方法代碼示例

本文整理匯總了Python中pycuda.gpuarray.to_gpu方法的典型用法代碼示例。如果您正苦於以下問題:Python gpuarray.to_gpu方法的具體用法?Python gpuarray.to_gpu怎麽用?Python gpuarray.to_gpu使用的例子?那麽, 這裏精選的方法代碼示例或許可以為您提供幫助。您也可以進一步了解該方法所在pycuda.gpuarray的用法示例。


在下文中一共展示了gpuarray.to_gpu方法的15個代碼示例,這些例子默認根據受歡迎程度排序。您可以為喜歡或者感覺有用的代碼點讚,您的評價將有助於系統推薦出更棒的Python代碼示例。

示例1: _cached_gpuarray

# 需要導入模塊: from pycuda import gpuarray [as 別名]
# 或者: from pycuda.gpuarray import to_gpu [as 別名]
def _cached_gpuarray(self, array):
        """
        Return a GPU copy of a numpy array, uploading it at most once.

        The array is made C-contiguous and looked up in
        ``self._gpuarray_cache`` under a key built from its dtype, its shape
        and the python hash of its bytes.

        If it is not found in the cache, upload to gpu
        and store in cache, otherwise return cached allocation.

        dtype and shape are part of the key because arrays of different
        dtype or shape can have identical bytes (for example all-zero
        buffers of equal byte length) and must not share a cache entry.
        """

        array = np.ascontiguousarray(array)
        key = (array.dtype.str, array.shape, hash(array.tobytes()))
        try:
            array_gpu = self._gpuarray_cache[key]
        except KeyError:
            array_gpu = ga.to_gpu(array)
            self._gpuarray_cache[key] = array_gpu

        # for testing: read_back_and_check!

        return array_gpu
開發者ID:quantumsim,項目名稱:quantumsim,代碼行數:22,代碼來源:cuda.py

示例2: gpu_initialise_rx_arrays

# 需要導入模塊: from pycuda import gpuarray [as 別名]
# 或者: from pycuda.gpuarray import to_gpu [as 別名]
def gpu_initialise_rx_arrays(G):
    """Build the receiver coordinate and receiver output arrays and copy them to the GPU.

    Args:
        G (class): Grid class instance - holds essential parameters describing the model.

    Returns:
        tuple: (rxcoords_gpu, rxs_gpu) - GPU copies of the int32 receiver
            coordinate array and of the zero-filled receiver output array.
    """

    import pycuda.gpuarray as gpuarray

    n_receivers = len(G.rxs)

    # One (x, y, z) row of int32 coordinates per receiver
    coords_host = np.zeros((n_receivers, 3), dtype=np.int32)
    for row, receiver in enumerate(G.rxs):
        coords_host[row, :] = (receiver.xcoord, receiver.ycoord, receiver.zcoord)

    # Receiver output storage - rows are field components; columns are
    # iterations; pages are receivers
    outputs_shape = (len(Rx.gpu_allowableoutputs), G.iterations, n_receivers)
    outputs_host = np.zeros(outputs_shape, dtype=floattype)

    # Upload coordinates first, then the output storage
    return gpuarray.to_gpu(coords_host), gpuarray.to_gpu(outputs_host)
開發者ID:gprMax,項目名稱:gprMax,代碼行數:26,代碼來源:receivers.py

示例3: compute_vertical_bitvector_data

# 需要導入模塊: from pycuda import gpuarray [as 別名]
# 或者: from pycuda.gpuarray import to_gpu [as 別名]
def compute_vertical_bitvector_data(data, use_CUDA):
    """Turn a list of transactions into a vertical 0/1 item-by-transaction matrix.

    Each distinct item gets a row index in order of first appearance; entry
    (row, column) is 1 when that item occurs in that transaction.

    Args:
        data: sequence of transactions, each an iterable of items that can
            be converted with ``int()``.
        use_CUDA: when true, the matrix is cast to uint16 and uploaded with
            ``gpuarray.to_gpu``; otherwise the numpy array is returned.

    Returns:
        tuple: (vb_data, idx2item) where idx2item maps each row index to
            ``str(int(item))``.
    """
    # ---build item to idx mapping---#
    row_of_item = {}
    for transaction in data:
        for item in transaction:
            # len() is taken before insertion, so a new item gets the next free row
            row_of_item.setdefault(item, len(row_of_item))
    idx2item = {row: str(int(item)) for item, row in row_of_item.items()}

    # ---build vertical data---#
    vb_data = np.zeros((len(row_of_item), len(data)), dtype=int)
    for column, transaction in enumerate(data):
        for item in transaction:
            vb_data[row_of_item[item], column] = 1

    if use_CUDA:
        vb_data = gpuarray.to_gpu(vb_data.astype(np.uint16))
    print('Data transformed into vertical bitvector representation with shape: ', np.shape(vb_data))
    return vb_data, idx2item


##############
# COMPUTE L1 #
############## 
開發者ID:andi611,項目名稱:Apriori-and-Eclat-Frequent-Itemset-Mining,代碼行數:26,代碼來源:eclat.py

示例4: gpuReduce

# 需要導入模塊: from pycuda import gpuarray [as 別名]
# 或者: from pycuda.gpuarray import to_gpu [as 別名]
def gpuReduce(self, outtype, mapper,reduceop, arglist):
        """Performs a map or reduce task on GPU by taking C code embedded in strings.

        The collected dataset is uploaded once and passed to the kernel as
        both of its vector arguments. The reduce expression and the result
        are printed; nothing is returned.

        Args:
            outtype: output type handed to ``reduction.ReductionKernel``.
            mapper: C expression used as the kernel's ``map_expr``.
            reduceop: reduction operator, either "+" or "*".
            arglist: C argument declaration used as the kernel's ``arguments``.

        Raises:
            ValueError: if ``reduceop`` is neither "+" nor "*".

        >>> rdd = sc.parallelize(range(1,10000))
        >>> rdd.gpuReduce(long, "x[i] * y[i]", "+" ,"long *x, long *y")
        """
        # Neutral element of each supported operator. Validate before touching
        # the data so an unsupported operator fails fast with a clear error
        # instead of an unbound local further down.
        neutral_elements = {"*": "1", "+": "0"}
        if reduceop not in neutral_elements:
            raise ValueError(
                'Currently only "+" and "*" operations are supported '
                'by GPU reduction (got %r)' % (reduceop,))
        initval = neutral_elements[reduceop]

        cpudataset = np.asarray(self.collect())
        a = gpuarray.to_gpu(cpudataset)
        b = a

        reduceexpr = "a" + reduceop + "b"
        print(reduceexpr)

        krnl = reduction.ReductionKernel(outtype, neutral=initval, map_expr=mapper, reduce_expr=reduceexpr, arguments=arglist)
        results = krnl(a, b).get()
        print(results)
開發者ID:adobe-research,項目名稱:spark-gpu,代碼行數:22,代碼來源:rdd.py

示例5: gpuWordCount

# 需要導入模塊: from pycuda import gpuarray [as 別名]
# 或者: from pycuda.gpuarray import to_gpu [as 別名]
def gpuWordCount(self):
        """Count words in the collected lines by counting word starts.

        The lines are joined with single spaces. On the GPU, every position
        where a space (ASCII code 32) is followed by a non-space character is
        counted; one more word is added when the text itself begins with a
        non-space character. Texts shorter than two characters are answered
        on the host without launching a kernel.

        Returns:
            The word count: a plain int for texts shorter than two
            characters, otherwise the value produced by the reduction kernel.

           >>> rdd = sc.textFile("README.md")
           >>> rdd.gpuWordCount()
        """
        cpudataset = " ".join(self.collect())

        # The kernel only sees space -> non-space transitions, so a word that
        # opens the text must be added separately. Empty text, or text that
        # starts with a space, has no such word.
        leading_word = 0 if cpudataset[:1] in ("", " ") else 1
        if len(cpudataset) < 2:
            # No adjacent character pair exists, so there is nothing to reduce
            return leading_word

        asciidata = np.asarray([ord(x) for x in cpudataset], dtype=np.uint8)
        gpudataset = gpuarray.to_gpu(asciidata)
        # NOTE(review): `long` is the Python 2 integer type; this snippet
        # presumably targets Python 2 - confirm before running on Python 3.
        countkrnl = reduction.ReductionKernel(long, neutral = "0",
                        map_expr = "(a[i] == 32)*(b[i] != 32)",
                        reduce_expr = "a + b", arguments = "char *a, char *b")

        # Compare every character with its successor via two shifted views
        results = leading_word + countkrnl(gpudataset[:-1],gpudataset[1:]).get()
        return results
開發者ID:adobe-research,項目名稱:spark-gpu,代碼行數:22,代碼來源:rdd.py

示例6: up_sweep

# 需要導入模塊: from pycuda import gpuarray [as 別名]
# 或者: from pycuda.gpuarray import to_gpu [as 別名]
def up_sweep(x):
    """Run the ``up_gpu`` kernel once per level over ``x`` and return the result.

    The input is converted to float64, uploaded, and processed for
    ``int(log2(x.size))`` levels. Each level launches 32-thread blocks when
    more than one block is needed, otherwise a single block with exactly the
    required number of threads. After each launch the "old" device buffer is
    refreshed from the current one.

    Returns:
        The processed data copied back to the host.
    """
    # let's typecast to be safe.
    x = np.float64(x)
    x_gpu = gpuarray.to_gpu(x)
    x_old_gpu = x_gpu.copy()

    n_levels = int(np.log2(x.size))
    for k in range(n_levels):
        num_threads = int(np.ceil(x.size / 2**(k+1)))
        grid_size = int(np.ceil(num_threads / 32))
        block_size = 32 if grid_size > 1 else num_threads

        up_gpu(x_gpu, x_old_gpu, np.int32(k), block=(block_size, 1, 1), grid=(grid_size, 1, 1))
        x_old_gpu[:] = x_gpu[:]

    return x_gpu.get()

# kernel for down-sweep phase 
開發者ID:PacktPublishing,項目名稱:Hands-On-GPU-Programming-with-Python-and-CUDA,代碼行數:23,代碼來源:work-efficient_prefix.py

示例7: down_sweep

# 需要導入模塊: from pycuda import gpuarray [as 別名]
# 或者: from pycuda.gpuarray import to_gpu [as 別名]
def down_sweep(y):
    """Run the ``down_gpu`` kernel once per level over ``y``, from the top level down.

    The input is converted to float64 and its last element is set to zero
    before upload. Levels run from ``int(log2(y.size)) - 1`` down to 0. Each
    level launches 32-thread blocks when more than one block is needed,
    otherwise a single block with exactly the required number of threads.
    After each launch the "old" device buffer is refreshed from the current
    one.

    Returns:
        The processed data copied back to the host.
    """
    y = np.float64(y)
    y[-1] = 0
    y_gpu = gpuarray.to_gpu(y)
    y_old_gpu = y_gpu.copy()

    n_levels = int(np.log2(y.size))
    for k in range(n_levels - 1, -1, -1):
        num_threads = int(np.ceil(y.size / 2**(k+1)))
        grid_size = int(np.ceil(num_threads / 32))
        block_size = 32 if grid_size > 1 else num_threads

        down_gpu(y_gpu, y_old_gpu, np.int32(k), block=(block_size, 1, 1), grid=(grid_size, 1, 1))
        y_old_gpu[:] = y_gpu[:]

    return y_gpu.get()
    
   
# full implementation of work-efficient parallel prefix sum 
開發者ID:PacktPublishing,項目名稱:Hands-On-GPU-Programming-with-Python-and-CUDA,代碼行數:23,代碼來源:work-efficient_prefix.py

示例8: gpu_mandelbrot

# 需要導入模塊: from pycuda import gpuarray [as 別名]
# 或者: from pycuda.gpuarray import to_gpu [as 別名]
def gpu_mandelbrot(width, height, real_low, real_high, imag_low, imag_high, max_iters, upper_bound):
    """Evaluate ``mandel_ker`` over a complex lattice on the GPU.

    Args:
        width: number of lattice points along the real axis (columns).
        height: number of lattice points along the imaginary axis (rows).
        real_low, real_high: real-axis range, sampled from low to high.
        imag_low, imag_high: imaginary-axis range, sampled from high to low,
            so row 0 holds ``imag_high``.
        max_iters: passed to the kernel as an int32.
        upper_bound: passed to the kernel as a float32.

    Returns:
        A float32 array of shape (height, width) read back from the GPU
        output buffer that ``mandel_ker`` receives.
    """
    # we set up our complex lattice as such: a row of real parts plus a
    # column of imaginary parts, combined by broadcasting. Plain ndarrays are
    # used instead of np.matrix, which NumPy no longer recommends.
    real_vals = np.linspace(real_low, real_high, width).astype(np.complex64)
    imag_vals = np.linspace(imag_high, imag_low, height).astype(np.complex64) * 1j
    mandelbrot_lattice = np.array(real_vals[np.newaxis, :] + imag_vals[:, np.newaxis], dtype=np.complex64)

    # copy complex lattice to the GPU
    mandelbrot_lattice_gpu = gpuarray.to_gpu(mandelbrot_lattice)

    # allocate an empty array on the GPU
    mandelbrot_graph_gpu = gpuarray.empty(shape=mandelbrot_lattice.shape, dtype=np.float32)

    mandel_ker(mandelbrot_lattice_gpu, mandelbrot_graph_gpu, np.int32(max_iters), np.float32(upper_bound))

    mandelbrot_graph = mandelbrot_graph_gpu.get()

    return mandelbrot_graph
開發者ID:PacktPublishing,項目名稱:Hands-On-GPU-Programming-with-Python-and-CUDA,代碼行數:20,代碼來源:gpu_mandelbrot0.py

示例9: scikit_gpu_fft_pipeline

# 需要導入模塊: from pycuda import gpuarray [as 別名]
# 或者: from pycuda.gpuarray import to_gpu [as 別名]
def scikit_gpu_fft_pipeline(filename):
    """Time repeated GPU FFT / inverse-FFT round trips over samples read from a file.

    For each gulp, ``GULP_SIZE * GULP_FRAME_FFT`` complex64 samples are read,
    reshaped to (GULP_FRAME_FFT, GULP_SIZE), uploaded, and pushed through
    ``NUMBER_FFT`` forward/inverse FFT pairs using freshly allocated output
    buffers each time.

    Args:
        filename: path of the file holding raw complex64 samples.

    Returns:
        The difference between the ``timer()`` readings taken after and
        before the whole read-and-transform loop.
    """
    start = timer()
    # Binary mode: np.fromfile reads raw complex64 samples, so the file must
    # not go through text decoding or newline translation.
    with open(filename, 'rb') as file_obj:
        for _ in range(((32768*1024*SIZE_MULTIPLIER//GULP_SIZE)//COMPLEX_MULTIPLIER)//GULP_FRAME_FFT):
            data = np.fromfile(file_obj, dtype=np.complex64, count=GULP_SIZE*GULP_FRAME_FFT).reshape((GULP_FRAME_FFT, GULP_SIZE))
            g_data = gpuarray.to_gpu(data)
            # Plans are built per gulp on purpose here; this keeps the timed
            # work the same as before.
            plan = Plan(data.shape[1], np.complex64, np.complex64, batch=GULP_FRAME_FFT)
            plan_inverse = Plan(data.shape[1], np.complex64, np.complex64, batch=GULP_FRAME_FFT)
            tmp1 = gpuarray.empty(data.shape, dtype=np.complex64)
            tmp2 = gpuarray.empty(data.shape, dtype=np.complex64)
            fft(g_data, tmp1, plan)
            ifft(tmp1, tmp2, plan_inverse)
            for _round in range(NUMBER_FFT-1):
                # Can't do FFT in place for fairness (emulating full pipeline)
                tmp1 = gpuarray.empty(data.shape, dtype=np.complex64)
                fft(tmp2, tmp1, plan)
                tmp2 = gpuarray.empty(data.shape, dtype=np.complex64)
                ifft(tmp1, tmp2, plan_inverse)
    end = timer()
    return end-start
開發者ID:ledatelescope,項目名稱:bifrost,代碼行數:23,代碼來源:skcuda_fft_pipeline.py

示例10: interpolate

# 需要導入模塊: from pycuda import gpuarray [as 別名]
# 或者: from pycuda.gpuarray import to_gpu [as 別名]
def interpolate(self, flow, freqs, amps, phases):
        """Upload the three sample tables, bind them to textures and run both kernels.

        ``freqs``, ``amps`` and ``phases`` are copied to the GPU and bound to
        ``self.freq_tex``, ``self.amp_tex`` and ``self.phase_tex``. Then
        ``self.fn1`` and ``self.fn2`` are launched through ``prepared_call``
        and the CUDA context is synchronized. Nothing is returned.
        """
        flow = numpy.float32(flow)
        texlen = numpy.int32(len(freqs))
        fmax = numpy.float32(freqs[texlen-1])

        # The device arrays are collected in a list so they stay referenced
        # until this method returns - presumably the bound textures need the
        # buffers to remain allocated while the kernels run; TODO confirm.
        device_tables = []
        for host_table, tex_attr in ((freqs, 'freq_tex'), (amps, 'amp_tex'), (phases, 'phase_tex')):
            table_gpu = gpuarray.to_gpu(host_table)
            table_gpu.bind_to_texref_ext(getattr(self, tex_attr), allow_offset=False)
            device_tables.append(table_gpu)

        launch_first = self.fn1.prepared_call
        launch_second = self.fn2.prepared_call
        launch_first((1, 1), (self.nb, 1, 1),
                     self.lower, self.upper, texlen, self.df, flow, fmax)
        launch_second((self.nb, 1), (self.nt, 1, 1),
                      self.output, self.df, self.hlen, flow, fmax, texlen, self.lower, self.upper)
        pycbc.scheme.mgr.state.context.synchronize()
開發者ID:gwastro,項目名稱:pycbc,代碼行數:18,代碼來源:decompress_cuda.py

示例11: gpu_initialise_arrays

# 需要導入模塊: from pycuda import gpuarray [as 別名]
# 或者: from pycuda.gpuarray import to_gpu [as 別名]
def gpu_initialise_arrays(self):
        """Initialise standard field arrays on GPU.

        ``self.ID`` is copied to ``self.ID_gpu``. Each of ``Ex_gpu``,
        ``Ey_gpu``, ``Ez_gpu``, ``Hx_gpu``, ``Hy_gpu`` and ``Hz_gpu`` is set
        to a GPU copy of a zero array of shape (nx + 1, ny + 1, nz + 1) and
        dtype ``floattype``.
        """

        import pycuda.gpuarray as gpuarray

        self.ID_gpu = gpuarray.to_gpu(self.ID)

        # All six field components share one shape; each gets its own upload
        field_shape = (self.nx + 1, self.ny + 1, self.nz + 1)
        for component in ('Ex', 'Ey', 'Ez', 'Hx', 'Hy', 'Hz'):
            zeros_host = np.zeros(field_shape, dtype=floattype)
            setattr(self, component + '_gpu', gpuarray.to_gpu(zeros_host))
開發者ID:gprMax,項目名稱:gprMax,代碼行數:14,代碼來源:grid.py

示例12: gpu_initialise_dispersive_arrays

# 需要導入模塊: from pycuda import gpuarray [as 別名]
# 或者: from pycuda.gpuarray import to_gpu [as 別名]
def gpu_initialise_dispersive_arrays(self):
        """Initialise dispersive material coefficient arrays on GPU.

        Copies ``self.Tx``, ``self.Ty``, ``self.Tz`` and
        ``self.updatecoeffsdispersive`` to the GPU, in that order, storing
        each copy under the same attribute name with a ``_gpu`` suffix.
        """

        import pycuda.gpuarray as gpuarray

        for attr in ('Tx', 'Ty', 'Tz', 'updatecoeffsdispersive'):
            setattr(self, attr + '_gpu', gpuarray.to_gpu(getattr(self, attr)))
開發者ID:gprMax,項目名稱:gprMax,代碼行數:11,代碼來源:grid.py

示例13: from_np

# 需要導入模塊: from pycuda import gpuarray [as 別名]
# 或者: from pycuda.gpuarray import to_gpu [as 別名]
def from_np(self, matrix):
        """Cast ``matrix`` to ``self.floattype`` and return a GPU copy of it."""
        host_matrix = matrix.astype(self.floattype)
        return gpuarray.to_gpu(host_matrix)
開發者ID:comp-imaging,項目名稱:ProxImaL,代碼行數:4,代碼來源:cuda_codegen.py

示例14: test_performance

# 需要導入模塊: from pycuda import gpuarray [as 別名]
# 或者: from pycuda.gpuarray import to_gpu [as 別名]
def test_performance(self):
        """Check that the CUDA forward and adjoint passes run faster than the CPU ones.

        Builds a computation graph over a 2000x2000 variable, runs 10 forward
        passes on the CPU and 10 with ``forward_cuda``, then 10 adjoint passes
        on each, and asserts the GPU time is lower both times. Timings are
        logged in milliseconds.
        """
        c = random.rand(2000,2000)
        x = Variable([2000,2000])
        K = np.abs(random.rand(9,9))
        G = CompGraph(vstack([ subsample((conv_nofft(K, x) -c)*5, [2,4]), x*10 ]))
        xtest1 = random.rand(2000*2000).astype(np.float32)
        ytest1 = np.zeros(G.output_size, dtype=np.float32)
        t1_cpu = time.time()
        for i in range(10):
            ytest1 = G.forward(xtest1, ytest1)
        t2_cpu = time.time()

        xtest = gpuarray.to_gpu(xtest1.astype(np.float32))
        ytest = gpuarray.to_gpu(ytest1.astype(np.float32))
        t1_gpu = time.time()
        for i in range(10):
            ytest = G.forward_cuda(xtest, ytest)
        t2_gpu = time.time()

        t_cpu = t2_cpu - t1_cpu
        t_gpu = t2_gpu - t1_gpu
        # time.time() differences are seconds; scale to match the "ms" label
        logging.info("Forward timing: cpu=%.2f ms gpu=%.2f ms factor=%.3f" % (t_cpu * 1000.0, t_gpu * 1000.0, t_gpu/t_cpu))
        self.assertTrue(t_gpu < t_cpu)

        t1_cpu = time.time()
        for i in range(10):
            xtest1 = G.adjoint(ytest1, xtest1)
        t2_cpu = time.time()

        t1_gpu = time.time()
        for i in range(10):
            xtest = G.adjoint_cuda(ytest, xtest)
        t2_gpu = time.time()

        t_cpu = t2_cpu - t1_cpu
        t_gpu = t2_gpu - t1_gpu
        # time.time() differences are seconds; scale to match the "ms" label
        logging.info("Adjoint timing: cpu=%.2f ms gpu=%.2f ms factor=%.3f" % (t_cpu * 1000.0, t_gpu * 1000.0, t_gpu/t_cpu))
        self.assertTrue(t_gpu < t_cpu)

        #print( G.start.adjoint_cuda(G, 0, "i", None)[0] ) 
開發者ID:comp-imaging,項目名稱:ProxImaL,代碼行數:42,代碼來源:test_cuda_comp_graph.py

示例15: gpuWordCount

# 需要導入模塊: from pycuda import gpuarray [as 別名]
# 或者: from pycuda.gpuarray import to_gpu [as 別名]
def gpuWordCount(self):
        """Count space-to-non-space transitions per partition on the GPU.

        ``gpuFunc`` is passed to ``self.rdd.mapPartitions``. For one
        partition it joins the items with spaces, uploads the character codes,
        and yields the number of positions where a space (ASCII code 32) is
        followed by a non-space character.

        Returns:
            Whatever ``self.rdd.mapPartitions(gpuFunc)`` returns.
        """
        def gpuFunc(iterator):
            # 1. Data preparation
            cpu_dataset = " ".join(list(iterator))
            ascii_data = np.asarray([ord(x) for x in cpu_dataset], dtype=np.uint8)

            # 2. Driver initialization and data transfer
            cuda.init()
            dev = cuda.Device(0)
            contx = dev.make_context()
            gpu_dataset = None
            try:
                gpu_dataset = gpuarray.to_gpu(ascii_data)

                # 3. GPU kernel.
                # The kernel's algorithm counts the words by keeping
                # track of the space between them
                # NOTE(review): `long` is the Python 2 integer type; this
                # snippet presumably targets Python 2 - confirm.
                countkrnl = reduction.ReductionKernel(long, neutral = "0",
                        map_expr = "(a[i] == 32)*(b[i] != 32)",
                        reduce_expr = "a + b", arguments = "char *a, char *b")

                # .get() copies the result to the host, so it stays usable
                # after the context is released below
                results = countkrnl(gpu_dataset[:-1],gpu_dataset[1:]).get()
            finally:
                # Release GPU context resources even when the upload or the
                # kernel fails, and before handing control back at the yield
                contx.pop()
                del gpu_dataset
                del contx

                gc.collect()

            yield results

        vals = self.rdd.mapPartitions(gpuFunc)
        return vals
開發者ID:adobe-research,項目名稱:spark-gpu,代碼行數:35,代碼來源:wordcount_mapp.py


注:本文中的pycuda.gpuarray.to_gpu方法示例由純淨天空整理自Github/MSDocs等開源代碼及文檔管理平台,相關代碼片段篩選自各路編程大神貢獻的開源項目,源碼版權歸原作者所有,傳播和使用請參考對應項目的License;未經允許,請勿轉載。