本文整理汇总了Python中numba.cuda.grid函数的典型用法代码示例。如果您正苦于以下问题：Python grid函数的具体用法？Python grid怎么用？Python grid使用的例子？那么恭喜您，这里精选的函数代码示例或许可以为您提供帮助。
在下文中一共展示了grid函数的15个代码示例，这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞，您的评价将有助于系统推荐出更棒的Python代码示例。
示例1: compute_lifetimes_CUDA
def compute_lifetimes_CUDA(nweight, lifetimes):
    """Store the difference between consecutive ``nweight`` entries.

    The thread at 1-D grid position ``t`` writes
    ``nweight[t + 1] - nweight[t]`` into ``lifetimes[t]``. Threads whose
    position is at or beyond ``lifetimes.size`` do nothing.
    """
    tid = cuda.grid(1)
    if tid < lifetimes.size:
        upper = nweight[tid + 1]
        lower = nweight[tid]
        lifetimes[tid] = upper - lower
示例2: d2_to_d1_sum
def d2_to_d1_sum(d1, d2):
    """Reduce each column of ``d2`` into the matching element of ``d1``.

    The thread at 1-D grid position ``col`` writes ``d2[:, col].sum()`` to
    ``d1[col]``; positions at or beyond ``len(d1)`` are ignored.
    """
    col = cuda.grid(1)
    if col < len(d1):
        d1[col] = d2[:, col].sum()
示例3: get_grad_omega
def get_grad_omega(grad_omega, omega, r, d, qbin):
    """
    Fill ``grad_omega`` with the gradient of the Debye sum with respect to
    atomic positions.

    Parameters
    ----------
    grad_omega: kx3xQ array
        Output array that receives the gradient
    omega: kxQ array
        Debye sum
    r: k array
        The pair distance array
    d: kx3 array
        The pair displacements
    qbin: float
        The qbin size
    """
    n_pairs, _, n_qbins = grad_omega.shape
    pair, q_idx = cuda.grid(2)
    if pair < n_pairs and q_idx < n_qbins:
        q_val = f4(q_idx) * qbin
        dist = r[pair]
        # Scalar factor shared by all three displacement components.
        coeff = q_val * math.cos(q_val * dist) - omega[pair, q_idx]
        coeff = coeff / (dist * dist)
        for axis in range(i4(3)):
            grad_omega[pair, axis, q_idx] = coeff * d[pair, axis]
示例4: getWeightsOfEdges_gpu
def getWeightsOfEdges_gpu(edges, n_edges, weights, nweights):
    """
    Gather the weights of the considered edges and mark the rest as
    discarded.

    ``edges`` holds edge IDs, ``n_edges[0]`` is the number of leading
    entries of ``edges`` that are considered, ``weights`` holds the weight
    of every possible edge, and ``nweights`` receives the weight of each
    considered edge at the same position the edge has in ``edges``.

    Entries of ``edges`` at positions >= ``n_edges[0]`` are discarded by
    overwriting them with -1.
    """
    pos = cuda.grid(1)
    if pos >= edges.size:
        return

    if pos < n_edges[0]:
        # Considered edge: look up its weight through its edge ID.
        nweights[pos] = weights[edges[pos]]
    else:
        # Not considered: flag the slot as removed.
        edges[pos] = -1
示例5: gpu_expand_mask_bits
def gpu_expand_mask_bits(bits, out):
    """Write one element of *out* for each bit of the bitmask *bits*.

    Every thread starts at its own 1-D grid position and advances by the
    total grid size, so the kernel covers all of *out* regardless of how
    many blocks and threads it is launched with.
    """
    first = cuda.grid(1)
    step = cuda.gridsize(1)
    for pos in range(first, out.size, step):
        out[pos] = mask_get(bits, pos)
示例6: cufftShift_2D_kernel
def cufftShift_2D_kernel(data, N):
    """
    In-place 2D FFT shift of a flattened N x N slice.

    adopted CUDA FFT shift code from:
    https://github.com/marwan-abdellah/cufftShift
    (GNU Lesser Public License)

    Parameters
    ----------
    data : 1D array
        The slice, addressed as ``data[y * N + x]``; modified in place.
    N : int
        Side length of the slice.

    Threads in the upper half (``y < N / 2``) each swap one element with
    its partner in the diagonally opposite quadrant. Threads positioned
    outside the N x N slice return without touching ``data``.
    """
    # 2D slice & 1D line sizes
    sLine = N
    sSlice = N * N
    # Offsets to the diagonally opposite quadrant. N*N + N and N*N - N are
    # always even, so floor division is exact.
    sEq1 = (sSlice + sLine) // 2
    sEq2 = (sSlice - sLine) // 2
    x, y = cuda.grid(2)
    # Without this guard a thread with x >= N would compute a flattened
    # index that aliases an element of another row and swap the wrong data.
    if x >= N or y >= N:
        return
    # Thread index converted into 1D index
    index = (y * N) + x
    if x < N / 2:
        if y < N / 2:
            # First quadrant <-> third quadrant
            temp = data[index]
            data[index] = data[index + sEq1]
            data[index + sEq1] = temp
    else:
        if y < N / 2:
            # Second quadrant <-> fourth quadrant
            temp = data[index]
            data[index] = data[index + sEq2]
            data[index + sEq2] = temp
示例7: builtin_max
def builtin_max(A, B, C):
    """Store ``max(A[i], B[i])`` converted to float64 in ``C[i]``.

    Threads positioned at or beyond ``len(C)`` do nothing.
    """
    pos = cuda.grid(1)
    if pos < len(C):
        larger = max(A[pos], B[pos])
        C[pos] = float64(larger)
示例8: vec_add_ilp_x4
def vec_add_ilp_x4(a, b, c):
    """Combine four elements of ``a`` and ``b`` per thread via ``core``.

    The four elements handled by one thread are spaced
    ``gridDim.x * blockDim.x`` apart. All loads are issued first, then all
    ``core`` calls, then all stores. No bounds checking is performed.
    """
    step = cuda.gridDim.x * cuda.blockDim.x

    # Load phase: four (a, b) pairs, one grid width apart.
    i0 = cuda.grid(1)
    a0 = a[i0]
    b0 = b[i0]
    i1 = i0 + step
    a1 = a[i1]
    b1 = b[i1]
    i2 = i1 + step
    a2 = a[i2]
    b2 = b[i2]
    i3 = i2 + step
    a3 = a[i3]
    b3 = b[i3]

    # Compute phase.
    r0 = core(a0, b0)
    r1 = core(a1, b1)
    r2 = core(a2, b2)
    r3 = core(a3, b3)

    # Store phase.
    c[i0] = r0
    c[i1] = r1
    c[i2] = r2
    c[i3] = r3
示例9: kernel
def kernel(dst, src):
    """Write ``src[i] + 1`` into ``dst[i]`` for every in-range thread index.

    Threads positioned at or beyond ``dst.shape[0]`` do nothing.
    """
    pos = cuda.grid(1)
    if pos < dst.shape[0]:
        dst[pos] = src[pos] + 1
示例10: addEdges
def addEdges(edges, n_edges, dest, weight, fe, od, top_edge, ndest, nweight):
    """
    Insert every surviving edge, in both directions, into ``ndest`` and
    ``nweight``.

    One thread handles one entry of ``edges``. Entries equal to -1 were
    marked for removal and are skipped.

    Parameters
    ----------
    edges : array of edge IDs; -1 marks a removed edge
    n_edges : not read by this kernel (kept for interface compatibility)
    dest : array indexed by edge ID; ``dest[key]`` is one endpoint
    weight : array indexed by edge ID giving the edge weight
    fe, od : forwarded unchanged to ``binaryOriginVertexSearch_CUDA``,
        which yields the other endpoint of the edge
    top_edge : per-vertex write cursor, advanced atomically
    ndest : output array receiving the opposite endpoint for each slot
    nweight : output array receiving the edge weight for each slot
    """
    edge = cuda.grid(1)
    # Guard against launches with more threads than entries in ``edges``.
    if edge >= edges.size:
        return
    key = edges[edge]
    # if key is -1 the edge was marked for removal
    if key == -1:
        return
    o_v = dest[key]
    i_v = binaryOriginVertexSearch_CUDA(key, dest, fe, od)
    # Reserve one output slot per endpoint. atomic.add returns the value
    # held before the increment, which is the slot this thread may write.
    i_ptr = cuda.atomic.add(top_edge, i_v, 1)
    o_ptr = cuda.atomic.add(top_edge, o_v, 1)
    # add edges to destination array
    ndest[i_ptr] = o_v
    ndest[o_ptr] = i_v
    # add weight to edges
    edge_w = weight[key]
    nweight[i_ptr] = edge_w
    nweight[o_ptr] = edge_w
示例11: lateral_inh
def lateral_inh(S, V, K_inh):
    """Apply lateral inhibition across maps at each (x, y) position.

    Each thread handles one ``(x, y, map)`` entry. If that entry of ``S``
    is 1 (the neuron fired), it is reset to 0 when either
    ``K_inh[x, y]`` is already 0, or another map at the same position has
    ``S == 1`` and a strictly larger value in ``V``. Otherwise the entry
    keeps its spike and ``K_inh[x, y]`` is set to 0.

    NOTE(review): presumably ``S`` holds spikes and ``V`` potentials;
    confirm against the caller.
    """
    px, py, pz = cuda.grid(3)
    if px >= V.shape[0] or py >= V.shape[1] or pz >= V.shape[2]:
        return

    # A neuron that has not fired needs no further processing.
    if S[px, py, pz] != 1:
        return

    # A neuron at this position has fired before: do not fire again.
    if K_inh[px, py] == 0:
        S[px, py, pz] = 0
        return

    # Compare with the neurons at this position in the other maps.
    for other in range(V.shape[2]):
        if S[px, py, other] != 1:
            continue
        if V[px, py, pz] < V[px, py, other]:
            S[px, py, pz] = 0
            return

    K_inh[px, py] = 0
示例12: removeEdges
def removeEdges(edgeList, sortedArgs, n_discarded):
    """
    Remove discarded edges from the edge list.

    inputs:
        edgeList    : list of edges
        sortedArgs  : argument list of the sorted weight list
                      (not read by this kernel)
        n_discarded : number of edges to be discarded

    One thread is used per edge that must be discarded: thread ``t``
    overwrites ``edgeList[edgeList.size - 1 - t]`` with -1, so the last
    ``n_discarded`` positions of ``edgeList`` are replaced by -1.

    NOTE(review): the docstring this replaces spoke of edges "specified by
    the last n_discarded arguments in the sortedArgs list", but the code
    indexes ``edgeList`` directly. Confirm whether the write should go
    through ``sortedArgs``.
    """
    tgid = cuda.grid(1)
    # Only threads below n_discarded have an edge to remove. The second
    # test keeps the mirrored index non-negative if n_discarded exceeds
    # the list size. No other state is needed to select the threads.
    if tgid >= n_discarded or tgid >= edgeList.size:
        return
    maxIdx = edgeList.size - 1  # maximum index of edgeList
    index = maxIdx - tgid  # position counted from the end of the list
    edgeList[index] = -1
示例13: gpu_gather
def gpu_gather(data, index, out):
    """Copy ``data[index[i]]`` into ``out[i]`` for every valid ``i``.

    Threads positioned at or beyond ``index.size`` do nothing, and an
    ``index[i]`` outside ``[0, data.size)`` leaves ``out[i]`` untouched.
    """
    pos = cuda.grid(1)
    if pos >= index.size:
        return
    src = index[pos]
    # Skip source positions that fall outside data.
    if src < 0 or src >= data.size:
        return
    out[pos] = data[src]
示例14: vec_add_ilp_x8
def vec_add_ilp_x8(a, b, c):
    """Combine eight elements of ``a`` and ``b`` per thread via ``core``.

    The eight elements handled by one thread are spaced
    ``gridDim.x * blockDim.x`` apart. All loads are issued first, then all
    ``core`` calls, then all stores. No bounds checking is performed.
    """
    # read
    i = cuda.grid(1)
    ai = a[i]
    bi = b[i]
    bw = cuda.blockDim.x
    gw = cuda.gridDim.x
    stride = gw * bw
    j = i + stride
    aj = a[j]
    bj = b[j]
    k = j + stride
    ak = a[k]
    bk = b[k]
    l = k + stride
    al = a[l]
    bl = b[l]
    m = l + stride
    am = a[m]
    bm = b[m]
    n = m + stride
    an = a[n]
    bn = b[n]
    o = n + stride
    ao = a[o]
    bo = b[o]
    p = o + stride
    # The eighth pair must come from index p (not o), otherwise c[p] would
    # duplicate the result computed for index o.
    ap = a[p]
    bp = b[p]
    # compute
    ci = core(ai, bi)
    cj = core(aj, bj)
    ck = core(ak, bk)
    cl = core(al, bl)
    cm = core(am, bm)
    cn = core(an, bn)
    co = core(ao, bo)
    cp = core(ap, bp)
    # write
    c[i] = ci
    c[j] = cj
    c[k] = ck
    c[l] = cl
    c[m] = cm
    c[n] = cn
    c[o] = co
    c[p] = cp
示例15: function_with_lots_of_registers
def function_with_lots_of_registers(x, a, b, c, d, e, f):
    """Update twenty local accumulators ``a`` times and store their sums.

    Four groups of five locals are repeatedly added to, multiplied by,
    divided by and left-shifted by ``b`` .. ``f``. After the loop the
    group sums are written, one group at a time, into ``x`` at this
    thread's 1-D grid position.

    NOTE(review): judging by the name, the many live locals are presumably
    meant to raise register usage; confirm before simplifying.
    """
    a1 = a2 = a3 = a4 = a5 = 1.0
    b1 = b2 = b3 = b4 = b5 = 1.0
    c1 = c2 = c3 = c4 = c5 = 1.0
    d1 = d2 = d3 = d4 = d5 = 10

    for _ in range(a):
        # Each statement touches a distinct local, so they are grouped by
        # the operand they use.
        a1 += b
        b1 *= b
        c1 /= b
        d1 <<= b

        a2 += c
        b2 *= c
        c2 /= c
        d2 <<= c

        a3 += d
        b3 *= d
        c3 /= d
        d3 <<= d

        a4 += e
        b4 *= e
        c4 /= e
        d4 <<= e

        a5 += f
        b5 *= f
        c5 /= f
        d5 <<= f

    tid = cuda.grid(1)
    # Accumulate through x itself, one group per store.
    x[tid] = a1 + a2 + a3 + a4 + a5
    x[tid] += b1 + b2 + b3 + b4 + b5
    x[tid] += c1 + c2 + c3 + c4 + c5
    x[tid] += d1 + d2 + d3 + d4 + d5