本文整理匯總了Golang中github.com/mumax/3/cuda/cu.LaunchKernel函數的典型用法代碼示例。如果您正苦於以下問題:Golang LaunchKernel函數的具體用法?Golang LaunchKernel怎麼用?Golang LaunchKernel使用的例子?那麼,這裡精選的函數代碼示例或許可以為您提供幫助。
在下文中一共展示了LaunchKernel函數的15個代碼示例,這些例子默認根據受歡迎程度排序。您可以為喜歡或者感覺有用的代碼點讚,您的評價將有助於系統推薦出更棒的Golang代碼示例。
示例1: k_minimize_async
// k_minimize_async launches the minimize CUDA kernel through cu.LaunchKernel
// on stream0, using the grid and block dimensions carried by cfg.
//
// The kernel arguments are staged in minimize_args while its lock is held
// (the lock is released when this function returns). minimize_code is
// obtained from fatbinLoad the first time it is found to be zero. When
// Synchronous is set (debug), the launch is bracketed by Sync() calls and
// timed under the "minimize" timer.
func k_minimize_async(mx unsafe.Pointer, my unsafe.Pointer, mz unsafe.Pointer, m0x unsafe.Pointer, m0y unsafe.Pointer, m0z unsafe.Pointer, tx unsafe.Pointer, ty unsafe.Pointer, tz unsafe.Pointer, dt float32, N int, cfg *config) {
	// Single name used for the timer label and the fatbin lookup.
	const kernel = "minimize"

	if Synchronous { // debug
		Sync()
		timer.Start(kernel)
	}

	minimize_args.Lock()
	defer minimize_args.Unlock()

	// Load the kernel code on first use.
	if minimize_code == 0 {
		minimize_code = fatbinLoad(minimize_map, kernel)
	}

	// Stage the kernel arguments.
	minimize_args.arg_mx = mx
	minimize_args.arg_my = my
	minimize_args.arg_mz = mz
	minimize_args.arg_m0x = m0x
	minimize_args.arg_m0y = m0y
	minimize_args.arg_m0z = m0z
	minimize_args.arg_tx = tx
	minimize_args.arg_ty = ty
	minimize_args.arg_tz = tz
	minimize_args.arg_dt = dt
	minimize_args.arg_N = N

	grid, block := cfg.Grid, cfg.Block
	cu.LaunchKernel(minimize_code, grid.X, grid.Y, grid.Z, block.X, block.Y, block.Z, 0, stream0, minimize_args.argptr[:])

	if Synchronous { // debug
		Sync()
		timer.Stop(kernel)
	}
}
示例2: k_reducemaxdiff_async
// k_reducemaxdiff_async launches the reducemaxdiff CUDA kernel through
// cu.LaunchKernel on stream0, using the grid and block dimensions carried by
// cfg.
//
// The kernel arguments are staged in reducemaxdiff_args while its lock is
// held (the lock is released when this function returns). reducemaxdiff_code
// is obtained from fatbinLoad the first time it is found to be zero. When
// Synchronous is set (debug), the launch is bracketed by Sync() calls and
// timed under the "reducemaxdiff" timer.
func k_reducemaxdiff_async(src1 unsafe.Pointer, src2 unsafe.Pointer, dst unsafe.Pointer, initVal float32, n int, cfg *config) {
	// Single name used for the timer label and the fatbin lookup.
	const kernel = "reducemaxdiff"

	if Synchronous { // debug
		Sync()
		timer.Start(kernel)
	}

	reducemaxdiff_args.Lock()
	defer reducemaxdiff_args.Unlock()

	// Load the kernel code on first use.
	if reducemaxdiff_code == 0 {
		reducemaxdiff_code = fatbinLoad(reducemaxdiff_map, kernel)
	}

	// Stage the kernel arguments.
	reducemaxdiff_args.arg_src1 = src1
	reducemaxdiff_args.arg_src2 = src2
	reducemaxdiff_args.arg_dst = dst
	reducemaxdiff_args.arg_initVal = initVal
	reducemaxdiff_args.arg_n = n

	grid, block := cfg.Grid, cfg.Block
	cu.LaunchKernel(reducemaxdiff_code, grid.X, grid.Y, grid.Z, block.X, block.Y, block.Z, 0, stream0, reducemaxdiff_args.argptr[:])

	if Synchronous { // debug
		Sync()
		timer.Stop(kernel)
	}
}
示例3: k_kernmulC_async
// k_kernmulC_async launches the kernmulC CUDA kernel through cu.LaunchKernel
// on stream0, using the grid and block dimensions carried by cfg.
//
// The kernel arguments are staged in kernmulC_args while its lock is held
// (the lock is released when this function returns). kernmulC_code is
// obtained from fatbinLoad the first time it is found to be zero. When
// Synchronous is set (debug), the launch is bracketed by Sync() calls and
// timed under the "kernmulC" timer.
func k_kernmulC_async(fftM unsafe.Pointer, fftK unsafe.Pointer, Nx int, Ny int, cfg *config) {
	// Single name used for the timer label and the fatbin lookup.
	const kernel = "kernmulC"

	if Synchronous { // debug
		Sync()
		timer.Start(kernel)
	}

	kernmulC_args.Lock()
	defer kernmulC_args.Unlock()

	// Load the kernel code on first use.
	if kernmulC_code == 0 {
		kernmulC_code = fatbinLoad(kernmulC_map, kernel)
	}

	// Stage the kernel arguments.
	kernmulC_args.arg_fftM = fftM
	kernmulC_args.arg_fftK = fftK
	kernmulC_args.arg_Nx = Nx
	kernmulC_args.arg_Ny = Ny

	grid, block := cfg.Grid, cfg.Block
	cu.LaunchKernel(kernmulC_code, grid.X, grid.Y, grid.Z, block.X, block.Y, block.Z, 0, stream0, kernmulC_args.argptr[:])

	if Synchronous { // debug
		Sync()
		timer.Stop(kernel)
	}
}
示例4: k_crop_async
// k_crop_async launches the crop CUDA kernel through cu.LaunchKernel on
// stream0, using the grid and block dimensions carried by cfg.
//
// The kernel arguments are staged in crop_args while its lock is held (the
// lock is released when this function returns). crop_code is obtained from
// fatbinLoad the first time it is found to be zero. When Synchronous is set
// (debug), the launch is bracketed by Sync() calls and timed under the
// "crop" timer.
func k_crop_async(dst unsafe.Pointer, Dx int, Dy int, Dz int, src unsafe.Pointer, Sx int, Sy int, Sz int, Offx int, Offy int, Offz int, cfg *config) {
	// Single name used for the timer label and the fatbin lookup.
	const kernel = "crop"

	if Synchronous { // debug
		Sync()
		timer.Start(kernel)
	}

	crop_args.Lock()
	defer crop_args.Unlock()

	// Load the kernel code on first use.
	if crop_code == 0 {
		crop_code = fatbinLoad(crop_map, kernel)
	}

	// Stage the kernel arguments.
	crop_args.arg_dst = dst
	crop_args.arg_Dx = Dx
	crop_args.arg_Dy = Dy
	crop_args.arg_Dz = Dz
	crop_args.arg_src = src
	crop_args.arg_Sx = Sx
	crop_args.arg_Sy = Sy
	crop_args.arg_Sz = Sz
	crop_args.arg_Offx = Offx
	crop_args.arg_Offy = Offy
	crop_args.arg_Offz = Offz

	grid, block := cfg.Grid, cfg.Block
	cu.LaunchKernel(crop_code, grid.X, grid.Y, grid.Z, block.X, block.Y, block.Z, 0, stream0, crop_args.argptr[:])

	if Synchronous { // debug
		Sync()
		timer.Stop(kernel)
	}
}
示例5: k_shiftbytes_async
// k_shiftbytes_async launches the shiftbytes CUDA kernel through
// cu.LaunchKernel on stream0, using the grid and block dimensions carried by
// cfg.
//
// The kernel arguments are staged in shiftbytes_args while its lock is held
// (the lock is released when this function returns). shiftbytes_code is
// obtained from fatbinLoad the first time it is found to be zero. When
// Synchronous is set (debug), the launch is bracketed by Sync() calls and
// timed under the "shiftbytes" timer.
func k_shiftbytes_async(dst unsafe.Pointer, src unsafe.Pointer, Nx int, Ny int, Nz int, shx int, clamp byte, cfg *config) {
	// Single name used for the timer label and the fatbin lookup.
	const kernel = "shiftbytes"

	if Synchronous { // debug
		Sync()
		timer.Start(kernel)
	}

	shiftbytes_args.Lock()
	defer shiftbytes_args.Unlock()

	// Load the kernel code on first use.
	if shiftbytes_code == 0 {
		shiftbytes_code = fatbinLoad(shiftbytes_map, kernel)
	}

	// Stage the kernel arguments.
	shiftbytes_args.arg_dst = dst
	shiftbytes_args.arg_src = src
	shiftbytes_args.arg_Nx = Nx
	shiftbytes_args.arg_Ny = Ny
	shiftbytes_args.arg_Nz = Nz
	shiftbytes_args.arg_shx = shx
	shiftbytes_args.arg_clamp = clamp

	grid, block := cfg.Grid, cfg.Block
	cu.LaunchKernel(shiftbytes_code, grid.X, grid.Y, grid.Z, block.X, block.Y, block.Z, 0, stream0, shiftbytes_args.argptr[:])

	if Synchronous { // debug
		Sync()
		timer.Stop(kernel)
	}
}
示例6: k_dotproduct_async
// k_dotproduct_async launches the dotproduct CUDA kernel through
// cu.LaunchKernel on stream0, using the grid and block dimensions carried by
// cfg.
//
// The kernel arguments are staged in dotproduct_args while its lock is held
// (the lock is released when this function returns). dotproduct_code is
// obtained from fatbinLoad the first time it is found to be zero. When
// Synchronous is set (debug), the launch is bracketed by Sync() calls and
// timed under the "dotproduct" timer.
func k_dotproduct_async(dst unsafe.Pointer, prefactor float32, ax unsafe.Pointer, ay unsafe.Pointer, az unsafe.Pointer, bx unsafe.Pointer, by unsafe.Pointer, bz unsafe.Pointer, N int, cfg *config) {
	// Single name used for the timer label and the fatbin lookup.
	const kernel = "dotproduct"

	if Synchronous { // debug
		Sync()
		timer.Start(kernel)
	}

	dotproduct_args.Lock()
	defer dotproduct_args.Unlock()

	// Load the kernel code on first use.
	if dotproduct_code == 0 {
		dotproduct_code = fatbinLoad(dotproduct_map, kernel)
	}

	// Stage the kernel arguments.
	dotproduct_args.arg_dst = dst
	dotproduct_args.arg_prefactor = prefactor
	dotproduct_args.arg_ax = ax
	dotproduct_args.arg_ay = ay
	dotproduct_args.arg_az = az
	dotproduct_args.arg_bx = bx
	dotproduct_args.arg_by = by
	dotproduct_args.arg_bz = bz
	dotproduct_args.arg_N = N

	grid, block := cfg.Grid, cfg.Block
	cu.LaunchKernel(dotproduct_code, grid.X, grid.Y, grid.Z, block.X, block.Y, block.Z, 0, stream0, dotproduct_args.argptr[:])

	if Synchronous { // debug
		Sync()
		timer.Stop(kernel)
	}
}
示例7: k_regionaddv_async
// k_regionaddv_async launches the regionaddv CUDA kernel through
// cu.LaunchKernel on stream0, using the grid and block dimensions carried by
// cfg.
//
// The kernel arguments are staged in regionaddv_args while its lock is held
// (the lock is released when this function returns). regionaddv_code is
// obtained from fatbinLoad the first time it is found to be zero. When
// Synchronous is set (debug), the launch is bracketed by Sync() calls and
// timed under the "regionaddv" timer.
func k_regionaddv_async(dstx unsafe.Pointer, dsty unsafe.Pointer, dstz unsafe.Pointer, LUTx unsafe.Pointer, LUTy unsafe.Pointer, LUTz unsafe.Pointer, regions unsafe.Pointer, N int, cfg *config) {
	// Single name used for the timer label and the fatbin lookup.
	const kernel = "regionaddv"

	if Synchronous { // debug
		Sync()
		timer.Start(kernel)
	}

	regionaddv_args.Lock()
	defer regionaddv_args.Unlock()

	// Load the kernel code on first use.
	if regionaddv_code == 0 {
		regionaddv_code = fatbinLoad(regionaddv_map, kernel)
	}

	// Stage the kernel arguments.
	regionaddv_args.arg_dstx = dstx
	regionaddv_args.arg_dsty = dsty
	regionaddv_args.arg_dstz = dstz
	regionaddv_args.arg_LUTx = LUTx
	regionaddv_args.arg_LUTy = LUTy
	regionaddv_args.arg_LUTz = LUTz
	regionaddv_args.arg_regions = regions
	regionaddv_args.arg_N = N

	grid, block := cfg.Grid, cfg.Block
	cu.LaunchKernel(regionaddv_code, grid.X, grid.Y, grid.Z, block.X, block.Y, block.Z, 0, stream0, regionaddv_args.argptr[:])

	if Synchronous { // debug
		Sync()
		timer.Stop(kernel)
	}
}
示例8: k_normalize_async
// k_normalize_async launches the normalize CUDA kernel through
// cu.LaunchKernel on stream0, using the grid and block dimensions carried by
// cfg.
//
// The kernel arguments are staged in normalize_args while its lock is held
// (the lock is released when this function returns). normalize_code is
// obtained from fatbinLoad the first time it is found to be zero. When
// Synchronous is set (debug), the launch is bracketed by Sync() calls and
// timed under the "normalize" timer.
func k_normalize_async(vx unsafe.Pointer, vy unsafe.Pointer, vz unsafe.Pointer, vol unsafe.Pointer, N int, cfg *config) {
	// Single name used for the timer label and the fatbin lookup.
	const kernel = "normalize"

	if Synchronous { // debug
		Sync()
		timer.Start(kernel)
	}

	normalize_args.Lock()
	defer normalize_args.Unlock()

	// Load the kernel code on first use.
	if normalize_code == 0 {
		normalize_code = fatbinLoad(normalize_map, kernel)
	}

	// Stage the kernel arguments.
	normalize_args.arg_vx = vx
	normalize_args.arg_vy = vy
	normalize_args.arg_vz = vz
	normalize_args.arg_vol = vol
	normalize_args.arg_N = N

	grid, block := cfg.Grid, cfg.Block
	cu.LaunchKernel(normalize_code, grid.X, grid.Y, grid.Z, block.X, block.Y, block.Z, 0, stream0, normalize_args.argptr[:])

	if Synchronous { // debug
		Sync()
		timer.Stop(kernel)
	}
}
示例9: k_zeromask_async
// k_zeromask_async launches the zeromask CUDA kernel through cu.LaunchKernel
// on stream0, using the grid and block dimensions carried by cfg.
//
// The kernel arguments are staged in zeromask_args while its lock is held
// (the lock is released when this function returns). zeromask_code is
// obtained from fatbinLoad the first time it is found to be zero. When
// Synchronous is set (debug), the launch is bracketed by Sync() calls and
// timed under the "zeromask" timer.
func k_zeromask_async(dst unsafe.Pointer, maskLUT unsafe.Pointer, regions unsafe.Pointer, N int, cfg *config) {
	// Single name used for the timer label and the fatbin lookup.
	const kernel = "zeromask"

	if Synchronous { // debug
		Sync()
		timer.Start(kernel)
	}

	zeromask_args.Lock()
	defer zeromask_args.Unlock()

	// Load the kernel code on first use.
	if zeromask_code == 0 {
		zeromask_code = fatbinLoad(zeromask_map, kernel)
	}

	// Stage the kernel arguments.
	zeromask_args.arg_dst = dst
	zeromask_args.arg_maskLUT = maskLUT
	zeromask_args.arg_regions = regions
	zeromask_args.arg_N = N

	grid, block := cfg.Grid, cfg.Block
	cu.LaunchKernel(zeromask_code, grid.X, grid.Y, grid.Z, block.X, block.Y, block.Z, 0, stream0, zeromask_args.argptr[:])

	if Synchronous { // debug
		Sync()
		timer.Stop(kernel)
	}
}
示例10: k_mul_async
// k_mul_async launches the mul CUDA kernel through cu.LaunchKernel on
// stream0, using the grid and block dimensions carried by cfg.
//
// The kernel arguments are staged in mul_args while its lock is held (the
// lock is released when this function returns). mul_code is obtained from
// fatbinLoad the first time it is found to be zero. When Synchronous is set
// (debug), the launch is bracketed by Sync() calls and timed under the
// "mul" timer.
func k_mul_async(dst unsafe.Pointer, a unsafe.Pointer, b unsafe.Pointer, N int, cfg *config) {
	// Single name used for the timer label and the fatbin lookup.
	const kernel = "mul"

	if Synchronous { // debug
		Sync()
		timer.Start(kernel)
	}

	mul_args.Lock()
	defer mul_args.Unlock()

	// Load the kernel code on first use.
	if mul_code == 0 {
		mul_code = fatbinLoad(mul_map, kernel)
	}

	// Stage the kernel arguments.
	mul_args.arg_dst = dst
	mul_args.arg_a = a
	mul_args.arg_b = b
	mul_args.arg_N = N

	grid, block := cfg.Grid, cfg.Block
	cu.LaunchKernel(mul_code, grid.X, grid.Y, grid.Z, block.X, block.Y, block.Z, 0, stream0, mul_args.argptr[:])

	if Synchronous { // debug
		Sync()
		timer.Stop(kernel)
	}
}
示例11: k_reducemaxvecdiff2_async
// k_reducemaxvecdiff2_async launches the reducemaxvecdiff2 CUDA kernel
// through cu.LaunchKernel on stream0, using the grid and block dimensions
// carried by cfg.
//
// The kernel arguments are staged in reducemaxvecdiff2_args while its lock is
// held (the lock is released when this function returns).
// reducemaxvecdiff2_code is obtained from fatbinLoad the first time it is
// found to be zero. When Synchronous is set (debug), the launch is bracketed
// by Sync() calls and timed under the "reducemaxvecdiff2" timer.
func k_reducemaxvecdiff2_async(x1 unsafe.Pointer, y1 unsafe.Pointer, z1 unsafe.Pointer, x2 unsafe.Pointer, y2 unsafe.Pointer, z2 unsafe.Pointer, dst unsafe.Pointer, initVal float32, n int, cfg *config) {
	// Single name used for the timer label and the fatbin lookup.
	const kernel = "reducemaxvecdiff2"

	if Synchronous { // debug
		Sync()
		timer.Start(kernel)
	}

	reducemaxvecdiff2_args.Lock()
	defer reducemaxvecdiff2_args.Unlock()

	// Load the kernel code on first use.
	if reducemaxvecdiff2_code == 0 {
		reducemaxvecdiff2_code = fatbinLoad(reducemaxvecdiff2_map, kernel)
	}

	// Stage the kernel arguments.
	reducemaxvecdiff2_args.arg_x1 = x1
	reducemaxvecdiff2_args.arg_y1 = y1
	reducemaxvecdiff2_args.arg_z1 = z1
	reducemaxvecdiff2_args.arg_x2 = x2
	reducemaxvecdiff2_args.arg_y2 = y2
	reducemaxvecdiff2_args.arg_z2 = z2
	reducemaxvecdiff2_args.arg_dst = dst
	reducemaxvecdiff2_args.arg_initVal = initVal
	reducemaxvecdiff2_args.arg_n = n

	grid, block := cfg.Grid, cfg.Block
	cu.LaunchKernel(reducemaxvecdiff2_code, grid.X, grid.Y, grid.Z, block.X, block.Y, block.Z, 0, stream0, reducemaxvecdiff2_args.argptr[:])

	if Synchronous { // debug
		Sync()
		timer.Stop(kernel)
	}
}
示例12: k_llnoprecess_async
// k_llnoprecess_async launches the llnoprecess CUDA kernel through
// cu.LaunchKernel on stream0, using the grid and block dimensions carried by
// cfg.
//
// The kernel arguments are staged in llnoprecess_args while its lock is held
// (the lock is released when this function returns). llnoprecess_code is
// obtained from fatbinLoad the first time it is found to be zero. When
// Synchronous is set (debug), the launch is bracketed by Sync() calls and
// timed under the "llnoprecess" timer.
func k_llnoprecess_async(tx unsafe.Pointer, ty unsafe.Pointer, tz unsafe.Pointer, mx unsafe.Pointer, my unsafe.Pointer, mz unsafe.Pointer, hx unsafe.Pointer, hy unsafe.Pointer, hz unsafe.Pointer, N int, cfg *config) {
	// Single name used for the timer label and the fatbin lookup.
	const kernel = "llnoprecess"

	if Synchronous { // debug
		Sync()
		timer.Start(kernel)
	}

	llnoprecess_args.Lock()
	defer llnoprecess_args.Unlock()

	// Load the kernel code on first use.
	if llnoprecess_code == 0 {
		llnoprecess_code = fatbinLoad(llnoprecess_map, kernel)
	}

	// Stage the kernel arguments.
	llnoprecess_args.arg_tx = tx
	llnoprecess_args.arg_ty = ty
	llnoprecess_args.arg_tz = tz
	llnoprecess_args.arg_mx = mx
	llnoprecess_args.arg_my = my
	llnoprecess_args.arg_mz = mz
	llnoprecess_args.arg_hx = hx
	llnoprecess_args.arg_hy = hy
	llnoprecess_args.arg_hz = hz
	llnoprecess_args.arg_N = N

	grid, block := cfg.Grid, cfg.Block
	cu.LaunchKernel(llnoprecess_code, grid.X, grid.Y, grid.Z, block.X, block.Y, block.Z, 0, stream0, llnoprecess_args.argptr[:])

	if Synchronous { // debug
		Sync()
		timer.Stop(kernel)
	}
}
示例13: k_madd3_async
// k_madd3_async launches the madd3 CUDA kernel through cu.LaunchKernel on
// stream0, using the grid and block dimensions carried by cfg.
//
// The kernel arguments are staged in madd3_args while its lock is held (the
// lock is released when this function returns). madd3_code is obtained from
// fatbinLoad the first time it is found to be zero. When Synchronous is set
// (debug), the launch is bracketed by Sync() calls and timed under the
// "madd3" timer.
func k_madd3_async(dst unsafe.Pointer, src1 unsafe.Pointer, fac1 float32, src2 unsafe.Pointer, fac2 float32, src3 unsafe.Pointer, fac3 float32, N int, cfg *config) {
	// Single name used for the timer label and the fatbin lookup.
	const kernel = "madd3"

	if Synchronous { // debug
		Sync()
		timer.Start(kernel)
	}

	madd3_args.Lock()
	defer madd3_args.Unlock()

	// Load the kernel code on first use.
	if madd3_code == 0 {
		madd3_code = fatbinLoad(madd3_map, kernel)
	}

	// Stage the kernel arguments.
	madd3_args.arg_dst = dst
	madd3_args.arg_src1 = src1
	madd3_args.arg_fac1 = fac1
	madd3_args.arg_src2 = src2
	madd3_args.arg_fac2 = fac2
	madd3_args.arg_src3 = src3
	madd3_args.arg_fac3 = fac3
	madd3_args.arg_N = N

	grid, block := cfg.Grid, cfg.Block
	cu.LaunchKernel(madd3_code, grid.X, grid.Y, grid.Z, block.X, block.Y, block.Z, 0, stream0, madd3_args.argptr[:])

	if Synchronous { // debug
		Sync()
		timer.Stop(kernel)
	}
}
示例14: k_copypadmul_async
// k_copypadmul_async launches the copypadmul CUDA kernel through
// cu.LaunchKernel on stream0, using the grid and block dimensions carried by
// cfg.
//
// The kernel arguments are staged in copypadmul_args while its lock is held
// (the lock is released when this function returns). copypadmul_code is
// obtained from fatbinLoad the first time it is found to be zero. When
// Synchronous is set (debug), the launch is bracketed by Sync() calls and
// timed under the "copypadmul" timer.
func k_copypadmul_async(dst unsafe.Pointer, Dx int, Dy int, Dz int, src unsafe.Pointer, vol unsafe.Pointer, Sx int, Sy int, Sz int, BsatLUT unsafe.Pointer, regions unsafe.Pointer, cfg *config) {
	// Single name used for the timer label and the fatbin lookup.
	const kernel = "copypadmul"

	if Synchronous { // debug
		Sync()
		timer.Start(kernel)
	}

	copypadmul_args.Lock()
	defer copypadmul_args.Unlock()

	// Load the kernel code on first use.
	if copypadmul_code == 0 {
		copypadmul_code = fatbinLoad(copypadmul_map, kernel)
	}

	// Stage the kernel arguments.
	copypadmul_args.arg_dst = dst
	copypadmul_args.arg_Dx = Dx
	copypadmul_args.arg_Dy = Dy
	copypadmul_args.arg_Dz = Dz
	copypadmul_args.arg_src = src
	copypadmul_args.arg_vol = vol
	copypadmul_args.arg_Sx = Sx
	copypadmul_args.arg_Sy = Sy
	copypadmul_args.arg_Sz = Sz
	copypadmul_args.arg_BsatLUT = BsatLUT
	copypadmul_args.arg_regions = regions

	grid, block := cfg.Grid, cfg.Block
	cu.LaunchKernel(copypadmul_code, grid.X, grid.Y, grid.Z, block.X, block.Y, block.Z, 0, stream0, copypadmul_args.argptr[:])

	if Synchronous { // debug
		Sync()
		timer.Stop(kernel)
	}
}
示例15: k_resize_async
// k_resize_async launches the resize CUDA kernel through cu.LaunchKernel on
// stream0, using the grid and block dimensions carried by cfg.
//
// The kernel arguments are staged in resize_args while its lock is held (the
// lock is released when this function returns). resize_code is obtained from
// fatbinLoad the first time it is found to be zero. When Synchronous is set
// (debug), the launch is bracketed by Sync() calls and timed under the
// "resize" timer.
func k_resize_async(dst unsafe.Pointer, Dx int, Dy int, Dz int, src unsafe.Pointer, Sx int, Sy int, Sz int, layer int, scalex int, scaley int, cfg *config) {
	// Single name used for the timer label and the fatbin lookup.
	const kernel = "resize"

	if Synchronous { // debug
		Sync()
		timer.Start(kernel)
	}

	resize_args.Lock()
	defer resize_args.Unlock()

	// Load the kernel code on first use.
	if resize_code == 0 {
		resize_code = fatbinLoad(resize_map, kernel)
	}

	// Stage the kernel arguments.
	resize_args.arg_dst = dst
	resize_args.arg_Dx = Dx
	resize_args.arg_Dy = Dy
	resize_args.arg_Dz = Dz
	resize_args.arg_src = src
	resize_args.arg_Sx = Sx
	resize_args.arg_Sy = Sy
	resize_args.arg_Sz = Sz
	resize_args.arg_layer = layer
	resize_args.arg_scalex = scalex
	resize_args.arg_scaley = scaley

	grid, block := cfg.Grid, cfg.Block
	cu.LaunchKernel(resize_code, grid.X, grid.Y, grid.Z, block.X, block.Y, block.Z, 0, stream0, resize_args.argptr[:])

	if Synchronous { // debug
		Sync()
		timer.Stop(kernel)
	}
}