This article collects typical usage examples of the tvm.compute method in Python. If you have been wondering what tvm.compute does, how to call it, or where to find working examples, the curated code samples below may help. You can also explore further usage examples from the tvm module, where this method lives.
The following presents 15 code examples of tvm.compute, sorted by popularity by default. You can upvote the examples you like or find useful; your feedback helps the system recommend better Python code examples.
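Before the examples, here is a minimal sketch of the pattern they all share, using the same pre-1.0 TVM API as the examples below (the tensor names are illustrative): tvm.compute takes an output shape and a lambda that maps output indices to an expression over the inputs.

import tvm

n = tvm.var("n")                                        # symbolic dimension
A = tvm.placeholder((n,), name="A", dtype="float32")    # input tensor
B = tvm.compute((n,), lambda i: A[i] * 2.0, name="B")   # element-wise expression
s = tvm.create_schedule(B.op)                           # schedule, ready for lowering/building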
Example 1: test_bound_scan
# Required module: import tvm [as alias]
# Or: from tvm import compute [as alias]
def test_bound_scan():
    m = tvm.var("m")
    n = tvm.var("n")
    X = tvm.compute((m, n), lambda i, j: tvm.const(1, "float32"), name="x")
    s_state = tvm.placeholder((m, n))
    s_init = tvm.compute((1, n), lambda _, i: X[0, i])
    s_update = tvm.compute((m, n), lambda t, i: s_state[t-1, i] + X[t, i])
    # Scan over the first axis: a running sum of the rows of X.
    s_scan = tvm.scan(s_init, s_update, s_state)
    assert tuple(s_scan.shape) == (m, n)
    s = tvm.create_schedule(s_scan.op)
    XX = s.cache_read(X, "local", s_update)
    xo, xi = s[s_update].split(s_update.op.axis[1], factor=4)
    s[XX].compute_at(s[s_update], xo)
    s = s.normalize()
    bounds = tvm.schedule.InferBound(s)
    stmt = tvm.schedule.ScheduleOps(s, bounds)
    # The cache stage is computed inside the split loop, so bound
    # inference shrinks its extent to the split factor.
    assert bounds[XX.op.axis[1]].extent.value == 4
Example 2: get_tvm_add
# Required module: import tvm [as alias]
# Or: from tvm import compute [as alias]
# Also needed by this snippet: import topi, import mobula
def get_tvm_add():
    # define compute
    n = tvm.var('n')
    A = tvm.placeholder(n, name='A', dtype='float32')
    B = tvm.placeholder(n, name='B', dtype='float32')
    C = tvm.compute((n,), lambda i: A[i] + B[i], name='C')
    # build function (with parallel support)
    with tvm.target.create('llvm'):
        s = topi.generic.schedule_injective([C])
        func_cpu = tvm.build(s, [A, B, C])
    if mobula.utils.list_gpus():
        with tvm.target.create('cuda'):
            s = topi.generic.schedule_injective([C])
            func_gpu = tvm.build(s, [A, B, C])
    else:
        func_gpu = None
    return func_cpu, func_gpu
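A minimal usage sketch for the pair returned above (not from the original source; the workload size is arbitrary, and func_gpu may be None when no GPU is available):

import numpy as np

func_cpu, func_gpu = get_tvm_add()
a = tvm.nd.array(np.random.rand(1024).astype('float32'))
b = tvm.nd.array(np.random.rand(1024).astype('float32'))
c = tvm.nd.array(np.zeros(1024, dtype='float32'))
func_cpu(a, b, c)  # writes a + b into c
np.testing.assert_allclose(c.asnumpy(), a.asnumpy() + b.asnumpy())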
Example 3: test_add
# Required module: import tvm [as alias]
# Or: from tvm import compute [as alias]
# Also needed by this snippet: import os, from tvm.contrib import cc
def test_add(target_dir):
    if not tvm.module.enabled("cuda"):
        print("skip %s because cuda is not enabled..." % __file__)
        return
    n = tvm.var("n")
    A = tvm.placeholder((n,), name='A')
    B = tvm.placeholder((n,), name='B')
    C = tvm.compute(A.shape, lambda i: A[i] + B[i], name="C")
    s = tvm.create_schedule(C.op)
    bx, tx = s[C].split(C.op.axis[0], factor=64)
    s[C].bind(bx, tvm.thread_axis("blockIdx.x"))
    s[C].bind(tx, tvm.thread_axis("threadIdx.x"))
    fadd_cuda = tvm.build(s, [A, B, C], "cuda", target_host="llvm", name="myadd")
    # Save the host object code and the device PTX separately,
    # then link the host object into a shared library.
    fadd_cuda.save(os.path.join(target_dir, "add_gpu.o"))
    fadd_cuda.imported_modules[0].save(os.path.join(target_dir, "add_gpu.ptx"))
    cc.create_shared(os.path.join(target_dir, "add_gpu.so"),
                     [os.path.join(target_dir, "add_gpu.o")])
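The saved artifacts can later be reloaded and recombined, along the lines of TVM's deployment tutorial (a sketch under that assumption):

fadd = tvm.module.load(os.path.join(target_dir, "add_gpu.so"))       # host module
fadd_dev = tvm.module.load(os.path.join(target_dir, "add_gpu.ptx"))  # device code
fadd.import_module(fadd_dev)  # attach the CUDA module to the host module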
Example 4: test_verify_memory_not_bind
# Required module: import tvm [as alias]
# Or: from tvm import compute [as alias]
# `lower`, `gpu_devices` and `other_devices` are module-level helpers from the
# original test file; a sketch of them follows Example 5 below.
def test_verify_memory_not_bind():
    n = tvm.var("n")
    A = tvm.placeholder((n,), name='A')
    B = tvm.compute(A.shape, lambda i: A[i] + 1.0, name="B")
    # B is not bound to threads.
    s = tvm.create_schedule(B.op)
    func = lower(s, [A, B])
    for dev_type in gpu_devices:
        assert not tvm.ir_pass.VerifyMemory(func, dev_type)
    for dev_type in other_devices:
        assert tvm.ir_pass.VerifyMemory(func, dev_type)
Example 5: test_verify_memory_partially_bind
# Required module: import tvm [as alias]
# Or: from tvm import compute [as alias]
# Computations are partially bound, so the VerifyMemory pass fails
# when the device type is a GPU.
def test_verify_memory_partially_bind():
    n = tvm.var("n")
    A = tvm.placeholder((n,), name='A')
    B = tvm.compute(A.shape, lambda i: A[i] + 1.0, name="B")
    C = tvm.compute(B.shape, lambda i: B[i] + 2.0, name="C")
    D = tvm.compute(C.shape, lambda i: C[i] + 2.0, name="D")
    # C is bound to threads, but B and D are not.
    s = tvm.create_schedule([B.op, C.op, D.op])
    bx, tx = s[C].split(C.op.axis[0], factor=64)
    s[C].bind(bx, tvm.thread_axis("blockIdx.x"))
    s[C].bind(tx, tvm.thread_axis("threadIdx.x"))
    func = lower(s, [A, B, C, D])
    for dev_type in gpu_devices:
        assert not tvm.ir_pass.VerifyMemory(func, dev_type)
    for dev_type in other_devices:
        assert tvm.ir_pass.VerifyMemory(func, dev_type)
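Examples 4 and 5 rely on helpers defined at module level in the original TVM test file. A sketch of what they look like, reconstructed from how they are used above; the device-type codes and the exact pass sequence are assumptions, not verbatim source:

gpu_devices = [2, 4]   # assumed DLDeviceType codes, e.g. kDLGPU = 2, kDLOpenCL = 4
other_devices = [1]    # assumed kDLCPU = 1

def lower(sch, args):
    """Lower a schedule to a LoweredFunc, binding each tensor arg to a buffer."""
    binds = {}
    arg_list = []
    for x in args:
        buf = tvm.decl_buffer(x.shape, dtype=x.dtype, name=x.name)
        binds[x] = buf
        arg_list.append(buf)
    sch = sch.normalize()
    bounds = tvm.schedule.InferBound(sch)
    stmt = tvm.schedule.ScheduleOps(sch, bounds)
    stmt = tvm.ir_pass.StorageFlatten(stmt, binds, 64)
    return tvm.ir_pass.MakeAPI(stmt, "myfunc", arg_list, 0, True)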
Example 6: test_bound3
# Required module: import tvm [as alias]
# Or: from tvm import compute [as alias]
def test_bound3():
    m = tvm.var('m')
    l = tvm.var('l')
    A = tvm.placeholder((m, l), name='A')
    A1 = tvm.compute((m, l), lambda i, j: A[i, j], name='A1')
    A2 = tvm.compute((m, l), lambda i, j: A1[i, j] + 3, name='A2')
    s = tvm.create_schedule(A2.op)
    s[A1].set_scope("shared")
    xo, xi = s[A2].split(A2.op.axis[0], 32)
    xi0, xi1 = s[A2].split(xi, nparts=16)
    s[A2].bind(xi0, tvm.thread_axis("threadIdx.x"))
    yo, yi = s[A2].split(A2.op.axis[1], 16)
    # test that normalize does not affect the schedule
    _ = s.normalize()
    s[A2].reorder(xo, xi0, yo, xi1, yi)
    s[A1].compute_at(s[A2], yo)
    bounds = tvm.schedule.InferBound(s)
    assert isinstance(bounds, tvm.container.Map)
    assert bounds[A1.op.axis[0]].extent.value == 32
    assert bounds[A1.op.axis[1]].extent.value == 16
Example 7: test_bound_nest_thread
# Required module: import tvm [as alias]
# Or: from tvm import compute [as alias]
def test_bound_nest_thread():
    m = tvm.var('m')
    A = tvm.placeholder((m,), name='A')
    A1 = tvm.compute((m,), lambda i: A[i], name='A1')
    A2 = tvm.compute((m,), lambda i: A1[i] + 2, name='A2')
    A3 = tvm.compute((m,), lambda i: A2[i] + 3, name='A3')
    s = tvm.create_schedule(A3.op)
    s[A2].set_scope("shared")
    s[A1].set_scope("local")
    block_x = tvm.thread_axis("blockIdx.x")
    thread_x = tvm.thread_axis("threadIdx.x")
    bx, tx = s[A3].split(A3.op.axis[0], factor=32)
    s[A3].bind(bx, block_x)
    s[A3].bind(tx, thread_x)
    s[A2].compute_at(s[A3], tx)
    _, xi = s[A2].split(A2.op.axis[0], nparts=1)
    s[A2].bind(xi, thread_x)
    s[A1].compute_at(s[A3], tx)
    s = s.normalize()
    bounds = tvm.schedule.InferBound(s)
    assert bounds[A1.op.axis[0]].extent.value == 1
    assert bounds[A2.op.axis[0]].extent.value == 32
    assert bounds[A3.op.axis[0]].extent == m
Example 8: test_makeapi
# Required module: import tvm [as alias]
# Or: from tvm import compute [as alias]
def test_makeapi():
    """Not yet working, mock design"""
    n = tvm.var('n')
    A = tvm.placeholder((n,), name='A')
    B = tvm.placeholder((n,), name='B')
    C = tvm.compute(A.shape, lambda *i: A(*i) + B(*i), name='C')
    s = tvm.create_schedule(C.op)
    bounds = tvm.schedule.InferBound(s)
    stmt = tvm.schedule.ScheduleOps(s, bounds)
    Ab = tvm.decl_buffer(A.shape, A.dtype, name='A')
    Bb = tvm.decl_buffer(B.shape, B.dtype, name='B')
    Cb = tvm.decl_buffer(C.shape, C.dtype, name='C')
    stmt = tvm.ir_pass.StorageFlatten(stmt, {A: Ab, B: Bb, C: Cb}, 64)
    num_unpacked_args = 2
    f = tvm.ir_pass.MakeAPI(
        stmt, "myadd", [n, Ab, Bb, Cb], num_unpacked_args, True)
    assert f.handle_data_type[Ab.data].dtype == Ab.dtype
    # The packed portion (n, Ab) is lowered to 3 handle arguments;
    # plus the 2 unpacked arguments, the function takes 5 args.
    assert len(f.args) == 5
Example 9: test_flatten_storage_align
# Required module: import tvm [as alias]
# Or: from tvm import compute [as alias]
def test_flatten_storage_align():
    m = 8
    l = 16
    A = tvm.placeholder((m, l), name='A')
    A1 = tvm.compute((m, l), lambda i, j: A[i, j], name='A1')
    A2 = tvm.compute((m, l), lambda i, j: A1[i, j] + 3, name='A2')
    s = tvm.create_schedule(A2.op)
    s[A1].storage_align(A1.op.axis[0], 2, 1)
    bounds = tvm.schedule.InferBound(s)
    assert isinstance(bounds, tvm.container.Map)
    stmt = tvm.schedule.ScheduleOps(s, bounds)
    Ab = tvm.decl_buffer(A.shape, A.dtype, name='A')
    A2b = tvm.decl_buffer(A2.shape, A2.dtype, name='A2')
    stmt = tvm.ir_pass.StorageFlatten(stmt, {A: Ab, A2: A2b}, 64)
    stmt = tvm.ir_pass.Simplify(stmt)
    # storage_align pads each 16-element row to a stride of 17 (stride % 2 == 1),
    # so the flattened A1 buffer covers 17 * 8 elements.
    assert stmt.body.extents[0].value == 17 * 8
Example 10: test_reorder
# Required module: import tvm [as alias]
# Or: from tvm import compute [as alias]
def test_reorder():
    m = tvm.var('m')
    A = tvm.placeholder((m,), name='A')
    T = tvm.compute(m, lambda i: A[i+1])
    s = tvm.create_schedule(T.op)
    xo, xi = s[T].split(T.op.axis[0], factor=10)
    xi1, xi2 = s[T].split(xi, factor=2)
    order = (xi2, xi1, xo)
    assert tuple(s[T].leaf_iter_vars) != order
    s[T].reorder(*order)
    assert tuple(s[T].leaf_iter_vars) == order
    try:
        # Passing a duplicate IterVar must raise an error.
        s[T].reorder(xi2, xi1, xi2)
        assert False
    except tvm.TVMError:
        pass
Example 11: test_tensor_intrin
# Required module: import tvm [as alias]
# Or: from tvm import compute [as alias]
def test_tensor_intrin():
    n = 16
    x = tvm.placeholder((n,), name='x')
    y = tvm.placeholder((n,), name='y')
    z = tvm.compute(x.shape, lambda i: x[i] + y[i], name='z')

    def intrin_func(ins, outs):
        assert isinstance(ins[0], tvm.schedule.Buffer)
        assert ins[0].shape[0].value == n
        return tvm.call_packed("vadd", ins[0].data, outs[0].data, ins[0].shape[0])

    intrin = tvm.decl_tensor_intrin(z.op, intrin_func)
    assert intrin.op == z.op
    assert intrin.reduce_init is None
    assert tuple(intrin.inputs) == tuple(z.op.input_tensors)
    assert intrin.buffers[0].shape[0].value == n
    # Tensorize the 16-wide inner loop of a larger (m = 32) workload.
    m = 32
    x = tvm.placeholder((m,), name='x')
    y = tvm.placeholder((m,), name='y')
    z = tvm.compute(x.shape, lambda i: x[i] + y[i], name='z')
    s = tvm.create_schedule(z.op)
    xo, xi = s[z].split(z.op.axis[0], factor=n)
    s[z].tensorize(xi, intrin)
    assert s[z].iter_var_attrs[xi].tensor_intrin == intrin
    assert s[z].iter_var_attrs[xi].iter_type == tvm.schedule.IterVar.Tensorized
Example 12: test_thread_axis
# Required module: import tvm [as alias]
# Or: from tvm import compute [as alias]
def test_thread_axis():
    m = tvm.var('m')
    l = tvm.var('l')
    A = tvm.placeholder((m, l), name='A')
    B = tvm.compute((m, l), lambda i, j: A[i, j] + 3, name='B')
    s = tvm.create_schedule(B.op)
    s[B].set_scope("shared")
    num_thread = 16
    xo, xi = s[B].split(B.op.axis[0], 32)
    xi0, xi1 = s[B].split(xi, nparts=num_thread)
    s[B].bind(xi0, tvm.thread_axis("threadIdx.x"))
    bounds = tvm.schedule.InferBound(s)
    stmt = tvm.schedule.ScheduleOps(s, bounds)
    stmt = tvm.ir_pass.LoopPartition(stmt, False)
    stmt = tvm.ir_pass.Simplify(stmt)
    # Loop partitioning should have eliminated the boundary condition.
    assert 'if' not in str(stmt.body.body.body.first)
Example 13: test_vectorize
# Required module: import tvm [as alias]
# Or: from tvm import compute [as alias]
# `lower` and `collect_visit` are module-level helpers from the original test
# file; see the sketch after this example.
def test_vectorize():
    n = tvm.var('n')
    A = tvm.placeholder((n,), name='A')
    B = tvm.placeholder((n,), name='B')
    bias = tvm.var("bias", dtype="float32")
    scale = tvm.var("scale", dtype="float32")
    C = tvm.compute(A.shape, lambda *i: A(*i) + B(*i) * scale + bias, name='C')
    # schedule
    s = tvm.create_schedule(C.op)
    # create iter vars and assign them tags
    num_thread = 32
    bx, x = s[C].split(C.op.axis[0], factor=num_thread * 4)
    tx, x = s[C].split(x, nparts=num_thread)
    _, x = s[C].split(x, factor=4)
    s[C].bind(bx, tvm.thread_axis("blockIdx.x"))
    s[C].bind(tx, tvm.thread_axis("threadIdx.x"))
    s[C].vectorize(x)
    stmt = lower(s, [A, B])
    body = stmt.body.body.body.body.body
    assert x.var.name not in str(body.condition)
    # Vectorized memory access shows up as a Ramp index node in the IR.
    assert any(collect_visit(body.then_case, lambda x: isinstance(x, tvm.expr.Ramp)))
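Examples 13 and 14 use two more helpers from their original test file: a `lower` variant that returns the lowered statement rather than a LoweredFunc (running extra passes such as LoopPartition and VectorizeLoop on top of the sequence sketched after Example 5), and `collect_visit`, which gathers the result of a predicate over every IR node. A sketch of the latter, reconstructed from its usage (an assumption, not verbatim source):

def collect_visit(stmt, f):
    ret = []
    # Post-order traversal over the IR, recording f(node) for every node.
    tvm.ir_pass.PostOrderVisit(stmt, lambda x: ret.append(f(x)))
    return ret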
Example 14: test_thread_axis2
# Required module: import tvm [as alias]
# Or: from tvm import compute [as alias]
def test_thread_axis2():
    n = tvm.convert(4096)
    m = tvm.var('m')
    A = tvm.placeholder((n,), name='A')
    B = tvm.placeholder((n,), name='B')
    C = tvm.compute(A.shape, lambda i: A[i] + B[i], name='C')
    s = tvm.create_schedule(C.op)
    num_thread = 32
    bx, x = s[C].split(C.op.axis[0], factor=32)
    tx, x = s[C].split(x, nparts=num_thread)
    _, x = s[C].split(x, factor=m)
    s[C].bind(bx, tvm.thread_axis("blockIdx.x"))
    s[C].bind(tx, tvm.thread_axis("threadIdx.x"))
    stmt = lower(s, [A, B])
    for_body = stmt.body.body.body.body.body.first
    # The inner loop extent must not depend on the thread index.
    assert 'threadIdx' not in str(for_body.extent)
Example 15: test_cuda_vectorize_load
# Required module: import tvm [as alias]
# Or: from tvm import compute [as alias]
# Also needed by this snippet: import numpy as np
def test_cuda_vectorize_load():
    num_thread = 8

    def check_cuda(dtype, n, lanes):
        if not tvm.gpu(0).exist or not tvm.module.enabled("cuda"):
            print("skip because cuda is not enabled..")
            return
        ctx = tvm.gpu(0)
        # A holds vectors of `lanes` elements, e.g. dtype "int8x8".
        A = tvm.placeholder((n,), name='A', dtype="%sx%d" % (dtype, lanes))
        B = tvm.compute((n,), lambda i: A[i], name='B')
        s = tvm.create_schedule(B.op)
        bx, tx = s[B].split(B.op.axis[0], factor=num_thread)
        s[B].bind(bx, tvm.thread_axis("blockIdx.x"))
        s[B].bind(tx, tvm.thread_axis("threadIdx.x"))
        fun = tvm.build(s, [A, B], "cuda", name="vector_load")
        np_a = np.random.randint(low=-128, high=127, size=(n, lanes))
        a = tvm.nd.empty((n,), A.dtype, ctx).copyfrom(np_a)
        b = tvm.nd.empty((n,), B.dtype, ctx)
        fun(a, b)
        np.testing.assert_allclose(a.asnumpy(), b.asnumpy())

    check_cuda("int8", 64, 8)
    check_cuda("int8", 64, 16)