本文整理汇总了 Python 中 tvm.placeholder 方法的典型用法代码示例。如果您正苦于以下问题:Python tvm.placeholder 方法的具体用法?Python tvm.placeholder 怎么用?Python tvm.placeholder 使用的例子?那么恭喜您,这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在模块 tvm 的用法示例。
在下文中一共展示了tvm.placeholder方法的15个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Python代码示例。
示例1: get_tvm_add
# 需要导入模块: import tvm [as 别名]
# 或者: from tvm import placeholder [as 别名]
def get_tvm_add():
    """Build an element-wise float32 add with TVM for 'llvm' and, if possible, 'cuda'.

    Returns:
        A ``(func_cpu, func_gpu)`` tuple. ``func_gpu`` is ``None`` when
        ``mobula.utils.list_gpus()`` is falsy.
    """
    # Describe the computation: C[i] = A[i] + B[i] over a symbolic length n.
    length = tvm.var('n')
    lhs = tvm.placeholder(length, name='A', dtype='float32')
    rhs = tvm.placeholder(length, name='B', dtype='float32')
    total = tvm.compute((length,), lambda i: lhs[i] + rhs[i], name='C')

    # Build the CPU function (original author's note: with parallel support).
    with tvm.target.create('llvm'):
        cpu_sched = topi.generic.schedule_injective([total])
        func_cpu = tvm.build(cpu_sched, [lhs, rhs, total])

    # The GPU function is only built when at least one GPU is listed.
    func_gpu = None
    if mobula.utils.list_gpus():
        with tvm.target.create('cuda'):
            gpu_sched = topi.generic.schedule_injective([total])
            func_gpu = tvm.build(gpu_sched, [lhs, rhs, total])

    return func_cpu, func_gpu
示例2: test_add
# 需要导入模块: import tvm [as 别名]
# 或者: from tvm import placeholder [as 别名]
def test_add(target_dir):
    """Compile a CUDA vector add named "myadd" and write its artifacts to disk.

    Writes ``add_gpu.o``, ``add_gpu.ptx`` and ``add_gpu.so`` under
    ``target_dir``. Prints a skip message and returns early when
    ``tvm.module.enabled("cuda")`` is false.

    Args:
        target_dir: Directory that receives the generated files.
    """
    if not tvm.module.enabled("cuda"):
        print("skip %s because cuda is not enabled..." % __file__)
        return

    length = tvm.var("n")
    lhs = tvm.placeholder((length,), name='A')
    rhs = tvm.placeholder((length,), name='B')
    total = tvm.compute(lhs.shape, lambda i: lhs[i] + rhs[i], name="C")

    # Split the single axis by 64 and bind the two halves to block/thread indices.
    sched = tvm.create_schedule(total.op)
    block_part, thread_part = sched[total].split(total.op.axis[0], factor=64)
    sched[total].bind(block_part, tvm.thread_axis("blockIdx.x"))
    sched[total].bind(thread_part, tvm.thread_axis("threadIdx.x"))

    fadd_cuda = tvm.build(
        sched, [lhs, rhs, total], "cuda", target_host="llvm", name="myadd")

    # Host object file, then the first imported (device) module, then the
    # shared library linked from the object file.
    obj_path = os.path.join(target_dir, "add_gpu.o")
    fadd_cuda.save(obj_path)
    ptx_path = os.path.join(target_dir, "add_gpu.ptx")
    fadd_cuda.imported_modules[0].save(ptx_path)
    lib_path = os.path.join(target_dir, "add_gpu.so")
    cc.create_shared(lib_path, [obj_path])
示例3: test_verify_memory_all_bind
# 需要导入模块: import tvm [as 别名]
# 或者: from tvm import placeholder [as 别名]
def test_verify_memory_all_bind():
    """VerifyMemory must pass on every device type when the only stage is thread-bound."""
    length = tvm.var("n")
    src = tvm.placeholder((length,), name='A')
    out = tvm.compute(src.shape, lambda i: src[i] + 1.0, name="B")

    # Bind B's split axis to blockIdx.x / threadIdx.x.
    sched = tvm.create_schedule(out.op)
    block_part, thread_part = sched[out].split(out.op.axis[0], factor=64)
    sched[out].bind(block_part, tvm.thread_axis("blockIdx.x"))
    sched[out].bind(thread_part, tvm.thread_axis("threadIdx.x"))

    func = lower(sched, [src, out])

    all_devices = gpu_devices + other_devices
    for dev_type in all_devices:
        assert tvm.ir_pass.VerifyMemory(func, dev_type)
# Computations are not bound.
# So VerifyMemory pass fails when device type is GPU.
#
示例4: test_verify_memory_partially_bind
# 需要导入模块: import tvm [as 别名]
# 或者: from tvm import placeholder [as 别名]
def test_verify_memory_partially_bind():
    """VerifyMemory must fail on GPU device types when only one of three stages is bound.

    C is bound to block/thread indices while B and D are left unbound; the
    check is expected to fail for ``gpu_devices`` and pass for
    ``other_devices``.
    """
    length = tvm.var("n")
    src = tvm.placeholder((length,), name='A')
    stage_b = tvm.compute(src.shape, lambda i: src[i] + 1.0, name="B")
    stage_c = tvm.compute(stage_b.shape, lambda i: stage_b[i] + 2.0, name="C")
    stage_d = tvm.compute(stage_c.shape, lambda i: stage_c[i] + 2.0, name="D")

    # Only C gets thread bindings.
    sched = tvm.create_schedule([stage_b.op, stage_c.op, stage_d.op])
    block_part, thread_part = sched[stage_c].split(stage_c.op.axis[0], factor=64)
    sched[stage_c].bind(block_part, tvm.thread_axis("blockIdx.x"))
    sched[stage_c].bind(thread_part, tvm.thread_axis("threadIdx.x"))

    func = lower(sched, [src, stage_b, stage_c, stage_d])

    for dev_type in gpu_devices:
        verified = tvm.ir_pass.VerifyMemory(func, dev_type)
        assert not verified
    for dev_type in other_devices:
        verified = tvm.ir_pass.VerifyMemory(func, dev_type)
        assert verified
示例5: test_bound3
# 需要导入模块: import tvm [as 别名]
# 或者: from tvm import placeholder [as 别名]
def test_bound3():
    """InferBound on A1 computed at A2's ``yo`` axis must give extents 32 and 16."""
    dim_m = tvm.var('m')
    dim_l = tvm.var('l')
    src = tvm.placeholder((dim_m, dim_l), name='A')
    copy_stage = tvm.compute((dim_m, dim_l), lambda i, j: src[i, j], name='A1')
    add_stage = tvm.compute(
        (dim_m, dim_l), lambda i, j: copy_stage[i, j] + 3, name='A2')

    sched = tvm.create_schedule(add_stage.op)
    sched[copy_stage].set_scope("shared")

    xo, xi = sched[add_stage].split(add_stage.op.axis[0], 32)
    xi0, xi1 = sched[add_stage].split(xi, nparts=16)
    sched[add_stage].bind(xi0, tvm.thread_axis("threadIdx.x"))
    yo, yi = sched[add_stage].split(add_stage.op.axis[1], 16)

    # Call normalize() and discard the result: it must not affect `sched`.
    sched.normalize()

    sched[add_stage].reorder(xo, xi0, yo, xi1, yi)
    sched[copy_stage].compute_at(sched[add_stage], yo)

    bounds = tvm.schedule.InferBound(sched)
    assert isinstance(bounds, tvm.container.Map)
    axis0, axis1 = copy_stage.op.axis[0], copy_stage.op.axis[1]
    assert bounds[axis0].extent.value == 32
    assert bounds[axis1].extent.value == 16
示例6: test_bound_warp
# 需要导入模块: import tvm [as 别名]
# 或者: from tvm import placeholder [as 别名]
def test_bound_warp():
    """InferBound must report extent 16 for A1's first axis when A1 uses scope "warp"."""
    dim_m = tvm.var('m')
    dim_l = tvm.var('l')
    src = tvm.placeholder((dim_m, dim_l), name='A')
    copy_stage = tvm.compute((dim_m, dim_l), lambda i, j: src[i, j], name='A1')
    add_stage = tvm.compute(
        (dim_m, dim_l), lambda i, j: copy_stage[i, j] + 3, name='A2')

    sched = tvm.create_schedule(add_stage.op)
    sched[copy_stage].set_scope("warp")

    # A2: split axis 0 by 32, then by 16; inner part -> threadIdx.x,
    # outer part -> threadIdx.y.
    _, a2_inner = sched[add_stage].split(add_stage.op.axis[0], 32)
    a2_y_part, a2_x_part = sched[add_stage].split(a2_inner, factor=16)
    thread_x = tvm.thread_axis("threadIdx.x")
    sched[add_stage].bind(a2_x_part, thread_x)
    sched[add_stage].bind(a2_y_part, tvm.thread_axis("threadIdx.y"))

    # A1 is attached at A2's second axis and shares the same threadIdx.x.
    attach_axis = sched[add_stage].op.axis[1]
    sched[copy_stage].compute_at(sched[add_stage], attach_axis)
    _, a1_inner = sched[copy_stage].split(sched[copy_stage].op.axis[0], factor=16)
    sched[copy_stage].bind(a1_inner, thread_x)

    bounds = tvm.schedule.InferBound(sched)
    assert isinstance(bounds, tvm.container.Map)
    assert bounds[copy_stage.op.axis[0]].extent.value == 16
示例7: test_bound_nest_thread
# 需要导入模块: import tvm [as 别名]
# 或者: from tvm import placeholder [as 别名]
def test_bound_nest_thread():
    """Check inferred bounds for stages nested under a thread-bound axis.

    A1 (scope "local") and A2 (scope "shared") are both computed at A3's
    ``threadIdx.x`` axis; A2 additionally binds its own inner axis to the
    same thread axis. After normalization the test expects extent 1 for A1,
    32 for A2 and ``m`` for A3.
    """
    m = tvm.var('m')
    # Shape written as a real 1-tuple: the original `(m)` is just a
    # parenthesised scalar, unlike the `(m,)` shapes used by the computes.
    A = tvm.placeholder((m,), name='A')
    A1 = tvm.compute((m,), lambda i: A[i], name='A1')
    A2 = tvm.compute((m,), lambda i: A1[i] + 2, name='A2')
    A3 = tvm.compute((m,), lambda i: A2[i] + 3, name='A3')

    s = tvm.create_schedule(A3.op)
    s[A2].set_scope("shared")
    s[A1].set_scope("local")

    block_x = tvm.thread_axis("blockIdx.x")
    thread_x = tvm.thread_axis("threadIdx.x")
    bx, tx = s[A3].split(A3.op.axis[0], factor=32)
    s[A3].bind(bx, block_x)
    s[A3].bind(tx, thread_x)

    # A2 lives under A3's thread axis and re-binds the same thread axis.
    s[A2].compute_at(s[A3], tx)
    _, xi = s[A2].split(A2.op.axis[0], nparts=1)
    s[A2].bind(xi, thread_x)
    s[A1].compute_at(s[A3], tx)

    s = s.normalize()
    bounds = tvm.schedule.InferBound(s)
    assert bounds[A1.op.axis[0]].extent.value == 1
    assert bounds[A2.op.axis[0]].extent.value == 32
    assert bounds[A3.op.axis[0]].extent == m
示例8: test_looptype
# 需要导入模块: import tvm [as 别名]
# 或者: from tvm import placeholder [as 别名]
def test_looptype():
    """Check that parallel/vectorize/unroll loops yield the matching ``For`` types.

    The kernel is decorated with ``script`` (presumably TVM's hybrid script
    decorator; confirm against the file's imports). Its three loops are
    expected to produce Parallel, Vectorized and Unrolled loops, in that
    order, and the kernel is then run through ``run_and_check``.
    """
    # The kernel body is kept exactly as written: the decorator may parse
    # its source, so it is not restyled.
    @script
    def looptype(a, b, c):
        for i in parallel(8):
            a[i] = i
        for j in vectorize(8):
            b[j] = j
        for k in unroll(8):
            c[k] = k

    a = tvm.placeholder((8, ), name='a', dtype='int32')
    b = tvm.placeholder((8, ), name='b', dtype='int32')
    c = tvm.placeholder((8, ), name='c', dtype='int32')

    body = looptype(a, b, c)

    # Walk the returned statement sequence: first / rest.first / rest.rest.
    first_loop = body.first
    second_loop = body.rest.first
    third_loop = body.rest.rest

    loop_kinds = tvm.stmt.For
    assert first_loop.for_type == loop_kinds.Parallel
    assert second_loop.for_type == loop_kinds.Vectorized
    assert third_loop.for_type == loop_kinds.Unrolled

    run_and_check(looptype, [a, b, c], [a, b, c])
示例9: test_flatten2
# 需要导入模块: import tvm [as 别名]
# 或者: from tvm import placeholder [as 别名]
def test_flatten2():
    """Run StorageFlatten and Simplify on a two-stage schedule with A1 computed at A2.

    Only the type of the inferred bounds is asserted; the passes are
    otherwise just required to complete.
    """
    dim_m = tvm.var('m')
    dim_l = tvm.var('l')
    src = tvm.placeholder((dim_m, dim_l), name='A')
    copy_stage = tvm.compute((dim_m, dim_l), lambda i, j: src[i, j], name='A1')
    add_stage = tvm.compute(
        (dim_m, dim_l), lambda i, j: copy_stage[i, j] + 3, name='A2')

    sched = tvm.create_schedule(add_stage.op)
    outer, _ = sched[add_stage].split(add_stage.op.axis[0], 8)
    sched[copy_stage].compute_at(sched[add_stage], outer)

    bounds = tvm.schedule.InferBound(sched)
    assert isinstance(bounds, tvm.container.Map)
    scheduled = tvm.schedule.ScheduleOps(sched, bounds)

    # Explicit buffers for the input and the final output tensor.
    src_buf = tvm.decl_buffer(src.shape, src.dtype, name='A')
    out_buf = tvm.decl_buffer(add_stage.shape, add_stage.dtype, name='A2')
    binds = {src: src_buf, add_stage: out_buf}

    flattened = tvm.ir_pass.StorageFlatten(scheduled, binds, 64)
    tvm.ir_pass.Simplify(flattened)
示例10: test_flatten_storage_align
# 需要导入模块: import tvm [as 别名]
# 或者: from tvm import placeholder [as 别名]
def test_flatten_storage_align():
    """Check the flattened allocation extent after ``storage_align`` on A1."""
    n_rows = 8
    n_cols = 16
    src = tvm.placeholder((n_rows, n_cols), name='A')
    copy_stage = tvm.compute((n_rows, n_cols), lambda i, j: src[i, j], name='A1')
    add_stage = tvm.compute(
        (n_rows, n_cols), lambda i, j: copy_stage[i, j] + 3, name='A2')

    sched = tvm.create_schedule(add_stage.op)
    sched[copy_stage].storage_align(copy_stage.op.axis[0], 2, 1)

    bounds = tvm.schedule.InferBound(sched)
    assert isinstance(bounds, tvm.container.Map)
    scheduled = tvm.schedule.ScheduleOps(sched, bounds)

    src_buf = tvm.decl_buffer(src.shape, src.dtype, name='A')
    out_buf = tvm.decl_buffer(add_stage.shape, add_stage.dtype, name='A2')
    binds = {src: src_buf, add_stage: out_buf}

    flattened = tvm.ir_pass.StorageFlatten(scheduled, binds, 64)
    simplified = tvm.ir_pass.Simplify(flattened)

    # Presumably each 16-element row is padded to a stride of 17 by
    # storage_align(..., 2, 1), times 8 rows -- TODO confirm.
    assert simplified.body.extents[0].value == 17 * 8
示例11: test_tensor_intrin
# 需要导入模块: import tvm [as 别名]
# 或者: from tvm import placeholder [as 别名]
def test_tensor_intrin():
    """Declare a 16-element add tensor intrinsic and tensorize a 32-element add with it."""
    intrin_len = 16
    pat_x = tvm.placeholder((intrin_len,), name='x')
    pat_y = tvm.placeholder((intrin_len,), name='y')
    pattern = tvm.compute(pat_x.shape, lambda i: pat_x[i] + pat_y[i], name='z')

    def intrin_func(ins, outs):
        # Lowering rule: a packed call to "vadd" on the first input buffer,
        # the first output buffer and the first input's leading dimension.
        first_in = ins[0]
        assert isinstance(first_in, tvm.schedule.Buffer)
        assert first_in.shape[0].value == intrin_len
        return tvm.call_packed(
            "vadd", first_in.data, outs[0].data, first_in.shape[0])

    intrin = tvm.decl_tensor_intrin(pattern.op, intrin_func)
    assert intrin.op == pattern.op
    assert intrin.reduce_init is None
    assert tuple(intrin.inputs) == tuple(pattern.op.input_tensors)
    assert intrin.buffers[0].shape[0].value == intrin_len

    # A 32-element add whose inner split axis (factor 16) is tensorized.
    full_len = 32
    in_x = tvm.placeholder((full_len,), name='x')
    in_y = tvm.placeholder((full_len,), name='y')
    out_z = tvm.compute(in_x.shape, lambda i: in_x[i] + in_y[i], name='z')

    sched = tvm.create_schedule(out_z.op)
    _, inner = sched[out_z].split(out_z.op.axis[0], factor=intrin_len)
    sched[out_z].tensorize(inner, intrin)

    inner_attrs = sched[out_z].iter_var_attrs[inner]
    assert inner_attrs.tensor_intrin == intrin
    assert sched[out_z].iter_var_attrs[inner].iter_type == tvm.schedule.IterVar.Tensorized
示例12: test_thread_axis
# 需要导入模块: import tvm [as 别名]
# 或者: from tvm import placeholder [as 别名]
def test_thread_axis():
    """After LoopPartition and Simplify, the inspected statement must not contain 'if'."""
    dim_m = tvm.var('m')
    dim_l = tvm.var('l')
    src = tvm.placeholder((dim_m, dim_l), name='A')
    out = tvm.compute((dim_m, dim_l), lambda i, j: src[i, j] + 3, name='B')

    sched = tvm.create_schedule(out.op)
    sched[out].set_scope("shared")

    num_thread = 16
    _, inner = sched[out].split(out.op.axis[0], 32)
    thread_part, _ = sched[out].split(inner, nparts=num_thread)
    sched[out].bind(thread_part, tvm.thread_axis("threadIdx.x"))

    bounds = tvm.schedule.InferBound(sched)
    scheduled = tvm.schedule.ScheduleOps(sched, bounds)
    partitioned = tvm.ir_pass.LoopPartition(scheduled, False)
    simplified = tvm.ir_pass.Simplify(partitioned)

    # Textual check on the first statement three bodies deep.
    inspected = simplified.body.body.body.first
    assert 'if' not in str(inspected)
示例13: test_thread_axis2
# 需要导入模块: import tvm [as 别名]
# 或者: from tvm import placeholder [as 别名]
def test_thread_axis2():
    """The inspected loop extent in the lowered IR must not mention 'threadIdx'."""
    length = tvm.convert(4096)
    sym_factor = tvm.var('m')
    lhs = tvm.placeholder((length,), name='A')
    rhs = tvm.placeholder((length,), name='B')
    total = tvm.compute(lhs.shape, lambda i: lhs[i] + rhs[i], name='C')

    sched = tvm.create_schedule(total.op)
    num_thread = 32

    # Three successive splits: by 32, into `num_thread` parts, then by the
    # symbolic factor m.
    block_part, remainder = sched[total].split(total.op.axis[0], factor=32)
    thread_part, remainder = sched[total].split(remainder, nparts=num_thread)
    _, remainder = sched[total].split(remainder, factor=sym_factor)

    sched[total].bind(block_part, tvm.thread_axis("blockIdx.x"))
    sched[total].bind(thread_part, tvm.thread_axis("threadIdx.x"))

    lowered = lower(sched, [lhs, rhs])

    # Textual check on the extent of the statement five bodies deep.
    for_body = lowered.body.body.body.body.body.first
    extent_text = str(for_body.extent)
    assert 'threadIdx' not in extent_text
示例14: test_combination
# 需要导入模块: import tvm [as 别名]
# 或者: from tvm import placeholder [as 别名]
def test_combination():
    """Build ``k + A - B * C / x`` for "llvm" and compare the result against NumPy."""
    k = 3
    n = 5
    m = 10
    shape = (n, m)

    # Symbolic scalar plus three placeholders combined by operator overloading.
    x_sym = tvm.var('x')
    A = tvm.placeholder((n, m), name='A')
    B = tvm.placeholder((n, m), name='B')
    C = tvm.placeholder((n, m), name='C')
    D = k + A - B * C / x_sym

    sched = tvm.create_schedule(D.op)
    foo = tvm.build(sched, [x_sym, A, B, C, D], "llvm")

    ctx = tvm.cpu(0)
    x_val = 2  # concrete value passed for the symbolic scalar
    a = tvm.nd.array(np.random.uniform(size=shape).astype(A.dtype), ctx)
    b = tvm.nd.array(np.random.uniform(size=shape).astype(B.dtype), ctx)
    c = tvm.nd.array(np.random.uniform(size=shape).astype(C.dtype), ctx)
    d = tvm.nd.array(np.zeros(shape, dtype=D.dtype), ctx)

    foo(x_val, a, b, c, d)

    expected = k + a.asnumpy() - b.asnumpy() * c.asnumpy() / x_val
    np.testing.assert_allclose(d.asnumpy(), expected)
示例15: test_inline_multi_reduce
# 需要导入模块: import tvm [as 别名]
# 或者: from tvm import placeholder [as 别名]
def test_inline_multi_reduce():
    """Schedule a two-output argmax reduction with an inlined producer stage.

    The test only requires normalize, InferBound and ScheduleOps to complete;
    nothing is asserted about the resulting statement.
    """
    # Parameter names x and y are kept as in the original on purpose.
    def argmax_comp(x, y):
        # Each operand is an (index, value) pair; keep x's entry when its
        # value is >= y's.
        return (tvm.select((x[1] >= y[1]), x[0], y[0]),
                tvm.select((x[1] >= y[1]), x[1], y[1]))

    def argmax_init(idx_typ, val_typ):
        # Identity element: index -1 paired with the minimum of the value type.
        return tvm.const(-1, idx_typ), tvm.min_value(val_typ)

    argmax = tvm.comm_reducer(argmax_comp, argmax_init, name='argmax')

    dim_m = tvm.var('m')
    dim_n = tvm.var('n')
    src = tvm.placeholder((dim_m, dim_n), name='val', dtype='float32')
    plus_one = tvm.compute(
        (dim_m, dim_n), lambda i, j: src[i, j]+1, name='val1')
    exped = tvm.compute(
        (dim_m, dim_n), lambda i, j: tvm.exp(plus_one[i, j]), name='val2')

    k = tvm.reduce_axis((0, dim_n), 'k')
    T_idx, _ = tvm.compute(
        (dim_m, ), lambda i: argmax((k.var, exped[i, k]), axis=k), name='T')

    sched = tvm.create_schedule(T_idx.op)
    sched[plus_one].compute_inline()
    sched = sched.normalize()

    bounds = tvm.schedule.InferBound(sched)
    tvm.schedule.ScheduleOps(sched, bounds)