This article collects typical usage examples of the Python function tvm.lower, answering questions such as: what does lower do, how is it called, and what does it look like in real code? Below are 15 code examples of the lower function, ordered roughly by popularity.
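Before the examples, it helps to see the pattern they all share in isolation: declare input tensors, describe a computation, create a default schedule, and call tvm.lower to print the lowered IR. The following is a minimal sketch of that pattern, written against the same pre-1.0 TVM API used throughout this page; the tensor names and the add-one computation are illustrative only.

import tvm

# Declare symbolic inputs and an elementwise computation (names are illustrative).
n = tvm.var("n")
A = tvm.placeholder((n,), name="A")
B = tvm.compute((n,), lambda i: A[i] + 1.0, name="B")

# Create a default schedule and lower it; simple_mode=True returns a
# readable statement for inspection instead of a full lowered function.
s = tvm.create_schedule(B.op)
print(tvm.lower(s, [A, B], simple_mode=True))

Most of the examples below call tvm.lower in exactly this way, as a quick sanity check that a schedule can be lowered before building and running it on a device.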
Example 1: verify_softmax
def verify_softmax(m, n, dtype="float32"):
    A = tvm.placeholder((m, n), dtype=dtype, name='A')
    B = topi.nn.softmax(A)
    # confirm lower works
    s = tvm.create_schedule([B.op])
    tvm.lower(s, [A, B], simple_mode=True)

    a_np = np.random.uniform(size=get_const_tuple(A.shape)).astype(A.dtype)
    b_np = topi.testing.softmax_python(a_np)

    def check_device(device):
        ctx = tvm.context(device, 0)
        if not ctx.exist:
            print("Skip because %s is not enabled" % device)
            return
        print("Running on target: %s" % device)
        with tvm.target.create(device):
            s = topi.generic.schedule_softmax(B)

        a = tvm.nd.array(a_np, ctx)
        b = tvm.nd.array(np.zeros(get_const_tuple(B.shape), dtype=B.dtype), ctx)
        foo = tvm.build(s, [A, B], device, name="softmax")
        foo(a, b)
        tvm.testing.assert_allclose(b.asnumpy(), b_np, rtol=1e-5)

    for device in ['cuda', 'opencl', 'metal', 'rocm', 'vulkan', 'nvptx']:
        check_device(device)
Example 2: verify_softmax
def verify_softmax(m, n):
    A = tvm.placeholder((m, n), name='A')
    B = topi.cpp.nn.softmax(A, 1)
    # confirm lower works
    s = tvm.create_schedule([B.op])
    tvm.lower(s, [A, B], simple_mode=True)

    a_np = np.random.uniform(size=get_const_tuple(A.shape)).astype(A.dtype)
    b_np = topi.testing.softmax_python(a_np)

    def check_device(device):
        if not tvm.module.enabled(device):
            print("Skip because %s is not enabled" % device)
            return
        print("Running on target: %s" % device)
        target = topi.cpp.TEST_create_target(device)
        if device == "llvm":
            s = topi.cpp.generic.default_schedule(target, [B], False)
        else:
            s = topi.cpp.cuda.schedule_softmax(target, [B])
        ctx = tvm.context(device, 0)
        a = tvm.nd.array(a_np, ctx)
        b = tvm.nd.array(np.zeros(get_const_tuple(B.shape), dtype=B.dtype), ctx)
        foo = tvm.build(s, [A, B], device, name="softmax")
        foo(a, b)
        np.testing.assert_allclose(b.asnumpy(), b_np, rtol=1e-5)

    for device in ['cuda', 'opencl', 'metal', 'rocm']:
        check_device(device)
Example 3: verify_log_softmax
def verify_log_softmax(m, n):
    A = tvm.placeholder((m, n), name='A')
    B = topi.nn.log_softmax(A)
    # confirm lower works
    s = tvm.create_schedule([B.op])
    tvm.lower(s, [A, B], simple_mode=True)

    a_np = np.random.uniform(size=get_const_tuple(A.shape)).astype(A.dtype)
    b_np = topi.testing.log_softmax_python(a_np)

    def check_device(device):
        if not tvm.module.enabled(device):
            print("Skip because %s is not enabled" % device)
            return
        print("Running on target: %s" % device)
        with tvm.target.create(device):
            s = topi.generic.schedule_softmax(B)
        ctx = tvm.context(device, 0)
        a = tvm.nd.array(a_np, ctx)
        b = tvm.nd.array(np.zeros(get_const_tuple(B.shape), dtype=B.dtype), ctx)
        foo = tvm.build(s, [A, B], device, name="log_softmax")
        foo(a, b)
        tvm.testing.assert_allclose(b.asnumpy(), b_np, rtol=1e-5)

    for device in ["opengl"]:
        check_device(device)
Example 4: test_lstm_cell_inline
def test_lstm_cell_inline():
    num_step = 128
    num_input = 256
    num_hidden = 1152
    batch_size = 4
    # Global transition matrix
    X = tvm.placeholder((num_step - 1, batch_size, num_input), name="X")
    Wi2h = tvm.placeholder((4, num_hidden, num_input), name="Wi2h")
    Wh2h = tvm.placeholder((4, num_hidden, num_hidden), name="Wh2h")
    # h: output hidden state, c: cell state.
    s_state_h = tvm.placeholder((num_step, batch_size, num_hidden))
    s_state_c = tvm.placeholder((num_step, batch_size, num_hidden))
    s_init_c = tvm.compute((1, batch_size, num_hidden),
                           lambda *i: 0.0, name="init_c")
    s_init_h = tvm.compute((1, batch_size, num_hidden),
                           lambda *i: 0.0, name="init_h")
    # LSTM transition
    k = tvm.reduce_axis((0, num_input), name="ki2h")
    s_i2h = tvm.compute(
        (num_step, 4, batch_size, num_hidden),
        lambda t, x, i, j: tvm.sum(X[t - 1, i, k] * Wi2h[x, j, k], axis=k),
        name="s_i2h")
    k = tvm.reduce_axis((0, num_hidden), name="ki2h")
    s_h2h = tvm.compute(
        (num_step, 4, batch_size, num_hidden),
        lambda t, x, i, j: tvm.sum(s_state_h[t - 1, i, k] * Wh2h[x, j, k], axis=k),
        name="s_h2h")
    # Gate rules
    gates = tvm.compute(s_i2h.shape, lambda *i:
                        s_i2h(*i) + s_h2h(*i), name="gates")
    gshape = (num_step, batch_size, num_hidden)
    in_gate = tvm.compute(gshape, lambda t, i, j: tvm.sigmoid(gates[t, 0, i, j]), name="in_gate")
    in_transform = tvm.compute(gshape, lambda t, i, j: tvm.tanh(gates[t, 1, i, j]), name="in_transform")
    forget_gate = tvm.compute(gshape, lambda t, i, j: tvm.sigmoid(gates[t, 2, i, j]), name="forget_gate")
    out_gate = tvm.compute(gshape, lambda t, i, j: tvm.sigmoid(gates[t, 3, i, j]), name="out_gate")
    next_c = tvm.compute(gshape,
                         lambda t, i, j:
                         forget_gate[t, i, j] * s_state_c[t - 1, i, j] +
                         in_gate[t, i, j] * in_transform[t, i, j], name="next_c")
    next_h = tvm.compute(gshape,
                         lambda t, i, j: out_gate[t, i, j] * tvm.tanh(next_c[t, i, j]), name="next_h")
    update_c = tvm.compute(gshape, lambda *i: next_c(*i), name="update_c")
    update_h = tvm.compute(gshape, lambda *i: next_h(*i), name="update_h")
    # define the recurrence with a scan
    scan_h, scan_c = tvm.scan(
        [s_init_h, s_init_c],
        [update_h, update_c],
        [s_state_h, s_state_c],
        inputs=[X],
        name="lstm_scan")
    # schedule
    s = tvm.create_schedule(scan_h.op)
    # Inline gate computations
    s[gates].compute_inline()
    s[in_gate].compute_inline()
    s[in_transform].compute_inline()
    s[forget_gate].compute_inline()
    s[out_gate].compute_inline()
    # verify we can lower correctly
    tvm.lower(s, [X, Wi2h, Wh2h, scan_h, scan_c])
Example 5: test_loop_dependent_allocate
def test_loop_dependent_allocate():
    N = tvm.var("N")
    A = tvm.placeholder((2*N,), "float32", "A")
    C = tvm.compute((N, ), lambda i: A[2*i] + A[i+1], name='C')
    s = tvm.create_schedule(C.op)
    AA = s.cache_read(A, "local", [C])
    s[AA].compute_at(s[C], s[C].op.axis[0])
    # this line should fail because IRUseDefAnalysis sees an allocate statement
    # referencing an undefined variable
    tvm.lower(s, [A, C])
Example 6: verify_log_softmax
def verify_log_softmax(m, n, dtype="float32"):
    A = tvm.placeholder((m, n), dtype=dtype, name='A')
    B = topi.nn.log_softmax(A)
    # confirm lower works
    s = tvm.create_schedule([B.op])
    tvm.lower(s, [A, B], simple_mode=True)

    a_np = np.random.uniform(size=get_const_tuple(A.shape)).astype(A.dtype)
    b_np = topi.testing.log_softmax_python(a_np)

    for device in get_all_backend():
        check_device(A, B, a_np, b_np, device, "log_softmax")
Example 7: verify_softmax
def verify_softmax(m, n, dtype="float32"):
    A = tvm.placeholder((m, n), dtype=dtype, name='A')
    B = topi.nn.softmax(A)
    # confirm lower works
    s = tvm.create_schedule([B.op])
    tvm.lower(s, [A, B], simple_mode=True)

    a_np = np.random.uniform(size=get_const_tuple(A.shape)).astype(A.dtype)
    b_np = topi.testing.softmax_python(a_np)

    for device in ['cuda', 'opencl', 'metal', 'rocm', 'vulkan', 'nvptx']:
        check_device(A, B, a_np, b_np, device, "softmax")
Example 8: run_inference
def run_inference(data_dtype, kernel_dtype, out_dtype, im_height, im_width, in_filter,
                  out_filter, k_h, k_w, hpad, wpad, hstride, wstride):
    """
    Runs the inference and checks the functional correctness between
    compute and schedule outputs
    """
    (data_shape, kernel_shape, o_shape) = get_shape(im_height, im_width, in_filter,
                                                    out_filter, k_h, k_w, hpad, wpad,
                                                    hstride, wstride, out_dtype)

    # Create TVM placeholders
    data = tvm.placeholder(data_shape, name='data', dtype=data_dtype)
    kernel = tvm.placeholder(kernel_shape, name='kernel', dtype=kernel_dtype)

    # Create the numpy arrays to be used for executing conv models
    if data_dtype == 'float32':
        data_array = tvm.nd.array(np.random.rand(*data_shape).astype(dtype=data_dtype), CTX)
        kernel_array = tvm.nd.array(np.random.rand(*kernel_shape).astype(dtype=kernel_dtype), CTX)
    else:
        data_array = tvm.nd.array(np.random.randint(100, size=data_shape).astype(data_dtype))
        kernel_array = tvm.nd.array(np.random.randint(100, size=kernel_shape).astype(kernel_dtype))

    # c_orig will be used for declaration output
    # c_sch will be used for scheduled computation output
    c_orig = tvm.nd.array(np.zeros(o_shape, dtype=out_dtype), CTX)
    c_sch = tvm.nd.array(np.zeros(o_shape, dtype=out_dtype), CTX)

    with tvm.target.create(TARGET_NAME):
        conv = topi.nn.conv2d_NCHWc(data, kernel, stride=hstride,
                                    padding=hpad, layout='NCHWc',
                                    out_layout='NCHWc', out_dtype=out_dtype)
        out = topi.nn.relu(conv)
        sch = tvm.create_schedule(out.op)
        func = tvm.build(sch, [data, kernel, out], target=TARGET_NAME, name='out')
        func(data_array, kernel_array, c_orig)
        LOGGER.debug(tvm.lower(sch, [data, kernel], simple_mode=True))

        # Generate and run the optimized schedule
        sconv = topi.generic.nn.schedule_conv2d_NCHWc(outs=[out])
        func = tvm.build(sconv, [data, kernel, out], target=TARGET_NAME, name='conv')
        func(data_array, kernel_array, c_sch)

        # Functional check
        if data_dtype == 'uint8':
            np.testing.assert_equal(c_orig.asnumpy(), c_sch.asnumpy())
        else:
            assert np.allclose(c_orig.asnumpy(), c_sch.asnumpy())

        evaluator = func.time_evaluator(func.entry_name, CTX, number=1000)
        LOGGER.debug(tvm.lower(sconv, [data, kernel], simple_mode=True))
        return evaluator(data_array, kernel_array, c_sch).mean
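The function above depends on module-level names that are not shown in this snippet: CTX, TARGET_NAME, LOGGER, and get_shape. A hypothetical driver, with illustrative values only, might look like the sketch below (the setup and the conv2d parameters are assumptions for demonstration, not values from the original benchmark; get_shape is assumed to be defined in the same module).

import logging
import tvm

# Assumed module-level setup referenced by run_inference.
TARGET_NAME = 'llvm'
CTX = tvm.context(TARGET_NAME, 0)
LOGGER = logging.getLogger(__name__)

# Illustrative call: float32 data/kernel/output, 56x56 input, 64 -> 64
# channels, 3x3 kernel, padding 1, stride 1.
mean_time = run_inference('float32', 'float32', 'float32',
                          56, 56, 64, 64, 3, 3, 1, 1, 1, 1)
print("mean inference time: %g s" % mean_time)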
Example 9: test_add_pipeline
def test_add_pipeline():
    nn = 64
    max_threads = 4
    n = tvm.convert(nn)
    A = tvm.placeholder((n,), name='A')

    def extern_generator(ins, outs):
        """Manually write the IR for the extern function, add pipeline"""
        ib = tvm.ir_builder.create()
        with ib.for_range(0, (n+1) // 2) as i:
            ib.emit(outs[0].vstore(i*2, ins[0].vload(i*2, "float32x2") + tvm.const(1, "float32x2")))
        return ib.get()

    def extern_generator_gpu(ins, outs):
        """Manually write the IR for the extern function, add pipeline"""
        ib = tvm.ir_builder.create()
        bx = tvm.thread_axis("blockIdx.x")
        tx = tvm.thread_axis("threadIdx.x")
        ib.scope_attr(bx, "thread_extent", (nn+max_threads-1) // max_threads)
        ib.scope_attr(tx, "thread_extent", max_threads)
        idx = bx.var * max_threads + tx.var
        with ib.if_scope(ib.likely(idx < n)):
            ib.emit(outs[0].vstore(idx*2, ins[0].vload(idx*2, "float32x2") + tvm.const(1, "float32x2")))
        return ib.get()

    C_cpu = tvm.extern(A.shape, [A], extern_generator, name='C')
    C_gpu = tvm.extern(A.shape, [A], extern_generator_gpu, name='C')
    s_cpu = tvm.create_schedule(C_cpu.op)
    s_gpu = tvm.create_schedule(C_gpu.op)
    print(tvm.lower(s_cpu, [A, C_cpu], simple_mode=True))
    print(tvm.lower(s_gpu, [A, C_gpu], simple_mode=True))

    def check_target(target):
        if not tvm.module.enabled(target):
            return
        s = s_gpu if target in ['opencl', 'cuda'] else s_cpu
        C = C_gpu if target in ['opencl', 'cuda'] else C_cpu
        # build and invoke the kernel.
        f = tvm.build(s, [A, C], target)
        ctx = tvm.context(target, 0)
        # launch the kernel.
        n = nn
        a = tvm.nd.array(np.random.uniform(size=n).astype(A.dtype), ctx)
        c = tvm.nd.array(np.zeros(n, dtype=C.dtype), ctx)
        f(a, c)
        tvm.testing.assert_allclose(c.asnumpy(), a.asnumpy() + 1)

    check_target("llvm")
    check_target("opencl")
    check_target("cuda")
Example 10: check_device
def check_device(device, target_device):
    if not tvm.module.enabled(target_device):
        print("Skip test because {} is not enabled.".format(target_device))
        return

    device_ctx = tvm.context(device)
    graph = get_simplex_graph(host_ctx.device_type, device_ctx.device_type)
    shape = (4,)

    # Create module for add whose target is the device.
    tensor_a = tvm.placeholder(shape, name="A")
    tensor_b = tvm.placeholder(shape, name="B")
    elemwise_add = tvm.compute(shape, lambda *i: tensor_a(*i)
                               + tensor_b(*i), name="elemwise_add")
    target = topi.cpp.TEST_create_target(device)
    schedule_add = topi.cpp.cuda.schedule_injective(target, [elemwise_add])
    lower_add = tvm.lower(schedule_add, [tensor_a, tensor_b, elemwise_add],
                          name="elemwise_add")

    # Insert copy. Neither compute nor schedule is required for the copy
    # node. The compute will be performed at runtime which is just data
    # copy from the input to the output.
    tensor_copy = tvm.placeholder(shape, name="__copy")

    # Create module for sub whose target is the host.
    tensor_c = tvm.placeholder(shape, name="C")
    elemwise_sub = tvm.compute(shape, lambda *i: tensor_copy(*i)
                               - tensor_c(*i), name="elemwise_sub")
    schedule_sub = tvm.create_schedule(elemwise_sub.op)
    lower_sub = tvm.lower(schedule_sub, [tensor_copy, tensor_c,
                                         elemwise_sub],
                          name="elemwise_sub")

    target_flist = {target_device: [lower_add], target_host: [lower_sub]}
    mhost = tvm.build(target_flist, target_host=target_host)
    ctx = [host_ctx, device_ctx]
    mod = graph_runtime.create(graph, mhost, ctx)
    params = {}
    params["A"] = tensor_a = np.random.uniform(
        size=shape).astype(tensor_a.dtype)
    params["B"] = tensor_b = np.random.uniform(
        size=shape).astype(tensor_b.dtype)
    params["C"] = tensor_c = np.random.uniform(
        size=shape).astype(tensor_c.dtype)
    mod.set_input(**params)
    mod.run()
    out = mod.get_output(0, tvm.nd.empty(shape))
    np.testing.assert_equal(
        out.asnumpy(), (tensor_a + tensor_b) - tensor_c)
Example 11: lower
def lower(*args, **kwargs):
    """Thin wrapper of tvm.lower

    This wrapper automatically applies VTA's build_config
    if there is no user specified build_config in context.

    See Also
    --------
    tvm.lower : The original TVM's lower function
    """
    cfg = tvm.build_module.current_build_config()
    if not cfg.add_lower_pass:
        with build_config():
            return tvm.lower(*args, **kwargs)
    return tvm.lower(*args, **kwargs)
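A usage sketch for this wrapper follows. It assumes the wrapper is exported as vta.lower alongside VTA's build_config, as the code above suggests; the schedule being lowered is illustrative only.

import tvm
import vta

# An illustrative schedule to lower through the VTA wrapper.
n = 64
A = tvm.placeholder((n,), name="A")
B = tvm.compute((n,), lambda i: A[i] + 1.0, name="B")
s = tvm.create_schedule(B.op)

# No build_config is active here, so the wrapper installs VTA's own
# build_config and then delegates to tvm.lower.
print(vta.lower(s, [A, B], simple_mode=True))

If the caller has already entered a build_config whose add_lower_pass is set, the wrapper leaves that configuration in place and calls tvm.lower directly.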
Example 12: _lower
def _lower(sch, inputs, func_name, graph):
    import traceback
    # pylint: disable=broad-except
    try:
        f = tvm.lower(sch, inputs, name=func_name)
        logging.debug("lower function %s", func_name)
        logging.debug("%s", tvm.lower(sch, inputs, simple_mode=True))
    except Exception:
        msg = traceback.format_exc()
        msg += "Error during compile graph\n"
        msg += "--------------------------\n"
        msg += graph.ir(join_entry_attrs=["shape"])
        raise RuntimeError(msg)
    return f if isinstance(
        f, (tvm.container.Array, tuple, list)) else [f]
Example 13: test_upstream
def test_upstream():
    @tvm.hybrid.script
    def upstream(a):
        b = output_tensor((20, ), 'float32')
        for i in range(20):
            b[i] = a[i] * i
        return b

    a = tvm.placeholder((20, ), 'float32')
    b = tvm.placeholder((20, ), 'float32')
    c = tvm.compute((20, ), lambda x: a[x] + b[x])
    d = upstream(c)
    sch = tvm.create_schedule([c.op, d.op])
    ir = tvm.lower(sch, [a, b, d], simple_mode=True)
    func = tvm.build(sch, [a, b, d])
    assert(func)

    a = numpy.random.randn(20).astype('float32')
    b = numpy.random.randn(20).astype('float32')
    ref = numpy.zeros((20, ), 'float32')
    for i in range(20):
        ref[i] = (a[i] + b[i]) * i

    tvm_a = tvm.nd.array(a)
    tvm_b = tvm.nd.array(b)
    tvm_d = tvm.nd.array(numpy.zeros((20, )).astype('float32'))
    func(tvm_a, tvm_b, tvm_d)
    tvm.testing.assert_allclose(tvm_d.asnumpy(), ref, 1e-5, 1e-5)
Example 14: test_add_pipeline
def test_add_pipeline():
    nn = 1024
    n = tvm.convert(nn)
    A = tvm.placeholder((n,), name='A')

    def extern_generator(ins, outs):
        """Manually write the IR for the extern function, add pipeline"""
        ib = tvm.ir_builder.create()
        with ib.for_range(0, n/2) as i:
            ib.emit(outs[0].vstore(i*2, ins[0].vload(i*2, "float32x2") + tvm.const(1, "float32x2")))
        return ib.get()

    C = tvm.extern(A.shape, [A], extern_generator, name='C')
    s = tvm.create_schedule(C.op)
    print(tvm.lower(s, [A, C], simple_mode=True))

    def check_llvm():
        if not tvm.module.enabled("llvm"):
            return
        # build and invoke the kernel.
        f = tvm.build(s, [A, C], "llvm")
        ctx = tvm.cpu(0)
        # launch the kernel.
        n = nn
        a = tvm.nd.array(np.random.uniform(size=n).astype(A.dtype), ctx)
        c = tvm.nd.array(np.zeros(n, dtype=C.dtype), ctx)
        f(a, c)
        np.testing.assert_allclose(
            c.asnumpy(), a.asnumpy() + 1)

    check_llvm()
Example 15: test_in_bounds_vectorize_llvm
def test_in_bounds_vectorize_llvm():
    n = 512
    lanes = 2
    A = tvm.placeholder((n,), name='A', dtype="float32x%d" % lanes)
    B = tvm.compute((n,), lambda i: A[i], name='B')
    C = tvm.compute((n,), lambda i: B[i] + tvm.const(1, A.dtype), name='C')
    s = tvm.create_schedule(C.op)
    xo, xi = s[C].split(C.op.axis[0], nparts=2)
    _, xi = s[C].split(xi, factor=2)
    s[C].parallel(xo)
    s[C].vectorize(xi)
    s[B].compute_at(s[C], xo)
    xo, xi = s[B].split(B.op.axis[0], factor=2)
    s[B].vectorize(xi)
    # build and invoke the kernel.
    lowered_func = tvm.lower(s, [A, C], "llvm", simple_mode=False)
    print(lowered_func.body)
    f = tvm.build(s, [A, C], "llvm")
    ctx = tvm.cpu(0)
    # launch the kernel.
    a = tvm.nd.empty((n,), A.dtype).copyfrom(
        np.random.uniform(size=(n, lanes)))
    c = tvm.nd.empty((n,), C.dtype, ctx)
    f(a, c)
    tvm.testing.assert_allclose(c.asnumpy(), a.asnumpy() + 1)