本文整理汇总了Python中tvm.decl_buffer函数的典型用法代码示例。如果您正苦于以下问题:Python decl_buffer函数的具体用法?Python decl_buffer怎么用?Python decl_buffer使用的例子?那么恭喜您, 这里精选的函数代码示例或许可以为您提供帮助。
在下文中一共展示了decl_buffer函数的15个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Python代码示例。
示例1: test_storage_sync
def test_storage_sync():
    """Check that ThreadSync inserts a "tvm_storage_sync" barrier for a
    shared-scope intermediate computed inside a blockIdx.x-bound loop."""
    m = tvm.var('m')
    l = tvm.var('l')
    A = tvm.placeholder((m, l), name='A')
    A1 = tvm.compute((m, l), lambda i, j: A[i, j], name='A1')
    A2 = tvm.compute((m, l), lambda i, j: A1[i, j] + 3, name='A2')
    s = tvm.create_schedule(A2.op)
    xo, xi = s[A2].split(A2.op.axis[0], factor=8)
    s[A2].bind(xo, tvm.thread_axis("blockIdx.x"))
    s[A1].compute_at(s[A2], xo)
    # A1 lives in shared memory, so its consumer must synchronize after writes.
    s[A1].set_scope("shared")
    bounds = tvm.schedule.InferBound(s)
    assert isinstance(bounds, tvm.container.Map)
    stmt = tvm.schedule.ScheduleOps(s, bounds)
    Ab = tvm.decl_buffer(A.shape, A.dtype, name='A')
    A2b = tvm.decl_buffer(A2.shape, A2.dtype, name='A2')
    stmt = tvm.ir_pass.StorageFlatten(stmt, {A: Ab, A2: A2b}, 64)
    f = tvm.ir_pass.MakeAPI(stmt, "test", [Ab, A2b], 0, True)
    flist = tvm.ir_pass.SplitHostDevice(f)
    # flist[1] is the device-side function produced by the split.
    f = flist[1]
    f = tvm.ir_pass.ThreadSync(f, "shared")
    body_list = tvm.make.stmt_list(f.body.body.body.body)
    # The statement following the shared write must be the inserted barrier.
    assert(body_list[1].value.name == "tvm_storage_sync")
示例2: test_storage_combine
def test_storage_combine():
    """Stages tagged "global:tag" should be combined by StorageRewrite into a
    single allocation (asserted to hold 16 elements)."""
    n = 8
    A = tvm.placeholder((4,), name='A')
    num_stage = 5
    B = A
    stages = []
    for t in range(num_stage):
        # Each stage reads the previous one; the lambda is evaluated
        # immediately by tvm.compute, so B and t are bound at this iteration.
        B = tvm.compute((n, ), lambda i: B[i] + B[0] + (t+1), name='A%d' % t)
        stages.append(B)
    s = tvm.create_schedule(B.op)
    # Every stage except the final output shares the tagged global scope.
    for S in stages[:-1]:
        s[S].set_scope("global:tag")
    bounds = tvm.schedule.InferBound(s)
    assert isinstance(bounds, tvm.container.Map)
    stmt = tvm.schedule.ScheduleOps(s, bounds)
    Ab = tvm.decl_buffer(A.shape, A.dtype, name='A')
    Bb = tvm.decl_buffer(B.shape, B.dtype, name='B')
    stmt = tvm.ir_pass.StorageFlatten(stmt, {A: Ab, B: Bb}, 64)
    stmt = tvm.ir_pass.CanonicalSimplify(stmt)
    stmt = tvm.ir_pass.Simplify(stmt)
    stmt = tvm.ir_pass.StorageRewrite(stmt)
    num_alloc = [0]
    def verify(n):  # NOTE: this n is the visited IR node, shadowing outer n
        if isinstance(n, tvm.stmt.Allocate):
            num_alloc[0] += 1
            # The combined tagged allocation holds 16 elements in total.
            assert (n.extents[0].value == 16)
    tvm.ir_pass.PostOrderVisit(stmt, verify)
    assert num_alloc[0] == 1
示例3: test_buffer_index_merge_mult_mod
def test_buffer_index_merge_mult_mod():
    """Check that vload index arithmetic mixing mult/mod/div terms is
    simplified by the buffer's index flattening to the expected direct form."""
    m = tvm.var('m')
    n = tvm.var('n')
    s = tvm.var('s')
    k0 = tvm.var('k0')
    k1 = tvm.var('k1')
    A = tvm.decl_buffer((m, n), tvm.float32)
    # Same shape but with an explicit symbolic row stride s.
    A_stride = tvm.decl_buffer((m, n), tvm.float32, strides=(s, 1))
    def assert_simplified_equal(index_simplified, index_direct):
        # Structural equality of the two flattened load expressions.
        assert tvm.ir_pass.Equal(index_simplified, index_direct),\
            "index_simplified=%s, index_direct=%s" %(index_simplified, index_direct)
    # Test Case1
    index_simplified = A_stride.vload(((k0 % k1) / s, (k0 % k1) % s + (k0 / k1) * k1))
    index_direct = A_stride.vload((0, k0))
    assert_simplified_equal(index_simplified, index_direct)
    # Test Case2
    index_simplified = A.vload(((k0 % (k1 / s)) / n,
                                (k0 % (k1 / s)) % n + (k0 % k1)))
    index_direct = A.vload((0, k0 % k1 + k0 % (k1 / s)))
    assert_simplified_equal(index_simplified, index_direct)
    # Test Case3
    index_simplified = A.vload((((k0 / (k1 / s)) * (k1 / s)) / n + (k0 % (k1 / s)) / n,
                                ((k0 / (k1 / s)) * (k1 / s)) % n + (k0 % (k1 / s)) % n))
    index_direct = A.vload((0, k0))
    assert_simplified_equal(index_simplified, index_direct)
    # Test Case4 (not able to simplify)
    index_simplified = A.vload(((k0 % (k1 / s)) / n,
                                (k0 % (k1 / n)) % n + (k0 % k1)))
    index_direct = A.vload((0, ((k0 % (k1 / s)) / n) * n + ((k0 % (k1 / n)) % n + (k0 % k1))))
    assert_simplified_equal(index_simplified, index_direct)
示例4: test_copy_pad_split
def test_copy_pad_split():
    """InjectCopyIntrin on a padded copy stage computed at a split axis:
    the callback receives the clipped source region and pad amounts."""
    m = 4 * 3
    A = tvm.placeholder((m, ), name="A")
    # Apad shifts A right by one and pads both ends with 0.0.
    Apad = tvm.compute((m + 2,), lambda i:
                       tvm.select(tvm.all(i >= 1, i <= m),
                                  A[i - 1], 0.0), "Apad")
    B = tvm.compute((m,), lambda i: Apad[i] + Apad[i + 1] + Apad[i + 2])
    s = tvm.create_schedule(B.op)
    xo, xi = s[B].split(B.op.axis[0], factor=4)
    s[Apad].compute_at(s[B], xo)
    # Mark the pad stage so it is lowered through the "memcpy" copy intrinsic.
    s[Apad].pragma(s[Apad].op.axis[0], "memcpy")
    bounds = tvm.schedule.InferBound(s)
    stmt = tvm.schedule.ScheduleOps(s, bounds)
    Ab = tvm.decl_buffer(A.shape, A.dtype, name='A')
    Bb = tvm.decl_buffer(B.shape, B.dtype, name='B')
    stmt = tvm.ir_pass.StorageFlatten(stmt, {A: Ab, B: Bb}, 64)
    stmt = tvm.ir_pass.Simplify(stmt)
    stmt = tvm.ir_pass.CanonicalSimplify(stmt)
    def cb(src, dst, pad_before, pad_after, pad_value):
        # NOTE(review): assert_expr_equal looks like a module-level helper
        # defined elsewhere in this file — confirm it is in scope.
        assert(dst.elem_offset.value == 0)
        assert_expr_equal(src.elem_offset, tvm.max(xo * 4, 1) - 1)
        # Expected leading/trailing padding per tile of 4 over a 6-wide window.
        rpad_before = tvm.max(1 - xo * 4, 0)
        rpad_after = tvm.max(xo * 4 - 7, 0)
        assert_expr_equal(pad_before[0], rpad_before)
        assert_expr_equal(pad_after[0], rpad_after)
        assert_expr_equal(src.shape[0], 6 - rpad_before - rpad_after)
        return tvm.make.Evaluate(0)
    stmt = tvm.ir_pass.InjectCopyIntrin(stmt, "memcpy", cb)
示例5: test_storage_share_gpu
def test_storage_share_gpu():
    """After StorageRewrite, a chain of alternating shared/global GPU stages
    needs only two global allocations (in/out reuse) while each stage keeps
    its own shared-scope buffer."""
    m = tvm.var('m')
    # Fix: shape must be a tuple — the original passed (m), which is just the
    # bare expression m, not a 1-tuple.
    A = [tvm.placeholder((m,), name='A')]
    num_stage = 5
    for t in range(num_stage):
        # Each stage pair: a shared-scope producer then a global consumer.
        A.append(tvm.compute((m,), lambda i: A[-1][i] + (t+1), name='A%d_s' % t))
        A.append(tvm.compute((m,), lambda i: A[-1][i], name='A%d' % t))
    s = tvm.create_schedule(A[-1].op)
    for t in range(num_stage):
        x = A[2*t+2].op.axis[0]
        bx, tx = s[A[2*t+2]].split(x, factor=32)
        s[A[2*t+2]].bind(bx, tvm.thread_axis("blockIdx.x"))
        s[A[2*t+2]].bind(tx, tvm.thread_axis("threadIdx.x"))
        s[A[2*t+1]].compute_at(s[A[2*t+2]], tx)
        s[A[2*t+1]].set_scope("shared")
    bounds = tvm.schedule.InferBound(s)
    assert isinstance(bounds, tvm.container.Map)
    stmt = tvm.schedule.ScheduleOps(s, bounds)
    Ab = tvm.decl_buffer(A[0].shape, A[0].dtype, name='A')
    Bb = tvm.decl_buffer(A[0].shape, A[0].dtype, name='B')
    stmt = tvm.ir_pass.StorageFlatten(stmt, {A[0]: Ab, A[-1]: Bb}, 64)
    stmt = tvm.ir_pass.CanonicalSimplify(stmt)
    stmt = tvm.ir_pass.Simplify(stmt)
    stmt = tvm.ir_pass.StorageRewrite(stmt)
    # Count storage_scope attributes per scope after rewriting.
    alloc_stats = {"global": 0, "shared": 0}
    def verify(n):
        if isinstance(n, tvm.stmt.AttrStmt):
            if n.attr_key == "storage_scope":
                alloc_stats[n.value.value] += 1
    tvm.ir_pass.PostOrderVisit(stmt, verify)
    assert alloc_stats["global"] == 2
    assert alloc_stats["shared"] == num_stage
示例6: test_inplace_rule2
def test_inplace_rule2(scope_tb = "local_TB2", max_bits = 1024 * 1024 * 1024):
    """StorageRewrite should fold the scope_tb cache-read buffers in place,
    leaving exactly two allocations.

    NOTE(review): register_mem is assumed to be a module-level helper that
    registers the special memory scope with the given bit capacity — confirm.
    """
    register_mem(scope_tb, max_bits)
    m = 10
    A = tvm.placeholder((m,), name='A')
    C = tvm.placeholder((m,), name='C')
    D = tvm.placeholder((m,), name='D')
    A0 = tvm.compute((m,), lambda i: A[i] + C[i], name='A0')
    A1 = tvm.compute((m,), lambda i: D[i] * D[i], name='A1')
    A2 = tvm.compute((m,), lambda i: A0[i] + A1[i], name='A2')
    B = tvm.compute((m,), lambda i: A2[i], name='B')
    s = tvm.create_schedule(B.op)
    # Cache each intermediate into the special scope for its consumer.
    A0L = s.cache_read(A0, scope_tb, [A2])
    A1L = s.cache_read(A1, scope_tb, [A2])
    A2L = s.cache_read(A2, scope_tb, [B])
    bounds = tvm.schedule.InferBound(s)
    assert isinstance(bounds, tvm.container.Map)
    stmt = tvm.schedule.ScheduleOps(s, bounds)
    Ab = tvm.decl_buffer(A.shape, A.dtype, name='A')
    Bb = tvm.decl_buffer(B.shape, B.dtype, name='B')
    # Fix: declare C's and D's buffers with their own dtypes. The original
    # reused B.dtype — identical for these float32 placeholders, but wrong if
    # any dtype ever changes.
    Cc = tvm.decl_buffer(C.shape, C.dtype, name='C')
    Dd = tvm.decl_buffer(D.shape, D.dtype, name='D')
    stmt = tvm.ir_pass.StorageFlatten(stmt, {A: Ab, B: Bb, C: Cc, D: Dd}, 64)
    stmt = tvm.ir_pass.CanonicalSimplify(stmt)
    stmt = tvm.ir_pass.Simplify(stmt)
    stmt = tvm.ir_pass.StorageRewrite(stmt)
    # Verify in-place folding works: only two allocations remain.
    num_alloc = [0]
    def verify(n):
        if isinstance(n, tvm.stmt.Allocate):
            num_alloc[0] += 1
    tvm.ir_pass.PostOrderVisit(stmt, verify)
    assert num_alloc[0] == 2
示例7: test_dynamic_tensor
def test_dynamic_tensor():
    """Build and run a scale-by-2 kernel over a dynamic-shaped sparse (CSR)
    tensor's data array, binding explicit buffers for data and indices."""
    dtype = 'float32'
    stype = 'csr'
    target = 'llvm'
    ctx = tvm.context(target, 0)
    # Symbolic dims: rows, cols, and number of nonzeros.
    nr, nc, n = tvm.var('nr'), tvm.var('nc'), tvm.var('n')
    A = tvmsp.placeholder(shape=(nr, nc), nonzeros=n, name='A', dtype=dtype)
    assert(A.stype == 'csr')
    C = tvm.compute(A.data.shape, lambda i: A.data[i] * 2., tag='cs_scatter')
    s = tvm.create_schedule(C.op)
    _nr, _nc = 3, 5
    # Random sparse input: entries below the 0.6 threshold are zeroed out.
    a = np.maximum(np.random.uniform(size=(_nr, _nc)).astype(dtype)-.6, 0.)
    a = tvmsp.array(a, ctx)
    assert a.data.dtype == a.dtype
    Ab = namedtuple('CSRBuffer', ['data', 'indices', 'indptr'])
    Ab.data = tvm.decl_buffer(a.data.shape, a.data.dtype, name='A_data')
    # NOTE(review): the indices buffer reuses the *data* array's shape and
    # float dtype rather than the indices array's own — confirm intended.
    Ab.indices = tvm.decl_buffer(a.data.shape, a.data.dtype, name='A_indices')
    binds = {A.data: Ab.data, A.indices: Ab.indices}
    f = tvm.build(s, [nr, A.data, C], target, binds=binds)
    c = tvmsp.array(np.zeros((_nr, _nc), dtype), ctx)
    c.data = tvm.nd.empty(a.data.shape, dtype)
    # Output shares the input's sparsity structure; only data values change.
    c.indices = a.indices
    c.indptr = a.indptr
    f(a.data.shape[0], a.data, c.data)
    tvm.testing.assert_allclose(c.asnumpy(), a.asnumpy() * 2., rtol=1e-5)
示例8: intrin_gemv
def intrin_gemv(m, l):
    """Declare a tensor intrinsic computing c(i) = sum_k a(k) * b(i, k)
    via a single extern "gemv_update" call.

    Parameters
    ----------
    m : Expr
        Output length (rows of b).
    l : Expr
        Reduction length (columns of b).
    """
    a = tvm.placeholder((l,), name='a')
    b = tvm.placeholder((m, l), name='b')
    k = tvm.reduce_axis((0, l), name='k')
    c = tvm.compute((m,), lambda i: tvm.sum(a[k] * b[i, k], axis=k), name='c')
    # Buffers pin the layout the extern call expects; b keeps a symbolic
    # leading stride so tiled sub-matrices can be passed.
    Ab = tvm.decl_buffer(a.shape, a.dtype,
                         name="A",
                         offset_factor=1,
                         strides=[1])
    Bb = tvm.decl_buffer(b.shape, b.dtype,
                         name="B",
                         offset_factor=1,
                         strides=[tvm.var("s1"), 1])
    Cb = tvm.decl_buffer(c.shape, c.dtype,
                         name="C",
                         offset_factor=1,
                         strides=[1])
    def intrin_func(ins, outs):
        # Emit one extern call that updates the output region in place.
        ib = tvm.ir_builder.create()
        aa, bb = ins
        cc = outs[0]
        ib.emit(tvm.call_extern("int32", "gemv_update",
                                cc.access_ptr("w"),
                                aa.access_ptr("r"),
                                bb.access_ptr("r"),
                                m, l, bb.strides[0]))
        return ib.get()
    with tvm.build_config(offset_factor=1):
        return tvm.decl_tensor_intrin(c.op, intrin_func, binds={a: Ab, b: Bb, c: Cb})
示例9: test_inplace_rule
def test_inplace_rule():
    """After StorageRewrite, the A0/A1/AA chain should fold in place so that
    only two allocations survive."""
    m = 10
    A = tvm.placeholder((m,), name='A')
    A0 = tvm.compute((m,), lambda i: A[i], name='A0')
    A1 = tvm.compute((m,), lambda i: A[i] + 1, name='A1')
    AA = tvm.compute((m,), lambda i: A0[i] + A1[i] + A1[0], name='AA')
    B = tvm.compute((m,), lambda i: AA[i] + 1, name='B')

    sch = tvm.create_schedule(B.op)
    inferred = tvm.schedule.InferBound(sch)
    assert isinstance(inferred, tvm.container.Map)
    body = tvm.schedule.ScheduleOps(sch, inferred)

    buf_a = tvm.decl_buffer(A.shape, A.dtype, name='A')
    buf_b = tvm.decl_buffer(B.shape, B.dtype, name='B')
    body = tvm.ir_pass.StorageFlatten(body, {A: buf_a, B: buf_b}, 64)
    for simplify_pass in (tvm.ir_pass.CanonicalSimplify, tvm.ir_pass.Simplify):
        body = simplify_pass(body)
    body = tvm.ir_pass.StorageRewrite(body)

    # Count Allocate nodes; in-place folding must leave exactly two.
    alloc_count = [0]
    def count_alloc(node):
        if isinstance(node, tvm.stmt.Allocate):
            alloc_count[0] += 1
    tvm.ir_pass.PostOrderVisit(body, count_alloc)
    assert alloc_count[0] == 2
示例10: test_storage_share
def test_storage_share():
    """A linear chain of elementwise stages should be rewritten to reuse one
    intermediate buffer: exactly one allocation after StorageRewrite."""
    m = tvm.var('m')
    l = tvm.var('l')
    A = tvm.placeholder((m, l), name='A')
    num_stage = 5
    B = A
    for t in range(num_stage):
        B = tvm.compute((m, l), lambda i, j: B[i, j] + (t+1), name='A%d' % t)

    sch = tvm.create_schedule(B.op)
    inferred = tvm.schedule.InferBound(sch)
    assert isinstance(inferred, tvm.container.Map)
    body = tvm.schedule.ScheduleOps(sch, inferred)

    buf_a = tvm.decl_buffer(A.shape, A.dtype, name='A')
    buf_b = tvm.decl_buffer(B.shape, B.dtype, name='B')
    body = tvm.ir_pass.StorageFlatten(body, {A: buf_a, B: buf_b}, 64)
    for simplify_pass in (tvm.ir_pass.CanonicalSimplify, tvm.ir_pass.Simplify):
        body = simplify_pass(body)
    body = tvm.ir_pass.StorageRewrite(body)

    # Verify the in-place folding left a single Allocate node.
    alloc_count = [0]
    def count_alloc(node):
        if isinstance(node, tvm.stmt.Allocate):
            alloc_count[0] += 1
    tvm.ir_pass.PostOrderVisit(body, count_alloc)
    assert alloc_count[0] == 1
示例11: test_buffer
def test_buffer():
    """decl_buffer returns a schedule.Buffer carrying the declared dtype
    and symbolic shape."""
    m = tvm.var('m')
    n = tvm.var('n')
    l = tvm.var('l')
    first = tvm.decl_buffer((m, n), tvm.float32)
    second = tvm.decl_buffer((n, l), tvm.float32)
    assert isinstance(first, tvm.schedule.Buffer)
    assert first.dtype == tvm.float32
    assert tuple(first.shape) == (m, n)
示例12: test_buffer_access_ptr_extent
def test_buffer_access_ptr_extent():
    """The extent argument of access_ptr (args[3]) spans the remaining
    buffer, shrinking by any offset and following explicit strides."""
    m = tvm.var('m')
    n = tvm.var('n')

    def extent_of(buf, **kwargs):
        # Fourth call argument is the accessible extent.
        return buf.access_ptr("rw", **kwargs).args[3]

    dense = tvm.decl_buffer((m, n), tvm.float32)
    assert tvm.ir_pass.Equal(extent_of(dense), m * n)
    assert tvm.ir_pass.Equal(extent_of(dense, offset=100), m * n - 100)
    # With explicit strides the extent follows the outermost stride.
    strided = tvm.decl_buffer((m, n), tvm.float32, strides=[n + 1 , 1])
    assert tvm.ir_pass.Equal(extent_of(strided, offset=100),
                             strided.strides[0] * m - 100)
示例13: test_buffer_vload
def test_buffer_vload():
    """vload on a buffer with elem_offset folds the offset into the
    flattened load index."""
    m = tvm.var('m')
    n = tvm.var('n')
    buf = tvm.decl_buffer((m, n), tvm.float32, elem_offset=100)
    loaded = buf.vload([2, 3])
    flat_index = tvm.ir_pass.Simplify(loaded.index)
    # Row-major: 2 * n + 3, plus the 100-element base offset.
    assert tvm.ir_pass.Equal(flat_index, n * 2 + 103)
示例14: test_equal_compute
def test_equal_compute():
    """tvm.ir_pass.Equal must hold between two independently-built but
    structurally identical expressions (func1) and IR statements (func2)."""
    x = tvm.var('x')
    y = tvm.var('y')
    n = 128
    A = tvm.placeholder((n, n), name='A')
    B = tvm.placeholder((n, n), name='B')
    ii = tvm.var('i')
    jj = tvm.var('j')
    def func1():
        # A fresh reduce axis per call; Equal should still match structurally.
        k = tvm.reduce_axis((0, n), name='k')
        return tvm.sum(A[ii, k] * B[jj, k], axis=k)
    Ab = tvm.decl_buffer((n,), name='A')
    # Rebind n to a symbolic var for the ir_builder case below; func1 above
    # already captured the integer 128 through its closure at call time.
    n = tvm.var("n")
    def func2():
        ib = tvm.ir_builder.create()
        A = ib.buffer_ptr(Ab)
        with ib.for_range(0, n, name="i") as i:
            A[i] = A[i] + 1
            with ib.for_range(0, 10, name="j") as j:
                A[j] = A[j] + 2
        return ib.get()
    assert tvm.ir_pass.Equal(func1(), func1())
    assert tvm.ir_pass.Equal(func2(), func2())
示例15: intrin_gemv
def intrin_gemv(m, n):
    """Declare a GEMV tensor intrinsic z(i) = sum_k w(i, k) * x(k), lowered
    to packed calls: "gemm" as full body, "fill_zero" as reduction reset,
    "gemv_add" as reduction update.

    Parameters
    ----------
    m : Expr
        Output length (rows of w).
    n : Expr
        Reduction length.
    """
    w = tvm.placeholder((m, n), name='w')
    x = tvm.placeholder((n,), name='x')
    k = tvm.reduce_axis((0, n), name='k')
    z = tvm.compute((m,), lambda i:
                    tvm.sum(w[i, k] * x[k], axis=k), name='z')
    # Only w gets an explicit buffer: offset_factor 16 with a symbolic
    # leading stride ("ldw") so tiled sub-matrices can be passed.
    Wb = tvm.decl_buffer(w.shape, w.dtype,
                         name="W",
                         offset_factor=16,
                         strides=[tvm.var('ldw'), 1])
    def intrin_func(ins, outs):
        ww, xx = ins
        zz = outs[0]
        ww_ptr = ww.access_ptr("r")
        xx_ptr = xx.access_ptr("r")
        zz_ptr = zz.access_ptr("w")
        # Returning (body, reset, update) lets tensorize split the reduction.
        body = tvm.call_packed(
            "gemm", ww_ptr, xx_ptr, zz_ptr, n, ww.strides[0])
        reset = tvm.call_packed(
            "fill_zero", zz_ptr, n)
        update = tvm.call_packed(
            "gemv_add", ww_ptr, xx_ptr, zz_ptr, n, ww.strides[0])
        return body, reset, update
    with tvm.build_config(data_alignment=16,
                          offset_factor=16):
        return tvm.decl_tensor_intrin(z.op, intrin_func,
                                      binds={w: Wb})