This page collects typical usage examples of the Python function tvm.sum. If you have been wondering exactly how to use tvm.sum, the curated snippets below may help.
The page shows 15 code examples of the sum function, sorted by popularity by default. You can upvote the examples you like or find useful; your feedback helps the system recommend better Python code samples.
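Before the examples, here is a minimal sketch of the basic tvm.sum pattern they all build on, assuming the flat pre-0.7 tvm namespace used throughout this page: declare a reduction domain with tvm.reduce_axis, then reduce over it inside tvm.compute.

import tvm

# Minimal tvm.sum sketch: sum a length-n vector into a single element.
n = tvm.var("n")
A = tvm.placeholder((n,), name="A")
k = tvm.reduce_axis((0, n), name="k")   # the reduction domain [0, n)
B = tvm.compute((1,), lambda _: tvm.sum(A[k], axis=k), name="B")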
Example 1: test_reduce_combiner_simplify
def test_reduce_combiner_simplify():
    # CanonicalChecker is a helper defined in the enclosing test file.
    ck = CanonicalChecker()
    dummy = tvm.var('dummy')
    comm_reducer = tvm.comm_reducer
    prod = comm_reducer(lambda x, y: x*y, lambda t0: tvm.const(1, t0))

    sum_or_prod = comm_reducer(
        lambda x, y: tvm.expr.Select(dummy < 0,
                                     x + y, x*y),
        lambda t0: tvm.expr.Select(dummy < 0,
                                   tvm.const(0, t0), tvm.const(1, t0)))
    sum_and_prod = comm_reducer(
        lambda x, y: (x[0] + y[0],
                      x[1]*y[1]),
        lambda t0, t1: (tvm.const(0, t0),
                        tvm.const(5, t0) - tvm.const(4, t0)))
    some_reducer1 = comm_reducer(
        lambda x, y: (x[0] + y[0],
                      x[0] + y[0] + x[1] + y[1],
                      x[0]*y[2] + y[0]*x[2],
                      x[1] + y[2],
                      4.0),
        lambda t0, t1, t2, t3, t4: (tvm.const(0, t0),
                                    tvm.const(1, t1),
                                    tvm.const(2, t2),
                                    tvm.const(3, t3),
                                    tvm.const(4, t4)))

    k = tvm.reduce_axis((0, 10), name="k")
    A = tvm.placeholder((10,), name='A')

    # Test that SimplifyCombiner makes use of vranges
    ck.analyzer.update(dummy, tvm.arith.ConstIntBound(-10, -4))
    ck.verify(sum_or_prod(A[k], k), tvm.sum(A[k], k))
    ck.analyzer.update(dummy, tvm.arith.ConstIntBound(5, 9), True)
    ck.verify(sum_or_prod(A[k], k), prod(A[k], k))
    ck.analyzer.update(dummy, tvm.arith.ConstIntBound(-10, 100), True)

    ck.verify(sum_and_prod((A[k], A[10-k]), k)[0], tvm.sum(A[k], k))
    ck.verify(sum_and_prod((A[k], A[10-k]), k)[1], prod(A[10-k], k))

    reference_simplified_sources = [[A[0]],
                                    [A[0], A[1]],
                                    [A[0], A[2]],
                                    [A[0], A[1], A[2], A[3]],
                                    [A[4]]]
    for j in range(5):
        # Here we use the j-th component of the result, so only it and the
        # components it depends on are left.
        simplified = ck.analyzer.canonical_simplify(
            some_reducer1((A[0], A[1], A[2], A[3], A[4]), k)[j])

        # Check that the remaining components are the expected ones.
        for lhs, rhs in zip(simplified.source, reference_simplified_sources[j]):
            assert tvm.ir_pass.Equal(lhs, rhs)

    # Test that components with side effects are not removed
    side_effect = lambda *xs: tvm.make.Call("int32", "dummy", xs,
                                            tvm.expr.Call.Intrinsic, None, 0)
    ck.verify(sum_and_prod((A[k], side_effect(A[10-k])), k)[0],
              sum_and_prod((A[k], side_effect(A[10-k])), k)[0])
    ck.verify(sum_and_prod((side_effect(A[k]), A[10-k]), k)[0],
              tvm.sum(side_effect(A[k]), k))
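As an aside, tvm.sum is itself just a built-in commutative reducer. A hedged sketch (not part of the test above) of building a reducer with the same calling convention via tvm.comm_reducer, from a combine function and its identity element:

import tvm

# Sketch: a product reducer analogous to tvm.sum, built from a combiner
# (x*y) and its identity element (a constant 1 of the element dtype).
prod = tvm.comm_reducer(lambda x, y: x * y,
                        lambda t: tvm.const(1, t), name="prod")
n = tvm.var("n")
A = tvm.placeholder((n,), name="A")
k = tvm.reduce_axis((0, n), name="k")
P = tvm.compute((1,), lambda _: prod(A[k], axis=k), name="P")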
Example 2: _declaration_dense_nopack
def _declaration_dense_nopack(cfg, data, weight, bias=None, out_dtype=None):
    if out_dtype is None:
        out_dtype = data.dtype
    batch, in_dim = get_const_tuple(data.shape)
    out_dim, _ = get_const_tuple(weight.shape)
    # create tuning space
    cfg.define_split("tile_x", out_dim, num_outputs=2)
    cfg.define_split("tile_y", batch, num_outputs=2)
    cfg.define_split("tile_k", in_dim, num_outputs=2)
    if cfg.is_fallback:
        _default_dense_nopack_config(cfg, batch, out_dim, in_dim)

    vec = cfg["tile_k"].size[-1]
    k = tvm.reduce_axis((0, in_dim // vec), "k")
    CC = tvm.compute((batch, out_dim, vec),
                     lambda z, y, x: tvm.sum(
                         data[z, k * vec + x].astype(out_dtype) *
                         weight[y, k * vec + x].astype(out_dtype), axis=k))

    kk = tvm.reduce_axis((0, vec), "kk")
    C = tvm.compute((batch, out_dim),
                    lambda y, x: tvm.sum(CC[y, x, kk], axis=kk),
                    tag="dense_nopack")
    if bias is not None:
        C = tvm.compute((batch, out_dim),
                        lambda i, j: C[i, j] + bias[j].astype(out_dtype),
                        tag=tag.BROADCAST)
    return C
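For reference, the two-stage compute above is numerically equivalent to the plain single-stage dense below (a sketch reusing the names from the function, assuming in_dim is divisible by vec); the split into CC and C exists so the schedule can vectorize the inner vec lanes:

# Hedged single-stage equivalent of the nopack compute above.
k = tvm.reduce_axis((0, in_dim), "k")
C = tvm.compute((batch, out_dim),
                lambda b, o: tvm.sum(data[b, k].astype(out_dtype) *
                                     weight[o, k].astype(out_dtype),
                                     axis=k))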
Example 3: test_lstm_cell_inline
def test_lstm_cell_inline():
    num_step = 128
    num_input = 256
    num_hidden = 1152
    batch_size = 4
    # Global transition matrix
    X = tvm.placeholder((num_step - 1, batch_size, num_input), name="X")
    Wi2h = tvm.placeholder((4, num_hidden, num_input), name="Wi2h")
    Wh2h = tvm.placeholder((4, num_hidden, num_hidden), name="Wh2h")
    # h: output hidden state, c: cell state.
    s_state_h = tvm.placeholder((num_step, batch_size, num_hidden))
    s_state_c = tvm.placeholder((num_step, batch_size, num_hidden))
    s_init_c = tvm.compute((1, batch_size, num_hidden),
                           lambda *i: 0.0, name="init_c")
    s_init_h = tvm.compute((1, batch_size, num_hidden),
                           lambda *i: 0.0, name="init_h")
    # LSTM transition
    k = tvm.reduce_axis((0, num_input), name="ki2h")
    s_i2h = tvm.compute(
        (num_step, 4, batch_size, num_hidden),
        lambda t, x, i, j: tvm.sum(X[t - 1, i, k] * Wi2h[x, j, k], axis=k),
        name="s_i2h")
    k = tvm.reduce_axis((0, num_hidden), name="ki2h")
    s_h2h = tvm.compute(
        (num_step, 4, batch_size, num_hidden),
        lambda t, x, i, j: tvm.sum(s_state_h[t - 1, i, k] * Wh2h[x, j, k], axis=k),
        name="s_h2h")
    # Gate rules
    gates = tvm.compute(s_i2h.shape, lambda *i:
                        s_i2h(*i) + s_h2h(*i), name="gates")
    gshape = (num_step, batch_size, num_hidden)
    in_gate = tvm.compute(gshape, lambda t, i, j: tvm.sigmoid(gates[t, 0, i, j]),
                          name="in_gate")
    in_transform = tvm.compute(gshape, lambda t, i, j: tvm.tanh(gates[t, 1, i, j]),
                               name="in_transform")
    forget_gate = tvm.compute(gshape, lambda t, i, j: tvm.sigmoid(gates[t, 2, i, j]),
                              name="forget_gate")
    out_gate = tvm.compute(gshape, lambda t, i, j: tvm.sigmoid(gates[t, 3, i, j]),
                           name="out_gate")
    next_c = tvm.compute(gshape,
                         lambda t, i, j:
                         forget_gate[t, i, j] * s_state_c[t - 1, i, j] +
                         in_gate[t, i, j] * in_transform[t, i, j], name="next_c")
    next_h = tvm.compute(gshape,
                         lambda t, i, j: out_gate[t, i, j] * tvm.tanh(next_c[t, i, j]),
                         name="next_h")
    update_c = tvm.compute(gshape, lambda *i: next_c(*i), name="update_c")
    update_h = tvm.compute(gshape, lambda *i: next_h(*i), name="update_h")
    # build the scan (recurrence over the time axis)
    scan_h, scan_c = tvm.scan(
        [s_init_h, s_init_c],
        [update_h, update_c],
        [s_state_h, s_state_c],
        inputs=[X],
        name="lstm_scan")
    # schedule
    s = tvm.create_schedule(scan_h.op)
    # Inline gate computations
    s[gates].compute_inline()
    s[in_gate].compute_inline()
    s[in_transform].compute_inline()
    s[forget_gate].compute_inline()
    s[out_gate].compute_inline()
    # verify we can lower correctly
    tvm.lower(s, [X, Wi2h, Wh2h, scan_h, scan_c])
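tvm.scan, which drives the LSTM recurrence above, is easier to see on a small example. A minimal sketch of the standard cumulative-sum pattern, where the state at step t reads the state at step t - 1:

import tvm

# Running sum over the time axis of X, carried via s_state.
m = tvm.var("m")
n = tvm.var("n")
X = tvm.placeholder((m, n), name="X")
s_state = tvm.placeholder((m, n))
s_init = tvm.compute((1, n), lambda _, i: X[0, i])
s_update = tvm.compute((m, n), lambda t, i: s_state[t - 1, i] + X[t, i])
res = tvm.scan(s_init, s_update, s_state, inputs=[X])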
Example 4: test_tensor_reduce_multi_axis
def test_tensor_reduce_multi_axis():
    m = tvm.var('m')
    n = tvm.var('n')
    A = tvm.placeholder((m, n), name='A')
    k1 = tvm.reduce_axis((0, n), "k")
    k2 = tvm.reduce_axis((0, m), "k")
    # axis accepts either a tuple or a list of reduce axes
    C = tvm.compute((1,), lambda _: tvm.sum(A[k1, k2], axis=(k1, k2)))
    C = tvm.compute((1,), lambda _: tvm.sum(A[k1, k2], axis=[k1, k2]))
Example 5: test_reduce_simplify
def test_reduce_simplify():
    ck = CanonicalChecker()
    k = tvm.reduce_axis((0, 10), name="k")
    j = tvm.reduce_axis((-5, 3), name="j")
    A = tvm.placeholder((10,), name='A')
    ck.verify(tvm.sum(tvm.expr.Select(k + j < 12, k + j, 0), [k, j]),
              tvm.sum(k + j, [k, j]))
    ck.verify(tvm.sum(A[3], []), A[3])
    # The rule below is not typical, removed for now
    ck.verify(tvm.sum(k / 10, k), tvm.sum(tvm.const(0, "int32"), k))
Example 6: _conv
def _conv(n, h, w, co, vh, vw, vc):
    # dh, dw, ci, b1, b2, data_vec, kernel_vec, dorefa come from the
    # enclosing bitserial conv2d scope.
    b1b2 = (b1+b2).astype(out_dtype)
    if dorefa:
        return tvm.sum(
            (tvm.popcount(data_vec[n, h, w, vh*HSTR+dh, vw*WSTR+dw, ci, b1].astype(out_dtype) &
                          kernel_vec[co, dh, dw, ci, vc, b2].astype(out_dtype)) -
             tvm.popcount(data_vec[n, h, w, vh*HSTR+dh, vw*WSTR+dw, ci, b1].astype(out_dtype) &
                          ~kernel_vec[co, dh, dw, ci, vc, b2]).astype(out_dtype)) << b1b2,
            axis=[dh, dw, ci, b1, b2])
    return tvm.sum(tvm.popcount(
        data_vec[n, h, w, vh*HSTR+dh, vw*WSTR+dw, ci, b1] &
        kernel_vec[co, dh, dw, ci, vc, b2]).astype(out_dtype) << b1b2,
                   axis=[dh, dw, ci, b1, b2])
Example 7: _conv
def _conv(n, co, h, w, vh, vw, vc):
    b1b2 = (b1+b2).astype(out_dtype)
    if unipolar:
        return tvm.sum((tvm.popcount(
            data_vec[n, h, w, ci, vh*HSTR+dh, vw*WSTR+dw, b1].astype(out_dtype) &
            kernel_vec[co, ci, dh, dw, b2, vc].astype(out_dtype)) -
                        tvm.popcount(
            data_vec[n, h, w, ci, vh*HSTR+dh, vw*WSTR+dw, b1].astype(out_dtype) &
            ~kernel_vec[co, ci, dh, dw, b2, vc]).astype(out_dtype)) << b1b2,
                       axis=[ci, dh, dw, b1, b2])
    return tvm.sum((tvm.popcount(
        data_vec[n, h, w, ci, vh*HSTR+dh, vw*WSTR+dw, b1] &
        kernel_vec[co, ci, dh, dw, b2, vc])).astype(out_dtype) << b1b2,
                   axis=[ci, dh, dw, b1, b2])
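Both _conv variants reduce to the same primitive: a popcount-based dot product over packed bit-planes. A standalone sketch of that primitive (the names here are illustrative, not taken from the snippets above):

import tvm

# Dot product of two bit-packed uint32 vectors: AND selects matching
# set bits, popcount counts them, tvm.sum accumulates over the vector.
n = tvm.var("n")
a = tvm.placeholder((n,), dtype="uint32", name="a")
w = tvm.placeholder((n,), dtype="uint32", name="w")
k = tvm.reduce_axis((0, n), name="k")
dot = tvm.compute((1,), lambda _: tvm.sum(
    tvm.popcount(a[k] & w[k]).astype("int32"), axis=k), name="dot")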
Example 8: matmul
def matmul(N, L, M, dtype):
    A = tvm.placeholder((N, L), name='A', dtype=dtype)
    B = tvm.placeholder((L, M), name='B', dtype=dtype)

    k = tvm.reduce_axis((0, L), name='k')
    C = tvm.compute((N, M), lambda i, j: tvm.sum(A[i, k] * B[k, j], axis=k), name='C')
    s = tvm.create_schedule(C.op)

    # schedule
    y, x = s[C].op.axis
    k = s[C].op.reduce_axis[0]

    ##### define space begin #####
    cfg = autotvm.get_config()
    cfg.define_split("tile_y", y, num_outputs=2)
    cfg.define_split("tile_x", x, num_outputs=2)
    ##### define space end #####

    # schedule according to config
    yo, yi = cfg["tile_y"].apply(s, C, y)
    xo, xi = cfg["tile_x"].apply(s, C, x)

    s[C].reorder(yo, xo, k, yi, xi)

    return s, [A, B, C]
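A template like this is only half the story. A hedged sketch of the standard autotvm driver that would tune it, following the usual tutorial flow (this assumes matmul above is registered with the @autotvm.template decorator; the shapes and log file name are illustrative):

from tvm import autotvm

# Create a tuning task from the template and a concrete workload.
task = autotvm.task.create(matmul, args=(512, 512, 512, 'float32'),
                           target='llvm')
measure_option = autotvm.measure_option(
    builder='local',
    runner=autotvm.LocalRunner(number=5))
# Try random points in the space and log the results.
tuner = autotvm.tuner.RandomTuner(task)
tuner.tune(n_trial=10,
           measure_option=measure_option,
           callbacks=[autotvm.callback.log_to_file('matmul.log')])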
Example 9: binary_dense
def binary_dense(data, weight):
    """Binary matrix multiplication using xor and bit-count.

    Parameters
    ----------
    data : tvm.Tensor
        2-D with shape [batch, in_dim], dtype is uint32.

    weight : tvm.Tensor
        2-D with shape [out_dim, in_dim], dtype is uint32.

    Returns
    -------
    output : tvm.Tensor
        2-D with shape [batch, out_dim], dtype is float32.
    """
    assert data.dtype == 'uint32' and weight.dtype == 'uint32', \
        "dtype of data and weight should be uint32"
    assert len(data.shape) == 2 and len(weight.shape) == 2, \
        "only support 2-dim binary dense"
    batch, in_dim = data.shape
    out_dim, _ = weight.shape
    k = tvm.reduce_axis((0, in_dim), name='k')
    matmul = tvm.compute((batch, out_dim),
                         lambda i, j: tvm.sum(tvm.popcount(data[i, k] ^ weight[j, k]),
                                              axis=k),
                         tag='binary_dense')

    return tvm.compute((batch, out_dim),
                       lambda i, j: 32 * in_dim - 2. * matmul(i, j),
                       tag=tag.ELEMWISE)
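Why 32 * in_dim - 2. * matmul(i, j) recovers the real dot product: with ±1 values packed one bit per lane, popcount(data ^ weight) counts the positions where the operands disagree. Over the 32 * in_dim total bit positions, the dot product is agreements minus disagreements, i.e. (32*in_dim - mismatches) - mismatches = 32*in_dim - 2*mismatches, which is exactly the final compute.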
Example 10: _sample
def _sample(i, c, ph, pw):
    roi = rois[i]
    batch_index = roi[0].astype('int32')
    roi_start_w, roi_start_h, roi_end_w, roi_end_h = roi[1], roi[2], roi[3], roi[4]
    roi_start_h *= spatial_scale
    roi_end_h *= spatial_scale
    roi_start_w *= spatial_scale
    roi_end_w *= spatial_scale

    # force malformed ROIs to be 1x1
    roi_h = tvm.max(roi_end_h - roi_start_h, tvm.const(1.0, dtype))
    roi_w = tvm.max(roi_end_w - roi_start_w, tvm.const(1.0, dtype))

    bin_h = roi_h / pooled_size_h
    bin_w = roi_w / pooled_size_w

    if sample_ratio > 0:
        roi_bin_grid_h = roi_bin_grid_w = tvm.const(sample_ratio, 'int32')
    else:
        roi_bin_grid_h = tvm.ceil(roi_h / pooled_size_h).astype('int32')
        roi_bin_grid_w = tvm.ceil(roi_w / pooled_size_w).astype('int32')

    count = roi_bin_grid_h * roi_bin_grid_w
    rh = tvm.reduce_axis((0, roi_bin_grid_h))
    rw = tvm.reduce_axis((0, roi_bin_grid_w))
    roi_start_h += ph * bin_h
    roi_start_w += pw * bin_w
    return tvm.sum(_bilinear(batch_index, c,
                             roi_start_h + (rh + 0.5) * bin_h / roi_bin_grid_h,
                             roi_start_w + (rw + 0.5) * bin_w / roi_bin_grid_w) / count,
                   axis=[rh, rw])
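The division by count inside tvm.sum is the usual trick for averaging with a reduction: dividing each term by the loop-invariant count is equivalent to sum(...) / count. In isolation, the pattern looks like this sketch (a fixed 2x2 grid for concreteness):

import tvm

# Average of a 2x2 block expressed as a tvm.sum of pre-divided terms.
X = tvm.placeholder((2, 2), name="X")
rh = tvm.reduce_axis((0, 2), name="rh")
rw = tvm.reduce_axis((0, 2), name="rw")
avg = tvm.compute((1,), lambda _: tvm.sum(X[rh, rw] / 4.0,
                                          axis=[rh, rw]), name="avg")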
Example 11: test_local_gemm
def test_local_gemm():
    if not tvm.module.enabled("opengl"):
        return
    if not tvm.module.enabled("llvm"):
        return

    nn = 1024
    n = tvm.var('n')
    n = tvm.convert(nn)
    m = n
    l = n
    A = tvm.placeholder((n, l), name='A', dtype='int32')
    B = tvm.placeholder((m, l), name='B', dtype='int32')
    k = tvm.reduce_axis((0, l), name='k')
    C = tvm.compute((n, m), lambda ii, jj: tvm.sum(A[ii, k] * B[jj, k], axis=k),
                    name='CC')

    s = tvm.create_schedule(C.op)
    s[C].opengl()
    print(tvm.lower(s, [A, B, C], simple_mode=True))

    f = tvm.build(s, [A, B, C], "opengl", name="gemm")
    print("------opengl code------")
    print(f.imported_modules[0].get_source(fmt="gl"))

    ctx = tvm.opengl()
    n, m, l = nn, nn, nn
    a_np = np.random.uniform(low=0, high=10, size=(n, l)).astype(A.dtype)
    b_np = np.random.uniform(low=0, high=10, size=(m, l)).astype(B.dtype)
    a = tvm.nd.array(a_np, ctx)
    b = tvm.nd.array(b_np, ctx)
    c = tvm.nd.array(np.zeros((n, m), dtype=C.dtype), ctx)
    f(a, b, c)

    tvm.testing.assert_allclose(c.asnumpy(), np.dot(a_np, b_np.T))
Example 12: test_in_bounds_conv_llvm
def test_in_bounds_conv_llvm(loop_tiling=False):
    HSTR = WSTR = 1
    in_channel = 128
    kernel_height = kernel_width = 3
    out_channel = 64
    batch_size = 1
    in_height = in_width = 64
    out_height = out_width = in_height - kernel_height + 1
    data = tvm.placeholder((batch_size, in_channel, in_height, in_width), name='data')
    kernel = tvm.placeholder((kernel_height, kernel_width, in_channel,
                              out_channel), name='kernel')
    ic = tvm.reduce_axis((0, in_channel), name='ic')
    kh = tvm.reduce_axis((0, kernel_height), name='kh')
    kw = tvm.reduce_axis((0, kernel_width), name='kw')
    conv = tvm.compute((batch_size, out_channel, out_height, out_width),
                       lambda n, oc, oh, ow: tvm.sum(data[n, ic, oh*HSTR + kh, ow*WSTR + kw] *
                                                     kernel[kh, kw, ic, oc],
                                                     axis=[ic, kh, kw]),
                       name="conv2d")
    s = tvm.create_schedule(conv.op)

    n, oc, oh, ow = conv.op.axis
    if loop_tiling:
        oho, owo, ohi, owi = s[conv].tile(oh, ow, 16, 16)
    lowered_func = tvm.lower(s, [data, kernel, conv], simple_mode=True)
    print(lowered_func.body)
    ctx = tvm.cpu(0)

    f = tvm.build(s, [data, kernel, conv], "llvm")
    data_input = tvm.nd.array(np.random.uniform(
        size=(batch_size, in_channel, in_height, in_width)).astype(tvm.float32), ctx)
    kernel_input = tvm.nd.array(np.random.uniform(
        size=(kernel_height, kernel_width, in_channel, out_channel)).astype(tvm.float32), ctx)
    conv_out = tvm.nd.empty((batch_size, out_channel, out_height, out_width), tvm.float32, ctx)
    f(data_input, kernel_input, conv_out)
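The test stops at executing f. For completeness, a hedged numpy cross-check for this direct convolution (stride 1, no padding, NCHW data with an HWIO kernel; the function name is illustrative) could look like:

import numpy as np

def conv_ref(data_np, kernel_np):
    # Direct convolution matching the tvm.compute above.
    b, ci, h, w = data_np.shape
    kh, kw, _, co = kernel_np.shape
    out = np.zeros((b, co, h - kh + 1, w - kw + 1), dtype=data_np.dtype)
    for oh in range(out.shape[2]):
        for ow in range(out.shape[3]):
            window = data_np[:, :, oh:oh + kh, ow:ow + kw]  # (b, ci, kh, kw)
            # sum over ci, kh, kw, keeping batch and output channel
            out[:, :, oh, ow] = np.einsum('bchw,hwcf->bf', window, kernel_np)
    return out

# e.g. tvm.testing.assert_allclose(conv_out.asnumpy(),
#          conv_ref(data_input.asnumpy(), kernel_input.asnumpy()), rtol=1e-5)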
Example 13: matmul_v1
def matmul_v1(N, L, M, dtype):
    A = tvm.placeholder((N, L), name='A', dtype=dtype)
    B = tvm.placeholder((L, M), name='B', dtype=dtype)

    k = tvm.reduce_axis((0, L), name='k')
    C = tvm.compute((N, M), lambda i, j: tvm.sum(A[i, k] * B[k, j], axis=k), name='C')
    s = tvm.create_schedule(C.op)

    # schedule
    y, x = s[C].op.axis
    k = s[C].op.reduce_axis[0]

    # 2. get the config object
    cfg = autotvm.get_config()

    # 3. define search space
    cfg.define_knob("tile_y", [1, 2, 4, 8, 16])
    cfg.define_knob("tile_x", [1, 2, 4, 8, 16])

    # 4. schedule according to config
    yo, yi = s[C].split(y, cfg['tile_y'].val)
    xo, xi = s[C].split(x, cfg['tile_x'].val)

    s[C].reorder(yo, xo, k, yi, xi)

    return s, [A, B, C]
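Design note: this is the knob-based variant of the split-based template in Example 8. cfg.define_knob enumerates an explicit candidate list (here, tile sizes 1 through 16), which is simple and transparent; cfg.define_split instead derives the valid factorizations of the axis automatically, so it adapts when shapes change. Both produce the same kind of (outer, inner) loop split at schedule time.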
Example 14: global_pool
def global_pool(data, pool_type):
    """Perform global pooling on the data.

    Parameters
    ----------
    data : tvm.Tensor
        4-D with shape [batch, channel, in_height, in_width]

    pool_type : str
        Pool type, 'max' or 'avg'

    Returns
    -------
    output : tvm.Tensor
        4-D with shape [batch, channel, 1, 1]
    """
    assert len(data.shape) == 4, "only support 4-dim pooling"
    batch, channel, height, width = data.shape
    dheight = tvm.reduce_axis((0, height))
    dwidth = tvm.reduce_axis((0, width))

    if pool_type == 'max':
        return tvm.compute((batch, channel, 1, 1),
                           lambda n, c, h, w:
                           tvm.max(data[n, c, dheight, dwidth], axis=[dheight, dwidth]),
                           tag="global_pool_max")
    elif pool_type == 'avg':
        tsum = tvm.compute((batch, channel, 1, 1),
                           lambda n, c, h, w:
                           tvm.sum(data[n, c, dheight, dwidth], axis=[dheight, dwidth]),
                           tag="global_pool_sum")
        return tvm.compute((batch, channel, 1, 1),
                           lambda n, c, h, w:
                           tsum[n, c, h, w] / (height*width).astype(tsum.dtype),
                           tag=tag.ELEMWISE)
    else:
        raise ValueError("Pool type should be 'avg' or 'max'.")
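A hedged usage sketch for global_pool (the shapes are illustrative, and this assumes the topi-style module imports the function relies on, e.g. tag):

import tvm

data = tvm.placeholder((1, 64, 7, 7), name="data")
gmax = global_pool(data, "max")   # shape (1, 64, 1, 1)
gavg = global_pool(data, "avg")   # shape (1, 64, 1, 1)
s = tvm.create_schedule(gavg.op)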
Example 15: test_dot
def test_dot():
    nn = 12
    n = tvm.convert(nn)
    A = tvm.placeholder((n,), name='A')
    B = tvm.placeholder((n,), name='B')
    k = tvm.reduce_axis((0, n), 'k')
    C = tvm.compute((1,), lambda _: tvm.sum(A[k] * B[k], axis=k), name='C')
    s = tvm.create_schedule(C.op)
    # `lower` here is a helper defined in the enclosing test file, not tvm.lower.
    fapi = lower(s, [A, B, C])

    def verify(target):
        if not tvm.module.enabled(target):
            print("Target %s is not enabled" % target)
            return
        f = tvm.codegen.build_module(fapi, target)
        # verify
        ctx = tvm.cpu(0)
        a = tvm.nd.array(np.random.uniform(size=(nn,)).astype(A.dtype), ctx)
        b = tvm.nd.array(np.random.uniform(size=(nn,)).astype(B.dtype), ctx)
        c = tvm.nd.array(np.zeros((1,), dtype=C.dtype), ctx)
        f(a, b, c)
        tvm.testing.assert_allclose(
            c.asnumpy(), np.dot(a.asnumpy(), b.asnumpy()), rtol=1e-4)

    verify("llvm")