This article collects typical usage examples of the tvm.cpu function in Python. If you have been struggling with questions such as: What exactly does the Python cpu function do? How do I use it? Where can I find usage examples? then the hand-picked code samples here may be of help.
Shown below are 15 code examples of the cpu function, sorted by popularity by default. You can upvote the examples you like or find useful; your votes help the system recommend even better Python code samples.
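Before the collected examples, here is a minimal sketch of the call itself, assuming the pre-0.7 TVM API that all of the examples on this page use (where tvm.cpu(dev_id) returns a TVMContext that is passed to allocation and execution APIs):

import numpy as np
import tvm

# tvm.cpu(dev_id=0) returns a TVMContext describing CPU device dev_id.
ctx = tvm.cpu(0)

# The context tells TVM where to allocate and run; here we copy a
# NumPy array onto the CPU device and read it back.
arr = tvm.nd.array(np.arange(4, dtype="float32"), ctx)
print(arr.asnumpy())  # [0. 1. 2. 3.]

In the examples that follow, the returned context is passed to tvm.nd.array / tvm.nd.empty to place tensors, to graph_runtime.create to pick the execution device, and to time_evaluator to benchmark on that device.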
Example 1: test_min_repeat_ms
def test_min_repeat_ms():
    tmp = tempdir()
    filename = tmp.relpath("log")

    @tvm.register_func
    def my_debug(filename):
        """one call lasts for 100 ms and writes one character to a file"""
        time.sleep(0.1)
        with open(filename, "a") as fout:
            fout.write("c")

    X = tvm.compute((), lambda: tvm.call_packed("my_debug", filename))
    s = tvm.create_schedule(X.op)
    func = tvm.build(s, [X])

    x = tvm.nd.empty((), dtype="int32")
    ftimer = func.time_evaluator(func.entry_name, tvm.cpu(),
                                 number=1, repeat=1)
    ftimer(x)

    # number=1, repeat=1 -> one warm-up call plus one timed call,
    # so exactly two characters are written
    with open(filename, "r") as fin:
        ct = len(fin.readline())
    assert ct == 2

    ftimer = func.time_evaluator(func.entry_name, tvm.cpu(),
                                 number=1, repeat=1, min_repeat_ms=1000)
    ftimer(x)

    # make sure we get more than 10 calls
    with open(filename, "r") as fin:
        ct = len(fin.readline())
    assert ct > 10 + 2
Example 2: test_nms
def test_nms():
    dshape = (1, 5, 6)
    data = sym.Variable("data")
    valid_count = sym.Variable("valid_count", dtype="int32")
    nms_threshold = 0.7
    force_suppress = True
    nms_topk = 2
    out = sym.nms(data=data, valid_count=valid_count, nms_threshold=nms_threshold,
                  force_suppress=force_suppress, nms_topk=nms_topk)

    np_data = np.array([[[0, 0.8, 1, 20, 25, 45], [1, 0.7, 30, 60, 50, 80],
                         [0, 0.4, 4, 21, 19, 40], [2, 0.9, 35, 61, 52, 79],
                         [1, 0.5, 100, 60, 70, 110]]]).astype("float32")
    np_valid_count = np.array([4]).astype("int32")
    # boxes suppressed by NMS (or beyond valid_count) get class id -1
    np_result = np.array([[[2, 0.9, 35, 61, 52, 79], [0, 0.8, 1, 20, 25, 45],
                           [0, 0.4, 4, 21, 19, 40], [-1, 0.9, 35, 61, 52, 79],
                           [-1, -1, -1, -1, -1, -1]]])

    target = "llvm"
    ctx = tvm.cpu()
    graph, lib, _ = nnvm.compiler.build(out, target, {"data": dshape, "valid_count": (dshape[0],)},
                                        dtype={"data": "float32", "valid_count": "int32"})
    m = graph_runtime.create(graph, lib, ctx)
    m.set_input(**{"data": np_data, "valid_count": np_valid_count})
    m.run()
    out = m.get_output(0, tvm.nd.empty(np_result.shape, "float32"))
    tvm.testing.assert_allclose(out.asnumpy(), np_result, atol=1e-5, rtol=1e-5)
Example 3: test_multibox_transform_loc
def test_multibox_transform_loc():
    batch_size = 1
    num_anchors = 3
    num_classes = 3
    cls_prob = sym.Variable("cls_prob")
    loc_preds = sym.Variable("loc_preds")
    anchors = sym.Variable("anchors")
    transform_loc_data, valid_count = sym.multibox_transform_loc(cls_prob=cls_prob, loc_pred=loc_preds,
                                                                 anchor=anchors)
    out = sym.nms(data=transform_loc_data, valid_count=valid_count)

    # Manually create test case
    np_cls_prob = np.array([[[0.2, 0.5, 0.3], [0.25, 0.3, 0.45], [0.7, 0.1, 0.2]]])
    np_loc_preds = np.array([[0.1, -0.2, 0.3, 0.2, 0.2, 0.4, 0.5, -0.3, 0.7, -0.2, -0.4, -0.8]])
    np_anchors = np.array([[[-0.1, -0.1, 0.1, 0.1], [-0.2, -0.2, 0.2, 0.2], [1.2, 1.2, 1.5, 1.5]]])

    expected_np_out = np.array([[[1, 0.69999999, 0, 0, 0.10818365, 0.10008108],
                                 [0, 0.44999999, 1, 1, 1, 1],
                                 [0, 0.30000001, 0, 0, 0.22903419, 0.20435292]]])

    target = "llvm"
    dtype = "float32"
    ctx = tvm.cpu()
    graph, lib, _ = nnvm.compiler.build(out, target, {"cls_prob": (batch_size, num_anchors, num_classes),
                                                      "loc_preds": (batch_size, num_anchors * 4),
                                                      "anchors": (1, num_anchors, 4)})
    m = graph_runtime.create(graph, lib, ctx)
    m.set_input(**{"cls_prob": np_cls_prob.astype(dtype), "loc_preds": np_loc_preds.astype(dtype),
                   "anchors": np_anchors.astype(dtype)})
    m.run()
    out = m.get_output(0, tvm.nd.empty(expected_np_out.shape, dtype))
    tvm.testing.assert_allclose(out.asnumpy(), expected_np_out, atol=1e-5, rtol=1e-5)
Example 4: test_in_bounds_conv_llvm
def test_in_bounds_conv_llvm(loop_tiling=False):
    HSTR = WSTR = 1
    in_channel = 128
    kernel_height = kernel_width = 3
    out_channel = 64
    batch_size = 1
    in_height = in_width = 64
    out_height = out_width = in_height - kernel_height + 1
    data = tvm.placeholder((batch_size, in_channel, in_height, in_width), name='data')
    kernel = tvm.placeholder((kernel_height, kernel_width, in_channel,
                              out_channel), name='kernel')
    ic = tvm.reduce_axis((0, in_channel), name='ic')
    kh = tvm.reduce_axis((0, kernel_height), name='kh')
    kw = tvm.reduce_axis((0, kernel_width), name='kw')
    conv = tvm.compute((batch_size, out_channel, out_height, out_width),
                       lambda n, oc, oh, ow: tvm.sum(data[n, ic, oh*HSTR + kh, ow*WSTR + kw] *
                                                     kernel[kh, kw, ic, oc],
                                                     axis=[ic, kh, kw]),
                       name="conv2d")
    s = tvm.create_schedule(conv.op)
    n, oc, oh, ow = conv.op.axis
    if loop_tiling:
        oho, owo, ohi, owi = s[conv].tile(oh, ow, 16, 16)
    lowered_func = tvm.lower(s, [data, kernel, conv], simple_mode=True)
    print(lowered_func.body)

    ctx = tvm.cpu(0)
    f = tvm.build(s, [data, kernel, conv], "llvm")
    data_input = tvm.nd.array(np.random.uniform(
        size=(batch_size, in_channel, in_height, in_width)).astype(tvm.float32), ctx)
    kernel_input = tvm.nd.array(np.random.uniform(
        size=(kernel_height, kernel_width, in_channel, out_channel)).astype(tvm.float32), ctx)
    conv_out = tvm.nd.empty((batch_size, out_channel, out_height, out_width), tvm.float32, ctx)
    f(data_input, kernel_input, conv_out)
Example 5: test_sort_np
def test_sort_np():
    dshape = (1, 2, 3, 4, 5, 6)
    axis = 4
    reduced_shape = (1, 2, 3, 4, 6)
    is_descend = False
    data = tvm.placeholder(dshape, name='data')
    sort_num = tvm.placeholder(reduced_shape, name="sort_num", dtype="int32")
    out = tvm.extern(data.shape, [data, sort_num],
                     lambda ins, outs: tvm.call_packed(
                         "tvm.contrib.sort.argsort", ins[0],
                         ins[1], outs[0], axis, is_descend),
                     dtype='int32', name="sort_tensor")

    ctx = tvm.cpu(0)
    target = "llvm"
    s = tvm.create_schedule(out.op)
    f = tvm.build(s, [data, sort_num, out], target)

    np_data = np.random.uniform(size=dshape)
    np_out = np.argsort(np_data, axis=axis)
    sort_num_input = np.full(reduced_shape, dshape[axis])
    a = tvm.nd.array(np.array(np_data).astype(data.dtype), ctx)
    b = tvm.nd.array(np.array(sort_num_input).astype(sort_num.dtype), ctx)
    c = tvm.nd.array(np.zeros(a.shape, dtype=out.dtype), ctx)
    f(a, b, c)
    tvm.testing.assert_allclose(c.asnumpy(), np_out, rtol=1e-5)
Example 6: verify
def verify(target="llvm",
           algorithm=nnpack.ConvolutionAlgorithm.AUTO,
           with_bias=True):
    # Note: data, kernel, bias, dshape, kshape, bshape, oshape, PAD, STRIDE,
    # BATCH, IC, IH, IW and np_conv come from the enclosing test's scope.
    if not tvm.module.enabled(target):
        print("skip because %s is not enabled..." % target)
        return
    if not tvm.get_global_func("tvm.contrib.nnpack.fully_connected_inference", True):
        print("skip because extern function is not available")
        return
    if not nnpack.is_available():
        return

    ctx = tvm.cpu(0)
    transformed_kernel = nnpack.convolution_inference_weight_transform(
        kernel, algorithm=algorithm)
    output = nnpack.convolution_inference_without_weight_transform(
        data, transformed_kernel, bias if with_bias else None,
        [PAD, PAD, PAD, PAD], [STRIDE, STRIDE],
        algorithm=algorithm)
    s = tvm.create_schedule(output.op)
    f = tvm.build(s, [data, kernel, bias, output], target)

    na = np.random.uniform(size=dshape).astype(data.dtype)
    nb = np.random.uniform(size=kshape).astype(kernel.dtype)
    nc = np.random.uniform(size=bshape).astype(bias.dtype) if with_bias else np.zeros(bshape, dtype=bias.dtype)
    ta = tvm.nd.array(na, ctx)
    tb = tvm.nd.array(nb, ctx)
    tc = tvm.nd.array(nc, ctx)
    td = tvm.nd.array(np.zeros(oshape, dtype=output.dtype), ctx)
    f(ta, tb, tc, td)
    nd = np_conv(np.reshape(na, (BATCH, IC, IH, IW)), nb, PAD, STRIDE) + nc.reshape(1, bshape[0], 1, 1)
    tvm.testing.assert_allclose(
        td.asnumpy(), nd.reshape(BATCH, IC, IH, IW), rtol=1e-5)
Example 7: test_dilate
def test_dilate():
    target = 'llvm'
    ctx = tvm.cpu(0)

    def _test_dilate(input_size, strides):
        Input = tvm.placeholder(input_size)
        Output = topi.nn.dilate(Input, strides)
        schedule = tvm.create_schedule(Output.op)
        input_np = np.random.uniform(size=input_size).astype(Input.dtype)
        output_np = topi.testing.dilate_python(input_np, strides)
        input_tvm = tvm.nd.array(input_np, ctx=ctx)
        output_size = topi.util.get_const_tuple(Output.shape)
        output_tvm = tvm.nd.array(np.zeros(shape=output_size).astype(Output.dtype), ctx=ctx)
        f = tvm.build(schedule, [Input, Output], target)
        f(input_tvm, output_tvm)
        tvm.testing.assert_allclose(output_tvm.asnumpy(), output_np, rtol=1e-5)

    _test_dilate((32,), (2,))
    _test_dilate((32, 32), (2, 2))
    _test_dilate((1, 3, 32, 32), (1, 1, 1, 1))
    _test_dilate((1, 3, 32, 32), (2, 2, 2, 2))
    _test_dilate((1, 32, 32, 3, 3), (1, 1, 1, 1, 1))
    _test_dilate((1, 32, 32, 3, 3), (2, 2, 2, 2, 2))
    _test_dilate((1, 32, 32, 32, 3, 3), (1, 1, 1, 2, 2, 2))
    _test_dilate((1, 32, 32, 32, 3, 3), (2, 2, 2, 1, 1, 1))
Example 8: tune_and_evaluate
def tune_and_evaluate(tuning_opt):
    # extract workloads from nnvm graph
    print("Extract tasks...")
    net, params, data_shape, out_shape = get_network(model_name, batch_size)
    tasks = autotvm.task.extract_from_graph(net, target=target,
                                            shape={'data': data_shape}, dtype=dtype,
                                            symbols=(nnvm.sym.conv2d,))

    # run tuning tasks
    print("Tuning...")
    tune_kernels(tasks, **tuning_opt)

    # compile kernels with history best records
    with autotvm.apply_history_best(log_file):
        print("Compile...")
        with nnvm.compiler.build_config(opt_level=3):
            graph, lib, params = nnvm.compiler.build(
                net, target=target, shape={'data': data_shape}, params=params, dtype=dtype)

        # upload parameters to device
        ctx = tvm.cpu()
        data_tvm = tvm.nd.array((np.random.uniform(size=data_shape)).astype(dtype))
        module = runtime.create(graph, lib, ctx)
        module.set_input('data', data_tvm)
        module.set_input(**params)

        # evaluate
        print("Evaluate inference time cost...")
        ftimer = module.module.time_evaluator("run", ctx, number=100, repeat=3)
        prof_res = np.array(ftimer().results) * 1000  # convert to millisecond
        print("Mean inference time (std dev): %.2f ms (%.2f ms)" %
              (np.mean(prof_res), np.std(prof_res)))
Example 9: test_in_bounds_vectorize_llvm
def test_in_bounds_vectorize_llvm():
    n = 512
    lanes = 2
    A = tvm.placeholder((n,), name='A', dtype="float32x%d" % lanes)
    B = tvm.compute((n,), lambda i: A[i], name='B')
    C = tvm.compute((n,), lambda i: B[i] + tvm.const(1, A.dtype), name='C')
    s = tvm.create_schedule(C.op)
    xo, xi = s[C].split(C.op.axis[0], nparts=2)
    _, xi = s[C].split(xi, factor=2)
    s[C].parallel(xo)
    s[C].vectorize(xi)
    s[B].compute_at(s[C], xo)
    xo, xi = s[B].split(B.op.axis[0], factor=2)
    s[B].vectorize(xi)
    # build and invoke the kernel.
    lowered_func = tvm.lower(s, [A, C], "llvm", simple_mode=False)
    print(lowered_func.body)
    f = tvm.build(s, [A, C], "llvm")
    ctx = tvm.cpu(0)
    # launch the kernel.
    a = tvm.nd.empty((n,), A.dtype).copyfrom(
        np.random.uniform(size=(n, lanes)))
    c = tvm.nd.empty((n,), C.dtype, ctx)
    f(a, c)
    tvm.testing.assert_allclose(c.asnumpy(), a.asnumpy() + 1)
Example 10: verify_bitserial_dense
def verify_bitserial_dense(batch, in_dim, out_dim, activation_bits, weight_bits, unipolar):
    input_dtype = 'uint32'
    out_dtype = 'int16'

    with tvm.target.create('llvm'):
        A = tvm.placeholder((batch, in_dim), dtype=input_dtype, name='A')
        B = tvm.placeholder((out_dim, in_dim), dtype=input_dtype, name='B')
        C = topi.nn.bitserial_dense(A, B, activation_bits, weight_bits, out_dtype=out_dtype,
                                    unipolar=unipolar)
        s = topi.generic.schedule_bitserial_dense([C])

    a_shape = get_const_tuple(A.shape)
    b_shape = get_const_tuple(B.shape)

    @memoize("topi.tests.test_topi_bitseral_dense")
    def get_ref_data():
        a_np = generate_quantized_np(get_const_tuple(a_shape), activation_bits, input_dtype)
        b_np = generate_quantized_np(get_const_tuple(b_shape), weight_bits, input_dtype)
        if unipolar:
            b_ = np.copy(b_np).astype(out_dtype)
            for x in np.nditer(b_, op_flags=['readwrite']):
                x[...] = 1 if x == 1 else -1
            c_np = np.dot(a_np, b_.T)
        else:
            c_np = np.dot(a_np, b_np.T)
        return a_np, b_np, c_np
    a_np, b_np, c_np = get_ref_data()

    ctx = tvm.cpu(0)
    a = tvm.nd.array(a_np, ctx)
    b = tvm.nd.array(b_np, ctx)
    c = tvm.nd.array(np.zeros(get_const_tuple(C.shape), dtype=C.dtype), ctx)
    func = tvm.build(s, [A, B, C], "llvm")
    func(a, b, c)
    tvm.testing.assert_allclose(c.asnumpy(), c_np, rtol=1e-5)
Example 11: check_verify
def check_verify():
    if not tvm.module.enabled("llvm"):
        print("Skip because llvm is not enabled")
        return
    mlib = tvm.build(s, [A, B], "llvm", name="myadd")
    try:
        mod = graph_runtime.create(graph, mlib, tvm.cpu(0))
    except ValueError:
        return

    a = np.random.uniform(size=(n,)).astype(A.dtype)
    mod.set_input(x=a)

    # verify that the dump root was created
    directory = mod._dump_path
    assert os.path.exists(directory)

    # verify the graph dump is there and has the proper file name
    GRAPH_DUMP_FILE_NAME = '_tvmdbg_graph_dump.json'
    assert len(os.listdir(directory)) == 1
    assert os.path.exists(os.path.join(directory, GRAPH_DUMP_FILE_NAME))

    mod.run()
    # verify the tensors were dumped
    assert len(os.listdir(directory)) > 1

    # verify the output is correct
    out = mod.get_output(0, tvm.nd.empty((n,)))
    np.testing.assert_equal(out.asnumpy(), a + 1)

    mod.exit()
    # verify the dump root is deleted after cleanup
    assert not os.path.exists(directory)
Example 12: check_c
def check_c():
    if not tvm.module.enabled("llvm"):
        return
    # specifically allow an offset to test the codepath when offset is available
    Ab = tvm.decl_buffer(
        A.shape, A.dtype,
        elem_offset=tvm.var('Aoffset'),
        offset_factor=8,
        name='A')
    binds = {A: Ab}
    # build and invoke the kernel.
    f1 = tvm.lower(s, [A, B, C], name="fadd_pipeline")
    fsplits = [x for x in tvm.ir_pass.SplitHostDevice(f1)]
    fsplits[0] = tvm.ir_pass.LowerTVMBuiltin(fsplits[0])
    mhost = tvm.codegen.build_module(fsplits[0], "c")
    temp = util.tempdir()
    path_dso = temp.relpath("temp.so")
    mhost.export_library(path_dso)
    m = tvm.module.load(path_dso)
    fadd = m["fadd_pipeline"]
    ctx = tvm.cpu(0)
    # launch the kernel.
    n = nn
    a = tvm.nd.array(np.random.uniform(size=n).astype(A.dtype), ctx)
    b = tvm.nd.array(np.random.uniform(size=n).astype(B.dtype), ctx)
    c = tvm.nd.array(np.zeros(n, dtype=C.dtype), ctx)
    fadd(a, b, c)
    tvm.testing.assert_allclose(
        c.asnumpy(), a.asnumpy() + b.asnumpy())
Example 13: test_sort
def test_sort():
    n = 2
    l = 5
    m = 3
    data = tvm.placeholder((n, l, m), name='data')
    sort_num = tvm.placeholder((n, m), name="sort_num", dtype="int32")
    axis = 1
    is_descend = True
    out = tvm.extern(data.shape, [data, sort_num],
                     lambda ins, outs: tvm.call_packed(
                         "tvm.contrib.sort.argsort", ins[0],
                         ins[1], outs[0], axis, is_descend),
                     dtype='int32', name="sort_tensor")

    input = [[[1, 2, 3], [2, 4.5, 3.5], [1.1, 0.5, 1], [3.2, -5, 0.5], [1.5, 0, 0]],
             [[1, 2, 3], [4, 5, 6], [7, 8, 9], [10, 11, 12], [13, 14, 15]]]
    sort_num_input = [[1, 2, 3], [4, 5, 5]]
    sorted_index = [[[0, 1, 1], [1, 0, 0], [2, 2, 2], [3, 3, 3], [4, 4, 4]],
                    [[3, 4, 4], [2, 3, 3], [1, 2, 2], [0, 1, 1], [4, 0, 0]]]

    ctx = tvm.cpu(0)
    target = "llvm"
    s = tvm.create_schedule(out.op)
    f = tvm.build(s, [data, sort_num, out], target)
    a = tvm.nd.array(np.array(input).astype(data.dtype), ctx)
    b = tvm.nd.array(np.array(sort_num_input).astype(sort_num.dtype), ctx)
    c = tvm.nd.array(np.zeros(a.shape, dtype=out.dtype), ctx)
    f(a, b, c)
    tvm.testing.assert_allclose(c.asnumpy(), np.array(sorted_index).astype(out.dtype), rtol=1e-5)
Example 14: verify_conv2d
def verify_conv2d(batch, in_size, in_channel, num_filter, kernel, stride, padding):
    in_height = in_width = in_size
    with tvm.target.rasp():
        A = tvm.placeholder((batch, in_channel, in_height, in_width), name='A')
        W = tvm.placeholder((num_filter, in_channel, kernel, kernel), name='W')
        B = topi.nn.conv2d(A, W, stride, padding)
        s = topi.generic.schedule_conv2d_nchw([B])
    a_shape = get_const_tuple(A.shape)
    w_shape = get_const_tuple(W.shape)
    dtype = A.dtype

    @memoize("topi.tests.test_topi_conv2d.verify_conv2d")
    def get_ref_data():
        a_np = np.random.uniform(size=a_shape).astype(dtype)
        w_np = np.random.uniform(size=w_shape).astype(dtype)
        b_np = topi.testing.conv2d_nchw_python(a_np, w_np, stride, padding)
        return a_np, w_np, b_np
    a_np, w_np, b_np = get_ref_data()

    ctx = tvm.cpu(0)
    a = tvm.nd.array(a_np, ctx)
    w = tvm.nd.array(w_np, ctx)
    b = tvm.nd.array(np.zeros(get_const_tuple(B.shape), dtype=B.dtype), ctx)
    func = tvm.build(s, [A, W, B], "llvm")
    func(a, w, b)
    np.testing.assert_allclose(b.asnumpy(), b_np, rtol=1e-5)
Example 15: test_log_pow_llvm
def test_log_pow_llvm():
    # graph
    n = tvm.var('n')
    A = tvm.placeholder((n,), name='A')
    B = tvm.compute(A.shape, lambda *i: tvm.power(tvm.log(A(*i)), 2.0), name='B')
    s = tvm.create_schedule(B.op)
    # create iter vars and assign them tags.
    bx, tx = s[B].split(B.op.axis[0], factor=32)
    # one line to build the function.
    if not tvm.module.enabled("llvm"):
        return

    flog = tvm.build(s, [A, B],
                     "llvm", name="mylog")
    ctx = tvm.cpu(0)
    # launch the kernel.
    n = 1028
    a = tvm.nd.array(np.random.uniform(size=n).astype(A.dtype), ctx)
    b = tvm.nd.array(np.zeros(n, dtype=B.dtype), ctx)
    repeat = 10
    ftimer = flog.time_evaluator(flog.entry_name, ctx, number=1, repeat=repeat)
    res = ftimer(a, b)
    assert len(res.results) == repeat
    np.testing.assert_allclose(
        b.asnumpy(), np.power(np.log(a.asnumpy()), 2.0), rtol=1e-5)