本文整理匯總了Python中tvm.build方法的典型用法代碼示例。如果您正苦於以下問題:Python tvm.build方法的具體用法?Python tvm.build怎麼用?Python tvm.build使用的例子?那麼,這裡精選的方法代碼示例或許可以為您提供幫助。您也可以進一步了解該方法所在模塊tvm的用法示例。
在下文中一共展示了tvm.build方法的15個代碼示例,這些例子默認根據受歡迎程度排序。您可以為喜歡或者感覺有用的代碼點讚,您的評價將有助於系統推薦出更棒的Python代碼示例。
示例1: _evaluate
# 需要導入模塊: import tvm [as 別名]
# 或者: from tvm import build [as 別名]
def _evaluate(s, bufs, target, dev_id, number=1, q=None):
    """Build a schedule and time it on randomly filled inputs.

    Args:
        s: schedule handed to ``tvm.build``.
        bufs: tensors handed to ``tvm.build``; each one's ``shape`` and
            ``dtype`` are used to allocate a matching random array.
        target: build target, also used to create the device context.
        dev_id: device index passed to ``tvm.context``.
        number: forwarded to ``time_evaluator`` as ``number``.
        q: optional queue-like object; when truthy, the measured cost is
            also ``put`` on it.

    Returns:
        The evaluator's ``mean`` scaled by 1e3.

    Raises:
        Exception: whatever the build or the timed run raised, re-raised
            unchanged after the local references have been released.
    """
    ctx = tvm.context(target, dev_id)
    tvm_arys = []
    for arg in bufs:
        shape = utils.to_tuple(arg.shape)
        host_data = np.random.uniform(-10, 10, size=shape).astype(arg.dtype)
        tvm_arys.append(tvm.nd.array(host_data, ctx))
    func, evaluator = None, None
    try:
        func = tvm.build(s, bufs, target)
        evaluator = func.time_evaluator(func.entry_name, ctx, number=number)
        time_cost = evaluator(*tvm_arys).mean * 1e3
        if q:
            q.put(time_cost)
        return time_cost
    except Exception:
        # The propagating traceback keeps this frame (and its locals) alive,
        # so release the arrays and the built module explicitly. Emptying
        # the list is what actually drops the array references; deleting a
        # loop variable would not.
        tvm_arys.clear()
        func = None
        evaluator = None
        raise
示例2: build_func
# 需要導入模塊: import tvm [as 別名]
# 或者: from tvm import build [as 別名]
def build_func(func_name, task_key, configs, op_pos=None, rpc_info=None, rewrite=False):
    """Schedule, verify, build and export the kernel for a registered task.

    Args:
        func_name: file name of the exported library inside ``LIB_DIR``.
        task_key: key into ``TASK_TABLE``.
        configs: forwarded to ``schedule_with_config``.
        op_pos: forwarded to ``schedule_with_config``.
        rpc_info: optional object; when it carries a non-``None``
            ``target_host`` that value is passed to ``tvm.build``.
        rewrite: forwarded to ``schedule_with_config``.

    Returns:
        A 2-tuple ``(shapes, dtypes)`` with one entry per buffer.

    Raises:
        RuntimeError: if ``verify_code`` rejects the lowered statement.
    """
    host = (
        rpc_info.target_host
        if rpc_info is not None and rpc_info.target_host is not None
        else None
    )
    task = TASK_TABLE[task_key]
    sched, tensors = schedule_with_config(task_key, configs, op_pos=op_pos, rewrite=rewrite)
    lowered = tvm.lower(sched, tensors, simple_mode=True)
    if not verify_code(lowered, task.target, task.dev_id):
        raise RuntimeError("Invalid %s(%d) kernel"%(task.target, task.dev_id))

    # Only mention target_host when one was actually supplied.
    build_kwargs = {"target": task.target}
    if host is not None:
        build_kwargs["target_host"] = host
    module = tvm.build(sched, tensors, **build_kwargs)
    module.export_library(os.path.join(LIB_DIR, func_name))

    shapes = [to_tuple(tensor.shape) for tensor in tensors]
    dtypes = [tensor.dtype for tensor in tensors]
    return (shapes, dtypes)
示例3: _evaluate
# 需要導入模塊: import tvm [as 別名]
# 或者: from tvm import build [as 別名]
def _evaluate(s, bufs, target, dev_id, number=1, q=None):
    """Build a schedule and time it on randomly filled inputs.

    Args:
        s: schedule handed to ``tvm.build``.
        bufs: tensors handed to ``tvm.build``; each one's ``shape`` and
            ``dtype`` are used to allocate a matching random array.
        target: build target, also used to create the device context.
        dev_id: device index passed to ``tvm.context``.
        number: forwarded to ``time_evaluator`` as ``number``.
        q: optional queue-like object; when truthy, the measured cost is
            also ``put`` on it.

    Returns:
        The evaluator's ``mean`` scaled by 1e3.

    Raises:
        Exception: whatever the build or the timed run raised, re-raised
            unchanged after the local references have been released.
    """
    ctx = tvm.context(target, dev_id)
    tvm_arys = []
    for arg in bufs:
        shape = to_tuple(arg.shape)
        host_data = np.random.uniform(-10, 10, size=shape).astype(arg.dtype)
        tvm_arys.append(tvm.nd.array(host_data, ctx))
    func, evaluator = None, None
    try:
        func = tvm.build(s, bufs, target)
        evaluator = func.time_evaluator(func.entry_name, ctx, number=number)
        time_cost = evaluator(*tvm_arys).mean * 1e3
        if q:
            q.put(time_cost)
        return time_cost
    except Exception:
        # The propagating traceback keeps this frame (and its locals) alive,
        # so release the arrays and the built module explicitly. Emptying
        # the list is what actually drops the array references; deleting a
        # loop variable would not.
        tvm_arys.clear()
        func = None
        evaluator = None
        raise
示例4: __evaluate
# 需要導入模塊: import tvm [as 別名]
# 或者: from tvm import build [as 別名]
def __evaluate(s, bufs, target, dev_id, number=1, q=None):
    """Time ``number`` build-and-run rounds with the wall clock.

    Each round allocates fresh random inputs, builds the schedule and calls
    the resulting function once. A failing round prints "Oops" and the
    exception, then the loop carries on.

    NOTE(review): the source of this snippet had lost its indentation; the
    loop extent below is the most plausible reading - confirm upstream.

    Args:
        s: schedule handed to ``tvm.build``.
        bufs: tensors handed to ``tvm.build``; ``shape``/``dtype`` drive
            the random input allocation.
        target: build target, also used to create the device context.
        dev_id: device index passed to ``tvm.context``.
        number: how many rounds to run; also the divisor of the average.
        q: optional queue-like object; when truthy, the result is also
            ``put`` on it.

    Returns:
        Elapsed wall-clock time times 1e3, divided by ``number``.
    """
    started = time.time()
    for _ in range(number):
        ctx = tvm.context(target, dev_id)
        inputs = []
        for buf in bufs:
            host_data = np.random.uniform(-10, 10, size=to_tuple(buf.shape))
            inputs.append(tvm.nd.array(host_data.astype(buf.dtype), ctx))
        try:
            built = tvm.build(s, bufs, target)
            built(*inputs)
        except Exception as err:
            print("Oops")
            print(err)
    finished = time.time()
    time_cost = (finished - started) * 1e3 / number
    if q:
        q.put(time_cost)
    return time_cost
示例5: test
# 需要導入模塊: import tvm [as 別名]
# 或者: from tvm import build [as 別名]
def test(task_key, configs, dev_id=None, rpc_info=None):
    """Print the generated CUDA source for a task, then report its timing.

    Args:
        task_key: key into ``TASK_TABLE``; also forwarded to ``evaluate``.
        configs: forwarded to ``schedule_with_config``.
        dev_id: device index; falls back to the task's own ``dev_id`` when
            ``None``.
        rpc_info: forwarded to ``evaluate``.
    """
    task = TASK_TABLE[task_key]
    sched, tensors = schedule_with_config(task_key, configs)
    # Debug aid: print(tvm.lower(sched, tensors, simple_mode=True))
    cuda_module = tvm.build(sched, tensors, "cuda")
    device_source = cuda_module.imported_modules[0].get_source()
    print(device_source)
    if dev_id is None:
        dev_id = task.dev_id
    time_cost = evaluate(task_key, sched, tensors, task.target, dev_id, 10, rpc_info)
    print(task_key, "use", time_cost, "ms")
    print()
# @tvm.register_func
# def tvm_callback_cuda_compile(code):
# """use nvcc to generate ptx code for better optimization"""
# ptx = tvm.contrib.nvcc.compile_cuda(code, target="ptx", arch="sm_53")
# return ptx
示例6: test
# 需要導入模塊: import tvm [as 別名]
# 或者: from tvm import build [as 別名]
def test(task_key, configs, dev_id=None, rpc_info=None):
    """Schedule a registered task and print how long it takes to run.

    Args:
        task_key: key into ``TASK_TABLE``; also forwarded to ``evaluate``.
        configs: forwarded to ``schedule_with_config``.
        dev_id: device index; falls back to the task's own ``dev_id`` when
            ``None``.
        rpc_info: forwarded to ``evaluate``.
    """
    task = TASK_TABLE[task_key]
    sched, tensors = schedule_with_config(task_key, configs)
    # Debug aids, left disabled:
    #   print(tvm.lower(sched, tensors, simple_mode=True))
    #   func = tvm.build(sched, tensors, "cuda")
    #   print(func.imported_modules[0].get_source())
    if dev_id is None:
        dev_id = task.dev_id
    time_cost = evaluate(task_key, sched, tensors, task.target, dev_id, 10, rpc_info)
    print(task_key, "use", time_cost, "ms")
    print()
# @tvm.register_func
# def tvm_callback_cuda_compile(code):
# """use nvcc to generate ptx code for better optimization"""
# ptx = tvm.contrib.nvcc.compile_cuda(code, target="ptx", arch="sm_53")
# return ptx
示例7: tvm_unpool1d_cpu
# 需要導入模塊: import tvm [as 別名]
# 或者: from tvm import build [as 別名]
def tvm_unpool1d_cpu(B, C, L, kernel_size, stride, padding, number=10, dev=0):
    """Benchmark the max-unpooling-1d kernel built for ``llvm`` on CPU.

    The pooled values and indices fed to the kernel come from running
    ``torch.nn.MaxPool1d`` on a random ``[B, C, L]`` tensor. That reference
    step now runs on the CPU as well, so the CPU benchmark no longer needs
    a CUDA device just to prepare its inputs.

    Args:
        B, C, L: sizes of the random tensor given to the pooling layer.
        kernel_size, stride, padding: pooling parameters, also forwarded to
            ``maxunpooling1d`` and used for the output length.
        number: how many times the built function is called.
        dev: index passed to ``tvm.cpu``.

    Returns:
        Elapsed wall-clock time times 1e3, divided by ``number``.
    """
    raw = torch.rand([B, C, L], dtype=torch.float32)
    maxpool = torch.nn.MaxPool1d(kernel_size, stride=stride, padding=padding, return_indices=True)
    Input, indices = maxpool(raw)

    s, bufs = maxunpooling1d(B, C, Input.shape[2], kernel_size, stride, padding)
    s = tvm.te.create_schedule(s)
    ctx = tvm.cpu(dev)
    f = tvm.build(s, bufs, 'llvm')

    im = tvm.nd.array(Input.numpy().astype(np.float32), ctx)
    # Indices are handed over as float32, exactly as before.
    fi = tvm.nd.array(indices.numpy().astype(np.float32), ctx)

    in_length = Input.shape[2]
    out_length = (in_length - 1) * stride - 2 * padding + kernel_size
    output_shape = (B, C, out_length)
    un = tvm.nd.array(np.zeros(output_shape).astype(np.float32), ctx)

    start_time = time.time()
    for _ in range(number):
        f(im, fi, un)
    end_time = time.time()
    return (end_time - start_time) * 1e3 / number
示例8: tvm_unpool1d_cuda
# 需要導入模塊: import tvm [as 別名]
# 或者: from tvm import build [as 別名]
def tvm_unpool1d_cuda(B, C, L, kernel_size, stride, padding, number=10, dev=0):
    """Benchmark the max-unpooling-1d kernel built for ``cuda``.

    The pooled values and indices fed to the kernel come from running
    ``torch.nn.MaxPool1d`` on a random ``[B, C, L]`` tensor on the same
    CUDA device.

    Args:
        B, C, L: sizes of the random tensor given to the pooling layer.
        kernel_size, stride, padding: pooling parameters, also forwarded to
            ``maxunpooling1d`` and used for the output length.
        number: how many times the built function is called.
        dev: CUDA device index used for both torch and TVM.

    Returns:
        Elapsed wall-clock time times 1e3, divided by ``number``.
    """
    device_name = "cuda:" + str(dev)
    raw = torch.rand([B, C, L], dtype=torch.float32).cuda(device_name)
    maxpool = torch.nn.MaxPool1d(kernel_size, stride=stride, padding=padding, return_indices=True).cuda(device_name)
    Input, indices = maxpool(raw)
    Input = Input.cpu()
    indices = indices.cpu()

    s, bufs = maxunpooling1d(B, C, Input.shape[2], kernel_size, stride, padding)
    s = tvm.te.create_schedule(s)
    f = tvm.build(s, bufs, "cuda")
    ctx = tvm.context("cuda", dev_id=dev)

    im = tvm.nd.array(Input.numpy().astype(np.float32), ctx)
    # Indices are handed over as float32, exactly as before.
    fi = tvm.nd.array(indices.numpy().astype(np.float32), ctx)

    in_length = Input.shape[2]
    out_length = (in_length - 1) * stride - 2 * padding + kernel_size
    output_shape = (B, C, out_length)
    un = tvm.nd.array(np.zeros(output_shape).astype(np.float32), ctx)

    # Device work can be queued asynchronously, so bracket the timed region
    # with explicit synchronisation; otherwise the clock may stop before the
    # kernels have actually finished.
    ctx.sync()
    start_time = time.time()
    for _ in range(number):
        f(im, fi, un)
    ctx.sync()
    end_time = time.time()
    return (end_time - start_time) * 1e3 / number
示例9: tvm_PixelCNN_cpu
# 需要導入模塊: import tvm [as 別名]
# 或者: from tvm import build [as 別名]
def tvm_PixelCNN_cpu(B, H, W, C, out_C, kernel_height, kernel_width, mask_type, bias, dilation, stride, padding, number=10, dev=0):
    """Benchmark the PixelCNN kernel built for ``llvm`` on CPU.

    Args:
        B, H, W, C: sizes of the random ``[B, H, W, C]`` input.
        out_C: output channel count; the kernel tensor is an all-zero
            ``[out_C, C, kernel_height, kernel_width]`` array.
        kernel_height, kernel_width, mask_type, bias, dilation, stride,
            padding: forwarded to ``pixelcnn``; the geometry values also
            determine the output shape.
        number: how many times the built function is called.
        dev: index passed to ``tvm.cpu``.

    Returns:
        Elapsed wall-clock time times 1e3, divided by ``number``.
    """
    image = torch.rand([B, H, W, C], dtype=torch.float32)
    weights = torch.zeros([out_C, C, kernel_height, kernel_width], dtype=torch.float32)
    ops, bufs = pixelcnn(B, H, W, C, out_C, kernel_height, kernel_width, mask_type, bias, dilation=dilation, stride=stride, padding=padding)
    ctx = tvm.cpu(dev_id=dev)
    sched = tvm.te.create_schedule(ops)
    module = tvm.build(sched, bufs, "llvm")

    image_nd = tvm.nd.array(image.numpy().astype(np.float32), ctx)
    weights_nd = tvm.nd.array(weights.numpy().astype(np.float32), ctx)

    in_height, in_width = H, W
    out_height = (in_height + 2 * padding - dilation * (kernel_height - 1) - 1) // stride + 1
    out_width = (in_width + 2 * padding - dilation * (kernel_width - 1) - 1) // stride + 1
    result_nd = tvm.nd.array(
        np.zeros((B, out_height, out_width, out_C)).astype(np.float32), ctx
    )

    started = time.time()
    for _ in range(number):
        module(image_nd, weights_nd, result_nd)
    finished = time.time()
    return (finished - started) * 1e3 / number
示例10: check_result
# 需要導入模塊: import tvm [as 別名]
# 或者: from tvm import build [as 別名]
def check_result(configs, shape, target="cuda", dev_id=0):
    """Compare a scheduled conv2d against ``torch.nn.functional.conv2d``.

    Prints "Passed!" when ``test_allclose`` returns 1 and "Failed!"
    otherwise.

    Args:
        configs: a ``(name, configs)`` pair forwarded to
            ``schedule_with_config``.
        shape: 11-item sequence unpacked as batch, in_channel, H, W,
            out_channel, k, (ignored), stride, padding, dilation, groups.
        target: build target, also used to create the device context.
        dev_id: device index passed to ``tvm.context``.
    """
    ctx = tvm.context(target, dev_id)
    name, configs = configs
    batch, in_channel, H, W, out_channel, k, _, stride, padding, dilation, groups = shape

    # Input feature map: one host copy, mirrored to TVM and to torch.
    data_np = np.random.uniform(-10, 10, size=[batch, in_channel, H, W]).astype("float32")
    data_tvm = tvm.nd.array(data_np, ctx)
    data_torch = torch.tensor(data_np)

    # Convolution weights, same treatment.
    weight_np = np.random.uniform(-10, 10, size=[out_channel, in_channel//groups, k, k]).astype("float32")
    weight_tvm = tvm.nd.array(weight_np, ctx)
    weight_torch = torch.tensor(weight_np)

    # Reference result; its shape also sizes the TVM output buffer.
    expected = torch.nn.functional.conv2d(
        data_torch,
        weight_torch,
        stride=stride,
        padding=padding,
        dilation=dilation,
        groups=groups,
    )
    out_tvm = tvm.nd.array(np.zeros(expected.shape).astype(np.float32), ctx)

    sched, tensors = schedule_with_config(name, configs)
    module = tvm.build(sched, tensors, target)
    module(data_tvm, weight_tvm, out_tvm)

    passed = test_allclose(out_tvm.asnumpy(), expected.cpu().numpy(), rtol=1e-5, print_diff=True)
    print("Passed!" if passed == 1 else "Failed!")
示例11: test_broadcast_to
# 需要導入模塊: import tvm [as 別名]
# 或者: from tvm import build [as 別名]
def test_broadcast_to(in_shape, out_shape):
    """Check the CUDA ``broadcast_to`` kernel against ``np.broadcast_to``.

    Also stores a tag derived from both shapes in the module-level ``TASK``.

    Args:
        in_shape: shape of the input placeholder.
        out_shape: shape to broadcast to.
    """
    global TASK
    in_tag = "_".join([str(dim) for dim in in_shape])
    out_tag = "_".join([str(dim) for dim in out_shape])
    TASK = "bcast_to_i" + in_tag + "o" + out_tag

    # Describe the computation and compile it for CUDA.
    src = tvm.te.placeholder(shape=in_shape, name="A")
    dst = topi.broadcast_to(src, out_shape)
    sched = topi.cuda.schedule_broadcast(dst)
    fcuda = tvm.build(sched, [src, dst], "cuda", name="broadcast_to")

    # Host-side input and the expected output.
    data_npy = np.random.uniform(size=in_shape).astype(src.dtype)
    expected = np.broadcast_to(data_npy, out_shape)

    data_nd = tvm.nd.array(data_npy, tvm.gpu())
    out_nd = tvm.nd.array(np.empty(out_shape).astype(dst.dtype), tvm.gpu())

    # The kernel is invoked twice before the result is compared.
    for _ in range(2):
        fcuda(data_nd, out_nd)
    tvm.testing.assert_allclose(out_nd.asnumpy(), expected)
示例12: compute_inline_reduce
# 需要導入模塊: import tvm [as 別名]
# 或者: from tvm import build [as 別名]
def compute_inline_reduce():
    """Try to build a schedule that inlines a stage containing a reduction.

    Stage ``C`` sums over a reduce axis, ``D`` and ``E`` scale ``C``, and
    ``F`` adds them; ``C`` is marked ``compute_inline`` before building
    for ``llvm``.

    Returns:
        ``(True, "pass")`` when the build succeeds, otherwise
        ``(False, <exception text>)``.
    """
    out_shape = (30, 30, 32, 32)
    A = tvm.te.placeholder((32, 32, 32, 32), dtype="float32", name="A")
    B = tvm.te.placeholder((32, 32), dtype="float32", name="B")
    k = tvm.te.reduce_axis((0, 32), name="k")
    C = tvm.te.compute(
        out_shape,
        lambda a, b, c, d: tvm.te.sum(A[a, b, c, k] * B[k, d], axis=k),
        name="C",
    )
    D = tvm.te.compute(out_shape, lambda h, k, l, m: (C[h, k, l, m] * 2), name="D")
    E = tvm.te.compute(out_shape, lambda h, k, l, m: (C[h, k, l, m] * 3), name="E")
    F = tvm.te.compute(
        out_shape,
        lambda h, k, l, m: (D[h, k, l, m] + E[h, k, l, m]),
        name="F",
    )
    sched = tvm.te.create_schedule(F.op)
    sched[C].compute_inline()
    try:
        tvm.build(sched, [A, F], "llvm")
    except Exception as err:
        return False, str(err)
    else:
        return True, "pass"
示例13: evaluate
# 需要導入模塊: import tvm [as 別名]
# 或者: from tvm import build [as 別名]
def evaluate(s, bufs, target, dev_id, number):
    """Build a three-buffer schedule and time it with ``time_evaluator``.

    Array shapes come from the module-level names ``in_size``,
    ``in_channel``, ``batch``, ``kernel``, ``out_channel`` and ``out_size``.

    Args:
        s: schedule handed to ``tvm.build``.
        bufs: exactly three tensors (two inputs, one output).
        target: ``"cuda"`` or ``"llvm"``.
        dev_id: device index.
        number: forwarded to ``time_evaluator`` as ``number``.

    Returns:
        The evaluator's ``mean`` scaled by 1e3.

    Raises:
        ValueError: if ``target`` is neither ``"cuda"`` nor ``"llvm"``
            (raised after the build has run).
    """
    data_t, weight_t, out_t = bufs
    module = tvm.build(s, bufs, target)
    # Debug aid: print(module.imported_modules[0].get_source())
    if target not in ("cuda", "llvm"):
        raise ValueError("not support {}".format(target))
    ctx = tvm.gpu(dev_id) if target == "cuda" else tvm.cpu(dev_id)

    data_np = np.random.uniform(size=(in_size, in_size, in_channel, batch)).astype(data_t.dtype)
    weight_np = np.random.uniform(size=(kernel, kernel, in_channel, out_channel)).astype(weight_t.dtype)
    data_nd = tvm.nd.array(data_np, ctx)
    weight_nd = tvm.nd.array(weight_np, ctx)
    out_nd = tvm.nd.array(np.zeros((out_size, out_size, out_channel, batch), dtype=out_t.dtype), ctx)

    timer = module.time_evaluator(module.entry_name, ctx, number=number)
    return timer(data_nd, weight_nd, out_nd).mean * 1e3
示例14: test_mean
# 需要導入模塊: import tvm [as 別名]
# 或者: from tvm import build [as 別名]
def test_mean():
    """Compare the project's ``mean`` over dim 2 with ``torch.mean``.

    Prints "Mean basic case passed!" when ``test_allclose`` returns 1 and
    "Mean basic case failed!" otherwise.
    """
    # Basic case: random 5-D float32 input, reduced along dim 2.
    host_in = np.random.random([2, 3, 27, 3, 17]).astype(np.float32) * 100
    expected = torch.mean(torch.tensor(host_in), dim=2)

    ctx = tvm.context("llvm", 0)
    in_nd = tvm.nd.array(host_in, ctx)
    out_nd = tvm.nd.array(np.zeros(expected.shape).astype(np.float32), ctx)

    in_t = tvm.te.placeholder(host_in.shape, dtype="float32")
    out_t = mean(in_t, dim=2)
    sched = tvm.te.create_schedule(out_t.op)
    module = tvm.build(sched, [in_t, out_t], "llvm")
    module(in_nd, out_nd)

    passed = test_allclose(out_nd.asnumpy(), expected.numpy(), rtol=1e-5, print_diff=True)
    print("Mean basic case passed!" if passed == 1 else "Mean basic case failed!")
示例15: test_variance
# 需要導入模塊: import tvm [as 別名]
# 或者: from tvm import build [as 別名]
def test_variance():
    """Compare the project's ``variance`` over dim 2 with ``Tensor.var``.

    Prints "Variance basic case passed!" when ``test_allclose`` returns 1
    and "Variance basic case failed!" otherwise.
    """
    # Basic case: random 5-D float32 input, reduced along dim 2.
    host_in = np.random.random([2, 3, 27, 3, 17]).astype(np.float32) * 100
    expected = torch.tensor(host_in).var(dim=2)

    ctx = tvm.context("llvm", 0)
    in_nd = tvm.nd.array(host_in, ctx)
    out_nd = tvm.nd.array(np.zeros(expected.shape).astype(np.float32), ctx)

    in_t = tvm.te.placeholder(host_in.shape, dtype="float32")
    out_t = variance(in_t, dim=2)
    sched = tvm.te.create_schedule(out_t.op)
    module = tvm.build(sched, [in_t, out_t], "llvm")
    module(in_nd, out_nd)

    passed = test_allclose(out_nd.asnumpy(), expected.numpy(), rtol=1e-5, print_diff=True)
    print("Variance basic case passed!" if passed == 1 else "Variance basic case failed!")