

Python tvm.gpu Method Code Examples

This article collects typical usage examples of the tvm.gpu method in Python. If you are wondering how tvm.gpu is used in practice, what it does, or where to find concrete examples, the curated code samples below may help. You can also explore further usage examples from the tvm package in which this method lives.


The following presents 15 code examples of the tvm.gpu method, sorted by popularity by default.
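Before turning to the individual examples, here is a minimal sketch of the pattern they all share: tvm.gpu(dev_id) returns a GPU device context, which is then passed to tvm.nd.array or tvm.nd.empty to place data on the device. This is a hypothetical standalone snippet, assuming an older TVM release in which tvm.gpu is still available (recent releases rename it to tvm.cuda):

import numpy as np
import tvm

ctx = tvm.gpu(0)                  # device context for the first CUDA GPU
if ctx.exist:                     # skip gracefully on machines without a GPU
    data = np.random.uniform(size=(16,)).astype("float32")
    a = tvm.nd.array(data, ctx)   # copy the NumPy array onto the GPU device
    print(a.asnumpy())            # copy back to host memory for inspection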

Example 1: test_broadcast_to

# Required import: import tvm [as alias]
# Or alternatively: from tvm import gpu [as alias]
def test_broadcast_to(in_shape, out_shape):
    global TASK
    TASK = "bcast_to_i" + "_".join([str(ele) for ele in in_shape])\
           + "o" + "_".join([str(ele) for ele in out_shape])
    # Build the logic and compile the function
    A = tvm.te.placeholder(shape=in_shape, name="A")
    B = topi.broadcast_to(A, out_shape)
    s = topi.cuda.schedule_broadcast(B)
    fcuda = tvm.build(s, [A, B], "cuda", name="broadcast_to")

    data_npy = np.random.uniform(size=in_shape).astype(A.dtype)
    out_npy = np.broadcast_to(data_npy, out_shape)

    data_nd = tvm.nd.array(data_npy, tvm.gpu())
    out_nd = tvm.nd.array(np.empty(out_shape).astype(B.dtype), tvm.gpu())
    for _ in range(2):
        fcuda(data_nd, out_nd)
    tvm.testing.assert_allclose(out_nd.asnumpy(), out_npy) 
Developer: KnowingNothing, Project: FlexTensor, Lines: 20, Source: test_broadcast_map.py

Example 2: test_matmul_add

# Required import: import tvm [as alias]
# Or alternatively: from tvm import gpu [as alias]
def test_matmul_add():
    n = 1024
    l = 128
    m = 235
    A = tvm.placeholder((n, l), name='A')
    B = tvm.placeholder((l, m), name='B')
    C = cublas.matmul(A, B)
    s = tvm.create_schedule(C.op)

    def verify(target="cuda"):
        if not tvm.module.enabled(target):
            print("skip because %s is not enabled..." % target)
            return
        if not tvm.get_global_func("tvm.contrib.cublas.matmul", True):
            print("skip because extern function is not available")
            return
        ctx = tvm.gpu(0)
        f = tvm.build(s, [A, B, C], target)
        a = tvm.nd.array(np.random.uniform(size=(n, l)).astype(A.dtype), ctx)
        b = tvm.nd.array(np.random.uniform(size=(l, m)).astype(B.dtype), ctx)
        c = tvm.nd.array(np.zeros((n, m), dtype=C.dtype), ctx)
        f(a, b, c)
        np.testing.assert_allclose(
            c.asnumpy(), np.dot(a.asnumpy(), b.asnumpy()), rtol=1e-5)
    verify() 
Developer: mlperf, Project: training_results_v0.6, Lines: 27, Source: test_cublas.py

Example 3: test_broadcast_to

# Required import: import tvm [as alias]
# Or alternatively: from tvm import gpu [as alias]
def test_broadcast_to(in_shape, out_shape):
    global TASK
    TASK = "bcast_to_i" + "_".join([str(ele) for ele in in_shape])\
           + "o" + "_".join([str(ele) for ele in out_shape])
    # Build the logic and compile the function
    A = tvm.placeholder(shape=in_shape, name="A")
    B = topi.broadcast_to(A, out_shape)
    s = topi.cuda.schedule_broadcast(B)
    fcuda = tvm.build(s, [A, B], "cuda", name="broadcast_to")

    data_npy = np.random.uniform(size=in_shape).astype(A.dtype)
    out_npy = np.broadcast_to(data_npy, out_shape)

    data_nd = tvm.nd.array(data_npy, tvm.gpu())
    out_nd = tvm.nd.array(np.empty(out_shape).astype(B.dtype), tvm.gpu())
    for _ in range(2):
        fcuda(data_nd, out_nd)
    np.testing.assert_allclose(out_nd.asnumpy(), out_npy) 
Developer: mlperf, Project: training_results_v0.6, Lines: 20, Source: test_broadcast_map.py

Example 4: build_tvm

# Required import: import tvm [as alias]
# Or alternatively: from tvm import gpu [as alias]
def build_tvm(self, net, torch_inputs):
        self.graph_pth = torch2trt.GraphModule(net, torch_inputs)
        with torch2trt.core.tvm_network():
            trace, graph_pth = torch2trt.core.torch2tvm(
                net,
                torch_inputs,
                input_names=self.input_names,
                verbose=self.verbose)
        self.refit_weight_dict = graph_pth.refit_weight_dict
        input_names = get_torch_forward_name(net.forward)
        self.graph_pth = graph_pth
        outputs = graph_pth.get_resolved_outputs()
        tvm_weight_dict = graph_pth.context.tvm_weight_dict
        self.params = {k.name_hint: v for k, v in tvm_weight_dict.items()}
        print(len(self.params))
        self.graph = expr.Function(analysis.free_vars(outputs), outputs)
        if self.graph_post_fn is not None:
            self.graph = self.graph_post_fn(self.graph)
        self.ctx = TVMInference(self.graph, self.params, input_names=input_names, ctx=tvm.gpu(0), cudnn=True) 
Developer: traveller59, Project: torch2trt, Lines: 21, Source: module.py

Example 5: test_cuda_lib

# Required import: import tvm [as alias]
# Or alternatively: from tvm import gpu [as alias]
def test_cuda_lib():
    ctx = tvm.gpu(0)
    for device in ["llvm", "cuda"]:
        if not tvm.runtime.enabled(device):
            print("skip because %s is not enabled..." % device)
            return
    nn = 12
    n = tvm.runtime.convert(nn)
    A = te.placeholder((n,), name='A')
    B = te.compute(A.shape, lambda *i: A(*i) + 1.0, name='B')
    s = te.create_schedule(B.op)
    bx, tx = s[B].split(B.op.axis[0], factor=4)
    s[B].bind(bx, te.thread_axis("blockIdx.x"))
    s[B].bind(tx, te.thread_axis("threadIdx.x"))

    from tvm.contrib import util
    temp = util.tempdir()
    fn_add = tvm.build(s, [A, B], target="cuda", target_host="llvm", name="add")
    path_lib = temp.relpath("deploy_lib.so")
    fn_add.export_library(path_lib)
    m = tvm.runtime.load_module(path_lib)
    a = tvm.nd.array(np.random.uniform(size=nn).astype(A.dtype), ctx)
    b = tvm.nd.array(np.zeros(nn, dtype=A.dtype), ctx)
    m['add'](a, b)
    np.testing.assert_equal(b.asnumpy(), a.asnumpy() + 1) 
Developer: apache, Project: incubator-tvm, Lines: 27, Source: test_target_codegen_blob.py

Example 6: test_cuda_reduction_binding

# Required import: import tvm [as alias]
# Or alternatively: from tvm import gpu [as alias]
def test_cuda_reduction_binding():
    if not tvm.gpu(0).exist or not tvm.runtime.enabled("cuda"):
        print("skip because cuda is not enabled..")
        return

    k = te.reduce_axis((0, 32), 'k')
    A = te.placeholder((96, 32), name='A')
    B = te.compute((96,), lambda m: te.sum(A[m, k], axis=k), name='B')
    s = te.create_schedule(B.op)

    s[B].reorder(B.op.reduce_axis[0], B.op.axis[0])

    mo, _ = s[B].split(B.op.axis[0], 32)
    s[B].bind(mo, te.thread_axis("blockIdx.x"))

    fcuda = tvm.build(s, [A, B], "cuda") 
Developer: apache, Project: incubator-tvm, Lines: 20, Source: test_target_codegen_cuda.py

Example 7: test_cuda_const_float_to_half

# Required import: import tvm [as alias]
# Or alternatively: from tvm import gpu [as alias]
def test_cuda_const_float_to_half():
    # This import is required to use nvcc to perform code gen;
    # otherwise it is found that the code gen is done by nvrtc.
    from tvm import autotvm
    shape = (2, 3, 4)
    a = te.placeholder(shape, dtype='float16', name='a')
    b = tvm.tir.const(0.5, dtype='float16')
    c = te.compute(shape, lambda i, j, k: a[i, j, k] > b, name='c')
    s = te.create_schedule(c.op)
    axes = [axis for axis in c.op.axis]
    fused = s[c].fuse(*axes)
    bx, tx = s[c].split(fused, factor=64)
    s[c].bind(bx, te.thread_axis('blockIdx.x'))
    s[c].bind(tx, te.thread_axis('threadIdx.x'))

    func = tvm.build(s, [a, c], 'cuda')
    ctx = tvm.gpu(0)
    a_np = np.random.uniform(size=shape).astype(a.dtype)
    c_np = np.zeros(shape=shape, dtype=c.dtype)
    a = tvm.nd.array(a_np, ctx)
    c = tvm.nd.array(c_np, ctx)
    func(a, c)
    np.testing.assert_equal(c.asnumpy(), a_np > b.value) 
Developer: apache, Project: incubator-tvm, Lines: 25, Source: test_target_codegen_cuda.py

Example 8: test_expand_dims

# Required import: import tvm [as alias]
# Or alternatively: from tvm import gpu [as alias]
def test_expand_dims():
    # based on topi test
    def verify_expand_dims(dshape, dtype, oshape, axis, num_newaxis):
        x = relay.Var("x", relay.TensorType(dshape, dtype))
        func = relay.Function([x], relay.expand_dims(x, axis, num_newaxis))
        for target, ctx in ctx_list():
            if dtype ==  'float16' and target == 'cuda' and not have_fp16(tvm.gpu(0).compute_version):
                continue
            data = np.random.uniform(size=dshape).astype(dtype)
            ref_res = data.reshape(oshape)
            intrp = relay.create_executor("graph", ctx=ctx, target=target)
            op_res = intrp.evaluate(func)(data)
            np.testing.assert_allclose(op_res.asnumpy(), ref_res, rtol=0.01)
    for dtype in ['float16', 'float32']:
        verify_expand_dims((3, 10), dtype, (3, 10, 1, 1), 2, 2)
        verify_expand_dims((3, 10), dtype, (1, 3, 10), -3, 1) 
Developer: apache, Project: incubator-tvm, Lines: 18, Source: test_op_level1.py

Example 9: verify_matmul_add

# Required import: import tvm [as alias]
# Or alternatively: from tvm import gpu [as alias]
def verify_matmul_add(in_dtype, out_dtype, rtol=1e-5):
    n = 1024
    l = 128
    m = 236
    A = te.placeholder((n, l), name='A', dtype=in_dtype)
    B = te.placeholder((l, m), name='B', dtype=in_dtype)
    C = cublas.matmul(A, B, dtype=out_dtype)
    s = te.create_schedule(C.op)

    def verify(target="cuda"):
        if not tvm.runtime.enabled(target):
            print("skip because %s is not enabled..." % target)
            return
        if not tvm.get_global_func("tvm.contrib.cublas.matmul", True):
            print("skip because extern function is not available")
            return
        ctx = tvm.gpu(0)
        f = tvm.build(s, [A, B, C], target)
        a = tvm.nd.array(np.random.uniform(0, 128, size=(n, l)).astype(A.dtype), ctx)
        b = tvm.nd.array(np.random.uniform(0, 128, size=(l, m)).astype(B.dtype), ctx)
        c = tvm.nd.array(np.zeros((n, m), dtype=C.dtype), ctx)
        f(a, b, c)
        tvm.testing.assert_allclose(
            c.asnumpy(), np.dot(a.asnumpy().astype(C.dtype), b.asnumpy().astype(C.dtype)), rtol=rtol)
    verify() 
Developer: apache, Project: incubator-tvm, Lines: 27, Source: test_cublas.py

Example 10: test_broadcast_to

# Required import: import tvm [as alias]
# Or alternatively: from tvm import gpu [as alias]
def test_broadcast_to(in_shape, out_shape):
    global TASK
    TASK = "bcast_to_i" + "_".join([str(ele) for ele in in_shape])\
           + "o" + "_".join([str(ele) for ele in out_shape])
    # Build the logic and compile the function
    A = te.placeholder(shape=in_shape, name="A")
    B = topi.broadcast_to(A, out_shape)
    s = topi.cuda.schedule_broadcast(B)
    fcuda = tvm.build(s, [A, B], "cuda", name="broadcast_to")

    data_npy = np.random.uniform(size=in_shape).astype(A.dtype)
    out_npy = np.broadcast_to(data_npy, out_shape)

    data_nd = tvm.nd.array(data_npy, tvm.gpu())
    out_nd = tvm.nd.array(np.empty(out_shape).astype(B.dtype), tvm.gpu())
    for _ in range(2):
        fcuda(data_nd, out_nd)
    tvm.testing.assert_allclose(out_nd.asnumpy(), out_npy) 
Developer: apache, Project: incubator-tvm, Lines: 20, Source: test_broadcast_map.py

Example 11: verify_vectorization

# Required import: import tvm [as alias]
# Or alternatively: from tvm import gpu [as alias]
def verify_vectorization(n, m, dtype):
    def check_device(device):
        if not tvm.runtime.enabled(device):
            print("Skip because %s is not enabled" % device)
            return
        if dtype == "float16" and device == "cuda" and not have_fp16(tvm.gpu(0).compute_version):
            print("Skip because gpu does not have fp16 support")
            return
        with tvm.target.create(device):
            ctx = tvm.context(device, 0)
            A = te.placeholder((n, m), name='A', dtype=dtype)
            B = te.compute((n, m), lambda i, j:
                             A[i, j] + tvm.tir.const(1, A.dtype), name='B')
            S = topi.testing.get_elemwise_schedule(device)(B)

            fun = tvm.build(S, [A, B], device)
            np_A = tvm.nd.empty((n, m), A.dtype, ctx).copyfrom(
                                np.random.uniform(size=(n, m)))
            np_B = tvm.nd.empty((n, m), B.dtype, ctx)
            fun(np_A, np_B)
            tvm.testing.assert_allclose(np_B.asnumpy(), np_A.asnumpy() + 1, rtol=1e-5)

    for device in ["cuda"]:
        check_device(device) 
Developer: apache, Project: incubator-tvm, Lines: 26, Source: test_topi_tensor.py

Example 12: run

# Required import: import tvm [as alias]
# Or alternatively: from tvm import gpu [as alias]
def run(args):
    onnx_model = onnx.load_model(run_onnx_util.onnx_model_file(args.test_dir, args.model_file))
    ctx = tvm.gpu()

    input_names, output_names = run_onnx_util.onnx_input_output_names(
        os.path.join(args.test_dir, args.model_file))

    test_data_dir = os.path.join(args.test_dir, 'test_data_set_0')
    inputs, outputs = run_onnx_util.load_test_data(
        test_data_dir, input_names, output_names)

    inputs = dict(inputs)
    graph_module = None
    if args.frontend == 'nnvm':
        graph_module = build_graph_nnvm(args, ctx, onnx_model, inputs, input_names)
    elif args.frontend == 'relay':
        graph_module = build_graph_relay(args, ctx, onnx_model, inputs, input_names)
    else:
        raise RuntimeError('Invalid frontend: {}'.format(args.frontend))

    graph_module.run()

    for i, (name, expected) in enumerate(outputs):
        tvm_output = tvm.nd.empty(expected.shape, expected.dtype, ctx=ctx)
        actual = graph_module.get_output(i, tvm_output).asnumpy()
        np.testing.assert_allclose(expected, actual,
                                   rtol=1e-3, atol=1e-4, err_msg=name)
        print('%s: OK' % name)
    print('ALL OK')

    def compute():
        graph_module.run()
        cupy.cuda.device.Device().synchronize()

    return run_onnx_util.run_benchmark(compute, args.iterations) 
Developer: pfnet-research, Project: chainer-compiler, Lines: 37, Source: run_onnx_tvm.py

Example 13: test_reduce_map

# Required import: import tvm [as alias]
# Or alternatively: from tvm import gpu [as alias]
def test_reduce_map(in_shape, axis, keepdims, type="sum", test_id=0):
    global TASK
    # Build the logic and compile the function
    A = tvm.te.placeholder(shape=in_shape, name="A")
    if type == "sum":
        TASK = "sum_map_id%d" %test_id
        B = topi.sum(A, axis=axis, keepdims=keepdims)
    elif type == "max":
        TASK = "max_map_id%d" %test_id
        B = topi.max(A, axis=axis, keepdims=keepdims)
    elif type == "min":
        TASK = "min_map_id%d" %test_id
        B = topi.min(A, axis=axis, keepdims=keepdims)
    else:
        raise NotImplementedError
    s = topi.cuda.schedule_reduce(B)
    with tvm.build_config(auto_unroll_max_step=16,
                          auto_unroll_min_depth=0):
        fcuda = tvm.build(s, [A, B], "cuda", name="sum")

    # Test
    in_npy = np.random.normal(size=in_shape).astype(np.float32)
    if type == "sum":
        out_npy = in_npy.sum(axis=axis, keepdims=keepdims)
    elif type == "max":
        out_npy = in_npy.max(axis=axis, keepdims=keepdims)
    elif type == "min":
        out_npy = in_npy.min(axis=axis, keepdims=keepdims)
    else:
        raise NotImplementedError

    data_tvm = tvm.nd.array(in_npy, ctx=tvm.gpu())
    out_tvm = tvm.nd.empty(shape=out_npy.shape, ctx=tvm.gpu())

    for _ in range(2):
        fcuda(data_tvm, out_tvm)
    tvm.testing.assert_allclose(out_tvm.asnumpy(), out_npy, rtol=4e-4, atol=4e-4) 
Developer: KnowingNothing, Project: FlexTensor, Lines: 39, Source: test_reduce_map.py

Example 14: save_param_dict

# Required import: import tvm [as alias]
# Or alternatively: from tvm import gpu [as alias]
def save_param_dict(params):
    """Save parameter dictionary to binary bytes.

    The result binary bytes can be loaded by the
    GraphModule with API "load_params".

    Parameters
    ----------
    params : dict of str to NDArray
        The parameter dictionary.

    Returns
    -------
    param_bytes: bytearray
        Serialized parameters.

    Examples
    --------
    .. code-block:: python

       # compile and save the modules to file.
       graph, lib, params = nnvm.compiler.build(
          graph, target, shape={"data": data_shape}, params=params)
       module = graph_runtime.create(graph, lib, tvm.gpu(0))
       # save the parameters as byte array
       param_bytes = nnvm.compiler.save_param_dict(params)
       # We can serialize the param_bytes and load it back later.
       # Pass in byte array to module to directly set parameters
       module["load_params"](param_bytes)
    """
    args = []
    for k, v in params.items():
        args.append(k)
        args.append(tvm.nd.array(v))
    return _save_param_dict(*args) 
Developer: mlperf, Project: training_results_v0.6, Lines: 37, Source: param_dict.py

Example 15: test_bind

# Required import: import tvm [as alias]
# Or alternatively: from tvm import gpu [as alias]
def test_bind():
    if not tvm.gpu(0).exist:
        print('[Warning] No GPU found! Skip bind test!')
        return
    @script
    def vec_add(a, b, c):
        for tx in bind('threadIdx.x', 1000):
            c[tx] = b[tx] + c[tx]

    a = tvm.placeholder((1000, ), dtype='float32', name='a')
    b = tvm.placeholder((1000, ), dtype='float32', name='b')
    c = tvm.placeholder((1000, ), dtype='float32', name='c')

    run_and_check(vec_add, [a, b, c], [c], target='cuda') 
Developer: mlperf, Project: training_results_v0.6, Lines: 16, Source: test_hybrid_script.py


Note: The tvm.gpu examples in this article were compiled by 純淨天空 from open-source code and documentation platforms such as GitHub and MSDocs. The code snippets are drawn from open-source projects contributed by their respective developers; copyright in the source code remains with the original authors. For distribution and use, please refer to the license of the corresponding project; do not reproduce without permission.