当前位置: 首页>>代码示例>>Python>>正文


Python tvm.compute方法代码示例

本文整理汇总了Python中tvm.compute方法的典型用法代码示例。如果您正苦于以下问题:Python tvm.compute方法的具体用法?Python tvm.compute怎么用?Python tvm.compute使用的例子?那么,这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在模块tvm的用法示例。


在下文中一共展示了tvm.compute方法的15个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Python代码示例。

示例1: test_bound_scan

# 需要导入模块: import tvm [as 别名]
# 或者: from tvm import compute [as 别名]
def test_bound_scan():
    """Infer bounds for a cache-read stage attached inside a scan update.

    The update stage's second axis is split by a factor of 4 and the
    "local" cache of ``X`` is computed at the outer split axis; the test
    asserts that the cache's second axis is inferred with extent 4.
    """
    rows = tvm.var("m")
    cols = tvm.var("n")
    full_shape = (rows, cols)

    X = tvm.compute(full_shape, lambda i, j: tvm.const(1, "float32"), name="x")
    s_state = tvm.placeholder(full_shape)
    s_init = tvm.compute((1, cols), lambda _, i: X[0, i])
    s_update = tvm.compute(full_shape, lambda t, i: s_state[t-1, i] + X[t, i])
    s_scan = tvm.scan(s_init, s_update, s_state)
    assert tuple(s_scan.shape) == full_shape

    sched = tvm.create_schedule(s_scan.op)
    x_cache = sched.cache_read(X, "local", s_update)
    outer, _inner = sched[s_update].split(s_update.op.axis[1], factor=4)
    sched[x_cache].compute_at(sched[s_update], outer)
    sched = sched.normalize()

    bounds = tvm.schedule.InferBound(sched)
    # ScheduleOps is invoked only to confirm it runs; its result is not inspected.
    tvm.schedule.ScheduleOps(sched, bounds)
    assert bounds[x_cache.op.axis[1]].extent.value == 4
开发者ID:mlperf,项目名称:training_results_v0.6,代码行数:20,代码来源:test_schedule_bound_inference.py

示例2: get_tvm_add

# 需要导入模块: import tvm [as 别名]
# 或者: from tvm import compute [as 别名]
def get_tvm_add():
    """Build element-wise float32 vector-add functions with TVM.

    Returns:
        A ``(func_cpu, func_gpu)`` pair. ``func_cpu`` is built under the
        ``llvm`` target. ``func_gpu`` is built under the ``cuda`` target
        when ``mobula.utils.list_gpus()`` is truthy, otherwise it is ``None``.
    """
    # define compute
    length = tvm.var('n')
    A = tvm.placeholder(length, name='A', dtype='float32')
    B = tvm.placeholder(length, name='B', dtype='float32')
    C = tvm.compute((length,), lambda i: A[i] + B[i], name='C')

    def build_for(target_name):
        # build function (with parallel support) inside the target context
        with tvm.target.create(target_name):
            sched = topi.generic.schedule_injective([C])
            return tvm.build(sched, [A, B, C])

    func_cpu = build_for('llvm')
    func_gpu = build_for('cuda') if mobula.utils.list_gpus() else None
    return func_cpu, func_gpu
开发者ID:wkcn,项目名称:MobulaOP,代码行数:22,代码来源:TVMOp.py

示例3: test_add

# 需要导入模块: import tvm [as 别名]
# 或者: from tvm import compute [as 别名]
def test_add(target_dir):
    """Compile a CUDA vector add and write its artifacts into ``target_dir``.

    Saves ``add_gpu.o`` and ``add_gpu.ptx`` and then calls
    ``cc.create_shared`` to produce ``add_gpu.so`` from the object file.
    Prints a message and returns early when cuda is not enabled.

    Args:
        target_dir: Directory that receives the generated files.
    """
    if not tvm.module.enabled("cuda"):
        print("skip %s because cuda is not enabled..." % __file__)
        return

    length = tvm.var("n")
    A = tvm.placeholder((length,), name='A')
    B = tvm.placeholder((length,), name='B')
    C = tvm.compute(A.shape, lambda i: A[i] + B[i], name="C")

    sched = tvm.create_schedule(C.op)
    block_axis, lane_axis = sched[C].split(C.op.axis[0], factor=64)
    sched[C].bind(block_axis, tvm.thread_axis("blockIdx.x"))
    sched[C].bind(lane_axis, tvm.thread_axis("threadIdx.x"))
    fadd_cuda = tvm.build(sched, [A, B, C], "cuda", target_host="llvm", name="myadd")

    def artifact(file_name):
        # Location of an output file inside target_dir.
        return os.path.join(target_dir, file_name)

    object_path = artifact("add_gpu.o")
    fadd_cuda.save(object_path)
    fadd_cuda.imported_modules[0].save(artifact("add_gpu.ptx"))
    cc.create_shared(artifact("add_gpu.so"), [object_path])
开发者ID:mlperf,项目名称:training_results_v0.6,代码行数:22,代码来源:test_add_gpu.py

示例4: test_verify_memory_not_bind

# 需要导入模块: import tvm [as 别名]
# 或者: from tvm import compute [as 别名]
def test_verify_memory_not_bind():
  """Run VerifyMemory on a compute stage that has no thread binding.

  The lowered function must fail verification for every entry of
  ``gpu_devices`` and pass it for every entry of ``other_devices``.
  """
  size = tvm.var("n")
  A = tvm.placeholder((size,), name='A')
  B = tvm.compute(A.shape, lambda i: A[i] + 1.0, name="B")

  # Default schedule: B is left unbound to any thread axis.
  sched = tvm.create_schedule(B.op)
  func = lower(sched, [A, B])

  assert not any(tvm.ir_pass.VerifyMemory(func, dev) for dev in gpu_devices)
  assert all(tvm.ir_pass.VerifyMemory(func, dev) for dev in other_devices)


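# NOTE: the comment below appears to introduce the next test in the original
# file (test_verify_memory_partially_bind), not the function above.
# Computations are partially bound.
# So the VerifyMemory pass fails when the device type is GPU.
#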
开发者ID:mlperf,项目名称:training_results_v0.6,代码行数:21,代码来源:test_pass_verify_memory.py

示例5: test_verify_memory_partially_bind

# 需要导入模块: import tvm [as 别名]
# 或者: from tvm import compute [as 别名]
def test_verify_memory_partially_bind():
  """Run VerifyMemory on a three-stage chain where only the middle stage is bound.

  The lowered function must fail verification for every entry of
  ``gpu_devices`` and pass it for every entry of ``other_devices``.
  """
  size = tvm.var("n")
  A = tvm.placeholder((size,), name='A')
  B = tvm.compute(A.shape, lambda i: A[i] + 1.0, name="B")
  C = tvm.compute(B.shape, lambda i: B[i] + 2.0, name="C")
  D = tvm.compute(C.shape, lambda i: C[i] + 2.0, name="D")

  # Only C gets block/thread bindings; B and D stay unbound.
  sched = tvm.create_schedule([B.op, C.op, D.op])
  block_axis, lane_axis = sched[C].split(C.op.axis[0], factor=64)
  sched[C].bind(block_axis, tvm.thread_axis("blockIdx.x"))
  sched[C].bind(lane_axis, tvm.thread_axis("threadIdx.x"))

  func = lower(sched, [A, B, C, D])

  assert not any(tvm.ir_pass.VerifyMemory(func, dev) for dev in gpu_devices)
  assert all(tvm.ir_pass.VerifyMemory(func, dev) for dev in other_devices)
开发者ID:mlperf,项目名称:training_results_v0.6,代码行数:21,代码来源:test_pass_verify_memory.py

示例6: test_bound3

# 需要导入模块: import tvm [as 别名]
# 或者: from tvm import compute [as 别名]
def test_bound3():
    """Infer bounds for a "shared" stage attached under a split and reordered consumer.

    ``A2`` is split 32 on its first axis (then into 16 parts, bound to
    ``threadIdx.x``) and 16 on its second axis; ``A1`` is computed at the
    outer part of the second-axis split. The test asserts that ``A1``'s
    axes are inferred with extents 32 and 16.
    """
    rows = tvm.var('m')
    cols = tvm.var('l')
    shape = (rows, cols)
    A = tvm.placeholder(shape, name='A')
    A1 = tvm.compute(shape, lambda i, j: A[i, j], name='A1')
    A2 = tvm.compute(shape, lambda i, j: A1[i, j] + 3, name='A2')

    sched = tvm.create_schedule(A2.op)
    sched[A1].set_scope("shared")
    row_outer, row_inner = sched[A2].split(A2.op.axis[0], 32)
    row_thread, row_rest = sched[A2].split(row_inner, nparts=16)
    sched[A2].bind(row_thread, tvm.thread_axis("threadIdx.x"))
    col_outer, col_inner = sched[A2].split(A2.op.axis[1], 16)
    # test normalize not affecting schedule: the result is discarded on purpose
    sched.normalize()
    sched[A2].reorder(row_outer, row_thread, col_outer, row_rest, col_inner)
    sched[A1].compute_at(sched[A2], col_outer)

    bounds = tvm.schedule.InferBound(sched)
    assert isinstance(bounds, tvm.container.Map)
    assert bounds[A1.op.axis[0]].extent.value == 32
    assert bounds[A1.op.axis[1]].extent.value == 16
开发者ID:mlperf,项目名称:training_results_v0.6,代码行数:24,代码来源:test_schedule_bound_inference.py

示例7: test_bound_nest_thread

# 需要导入模块: import tvm [as 别名]
# 或者: from tvm import compute [as 别名]
def test_bound_nest_thread():
    """Infer bounds when a producer is bound to the thread axis it is attached under.

    ``A3`` is split by 32 and bound to ``blockIdx.x`` / ``threadIdx.x``.
    ``A2`` ("shared") and ``A1`` ("local") are both computed at ``A3``'s
    thread axis, and ``A2``'s own inner axis is bound to the same
    ``threadIdx.x`` thread axis object. The test asserts inferred extents
    of 1 for ``A1``, 32 for ``A2`` and ``m`` for ``A3``.
    """
    m = tvm.var('m')
    # Shape written as an explicit 1-tuple, matching the compute stages below.
    A = tvm.placeholder((m,), name='A')
    A1 = tvm.compute((m,), lambda i: A[i], name='A1')
    A2 = tvm.compute((m,), lambda i: A1[i] + 2, name='A2')
    A3 = tvm.compute((m,), lambda i: A2[i] + 3, name='A3')

    s = tvm.create_schedule(A3.op)
    s[A2].set_scope("shared")
    s[A1].set_scope("local")

    block_x = tvm.thread_axis("blockIdx.x")
    thread_x = tvm.thread_axis("threadIdx.x")
    bx, tx = s[A3].split(A3.op.axis[0], factor=32)
    s[A3].bind(bx, block_x)
    s[A3].bind(tx, thread_x)
    s[A2].compute_at(s[A3], tx)
    # Re-bind A2's inner axis to the same thread_x object that A3 uses.
    _, xi = s[A2].split(A2.op.axis[0], nparts=1)
    s[A2].bind(xi, thread_x)
    s[A1].compute_at(s[A3], tx)
    s = s.normalize()

    bounds = tvm.schedule.InferBound(s)
    assert bounds[A1.op.axis[0]].extent.value == 1
    assert bounds[A2.op.axis[0]].extent.value == 32
    assert bounds[A3.op.axis[0]].extent == m
开发者ID:mlperf,项目名称:training_results_v0.6,代码行数:27,代码来源:test_schedule_bound_inference.py

示例8: test_makeapi

# 需要导入模块: import tvm [as 别名]
# 或者: from tvm import compute [as 别名]
def test_makeapi():
    """Not yet working, mock design.

    Lowers an element-wise add through InferBound, ScheduleOps and
    StorageFlatten, then wraps the statement with ``MakeAPI`` using the
    argument list ``[n, Ab, Bb, Cb]`` and two unpacked arguments. The test
    asserts the recorded handle dtype for ``Ab`` and that the resulting
    function has five arguments.
    """
    n = tvm.var('n')
    A = tvm.placeholder((n,), name='A')
    B = tvm.placeholder((n,), name='B')
    C = tvm.compute(A.shape, lambda *i: A(*i) + B(*i), name='C')
    s = tvm.create_schedule(C.op)

    bounds = tvm.schedule.InferBound(s)
    stmt = tvm.schedule.ScheduleOps(s, bounds)

    # Explicit buffers that replace the tensors during storage flattening.
    Ab = tvm.decl_buffer(A.shape, A.dtype, name='A')
    Bb = tvm.decl_buffer(B.shape, B.dtype, name='B')
    Cb = tvm.decl_buffer(C.shape, C.dtype, name='C')
    stmt = tvm.ir_pass.StorageFlatten(stmt, {A: Ab, B: Bb, C: Cb}, 64)

    num_unpacked_args = 2
    f = tvm.ir_pass.MakeAPI(
        stmt, "myadd", [n, Ab, Bb, Cb], num_unpacked_args, True)
    assert f.handle_data_type[Ab.data].dtype == Ab.dtype
    assert len(f.args) == 5
开发者ID:mlperf,项目名称:training_results_v0.6,代码行数:24,代码来源:test_pass_makeapi.py

示例9: test_flatten_storage_align

# 需要导入模块: import tvm [as 别名]
# 或者: from tvm import compute [as 别名]
def test_flatten_storage_align():
    """Check the flattened allocation size of a stage that uses ``storage_align``.

    ``A1`` has shape (8, 16) and its first axis is aligned with
    ``storage_align(axis, 2, 1)``. After StorageFlatten and Simplify the
    test asserts that the first extent of the statement body is ``17 * 8``.
    """
    rows = 8
    cols = 16
    shape = (rows, cols)
    A = tvm.placeholder(shape, name='A')
    A1 = tvm.compute(shape, lambda i, j: A[i, j], name='A1')
    A2 = tvm.compute(shape, lambda i, j: A1[i, j] + 3, name='A2')

    sched = tvm.create_schedule(A2.op)
    # NOTE(review): presumably factor=2, offset=1 pads each 16-element row to
    # a stride of 17 -- confirm against the storage_align documentation.
    sched[A1].storage_align(A1.op.axis[0], 2, 1)

    bounds = tvm.schedule.InferBound(sched)
    assert isinstance(bounds, tvm.container.Map)
    stmt = tvm.schedule.ScheduleOps(sched, bounds)

    buffer_map = {
        A: tvm.decl_buffer(A.shape, A.dtype, name='A'),
        A2: tvm.decl_buffer(A2.shape, A2.dtype, name='A2'),
    }
    stmt = tvm.ir_pass.StorageFlatten(stmt, buffer_map, 64)
    stmt = tvm.ir_pass.Simplify(stmt)
    assert stmt.body.extents[0].value == 17 * 8
开发者ID:mlperf,项目名称:training_results_v0.6,代码行数:19,代码来源:test_pass_storage_flatten.py

示例10: test_reorder

# 需要导入模块: import tvm [as 别名]
# 或者: from tvm import compute [as 别名]
def test_reorder():
    """Check that ``reorder`` rearranges leaf iter vars and rejects duplicates.

    After two splits the leaf order must differ from ``(xi2, xi1, xo)``;
    after ``reorder`` it must match. Passing the same IterVar twice must
    raise ``tvm.TVMError``.
    """
    m = tvm.var('m')
    A = tvm.placeholder((m,), name='A')
    T = tvm.compute(m, lambda i: A[i+1])

    sched = tvm.create_schedule(T.op)
    outer, inner = sched[T].split(T.op.axis[0], factor=10)
    inner_hi, inner_lo = sched[T].split(inner, factor=2)
    wanted = (inner_lo, inner_hi, outer)

    assert tuple(sched[T].leaf_iter_vars) != wanted
    sched[T].reorder(*wanted)
    assert tuple(sched[T].leaf_iter_vars) == wanted

    try:
        # pass duplicate IterVar
        # must raise an error
        sched[T].reorder(inner_lo, inner_hi, inner_lo)
    except tvm.TVMError:
        pass
    else:
        # Reached only when the duplicate was accepted without an error.
        assert False
开发者ID:mlperf,项目名称:training_results_v0.6,代码行数:21,代码来源:test_lang_schedule.py

示例11: test_tensor_intrin

# 需要导入模块: import tvm [as 别名]
# 或者: from tvm import compute [as 别名]
def test_tensor_intrin():
    """Declare a 16-element vector-add tensor intrinsic and tensorize with it.

    First checks the fields of the declared intrinsic, then splits a
    32-element add by 16 and tensorizes the inner axis, asserting the
    iter-var attributes record the intrinsic and the Tensorized type.
    """
    n = 16
    x = tvm.placeholder((n,), name='x')
    y = tvm.placeholder((n,), name='y')
    z = tvm.compute(x.shape, lambda i: x[i] + y[i], name='z')

    def intrin_func(ins, outs):
        src = ins[0]
        assert isinstance(src, tvm.schedule.Buffer)
        assert src.shape[0].value == n
        return tvm.call_packed("vadd", src.data, outs[0].data, src.shape[0])

    intrin = tvm.decl_tensor_intrin(z.op, intrin_func)
    assert intrin.op == z.op
    assert intrin.reduce_init is None
    assert tuple(intrin.inputs) == tuple(z.op.input_tensors)
    assert intrin.buffers[0].shape[0].value == n

    # A second, larger computation whose inner 16-wide tile is tensorized.
    m = 32
    wide_x = tvm.placeholder((m,), name='x')
    wide_y = tvm.placeholder((m,), name='y')
    wide_z = tvm.compute(wide_x.shape, lambda i: wide_x[i] + wide_y[i], name='z')
    sched = tvm.create_schedule(wide_z.op)
    _outer, tile = sched[wide_z].split(wide_z.op.axis[0], factor=n)
    sched[wide_z].tensorize(tile, intrin)

    tile_attrs = sched[wide_z].iter_var_attrs[tile]
    assert tile_attrs.tensor_intrin == intrin
    assert sched[wide_z].iter_var_attrs[tile].iter_type == tvm.schedule.IterVar.Tensorized
开发者ID:mlperf,项目名称:training_results_v0.6,代码行数:25,代码来源:test_lang_schedule.py

示例12: test_thread_axis

# 需要导入模块: import tvm [as 别名]
# 或者: from tvm import compute [as 别名]
def test_thread_axis():
    """Run LoopPartition on a stage whose split axis is bound to ``threadIdx.x``.

    After LoopPartition and Simplify the test asserts that the string form
    of ``stmt.body.body.body.first`` does not contain ``'if'``.
    """
    rows = tvm.var('m')
    cols = tvm.var('l')
    A = tvm.placeholder((rows, cols), name='A')
    B = tvm.compute((rows, cols), lambda i, j: A[i, j] + 3, name='B')
    sched = tvm.create_schedule(B.op)

    sched[B].set_scope("shared")
    num_thread = 16
    _outer, inner = sched[B].split(B.op.axis[0], 32)
    thread_part, _rest = sched[B].split(inner, nparts=num_thread)
    sched[B].bind(thread_part, tvm.thread_axis("threadIdx.x"))

    bounds = tvm.schedule.InferBound(sched)
    stmt = tvm.schedule.ScheduleOps(sched, bounds)
    stmt = tvm.ir_pass.LoopPartition(stmt, False)
    stmt = tvm.ir_pass.Simplify(stmt)

    partitioned_head = stmt.body.body.body.first
    assert 'if' not in str(partitioned_head)
开发者ID:mlperf,项目名称:training_results_v0.6,代码行数:20,代码来源:test_pass_loop_partition.py

示例13: test_vectorize

# 需要导入模块: import tvm [as 别名]
# 或者: from tvm import compute [as 别名]
def test_vectorize():
    """Lower a block/thread-bound, vectorized scaled add and inspect the result.

    The innermost axis (factor 4) is vectorized. The test asserts that the
    condition of the node at ``stmt.body.body.body.body.body`` does not
    mention that axis, and that its ``then_case`` contains a ``Ramp``.
    """
    n = tvm.var('n')
    A = tvm.placeholder((n,), name='A')
    B = tvm.placeholder((n,), name='B')
    bias = tvm.var("bias", dtype="float32")
    scale = tvm.var("scale", dtype="float32")
    C = tvm.compute(A.shape, lambda *i: A(*i) + B(*i) * scale + bias, name='C')

    # schedule
    sched = tvm.create_schedule(C.op)
    # create iter var and assign them tags.
    num_thread = 32
    block_axis, per_block = sched[C].split(C.op.axis[0], factor=num_thread*4)
    lane_axis, per_lane = sched[C].split(per_block, nparts=num_thread)
    _, vec_axis = sched[C].split(per_lane, factor=4)
    sched[C].bind(block_axis, tvm.thread_axis("blockIdx.x"))
    sched[C].bind(lane_axis, tvm.thread_axis("threadIdx.x"))
    sched[C].vectorize(vec_axis)

    # NOTE(review): only A and B are passed to lower(), not C -- confirm intended.
    stmt = lower(sched, [A, B])
    body = stmt.body.body.body.body.body
    assert vec_axis.var.name not in str(body.condition)

    def is_ramp(node):
        return isinstance(node, tvm.expr.Ramp)

    assert any(collect_visit(body.then_case, is_ramp))
开发者ID:mlperf,项目名称:training_results_v0.6,代码行数:23,代码来源:test_pass_loop_partition.py

示例14: test_thread_axis2

# 需要导入模块: import tvm [as 别名]
# 或者: from tvm import compute [as 别名]
def test_thread_axis2():
    """Lower a thread-bound add whose innermost split factor is symbolic.

    The test asserts that the extent of the loop found at
    ``stmt.body.body.body.body.body.first`` does not mention ``threadIdx``.
    """
    length = tvm.convert(4096)
    sym_factor = tvm.var('m')
    A = tvm.placeholder((length,), name='A')
    B = tvm.placeholder((length,), name='B')
    C = tvm.compute(A.shape, lambda i: A[i] + B[i], name='C')

    sched = tvm.create_schedule(C.op)
    num_thread = 32
    block_axis, per_block = sched[C].split(C.op.axis[0], factor=32)
    lane_axis, per_lane = sched[C].split(per_block, nparts=num_thread)
    # The last split uses the symbolic variable as its factor.
    sched[C].split(per_lane, factor=sym_factor)
    sched[C].bind(block_axis, tvm.thread_axis("blockIdx.x"))
    sched[C].bind(lane_axis, tvm.thread_axis("threadIdx.x"))

    # NOTE(review): only A and B are passed to lower(), not C -- confirm intended.
    stmt = lower(sched, [A, B])
    for_body = stmt.body.body.body.body.body.first
    assert 'threadIdx' not in str(for_body.extent)
开发者ID:mlperf,项目名称:training_results_v0.6,代码行数:18,代码来源:test_pass_loop_partition.py

示例15: test_cuda_vectorize_load

# 需要导入模块: import tvm [as 别名]
# 或者: from tvm import compute [as 别名]
def test_cuda_vectorize_load():
    """Copy vector-typed int8 data through a CUDA kernel and compare the result.

    Runs the check for 64 elements with 8 and with 16 lanes. Each check
    prints a message and returns early when no CUDA GPU is usable.
    """
    num_thread = 8

    def check_cuda(dtype, n, lanes):
        cuda_ready = tvm.gpu(0).exist and tvm.module.enabled("cuda")
        if not cuda_ready:
            print("skip because cuda is not enabled..")
            return
        ctx = tvm.gpu(0)

        # Element type is a vector type such as "int8x8".
        vec_dtype = "%sx%d" % (dtype, lanes)
        A = tvm.placeholder((n,), name='A', dtype=vec_dtype)
        B = tvm.compute((n,), lambda i: A[i], name='B')

        sched = tvm.create_schedule(B.op)
        block_axis, lane_axis = sched[B].split(B.op.axis[0], factor=num_thread)
        sched[B].bind(block_axis, tvm.thread_axis("blockIdx.x"))
        sched[B].bind(lane_axis, tvm.thread_axis("threadIdx.x"))
        fun = tvm.build(sched, [A, B], "cuda", name="vector_load")

        np_a = np.random.randint(low=-128, high=127, size=(n,lanes))
        a = tvm.nd.empty((n,), A.dtype, ctx).copyfrom(np_a)
        b = tvm.nd.empty((n,), B.dtype, ctx)
        fun(a, b)
        np.testing.assert_allclose(a.asnumpy(), b.asnumpy())

    for lane_count in (8, 16):
        check_cuda("int8", 64, lane_count)
开发者ID:mlperf,项目名称:training_results_v0.6,代码行数:23,代码来源:test_codegen_cuda.py


注:本文中的tvm.compute方法示例由纯净天空整理自Github/MSDocs等开源代码及文档管理平台,相关代码片段筛选自各路编程大神贡献的开源项目,源码版权归原作者所有,传播和使用请参考对应项目的License;未经允许,请勿转载。