

Python tvm.reduce_axis Function Code Examples

This article collects typical usage examples of the Python function tvm.reduce_axis. If you are unsure what tvm.reduce_axis does, how to call it, or what real-world uses look like, the curated examples below should help.


The following shows 15 code examples of the reduce_axis function, sorted by popularity by default.
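Before the examples, here is a minimal sketch of the core idea: tvm.reduce_axis declares a reduction domain, and tvm.sum (or another reducer) iterates over it inside tvm.compute. The sketch assumes the same legacy top-level tvm API (tvm.placeholder, tvm.compute, tvm.sum) that all the examples below use; in current TVM releases the same functions live under tvm.te.

import tvm

# Minimal sketch: matrix multiplication C = A @ B with a reduction axis k.
n = tvm.var("n")
A = tvm.placeholder((n, n), name="A")
B = tvm.placeholder((n, n), name="B")
k = tvm.reduce_axis((0, n), name="k")            # reduction domain [0, n)
C = tvm.compute((n, n),
                lambda i, j: tvm.sum(A[i, k] * B[k, j], axis=k),
                name="C")
s = tvm.create_schedule(C.op)
print(tvm.lower(s, [A, B, C], simple_mode=True))  # inspect the generated loop nest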

Example 1: test_lstm_cell_inline

def test_lstm_cell_inline():
    num_step = 128
    num_input = 256
    num_hidden = 1152
    batch_size = 4
    # Global transition matrix
    X = tvm.placeholder((num_step - 1, batch_size, num_input), name="X")
    Wi2h = tvm.placeholder((4, num_hidden, num_input), name="Wi2h")
    Wh2h = tvm.placeholder((4, num_hidden, num_hidden), name="Wh2h")
    # h: output hidden state, c: cell state.
    s_state_h = tvm.placeholder((num_step, batch_size, num_hidden))
    s_state_c = tvm.placeholder((num_step, batch_size, num_hidden))
    s_init_c = tvm.compute((1, batch_size, num_hidden),
                           lambda *i: 0.0, name="init_c")
    s_init_h = tvm.compute((1, batch_size, num_hidden),
                           lambda *i: 0.0, name="init_h")
    # LSTM transition
    k = tvm.reduce_axis((0, num_input), name="ki2h")
    s_i2h = tvm.compute(
        (num_step, 4, batch_size, num_hidden),
        lambda t, x, i, j: tvm.sum(X[t - 1, i, k] * Wi2h[x, j, k], axis=k),
        name="s_i2h")
    k = tvm.reduce_axis((0, num_hidden), name="ki2h")
    s_h2h = tvm.compute(
        (num_step, 4, batch_size, num_hidden),
        lambda t, x, i, j: tvm.sum(s_state_h[t - 1, i, k] * Wh2h[x, j, k], axis=k),
        name="s_h2h")
    # Gate rules
    gates = tvm.compute(s_i2h.shape, lambda *i:
                        s_i2h(*i) + s_h2h(*i), name="gates")
    gshape = (num_step, batch_size, num_hidden)
    in_gate = tvm.compute(gshape, lambda t, i, j: tvm.sigmoid(gates[t, 0, i, j]), name="in_gate")
    in_transform = tvm.compute(gshape, lambda t, i, j: tvm.tanh(gates[t, 1, i, j]), name="in_transform")
    forget_gate = tvm.compute(gshape, lambda t, i, j: tvm.sigmoid(gates[t, 2, i, j]), name="forget_gate")
    out_gate = tvm.compute(gshape, lambda t, i, j: tvm.sigmoid(gates[t, 3, i, j]), name="out_gate")
    next_c = tvm.compute(gshape,
                         lambda t, i, j:
                         forget_gate[t, i, j] * s_state_c[t - 1, i, j] +
                         in_gate[t, i, j] * in_transform[t, i, j], name="next_c")
    next_h = tvm.compute(gshape,
                         lambda t, i, j: out_gate[t, i, j] * tvm.tanh(next_c[t, i, j]), name="next_h")
    update_c = tvm.compute(gshape, lambda *i: next_c(*i), name="update_c")
    update_h = tvm.compute(gshape, lambda *i: next_h(*i), name="update_h")
    # scan over time steps (recurrence)
    scan_h, scan_c = tvm.scan(
        [s_init_h, s_init_c],
        [update_h, update_c],
        [s_state_h, s_state_c],
        inputs=[X],
        name="lstm_scan")
    # schedule
    s = tvm.create_schedule(scan_h.op)
    # Inline gate computations
    s[gates].compute_inline()
    s[in_gate].compute_inline()
    s[in_transform].compute_inline()
    s[forget_gate].compute_inline()
    s[out_gate].compute_inline()
    # verify we can lower correctly
    tvm.lower(s, [X, Wi2h, Wh2h, scan_h, scan_c])
Contributor: LANHUIYING, Project: tvm, Lines: 60, Source: test_schedule_lstm.py

Example 2: test_rfactor

def test_rfactor():
    n = tvm.var('n')
    k1 = tvm.reduce_axis((0, n), name="k1")
    k2 = tvm.reduce_axis((0, n), name="k2")
    A = tvm.placeholder((n, n, n), name='A')
    B = tvm.compute((n, ), lambda i: tvm.sum(A[i, k1, k2], axis=[k1, k2]))
    # normal schedule
    s = tvm.create_schedule(B.op)
    BF = s.rfactor(B, k1)
    assert(tuple(BF.shape) == (n, n))
    assert(set(BF.op.body[0].axis) == set([k2]))
    assert(s[B].op.body[0].axis[0].dom.extent == n)
    assert(len(s[B].all_iter_vars) == 2)
    # schedule with split
    s = tvm.create_schedule(B.op)
    ko, ki = s[B].split(k1, factor=4)
    xo, xi = s[B].split(B.op.axis[0], factor=8)
    BF = s.rfactor(B, ki)
    assert(BF.shape[0].value == 4)
    assert(BF.shape[1] == n)
    assert(BF.op.body[0].axis[0] ==  k2)
    assert(BF.op.body[0].axis[1].var ==  ko.var)
    assert(s[B].op.body[0].axis[0].dom.extent.value == 4)
    # schedule with factor_axis
    s = tvm.create_schedule(B.op)
    ko, ki = s[B].split(k1, factor=4)
    xo, xi = s[B].split(B.op.axis[0], factor=8)
    BF = s.rfactor(B, ki, 1)
    assert(n == BF.shape[0])
    assert(BF.shape[1].value == 4)
    assert(BF.op.body[0].axis[0] ==  k2)
    assert(BF.op.body[0].axis[1].var ==  ko.var)
    assert(s[B].op.body[0].axis[0].dom.extent.value == 4)
Contributor: bddppq, Project: tvm, Lines: 33, Source: test_lang_schedule.py

Example 3: global_pool

def global_pool(data, pool_type):
    """Perform global pooling on the data

    Parameters
    ----------
    data : tvm.Tensor
        4-D with shape [batch, channel, in_height, in_width]

    pool_type : str
        Pool type, 'max' or 'avg'

    Returns
    -------
    output : tvm.Tensor
        4-D with shape [batch, channel, 1, 1]
    """
    assert len(data.shape) == 4, "only support 4-dim pooling"
    batch, channel, height, width = data.shape

    dheight = tvm.reduce_axis((0, height))
    dwidth = tvm.reduce_axis((0, width))

    if pool_type == 'max':
        return tvm.compute((batch, channel, 1, 1), lambda n, c, h, w: \
                            tvm.max(data[n, c, dheight, dwidth], axis=[dheight, dwidth]), \
                            tag="global_pool_max")
    elif pool_type == 'avg':
        tsum = tvm.compute((batch, channel, 1, 1), lambda n, c, h, w: \
                            tvm.sum(data[n, c, dheight, dwidth], axis=[dheight, dwidth]), \
                            tag="global_pool_sum")
        return tvm.compute((batch, channel, 1, 1), lambda n, c, h, w: \
                            tsum[n, c, h, w] / (height*width).astype(tsum.dtype), \
                            tag=tag.ELEMWISE)
    else:
        raise ValueError("Pool type should be 'avg' or 'max'.")
Contributor: gwli, Project: tvm, Lines: 35, Source: pooling.py
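For context, this operator lives in the topi operator library. A short, hypothetical usage sketch (it assumes the function is exposed as topi.nn.global_pool with the signature shown above; the shapes and names are illustrative only):

import tvm
import topi

# Hypothetical usage of global_pool (assumes topi.nn.global_pool as in the
# topi module this snippet is taken from).
data = tvm.placeholder((1, 64, 56, 56), name="data")   # [batch, channel, H, W]
pool = topi.nn.global_pool(data, pool_type="avg")      # output shape (1, 64, 1, 1)
s = tvm.create_schedule(pool.op)
f = tvm.build(s, [data, pool], target="llvm")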

Example 4: _sample

    def _sample(i, c, ph, pw):
        roi = rois[i]
        batch_index = roi[0].astype('int32')
        roi_start_w, roi_start_h, roi_end_w, roi_end_h = roi[1], roi[2], roi[3], roi[4]
        roi_start_h *= spatial_scale
        roi_end_h *= spatial_scale
        roi_start_w *= spatial_scale
        roi_end_w *= spatial_scale

        # force malformed ROIs to be 1x1
        roi_h = tvm.max(roi_end_h - roi_start_h, tvm.const(1.0, dtype))
        roi_w = tvm.max(roi_end_w - roi_start_w, tvm.const(1.0, dtype))

        bin_h = roi_h / pooled_size_h
        bin_w = roi_w / pooled_size_w

        if sample_ratio > 0:
            roi_bin_grid_h = roi_bin_grid_w = tvm.const(sample_ratio, 'int32')
        else:
            roi_bin_grid_h = tvm.ceil(roi_h / pooled_size_h).astype('int32')
            roi_bin_grid_w = tvm.ceil(roi_w / pooled_size_w).astype('int32')

        count = roi_bin_grid_h * roi_bin_grid_w
        rh = tvm.reduce_axis((0, roi_bin_grid_h))
        rw = tvm.reduce_axis((0, roi_bin_grid_w))
        roi_start_h += ph * bin_h
        roi_start_w += pw * bin_w
        return tvm.sum(_bilinear(batch_index, c,
                                 roi_start_h + (rh + 0.5) * bin_h / roi_bin_grid_h,
                                 roi_start_w + (rw + 0.5) * bin_w / roi_bin_grid_w) / count,
                       axis=[rh, rw])
Contributor: bddppq, Project: tvm, Lines: 31, Source: roi_align.py

Example 5: test_in_bounds_conv_llvm

def test_in_bounds_conv_llvm(loop_tiling=False):
    HSTR = WSTR = 1
    in_channel = 128
    kernel_height = kernel_width = 3
    out_channel = 64
    batch_size = 1
    in_height = in_width = 64
    out_height = out_width = in_height - kernel_height + 1
    data = tvm.placeholder((batch_size, in_channel, in_height, in_width), name='data')
    kernel = tvm.placeholder((kernel_height, kernel_width, in_channel,
        out_channel), name='kernel')
    ic = tvm.reduce_axis((0, in_channel), name='ic')
    kh = tvm.reduce_axis((0, kernel_height), name='kh')
    kw = tvm.reduce_axis((0, kernel_width), name='kw')
    conv = tvm.compute((batch_size, out_channel, out_height, out_width),
                       lambda n, oc, oh, ow: tvm.sum(data[n, ic, oh*HSTR + kh, ow*WSTR + kw] *
                                                     kernel[kh, kw, ic, oc],
                                                     axis=[ic, kh, kw]),
                       name="conv2d")
    s = tvm.create_schedule(conv.op)

    n, oc, oh, ow = conv.op.axis
    if loop_tiling:
        oho, owo, ohi, owi = s[conv].tile(oh, ow, 16, 16)
    lowered_func = tvm.lower(s, [data, kernel, conv], simple_mode=True)
    print(lowered_func.body)
    ctx = tvm.cpu(0)

    f = tvm.build(s, [data, kernel, conv], "llvm")
    data_input = tvm.nd.array(np.random.uniform(
          size=(batch_size, in_channel, in_height, in_width)).astype(tvm.float32), ctx)
    kernel_input = tvm.nd.array(np.random.uniform(
          size=(kernel_height, kernel_width, in_channel, out_channel)).astype(tvm.float32), ctx)
    conv_out = tvm.nd.empty((batch_size, out_channel, out_height, out_width), tvm.float32, ctx)
    f(data_input, kernel_input, conv_out)
Contributor: bddppq, Project: tvm, Lines: 35, Source: test_pass_bound_checkers.py

Example 6: test_conv_tiling

def test_conv_tiling():
    HSTR = WSTR = 1
    in_channel = 128
    kernel_height = kernel_width = 3
    out_channel = 64
    batch_size = 1
    in_height = in_width = 64
    out_height = out_width = in_height - kernel_height + 1
    data = tvm.placeholder((batch_size, in_channel, in_height, in_width), name='data')
    kernel = tvm.placeholder((kernel_height, kernel_width, in_channel,
        out_channel), name='kernel')
    ic = tvm.reduce_axis((0, in_channel), name='ic')
    kh = tvm.reduce_axis((0, kernel_height), name='kh')
    kw = tvm.reduce_axis((0, kernel_width), name='kw')
    conv = tvm.compute((batch_size, out_channel, out_height, out_width),
                       lambda n, oc, oh, ow: tvm.sum(data[n, ic, oh*HSTR + kh, ow*WSTR + kw] *
                                                     kernel[kh, kw, ic, oc],
                                                     axis=[ic, kh, kw]),
                       name="conv2d")
    s = tvm.create_schedule(conv.op)

    n, oc, oh, ow = conv.op.axis
    oho, owo, ohi, owi = s[conv].tile(oh, ow, 16, 16)
    bounds = tvm.schedule.InferBound(s)
    stmt = tvm.schedule.ScheduleOps(s, bounds)
    stmt = tvm.ir_pass.LoopPartition(stmt, True)
    stmt = tvm.ir_pass.Simplify(stmt)
    assert(not any(collect_visit(stmt, lambda x: isinstance(x, tvm.stmt.IfThenElse))))
Contributor: bddppq, Project: tvm, Lines: 28, Source: test_pass_loop_partition.py

Example 7: _declaration_dense_nopack

def _declaration_dense_nopack(cfg, data, weight, bias=None, out_dtype=None):
    if out_dtype is None:
        out_dtype = data.dtype
    batch, in_dim = get_const_tuple(data.shape)
    out_dim, _ = get_const_tuple(weight.shape)
    # create tuning space
    cfg.define_split("tile_x", out_dim, num_outputs=2)
    cfg.define_split("tile_y", batch, num_outputs=2)
    cfg.define_split("tile_k", in_dim, num_outputs=2)
    if cfg.is_fallback:
        _default_dense_nopack_config(cfg, batch, out_dim, in_dim)

    vec = cfg["tile_k"].size[-1]
    k = tvm.reduce_axis((0, in_dim // vec), "k")
    CC = tvm.compute((batch, out_dim, vec),
                     lambda z, y, x: tvm.sum(
                         data[z, k * vec + x].astype(out_dtype) *
                         weight[y, k * vec + x].astype(out_dtype), axis=k))

    kk = tvm.reduce_axis((0, vec), "kk")
    C = tvm.compute((batch, out_dim),
                    lambda y, x: tvm.sum(CC[y, x, kk], axis=kk),
                    tag="dense_nopack")
    if bias is not None:
        C = tvm.compute((batch, out_dim), lambda i, j: C[i, j] + bias[j].astype(out_dtype),
                        tag=tag.BROADCAST)

    return C
Contributor: bddppq, Project: tvm, Lines: 28, Source: dense.py

Example 8: packed_conv2d

def packed_conv2d(data,
                  kernel,
                  padding,
                  strides,
                  out_dtype="int32"):
    """ Packed conv2d function.
    """
    if padding[0]:
        pad_data = topi.nn.pad(data, [0, 0, padding[0], padding[1], 0, 0], name="pad_data")
    else:
        pad_data = data
    assert len(data.shape) == 6
    assert len(kernel.shape) == 6
    oheight = topi.util.simplify((pad_data.shape[2] - kernel.shape[2]) // strides[0] + 1)
    owidth = topi.util.simplify((pad_data.shape[3] - kernel.shape[3]) // strides[1] + 1)
    oshape = (data.shape[0], kernel.shape[0], oheight, owidth, data.shape[4], kernel.shape[4])

    ishape = topi.util.get_const_tuple(data.shape)
    kshape = topi.util.get_const_tuple(kernel.shape)
    assert data.dtype == "int8", data.dtype
    assert kernel.dtype == "int8", kernel.dtype
    d_i = tvm.reduce_axis((0, kshape[2]), name='d_i')
    d_j = tvm.reduce_axis((0, kshape[3]), name='d_j')
    k_o = tvm.reduce_axis((0, ishape[1]), name='k_o')
    k_i = tvm.reduce_axis((0, ishape[-1]), name='k_i')
    hstride, wstride = strides
    res = tvm.compute(
        oshape,
        lambda b_o, c_o, i, j, b_i, c_i: tvm.sum(
            pad_data[b_o, k_o, i*hstride+d_i, j*wstride+d_j, b_i, k_i].astype(out_dtype) *
            kernel[c_o, k_o, d_i, d_j, c_i, k_i].astype(out_dtype),
            axis=[k_o, d_i, d_j, k_i]),
        name="res", tag="packed_conv2d")
    return res
Contributor: LANHUIYING, Project: tvm, Lines: 34, Source: vta_conv2d.py

Example 9: _spatial_pack

def _spatial_pack(data, kernel, stride, padding, out_dtype):
    """ Compute convolution with pack on spatial axes. """
    assert data.shape[0].value == 1, "spatial pack convolution only support batch size=1"
    wkl = _get_workload(data, kernel, stride, padding, out_dtype)
    sch = _get_schedule(wkl)

    H, W = wkl.height, wkl.width
    CI, CO = wkl.in_filter, wkl.out_filter
    KH, KW = wkl.hkernel, wkl.wkernel
    HPAD, WPAD = wkl.hpad, wkl.wpad
    HSTR, WSTR = wkl.hstride, wkl.wstride
    HCAT, WCAT = KH-1, KW-1

    VH = sch.vh
    VW = sch.vw
    VC = sch.vc
    UNROLL = sch.unroll

    TH = H + 2*HPAD
    TW = W + 2*WPAD
    OH = (H + 2*HPAD - KH) // HSTR + 1
    OW = (W + 2*WPAD - KW) // WSTR + 1

    dshape = (1, CI, H, W)
    dpshape = (1, CI, TH, TW)
    dvshape = (1, TH//(VH*HSTR), TW//(VW*WSTR), CI, VH*HSTR+HCAT, VW*WSTR+WCAT)

    kshape = (CO, CI, KH, KW)
    kvshape = (CO//VC, CI, KH, KW, VC)

    ovshape = (1, CO // VC, OH // VH, OW // VW, VH, VW, VC)
    oshape = (1, CO, OH, OW)

    DOPAD = (HPAD != 0 and WPAD != 0)
    if DOPAD:
        data_pad = pad(data, (0, 0, HPAD, WPAD), name="data_pad")
    else:
        data_pad = data

    data_vec = tvm.compute(dvshape, lambda n, h, w, ci, vh, vw: \
        data_pad[n][ci][h*VH*HSTR+vh][w*VW*WSTR+vw], name='data_vec')

    kernel_vec = tvm.compute(kvshape, lambda co, ci, dh, dw, vc: \
        kernel[co*VC+vc][ci][dh][dw], name='kernel_vec')

    ci = tvm.reduce_axis((0, CI), name='ci')
    dh = tvm.reduce_axis((0, KH), name='dh')
    dw = tvm.reduce_axis((0, KW), name='dw')

    conv = tvm.compute(ovshape, lambda n, co, h, w, vh, vw, vc: \
        tvm.sum(data_vec[n, h, w, ci, vh*HSTR+dh, vw*WSTR+dw].astype(out_dtype) *
                kernel_vec[co, ci, dh, dw, vc].astype(out_dtype),
                axis=[ci, dh, dw]), name='conv')

    output = tvm.compute(oshape, lambda n, co, h, w:
                         conv[n][co//VC][h//VH][w//VW][h%VH][w%VW][co%VC],
                         name='output_unpack', tag='spatial_conv_output')

    return output
Contributor: gwli, Project: tvm, Lines: 59, Source: conv2d.py

Example 10: test_tensor_reduce_multi_axis

def test_tensor_reduce_multi_axis():
    m = tvm.var('m')
    n = tvm.var('n')
    A = tvm.placeholder((m, n), name='A')
    k1 = tvm.reduce_axis((0, n), "k")
    k2 = tvm.reduce_axis((0, m), "k")
    C = tvm.compute((1,), lambda _: tvm.sum(A[k1, k2], axis=(k1, k2)))
    C = tvm.compute((1,), lambda _: tvm.sum(A[k1, k2], axis=[k1, k2]))
Contributor: gwli, Project: tvm, Lines: 8, Source: test_lang_tensor.py

Example 11: test_verify_compute

def test_verify_compute():
  n = tvm.var("n")
  m = tvm.var("m")
  A = tvm.placeholder((n, m), name='A')
  k = tvm.reduce_axis((0, m), "k")
  k_ = tvm.reduce_axis((0, m-1), "k_")
  f1 = lambda i: tvm.sum(A[i, k], axis=k)
  f2 = lambda i: A[i,0] + 1
  f3 = lambda i: tvm.sum(A[i, k], axis=k) + 1
  f4 = lambda i: A[i,0] * (tvm.sum(A[i, k], axis=k) + 1)
  f5 = lambda i: (tvm.sum(A[i, k], axis=k), A[i,0] + 1)
  f6 = lambda i: (tvm.sum(A[i, k], axis=k), tvm.sum(A[i, k_], axis=k_))

  #
  # Valid compute
  try:
    B = tvm.compute((n,), f1, name="B")
  except tvm._ffi.base.TVMError as ex:
    assert False

  #
  # Valid compute
  try:
    B = tvm.compute((n,), f2, name="B")
  except tvm._ffi.base.TVMError as ex:
    assert False

  #
  # Invalid compute with non top level reduction
  try:
    B = tvm.compute((n,), f3, name="B")
    assert False
  except tvm._ffi.base.TVMError as ex:
    pass

  #
  # Invalid compute with non top level reduction
  try:
    B = tvm.compute((n,), f4, name="B")
    assert False
  except tvm._ffi.base.TVMError as ex:
    pass

  #
  # Invalid compute with reduction and non-reduction batch ops
  try:
    B0, B1 = tvm.compute((n,), f5, name="B")
    assert False
  except tvm._ffi.base.TVMError as ex:
    pass

  #
  # Invalid compute with unequal batch reduction ops
  try:
    B0, B1 = tvm.compute((n,), f6, name="B")
    assert False
  except tvm._ffi.base.TVMError as ex:
    pass
Contributor: LANHUIYING, Project: tvm, Lines: 58, Source: test_lang_verify_compute.py

Example 12: depthwise_conv2d_backward_input_nhwc

def depthwise_conv2d_backward_input_nhwc(Filter, Out_grad, oshape, ishape, stride, padding):
    """Depthwise convolution nhwc backward wrt input operator.

    Parameters
    ----------
    Filter : tvm.Tensor
        4-D with shape [filter_height, filter_width, in_channel, channel_multiplier]

    Out_grad : tvm.Tensor
        4-D with shape [batch, out_height, out_width, out_channel]

    stride : tuple of two ints
        The spatial stride along height and width

    padding : int or str
        Padding size, or ['VALID', 'SAME']

    Returns
    -------
    Output : tvm.Tensor
        4-D with shape [batch, in_height, in_width, in_channel]
    """
    batch, in_h, in_w, in_c = ishape
    _, out_h, out_w, out_c = oshape
    filter_h, filter_w, _, channel_multiplier = Filter.shape
    if isinstance(stride, int):
        stride_h = stride_w = stride
    else:
        stride_h, stride_w = stride

    dilated_out_grad = dilate(Out_grad, [1, stride_h, stride_w, 1], name='dilated_out_grad')

    # padding params in forward propagation
    fpad_top, fpad_left, fpad_bottom, fpad_right = get_pad_tuple(padding, (filter_h, filter_w))
    # padding params in backward propagation
    bpad_top = filter_h - 1 - fpad_top
    bpad_bottom = (filter_h - 1 - fpad_bottom) + (stride_h - 1)
    bpad_left = filter_w - 1 - fpad_left
    bpad_right = (filter_w - 1 - fpad_right) + (stride_w - 1)

    padded_out_grad = pad(dilated_out_grad, \
                                  [0, bpad_top, bpad_left, 0], \
                                  [0, bpad_bottom, bpad_right, 0], \
                                  name='padded_out_grad')

    dh = tvm.reduce_axis((0, filter_h), name='dh')
    dw = tvm.reduce_axis((0, filter_w), name='dw')
    dc = tvm.reduce_axis((0, channel_multiplier), name='dc')

    In_grad = tvm.compute(
        (batch, in_h, in_w, in_c),
        lambda b, h, w, c: tvm.sum(padded_out_grad[b, h+dh, w+dw, c*channel_multiplier + dc] * \
                                   Filter[filter_h-1-dh, filter_w-1-dw, c, dc],
                                   axis=[dh, dw, dc]), tag='depthwise_conv2d_backward_input_nhwc')

    return In_grad
Contributor: bddppq, Project: tvm, Lines: 56, Source: depthwise_conv2d.py

Example 13: _spatial_pack_nhwc

def _spatial_pack_nhwc(data, kernel, stride, padding, activation_bits, weight_bits, out_dtype):
    """ Compute convolution with pack on spatial axes. """
    assert data.shape[0].value == 1, "spatial pack convolution only support batch size=1"
    wkl = _get_workload(data, kernel, stride, padding, out_dtype, "NHWC")
    sch = _get_schedule(wkl, "NHWC")
    VH = sch.vh
    VW = sch.vw
    VC = sch.vc

    data_q = bitpack(data, activation_bits, pack_axis=3, bit_axis=3, pack_type='uint8')
    kernel_vec = _kernel_vec_spatial_pack_nhwc(kernel, weight_bits, VC)
    N, H, W, IB, CI = data_q.shape
    OCO, KH, KW, KB, VC, _ = kernel_vec.shape

    CO = OCO * VC
    HPAD, WPAD, _, _ = get_pad_tuple(padding, kernel)

    if isinstance(stride, (tuple, list)):
        HSTR, WSTR = stride
    else:
        HSTR, WSTR = stride, stride
    HCAT, WCAT = KH-1, KW-1

    PAD_H = H + 2*HPAD
    PAD_W = W + 2*WPAD
    OH = (H + 2*HPAD - KH) // HSTR + 1
    OW = (W + 2*WPAD - KW) // WSTR + 1
    dvshape = (N, PAD_H//(VH*HSTR), PAD_W//(VW*WSTR), VH*HSTR+HCAT, VW*WSTR+WCAT, IB, CI)
    ovshape = (1, OH // VH, OW // VW, CO // VC, VH, VW, VC)
    oshape = (1, OH, OW, CO)

    if (HPAD != 0 and WPAD != 0):
        data_pad = pad(data_q, (0, HPAD, WPAD, 0, 0), name="data_pad")
    else:
        data_pad = data_q

    data_vec = tvm.compute(dvshape, lambda n, h, w, vh, vw, b, ci: \
        data_pad[n][h*VH*HSTR+vh][w*VW*WSTR+vw][b][ci], name='data_vec')

    ci = tvm.reduce_axis((0, CI), name='ci')
    dh = tvm.reduce_axis((0, KH), name='dh')
    dw = tvm.reduce_axis((0, KW), name='dw')
    ib = tvm.reduce_axis((0, IB), name='ib')
    kb = tvm.reduce_axis((0, KB), name='kb')

    def _conv(n, h, w, co, vh, vw, vc):
        return tvm.sum((tvm.popcount(
            kernel_vec[co, dh, dw, kb, vc, ci].astype('uint16') &
            data_vec[n, h, w, vh*HSTR+dh, vw*WSTR+dw, ib, ci].astype('uint16'))
                        << (kb + ib).astype('uint16')), axis=[dh, dw, kb, ib, ci])

    conv = tvm.compute(ovshape, _conv, name='conv')

    return tvm.compute(oshape, lambda n, h, w, co:
                       conv[n][h//VH][w//VW][co//VC][h%VH][w%VW][co%VC].astype(out_dtype),
                       name='output_vec', tag='spatial_bitserial_conv_nhwc')
Contributor: LANHUIYING, Project: tvm, Lines: 56, Source: bitserial_conv2d.py

Example 14: conv2d_transpose_nchw

def conv2d_transpose_nchw(Input, Filter, strides, padding, out_dtype):
    """Transposed 2D convolution nchw forward operator.

    Parameters
    ----------
    Input : tvm.Tensor
        4-D with shape [batch, in_channel, in_height, in_width]

    Filter : tvm.Tensor
        4-D with shape [in_channel, num_filter, filter_height, filter_width]

    strides : tuple of two ints
        The spatial stride along height and width

    padding : int or str
        Padding size, or ['VALID', 'SAME']

    out_dtype : str
        The output data type. This is used for mixed precision.

    Returns
    -------
    Output : tvm.Tensor
        4-D with shape [batch, out_channel, out_height, out_width]
    """
    batch, in_c, in_h, in_w = Input.shape
    _, out_c, filter_h, filter_w = Filter.shape
    stride_h, stride_w = strides
    # dilate stage
    DilatedInput = dilate(Input, [1, 1, stride_h, stride_w], name='DilatedInput')
    # padding stage
    fpad_top, fpad_left, fpad_bottom, fpad_right = get_pad_tuple(padding, (filter_h, filter_w))
    bpad_top = filter_h - 1 - fpad_top
    bpad_bottom = filter_h - 1 - fpad_bottom
    bpad_left = filter_w - 1 - fpad_left
    bpad_right = filter_w - 1 - fpad_right
    PaddedInput = pad(DilatedInput, \
                        [0, 0, bpad_top, bpad_left], \
                        [0, 0, bpad_bottom, bpad_right], \
                        name='PaddedInput')
    # convolution stage
    out_c = simplify(out_c)
    out_h = simplify((in_h - 1) * stride_h - fpad_top - fpad_bottom + filter_h)
    out_w = simplify((in_w - 1) * stride_w - fpad_left - fpad_right + filter_w)
    dc = tvm.reduce_axis((0, in_c), name='dc')
    dh = tvm.reduce_axis((0, filter_h), name='dh')
    dw = tvm.reduce_axis((0, filter_w), name='dw')

    Output = tvm.compute(
        (batch, out_c, out_h, out_w),
        lambda b, c, h, w: tvm.sum(
            PaddedInput[b, dc, h+dh, w+dw].astype(out_dtype) *
            Filter[dc, c, filter_h-1-dh, filter_w-1-dw].astype(out_dtype),
            axis=[dc, dh, dw]), tag="conv2d_transpose_nchw")

    return Output
Contributor: bddppq, Project: tvm, Lines: 56, Source: conv2d_transpose.py
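As with the other topi operators above, a hypothetical invocation might look like the following (it assumes the operator is exposed as topi.nn.conv2d_transpose_nchw with the signature shown in this snippet; shapes and names are illustrative only):

import tvm
import topi

# Hypothetical usage of conv2d_transpose_nchw (assumes topi.nn.conv2d_transpose_nchw).
Input = tvm.placeholder((1, 3, 32, 32), name="Input")    # [batch, in_channel, H, W]
Filter = tvm.placeholder((3, 16, 4, 4), name="Filter")   # [in_channel, num_filter, kh, kw]
Output = topi.nn.conv2d_transpose_nchw(Input, Filter,
                                       strides=(2, 2), padding=(1, 1),
                                       out_dtype="float32")
s = tvm.create_schedule(Output.op)
print(tvm.lower(s, [Input, Filter, Output], simple_mode=True))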

Example 15: test_reduce_simplify

def test_reduce_simplify():
    ck = CanonicalChecker()
    k = tvm.reduce_axis((0, 10), name="k")
    j = tvm.reduce_axis((-5, 3), name="j")
    A = tvm.placeholder((10,), name='A')
    ck.verify(tvm.sum(tvm.expr.Select(k + j < 12, k + j, 0), [k, j]),
              tvm.sum(k + j, [k, j]))
    ck.verify(tvm.sum(A[3], []), A[3])
    # The rule below is not typical, removed for now
    ck.verify(tvm.sum(k / 10, k), tvm.sum(tvm.const(0, "int32"), k))
Contributor: bddppq, Project: tvm, Lines: 10, Source: test_arith_canonical_simplify.py


Note: The tvm.reduce_axis examples in this article were compiled by 纯净天空 from open-source code and documentation platforms such as GitHub and MSDocs. The snippets were selected from open-source projects contributed by various developers; copyright of the source code remains with the original authors. For redistribution and use, please follow the license of the corresponding project; do not reproduce without permission.