This article collects typical usage examples of Python's toolz.reduce function. If you have been wondering what exactly reduce does, how to use it, or what it is good for, the curated examples below may help.
Fifteen code examples of the reduce function are shown below, sorted by popularity by default. You can upvote the examples you like or find useful; your feedback helps the system recommend better Python code examples.
Example 1: fold

def fold(self, binop, combine=None, initial=no_default, split_every=None):
    """ Parallelizable reduction

    Fold is like the builtin function ``reduce`` except that it works in
    parallel. Fold takes two binary operator functions, one to reduce each
    partition of our dataset and another to combine results between
    partitions

    1. ``binop``: Binary operator to reduce within each partition
    2. ``combine``: Binary operator to combine results from binop

    Sequentially this would look like the following:

    >>> intermediates = [reduce(binop, part) for part in partitions]  # doctest: +SKIP
    >>> final = reduce(combine, intermediates)  # doctest: +SKIP

    If only one function is given then it is used for both functions
    ``binop`` and ``combine`` as in the following example to compute the
    sum:

    >>> def add(x, y):
    ...     return x + y

    >>> b = from_sequence(range(5))
    >>> b.fold(add).compute()  # doctest: +SKIP
    10

    In full form we provide both binary operators as well as their default
    arguments

    >>> b.fold(binop=add, combine=add, initial=0).compute()  # doctest: +SKIP
    10

    More complex binary operators are also doable

    >>> def add_to_set(acc, x):
    ...     ''' Add new element x to set acc '''
    ...     return acc | set([x])
    >>> b.fold(add_to_set, set.union, initial=set()).compute()  # doctest: +SKIP
    {0, 1, 2, 3, 4}

    See Also
    --------
    Bag.foldby
    """
    token = tokenize(self, binop, combine, initial)
    combine = combine or binop
    a = 'foldbinop-{0}-{1}'.format(funcname(binop), token)
    b = 'foldcombine-{0}-{1}'.format(funcname(combine), token)
    initial = quote(initial)
    if initial is not no_default:
        return self.reduction(curry(_reduce, binop, initial=initial),
                              curry(_reduce, combine),
                              split_every=split_every)
    else:
        from toolz.curried import reduce
        return self.reduction(reduce(binop), reduce(combine),
                              split_every=split_every)
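A minimal, hedged usage sketch of the method above (assuming a standard dask installation, where ``from_sequence`` lives in ``dask.bag``):

import dask.bag as db
from operator import add

b = db.from_sequence(range(5), npartitions=2)

# one binop serves as both the per-partition reducer and the combiner
assert b.fold(add).compute() == 10

# explicit binop/combine with an initial value for each partition
assert b.fold(binop=add, combine=add, initial=0).compute() == 10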
Example 2: test_fold

def test_fold():
    assert fold(add, range(10), 0) == reduce(add, range(10), 0)
    assert fold(add, range(10), 0, chunksize=2) == reduce(add, range(10), 0)
    assert fold(add, range(10)) == fold(add, range(10), 0)

    def setadd(s, item):
        s = s.copy()
        s.add(item)
        return s

    assert fold(setadd, [1, 2, 3], set()) == set((1, 2, 3))
    assert (fold(setadd, [1, 2, 3], set(), chunksize=2, combine=set.union)
            == set((1, 2, 3)))
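The ``fold`` under test here is ``toolz.sandbox.parallel.fold``; a hedged sketch of how it is typically driven:

from operator import add
from toolz.sandbox.parallel import fold

# fold splits the sequence into chunks, reduces each chunk with the binop,
# then merges the partial results with combine (the binop itself by default);
# passing a multiprocessing Pool's map via map= parallelizes the chunk step
assert fold(add, range(100), 0, chunksize=10) == sum(range(100))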
Example 3: column_map

def column_map(tables, columns):
    """
    Take a list of tables and a list of column names and resolve which
    columns come from which table.

    Parameters
    ----------
    tables : sequence of _DataFrameWrapper or _TableFuncWrapper
        Could also be a sequence of modified pandas.DataFrames; the important
        thing is that they have ``.name`` and ``.columns`` attributes.
    columns : sequence of str
        The column names of interest.

    Returns
    -------
    col_map : dict
        Maps table names to lists of column names.

    """
    if not columns:
        return {t.name: None for t in tables}

    columns = set(columns)
    colmap = {t.name: list(set(t.columns).intersection(columns)) for t in tables}
    foundcols = toolz.reduce(lambda x, y: x.union(y), (set(v) for v in colmap.values()))
    if foundcols != columns:
        raise RuntimeError('Not all required columns were found. '
                           'Missing: {}'.format(list(columns - foundcols)))
    return colmap
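A hedged usage sketch; ``FakeTable`` below is a hypothetical stand-in for the wrapper classes, since all ``column_map`` needs is ``.name`` and ``.columns``:

from collections import namedtuple

FakeTable = namedtuple('FakeTable', ['name', 'columns'])
households = FakeTable('households', ['income', 'tenure'])
buildings = FakeTable('buildings', ['rent', 'sqft'])

# each requested column is resolved to the table that provides it
assert column_map([households, buildings], ['income', 'rent']) == \
    {'households': ['income'], 'buildings': ['rent']}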
Example 4: estimate_graph_size

def estimate_graph_size(old_chunks, new_chunks):
    """ Estimate the graph size during a rechunk computation.
    """
    # Estimate the number of intermediate blocks that will be produced
    # (we don't use intersect_chunks() which is much more expensive)
    crossed_size = reduce(mul, (len(oc) + len(nc)
                                for oc, nc in zip(old_chunks, new_chunks)))
    return crossed_size
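For intuition, a small worked example (assuming the function and its ``reduce``/``mul`` imports are in scope):

old_chunks = ((5, 5), (4, 4, 4))        # a 2 x 3 grid of blocks
new_chunks = ((2, 2, 2, 2, 2), (6, 6))  # a 5 x 2 grid of blocks

# (2 + 5) * (3 + 2) = 35 estimated intermediate blocks
assert estimate_graph_size(old_chunks, new_chunks) == 35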
Example 5: init_db

def init_db(chanjo_db, bed_stream, overwrite=False):
    """Build a new database instance from the Chanjo BED stream.

    Args:
        chanjo_db (Store): initialized Store class instance
        bed_stream (sequence): Chanjo-style BED-stream
        overwrite (bool, optional): whether to automatically overwrite an
            existing database, defaults to False
    """
    # check if the database already exists (expect 'mysql' to exist)
    # 'dialect' is in the form of '<db_type>+<connector>'
    if chanjo_db.dialect == 'mysql' or path(chanjo_db.uri).exists():
        if overwrite:
            # wipe the database clean with a warning
            chanjo_db.tare_down()
        elif chanjo_db.dialect == 'sqlite':
            # prevent accidentally wiping an existing database
            raise OSError(errno.EEXIST, chanjo_db.uri)

    # set up new tables
    chanjo_db.set_up()

    superblocks = pipe(
        bed_stream,
        map(text_type.rstrip),
        map(split(sep='\t')),
        map(lambda row: bed_to_interval(*row)),
        map(build_interval(chanjo_db)),
        concat,
        aggregate,
        map(build_block(chanjo_db)),
        aggregate,
        map(build_superblock(chanjo_db))
    )

    # reduce the superblocks and commit every contig
    reduce(commit_per_contig(chanjo_db), superblocks, 'chr0')

    # commit also the last contig
    chanjo_db.save()
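The pipeline above leans on toolz's curried ``map`` plus a final ``reduce``; a minimal, self-contained sketch of the same pattern, with plain strings standing in for a Chanjo BED stream:

from toolz import pipe, reduce
from toolz.curried import map

# parse tab-separated lines lazily, then fold the parsed rows into one total
total = pipe(
    ["1\t2", "3\t4", "5\t6"],
    map(lambda line: line.split('\t')),
    map(lambda row: int(row[0]) + int(row[1])),
    lambda nums: reduce(lambda acc, n: acc + n, nums, 0),
)
assert total == 21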
Example 6: get

def get(self, dot_key, default=None, scope=None):
    """Get nested value using a dot separated key.

    Args:
        dot_key (str): key on the format "section.subsection.key"
        default (object, optional): default unless key exists
        scope (dict, optional): nested dict to descend into

    Returns:
        object: value for the key or the default object
    """
    if scope is None:
        scope = self
    return reduce(rget(default=default), dot_key.split('.'), scope)
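The same dotted-key walk, written self-contained; ``descend`` below is a hypothetical stand-in for the curried ``rget`` helper:

from functools import reduce

config = {'db': {'mysql': {'port': 3306}}}

def descend(scope, key, default=None):
    # one step of the lookup; non-dict scopes short-circuit to the default
    return scope.get(key, default) if isinstance(scope, dict) else default

assert reduce(descend, 'db.mysql.port'.split('.'), config) == 3306
assert reduce(descend, 'db.missing.port'.split('.'), config) is None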
Example 7: stack

def stack(*imgs, **kwargs):
    """Combine images together, overlaying later images onto earlier ones.

    Parameters
    ----------
    imgs : iterable of Image
        The images to combine.
    how : str, optional
        The compositing operator to combine pixels. Default is `'over'`.
    """
    if not imgs:
        raise ValueError("No images passed in")
    for i in imgs:
        if not isinstance(i, Image):
            raise TypeError("Expected `Image`, got: `{0}`".format(type(i)))
    op = composite_op_lookup[kwargs.get('how', 'over')]
    if len(imgs) == 1:
        return imgs[0]
    imgs = xr.align(*imgs, copy=False, join='outer')
    out = tz.reduce(tz.flip(op), [i.data for i in imgs])
    return Image(out, coords=imgs[0].coords, dims=imgs[0].dims)
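Why ``tz.flip``? ``reduce`` folds left-to-right, but compositing wants each later image applied onto the accumulated result, so the operator's arguments are flipped. A hedged toy illustration:

from toolz import flip, reduce

layers = ['base', 'middle', 'top']
paint = lambda src, dst: '{0} over {1}'.format(src, dst)

# flip hands the accumulator to paint as the second argument
assert reduce(flip(paint), layers) == 'top over middle over base'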
Example 8: column_list

def column_list(tables, columns):
    """
    Take a list of tables and a list of column names and return the columns
    that are present in the tables.

    Parameters
    ----------
    tables : sequence of _DataFrameWrapper or _TableFuncWrapper
        Could also be a sequence of modified pandas.DataFrames; the important
        thing is that they have ``.name`` and ``.columns`` attributes.
    columns : sequence of str
        The column names of interest.

    Returns
    -------
    cols : list
        List of column names available in the tables.

    """
    columns = set(columns)
    foundcols = toolz.reduce(lambda x, y: x.union(y), (set(t.columns) for t in tables))
    return list(columns.intersection(foundcols))
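Reusing the hypothetical ``FakeTable`` stand-in from the ``column_map`` sketch above:

requested = ['income', 'rent', 'bogus']
found = column_list([households, buildings], requested)

# 'bogus' is silently dropped; ordering comes from a set intersection,
# so compare as sets rather than lists
assert set(found) == {'income', 'rent'}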
Example 9: compute_one

def compute_one(expr, c, **kwargs):
    c = iter(c)
    n = 0
    cs = []
    for chunk in c:
        cs.append(chunk)
        n += len(chunk)
        if n >= expr.n:
            break

    if not cs:
        return []

    if len(cs) == 1:
        return compute_one(expr, cs[0])

    t1 = TableSymbol('t1', expr.schema)
    t2 = TableSymbol('t2', expr.schema)
    binop = lambda a, b: compute(union(t1, t2), {t1: a, t2: b})
    u = reduce(binop, cs)

    return compute_one(expr, u)
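Stripped of the Blaze machinery, the combining step is just a ``reduce`` of a union-like binop over the collected chunks; a self-contained sketch:

from functools import reduce

chunks = [[1, 2], [3, 4], [5]]
binop = lambda a, b: a + b   # stand-in for the union of two table chunks

assert reduce(binop, chunks) == [1, 2, 3, 4, 5]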
Example 10: Filter

def Filter(t, *conditions):
    return t[reduce(and_, conditions)]
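The same AND-all-conditions-then-index pattern works on plain pandas boolean masks; a hedged sketch:

import pandas as pd
from functools import reduce
from operator import and_

df = pd.DataFrame({'x': [1, 2, 3, 4], 'y': ['a', 'b', 'a', 'b']})
conditions = [df.x > 1, df.y == 'a']

# AND the boolean masks together, then filter the frame
filtered = df[reduce(and_, conditions)]
assert list(filtered.x) == [3]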
Example 11: _reduce

def _reduce(binop, sequence, initial=no_default):
    if initial is not no_default:
        return reduce(binop, sequence, initial)
    else:
        return reduce(binop, sequence)
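A quick sketch of the two branches (assuming the ``no_default`` sentinel and ``reduce`` are in scope as above):

from operator import add

assert _reduce(add, [1, 2, 3], initial=10) == 16  # seed participates
assert _reduce(add, [1, 2, 3]) == 6               # plain two-argument reduce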
Example 12: find_merge_rechunk

def find_merge_rechunk(old_chunks, new_chunks, block_size_limit):
    """
    Find an intermediate rechunk that would merge some adjacent blocks
    together in order to get us nearer the *new_chunks* target, without
    violating the *block_size_limit* (in number of elements).
    """
    ndim = len(old_chunks)

    old_largest_width = [max(c) for c in old_chunks]
    new_largest_width = [max(c) for c in new_chunks]

    graph_size_effect = {
        dim: len(nc) / len(oc)
        for dim, (oc, nc) in enumerate(zip(old_chunks, new_chunks))
    }

    block_size_effect = {
        dim: new_largest_width[dim] / old_largest_width[dim]
        for dim in range(ndim)
    }

    # Our goal is to reduce the number of nodes in the rechunk graph
    # by merging some adjacent chunks, so consider dimensions where we can
    # reduce the # of chunks
    merge_candidates = [dim for dim in range(ndim)
                        if graph_size_effect[dim] <= 1.0]

    # Merging along each dimension reduces the graph size by a certain factor
    # and increases the largest block size in memory by a certain factor.
    # We want to optimize the graph size while staying below the given
    # block_size_limit.  This is in effect a knapsack problem, except with
    # multiplicative values and weights.  Just use a greedy algorithm
    # by trying dimensions in decreasing value / weight order.
    def key(k):
        gse = graph_size_effect[k]
        bse = block_size_effect[k]
        if bse == 1:
            bse = 1 + 1e-9
        return np.log(gse) / np.log(bse)

    sorted_candidates = sorted(merge_candidates, key=key)

    largest_block_size = reduce(mul, old_largest_width)

    chunks = list(old_chunks)
    memory_limit_hit = False

    for dim in sorted_candidates:
        # Examine this dimension for possible graph reduction
        new_largest_block_size = (
            largest_block_size * new_largest_width[dim] // old_largest_width[dim])
        if new_largest_block_size <= block_size_limit:
            # Full replacement by new chunks is possible
            chunks[dim] = new_chunks[dim]
            largest_block_size = new_largest_block_size
        else:
            # Try a partial rechunk, dividing the new chunks into
            # smaller pieces
            largest_width = old_largest_width[dim]
            chunk_limit = int(block_size_limit * largest_width / largest_block_size)
            c = divide_to_width(new_chunks[dim], chunk_limit)
            if len(c) <= len(old_chunks[dim]):
                # We manage to reduce the number of blocks, so do it
                chunks[dim] = c
                largest_block_size = largest_block_size * max(c) // largest_width
                memory_limit_hit = True

    assert largest_block_size == _largest_block_size(chunks)
    assert largest_block_size <= block_size_limit
    return tuple(chunks), memory_limit_hit
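For intuition on the greedy ``key``: it scores each candidate dimension by graph shrinkage per unit of block growth, so more negative is better. A toy calculation:

import numpy as np

# merging 4 old chunks into 2 new ones halves the graph along this
# dimension (gse = 0.5) while doubling the block width (bse = 2.0)
gse, bse = 2 / 4, 2.0
assert np.isclose(np.log(gse) / np.log(bse), -1.0)  # strongly negative: attractive merge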
Example 13: _largest_block_size

def _largest_block_size(chunks):
    return reduce(mul, map(max, chunks))

Example 14: _number_of_blocks

def _number_of_blocks(chunks):
    return reduce(mul, map(len, chunks))
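Both helpers are one-line ``reduce``/``mul`` folds over the chunk tuples; a quick self-contained check:

from functools import reduce
from operator import mul

chunks = ((5, 5), (4, 4, 4))
assert reduce(mul, map(max, chunks)) == 20  # largest block: 5 * 4 elements
assert reduce(mul, map(len, chunks)) == 6   # total blocks: 2 * 3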
Example 15: test__bi_gram

def test__bi_gram(string, expected):
    assert(list(tkn.bi_gram(string)) == expected)


@pytest.mark.parametrize("string,expected",
                         [("foo-bar", []),
                          ("foobazbar", []),
                          ("foo*bar*baz", ["foo_bar_baz"]),
                          ])
def test__tri_gram(string, expected):
    assert(list(tkn.tri_gram(string)) == expected)


sum_tally_tuples = lambda tpls: reduce_c(lambda x, y: x + y[1], tpls, 0)

extext = tlz.reduce(lambda x, y: x + y, ["aaa " * 20,
                                         "bbb " * 10,
                                         "ccc " * 3,
                                         "ddd " * 1])


@pytest.mark.parametrize("string,length,total,parser",
                         [(extext, 4, 34, tkn.uni_gram),
                          ])
def test___gram_counts(string, length, total, parser):
    bow = tkn.gram_counts(parser, string)
    assert(len(bow) == length)
    assert(sum_tally_tuples(bow) == total)
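A hedged sketch of the ``reduce_c`` (curried reduce) pattern used by ``sum_tally_tuples`` above; partial application holds the binop until the data arrives:

from toolz.curried import reduce as reduce_c

tally = [('aaa', 20), ('bbb', 10), ('ccc', 3), ('ddd', 1)]

# fully applied, it behaves exactly like functools.reduce
assert reduce_c(lambda x, y: x + y[1], tally, 0) == 34

# partially applied, it waits for the sequence and the seed
summer = reduce_c(lambda x, y: x + y[1])
assert summer(tally, 0) == 34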
@pytest.mark.parametrize("string,length,total",
[(extext, 4, 34),