This article collects typical usage examples of Python's toolz.reduce function. If you have been wondering what exactly reduce does, how to use it, or what it is good for, the curated examples below may help.
Fifteen code examples of the reduce function are shown below, sorted by popularity by default. You can upvote the examples you like or find useful; your feedback helps the system recommend better Python code examples.
Example 1: fold

def fold(self, binop, combine=None, initial=no_default, split_every=None):
    """ Parallelizable reduction

    Fold is like the builtin function ``reduce`` except that it works in
    parallel. Fold takes two binary operator functions, one to reduce each
    partition of our dataset and another to combine results between
    partitions

    1. ``binop``: Binary operator to reduce within each partition
    2. ``combine``: Binary operator to combine results from binop

    Sequentially this would look like the following:

    >>> intermediates = [reduce(binop, part) for part in partitions]  # doctest: +SKIP
    >>> final = reduce(combine, intermediates)  # doctest: +SKIP

    If only one function is given then it is used for both functions
    ``binop`` and ``combine`` as in the following example to compute the
    sum:

    >>> def add(x, y):
    ...     return x + y

    >>> b = from_sequence(range(5))
    >>> b.fold(add).compute()  # doctest: +SKIP
    10

    In full form we provide both binary operators as well as their default
    arguments

    >>> b.fold(binop=add, combine=add, initial=0).compute()  # doctest: +SKIP
    10

    More complex binary operators are also doable

    >>> def add_to_set(acc, x):
    ...     ''' Add new element x to set acc '''
    ...     return acc | set([x])
    >>> b.fold(add_to_set, set.union, initial=set()).compute()  # doctest: +SKIP
    {0, 1, 2, 3, 4}

    See Also
    --------
    Bag.foldby
    """
    token = tokenize(self, binop, combine, initial)
    combine = combine or binop
    a = 'foldbinop-{0}-{1}'.format(funcname(binop), token)
    b = 'foldcombine-{0}-{1}'.format(funcname(combine), token)
    initial = quote(initial)
    if initial is not no_default:
        return self.reduction(curry(_reduce, binop, initial=initial),
                              curry(_reduce, combine),
                              split_every=split_every)
    else:
        from toolz.curried import reduce
        return self.reduction(reduce(binop), reduce(combine),
                              split_every=split_every)
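A minimal, hedged usage sketch of the method above (assuming a standard dask installation, where ``from_sequence`` lives in ``dask.bag``):

import dask.bag as db
from operator import add

b = db.from_sequence(range(5), npartitions=2)

# one binop serves as both the per-partition reducer and the combiner
assert b.fold(add).compute() == 10

# explicit binop/combine with an initial value for each partition
assert b.fold(binop=add, combine=add, initial=0).compute() == 10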
Example 2: test_fold

def test_fold():
    assert fold(add, range(10), 0) == reduce(add, range(10), 0)
    assert fold(add, range(10), 0, chunksize=2) == reduce(add, range(10), 0)
    assert fold(add, range(10)) == fold(add, range(10), 0)

    def setadd(s, item):
        s = s.copy()
        s.add(item)
        return s

    assert fold(setadd, [1, 2, 3], set()) == set((1, 2, 3))
    assert (fold(setadd, [1, 2, 3], set(), chunksize=2, combine=set.union)
            == set((1, 2, 3)))
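The ``fold`` under test here is ``toolz.sandbox.parallel.fold``; a hedged sketch of how it is typically driven:

from operator import add
from toolz.sandbox.parallel import fold

# fold splits the sequence into chunks, reduces each chunk with the binop,
# then merges the partial results with combine (the binop itself by default);
# passing a multiprocessing Pool's map via map= parallelizes the chunk step
assert fold(add, range(100), 0, chunksize=10) == sum(range(100))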
Example 3: column_map

def column_map(tables, columns):
    """
    Take a list of tables and a list of column names and resolve which
    columns come from which table.

    Parameters
    ----------
    tables : sequence of _DataFrameWrapper or _TableFuncWrapper
        Could also be a sequence of modified pandas.DataFrames; the important
        thing is that they have ``.name`` and ``.columns`` attributes.
    columns : sequence of str
        The column names of interest.

    Returns
    -------
    col_map : dict
        Maps table names to lists of column names.

    """
    if not columns:
        return {t.name: None for t in tables}

    columns = set(columns)
    colmap = {t.name: list(set(t.columns).intersection(columns)) for t in tables}
    foundcols = toolz.reduce(lambda x, y: x.union(y), (set(v) for v in colmap.values()))
    if foundcols != columns:
        raise RuntimeError('Not all required columns were found. '
                           'Missing: {}'.format(list(columns - foundcols)))
    return colmap
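A hedged usage sketch; ``FakeTable`` below is a hypothetical stand-in for the wrapper classes, since all ``column_map`` needs is ``.name`` and ``.columns``:

from collections import namedtuple

FakeTable = namedtuple('FakeTable', ['name', 'columns'])
households = FakeTable('households', ['income', 'tenure'])
buildings = FakeTable('buildings', ['rent', 'sqft'])

# each requested column is resolved to the table that provides it
assert column_map([households, buildings], ['income', 'rent']) == \
    {'households': ['income'], 'buildings': ['rent']}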
Example 4: estimate_graph_size

def estimate_graph_size(old_chunks, new_chunks):
    """ Estimate the graph size during a rechunk computation.
    """
    # Estimate the number of intermediate blocks that will be produced
    # (we don't use intersect_chunks() which is much more expensive)
    crossed_size = reduce(mul, (len(oc) + len(nc)
                                for oc, nc in zip(old_chunks, new_chunks)))
    return crossed_size
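For intuition, a small worked example (assuming the function and its ``reduce``/``mul`` imports are in scope):

old_chunks = ((5, 5), (4, 4, 4))        # a 2 x 3 grid of blocks
new_chunks = ((2, 2, 2, 2, 2), (6, 6))  # a 5 x 2 grid of blocks

# (2 + 5) * (3 + 2) = 35 estimated intermediate blocks
assert estimate_graph_size(old_chunks, new_chunks) == 35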
Example 5: init_db

def init_db(chanjo_db, bed_stream, overwrite=False):
    """Build a new database instance from the Chanjo BED stream.

    Args:
        chanjo_db (Store): initialized Store class instance
        bed_stream (sequence): Chanjo-style BED-stream
        overwrite (bool, optional): whether to automatically overwrite an
            existing database, defaults to False
    """
    # check if the database already exists (expect 'mysql' to exist)
    # 'dialect' is in the form of '<db_type>+<connector>'
    if chanjo_db.dialect == 'mysql' or path(chanjo_db.uri).exists():
        if overwrite:
            # wipe the database clean with a warning
            chanjo_db.tare_down()
        elif chanjo_db.dialect == 'sqlite':
            # prevent accidentally wiping an existing database
            raise OSError(errno.EEXIST, chanjo_db.uri)

    # set up new tables
    chanjo_db.set_up()

    superblocks = pipe(
        bed_stream,
        map(text_type.rstrip),
        map(split(sep='\t')),
        map(lambda row: bed_to_interval(*row)),
        map(build_interval(chanjo_db)),
        concat,
        aggregate,
        map(build_block(chanjo_db)),
        aggregate,
        map(build_superblock(chanjo_db))
    )

    # reduce the superblocks and commit every contig
    reduce(commit_per_contig(chanjo_db), superblocks, 'chr0')

    # commit also the last contig
    chanjo_db.save()
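The pipeline above leans on toolz's curried ``map`` plus a final ``reduce``; a minimal, self-contained sketch of the same pattern, with plain strings standing in for a Chanjo BED stream:

from toolz import pipe, reduce
from toolz.curried import map

# parse tab-separated lines lazily, then fold the parsed rows into one total
total = pipe(
    ["1\t2", "3\t4", "5\t6"],
    map(lambda line: line.split('\t')),
    map(lambda row: int(row[0]) + int(row[1])),
    lambda nums: reduce(lambda acc, n: acc + n, nums, 0),
)
assert total == 21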
Example 6: get

def get(self, dot_key, default=None, scope=None):
    """Get nested value using a dot separated key.

    Args:
        dot_key (str): key on the format "section.subsection.key"
        default (object, optional): default unless key exists
        scope (dict, optional): nested dict to descend into

    Returns:
        object: value for the key or the default object
    """
    if scope is None:
        scope = self
    return reduce(rget(default=default), dot_key.split('.'), scope)
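The same dotted-key walk, written self-contained; ``descend`` below is a hypothetical stand-in for the curried ``rget`` helper:

from functools import reduce

config = {'db': {'mysql': {'port': 3306}}}

def descend(scope, key, default=None):
    # one step of the lookup; non-dict scopes short-circuit to the default
    return scope.get(key, default) if isinstance(scope, dict) else default

assert reduce(descend, 'db.mysql.port'.split('.'), config) == 3306
assert reduce(descend, 'db.missing.port'.split('.'), config) is None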
Example 7: stack

def stack(*imgs, **kwargs):
    """Combine images together, overlaying later images onto earlier ones.

    Parameters
    ----------
    imgs : iterable of Image
        The images to combine.
    how : str, optional
        The compositing operator to combine pixels. Default is `'over'`.
    """
    if not imgs:
        raise ValueError("No images passed in")
    for i in imgs:
        if not isinstance(i, Image):
            raise TypeError("Expected `Image`, got: `{0}`".format(type(i)))
    op = composite_op_lookup[kwargs.get('how', 'over')]
    if len(imgs) == 1:
        return imgs[0]
    imgs = xr.align(*imgs, copy=False, join='outer')
    out = tz.reduce(tz.flip(op), [i.data for i in imgs])
    return Image(out, coords=imgs[0].coords, dims=imgs[0].dims)
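Why ``tz.flip``? ``reduce`` folds left-to-right, but compositing wants each later image applied onto the accumulated result, so the operator's arguments are flipped. A hedged toy illustration:

from toolz import flip, reduce

layers = ['base', 'middle', 'top']
paint = lambda src, dst: '{0} over {1}'.format(src, dst)

# flip hands the accumulator to paint as the second argument
assert reduce(flip(paint), layers) == 'top over middle over base'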
Example 8: column_list

def column_list(tables, columns):
    """
    Take a list of tables and a list of column names and return the columns
    that are present in the tables.

    Parameters
    ----------
    tables : sequence of _DataFrameWrapper or _TableFuncWrapper
        Could also be a sequence of modified pandas.DataFrames; the important
        thing is that they have ``.name`` and ``.columns`` attributes.
    columns : sequence of str
        The column names of interest.

    Returns
    -------
    cols : list
        List of column names available in the tables.

    """
    columns = set(columns)
    foundcols = toolz.reduce(lambda x, y: x.union(y), (set(t.columns) for t in tables))
    return list(columns.intersection(foundcols))
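Reusing the hypothetical ``FakeTable`` stand-in from the ``column_map`` sketch above:

requested = ['income', 'rent', 'bogus']
found = column_list([households, buildings], requested)

# 'bogus' is silently dropped; ordering comes from a set intersection,
# so compare as sets rather than lists
assert set(found) == {'income', 'rent'}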
Example 9: compute_one

def compute_one(expr, c, **kwargs):
    c = iter(c)
    n = 0
    cs = []
    for chunk in c:
        cs.append(chunk)
        n += len(chunk)
        if n >= expr.n:
            break

    if not cs:
        return []

    if len(cs) == 1:
        return compute_one(expr, cs[0])

    t1 = TableSymbol('t1', expr.schema)
    t2 = TableSymbol('t2', expr.schema)
    binop = lambda a, b: compute(union(t1, t2), {t1: a, t2: b})
    u = reduce(binop, cs)

    return compute_one(expr, u)
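Stripped of the Blaze machinery, the combining step is just a ``reduce`` of a union-like binop over the collected chunks; a self-contained sketch:

from functools import reduce

chunks = [[1, 2], [3, 4], [5]]
binop = lambda a, b: a + b   # stand-in for the union of two table chunks

assert reduce(binop, chunks) == [1, 2, 3, 4, 5]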
Example 10: Filter

def Filter(t, *conditions):
    return t[reduce(and_, conditions)]
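The same AND-all-conditions-then-index pattern works on plain pandas boolean masks; a hedged sketch:

import pandas as pd
from functools import reduce
from operator import and_

df = pd.DataFrame({'x': [1, 2, 3, 4], 'y': ['a', 'b', 'a', 'b']})
conditions = [df.x > 1, df.y == 'a']

# AND the boolean masks together, then filter the frame
filtered = df[reduce(and_, conditions)]
assert list(filtered.x) == [3]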
Example 11: _reduce

def _reduce(binop, sequence, initial=no_default):
    if initial is not no_default:
        return reduce(binop, sequence, initial)
    else:
        return reduce(binop, sequence)
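A quick sketch of the two branches (assuming the ``no_default`` sentinel and ``reduce`` are in scope as above):

from operator import add

assert _reduce(add, [1, 2, 3], initial=10) == 16  # seed participates
assert _reduce(add, [1, 2, 3]) == 6               # plain two-argument reduce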
Example 12: find_merge_rechunk

def find_merge_rechunk(old_chunks, new_chunks, block_size_limit):
    """
    Find an intermediate rechunk that would merge some adjacent blocks
    together in order to get us nearer the *new_chunks* target, without
    violating the *block_size_limit* (in number of elements).
    """
    ndim = len(old_chunks)

    old_largest_width = [max(c) for c in old_chunks]
    new_largest_width = [max(c) for c in new_chunks]

    graph_size_effect = {
        dim: len(nc) / len(oc)
        for dim, (oc, nc) in enumerate(zip(old_chunks, new_chunks))
    }

    block_size_effect = {
        dim: new_largest_width[dim] / old_largest_width[dim]
        for dim in range(ndim)
    }

    # Our goal is to reduce the number of nodes in the rechunk graph
    # by merging some adjacent chunks, so consider dimensions where we can
    # reduce the # of chunks
    merge_candidates = [dim for dim in range(ndim)
                        if graph_size_effect[dim] <= 1.0]

    # Merging along each dimension reduces the graph size by a certain factor
    # and increases the largest block size in memory by a certain factor.
    # We want to optimize the graph size while staying below the given
    # block_size_limit.  This is in effect a knapsack problem, except with
    # multiplicative values and weights.  Just use a greedy algorithm
    # by trying dimensions in decreasing value / weight order.
    def key(k):
        gse = graph_size_effect[k]
        bse = block_size_effect[k]
        if bse == 1:
            bse = 1 + 1e-9
        return np.log(gse) / np.log(bse)

    sorted_candidates = sorted(merge_candidates, key=key)

    largest_block_size = reduce(mul, old_largest_width)

    chunks = list(old_chunks)
    memory_limit_hit = False

    for dim in sorted_candidates:
        # Examine this dimension for possible graph reduction
        new_largest_block_size = (
            largest_block_size * new_largest_width[dim] // old_largest_width[dim])
        if new_largest_block_size <= block_size_limit:
            # Full replacement by new chunks is possible
            chunks[dim] = new_chunks[dim]
            largest_block_size = new_largest_block_size
        else:
            # Try a partial rechunk, dividing the new chunks into
            # smaller pieces
            largest_width = old_largest_width[dim]
            chunk_limit = int(block_size_limit * largest_width / largest_block_size)
            c = divide_to_width(new_chunks[dim], chunk_limit)
            if len(c) <= len(old_chunks[dim]):
                # We manage to reduce the number of blocks, so do it
                chunks[dim] = c
                largest_block_size = largest_block_size * max(c) // largest_width
                memory_limit_hit = True

    assert largest_block_size == _largest_block_size(chunks)
    assert largest_block_size <= block_size_limit
    return tuple(chunks), memory_limit_hit
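For intuition on the greedy ``key``: it scores each candidate dimension by graph shrinkage per unit of block growth, so more negative is better. A toy calculation:

import numpy as np

# merging 4 old chunks into 2 new ones halves the graph along this
# dimension (gse = 0.5) while doubling the block width (bse = 2.0)
gse, bse = 2 / 4, 2.0
assert np.isclose(np.log(gse) / np.log(bse), -1.0)  # strongly negative: attractive merge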
Example 13: _largest_block_size

def _largest_block_size(chunks):
    return reduce(mul, map(max, chunks))

Example 14: _number_of_blocks

def _number_of_blocks(chunks):
    return reduce(mul, map(len, chunks))
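Both helpers are one-line ``reduce``/``mul`` folds over the chunk tuples; a quick self-contained check:

from functools import reduce
from operator import mul

chunks = ((5, 5), (4, 4, 4))
assert reduce(mul, map(max, chunks)) == 20  # largest block: 5 * 4 elements
assert reduce(mul, map(len, chunks)) == 6   # total blocks: 2 * 3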
Example 15: test__bi_gram

def test__bi_gram(string, expected):
    assert(list(tkn.bi_gram(string)) == expected)


@pytest.mark.parametrize("string,expected",
                         [("foo-bar", []),
                          ("foobazbar", []),
                          ("foo*bar*baz", ["foo_bar_baz"]),
                          ])
def test__tri_gram(string, expected):
    assert(list(tkn.tri_gram(string)) == expected)


sum_tally_tuples = lambda tpls: reduce_c(lambda x, y: x + y[1], tpls, 0)

extext = tlz.reduce(lambda x, y: x + y, ["aaa " * 20,
                                         "bbb " * 10,
                                         "ccc " * 3,
                                         "ddd " * 1])


@pytest.mark.parametrize("string,length,total,parser",
                         [(extext, 4, 34, tkn.uni_gram),
                          ])
def test___gram_counts(string, length, total, parser):
    bow = tkn.gram_counts(parser, string)
    assert(len(bow) == length)
    assert(sum_tally_tuples(bow) == total)
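A hedged sketch of the ``reduce_c`` (curried reduce) pattern used by ``sum_tally_tuples`` above; partial application holds the binop until the data arrives:

from toolz.curried import reduce as reduce_c

tally = [('aaa', 20), ('bbb', 10), ('ccc', 3), ('ddd', 1)]

# fully applied, it behaves exactly like functools.reduce
assert reduce_c(lambda x, y: x + y[1], tally, 0) == 34

# partially applied, it waits for the sequence and the seed
summer = reduce_c(lambda x, y: x + y[1])
assert summer(tally, 0) == 34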
@pytest.mark.parametrize("string,length,total",
[(extext, 4, 34),