This article collects typical usage examples of the toolz.merge function in Python. If you have been wondering how exactly to use Python's merge function, how to call it, or what it looks like in real code, the hand-picked examples below should help.
The following presents 15 code examples of the merge function, sorted by popularity by default. You can upvote the examples you like or find useful; your feedback helps the system recommend better Python code examples.
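Before diving into the examples, here is a minimal sketch of what toolz.merge itself does: it combines any number of dictionaries, or a single iterable of dictionaries, into a new dict, with later mappings taking precedence on duplicate keys and none of the inputs being mutated.

from toolz import merge

# later dictionaries win on duplicate keys
merge({'a': 1, 'b': 2}, {'b': 10, 'c': 3})   # {'a': 1, 'b': 10, 'c': 3}

# a single iterable (list or generator) of dicts is also accepted
merge([{'x': 1}, {'y': 2}])                   # {'x': 1, 'y': 2}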
Example 1: elemwise
def elemwise(op, *args, **kwargs):
    """ Elementwise operation for dask.Dataframes """
    columns = kwargs.get('columns', None)
    name = kwargs.get('name', None)
    _name = 'elemwise' + next(tokens)
    dfs = [arg for arg in args if isinstance(arg, _Frame)]
    other = [(i, arg) for i, arg in enumerate(args)
             if not isinstance(arg, _Frame)]
    if other:
        op2 = partial_by_order(op, other)
    else:
        op2 = op
    if not all(df.divisions == dfs[0].divisions for df in dfs):
        msg = 'All dask.Dataframe and dask.Series must have same divisions'
        raise ValueError(msg)
    if not all(df.npartitions == dfs[0].npartitions for df in dfs):
        msg = 'All dask.Dataframe and dask.Series must have same npartitions'
        raise ValueError(msg)
    # one task per partition, applying op2 across the aligned partitions
    dsk = dict(((_name, i), (op2,) + frs)
               for i, frs in enumerate(zip(*[df._keys() for df in dfs])))
    if columns is not None:
        return DataFrame(merge(dsk, *[df.dask for df in dfs]),
                         _name, columns, dfs[0].divisions)
    else:
        column_name = name or consistent_name(n for df in dfs
                                              for n in df.columns)
        return Series(merge(dsk, *[df.dask for df in dfs]),
                      _name, column_name, dfs[0].divisions)
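Most of the dask snippets below follow the same pattern as Example 1: build a dict of new tasks and combine it with the graphs of every input collection via merge(dsk, *[df.dask for df in dfs]). A rough sketch of that pattern with made-up toy graphs (the keys and values are placeholders, not taken from dask):

from operator import add
from toolz import merge

new_layer = {('elemwise-1', 0): (add, ('x', 0), ('y', 0))}   # hypothetical new tasks
input_graphs = [{('x', 0): 1}, {('y', 0): 2}]                # stand-ins for df.dask
merge(new_layer, *input_graphs)
# -> a single dict containing the new layer plus both input graphs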
Example 2: from_imperative
def from_imperative(dfs, columns, divisions=None):
    """ Create DataFrame from many imperative objects

    Parameters
    ----------
    dfs: list of Values
        An iterable of dask.imperative.Value objects, such as come from dask.do
        These comprise the individual partitions of the resulting dataframe
    columns: list or string
        The list of column names if the result is a DataFrame
        Or the single column name if the result is a Series
    divisions: list or None
    """
    from dask.imperative import Value
    if isinstance(dfs, Value):
        dfs = [dfs]
    dsk = merge(df.dask for df in dfs)
    name = 'from-imperative-' + tokenize(*dfs)
    names = [(name, i) for i in range(len(dfs))]
    values = [df.key for df in dfs]
    dsk2 = dict(zip(names, values))
    if divisions is None:
        divisions = [None] * (len(dfs) + 1)
    if isinstance(columns, str):
        return Series(merge(dsk, dsk2), name, columns, divisions)
    else:
        return DataFrame(merge(dsk, dsk2), name, columns, divisions)
Example 3: to_hdf
def to_hdf(df, path_or_buf, key, mode='a', append=False, complevel=0,
           complib=None, fletcher32=False, get=get_sync, dask_kwargs=None,
           name_function=None, compute=True, **kwargs):
    name = 'to-hdf-' + uuid.uuid1().hex
    pd_to_hdf = getattr(df._partition_type, 'to_hdf')

    # if path_or_buf is string, format using i_name
    if isinstance(path_or_buf, str):
        if path_or_buf.count('*') + key.count('*') > 1:
            raise ValueError("A maximum of one asterisk is accepted in file path and dataset key")
        fmt_obj = lambda path_or_buf, i_name: path_or_buf.replace('*', i_name)
    else:
        if key.count('*') > 1:
            raise ValueError("A maximum of one asterisk is accepted in dataset key")
        fmt_obj = lambda path_or_buf, _: path_or_buf

    if name_function is None:
        name_function = build_name_function(df.npartitions - 1)

    # we guarantee partition order is preserved when it's saved and read
    # so we enforce name_function to maintain the order of its input.
    if '*' in key or (isinstance(path_or_buf, str) and '*' in path_or_buf):
        formatted_names = [name_function(i) for i in range(df.npartitions)]
        if formatted_names != sorted(formatted_names):
            warn("In order to preserve order between partitions "
                 "name_function must preserve the order of its input")

    dsk = dict()
    i_name = name_function(0)
    dsk[(name, 0)] = (_link, None,
                      (apply, pd_to_hdf,
                       (tuple, [(df._name, 0), fmt_obj(path_or_buf, i_name),
                                key.replace('*', i_name)]),
                       merge(kwargs,
                             {'mode': mode, 'format': 'table', 'append': append,
                              'complevel': complevel, 'complib': complib,
                              'fletcher32': fletcher32})))

    for i in range(1, df.npartitions):
        i_name = name_function(i)
        dsk[(name, i)] = (_link, (name, i - 1),
                          (apply, pd_to_hdf,
                           (tuple, [(df._name, i), fmt_obj(path_or_buf, i_name),
                                    key.replace('*', i_name)]),
                           merge(kwargs,
                                 {'mode': 'a', 'format': 'table', 'append': True,
                                  'complevel': complevel, 'complib': complib,
                                  'fletcher32': fletcher32})))

    dask_kwargs = dask_kwargs or {}
    dsk = merge(df.dask, dsk)
    key = (name, df.npartitions - 1)
    if compute:
        return DataFrame._get(dsk, key, get=get, **dask_kwargs)
    else:
        return Delayed(key, [dsk])
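Note the merge(kwargs, {...}) calls above: because the right-hand dict takes precedence, the explicitly chosen 'mode', 'format' and 'append' settings always override whatever the caller passed through **kwargs, which is how every partition after the first is forced to append. A small illustration with hypothetical user kwargs:

from toolz import merge

user_kwargs = {'complevel': 9, 'mode': 'w'}   # hypothetical caller input
merge(user_kwargs, {'mode': 'a', 'format': 'table', 'append': True})
# {'complevel': 9, 'mode': 'a', 'format': 'table', 'append': True}
# the caller's 'mode' is overridden by the right-hand dict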
Example 4: set_partition
def set_partition(f, index, divisions, get=threaded.get, **kwargs):
    """ Set new partitioning along index given divisions """
    divisions = unique(divisions)
    name = next(names)
    if isinstance(index, Series):
        assert index.divisions == f.divisions
        dsk = dict(((name, i), (f._partition_type.set_index, block, ind))
                   for i, (block, ind) in enumerate(zip(f._keys(), index._keys())))
        f2 = type(f)(merge(f.dask, index.dask, dsk), name,
                     f.column_info, f.divisions)
    else:
        dsk = dict(((name, i), (f._partition_type.set_index, block, index))
                   for i, block in enumerate(f._keys()))
        f2 = type(f)(merge(f.dask, dsk), name, f.column_info, f.divisions)

    head = f2.head()
    pf = pframe(like=head, divisions=divisions, **kwargs)

    def append(block):
        pf.append(block)
        return 0

    f2.map_blocks(append).compute(get=get)
    pf.flush()
    return from_pframe(pf)
Example 5: _loc
def _loc(self, ind):
    """ Helper function for the .loc accessor """
    if not self.known_divisions:
        raise ValueError(
            "Can not use loc on DataFrame without known divisions")
    name = next(names)
    if not isinstance(ind, slice):
        part = self._partition_of_index_value(ind)
        dsk = {(name, 0): (lambda df: df.loc[ind], (self._name, part))}
        return type(self)(merge(self.dask, dsk), name,
                          self.column_info, [])
    else:
        assert ind.step in (None, 1)
        if ind.start:
            start = self._partition_of_index_value(ind.start)
        else:
            start = 0
        if ind.stop is not None:
            stop = self._partition_of_index_value(ind.stop)
        else:
            stop = self.npartitions - 1
        if stop == start:
            dsk = {(name, 0): (_loc, (self._name, start), ind.start, ind.stop)}
        else:
            dsk = merge(
                {(name, 0): (_loc, (self._name, start), ind.start, None)},
                dict(((name, i), (self._name, start + i))
                     for i in range(1, stop - start)),
                {(name, stop - start): (_loc, (self._name, stop), None, ind.stop)})
        return type(self)(merge(self.dask, dsk), name, self.column_info,
                          self.divisions[start:stop])
Example 6: compute
def compute(*args, **kwargs):
    """Compute several dask collections at once.

    Examples
    --------
    >>> import dask.array as da
    >>> a = da.arange(10, chunks=2).sum()
    >>> b = da.arange(10, chunks=2).mean()
    >>> compute(a, b)
    (45, 4.5)
    """
    groups = groupby(attrgetter('_optimize'), args)
    get = kwargs.pop('get', None) or _globals['get']

    if not get:
        get = args[0]._default_get
        if not all(a._default_get == get for a in args):
            raise ValueError("Compute called on multiple collections with "
                             "differing default schedulers. Please specify a "
                             "scheduler `get` function using either "
                             "the `get` kwarg or globally with `set_options`.")

    # optimize each group of collections separately, then merge all of the
    # optimized graphs into a single task graph for one scheduler call
    dsk = merge([opt(merge([v.dask for v in val]), [v._keys() for v in val])
                 for opt, val in groups.items()])
    keys = [arg._keys() for arg in args]
    results = get(dsk, keys, **kwargs)
    return tuple(a._finalize(a, r) for a, r in zip(args, results))
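The core idea in Example 6 is that the graphs of all collections are merged into one dict and handed to a single scheduler call, so keys shared between collections are computed only once. A rough sketch of that idea with two tiny hand-built graphs (the keys and the use of dask's simple reference scheduler are illustrative, not taken from the example):

from operator import add
from toolz import merge
from dask.core import get            # simple reference scheduler

graph_a = {'x': 1, 'a': (add, 'x', 1)}   # hypothetical per-collection graphs
graph_b = {'x': 1, 'b': (add, 'x', 2)}
dsk = merge(graph_a, graph_b)             # the shared key 'x' appears only once
get(dsk, 'b')                             # 3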
Example 7: from_imperative
def from_imperative(dfs, metadata=None, divisions=None, columns=None):
    """ Create DataFrame from many imperative objects

    Parameters
    ----------
    dfs: list of Values
        An iterable of dask.imperative.Value objects, such as come from dask.do
        These comprise the individual partitions of the resulting dataframe
    metadata: list or string of column names or empty dataframe
    divisions: list or None
    """
    if columns is not None:
        print("Deprecation warning: Use metadata argument, not columns")
        metadata = columns
    from dask.imperative import Value
    if isinstance(dfs, Value):
        dfs = [dfs]
    dsk = merge(df.dask for df in dfs)
    name = 'from-imperative-' + tokenize(*dfs)
    names = [(name, i) for i in range(len(dfs))]
    values = [df.key for df in dfs]
    dsk2 = dict(zip(names, values))
    if divisions is None:
        divisions = [None] * (len(dfs) + 1)
    if isinstance(metadata, str):
        return Series(merge(dsk, dsk2), name, metadata, divisions)
    else:
        return DataFrame(merge(dsk, dsk2), name, metadata, divisions)
Example 8: elemwise
def elemwise(op, *args, **kwargs):
    """ Elementwise operation for dask.Dataframes """
    columns = kwargs.get('columns', None)
    name = kwargs.get('name', None)
    _name = next(names)
    frames = [arg for arg in args if isinstance(arg, _Frame)]
    other = [(i, arg) for i, arg in enumerate(args)
             if not isinstance(arg, _Frame)]
    if other:
        op2 = partial_by_order(op, other)
    else:
        op2 = op
    assert all(f.divisions == frames[0].divisions for f in frames)
    assert all(f.npartitions == frames[0].npartitions for f in frames)
    dsk = dict(((_name, i), (op2,) + frs)
               for i, frs in enumerate(zip(*[f._keys() for f in frames])))
    if columns is not None:
        return DataFrame(merge(dsk, *[f.dask for f in frames]),
                         _name, columns, frames[0].divisions)
    else:
        column_name = name or consistent_name(n for f in frames
                                              for n in f.columns)
        return Series(merge(dsk, *[f.dask for f in frames]),
                      _name, column_name, frames[0].divisions)
Example 9: to_hdf
def to_hdf(df, path_or_buf, key, mode='a', append=False, complevel=0,
           complib=None, fletcher32=False, get=get_sync, dask_kwargs=None,
           **kwargs):
    name = 'to-hdf-' + uuid.uuid1().hex
    pd_to_hdf = getattr(df._partition_type, 'to_hdf')

    dsk = dict()
    dsk[(name, 0)] = (_link, None,
                      (apply, pd_to_hdf,
                       (tuple, [(df._name, 0), path_or_buf, key]),
                       merge(kwargs,
                             {'mode': mode, 'format': 'table', 'append': append,
                              'complevel': complevel, 'complib': complib,
                              'fletcher32': fletcher32})))
    for i in range(1, df.npartitions):
        dsk[(name, i)] = (_link, (name, i - 1),
                          (apply, pd_to_hdf,
                           (tuple, [(df._name, i), path_or_buf, key]),
                           merge(kwargs,
                                 {'mode': 'a', 'format': 'table', 'append': True,
                                  'complevel': complevel, 'complib': complib,
                                  'fletcher32': fletcher32})))

    dask_kwargs = dask_kwargs or {}
    DataFrame._get(merge(df.dask, dsk), (name, df.npartitions - 1),
                   get=get, **dask_kwargs)
Example 10: f
def f(c, a, b):
    keys = yield _scatter((c.ip, c.port), [1, 2, 3])
    assert merge(a.data, b.data) == \
        {k: i for k, i in zip(keys, [1, 2, 3])}
    assert set(c.who_has) == set(keys)
    assert all(len(v) == 1 for v in c.who_has.values())

    keys2, who_has, nbytes = yield scatter_to_workers([a.address, b.address],
                                                      [4, 5, 6])
    m = merge(a.data, b.data)
    for k, v in zip(keys2, [4, 5, 6]):
        assert m[k] == v

    assert isinstance(who_has, dict)
    assert set(concat(who_has.values())) == {a.address, b.address}
    assert len(who_has) == len(keys2)
    assert isinstance(nbytes, dict)
    assert set(nbytes) == set(who_has)
    assert all(isinstance(v, int) for v in nbytes.values())

    result = yield _gather((c.ip, c.port), keys2)
    assert result == [4, 5, 6]
Example 11: apply
def apply(self, latitude, longitude, latitude_mask, **kwargs):
    latitude = (latitude.T - data.train_gps_mean[0]) / data.train_gps_std[0]
    longitude = (longitude.T - data.train_gps_mean[1]) / data.train_gps_std[1]
    latitude_mask = latitude_mask.T

    rec_in = tensor.concatenate((latitude[:, :, None], longitude[:, :, None]),
                                axis=2)
    path = self.rec.apply(merge(self.fwd_fork.apply(rec_in, as_dict=True),
                                {'mask': latitude_mask}),
                          merge(self.bkwd_fork.apply(rec_in, as_dict=True),
                                {'mask': latitude_mask}))[0]

    last_id = tensor.cast(latitude_mask.sum(axis=0) - 1, dtype='int64')
    path_representation = (path[0][:, -self.config.rec_state_dim:],
                           path[last_id - 1, tensor.arange(last_id.shape[0])]
                           [:, :self.config.rec_state_dim])

    embeddings = tuple(self.context_embedder.apply(
        **{k: kwargs[k] for k in self.context_embedder.inputs}))

    inputs = tensor.concatenate(path_representation + embeddings, axis=1)
    outputs = self.rec_to_output.apply(inputs)
    return outputs
Example 12: from_imperative
def from_imperative(values):
    """ Create bag from many imperative objects

    Parameters
    ----------
    values: list of Values
        An iterable of dask.imperative.Value objects, such as come from dask.do
        These comprise the individual partitions of the resulting bag

    Returns
    -------
    Bag

    Examples
    --------
    >>> b = from_imperative([x, y, z])  # doctest: +SKIP
    """
    from dask.imperative import Value
    if isinstance(values, Value):
        values = [values]
    dsk = merge(v.dask for v in values)
    name = 'bag-from-imperative-' + tokenize(*values)
    names = [(name, i) for i in range(len(values))]
    values = [v.key for v in values]
    dsk2 = dict(zip(names, values))
    return Bag(merge(dsk, dsk2), name, len(values))
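The dict(zip(names, values)) layer above contains no real computation: each new key simply points at the key of an existing Value, and in a dask graph a value that is itself a key is treated as an alias. A rough sketch with invented keys ('value-key-0' and friends are placeholders, not actual dask names):

from toolz import merge
from dask.core import get            # simple reference scheduler

value_graphs = [{'value-key-0': 10}, {'value-key-1': 20}]   # hypothetical Value graphs
dsk = merge(value_graphs)                                    # merge over an iterable
dsk2 = {('bag-xyz', 0): 'value-key-0',                       # alias layer
        ('bag-xyz', 1): 'value-key-1'}
get(merge(dsk, dsk2), ('bag-xyz', 1))                        # 20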
Example 13: apply
def apply(self, source_sentence, source_sentence_mask):
    """Produces source annotations, either non-recurrently or with
    a bidirectional RNN architecture.
    """
    # Time as first dimension
    source_sentence = source_sentence.T
    source_sentence_mask = source_sentence_mask.T

    embeddings = self.lookup.apply(source_sentence)
    representation = self.bidirs[0].apply(
        merge(self.fwd_forks[0].apply(embeddings, as_dict=True),
              {'mask': source_sentence_mask}),
        merge(self.back_forks[0].apply(embeddings, as_dict=True),
              {'mask': source_sentence_mask}))
    for i in xrange(1, self.n_layers):
        if self.skip_connections:
            inp = tensor.concatenate([representation, embeddings],
                                     axis=2)
        else:
            inp = representation
        representation = self.bidirs[i].apply(
            merge(self.fwd_forks[i].apply(inp, as_dict=True),
                  {'mask': source_sentence_mask}),
            merge(self.back_forks[i].apply(inp, as_dict=True),
                  {'mask': source_sentence_mask})
        )
    return representation, source_sentence_mask
Example 14: _loc_slice
def _loc_slice(self, ind):
    name = 'loc-slice' + next(tokens)
    assert ind.step in (None, 1)
    if ind.start:
        start = _partition_of_index_value(self.divisions, ind.start)
    else:
        start = 0
    if ind.stop is not None:
        stop = _partition_of_index_value(self.divisions, ind.stop)
    else:
        stop = self.npartitions - 1
    istart = _coerce_loc_index(self.divisions, ind.start)
    istop = _coerce_loc_index(self.divisions, ind.stop)
    if stop == start:
        dsk = {(name, 0): (_loc, (self._name, start), ind.start, ind.stop)}
        divisions = [istart, istop]
    else:
        dsk = merge(
            {(name, 0): (_loc, (self._name, start), ind.start, None)},
            dict(((name, i), (self._name, start + i))
                 for i in range(1, stop - start)),
            {(name, stop - start): (_loc, (self._name, stop), None, ind.stop)})
        divisions = ((max(istart, self.divisions[start])
                      if ind.start is not None
                      else self.divisions[0],) +
                     self.divisions[start+1:stop+1] +
                     (min(istop, self.divisions[stop+1])
                      if ind.stop is not None
                      else self.divisions[-1],))
    assert len(divisions) == len(dsk) + 1
    return type(self)(merge(self.dask, dsk),
                      name, self.column_info,
                      divisions)
Example 15: f
def f(c, a, b):
    data = yield _scatter((c.ip, c.port), [1, 2, 3])
    assert c.ip in str(data[0])
    assert c.ip in repr(data[0])

    assert merge(a.data, b.data) == \
        {d.key: i for d, i in zip(data, [1, 2, 3])}
    assert set(c.who_has) == {d.key for d in data}
    assert all(len(v) == 1 for v in c.who_has.values())

    result = yield [d._get() for d in data]
    assert result == [1, 2, 3]

    yield data[0]._delete()
    assert merge(a.data, b.data) == \
        {d.key: i for d, i in zip(data[1:], [2, 3])}
    assert data[0].key not in c.who_has

    data = yield scatter_to_workers((c.ip, c.port), [a.address, b.address],
                                    [4, 5, 6])
    m = merge(a.data, b.data)
    for d, v in zip(data, [4, 5, 6]):
        assert m[d.key] == v

    result = yield _gather((c.ip, c.port), data)
    assert result == [4, 5, 6]