本文整理汇总了 Python 中 dask.bag.core.Bag.map_partitions 方法的典型用法代码示例。如果您正苦于以下问题:Python Bag.map_partitions 方法的具体用法?Python Bag.map_partitions 怎么用?Python Bag.map_partitions 使用的例子?那么恭喜您,这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在类 dask.bag.core.Bag 的用法示例。
在下文中一共展示了 Bag.map_partitions 方法的 1 个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的 Python 代码示例。
示例1: test_to_dataframe
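# Required import: from dask.bag.core import Bag [as alias]
# Note: map_partitions is a method of Bag and cannot be imported on its own;
# call it on a Bag instance (bag.map_partitions(...)).
# The example below also uses the names `pytest`, `dask` and `db`
# (`db` is presumably `import dask.bag as db` -- confirm against the original test module).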
def test_to_dataframe():
    """Check ``Bag.to_dataframe`` against a pandas reference frame.

    Scenarios covered, in order:

    * bag elements are tuples (default columns, ``columns=``, ``meta=``);
    * bag elements are dictionaries, including a deterministic-name check;
    * ``meta`` given as a DataFrame or as a list of ``(name, dtype)`` pairs;
    * passing both ``columns`` and ``meta`` raises ``ValueError``;
    * a bag of scalars converted to a single-column frame;
    * partitions produced by ``map_partitions`` as iterators or tuples.

    The test is skipped when ``dask.dataframe`` or ``pandas`` cannot be
    imported.
    """
    dd = pytest.importorskip('dask.dataframe')
    pd = pytest.importorskip('pandas')

    def check_parts(df, sol):
        # Compute every partition separately and require each one to carry
        # the same dtypes as the expected frame.
        assert all((p.dtypes == sol.dtypes).all() for p in
                   dask.compute(*df.to_delayed()))

    # Three partitions; the middle one is deliberately empty so the
    # per-partition dtype check also covers a partition with no rows.
    dsk = {('test', 0): [(1, 2)],
           ('test', 1): [],
           ('test', 2): [(10, 20), (100, 200)]}
    b = Bag(dsk, 'test', 3)
    sol = pd.DataFrame(b.compute(), columns=['a', 'b'])

    # Elements are tuples
    df = b.to_dataframe()
    # Without explicit names the columns are expected to be 0 and 1.
    dd.utils.assert_eq(df, sol.rename(columns={'a': 0, 'b': 1}),
                       check_index=False)
    df = b.to_dataframe(columns=['a', 'b'])
    dd.utils.assert_eq(df, sol, check_index=False)
    check_parts(df, sol)
    df = b.to_dataframe(meta=[('a', 'i8'), ('b', 'i8')])
    dd.utils.assert_eq(df, sol, check_index=False)
    check_parts(df, sol)

    # Elements are dictionaries
    b = b.map(lambda x: dict(zip(['a', 'b'], x)))
    df = b.to_dataframe()
    dd.utils.assert_eq(df, sol, check_index=False)
    check_parts(df, sol)
    # Converting the same bag twice must yield the same collection name.
    assert df._name == b.to_dataframe()._name

    # With metadata specified
    for meta in [sol, [('a', 'i8'), ('b', 'i8')]]:
        df = b.to_dataframe(meta=meta)
        dd.utils.assert_eq(df, sol, check_index=False)
        check_parts(df, sol)

    # Error to specify both columns and meta
    with pytest.raises(ValueError):
        b.to_dataframe(columns=['a', 'b'], meta=sol)

    # Single column
    b = b.pluck('a')
    sol = sol[['a']]
    df = b.to_dataframe(meta=sol)
    dd.utils.assert_eq(df, sol, check_index=False)
    check_parts(df, sol)

    # Works with iterators and tuples
    sol = pd.DataFrame({'a': range(100)})
    b = db.from_sequence(range(100), npartitions=5)
    for f in [iter, tuple]:
        # map_partitions(f) turns each partition into an iterator / a tuple
        # before the conversion to a dataframe.
        df = b.map_partitions(f).to_dataframe(meta=sol)
        dd.utils.assert_eq(df, sol, check_index=False)
        check_parts(df, sol)