本文整理汇总了Python中dask.bag.core.Bag类的典型用法代码示例。如果您正苦于以下问题：Python Bag类的具体用法？Python Bag怎么用？Python Bag使用的例子？那么恭喜您，这里精选的类代码示例或许可以为您提供帮助。
在下文中一共展示了Bag类的3个代码示例，这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞，您的评价将有助于系统推荐出更棒的Python代码示例。
示例1: test_pluck
def test_pluck():
    """Check that ``Bag.pluck`` extracts fields from the elements of every partition."""
    # Two partitions, each holding two 2-tuples.
    graph = {
        ('x', 0): [(1, 10), (2, 20)],
        ('x', 1): [(3, 30), (4, 40)],
    }
    pairs = Bag(graph, 'x', 2)

    # A single index plucks one field from each element.
    firsts = set(pairs.pluck(0))
    assert firsts == {1, 2, 3, 4}
    seconds = set(pairs.pluck(1))
    assert seconds == {10, 20, 30, 40}

    # A list of indices produces tuples laid out in the requested order.
    swapped = set(pairs.pluck([1, 0]))
    assert swapped == {(10, 1), (20, 2), (30, 3), (40, 4)}
示例2: test_reductions_are_lazy
def test_reductions_are_lazy():
    """Check that ``Bag.reduction`` does not exhaust a partition up front.

    The partition is a generator that records the last value it yielded.
    When the per-partition function is invoked, that record must still be
    ``0``, i.e. nothing beyond the first item has been pulled yet.
    """
    # One-slot list so the nested generator can publish its progress.
    last_yielded = [None]

    def tracked_partition():
        for value in range(10):
            last_yielded[0] = value
            yield value

    def sum_partition(items):
        # The generator must not have advanced past its first item (0).
        assert last_yielded[0] == 0
        return sum(items)

    bag = Bag({('foo', 0): tracked_partition()}, 'foo', 1)
    total = bag.reduction(sum_partition, sum)

    expected = sum(range(10))
    # NOTE(review): ``get=`` is the legacy way to pick a scheduler; newer
    # dask releases expect ``scheduler=`` instead -- confirm against the
    # dask version this file targets before changing it.
    assert total.compute(get=dask.get) == expected
示例3: test_to_dataframe
def test_to_dataframe():
    """Exercise ``Bag.to_dataframe`` on tuple, dict, and single-value elements.

    Skipped when ``dask.dataframe`` or ``pandas`` cannot be imported.
    """
    dd = pytest.importorskip('dask.dataframe')
    pd = pytest.importorskip('pandas')

    def check_parts(df, sol):
        # Every computed partition must have the same dtypes as the solution.
        for part in dask.compute(*df.to_delayed()):
            assert (part.dtypes == sol.dtypes).all()

    def assert_frame(df, sol):
        # Compare values (ignoring the index), then the per-partition dtypes.
        dd.utils.assert_eq(df, sol, check_index=False)
        check_parts(df, sol)

    # Three partitions; the middle one is deliberately empty.
    graph = {
        ('test', 0): [(1, 2)],
        ('test', 1): [],
        ('test', 2): [(10, 20), (100, 200)],
    }
    b = Bag(graph, 'test', 3)
    sol = pd.DataFrame(b.compute(), columns=['a', 'b'])

    # Elements are tuples
    positional = sol.rename(columns={'a': 0, 'b': 1})
    dd.utils.assert_eq(b.to_dataframe(), positional, check_index=False)

    assert_frame(b.to_dataframe(columns=['a', 'b']), sol)
    assert_frame(b.to_dataframe(meta=[('a', 'i8'), ('b', 'i8')]), sol)

    # Elements are dictionaries
    b = b.map(lambda x: dict(zip(['a', 'b'], x)))
    df = b.to_dataframe()
    assert_frame(df, sol)
    # Converting the same bag twice must give the same collection name.
    assert df._name == b.to_dataframe()._name

    # With metadata specified
    meta_variants = [sol, [('a', 'i8'), ('b', 'i8')]]
    for meta in meta_variants:
        assert_frame(b.to_dataframe(meta=meta), sol)

    # Error to specify both columns and meta
    with pytest.raises(ValueError):
        b.to_dataframe(columns=['a', 'b'], meta=sol)

    # Single column
    b = b.pluck('a')
    sol = sol[['a']]
    assert_frame(b.to_dataframe(meta=sol), sol)

    # Works with iterators and tuples
    sol = pd.DataFrame({'a': range(100)})
    b = db.from_sequence(range(100), npartitions=5)
    for f in [iter, tuple]:
        converted = b.map_partitions(f)
        assert_frame(converted.to_dataframe(meta=sol), sol)