本文整理汇总了Python中pandas.core.sparse.api.SparseDataFrame类的典型用法代码示例。如果您正苦于以下问题:Python SparseDataFrame类的具体用法?Python SparseDataFrame怎么用?Python SparseDataFrame使用的例子?那么恭喜您,这里精选的类代码示例或许可以为您提供帮助。
在下文中一共展示了SparseDataFrame类的12个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Python代码示例。
示例1: setup_method
def setup_method(self, method):
    """Build the dense and sparse frame fixtures stored on ``self``.

    ``method`` is accepted but not used by this body.
    """
    self.data = {
        'A': [nan, nan, nan, 0, 1, 2, 3, 4, 5, 6],
        'B': [0, 1, 2, nan, nan, nan, 3, 4, 5, 6],
        'C': np.arange(10, dtype=np.float64),
        'D': [0, 1, 2, 3, 4, 5, nan, nan, nan, nan],
    }
    self.dates = bdate_range('1/1/2011', periods=10)

    # Dense frames; both are built from the same data and index.
    self.orig = pd.DataFrame(self.data, index=self.dates)
    self.iorig = pd.DataFrame(self.data, index=self.dates)

    # Sparse frames; the second one is created with default_kind='integer'.
    self.frame = SparseDataFrame(self.data, index=self.dates)
    self.iframe = SparseDataFrame(
        self.data, index=self.dates, default_kind='integer')

    # Copy of the sparse frame extended with a string-valued sparse column.
    self.mixed_frame = self.frame.copy(False)
    n_rows = len(self.dates)
    self.mixed_frame['foo'] = pd.SparseArray(['bar'] * n_rows)

    def nan_replaced(fill):
        # Fresh copy of the frame's values with every NaN set to ``fill``.
        dense = self.frame.values.copy()
        dense[np.isnan(dense)] = fill
        return dense

    # Dense/sparse pair whose missing entries are 0.
    zero_filled = nan_replaced(0)
    self.zorig = pd.DataFrame(
        zero_filled, columns=list('ABCD'), index=self.dates)
    self.zframe = SparseDataFrame(
        zero_filled, columns=list('ABCD'),
        default_fill_value=0, index=self.dates)

    # Dense/sparse pair whose missing entries are 2.
    two_filled = nan_replaced(2)
    self.fill_orig = pd.DataFrame(
        two_filled, columns=list('ABCD'), index=self.dates)
    self.fill_frame = SparseDataFrame(
        two_filled, columns=list('ABCD'),
        default_fill_value=2, index=self.dates)

    self.empty = SparseDataFrame()
示例2: setUp
def setUp(self):
    """Create the dense and sparse frame fixtures used by the tests."""
    self.data = {
        'A': [nan, nan, nan, 0, 1, 2, 3, 4, 5, 6],
        'B': [0, 1, 2, nan, nan, nan, 3, 4, 5, 6],
        'C': np.arange(10, dtype=np.float64),
        'D': [0, 1, 2, 3, 4, 5, nan, nan, nan, nan],
    }
    self.dates = bdate_range('1/1/2011', periods=10)

    # Dense frames; both are built from the same data and index.
    self.orig = pd.DataFrame(self.data, index=self.dates)
    self.iorig = pd.DataFrame(self.data, index=self.dates)

    # Sparse frames; the second one is created with default_kind='integer'.
    self.frame = SparseDataFrame(self.data, index=self.dates)
    self.iframe = SparseDataFrame(
        self.data, index=self.dates, default_kind='integer')

    # NaN -> 0 variant, as a dense frame and as a sparse frame.
    with_zeros = self.frame.values.copy()
    with_zeros[np.isnan(with_zeros)] = 0
    self.zorig = pd.DataFrame(
        with_zeros, columns=list('ABCD'), index=self.dates)
    self.zframe = SparseDataFrame(
        with_zeros, columns=list('ABCD'),
        default_fill_value=0, index=self.dates)

    # NaN -> 2 variant, as a dense frame and as a sparse frame.
    with_twos = self.frame.values.copy()
    with_twos[np.isnan(with_twos)] = 2
    self.fill_orig = pd.DataFrame(
        with_twos, columns=list('ABCD'), index=self.dates)
    self.fill_frame = SparseDataFrame(
        with_twos, columns=list('ABCD'),
        default_fill_value=2, index=self.dates)

    self.empty = SparseDataFrame()
示例3: test_fill_value_when_combine_const
def test_fill_value_when_combine_const(self):
    """Check ``add(2, fill_value=0)`` against ``fillna(0).add(2)``."""
    # GH12723
    raw = np.array([0, 1, np.nan, 3, 4, 5], dtype='float')
    sparse = SparseDataFrame({'foo': raw}, index=range(6))

    expected = sparse.fillna(0).add(2)
    actual = sparse.add(2, fill_value=0)

    tm.assert_sp_frame_equal(actual, expected)
示例4: test_getitem
def test_getitem(self):
    """Select several columns of a SparseDataFrame with a list key.

    Checks that ``sdf[['a', 'b']]`` equals reindexing to those columns,
    and that a list containing a label that is not a column is rejected.
    """
    # 1585 select multiple columns
    sdf = SparseDataFrame(index=[0, 1, 2], columns=['a', 'b', 'c'])

    result = sdf[['a', 'b']]
    exp = sdf.reindex(columns=['a', 'b'])
    tm.assert_sp_frame_equal(result, exp)

    # 'd' is not a column.  Expect the lookup error specifically: the
    # previous ``pytest.raises(Exception, ...)`` would also have passed
    # on any unrelated failure (e.g. an AttributeError or TypeError).
    with pytest.raises(KeyError):
        sdf[['a', 'd']]
示例5: test_as_matrix
def test_as_matrix(self):
    """Check the shape ``as_matrix`` returns for three empty-ish frames."""
    # Frame created with no arguments.
    self.assertEqual(self.empty.as_matrix().shape, (0, 0))

    # Frame with an index but no columns.
    without_columns = SparseDataFrame(index=np.arange(10))
    self.assertEqual(without_columns.as_matrix().shape, (10, 0))

    # Frame with columns but no index.
    without_index = SparseDataFrame(columns=np.arange(10))
    self.assertEqual(without_index.as_matrix().shape, (0, 10))
示例6: test_as_matrix
def test_as_matrix(self):
    """Check the shape ``as_matrix`` returns for three empty-ish frames."""
    # Frame created with no arguments.
    shape = self.empty.as_matrix().shape
    assert shape == (0, 0)

    # Frame with an index but no columns.
    rows_only = SparseDataFrame(index=np.arange(10))
    shape = rows_only.as_matrix().shape
    assert shape == (10, 0)

    # Frame with columns but no index.
    columns_only = SparseDataFrame(columns=np.arange(10))
    shape = columns_only.as_matrix().shape
    assert shape == (0, 10)
示例7: test_quantile_multi
def test_quantile_multi(self):
    """Compare sparse ``quantile`` for a list of quantiles with dense."""
    # GH 17386
    rows = [[1, 1], [2, 10], [3, 100], [nan, nan]]
    quantiles = [0.1, 0.5]

    result = SparseDataFrame(rows).quantile(quantiles)

    dense_expected = DataFrame(rows).quantile(quantiles)
    sparse_expected = SparseDataFrame(dense_expected)

    # The result has to pass both the dense and the sparse comparison.
    tm.assert_frame_equal(result, dense_expected)
    tm.assert_sp_frame_equal(result, sparse_expected)
示例8: test_quantile
def test_quantile(self):
    """Compare sparse ``quantile`` for a single quantile with dense."""
    # GH 17386
    rows = [[1, 1], [2, 10], [3, 100], [nan, nan]]
    quantile = 0.1

    result = SparseDataFrame(rows).quantile(quantile)

    dense_expected = DataFrame(rows).quantile(quantile)
    sparse_expected = SparseSeries(dense_expected)

    # The result has to pass both the dense and the sparse comparison.
    tm.assert_series_equal(result, dense_expected)
    tm.assert_sp_series_equal(result, sparse_expected)
示例9: TestSparseDataFrameAnalytics
class TestSparseDataFrameAnalytics(tm.TestCase):
    """Cumulative-sum and NumPy-function tests for a SparseDataFrame."""

    def setUp(self):
        """Create ``self.frame`` from four columns that contain NaNs."""
        self.data = {
            'A': [nan, nan, nan, 0, 1, 2, 3, 4, 5, 6],
            'B': [0, 1, 2, nan, nan, nan, 3, 4, 5, 6],
            'C': np.arange(10, dtype=float),
            'D': [0, 1, 2, 3, 4, 5, nan, nan, nan, nan],
        }
        self.dates = bdate_range('1/1/2011', periods=10)
        self.frame = SparseDataFrame(self.data, index=self.dates)

    def test_cumsum(self):
        """``cumsum`` matches the dense cumsum for each axis spelling."""
        expected = SparseDataFrame(self.frame.to_dense().cumsum())

        # No axis argument, axis=None and axis=0 are all compared
        # against the same expected frame.
        for kwargs in ({}, {'axis': None}, {'axis': 0}):
            result = self.frame.cumsum(**kwargs)
            tm.assert_sp_frame_equal(result, expected)

    def test_numpy_cumsum(self):
        """``np.cumsum`` works on the frame but rejects dtype/out."""
        result = np.cumsum(self.frame)
        expected = SparseDataFrame(self.frame.to_dense().cumsum())
        tm.assert_sp_frame_equal(result, expected)

        # Each keyword must raise ValueError with the matching message.
        rejected = [
            ("the 'dtype' parameter is not supported",
             {'dtype': np.int64}),
            ("the 'out' parameter is not supported",
             {'out': result}),
        ]
        for msg, kwargs in rejected:
            tm.assertRaisesRegexp(ValueError, msg, np.cumsum,
                                  self.frame, **kwargs)

    def test_numpy_func_call(self):
        """Call a set of NumPy reductions on the frame."""
        # no exception should be raised even though
        # numpy passes in 'axis=None' or `axis=-1'
        names = ('sum', 'cumsum', 'var',
                 'mean', 'prod', 'cumprod',
                 'std', 'min', 'max')
        for name in names:
            getattr(np, name)(self.frame)
示例10: test_reindex_method
def test_reindex_method(self):
    """Reindex a 3x3 sparse frame over rows and over columns.

    Row reindexing is compared against hand-written frames for the
    default, 'bfill' and 'ffill' methods.  Column reindexing is compared
    for the default method and must raise ``NotImplementedError`` for
    'bfill' and 'ffill'.
    """
    row1 = [11., 12., 14.]
    row2 = [21., 22., 24.]
    row4 = [41., 42., 44.]
    gap = [nan, nan, nan]

    sparse = SparseDataFrame(data=[row1, row2, row4],
                             index=[1, 2, 4],
                             columns=[1, 2, 4],
                             dtype=float)

    # Over indices: reindex keyword arguments paired with the rows that
    # are expected at labels 0..5.
    index_cases = [
        # default method
        ({}, [gap, row1, row2, gap, row4, gap]),
        # method='bfill'
        ({'method': 'bfill'}, [row1, row1, row2, row4, row4, gap]),
        # method='ffill'
        ({'method': 'ffill'}, [gap, row1, row2, row2, row4, row4]),
    ]
    for kwargs, rows in index_cases:
        result = sparse.reindex(index=range(6), **kwargs)
        expected = SparseDataFrame(data=rows,
                                   index=range(6),
                                   columns=[1, 2, 4],
                                   dtype=float)
        tm.assert_sp_frame_equal(result, expected)

    # Over columns
    # default method: NaN columns appear at the new labels 0, 3 and 5.
    result = sparse.reindex(columns=range(6))
    widened = [[nan, first, second, nan, third, nan]
               for first, second, third in (row1, row2, row4)]
    expected = SparseDataFrame(data=widened,
                               index=[1, 2, 4],
                               columns=range(6),
                               dtype=float)
    tm.assert_sp_frame_equal(result, expected)

    # method='bfill' and method='ffill'
    for fill_method in ('bfill', 'ffill'):
        with pytest.raises(NotImplementedError):
            sparse.reindex(columns=range(6), method=fill_method)
示例11: TestSparseDataFrame
class TestSparseDataFrame(SharedWithSparse):
    """SparseDataFrame tests built on the ``SharedWithSparse`` base."""

    klass = SparseDataFrame

    # SharedWithSparse tests use generic, klass-agnostic assertion
    _assert_frame_equal = staticmethod(tm.assert_sp_frame_equal)
    _assert_series_equal = staticmethod(tm.assert_sp_series_equal)

    def setup_method(self, method):
        """Build the dense and sparse frame fixtures stored on ``self``.

        ``method`` is accepted but not used by this body.
        """
        self.data = {
            'A': [nan, nan, nan, 0, 1, 2, 3, 4, 5, 6],
            'B': [0, 1, 2, nan, nan, nan, 3, 4, 5, 6],
            'C': np.arange(10, dtype=np.float64),
            'D': [0, 1, 2, 3, 4, 5, nan, nan, nan, nan],
        }
        self.dates = bdate_range('1/1/2011', periods=10)

        # Dense frames; both are built from the same data and index.
        self.orig = pd.DataFrame(self.data, index=self.dates)
        self.iorig = pd.DataFrame(self.data, index=self.dates)

        # Sparse frames; the second one uses default_kind='integer'.
        self.frame = SparseDataFrame(self.data, index=self.dates)
        self.iframe = SparseDataFrame(
            self.data, index=self.dates, default_kind='integer')

        # Copy of the sparse frame plus a string-valued sparse column.
        self.mixed_frame = self.frame.copy(False)
        n_rows = len(self.dates)
        self.mixed_frame['foo'] = pd.SparseArray(['bar'] * n_rows)

        # NaN -> 0 variant, as a dense frame and as a sparse frame.
        with_zeros = self.frame.values.copy()
        with_zeros[np.isnan(with_zeros)] = 0
        self.zorig = pd.DataFrame(
            with_zeros, columns=list('ABCD'), index=self.dates)
        self.zframe = SparseDataFrame(
            with_zeros, columns=list('ABCD'),
            default_fill_value=0, index=self.dates)

        # NaN -> 2 variant, as a dense frame and as a sparse frame.
        with_twos = self.frame.values.copy()
        with_twos[np.isnan(with_twos)] = 2
        self.fill_orig = pd.DataFrame(
            with_twos, columns=list('ABCD'), index=self.dates)
        self.fill_frame = SparseDataFrame(
            with_twos, columns=list('ABCD'),
            default_fill_value=2, index=self.dates)

        self.empty = SparseDataFrame()

    def test_fill_value_when_combine_const(self):
        """Check ``add(2, fill_value=0)`` against ``fillna(0).add(2)``."""
        # GH12723
        raw = np.array([0, 1, np.nan, 3, 4, 5], dtype='float')
        sparse = SparseDataFrame({'foo': raw}, index=range(6))

        expected = sparse.fillna(0).add(2)
        actual = sparse.add(2, fill_value=0)

        tm.assert_sp_frame_equal(actual, expected)

    def test_values(self):
        """Check the shape of ``.values`` for three empty-ish frames."""
        # Frame created with no arguments.
        assert self.empty.values.shape == (0, 0)

        # Frame with an index but no columns.
        rows_only = SparseDataFrame(index=np.arange(10))
        assert rows_only.values.shape == (10, 0)

        # Frame with columns but no index.
        columns_only = SparseDataFrame(columns=np.arange(10))
        assert columns_only.values.shape == (0, 10)

    def test_copy(self):
        """``copy`` returns an equal SparseDataFrame."""
        duplicate = self.frame.copy()
        assert isinstance(duplicate, SparseDataFrame)
        tm.assert_sp_frame_equal(duplicate, self.frame)

        # as of v0.15.0
        # this is now identical (but not is_a )
        assert duplicate.index.identical(self.frame.index)

    def test_constructor(self):
        """Check column types and several ways of constructing a frame."""
        for _, column in compat.iteritems(self.frame):
            assert isinstance(column, SparseSeries)

        assert isinstance(self.iframe['A'].sp_index, IntIndex)

        # constructed zframe from matrix above
        assert self.zframe['A'].fill_value == 0
        expected_sparse = pd.SparseArray([1., 2., 3., 4., 5., 6.])
        tm.assert_numpy_array_equal(expected_sparse,
                                    self.zframe['A'].values)
        expected_dense = np.array([0., 0., 0., 0., 1., 2.,
                                   3., 4., 5., 6.])
        tm.assert_numpy_array_equal(expected_dense,
                                    self.zframe['A'].to_dense().values)

        # construct no data
        blank = SparseDataFrame(columns=np.arange(10),
                                index=np.arange(10))
        for _, column in compat.iteritems(blank):
            assert isinstance(column, SparseSeries)

        # construct from nested dict
        nested = {name: column.to_dict()
                  for name, column in compat.iteritems(self.frame)}
        rebuilt = SparseDataFrame(nested)
        tm.assert_sp_frame_equal(rebuilt, self.frame)
        # ......... part of the code is omitted here .........
示例12: TestSparseDataFrameAnalytics
class TestSparseDataFrameAnalytics(object):
    """Analytics tests (cumsum, NumPy calls, quantile, assign)."""

    def setup_method(self, method):
        """Create ``self.frame`` from four columns that contain NaNs.

        ``method`` is accepted but not used by this body.
        """
        self.data = {
            'A': [nan, nan, nan, 0, 1, 2, 3, 4, 5, 6],
            'B': [0, 1, 2, nan, nan, nan, 3, 4, 5, 6],
            'C': np.arange(10, dtype=float),
            'D': [0, 1, 2, 3, 4, 5, nan, nan, nan, nan],
        }
        self.dates = bdate_range('1/1/2011', periods=10)
        self.frame = SparseDataFrame(self.data, index=self.dates)

    def test_cumsum(self):
        """``cumsum`` matches the dense cumsum for each axis spelling."""
        expected = SparseDataFrame(self.frame.to_dense().cumsum())

        # No axis argument, axis=None and axis=0 are all compared
        # against the same expected frame.
        for kwargs in ({}, {'axis': None}, {'axis': 0}):
            result = self.frame.cumsum(**kwargs)
            tm.assert_sp_frame_equal(result, expected)

    def test_numpy_cumsum(self):
        """``np.cumsum`` works on the frame but rejects dtype/out."""
        result = np.cumsum(self.frame)
        expected = SparseDataFrame(self.frame.to_dense().cumsum())
        tm.assert_sp_frame_equal(result, expected)

        # Each keyword must raise ValueError with the matching message.
        rejected = [
            ("the 'dtype' parameter is not supported",
             {'dtype': np.int64}),
            ("the 'out' parameter is not supported",
             {'out': result}),
        ]
        for msg, kwargs in rejected:
            tm.assert_raises_regex(ValueError, msg, np.cumsum,
                                   self.frame, **kwargs)

    def test_numpy_func_call(self):
        """Call a set of NumPy reductions on the frame."""
        # no exception should be raised even though
        # numpy passes in 'axis=None' or `axis=-1'
        names = ('sum', 'cumsum', 'var',
                 'mean', 'prod', 'cumprod',
                 'std', 'min', 'max')
        for name in names:
            getattr(np, name)(self.frame)

    @pytest.mark.xfail(
        reason='Wrong SparseBlock initialization (GH 17386)')
    def test_quantile(self):
        """Compare sparse ``quantile`` for a single quantile with dense."""
        # GH 17386
        rows = [[1, 1], [2, 10], [3, 100], [nan, nan]]
        quantile = 0.1

        result = SparseDataFrame(rows).quantile(quantile)

        dense_expected = DataFrame(rows).quantile(quantile)
        sparse_expected = SparseSeries(dense_expected)

        tm.assert_series_equal(result, dense_expected)
        tm.assert_sp_series_equal(result, sparse_expected)

    @pytest.mark.xfail(
        reason='Wrong SparseBlock initialization (GH 17386)')
    def test_quantile_multi(self):
        """Compare sparse ``quantile`` for a list of quantiles with dense."""
        # GH 17386
        rows = [[1, 1], [2, 10], [3, 100], [nan, nan]]
        quantiles = [0.1, 0.5]

        result = SparseDataFrame(rows).quantile(quantiles)

        dense_expected = DataFrame(rows).quantile(quantiles)
        sparse_expected = SparseDataFrame(dense_expected)

        tm.assert_frame_equal(result, dense_expected)
        tm.assert_sp_frame_equal(result, sparse_expected)

    def test_assign_with_sparse_frame(self):
        """``assign`` on a sparse frame equals assign-then-``to_sparse``."""
        # GH 19163
        dense = pd.DataFrame({"a": [1, 2, 3]})

        assigned = dense.to_sparse(fill_value=False).assign(newcol=False)
        expected = dense.assign(newcol=False).to_sparse(fill_value=False)
        tm.assert_sp_frame_equal(assigned, expected)

        # Every column must be exactly a SparseSeries, not a subclass.
        for name in assigned.columns:
            assert type(assigned[name]) is SparseSeries