本文整理汇总了Python中pandas.Grouper方法的典型用法代码示例。如果您正苦于以下问题：Python pandas.Grouper方法的具体用法？Python pandas.Grouper怎么用？Python pandas.Grouper使用的例子？那么恭喜您，这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在模块pandas的用法示例。
在下文中一共展示了pandas.Grouper方法的15个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Python代码示例。
示例1: test_nunique_with_timegrouper
# 需要导入模块: import pandas [as 别名]
# 或者: from pandas import Grouper [as 别名]
def test_nunique_with_timegrouper():
    """Compare ``nunique`` on an hourly ``Grouper`` with the ``apply`` path.

    Regression test for GH 13453: the dedicated ``nunique`` reduction must
    give the same series as ``apply(pd.Series.nunique)`` on the same grouping.
    """
    stamps = [
        Timestamp('2016-06-28 09:35:35'),
        Timestamp('2016-06-28 16:09:30'),
        Timestamp('2016-06-28 16:46:28'),
    ]
    test = pd.DataFrame({'time': stamps, 'data': ['1', '2', '3']})
    test = test.set_index('time')

    hourly_data = test.groupby(pd.Grouper(freq='h'))['data']
    result = hourly_data.nunique()

    # Reference value: a fresh grouping reduced through the generic apply.
    reference_data = test.groupby(pd.Grouper(freq='h'))['data']
    expected = reference_data.apply(pd.Series.nunique)

    tm.assert_series_equal(result, expected)
# count
# --------------------------------
示例2: test_timegrouper_apply_return_type_series
# 需要导入模块: import pandas [as 别名]
# 或者: from pandas import Grouper [as 别名]
def test_timegrouper_apply_return_type_series(self):
    """Check ``apply`` returning a Series with and without ``freq``.

    Issue #11742: using ``apply`` with a frequency-based grouper should give
    the same return type as ``apply`` with a plain ``Grouper``. The two
    results are compared as frames after dropping their indexes.
    """
    df = pd.DataFrame({
        'date': ['10/10/2000', '11/10/2000'],
        'value': [10, 13],
    })
    # Same data, but with the 'date' column parsed to datetimes.
    df_dt = df.copy()
    df_dt['date'] = pd.to_datetime(df_dt['date'])

    def sumfunc_series(x):
        total = x['value'].sum()
        return pd.Series([total], ('sum',))

    by_key = df.groupby(pd.Grouper(key='date'))
    expected = by_key.apply(sumfunc_series)

    by_month = df_dt.groupby(pd.Grouper(freq='M', key='date'))
    result = by_month.apply(sumfunc_series)

    assert_frame_equal(result.reset_index(drop=True),
                       expected.reset_index(drop=True))
示例3: test_timegrouper_apply_return_type_value
# 需要导入模块: import pandas [as 别名]
# 或者: from pandas import Grouper [as 别名]
def test_timegrouper_apply_return_type_value(self):
    """Check ``apply`` returning a scalar with and without ``freq``.

    Issue #11742: using ``apply`` with a frequency-based grouper should give
    the same return type as ``apply`` with a plain ``Grouper``. The two
    results are compared as series after dropping their indexes.

    The frequency-based grouping uses ``pd.Grouper(freq=..., key=...)``
    rather than the deprecated ``pd.TimeGrouper`` alias, so the test does
    not depend on that alias (or its FutureWarning) being available.
    """
    df = pd.DataFrame({'date': ['10/10/2000', '11/10/2000'],
                       'value': [10, 13]})
    # Same data, but with the 'date' column parsed to datetimes.
    df_dt = df.copy()
    df_dt['date'] = pd.to_datetime(df_dt['date'])

    def sumfunc_value(x):
        return x.value.sum()

    expected = df.groupby(pd.Grouper(key='date')).apply(sumfunc_value)
    result = (df_dt.groupby(pd.Grouper(freq='M', key='date'))
              .apply(sumfunc_value))
    assert_series_equal(result.reset_index(drop=True),
                        expected.reset_index(drop=True))
示例4: test_groupby_agg_ohlc_non_first
# 需要导入模块: import pandas [as 别名]
# 或者: from pandas import Grouper [as 别名]
def test_groupby_agg_ohlc_non_first():
    """Aggregate with ``['sum', 'ohlc']``, i.e. 'ohlc' not listed first.

    Regression test for GH 21716. The expected frame lists the four ohlc
    columns followed by the sum column.
    """
    df = pd.DataFrame(
        [[1], [1]],
        columns=['foo'],
        index=pd.date_range('2018-01-01', periods=2, freq='D'),
    )

    expected_columns = pd.MultiIndex.from_tuples((
        ('foo', 'ohlc', 'open'),
        ('foo', 'ohlc', 'high'),
        ('foo', 'ohlc', 'low'),
        ('foo', 'ohlc', 'close'),
        ('foo', 'sum', 'foo'),
    ))
    expected_index = pd.date_range('2018-01-01', periods=2, freq='D')
    expected = pd.DataFrame(
        [[1, 1, 1, 1, 1],
         [1, 1, 1, 1, 1]],
        columns=expected_columns,
        index=expected_index,
    )

    daily = df.groupby(pd.Grouper(freq='D'))
    result = daily.agg(['sum', 'ohlc'])

    tm.assert_frame_equal(result, expected)
示例5: test_grouper_multilevel_freq
# 需要导入模块: import pandas [as 别名]
# 或者: from pandas import Grouper [as 别名]
def test_grouper_multilevel_freq(self):
    """Group a two-level date MultiIndex weekly via ``level`` + ``freq``.

    GH 7885: ``pd.Grouper`` with both ``level`` and ``freq`` specified.
    Grouping by level (by name and by position) must match grouping the
    reset-index frame by the corresponding column keys.
    """
    from datetime import date, timedelta

    d0 = date.today() - timedelta(days=14)
    dates = date_range(d0, date.today())
    date_index = pd.MultiIndex.from_product([dates, dates],
                                            names=['foo', 'bar'])
    df = pd.DataFrame(np.random.randint(0, 100, 225), index=date_index)

    # Reference: move the levels into columns and group on them by key.
    key_groupers = [pd.Grouper(key=name, freq='W')
                    for name in ('foo', 'bar')]
    expected = df.reset_index().groupby(key_groupers).sum()
    # reset index changes columns dtype to object
    expected.columns = pd.Index([0], dtype='int64')

    # Check string level names first, then integer level positions.
    for levels in (('foo', 'bar'), (0, 1)):
        level_groupers = [pd.Grouper(level=lvl, freq='W')
                          for lvl in levels]
        result = df.groupby(level_groupers).sum()
        assert_frame_equal(result, expected)
示例6: test_grouper_getting_correct_binner
# 需要导入模块: import pandas [as 别名]
# 或者: from pandas import Grouper [as 别名]
def test_grouper_getting_correct_binner(self):
    """Combine a plain level grouper with a monthly level grouper.

    GH 10063: using a non-time-based grouper and a time-based grouper
    together, both addressed by level.
    """
    days = date_range('20130101', periods=80)
    source_index = pd.MultiIndex.from_product([list('ab'), days],
                                              names=['one', 'two'])
    df = DataFrame({'A': 1}, index=source_index)

    groupers = [
        pd.Grouper(level='one'),
        pd.Grouper(level='two', freq='M'),
    ]
    result = df.groupby(groupers).sum()

    month_ends = date_range('20130101', freq='M', periods=3)
    expected_index = MultiIndex.from_product([list('ab'), month_ends],
                                             names=['one', 'two'])
    # Expected per-month sums of the constant column for each of 'a', 'b'.
    expected = DataFrame({'A': [31, 28, 21, 31, 28, 21]},
                         index=expected_index)

    assert_frame_equal(result, expected)
示例7: test_groupby_grouper_f_sanity_checked
# 需要导入模块: import pandas [as 别名]
# 或者: from pandas import Grouper [as 别名]
def test_groupby_grouper_f_sanity_checked(self):
    """A grouper function yielding wrong-length labels must be rejected.

    GH3035: index.map is used to apply the grouper to the index. If it
    fails on the elements, map tries it on the entire index as a sequence,
    which can yield invalid results that cause trouble down the line.
    The surprise comes from using key[0:6] rather than str(key)[0:6] when
    the elements are Timestamp: the result is Index[0:6], very confusing.
    """
    dates = date_range('01-Jan-2013', periods=12, freq='MS')
    ts = Series(np.random.randn(12), index=dates)

    def first_six(key):
        return key[0:6]

    msg = r"Grouper result violates len\(labels\) == len\(data\)"
    with pytest.raises(AssertionError, match=msg):
        ts.groupby(first_six)
示例8: test_list_grouper_with_nat
# 需要导入模块: import pandas [as 别名]
# 或者: from pandas import Grouper [as 别名]
def test_list_grouper_with_nat(self):
    """Check ``groups`` for a yearly ``Grouper`` when the last date is NaT.

    GH 14715: the grouper is passed once wrapped in a list and once on its
    own, and ``.groups`` is compared with a different expected dict in
    each case.
    """
    daily = pd.date_range('1/1/2011', periods=365, freq='D')
    df = pd.DataFrame({'date': daily})
    df.iloc[-1] = pd.NaT
    grouper = pd.Grouper(key='date', freq='AS')

    # Grouper in a list grouping
    listed = df.groupby([grouper])
    expected = {pd.Timestamp('2011-01-01'): pd.Index(list(range(364)))}
    tm.assert_dict_equal(listed.groups, expected)

    # Test case without a list
    bare = df.groupby(grouper)
    expected = {pd.Timestamp('2011-01-01'): 365}
    tm.assert_dict_equal(bare.groups, expected)
# get_group
# --------------------------------
示例9: test_resample_nonexistent_time_bin_edge
# 需要导入模块: import pandas [as 别名]
# 或者: from pandas import Grouper [as 别名]
def test_resample_nonexistent_time_bin_edge(self):
    """Resample and group tz-aware data in two regression scenarios.

    GH 19375: a 15-minute series localized to US/Pacific is resampled at
    '900S' and must equal the localized input.
    GH 23742: an hourly index converted to America/Sao_Paulo is grouped
    daily; the result index is compared with a ``date_range`` built with
    ``nonexistent='shift_forward'``.
    """
    # GH 19375
    quarter_hours = date_range('2017-03-12', '2017-03-12 1:45:00',
                               freq='15T')
    naive = Series(np.zeros(len(quarter_hours)), index=quarter_hours)
    expected = naive.tz_localize('US/Pacific')
    result = expected.resample('900S').mean()
    tm.assert_series_equal(result, expected)

    # GH 23742
    hourly = date_range(start='2017-10-10', end='2017-10-20', freq='1H')
    hourly = hourly.tz_localize('UTC').tz_convert('America/Sao_Paulo')
    df = DataFrame(data=list(range(len(hourly))), index=hourly)
    result = df.groupby(pd.Grouper(freq='1D')).count()
    expected = date_range(
        start='2017-10-09',
        end='2017-10-20',
        freq='D',
        tz="America/Sao_Paulo",
        nonexistent='shift_forward',
        closed='left',
    )
    tm.assert_index_equal(result.index, expected)
示例10: test_resample_nunique
# 需要导入模块: import pandas [as 别名]
# 或者: from pandas import Grouper [as 别名]
def test_resample_nunique():
    """Check ``nunique`` on resampled and time-grouped data.

    GH 12352. The expected series is built by applying ``nunique`` per
    daily group through ``apply``. It is then compared with ``nunique``
    called on: the frame resampler, the frame groupby, the column
    resampler and the column groupby.
    """
    df = DataFrame({
        'ID': {Timestamp('2015-06-05 00:00:00'): '0010100903',
               Timestamp('2015-06-08 00:00:00'): '0010150847'},
        'DATE': {Timestamp('2015-06-05 00:00:00'): '2015-06-05',
                 Timestamp('2015-06-08 00:00:00'): '2015-06-08'}})
    r = df.resample('D')
    g = df.groupby(pd.Grouper(freq='D'))
    expected = df.groupby(pd.Grouper(freq='D')).ID.apply(
        lambda x: x.nunique())
    assert expected.name == 'ID'

    # Use the loop variable so that both the resampler and the groupby
    # object are exercised (previously only ``r`` was checked, twice).
    for t in [r, g]:
        result = t.ID.nunique()
        assert_series_equal(result, expected)

    result = df.ID.resample('D').nunique()
    assert_series_equal(result, expected)

    result = df.ID.groupby(pd.Grouper(freq='D')).nunique()
    assert_series_equal(result, expected)
示例11: test_apply_with_mutated_index
# 需要导入模块: import pandas [as 别名]
# 或者: from pandas import Grouper [as 别名]
def test_apply_with_mutated_index():
    """Compare ``resample('M').apply`` with the monthly ``Grouper`` path.

    GH 15169: the applied function ignores its input and returns a Series
    with its own index ('a', 'b'). The check is done for a frame and for a
    single column.
    """
    index = pd.date_range('1-1-2015', '12-31-15', freq='D')
    df = DataFrame(data={'col1': np.random.rand(len(index))}, index=index)

    def f(x):
        return Series([1, 2], index=['a', 'b'])

    grouped_frame = df.groupby(pd.Grouper(freq='M'))
    expected = grouped_frame.apply(f)
    result = df.resample('M').apply(f)
    assert_frame_equal(result, expected)

    # A case for series
    grouped_column = df['col1'].groupby(pd.Grouper(freq='M'))
    expected = grouped_column.apply(f)
    result = df['col1'].resample('M').apply(f)
    assert_series_equal(result, expected)
示例12: test_resample_nunique
# 需要导入模块: import pandas [as 别名]
# 或者: from pandas import Grouper [as 别名]
def test_resample_nunique(self):
    """Check ``nunique`` on resampled and time-grouped data.

    GH 12352. The expected series is built by applying ``nunique`` per
    daily group through ``apply``. It is then compared with ``nunique``
    called on: the frame resampler, the frame groupby, the column
    resampler and the column groupby.
    """
    df = DataFrame({
        'ID': {Timestamp('2015-06-05 00:00:00'): '0010100903',
               Timestamp('2015-06-08 00:00:00'): '0010150847'},
        'DATE': {Timestamp('2015-06-05 00:00:00'): '2015-06-05',
                 Timestamp('2015-06-08 00:00:00'): '2015-06-08'}})
    r = df.resample('D')
    g = df.groupby(pd.Grouper(freq='D'))
    expected = df.groupby(pd.Grouper(freq='D')).ID.apply(
        lambda x: x.nunique())
    assert expected.name == 'ID'

    # Use the loop variable so that both the resampler and the groupby
    # object are exercised (previously only ``r`` was checked, twice).
    for t in [r, g]:
        result = t.ID.nunique()
        assert_series_equal(result, expected)

    result = df.ID.resample('D').nunique()
    assert_series_equal(result, expected)

    result = df.ID.groupby(pd.Grouper(freq='D')).nunique()
    assert_series_equal(result, expected)
示例13: test_scalar_call_versus_list_call
# 需要导入模块: import pandas [as 别名]
# 或者: from pandas import Grouper [as 别名]
def test_scalar_call_versus_list_call(self):
    """Group by a bare ``Grouper`` and by the same grouper in a list.

    Issue: 17530. ``count()`` on both groupings must give equal frames.
    """
    times = pd.Series(['2017-08-09 13:32:23', '2017-08-11 23:23:15',
                       '2017-08-11 22:23:15'],
                      dtype='datetime64[ns]')
    raw = {
        'location': ['shanghai', 'beijing', 'shanghai'],
        'time': times,
        'value': [1, 2, 3],
    }
    data_frame = pd.DataFrame(raw).set_index('time')
    grouper = pd.Grouper(freq='D')

    # Scalar call: the grouper is passed directly.
    result = data_frame.groupby(grouper).count()
    # List call: the same grouper wrapped in a one-element list.
    expected = data_frame.groupby([grouper]).count()

    assert_frame_equal(result, expected)
示例14: __init__
# 需要导入模块: import pandas [as 别名]
# 或者: from pandas import Grouper [as 别名]
def __init__(self, time_grouper=MONTH_GROUPER, return_anoms=True, **qm_kwargs):
    """Store the grouper and options on the instance.

    Parameters
    ----------
    time_grouper : str or object
        A string is wrapped as ``pd.Grouper(freq=time_grouper)``; any
        other value is stored as given.
    return_anoms : bool
        Stored unchanged as ``self.return_anoms``.
    **qm_kwargs
        Extra keyword arguments, stored unchanged as ``self.qm_kwargs``
        (NOTE(review): their consumer is not visible here -- confirm).
    """
    self.time_grouper = (
        pd.Grouper(freq=time_grouper)
        if isinstance(time_grouper, str)
        else time_grouper
    )
    self.return_anoms = return_anoms
    self.qm_kwargs = qm_kwargs
示例15: test_pivot_no_values
# 需要导入模块: import pandas [as 别名]
# 或者: from pandas import Grouper [as 别名]
def test_pivot_no_values(self):
    """Call ``pivot_table`` without ``values`` in three configurations.

    GH 14380. Covers index/columns given as arrays derived from the
    DatetimeIndex, a ``Grouper`` used for the columns only, and a
    ``Grouper`` used for both index and columns.
    """
    idx = pd.DatetimeIndex(['2011-01-01', '2011-02-01', '2011-01-02',
                            '2011-01-01', '2011-01-02'])

    # Case 1: month as index, day as columns.
    df = pd.DataFrame({'A': [1, 2, 3, 4, 5]}, index=idx)
    res = df.pivot_table(index=df.index.month, columns=df.index.day)
    exp_columns = pd.MultiIndex.from_tuples([('A', 1), ('A', 2)])
    exp = pd.DataFrame([[2.5, 4.0], [2.0, np.nan]],
                       index=[1, 2], columns=exp_columns)
    tm.assert_frame_equal(res, exp)

    # Case 2: month as index, monthly Grouper on the 'dt' column as columns.
    dt_values = pd.date_range('2011-01-01', freq='D', periods=5)
    df = pd.DataFrame({'A': [1, 2, 3, 4, 5], 'dt': dt_values}, index=idx)
    monthly_dt = pd.Grouper(key='dt', freq='M')
    res = df.pivot_table(index=df.index.month, columns=monthly_dt)
    exp_columns = pd.MultiIndex.from_tuples(
        [('A', pd.Timestamp('2011-01-31'))])
    exp_columns.names = [None, 'dt']
    exp = pd.DataFrame([3.25, 2.0], index=[1, 2], columns=exp_columns)
    tm.assert_frame_equal(res, exp)

    # Case 3: annual Grouper as index, monthly Grouper on 'dt' as columns.
    res = df.pivot_table(index=pd.Grouper(freq='A'),
                         columns=pd.Grouper(key='dt', freq='M'))
    exp = pd.DataFrame([3],
                       index=pd.DatetimeIndex(['2011-12-31']),
                       columns=exp_columns)
    tm.assert_frame_equal(res, exp)