本文整理汇总了Python中pandas.crosstab方法的典型用法代码示例。如果您正苦于以下问题：Python pandas.crosstab方法的具体用法？Python pandas.crosstab怎么用？Python pandas.crosstab使用的例子？那么，这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在模块pandas的用法示例。
在下文中一共展示了pandas.crosstab方法的15个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Python代码示例。
示例1: test_distribution_of_lagged_choices
# 需要导入模块: import pandas [as 别名]
# 或者: from pandas import crosstab [as 别名]
def test_distribution_of_lagged_choices():
    """Compare simulated and observed lagged-choice shares in period 0.

    The example model ``kw_97_extended`` is simulated for a single period
    with 10,000 agents. For the observed data shipped with the example and
    for the simulated data, the column-normalized cross-tabulation of
    ``Lagged_Choice_1`` against ``Experience_School`` is computed on the rows
    with ``Period == 0``; both tables must agree within an absolute
    tolerance of 0.04.
    """

    def _lagged_choice_shares(frame):
        # Share of each lagged choice within every schooling-experience level.
        initial = frame.query("Period == 0")
        return pd.crosstab(
            initial.Lagged_Choice_1,
            initial.Experience_School,
            normalize="columns",
        )

    params, options, observed = rp.get_example_model("kw_97_extended")
    options["n_periods"] = 1
    options["simulated_agents"] = 10_000

    simulated = rp.get_simulate_func(params, options)(params)

    # Allow for 4% differences, which are likely for small subsets.
    np.testing.assert_allclose(
        _lagged_choice_shares(observed),
        _lagged_choice_shares(simulated),
        atol=0.04,
    )
示例2: test_crosstab_ndarray
# 需要导入模块: import pandas [as 别名]
# 或者: from pandas import crosstab [as 别名]
def test_crosstab_ndarray(self):
    """Check that crosstab treats raw ndarrays like the equivalent Series.

    Tables built from unnamed arrays plus explicit ``rownames`` /
    ``colnames`` must equal the tables built from the same data held as
    named DataFrame columns. Arrays passed without any names must get the
    default axis names ``row_0`` and ``col_0``.
    """
    first = np.random.randint(0, 5, size=100)
    second = np.random.randint(0, 3, size=100)
    third = np.random.randint(0, 10, size=100)
    frame = DataFrame({'a': first, 'b': second, 'c': third})

    # One row key against two column keys.
    from_arrays = crosstab(first, [second, third],
                           rownames=['a'], colnames=('b', 'c'))
    from_columns = crosstab(frame['a'], [frame['b'], frame['c']])
    tm.assert_frame_equal(from_arrays, from_columns)

    # Two row keys against one column key.
    from_arrays = crosstab([second, third], first,
                           colnames=['a'], rownames=('b', 'c'))
    from_columns = crosstab([frame['b'], frame['c']], frame['a'])
    tm.assert_frame_equal(from_arrays, from_columns)

    # Without explicit names, arbitrary default names are assigned.
    unnamed = crosstab(self.df['A'].values, self.df['C'].values)
    assert unnamed.index.name == 'row_0'
    assert unnamed.columns.name == 'col_0'
示例3: test_crosstab_with_empties
# 需要导入模块: import pandas [as 别名]
# 或者: from pandas import crosstab [as 别名]
def test_crosstab_with_empties(self):
    """Check crosstab output when the aggregated values are all NaN.

    Counting an all-NaN ``values`` column must give an all-zero table for
    every normalization mode, and the table stored in ``with_nan`` (one NaN
    cell, zeros elsewhere) when normalization is disabled.
    """
    # Check handling of empties
    frame = pd.DataFrame({'a': [1, 2, 2, 2, 2], 'b': [3, 3, 4, 4, 4],
                          'c': [np.nan] * 5})

    # Both expected tables share the same row and column labels.
    row_index = pd.Index([1, 2], name='a', dtype='int64')
    col_index = pd.Index([3, 4], name='b')

    def _count_table(normalize):
        return pd.crosstab(frame.a, frame.b, values=frame.c,
                           aggfunc='count', normalize=normalize)

    all_zero = pd.DataFrame([[0.0, 0.0], [0.0, 0.0]],
                            index=row_index, columns=col_index)
    for mode in (True, 'index', 'columns'):
        tm.assert_frame_equal(all_zero, _count_table(mode))

    with_nan = pd.DataFrame([[0.0, np.nan], [0.0, 0.0]],
                            index=row_index, columns=col_index)
    tm.assert_frame_equal(with_nan, _count_table(False))
示例4: test_crosstab_errors
# 需要导入模块: import pandas [as 别名]
# 或者: from pandas import crosstab [as 别名]
def test_crosstab_errors(self):
    """Check that invalid crosstab argument combinations raise ValueError.

    Each case pairs the expected error message (used as a regex by
    ``pytest.raises``) with the keyword arguments that must trigger it.
    """
    # Issue 12578
    frame = pd.DataFrame({'a': [1, 2, 2, 2, 2], 'b': [3, 3, 4, 4, 4],
                          'c': [1, 1, np.nan, 1, 1]})

    cases = [
        ('values cannot be used without an aggfunc.',
         {'values': frame.c}),
        ('aggfunc cannot be used without values',
         {'aggfunc': np.mean}),
        ('Not a valid normalize argument',
         {'normalize': '42'}),
        ('Not a valid normalize argument',
         {'normalize': 42}),
        ('Not a valid margins argument',
         {'normalize': 'all', 'margins': 42}),
    ]

    for message, kwargs in cases:
        with pytest.raises(ValueError, match=message):
            pd.crosstab(frame.a, frame.b, **kwargs)
示例5: test_mosaic_empty_cells
# 需要导入模块: import pandas [as 别名]
# 或者: from pandas import crosstab [as 别名]
def test_mosaic_empty_cells():
    """Smoke-test ``mosaic`` on data whose cross-tabulation has empty cells.

    The plot is drawn twice: once from the unstacked, transposed crosstab
    and once directly from the DataFrame with the two column names. All
    figures are closed after each call; nothing is asserted.
    """
    # SMOKE test see #2286
    import pandas as pd

    id2 = {64: 'Angelica',
           65: 'DXW_UID', 66: 'casuid01',
           67: 'casuid01', 68: 'EC93_uid',
           69: 'EC93_uid', 70: 'EC93_uid',
           60: 'DXW_UID', 61: 'AtmosFox',
           62: 'DXW_UID', 63: 'DXW_UID'}
    id1 = {64: 'TGP',
           65: 'Retention01', 66: 'default',
           67: 'default', 68: 'Musa_EC_9_3',
           69: 'Musa_EC_9_3', 70: 'Musa_EC_9_3',
           60: 'default', 61: 'default',
           62: 'default', 63: 'default'}
    mydata = pd.DataFrame({'id2': id2, 'id1': id1})

    # Most id1/id2 combinations never occur, so the table is mostly zeros.
    counts = pd.crosstab(mydata.id1, mydata.id2)
    _fig, _rects = mosaic(counts.T.unstack())
    pylab.close('all')

    _fig, _rects = mosaic(mydata, ['id1', 'id2'])
    pylab.close('all')
示例6: test_SquareTable_from_data
# 需要导入模块: import pandas [as 别名]
# 或者: from pandas import crosstab [as 别名]
def test_SquareTable_from_data():
    """Check that SquareTable gives the same summary for every input form.

    The same seeded random data is passed as a crosstab DataFrame, as the
    raw two-column DataFrame (via ``from_data``) and as a plain ndarray.
    The string form and the first cell of the table are also pinned.
    """
    np.random.seed(434)
    frame = pd.DataFrame(index=range(100), columns=["v1", "v2"])
    # Columns are filled in this order so the seeded draws stay reproducible.
    for column in ("v1", "v2"):
        frame[column] = np.random.randint(0, 5, 100)
    counts = pd.crosstab(frame["v1"], frame["v2"])

    from_table = ctab.SquareTable(counts)
    from_frame = ctab.SquareTable.from_data(frame)
    from_array = ctab.SquareTable(np.asarray(counts))

    assert_equal(from_table.summary().as_text(),
                 from_frame.summary().as_text())
    assert_equal(from_frame.summary().as_text(),
                 from_array.summary().as_text())

    text = str(from_table)
    assert_equal(text.startswith('A 5x5 contingency table with counts:'), True)
    assert_equal(from_table.table[0, 0], 8.)
示例7: test_from_data
# 需要导入模块: import pandas [as 别名]
# 或者: from pandas import crosstab [as 别名]
def test_from_data(self):
    """Check that StratifiedTable.from_data matches manually built tables.

    One hundred seeded binary observations are split into ten consecutive
    blocks of ten rows. A StratifiedTable built from the per-block
    crosstabs must have the same summary text as one built by
    ``from_data`` using the ``strat`` column.
    """
    np.random.seed(241)
    frame = pd.DataFrame(index=range(100), columns=("v1", "v2", "strat"))
    # Columns are filled in this order so the seeded draws stay reproducible.
    for column in ("v1", "v2"):
        frame[column] = np.random.randint(0, 2, 100)
    # Stratum k covers rows 10*k .. 10*k + 9.
    frame["strat"] = np.kron(np.arange(10), np.ones(10))

    strata_rows = (np.arange(10 * k, 10 * (k + 1)) for k in range(10))
    tables = [
        pd.crosstab(frame.loc[rows, "v1"], frame.loc[rows, "v2"])
        for rows in strata_rows
    ]

    from_tables = ctab.StratifiedTable(tables)
    from_frame = ctab.StratifiedTable.from_data("v1", "v2", "strat", frame)
    assert_equal(from_tables.summary().as_text(),
                 from_frame.summary().as_text())
示例8: from_data
# 需要导入模块: import pandas [as 别名]
# 或者: from pandas import crosstab [as 别名]
def from_data(cls, data, shift_zeros=True):
    """
    Construct a Table object from data.

    Parameters
    ----------
    data : array-like
        The raw data, from which a contingency table is constructed
        using the first two columns. A DataFrame is sliced by position;
        any other input (ndarray, nested lists or tuples, ...) is first
        converted with ``numpy.asarray`` so that two-dimensional
        indexing works.
    shift_zeros : boolean
        Forwarded unchanged to the constructor. Per the original
        documentation: if True and any cell count is zero, add 0.5 to
        all values in the table.

    Returns
    -------
    The object returned by ``cls(table, shift_zeros)``, where ``table`` is
    the cross-tabulation of the first column against the second.
    """
    import numpy as np

    if isinstance(data, pd.DataFrame):
        first, second = data.iloc[:, 0], data.iloc[:, 1]
    else:
        # Plain sequences do not support ``data[:, 0]``; coerce them so the
        # documented "array-like" contract actually holds.
        values = np.asarray(data)
        first, second = values[:, 0], values[:, 1]

    table = pd.crosstab(first, second)
    return cls(table, shift_zeros)
示例9: test_crosstab_errors
# 需要导入模块: import pandas [as 别名]
# 或者: from pandas import crosstab [as 别名]
def test_crosstab_errors(self):
    """Check that invalid crosstab argument combinations raise ValueError.

    Covers: ``values`` without ``aggfunc``, ``aggfunc`` without ``values``,
    invalid ``normalize`` values (string and integer) and an invalid
    ``margins`` value. ``pytest.raises(..., match=...)`` is used because
    ``tm.assert_raises_regex`` is no longer provided by pandas.
    """
    import pytest

    # Issue 12578
    df = pd.DataFrame({'a': [1, 2, 2, 2, 2], 'b': [3, 3, 4, 4, 4],
                       'c': [1, 1, np.nan, 1, 1]})

    error = 'values cannot be used without an aggfunc.'
    with pytest.raises(ValueError, match=error):
        pd.crosstab(df.a, df.b, values=df.c)

    error = 'aggfunc cannot be used without values'
    with pytest.raises(ValueError, match=error):
        pd.crosstab(df.a, df.b, aggfunc=np.mean)

    error = 'Not a valid normalize argument'
    with pytest.raises(ValueError, match=error):
        pd.crosstab(df.a, df.b, normalize='42')

    with pytest.raises(ValueError, match=error):
        pd.crosstab(df.a, df.b, normalize=42)

    error = 'Not a valid margins argument'
    with pytest.raises(ValueError, match=error):
        pd.crosstab(df.a, df.b, normalize='all', margins=42)
示例10: SplitData
# 需要导入模块: import pandas [as 别名]
# 或者: from pandas import crosstab [as 别名]
def SplitData(self, df):
    """Pick the best-scoring feature column and partition ``df`` by it.

    The last column of ``df`` is used as the label; every other column is
    a candidate feature. For each feature, the crosstab of the feature
    against the label is passed to ``self.calg``; the label counts are
    passed to ``self.calH``. The per-feature score is then

    * ``calH - calg`` when ``self.method == "ID3"``
    * ``1 - calg / calH`` when ``self.method == "C4.5"``

    Parameters
    ----------
    df : pandas.DataFrame
        Feature columns followed by one label column.

    Returns
    -------
    generator or None
        ``None`` if there is no feature column or the best score is below
        ``self.eps``. Otherwise a generator of ``(name, value, subset)``
        tuples, one per distinct value of the chosen column, where
        ``subset`` is the matching rows with the chosen column dropped.

    Raises
    ------
    ValueError
        If ``self.method`` is neither ``"ID3"`` nor ``"C4.5"``.
    """
    import numpy as np

    labels = df.iloc[:, -1]
    features = df.iloc[:, :-1]
    if features.columns.size == 0:
        # Nothing left to split on.
        return None

    # entropy of y
    label_counts = labels.groupby(labels).count()
    label_score = self.calH(label_counts)

    # use crosstab to count the frequency of each feature value per label;
    # an ndarray keeps the arithmetic below valid even when calH returns a
    # plain Python float.
    feature_scores = np.asarray([
        self.calg(pd.crosstab(features.iloc[:, i], labels))
        for i in range(features.columns.size)
    ])

    if self.method == "ID3":
        gain = label_score - feature_scores
    elif self.method == "C4.5":
        gain = 1 - feature_scores / label_score
    else:
        raise ValueError(
            "unknown method %r; expected 'ID3' or 'C4.5'" % (self.method,))

    if gain.max() < self.eps:
        return None

    # the split location
    best = gain.argmax()
    name = df.columns[best]

    # divide into parts
    groups = df.groupby(df.iloc[:, best])
    return ((name, value, part.drop(name, axis=1)) for value, part in groups)
示例11: crosstab_data
# 需要导入模块: import pandas [as 别名]
# 或者: from pandas import crosstab [as 别名]
def crosstab_data(columns_var, row_var, data, unique_num, *args):
    """Cross-tabulate two variables of ``data`` with margins and relabel axes.

    Both variables are first passed through ``merger_data``, which returns
    a 3-tuple; its first element is cross-tabulated and its third element
    (the "bins") decides whether the axis is relabelled. When the bins are
    not ``None``, the axis labels other than the ``'All'`` margin are
    replaced by the result of ``rename_columns`` and ``'All'`` is appended
    back as the last label.

    Parameters
    ----------
    columns_var, row_var
        Identifiers of the column and row variables, forwarded to
        ``merger_data``.
    data
        Source data, forwarded to ``merger_data``.
    unique_num
        Forwarded to ``merger_data`` for both variables.
    *args
        Four positional extras: ``args[0]`` and ``args[1]`` are forwarded to
        ``merger_data`` for the column and row variable respectively;
        ``args[2]`` and ``args[3]`` are forwarded to ``rename_columns`` for
        the column and row labels respectively.

    Returns
    -------
    pandas.DataFrame
        The crosstab of the row data against the column data, including
        the ``'All'`` margins.
    """
    columns_data, _, columns_bins = merger_data(
        data, columns_var, unique_num, args[0])
    row_data, _, row_bins = merger_data(data, row_var, unique_num, args[1])

    result = pd.crosstab(row_data, columns_data, margins=True, dropna=False)

    if columns_bins is not None:
        columns = result.columns.tolist()
        columns.remove('All')
        columns_bins_list = rename_columns(columns, columns_bins, args[2])
        columns_bins_list.append('All')
        # Assign the axis directly: ``set_axis(..., inplace=True)`` is not
        # accepted by pandas >= 2.0.
        result.columns = columns_bins_list

    if row_bins is not None:
        index = result.index.tolist()
        index.remove('All')
        index_bins_list = rename_columns(index, row_bins, args[3])
        index_bins_list.append('All')
        result.index = index_bins_list

    return result
# 写入所有高iv的变量分组和图到excel
示例12: crosstab_df
# 需要导入模块: import pandas [as 别名]
# 或者: from pandas import crosstab [as 别名]
def crosstab_df(labels, decisions):
    """
    Count how often each label value occurs with each decision value.

    Parameters
    ------------
    labels : array_like
        containing categorical values like ['M', 'F']
    decisions : array_like
        containing boolean / binary values

    Returns
    --------
    crosstab : pandas.DataFrame
        one row per distinct label value and one column per distinct
        decision value, e.g.

                      False  True
        TopGroup          5     4
        BottomGroup       3     4
    """
    # rows are label values (e.g. ['F', 'M'])
    row_values = pd.Series(labels)
    # columns are decision values (e.g. [False, True])
    column_values = pd.Series(decisions)
    return pd.crosstab(row_values, column_values)
示例13: test_crosstab_single
# 需要导入模块: import pandas [as 别名]
# 或者: from pandas import crosstab [as 别名]
def test_crosstab_single(self):
    """Check a one-by-one crosstab against the equivalent groupby count.

    The expected table is the size of each (A, C) group, unstacked on C,
    with missing combinations filled with 0 and cast to int64.
    """
    frame = self.df
    observed = crosstab(frame['A'], frame['C'])

    group_sizes = frame.groupby(['A', 'C']).size().unstack()
    expected = group_sizes.fillna(0).astype(np.int64)

    tm.assert_frame_equal(observed, expected)
示例14: test_crosstab_multiple
# 需要导入模块: import pandas [as 别名]
# 或者: from pandas import crosstab [as 别名]
def test_crosstab_multiple(self):
    """Check crosstab with two keys on one axis against groupby counts.

    Both orientations are covered: two column keys (B, C) against row key
    A, and two row keys (B, C) against column key A. Missing combinations
    are filled with 0 and the counts cast to int64 before comparison.
    """
    frame = self.df

    # Row key A, column keys (B, C).
    observed = crosstab(frame['A'], [frame['B'], frame['C']])
    sizes = frame.groupby(['A', 'B', 'C']).size()
    wide = sizes.unstack('B').unstack('C')
    tm.assert_frame_equal(observed, wide.fillna(0).astype(np.int64))

    # Row keys (B, C), column key A.
    observed = crosstab([frame['B'], frame['C']], frame['A'])
    sizes = frame.groupby(['B', 'C', 'A']).size()
    wide = sizes.unstack('A')
    tm.assert_frame_equal(observed, wide.fillna(0).astype(np.int64))
示例15: test_crosstab_non_aligned
# 需要导入模块: import pandas [as 别名]
# 或者: from pandas import crosstab [as 别名]
def test_crosstab_non_aligned(self):
    """Check crosstab when the second input does not share the first's index.

    A three-row Series is cross-tabulated first against a longer Series
    whose index only partly overlaps, then against a plain ndarray. Both
    calls must produce the same 2x2 table with the default axis names.
    """
    # GH 17005
    rows = pd.Series([0, 1, 1], index=['a', 'b', 'c'])
    longer_series = pd.Series([3, 4, 3, 4, 3],
                              index=['a', 'b', 'c', 'd', 'f'])
    plain_array = np.array([3, 4, 3])

    expected = pd.DataFrame([[1, 0], [1, 1]],
                            index=Index([0, 1], name='row_0'),
                            columns=Index([3, 4], name='col_0'))

    for columns in (longer_series, plain_array):
        tm.assert_frame_equal(crosstab(rows, columns), expected)