当前位置: 首页>>代码示例>>Python>>正文


Python pandas.crosstab方法代码示例

本文整理汇总了Python中pandas.crosstab方法的典型用法代码示例。如果您正苦于以下问题:Python pandas.crosstab方法的具体用法?Python pandas.crosstab怎么用?Python pandas.crosstab使用的例子?那么恭喜您, 这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在pandas的用法示例。


在下文中一共展示了pandas.crosstab方法的15个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Python代码示例。

示例1: test_distribution_of_lagged_choices

# 需要导入模块: import pandas [as 别名]
# 或者: from pandas import crosstab [as 别名]
def test_distribution_of_lagged_choices():
    """Compare simulated and reference lagged-choice shares in period 0.

    For the ``kw_97_extended`` example model, the column-normalized
    cross-tabulation of ``Lagged_Choice_1`` against ``Experience_School`` is
    computed twice for ``Period == 0``: once from the data frame returned by
    ``rp.get_example_model`` and once from freshly simulated data.  Both
    tables must agree up to an absolute tolerance of 0.04.
    """

    def first_period_shares(frame):
        # Distribution of the lagged choice within each value of
        # Experience_School, restricted to the first period.
        initial = frame.query("Period == 0")
        return pd.crosstab(
            initial.Lagged_Choice_1, initial.Experience_School, normalize="columns"
        )

    params, options, reference_df = rp.get_example_model("kw_97_extended")

    # Simulate a single period for 10,000 agents.
    options["n_periods"] = 1
    options["simulated_agents"] = 10_000
    simulated_df = rp.get_simulate_func(params, options)(params)

    expected = first_period_shares(reference_df)
    calculated = first_period_shares(simulated_df)

    # Small subsets are noisy, so allow absolute differences of up to 0.04.
    np.testing.assert_allclose(expected, calculated, atol=0.04)
开发者ID:OpenSourceEconomics,项目名称:respy,代码行数:23,代码来源:test_replication_kw_97.py

示例2: test_crosstab_ndarray

# 需要导入模块: import pandas [as 别名]
# 或者: from pandas import crosstab [as 别名]
def test_crosstab_ndarray(self):
        a = np.random.randint(0, 5, size=100)
        b = np.random.randint(0, 3, size=100)
        c = np.random.randint(0, 10, size=100)

        df = DataFrame({'a': a, 'b': b, 'c': c})

        result = crosstab(a, [b, c], rownames=['a'], colnames=('b', 'c'))
        expected = crosstab(df['a'], [df['b'], df['c']])
        tm.assert_frame_equal(result, expected)

        result = crosstab([b, c], a, colnames=['a'], rownames=('b', 'c'))
        expected = crosstab([df['b'], df['c']], df['a'])
        tm.assert_frame_equal(result, expected)

        # assign arbitrary names
        result = crosstab(self.df['A'].values, self.df['C'].values)
        assert result.index.name == 'row_0'
        assert result.columns.name == 'col_0' 
开发者ID:Frank-qlu,项目名称:recruit,代码行数:21,代码来源:test_pivot.py

示例3: test_crosstab_with_empties

# 需要导入模块: import pandas [as 别名]
# 或者: from pandas import crosstab [as 别名]
def test_crosstab_with_empties(self):
        # Check handling of empties
        df = pd.DataFrame({'a': [1, 2, 2, 2, 2], 'b': [3, 3, 4, 4, 4],
                           'c': [np.nan, np.nan, np.nan, np.nan, np.nan]})

        empty = pd.DataFrame([[0.0, 0.0], [0.0, 0.0]],
                             index=pd.Index([1, 2],
                                            name='a',
                                            dtype='int64'),
                             columns=pd.Index([3, 4], name='b'))

        for i in [True, 'index', 'columns']:
            calculated = pd.crosstab(df.a, df.b, values=df.c, aggfunc='count',
                                     normalize=i)
            tm.assert_frame_equal(empty, calculated)

        nans = pd.DataFrame([[0.0, np.nan], [0.0, 0.0]],
                            index=pd.Index([1, 2],
                                           name='a',
                                           dtype='int64'),
                            columns=pd.Index([3, 4], name='b'))

        calculated = pd.crosstab(df.a, df.b, values=df.c, aggfunc='count',
                                 normalize=False)
        tm.assert_frame_equal(nans, calculated) 
开发者ID:Frank-qlu,项目名称:recruit,代码行数:27,代码来源:test_pivot.py

示例4: test_crosstab_errors

# 需要导入模块: import pandas [as 别名]
# 或者: from pandas import crosstab [as 别名]
def test_crosstab_errors(self):
        """Check that invalid ``pd.crosstab`` arguments raise ``ValueError``.

        Covers issue 12578: ``values`` without ``aggfunc``, ``aggfunc``
        without ``values``, and invalid ``normalize`` / ``margins`` arguments.
        """
        frame = pd.DataFrame({'a': [1, 2, 2, 2, 2], 'b': [3, 3, 4, 4, 4],
                              'c': [1, 1, np.nan, 1, 1]})

        # Each entry: (keyword arguments for crosstab, expected message pattern).
        invalid_calls = [
            ({'values': frame.c}, 'values cannot be used without an aggfunc.'),
            ({'aggfunc': np.mean}, 'aggfunc cannot be used without values'),
            ({'normalize': '42'}, 'Not a valid normalize argument'),
            ({'normalize': 42}, 'Not a valid normalize argument'),
            ({'normalize': 'all', 'margins': 42},
             'Not a valid margins argument'),
        ]

        for kwargs, error in invalid_calls:
            with pytest.raises(ValueError, match=error):
                pd.crosstab(frame.a, frame.b, **kwargs)
开发者ID:Frank-qlu,项目名称:recruit,代码行数:26,代码来源:test_pivot.py

示例5: test_mosaic_empty_cells

# 需要导入模块: import pandas [as 别名]
# 或者: from pandas import crosstab [as 别名]
def test_mosaic_empty_cells():
    """Smoke test (see #2286): ``mosaic`` on data whose cross-table has gaps.

    Several ``id1``/``id2`` combinations never occur in the sample, so the
    contingency table contains zero cells.  ``mosaic`` is called once on the
    unstacked, transposed table and once on the raw frame; the test only
    checks that neither call raises.
    """
    import pandas as pd

    id2_by_row = {64: 'Angelica',
                  65: 'DXW_UID', 66: 'casuid01',
                  67: 'casuid01', 68: 'EC93_uid',
                  69: 'EC93_uid', 70: 'EC93_uid',
                  60: 'DXW_UID', 61: 'AtmosFox',
                  62: 'DXW_UID', 63: 'DXW_UID'}
    id1_by_row = {64: 'TGP',
                  65: 'Retention01', 66: 'default',
                  67: 'default', 68: 'Musa_EC_9_3',
                  69: 'Musa_EC_9_3', 70: 'Musa_EC_9_3',
                  60: 'default', 61: 'default',
                  62: 'default', 63: 'default'}
    mydata = pd.DataFrame({'id2': id2_by_row, 'id1': id1_by_row})

    # Variant 1: plot from the cross-tabulated counts.
    counts = pd.crosstab(mydata.id1, mydata.id2)
    _fig, _rects = mosaic(counts.T.unstack())
    pylab.close('all')

    # Variant 2: plot straight from the frame, naming the two columns.
    _fig, _rects = mosaic(mydata, ['id1', 'id2'])
    pylab.close('all')
开发者ID:birforce,项目名称:vnpy_crypto,代码行数:23,代码来源:test_mosaicplot.py

示例6: test_SquareTable_from_data

# 需要导入模块: import pandas [as 别名]
# 或者: from pandas import crosstab [as 别名]
def test_SquareTable_from_data():
    """Check that ``SquareTable`` gives the same summary for every input form.

    A table is built from a crosstab DataFrame, from the raw two-column
    frame via ``from_data`` and from the crosstab converted to an ndarray.
    All three must produce identical summary text; the string form and the
    top-left cell count of the first table are checked as well.
    """
    np.random.seed(434)
    frame = pd.DataFrame(index=range(100), columns=["v1", "v2"])
    frame["v1"] = np.random.randint(0, 5, 100)
    frame["v2"] = np.random.randint(0, 5, 100)
    counts = pd.crosstab(frame["v1"], frame["v2"])

    from_crosstab = ctab.SquareTable(counts)
    from_frame = ctab.SquareTable.from_data(frame)
    from_ndarray = ctab.SquareTable(np.asarray(counts))

    # Compare neighbouring pairs; together they establish three-way equality.
    for left, right in ((from_crosstab, from_frame),
                        (from_frame, from_ndarray)):
        assert_equal(left.summary().as_text(),
                     right.summary().as_text())

    text = str(from_crosstab)
    assert_equal(text.startswith('A 5x5 contingency table with counts:'), True)
    assert_equal(from_crosstab.table[0, 0], 8.)
开发者ID:birforce,项目名称:vnpy_crypto,代码行数:23,代码来源:test_contingency_tables.py

示例7: test_from_data

# 需要导入模块: import pandas [as 别名]
# 或者: from pandas import crosstab [as 别名]
def test_from_data(self):
        """Check ``StratifiedTable.from_data`` against per-stratum crosstabs.

        A ``StratifiedTable`` assembled from ten separately cross-tabulated
        strata must yield the same summary text as one built directly from
        the raw frame with ``from_data``.
        """
        np.random.seed(241)
        frame = pd.DataFrame(index=range(100), columns=("v1", "v2", "strat"))
        frame["v1"] = np.random.randint(0, 2, 100)
        frame["v2"] = np.random.randint(0, 2, 100)
        # Ten consecutive strata with ten observations each.
        frame["strat"] = np.kron(np.arange(10), np.ones(10))

        def stratum_table(k):
            # Cross-tabulate v1 against v2 for the rows of stratum k only.
            rows = np.arange(10*k, 10*(k+1))
            return pd.crosstab(frame.loc[rows, "v1"], frame.loc[rows, "v2"])

        per_stratum = [stratum_table(k) for k in range(10)]

        from_tables = ctab.StratifiedTable(per_stratum)
        from_frame = ctab.StratifiedTable.from_data("v1", "v2", "strat", frame)

        assert_equal(from_tables.summary().as_text(),
                     from_frame.summary().as_text())
开发者ID:birforce,项目名称:vnpy_crypto,代码行数:19,代码来源:test_contingency_tables.py

示例8: from_data

# 需要导入模块: import pandas [as 别名]
# 或者: from pandas import crosstab [as 别名]
def from_data(cls, data, shift_zeros=True):
        """
        Construct a Table object from data.

        Parameters
        ----------
        data : array-like
            The raw data, from which a contingency table is constructed
            using the first two columns.  A pandas DataFrame is indexed by
            position.  Any other object is indexed as ``data[:, k]``; if it
            does not support that (for example a nested list or tuple), it
            is first converted with ``numpy.asarray``.
        shift_zeros : boolean
            If True and any cell count is zero, add 0.5 to all values
            in the table.  Passed unchanged to ``cls``.

        Returns
        -------
        A Table instance.
        """
        # Imported locally so the method does not rely on a module-level import.
        import numpy as np

        if isinstance(data, pd.DataFrame):
            first, second = data.iloc[:, 0], data.iloc[:, 1]
        else:
            try:
                first, second = data[:, 0], data[:, 1]
            except TypeError:
                # Plain sequences reject tuple indexing; fall back to an
                # ndarray view of the same data.
                as_array = np.asarray(data)
                first, second = as_array[:, 0], as_array[:, 1]

        table = pd.crosstab(first, second)
        return cls(table, shift_zeros)
开发者ID:birforce,项目名称:vnpy_crypto,代码行数:26,代码来源:contingency_tables.py

示例9: test_crosstab_errors

# 需要导入模块: import pandas [as 别名]
# 或者: from pandas import crosstab [as 别名]
def test_crosstab_errors(self):
        # Issue 12578

        df = pd.DataFrame({'a': [1, 2, 2, 2, 2], 'b': [3, 3, 4, 4, 4],
                           'c': [1, 1, np.nan, 1, 1]})

        error = 'values cannot be used without an aggfunc.'
        with tm.assert_raises_regex(ValueError, error):
            pd.crosstab(df.a, df.b, values=df.c)

        error = 'aggfunc cannot be used without values'
        with tm.assert_raises_regex(ValueError, error):
            pd.crosstab(df.a, df.b, aggfunc=np.mean)

        error = 'Not a valid normalize argument'
        with tm.assert_raises_regex(ValueError, error):
            pd.crosstab(df.a, df.b, normalize='42')

        with tm.assert_raises_regex(ValueError, error):
            pd.crosstab(df.a, df.b, normalize=42)

        error = 'Not a valid margins argument'
        with tm.assert_raises_regex(ValueError, error):
            pd.crosstab(df.a, df.b, normalize='all', margins=42) 
开发者ID:birforce,项目名称:vnpy_crypto,代码行数:26,代码来源:test_pivot.py

示例10: SplitData

# 需要导入模块: import pandas [as 别名]
# 或者: from pandas import crosstab [as 别名]
def SplitData(self, df):
        """Choose the feature to split on and partition ``df`` by its values.

        The last column of ``df`` is treated as the label and every other
        column as a candidate feature.  Each feature is scored from its
        cross-tabulation against the label: ``HD - HDA`` when ``self.method``
        is ``"ID3"`` and ``1 - HDA / HD`` when it is ``"C4.5"``, where ``HD``
        is ``self.calH`` of the label counts and ``HDA`` is ``self.calg`` of
        the feature's cross-tabulation.

        Parameters
        ----------
        df : pandas.DataFrame
            Feature columns followed by one label column.

        Returns
        -------
        None or generator
            ``None`` when no feature column is left or when the best score
            is below ``self.eps``.  Otherwise a generator of
            ``(name, value, subset)`` tuples, one per distinct value of the
            chosen feature, where ``subset`` holds the matching rows of
            ``df`` without the chosen column.

        Raises
        ------
        ValueError
            If ``self.method`` is neither ``"ID3"`` nor ``"C4.5"``.
        """
        # Imported locally so the method does not rely on a module-level import.
        import numpy as np

        if self.method not in ("ID3", "C4.5"):
            # Fail early with a clear message instead of hitting an unbound
            # score variable further down.
            raise ValueError(
                "unknown method %r, expected 'ID3' or 'C4.5'" % (self.method,))

        labels = df.iloc[:, -1]
        data = df.iloc[:, :-1]
        if data.columns.size == 0:
            # Only the label column is left, so there is nothing to split on.
            return None

        y_c = labels.groupby(labels).count()
        # entropy of y
        HD = self.calH(y_c)
        # use crosstab to count the frequency of each feature value per label;
        # an ndarray keeps the arithmetic below valid even when HD is a
        # plain Python float.
        HDA = np.asarray([self.calg(pd.crosstab(data.iloc[:, i], labels))
                          for i in range(data.columns.size)])
        if self.method == "ID3":
            g = HD - HDA
        else:
            g = 1 - HDA / HD
        if g.max() < self.eps:
            return None
        # the split location
        split = g.argmax()
        name = df.columns[split]
        # divide into parts
        gp = df.groupby(df.iloc[:, split])
        return ((name, i, d.drop(name, axis=1)) for i, d in gp)
开发者ID:cherichy,项目名称:statistical_learning,代码行数:24,代码来源:decisionTree.py

示例11: crosstab_data

# 需要导入模块: import pandas [as 别名]
# 或者: from pandas import crosstab [as 别名]
def crosstab_data(columns_var, row_var, data,unique_num,*args):
    """Cross-tabulate two variables of ``data`` with 'All' margins.

    Both variables are first prepared by ``merger_data``, which returns the
    values to tabulate, a target and an optional bins object.  When bins are
    returned for an axis, that axis is relabelled through ``rename_columns``
    while the trailing ``'All'`` margin label is kept.

    Parameters
    ----------
    columns_var, row_var
        Variables placed on the columns and on the rows; forwarded to
        ``merger_data`` together with ``data`` and ``unique_num``.
    data
        Source data handed to ``merger_data``.
    unique_num
        Forwarded unchanged to ``merger_data``.
    *args
        Four positional extras: ``args[0]`` and ``args[1]`` go to
        ``merger_data`` for the column and row variable, ``args[2]`` and
        ``args[3]`` go to ``rename_columns`` for the column and row labels.

    Returns
    -------
    pandas.DataFrame
        The cross-tabulation including the ``'All'`` margins.
    """
    columns_data, columns_target, columns_bins = merger_data(data, columns_var, unique_num,args[0])
    row_data, row_target, row_bins = merger_data(data, row_var, unique_num,args[1])
    result = pd.crosstab(row_data, columns_data, margins=True, dropna=False)
    if columns_bins is not None:
        columns = result.columns.tolist()
        columns.remove('All')
        columns_bins_list = rename_columns(columns, columns_bins, args[2])
        columns_bins_list.append('All')
        # Direct assignment relabels in place; ``set_axis(..., inplace=True)``
        # is no longer accepted by current pandas releases.
        result.columns = columns_bins_list
    if row_bins is not None:
        index = result.index.tolist()
        index.remove('All')
        index_bins_list = rename_columns(index, row_bins, args[3])
        index_bins_list.append('All')
        result.index = index_bins_list
    return result


# 写入所有高iv的变量分组和图到excel 
开发者ID:amphibian-dev,项目名称:toad,代码行数:22,代码来源:evaluate.py

示例12: crosstab_df

# 需要导入模块: import pandas [as 别名]
# 或者: from pandas import crosstab [as 别名]
def crosstab_df(labels, decisions):
    """
    Cross-tabulate group labels against decisions.

    Parameters
    ------------
    labels : array_like
        categorical values such as ['M', 'F']; they become the rows
    decisions : array_like
        boolean / binary values; they become the columns

    Returns
    --------
    crosstab : pandas.DataFrame
        counts per (label, decision) pair, for example
                    False True
        TopGroup       5    4
        BottomGroup    3    4
    """
    # Wrap both inputs in Series so any array-like is accepted, then count:
    # rows are label values (e.g. ['F', 'M']),
    # columns are decision values (e.g. [False, True]).
    return pd.crosstab(pd.Series(labels), pd.Series(decisions))
开发者ID:pymetrics,项目名称:audit-ai,代码行数:25,代码来源:crosstabs.py

示例13: test_crosstab_single

# 需要导入模块: import pandas [as 别名]
# 或者: from pandas import crosstab [as 别名]
def test_crosstab_single(self):
        df = self.df
        result = crosstab(df['A'], df['C'])
        expected = df.groupby(['A', 'C']).size().unstack()
        tm.assert_frame_equal(result, expected.fillna(0).astype(np.int64)) 
开发者ID:Frank-qlu,项目名称:recruit,代码行数:7,代码来源:test_pivot.py

示例14: test_crosstab_multiple

# 需要导入模块: import pandas [as 别名]
# 或者: from pandas import crosstab [as 别名]
def test_crosstab_multiple(self):
        df = self.df

        result = crosstab(df['A'], [df['B'], df['C']])
        expected = df.groupby(['A', 'B', 'C']).size()
        expected = expected.unstack(
            'B').unstack('C').fillna(0).astype(np.int64)
        tm.assert_frame_equal(result, expected)

        result = crosstab([df['B'], df['C']], df['A'])
        expected = df.groupby(['B', 'C', 'A']).size()
        expected = expected.unstack('A').fillna(0).astype(np.int64)
        tm.assert_frame_equal(result, expected) 
开发者ID:Frank-qlu,项目名称:recruit,代码行数:15,代码来源:test_pivot.py

示例15: test_crosstab_non_aligned

# 需要导入模块: import pandas [as 别名]
# 或者: from pandas import crosstab [as 别名]
def test_crosstab_non_aligned(self):
        # GH 17005
        a = pd.Series([0, 1, 1], index=['a', 'b', 'c'])
        b = pd.Series([3, 4, 3, 4, 3], index=['a', 'b', 'c', 'd', 'f'])
        c = np.array([3, 4, 3])

        expected = pd.DataFrame([[1, 0], [1, 1]],
                                index=Index([0, 1], name='row_0'),
                                columns=Index([3, 4], name='col_0'))

        result = crosstab(a, b)
        tm.assert_frame_equal(result, expected)

        result = crosstab(a, c)
        tm.assert_frame_equal(result, expected) 
开发者ID:Frank-qlu,项目名称:recruit,代码行数:17,代码来源:test_pivot.py


注:本文中的pandas.crosstab方法示例由纯净天空整理自Github/MSDocs等开源代码及文档管理平台,相关代码片段筛选自各路编程大神贡献的开源项目,源码版权归原作者所有,传播和使用请参考对应项目的License;未经允许,请勿转载。