当前位置: 首页>>代码示例>>Python>>正文


Python pandas.get_dummies方法代码示例

本文整理汇总了Python中pandas.get_dummies方法的典型用法代码示例。如果您正苦于以下问题:Python pandas.get_dummies方法的具体用法?Python pandas.get_dummies怎么用?Python pandas.get_dummies使用的例子?那么恭喜您, 这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在pandas的用法示例。


在下文中一共展示了pandas.get_dummies方法的15个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Python代码示例。

示例1: model

# 需要导入模块: import pandas [as 别名]
# 或者: from pandas import get_dummies [as 别名]
def model(profiles, comparisons, selections, sample=2500, alpha_prior_std=10):
    """Build and sample a per-participant part-worth model, then summarise it.

    Parameters
    ----------
    profiles : DataFrame
        Passed to ``pd.get_dummies``; its rows are looked up by label via
        the values stored in ``comparisons``.
    comparisons : DataFrame
        Each column holds labels into ``profiles``; one block of dummy
        columns is built per column and concatenated side by side.
    selections : DataFrame
        One column per participant (its columns are used as the respondent
        labels and its column count as the number of participants).
    sample : int
        Forwarded to ``pm.sample``.
    alpha_prior_std : float
        Standard deviation of the ``alpha`` prior.

    Returns
    -------
    Whatever ``transform_trace_to_individual_summary_statistics`` returns for
    the sampled trace.

    NOTE(review): ``pm`` is presumably PyMC3 (``sd=`` / ``testval=`` keyword
    style) -- confirm against the file's imports.
    """
    # Column labels with every level kept; only used by the summary step.
    all_attributes = pd.get_dummies(profiles).columns
    # The model itself works on the encoding with the first level dropped.
    profiles_dummies = pd.get_dummies(profiles, drop_first=True)
    choices = pd.concat(
        {
            profile: profiles_dummies.loc[comparisons[profile]].reset_index(drop=True)
            for profile in comparisons.columns
        },
        axis=1,
    )

    respondants = selections.columns
    n_attributes_in_model = profiles_dummies.shape[1]
    n_participants = selections.shape[1]

    with pm.Model():

        # https://www.sawtoothsoftware.com/download/ssiweb/CBCHB_Manual.pdf
        # need to include the covariance matrix as a parent of `partsworth`
        alpha = pm.Normal('alpha', 0, sd=alpha_prior_std, shape=n_attributes_in_model, testval=np.random.randn(n_attributes_in_model))
        partsworth = pm.MvNormal("partsworth", alpha, tau=np.eye(n_attributes_in_model), shape=(n_participants, n_attributes_in_model))

        # One observation variable per participant column. The helper is
        # called for its side effect inside the model context, so the results
        # are not collected. ``items()`` is used because ``iteritems()`` was
        # removed in pandas 2.0.
        for i, (_, selection) in enumerate(selections.items()):
            _create_observation_variable(selection, choices, partsworth[i, :])

        trace = pm.sample(sample)
    return transform_trace_to_individual_summary_statistics(trace, respondants, profiles_dummies.columns, all_attributes)
开发者ID:CamDavidsonPilon,项目名称:lifestyles,代码行数:22,代码来源:cbc_hb.py

示例2: test_basic

# 需要导入模块: import pandas [as 别名]
# 或者: from pandas import get_dummies [as 别名]
def test_basic(self, sparse, dtype):
        """Check get_dummies on a list, a Series and a labelled Series.

        All three inputs hold the letters a/b/c and must give the same
        indicator frame; only the index differs for the labelled Series.
        """
        letters = ['a', 'b', 'c']
        labels = ['A', 'B', 'C']

        expected = DataFrame(
            {'a': [1, 0, 0], 'b': [0, 1, 0], 'c': [0, 0, 1]},
            dtype=self.effective_dtype(dtype),
        )
        if sparse:
            expected = expected.apply(pd.SparseArray, fill_value=0.0)

        # Plain list and default-indexed Series share the default index.
        for data in (letters, Series(letters)):
            encoded = get_dummies(data, sparse=sparse, dtype=dtype)
            assert_frame_equal(encoded, expected)

        # The labelled Series must carry its index through to the result.
        expected.index = labels
        encoded = get_dummies(Series(letters, labels), sparse=sparse, dtype=dtype)
        assert_frame_equal(encoded, expected)
开发者ID:Frank-qlu,项目名称:recruit,代码行数:22,代码来源:test_reshape.py

示例3: test_just_na

# 需要导入模块: import pandas [as 别名]
# 或者: from pandas import get_dummies [as 别名]
def test_just_na(self, sparse):
        just_na_list = [np.nan]
        just_na_series = Series(just_na_list)
        just_na_series_index = Series(just_na_list, index=['A'])

        res_list = get_dummies(just_na_list, sparse=sparse)
        res_series = get_dummies(just_na_series, sparse=sparse)
        res_series_index = get_dummies(just_na_series_index, sparse=sparse)

        assert res_list.empty
        assert res_series.empty
        assert res_series_index.empty

        assert res_list.index.tolist() == [0]
        assert res_series.index.tolist() == [0]
        assert res_series_index.index.tolist() == ['A'] 
开发者ID:Frank-qlu,项目名称:recruit,代码行数:18,代码来源:test_reshape.py

示例4: test_dataframe_dummies_all_obj

# 需要导入模块: import pandas [as 别名]
# 或者: from pandas import get_dummies [as 别名]
def test_dataframe_dummies_all_obj(self, df, sparse):
        """Check get_dummies on a frame restricted to columns A and B.

        Every column is encoded, producing ``<column>_<level>`` uint8
        indicator columns (sparse arrays when ``sparse`` is set).
        """
        indicators = {
            'A_a': [1, 0, 1],
            'A_b': [0, 1, 0],
            'B_b': [1, 1, 0],
            'B_c': [0, 0, 1],
        }
        result = get_dummies(df[['A', 'B']], sparse=sparse)

        if sparse:
            expected = pd.DataFrame({
                name: pd.SparseArray(values, dtype='uint8')
                for name, values in indicators.items()
            })
        else:
            expected = DataFrame(indicators, dtype=np.uint8)

        assert_frame_equal(result, expected)
开发者ID:Frank-qlu,项目名称:recruit,代码行数:19,代码来源:test_reshape.py

示例5: test_dataframe_dummies_prefix_list

# 需要导入模块: import pandas [as 别名]
# 或者: from pandas import get_dummies [as 别名]
def test_dataframe_dummies_prefix_list(self, df, sparse):
        """Check that a list ``prefix`` is applied to the encoded columns in order.

        Column ``C`` is expected to pass through with the values it has in
        ``df``, placed ahead of the prefixed indicator columns.
        """
        dummy_cols = ['from_A_a', 'from_A_b', 'from_B_b', 'from_B_c']
        result = get_dummies(df, prefix=['from_A', 'from_B'], sparse=sparse)

        expected = DataFrame(
            {
                'C': [1, 2, 3],
                'from_A_a': [1, 0, 1],
                'from_A_b': [0, 1, 0],
                'from_B_b': [1, 1, 0],
                'from_B_c': [0, 0, 1],
            },
            dtype=np.uint8,
        )
        # Take C straight from the input frame, then fix the column order.
        expected[['C']] = df[['C']]
        expected = expected[['C'] + dummy_cols]

        if sparse:
            container = pd.SparseArray
        else:
            container = pd.Series
        expected[dummy_cols] = expected[dummy_cols].apply(lambda col: container(col))
        assert_frame_equal(result, expected)
开发者ID:Frank-qlu,项目名称:recruit,代码行数:18,代码来源:test_reshape.py

示例6: test_dataframe_dummies_prefix_str

# 需要导入模块: import pandas [as 别名]
# 或者: from pandas import get_dummies [as 别名]
def test_dataframe_dummies_prefix_str(self, df, sparse):
        """Check get_dummies with one string ``prefix`` shared by all columns.

        Sharing the prefix yields a duplicated ``bad_b`` column label, which
        the expected frame reproduces.
        """
        # not that you should do this...
        result = get_dummies(df, prefix='bad', sparse=sparse)

        dummy_names = ['bad_a', 'bad_b', 'bad_b', 'bad_c']
        rows = [
            [1, 1, 0, 1, 0],
            [2, 0, 1, 1, 0],
            [3, 1, 0, 0, 1],
        ]
        expected = DataFrame(rows, columns=['C'] + dummy_names, dtype=np.uint8)
        expected = expected.astype({"C": np.int64})

        if sparse:
            # work around astyping & assigning with duplicate columns
            # https://github.com/pandas-dev/pandas/issues/14427
            dummy_values = [[1, 0, 1], [0, 1, 0], [1, 1, 0], [0, 0, 1]]
            pieces = [pd.Series([1, 2, 3], name='C')]
            pieces.extend(
                pd.Series(values, name=name, dtype='Sparse[uint8]')
                for name, values in zip(dummy_names, dummy_values)
            )
            expected = pd.concat(pieces, axis=1)

        assert_frame_equal(result, expected)
开发者ID:Frank-qlu,项目名称:recruit,代码行数:24,代码来源:test_reshape.py

示例7: test_dataframe_dummies_prefix_dict

# 需要导入模块: import pandas [as 别名]
# 或者: from pandas import get_dummies [as 别名]
def test_dataframe_dummies_prefix_dict(self, sparse):
        """Check that a dict ``prefix`` maps each encoded column to its prefix.

        Columns ``A`` and ``B`` are encoded under ``from_A`` / ``from_B``
        while ``C`` is expected to pass through unchanged.
        """
        prefixes = {'A': 'from_A', 'B': 'from_B'}
        df = DataFrame({'C': [1, 2, 3],
                        'A': ['a', 'b', 'a'],
                        'B': ['b', 'b', 'c']})
        result = get_dummies(df, prefix=prefixes, sparse=sparse)

        expected = DataFrame({'C': [1, 2, 3],
                              'from_A_a': [1, 0, 1],
                              'from_A_b': [0, 1, 0],
                              'from_B_b': [1, 1, 0],
                              'from_B_c': [0, 0, 1]})

        columns = ['from_A_a', 'from_A_b', 'from_B_b', 'from_B_c']
        expected[columns] = expected[columns].astype(np.uint8)
        if sparse:
            # Use SparseArray like the sibling tests do; SparseSeries was
            # removed from pandas in 1.0.
            expected[columns] = expected[columns].apply(
                lambda x: pd.SparseArray(x)
            )

        assert_frame_equal(result, expected)
开发者ID:Frank-qlu,项目名称:recruit,代码行数:23,代码来源:test_reshape.py

示例8: test_dataframe_dummies_with_categorical

# 需要导入模块: import pandas [as 别名]
# 或者: from pandas import get_dummies [as 别名]
def test_dataframe_dummies_with_categorical(self, df, sparse, dtype):
        """Check that a Categorical column is encoded alongside the others.

        A ``cat`` column is added to ``df`` (mutating it) before encoding;
        both frames are compared after sorting their columns.
        """
        df['cat'] = pd.Categorical(['x', 'y', 'y'])
        result = get_dummies(df, sparse=sparse, dtype=dtype).sort_index(axis=1)

        # Pick the array constructor and dtype the indicator columns use.
        make, kind = (
            (SparseArray, SparseDtype(dtype, 0)) if sparse else (np.array, dtype)
        )

        indicators = {
            'A_a': [1, 0, 1],
            'A_b': [0, 1, 0],
            'B_b': [1, 1, 0],
            'B_c': [0, 0, 1],
            'cat_x': [1, 0, 0],
            'cat_y': [0, 1, 1],
        }
        data = {'C': [1, 2, 3]}
        for name, values in indicators.items():
            data[name] = make(values, dtype=kind)
        expected = DataFrame(data).sort_index(axis=1)

        assert_frame_equal(result, expected)
开发者ID:Frank-qlu,项目名称:recruit,代码行数:22,代码来源:test_reshape.py

示例9: test_basic_drop_first

# 需要导入模块: import pandas [as 别名]
# 或者: from pandas import get_dummies [as 别名]
def test_basic_drop_first(self, sparse):
        """Check ``drop_first=True`` on a list, a Series and a labelled Series.

        With levels a/b/c the first level is dropped, leaving only the
        ``b`` and ``c`` indicator columns.
        """
        # GH12402 Add a new parameter `drop_first` to avoid collinearity
        # Basic case
        letters = ['a', 'b', 'c']
        labels = ['A', 'B', 'C']

        expected = DataFrame({'b': [0, 1, 0], 'c': [0, 0, 1]}, dtype=np.uint8)
        if sparse:
            expected = expected.apply(pd.SparseArray, fill_value=0)

        # Plain list and default-indexed Series share the default index.
        for data in (letters, Series(letters)):
            encoded = get_dummies(data, drop_first=True, sparse=sparse)
            assert_frame_equal(encoded, expected)

        # The labelled Series must carry its index through to the result.
        expected.index = labels
        encoded = get_dummies(Series(letters, labels), drop_first=True,
                              sparse=sparse)
        assert_frame_equal(encoded, expected)
开发者ID:Frank-qlu,项目名称:recruit,代码行数:24,代码来源:test_reshape.py

示例10: test_basic_drop_first_one_level

# 需要导入模块: import pandas [as 别名]
# 或者: from pandas import get_dummies [as 别名]
def test_basic_drop_first_one_level(self, sparse):
        """Check ``drop_first=True`` when the data has a single level.

        Dropping the only level leaves a frame with no columns that still
        keeps the index of the input.
        """
        # Test the case that categorical variable only has one level.
        values = ['a', 'a', 'a']
        labels = ['A', 'B', 'C']

        default_expected = DataFrame(index=np.arange(3))
        for data in (values, Series(values)):
            encoded = get_dummies(data, drop_first=True, sparse=sparse)
            assert_frame_equal(encoded, default_expected)

        labelled_expected = DataFrame(index=labels)
        encoded = get_dummies(Series(values, labels), drop_first=True,
                              sparse=sparse)
        assert_frame_equal(encoded, labelled_expected)
开发者ID:Frank-qlu,项目名称:recruit,代码行数:19,代码来源:test_reshape.py

示例11: test_basic_drop_first_NA

# 需要导入模块: import pandas [as 别名]
# 或者: from pandas import get_dummies [as 别名]
def test_basic_drop_first_NA(self, sparse):
        """Check how ``drop_first=True`` combines with NaN handling.

        Three cases: NaN ignored, NaN encoded as its own column through
        ``dummy_na=True``, and an input holding nothing but NaN.
        """
        def _as_expected(frame):
            # Expected frames hold sparse columns when sparse output is asked for.
            if sparse:
                return frame.apply(pd.SparseArray, fill_value=0)
            return frame

        # Test NA handling together with drop_first
        values = ['a', 'b', np.nan]

        plain = get_dummies(values, drop_first=True, sparse=sparse)
        plain_expected = _as_expected(DataFrame({'b': [0, 1, 0]}, dtype=np.uint8))
        assert_frame_equal(plain, plain_expected)

        with_na = get_dummies(values, dummy_na=True, drop_first=True,
                              sparse=sparse)
        with_na_expected = DataFrame(
            {'b': [0, 1, 0], nan: [0, 0, 1]},
            dtype=np.uint8,
        ).reindex(['b', nan], axis=1)
        with_na_expected = _as_expected(with_na_expected)
        assert_frame_equal(with_na, with_na_expected)

        only_na = get_dummies([nan], dummy_na=True, drop_first=True,
                              sparse=sparse)
        assert_frame_equal(only_na, DataFrame(index=np.arange(1)))
开发者ID:Frank-qlu,项目名称:recruit,代码行数:26,代码来源:test_reshape.py

示例12: test_int_int

# 需要导入模块: import pandas [as 别名]
# 或者: from pandas import get_dummies [as 别名]
def test_int_int(self):
        """Check get_dummies on an integer Series and on a Categorical Series.

        Both inputs follow the same first/second/first pattern, so they share
        the indicator rows and differ only in the column labels.
        """
        indicator_rows = [[1, 0], [0, 1], [1, 0]]

        int_result = pd.get_dummies(Series([1, 2, 1]))
        int_expected = DataFrame(indicator_rows, columns=[1, 2], dtype=np.uint8)
        tm.assert_frame_equal(int_result, int_expected)

        cat_result = pd.get_dummies(Series(pd.Categorical(['a', 'b', 'a'])))
        cat_expected = DataFrame(indicator_rows,
                                 columns=pd.Categorical(['a', 'b']),
                                 dtype=np.uint8)
        tm.assert_frame_equal(cat_result, cat_expected)
开发者ID:Frank-qlu,项目名称:recruit,代码行数:20,代码来源:test_reshape.py

示例13: test_int_df

# 需要导入模块: import pandas [as 别名]
# 或者: from pandas import get_dummies [as 别名]
def test_int_df(self, dtype):
        """Check get_dummies with an explicit ``columns`` selection.

        Only ``A`` and ``B`` are encoded; ``C`` and ``D`` are expected to
        pass through and to come first in the result.
        """
        source = DataFrame({
            'A': [1, 2, 1],
            'B': pd.Categorical(['a', 'b', 'a']),
            'C': [1, 2, 1],
            'D': [1., 2., 1.],
        })
        result = pd.get_dummies(source, columns=['A', 'B'], dtype=dtype)

        untouched = ['C', 'D']
        dummy_cols = ['A_1', 'A_2', 'B_a', 'B_b']
        rows = [
            [1, 1., 1, 0, 1, 0],
            [2, 2., 0, 1, 0, 1],
            [1, 1., 1, 0, 1, 0],
        ]
        expected = DataFrame(rows, columns=untouched + dummy_cols)
        # Only the indicator columns take the requested dtype.
        expected[dummy_cols] = expected[dummy_cols].astype(dtype)

        tm.assert_frame_equal(result, expected)
开发者ID:Frank-qlu,项目名称:recruit,代码行数:19,代码来源:test_reshape.py

示例14: test_dataframe_dummies_preserve_categorical_dtype

# 需要导入模块: import pandas [as 别名]
# 或者: from pandas import get_dummies [as 别名]
def test_dataframe_dummies_preserve_categorical_dtype(self, dtype):
        """Check that result columns form a CategoricalIndex matching the input.

        Runs once for an unordered and once for an ordered Categorical; the
        unused category ``z`` must still get an (all-zero) column.
        """
        # GH13854
        def _check(ordered):
            cat = pd.Categorical(['x', 'y'], categories=['x', 'y', 'z'],
                                 ordered=ordered)
            result = get_dummies(cat, dtype=dtype)

            values = np.array([[1, 0, 0], [0, 1, 0]],
                              dtype=self.effective_dtype(dtype))
            columns = pd.CategoricalIndex(cat.categories,
                                          categories=cat.categories,
                                          ordered=ordered)
            expected = DataFrame(values, columns=columns,
                                 dtype=self.effective_dtype(dtype))

            tm.assert_frame_equal(result, expected)

        _check(False)
        _check(True)
开发者ID:Frank-qlu,项目名称:recruit,代码行数:18,代码来源:test_reshape.py

示例15: process_side

# 需要导入模块: import pandas [as 别名]
# 或者: from pandas import get_dummies [as 别名]
def process_side():
    """Load the patient side-information table and prepare it for modelling.

    Reads the CSV named by the module-level ``patient_info_file``, keeps the
    demographic columns, fills missing numeric values with the column mean,
    keeps the first row of every (subject_id, hadm_id) group and replaces
    ``ethnicity`` with one indicator column per value.

    Returns
    -------
    DataFrame
        The processed table with ``subject_id`` / ``hadm_id`` renamed to
        ``SUBJECT_ID`` / ``HADM_ID`` and a fresh default index.
    """
    print('process_side')

    side_pd = pd.read_csv(patient_info_file)
    # just use demographic information to avoid future information leak such as lab test and lab measurements
    side_pd = side_pd[['subject_id', 'hadm_id', 'icustay_id',
                       'gender_male', 'admission_type', 'first_icu_stay', 'admission_age',
                       'ethnicity', 'weight', 'height']]

    # process side_information
    # Keep only rows that have at least four non-missing values.
    side_pd = side_pd.dropna(thresh=4)
    # numeric_only=True restricts the mean to numeric columns; without it
    # pandas >= 2.0 raises on the string columns in this frame.
    side_pd = side_pd.fillna(side_pd.mean(numeric_only=True))
    # GroupBy.head takes an integer row count, not a list.
    side_pd = side_pd.groupby(by=['subject_id', 'hadm_id']).head(
        1).reset_index(drop=True)
    side_pd = pd.concat(
        [side_pd, pd.get_dummies(side_pd['ethnicity'])], axis=1)
    side_pd = side_pd.drop(columns=['ethnicity', 'icustay_id'])
    side_pd = side_pd.rename(columns={'subject_id': 'SUBJECT_ID',
                                      'hadm_id': 'HADM_ID'})
    return side_pd.reset_index(drop=True)
开发者ID:jshang123,项目名称:G-Bert,代码行数:22,代码来源:EDA.py


注:本文中的pandas.get_dummies方法示例由纯净天空整理自Github/MSDocs等开源代码及文档管理平台,相关代码片段筛选自各路编程大神贡献的开源项目,源码版权归原作者所有,传播和使用请参考对应项目的License;未经允许,请勿转载。