本文整理汇总了Python中pandas.get_dummies方法的典型用法代码示例。如果您正苦于以下问题:Python pandas.get_dummies方法的具体用法?Python pandas.get_dummies怎么用?Python pandas.get_dummies使用的例子?那么恭喜您, 这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在类pandas
的用法示例。
在下文中一共展示了pandas.get_dummies方法的15个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Python代码示例。
示例1: model
# 需要导入模块: import pandas [as 别名]
# 或者: from pandas import get_dummies [as 别名]
def model(profiles, comparisons, selections, sample=2500, alpha_prior_std=10):
all_attributes = pd.get_dummies(profiles).columns
profiles_dummies = pd.get_dummies(profiles, drop_first=True)
choices = pd.concat({profile: profiles_dummies.loc[comparisons[profile]].reset_index(drop=True) for profile in comparisons.columns}, axis=1)
respondants = selections.columns
n_attributes_in_model = profiles_dummies.shape[1]
n_participants = selections.shape[1]
with pm.Model():
# https://www.sawtoothsoftware.com/download/ssiweb/CBCHB_Manual.pdf
# need to include the covariance matrix as a parent of `partsworth`
alpha = pm.Normal('alpha', 0, sd=alpha_prior_std, shape=n_attributes_in_model, testval=np.random.randn(n_attributes_in_model))
partsworth = pm.MvNormal("partsworth", alpha, tau=np.eye(n_attributes_in_model), shape=(n_participants, n_attributes_in_model))
cs = [_create_observation_variable(selection, choices, partsworth[i, :]) for i, (_, selection) in enumerate(selections.iteritems())]
trace = pm.sample(sample)
return transform_trace_to_individual_summary_statistics(trace, respondants, profiles_dummies.columns, all_attributes)
示例2: test_basic
# 需要导入模块: import pandas [as 别名]
# 或者: from pandas import get_dummies [as 别名]
def test_basic(self, sparse, dtype):
s_list = list('abc')
s_series = Series(s_list)
s_series_index = Series(s_list, list('ABC'))
expected = DataFrame({'a': [1, 0, 0],
'b': [0, 1, 0],
'c': [0, 0, 1]},
dtype=self.effective_dtype(dtype))
if sparse:
expected = expected.apply(pd.SparseArray, fill_value=0.0)
result = get_dummies(s_list, sparse=sparse, dtype=dtype)
assert_frame_equal(result, expected)
result = get_dummies(s_series, sparse=sparse, dtype=dtype)
assert_frame_equal(result, expected)
expected.index = list('ABC')
result = get_dummies(s_series_index, sparse=sparse, dtype=dtype)
assert_frame_equal(result, expected)
示例3: test_just_na
# 需要导入模块: import pandas [as 别名]
# 或者: from pandas import get_dummies [as 别名]
def test_just_na(self, sparse):
just_na_list = [np.nan]
just_na_series = Series(just_na_list)
just_na_series_index = Series(just_na_list, index=['A'])
res_list = get_dummies(just_na_list, sparse=sparse)
res_series = get_dummies(just_na_series, sparse=sparse)
res_series_index = get_dummies(just_na_series_index, sparse=sparse)
assert res_list.empty
assert res_series.empty
assert res_series_index.empty
assert res_list.index.tolist() == [0]
assert res_series.index.tolist() == [0]
assert res_series_index.index.tolist() == ['A']
示例4: test_dataframe_dummies_all_obj
# 需要导入模块: import pandas [as 别名]
# 或者: from pandas import get_dummies [as 别名]
def test_dataframe_dummies_all_obj(self, df, sparse):
df = df[['A', 'B']]
result = get_dummies(df, sparse=sparse)
expected = DataFrame({'A_a': [1, 0, 1],
'A_b': [0, 1, 0],
'B_b': [1, 1, 0],
'B_c': [0, 0, 1]},
dtype=np.uint8)
if sparse:
expected = pd.DataFrame({
"A_a": pd.SparseArray([1, 0, 1], dtype='uint8'),
"A_b": pd.SparseArray([0, 1, 0], dtype='uint8'),
"B_b": pd.SparseArray([1, 1, 0], dtype='uint8'),
"B_c": pd.SparseArray([0, 0, 1], dtype='uint8'),
})
assert_frame_equal(result, expected)
示例5: test_dataframe_dummies_prefix_list
# 需要导入模块: import pandas [as 别名]
# 或者: from pandas import get_dummies [as 别名]
def test_dataframe_dummies_prefix_list(self, df, sparse):
prefixes = ['from_A', 'from_B']
result = get_dummies(df, prefix=prefixes, sparse=sparse)
expected = DataFrame({'C': [1, 2, 3],
'from_A_a': [1, 0, 1],
'from_A_b': [0, 1, 0],
'from_B_b': [1, 1, 0],
'from_B_c': [0, 0, 1]},
dtype=np.uint8)
expected[['C']] = df[['C']]
cols = ['from_A_a', 'from_A_b', 'from_B_b', 'from_B_c']
expected = expected[['C'] + cols]
typ = pd.SparseArray if sparse else pd.Series
expected[cols] = expected[cols].apply(lambda x: typ(x))
assert_frame_equal(result, expected)
示例6: test_dataframe_dummies_prefix_str
# 需要导入模块: import pandas [as 别名]
# 或者: from pandas import get_dummies [as 别名]
def test_dataframe_dummies_prefix_str(self, df, sparse):
# not that you should do this...
result = get_dummies(df, prefix='bad', sparse=sparse)
bad_columns = ['bad_a', 'bad_b', 'bad_b', 'bad_c']
expected = DataFrame([[1, 1, 0, 1, 0],
[2, 0, 1, 1, 0],
[3, 1, 0, 0, 1]],
columns=['C'] + bad_columns,
dtype=np.uint8)
expected = expected.astype({"C": np.int64})
if sparse:
# work around astyping & assigning with duplicate columns
# https://github.com/pandas-dev/pandas/issues/14427
expected = pd.concat([
pd.Series([1, 2, 3], name='C'),
pd.Series([1, 0, 1], name='bad_a', dtype='Sparse[uint8]'),
pd.Series([0, 1, 0], name='bad_b', dtype='Sparse[uint8]'),
pd.Series([1, 1, 0], name='bad_b', dtype='Sparse[uint8]'),
pd.Series([0, 0, 1], name='bad_c', dtype='Sparse[uint8]'),
], axis=1)
assert_frame_equal(result, expected)
示例7: test_dataframe_dummies_prefix_dict
# 需要导入模块: import pandas [as 别名]
# 或者: from pandas import get_dummies [as 别名]
def test_dataframe_dummies_prefix_dict(self, sparse):
prefixes = {'A': 'from_A', 'B': 'from_B'}
df = DataFrame({'C': [1, 2, 3],
'A': ['a', 'b', 'a'],
'B': ['b', 'b', 'c']})
result = get_dummies(df, prefix=prefixes, sparse=sparse)
expected = DataFrame({'C': [1, 2, 3],
'from_A_a': [1, 0, 1],
'from_A_b': [0, 1, 0],
'from_B_b': [1, 1, 0],
'from_B_c': [0, 0, 1]})
columns = ['from_A_a', 'from_A_b', 'from_B_b', 'from_B_c']
expected[columns] = expected[columns].astype(np.uint8)
if sparse:
expected[columns] = expected[columns].apply(
lambda x: pd.SparseSeries(x)
)
assert_frame_equal(result, expected)
示例8: test_dataframe_dummies_with_categorical
# 需要导入模块: import pandas [as 别名]
# 或者: from pandas import get_dummies [as 别名]
def test_dataframe_dummies_with_categorical(self, df, sparse, dtype):
df['cat'] = pd.Categorical(['x', 'y', 'y'])
result = get_dummies(df, sparse=sparse, dtype=dtype).sort_index(axis=1)
if sparse:
arr = SparseArray
typ = SparseDtype(dtype, 0)
else:
arr = np.array
typ = dtype
expected = DataFrame({'C': [1, 2, 3],
'A_a': arr([1, 0, 1], dtype=typ),
'A_b': arr([0, 1, 0], dtype=typ),
'B_b': arr([1, 1, 0], dtype=typ),
'B_c': arr([0, 0, 1], dtype=typ),
'cat_x': arr([1, 0, 0], dtype=typ),
'cat_y': arr([0, 1, 1], dtype=typ)
}).sort_index(axis=1)
assert_frame_equal(result, expected)
示例9: test_basic_drop_first
# 需要导入模块: import pandas [as 别名]
# 或者: from pandas import get_dummies [as 别名]
def test_basic_drop_first(self, sparse):
# GH12402 Add a new parameter `drop_first` to avoid collinearity
# Basic case
s_list = list('abc')
s_series = Series(s_list)
s_series_index = Series(s_list, list('ABC'))
expected = DataFrame({'b': [0, 1, 0],
'c': [0, 0, 1]},
dtype=np.uint8)
result = get_dummies(s_list, drop_first=True, sparse=sparse)
if sparse:
expected = expected.apply(pd.SparseArray, fill_value=0)
assert_frame_equal(result, expected)
result = get_dummies(s_series, drop_first=True, sparse=sparse)
assert_frame_equal(result, expected)
expected.index = list('ABC')
result = get_dummies(s_series_index, drop_first=True, sparse=sparse)
assert_frame_equal(result, expected)
示例10: test_basic_drop_first_one_level
# 需要导入模块: import pandas [as 别名]
# 或者: from pandas import get_dummies [as 别名]
def test_basic_drop_first_one_level(self, sparse):
# Test the case that categorical variable only has one level.
s_list = list('aaa')
s_series = Series(s_list)
s_series_index = Series(s_list, list('ABC'))
expected = DataFrame(index=np.arange(3))
result = get_dummies(s_list, drop_first=True, sparse=sparse)
assert_frame_equal(result, expected)
result = get_dummies(s_series, drop_first=True, sparse=sparse)
assert_frame_equal(result, expected)
expected = DataFrame(index=list('ABC'))
result = get_dummies(s_series_index, drop_first=True, sparse=sparse)
assert_frame_equal(result, expected)
示例11: test_basic_drop_first_NA
# 需要导入模块: import pandas [as 别名]
# 或者: from pandas import get_dummies [as 别名]
def test_basic_drop_first_NA(self, sparse):
# Test NA handling together with drop_first
s_NA = ['a', 'b', np.nan]
res = get_dummies(s_NA, drop_first=True, sparse=sparse)
exp = DataFrame({'b': [0, 1, 0]}, dtype=np.uint8)
if sparse:
exp = exp.apply(pd.SparseArray, fill_value=0)
assert_frame_equal(res, exp)
res_na = get_dummies(s_NA, dummy_na=True, drop_first=True,
sparse=sparse)
exp_na = DataFrame(
{'b': [0, 1, 0],
nan: [0, 0, 1]},
dtype=np.uint8).reindex(['b', nan], axis=1)
if sparse:
exp_na = exp_na.apply(pd.SparseArray, fill_value=0)
assert_frame_equal(res_na, exp_na)
res_just_na = get_dummies([nan], dummy_na=True, drop_first=True,
sparse=sparse)
exp_just_na = DataFrame(index=np.arange(1))
assert_frame_equal(res_just_na, exp_just_na)
示例12: test_int_int
# 需要导入模块: import pandas [as 别名]
# 或者: from pandas import get_dummies [as 别名]
def test_int_int(self):
data = Series([1, 2, 1])
result = pd.get_dummies(data)
expected = DataFrame([[1, 0],
[0, 1],
[1, 0]],
columns=[1, 2],
dtype=np.uint8)
tm.assert_frame_equal(result, expected)
data = Series(pd.Categorical(['a', 'b', 'a']))
result = pd.get_dummies(data)
expected = DataFrame([[1, 0],
[0, 1],
[1, 0]],
columns=pd.Categorical(['a', 'b']),
dtype=np.uint8)
tm.assert_frame_equal(result, expected)
示例13: test_int_df
# 需要导入模块: import pandas [as 别名]
# 或者: from pandas import get_dummies [as 别名]
def test_int_df(self, dtype):
data = DataFrame(
{'A': [1, 2, 1],
'B': pd.Categorical(['a', 'b', 'a']),
'C': [1, 2, 1],
'D': [1., 2., 1.]
}
)
columns = ['C', 'D', 'A_1', 'A_2', 'B_a', 'B_b']
expected = DataFrame([
[1, 1., 1, 0, 1, 0],
[2, 2., 0, 1, 0, 1],
[1, 1., 1, 0, 1, 0]
], columns=columns)
expected[columns[2:]] = expected[columns[2:]].astype(dtype)
result = pd.get_dummies(data, columns=['A', 'B'], dtype=dtype)
tm.assert_frame_equal(result, expected)
示例14: test_dataframe_dummies_preserve_categorical_dtype
# 需要导入模块: import pandas [as 别名]
# 或者: from pandas import get_dummies [as 别名]
def test_dataframe_dummies_preserve_categorical_dtype(self, dtype):
# GH13854
for ordered in [False, True]:
cat = pd.Categorical(list("xy"), categories=list("xyz"),
ordered=ordered)
result = get_dummies(cat, dtype=dtype)
data = np.array([[1, 0, 0], [0, 1, 0]],
dtype=self.effective_dtype(dtype))
cols = pd.CategoricalIndex(cat.categories,
categories=cat.categories,
ordered=ordered)
expected = DataFrame(data, columns=cols,
dtype=self.effective_dtype(dtype))
tm.assert_frame_equal(result, expected)
示例15: process_side
# 需要导入模块: import pandas [as 别名]
# 或者: from pandas import get_dummies [as 别名]
def process_side():
print('process_side')
side_pd = pd.read_csv(patient_info_file)
# just use demographic information to avoid future information leak such as lab test and lab measurements
side_pd = side_pd[['subject_id', 'hadm_id', 'icustay_id',
'gender_male', 'admission_type', 'first_icu_stay', 'admission_age',
'ethnicity', 'weight', 'height']]
# process side_information
side_pd = side_pd.dropna(thresh=4)
side_pd.fillna(side_pd.mean(), inplace=True)
side_pd = side_pd.groupby(by=['subject_id', 'hadm_id']).head(
[1]).reset_index(drop=True)
side_pd = pd.concat(
[side_pd, pd.get_dummies(side_pd['ethnicity'])], axis=1)
side_pd.drop(columns=['ethnicity', 'icustay_id'], inplace=True)
side_pd.rename(columns={'subject_id': 'SUBJECT_ID',
'hadm_id': 'HADM_ID'}, inplace=True)
return side_pd.reset_index(drop=True)