Python preprocessing.OneHotEncoder方法代码示例

本文整理汇总了Python中sklearn.preprocessing.OneHotEncoder方法的典型用法代码示例。如果您正苦于以下问题：Python preprocessing.OneHotEncoder方法的具体用法？Python preprocessing.OneHotEncoder怎么用？Python preprocessing.OneHotEncoder使用的例子？那么, 这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在类sklearn.preprocessing的用法示例。

在下文中一共展示了preprocessing.OneHotEncoder方法的15个代码示例，这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞，您的评价将有助于系统推荐出更棒的Python代码示例。

示例1: test_one_hot_encoder_categorical_features

# 需要导入模块: from sklearn import preprocessing [as 别名]
# 或者: from sklearn.preprocessing import OneHotEncoder [as 别名]
def test_one_hot_encoder_categorical_features():
    X = np.array([[3, 2, 1], [0, 1, 1]])
    X2 = np.array([[1, 1, 1]])

    cat = [True, False, False]
    _check_one_hot(X, X2, cat, 4)

    # Edge case: all non-categorical
    cat = [False, False, False]
    _check_one_hot(X, X2, cat, 3)

    # Edge case: all categorical
    cat = [True, True, True]
    _check_one_hot(X, X2, cat, 5)

    # check error raised if also specifying categories
    oh = OneHotEncoder(categories=[range(3)],
                       categorical_features=[True, False, False])
    assert_raises(ValueError, oh.fit, X)

开发者ID:PacktPublishing，项目名称:Mastering-Elasticsearch-7.0，代码行数:21，代码来源:test_encoders.py

示例2: cat_onehot_encoder

# 需要导入模块: from sklearn import preprocessing [as 别名]
# 或者: from sklearn.preprocessing import OneHotEncoder [as 别名]
def cat_onehot_encoder(df,y,col,selection=True):
    feat_x = df.values.reshape(-1,1)

    from sklearn.preprocessing import LabelEncoder

    le = LabelEncoder()
    le.fit(feat_x)
    feat_x = le.transform(feat_x)

    mlbs = OneHotEncoder(sparse=True).fit(feat_x.reshape(-1,1))
    from scipy.sparse import csr_matrix
    features_tmp = mlbs.transform(feat_x.reshape(-1,1))
    features_tmp = csr_matrix(features_tmp,dtype=float).tocsr()
    models = None
    auc_score = None
    if selection is True:
        auc_score, models = train_lightgbm_for_feature_selection(features_tmp, y)
        print(col, "auc", auc_score)
    #new_feature = pd.DataFrame(features_tmp,columns=["mul_feature_"+col])
    new_feature = features_tmp




    return new_feature,mlbs,models,auc_score,le

开发者ID:DominickZhang，项目名称:KDDCup2019_admin，代码行数:27，代码来源:feature_expansion.py

示例3: loadmodel

# 需要导入模块: from sklearn import preprocessing [as 别名]
# 或者: from sklearn.preprocessing import OneHotEncoder [as 别名]
def loadmodel(self, prefix):
        """ Load the model.

        :param prefix: prefix of the model path
        :return: None
        :type prefix: str
        """
        self.dictionary = Dictionary.load(prefix+'_vocabs.gensimdict')
        parameters = json.load(open(prefix+'_config.json', 'r'))
        self.operation = parameters['operation']
        self.alph = parameters['alph']
        self.specialsignals = parameters['special_signals']
        self.binarizer = SCRNNBinarizer(self.alph, self.specialsignals)
        self.concatcharvec_encoder = SpellingToConcatCharVecEncoder(self.alph)
        self.batchsize = parameters['batchsize']
        self.nb_hiddenunits = parameters['nb_hiddenunits']
        self.onehotencoder = OneHotEncoder()
        self.onehotencoder.fit(np.arange(len(self.dictionary)).reshape((len(self.dictionary), 1)))
        self.model = kerasio.load_model(prefix)
        self.trained = True

开发者ID:stephenhky，项目名称:PyShortTextCategorization，代码行数:22，代码来源:sakaguchi.py

示例4: get_X_y

# 需要导入模块: from sklearn import preprocessing [as 别名]
# 或者: from sklearn.preprocessing import OneHotEncoder [as 别名]
def get_X_y(**kwargs):
    """simple wrapper around pd.read_csv that extracts features and labels

    Some systematic preprocessing is also carried out to avoid doing this
    transformation repeatedly in the code.
    """
    global label_encoder
    df = pd.read_csv(info['path'], sep='\t', **kwargs)
    return preprocess(df, label_encoder)

###############################################################################
# Classifier objects in |sklearn| often require :code:`y` to be integer labels.
# Additionally, |APS| requires a binary version of the labels.  For these two
# purposes, we create:
#
# * a |LabelEncoder|, that we pre-fitted on the known :code:`y` classes
# * a |OneHotEncoder|, pre-fitted on the resulting integer labels.
#
# Their |transform| methods can the be called at appopriate times.

开发者ID:dirty-cat，项目名称:dirty_cat，代码行数:21，代码来源:05_scaling_non_linear_models.py

示例5: call

# 需要导入模块: from sklearn import preprocessing [as 别名]
# 或者: from sklearn.preprocessing import OneHotEncoder [as 别名]
def __call__(self, data):
        if 'metadata' not in data:
            raise TransformException(
                f"Expected metadata in data, got {list(data.keys())}")
        if 'labels' not in data['metadata']:
            raise TransformException(
                f"Expected labels in data['metadata'], got "
                f"{list(data['metadata'].keys())}")

        enc = OneHotEncoder(categories=[data['metadata']['labels']])

        sources = data[self.source_key]
        source_keys = [k.split('::')[0] for k in list(sources.keys())]
        source_labels = [[l] for l in sorted(source_keys)]

        one_hot_labels = enc.fit_transform(source_labels)
        data['one_hot_labels'] = one_hot_labels.toarray()

        return data

开发者ID:nussl，项目名称:nussl，代码行数:21，代码来源:transforms.py

示例6: init

# 需要导入模块: from sklearn import preprocessing [as 别名]
# 或者: from sklearn.preprocessing import OneHotEncoder [as 别名]
def __init__(self, 
                 maptimes = 10, 
                 enhencetimes = 10,
                 map_function = 'linear',
                 enhence_function = 'linear',
                 batchsize = 'auto', 
                 reg = 0.001):
        
        self._maptimes = maptimes
        self._enhencetimes = enhencetimes
        self._batchsize = batchsize
        self._reg = reg
        self._map_function = map_function
        self._enhence_function = enhence_function
        
        self.W = 0
        self.pesuedoinverse = 0
        self.normalscaler = scaler()
        self.onehotencoder = preprocessing.OneHotEncoder(sparse = False)
        self.mapping_generator = node_generator()
        self.enhence_generator = node_generator(whiten = True)

开发者ID:LiangjunFeng，项目名称:Broad-Learning-System，代码行数:23，代码来源:bls.py

示例7: test_column_transformer_list

# 需要导入模块: from sklearn import preprocessing [as 别名]
# 或者: from sklearn.preprocessing import OneHotEncoder [as 别名]
def test_column_transformer_list():
    X_list = [
        [1, float('nan'), 'a'],
        [0, 0, 'b']
    ]
    expected_result = np.array([
        [1, float('nan'), 1, 0],
        [-1, 0, 0, 1],
    ])

    ct = ColumnTransformer([
        ('numerical', StandardScaler(), [0, 1]),
        ('categorical', OneHotEncoder(), [2]),
    ])

    assert_array_equal(ct.fit_transform(X_list), expected_result)
    assert_array_equal(ct.fit(X_list).transform(X_list), expected_result)

开发者ID:PacktPublishing，项目名称:Mastering-Elasticsearch-7.0，代码行数:19，代码来源:test_column_transformer.py

示例8: test_encode_options

# 需要导入模块: from sklearn import preprocessing [as 别名]
# 或者: from sklearn.preprocessing import OneHotEncoder [as 别名]
def test_encode_options():
    est = KBinsDiscretizer(n_bins=[2, 3, 3, 3],
                           encode='ordinal').fit(X)
    Xt_1 = est.transform(X)
    est = KBinsDiscretizer(n_bins=[2, 3, 3, 3],
                           encode='onehot-dense').fit(X)
    Xt_2 = est.transform(X)
    assert not sp.issparse(Xt_2)
    assert_array_equal(OneHotEncoder(
                           categories=[np.arange(i) for i in [2, 3, 3, 3]],
                           sparse=False)
                       .fit_transform(Xt_1), Xt_2)
    est = KBinsDiscretizer(n_bins=[2, 3, 3, 3],
                           encode='onehot').fit(X)
    Xt_3 = est.transform(X)
    assert sp.issparse(Xt_3)
    assert_array_equal(OneHotEncoder(
                           categories=[np.arange(i) for i in [2, 3, 3, 3]],
                           sparse=True)
                       .fit_transform(Xt_1).toarray(),
                       Xt_3.toarray())

开发者ID:PacktPublishing，项目名称:Mastering-Elasticsearch-7.0，代码行数:23，代码来源:test_discretization.py

示例9: test_one_hot_encoder_force_new_behaviour

# 需要导入模块: from sklearn import preprocessing [as 别名]
# 或者: from sklearn.preprocessing import OneHotEncoder [as 别名]
def test_one_hot_encoder_force_new_behaviour():
    # ambiguous integer case (non secutive range of categories)
    X = np.array([[1, 2]]).T
    X2 = np.array([[0, 1]]).T

    # without argument -> by default using legacy behaviour with warnings
    enc = OneHotEncoder()

    with ignore_warnings(category=FutureWarning):
        enc.fit(X)

    res = enc.transform(X2)
    exp = np.array([[0, 0], [1, 0]])
    assert_array_equal(res.toarray(), exp)

    # with explicit auto argument -> don't use legacy behaviour
    # (so will raise an error on unseen value within range)
    enc = OneHotEncoder(categories='auto')
    enc.fit(X)
    assert_raises(ValueError, enc.transform, X2)

开发者ID:PacktPublishing，项目名称:Mastering-Elasticsearch-7.0，代码行数:22，代码来源:test_encoders.py

示例10: test_one_hot_encoder_specified_categories

# 需要导入模块: from sklearn import preprocessing [as 别名]
# 或者: from sklearn.preprocessing import OneHotEncoder [as 别名]
def test_one_hot_encoder_specified_categories(X, X2, cats, cat_dtype):
    enc = OneHotEncoder(categories=cats)
    exp = np.array([[1., 0., 0.],
                    [0., 1., 0.]])
    assert_array_equal(enc.fit_transform(X).toarray(), exp)
    assert list(enc.categories[0]) == list(cats[0])
    assert enc.categories_[0].tolist() == list(cats[0])
    # manually specified categories should have same dtype as
    # the data when coerced from lists
    assert enc.categories_[0].dtype == cat_dtype

    # when specifying categories manually, unknown categories should already
    # raise when fitting
    enc = OneHotEncoder(categories=cats)
    with pytest.raises(ValueError, match="Found unknown categories"):
        enc.fit(X2)
    enc = OneHotEncoder(categories=cats, handle_unknown='ignore')
    exp = np.array([[1., 0., 0.], [0., 0., 0.]])
    assert_array_equal(enc.fit(X2).transform(X2).toarray(), exp)

开发者ID:PacktPublishing，项目名称:Mastering-Elasticsearch-7.0，代码行数:21，代码来源:test_encoders.py

示例11: test_one_hot_encoder_unsorted_categories

# 需要导入模块: from sklearn import preprocessing [as 别名]
# 或者: from sklearn.preprocessing import OneHotEncoder [as 别名]
def test_one_hot_encoder_unsorted_categories():
    X = np.array([['a', 'b']], dtype=object).T

    enc = OneHotEncoder(categories=[['b', 'a', 'c']])
    exp = np.array([[0., 1., 0.],
                    [1., 0., 0.]])
    assert_array_equal(enc.fit(X).transform(X).toarray(), exp)
    assert_array_equal(enc.fit_transform(X).toarray(), exp)
    assert enc.categories_[0].tolist() == ['b', 'a', 'c']
    assert np.issubdtype(enc.categories_[0].dtype, np.object_)

    # unsorted passed categories still raise for numerical values
    X = np.array([[1, 2]]).T
    enc = OneHotEncoder(categories=[[2, 1, 3]])
    msg = 'Unsorted categories are not supported'
    with pytest.raises(ValueError, match=msg):
        enc.fit_transform(X)

开发者ID:PacktPublishing，项目名称:Mastering-Elasticsearch-7.0，代码行数:19，代码来源:test_encoders.py

示例12: test_one_hot_encoder_raise_missing

# 需要导入模块: from sklearn import preprocessing [as 别名]
# 或者: from sklearn.preprocessing import OneHotEncoder [as 别名]
def test_one_hot_encoder_raise_missing(X, as_data_frame, handle_unknown):
    if as_data_frame:
        pd = pytest.importorskip('pandas')
        X = pd.DataFrame(X)

    ohe = OneHotEncoder(categories='auto', handle_unknown=handle_unknown)

    with pytest.raises(ValueError, match="Input contains NaN"):
        ohe.fit(X)

    with pytest.raises(ValueError, match="Input contains NaN"):
        ohe.fit_transform(X)

    if as_data_frame:
        X_partial = X.iloc[:1, :]
    else:
        X_partial = X[:1, :]

    ohe.fit(X_partial)

    with pytest.raises(ValueError, match="Input contains NaN"):
        ohe.transform(X)

开发者ID:PacktPublishing，项目名称:Mastering-Elasticsearch-7.0，代码行数:24，代码来源:test_encoders.py

示例13: test_encoder_dtypes

# 需要导入模块: from sklearn import preprocessing [as 别名]
# 或者: from sklearn.preprocessing import OneHotEncoder [as 别名]
def test_encoder_dtypes():
    # check that dtypes are preserved when determining categories
    enc = OneHotEncoder(categories='auto')
    exp = np.array([[1., 0., 1., 0.], [0., 1., 0., 1.]], dtype='float64')

    for X in [np.array([[1, 2], [3, 4]], dtype='int64'),
              np.array([[1, 2], [3, 4]], dtype='float64'),
              np.array([['a', 'b'], ['c', 'd']]),  # string dtype
              np.array([[1, 'a'], [3, 'b']], dtype='object')]:
        enc.fit(X)
        assert all([enc.categories_[i].dtype == X.dtype for i in range(2)])
        assert_array_equal(enc.transform(X).toarray(), exp)

    X = [[1, 2], [3, 4]]
    enc.fit(X)
    assert all([np.issubdtype(enc.categories_[i].dtype, np.integer)
                for i in range(2)])
    assert_array_equal(enc.transform(X).toarray(), exp)

    X = [[1, 'a'], [3, 'b']]
    enc.fit(X)
    assert all([enc.categories_[i].dtype == 'object' for i in range(2)])
    assert_array_equal(enc.transform(X).toarray(), exp)

开发者ID:PacktPublishing，项目名称:Mastering-Elasticsearch-7.0，代码行数:25，代码来源:test_encoders.py

示例14: test_encoder_dtypes_pandas

# 需要导入模块: from sklearn import preprocessing [as 别名]
# 或者: from sklearn.preprocessing import OneHotEncoder [as 别名]
def test_encoder_dtypes_pandas():
    # check dtype (similar to test_categorical_encoder_dtypes for dataframes)
    pd = pytest.importorskip('pandas')

    enc = OneHotEncoder(categories='auto')
    exp = np.array([[1., 0., 1., 0., 1., 0.],
                    [0., 1., 0., 1., 0., 1.]], dtype='float64')

    X = pd.DataFrame({'A': [1, 2], 'B': [3, 4], 'C': [5, 6]}, dtype='int64')
    enc.fit(X)
    assert all([enc.categories_[i].dtype == 'int64' for i in range(2)])
    assert_array_equal(enc.transform(X).toarray(), exp)

    X = pd.DataFrame({'A': [1, 2], 'B': ['a', 'b'], 'C': [3., 4.]})
    X_type = [int, object, float]
    enc.fit(X)
    assert all([enc.categories_[i].dtype == X_type[i] for i in range(3)])
    assert_array_equal(enc.transform(X).toarray(), exp)

开发者ID:PacktPublishing，项目名称:Mastering-Elasticsearch-7.0，代码行数:20，代码来源:test_encoders.py

示例15: test_one_hot_encoder_drop_manual

# 需要导入模块: from sklearn import preprocessing [as 别名]
# 或者: from sklearn.preprocessing import OneHotEncoder [as 别名]
def test_one_hot_encoder_drop_manual():
    cats_to_drop = ['def', 12, 3, 56]
    enc = OneHotEncoder(drop=cats_to_drop)
    X = [['abc', 12, 2, 55],
         ['def', 12, 1, 55],
         ['def', 12, 3, 56]]
    trans = enc.fit_transform(X).toarray()
    exp = [[1, 0, 1, 1],
           [0, 1, 0, 1],
           [0, 0, 0, 0]]
    assert_array_equal(trans, exp)
    dropped_cats = [cat[feature]
                    for cat, feature in zip(enc.categories_,
                                            enc.drop_idx_)]
    assert_array_equal(dropped_cats, cats_to_drop)
    assert_array_equal(np.array(X, dtype=object),
                       enc.inverse_transform(trans))