本文整理汇总了Python中sklearn.preprocessing.Normalizer类的典型用法代码示例。如果您正苦于以下问题:Python preprocessing.Normalizer的具体用法?Python preprocessing.Normalizer怎么用?Python preprocessing.Normalizer使用的例子?那么恭喜您,这里精选的代码示例或许可以为您提供帮助。您也可以进一步了解该类所在模块sklearn.preprocessing的用法示例。
在下文中一共展示了preprocessing.Normalizer方法的15个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Python代码示例。
示例1: test_make_column_transformer_kwargs
# 需要导入模块: from sklearn import preprocessing [as 别名]
# 或者: from sklearn.preprocessing import Normalizer [as 别名]
def test_make_column_transformer_kwargs():
    """Check how ``make_column_transformer`` treats keyword arguments.

    ``n_jobs``, ``remainder`` and ``sparse_threshold`` must be stored on the
    returned transformer without altering its ``transformers`` list, and an
    unexpected keyword (``transformer_weights``) must raise ``TypeError``.
    """
    scaler = StandardScaler()
    norm = Normalizer()

    def column_specs():
        # Build fresh (transformer, columns) tuples for every call.
        return (scaler, 'first'), (norm, ['second'])

    ct = make_column_transformer(
        *column_specs(),
        n_jobs=3,
        remainder='drop',
        sparse_threshold=0.5
    )
    assert_equal(
        ct.transformers,
        make_column_transformer(*column_specs()).transformers,
    )

    expected_attributes = (
        ('n_jobs', 3),
        ('remainder', 'drop'),
        ('sparse_threshold', 0.5),
    )
    for attribute, expected in expected_attributes:
        assert_equal(getattr(ct, attribute), expected)

    # invalid keyword parameters should raise an error message
    assert_raise_message(
        TypeError,
        'Unknown keyword arguments: "transformer_weights"',
        make_column_transformer,
        *column_specs(),
        transformer_weights={'pca': 10, 'Transf': 1}
    )
示例2: get_model
# 需要导入模块: from sklearn import preprocessing [as 别名]
# 或者: from sklearn.preprocessing import Normalizer [as 别名]
def get_model(with_pipeline=False):
    """Return the multi-layer perceptron classifier.

    Parameters
    ----------
    with_pipeline : bool, default False
        When true, the classifier is returned as the last step (``'net'``) of
        a ``Pipeline`` that first applies a ``FeatureUnion`` of
        ``MinMaxScaler`` and ``Normalizer`` (``'scale'``) and then
        ``SelectKBest(k=N_FEATURES)`` (``'select'``).

    Returns
    -------
    The bare ``NeuralNetClassifier`` or the pipeline wrapping it.
    """
    net = NeuralNetClassifier(MLPClassifier)
    if not with_pipeline:
        return net

    scaling = FeatureUnion([
        ('minmax', MinMaxScaler()),
        ('normalize', Normalizer()),
    ])
    # keep input size constant
    selection = SelectKBest(k=N_FEATURES)
    return Pipeline([
        ('scale', scaling),
        ('select', selection),
        ('net', net),
    ])
示例3: test_boston_OHE_pipeline
# 需要导入模块: from sklearn import preprocessing [as 别名]
# 或者: from sklearn.preprocessing import Normalizer [as 别名]
def test_boston_OHE_pipeline(self):
    """Convert a OneHotEncoder + Normalizer pipeline and compare outputs.

    For several choices of categorical columns the pipeline is fitted on the
    Boston data, converted with ``sklearn.convert`` and evaluated against the
    pipeline's own ``transform`` output; no evaluation errors are allowed.
    """
    data = load_boston()
    names = data.feature_names

    for categorical_features in [[3], [8], [3, 8], [8, 3]]:
        # Wrapping both steps in a pipeline lets the test check that the
        # output dimension handling is correct.
        steps = [
            ("OHE", OneHotEncoder(categorical_features=categorical_features)),
            ("Normalizer", Normalizer()),
        ]
        model = Pipeline(steps)
        model.fit(data.data.copy(), data.target)

        # Convert the model
        spec = sklearn.convert(model, names, "out").get_spec()

        input_data = [dict(zip(names, row)) for row in data.data]
        transformed = model.transform(data.data.copy())
        output_data = [{"out": row} for row in transformed]

        result = evaluate_transformer(spec, input_data, output_data)
        assert result["num_errors"] == 0
示例4: test_random
# 需要导入模块: from sklearn import preprocessing [as 别名]
# 或者: from sklearn.preprocessing import Normalizer [as 别名]
def test_random(self):
    """Convert a ``Normalizer`` for each norm and verify the converted model.

    For every supported norm (``"l1"``, ``"l2"``, ``"max"``) a ``Normalizer``
    is fitted on random data, converted with ``converter.convert`` and
    evaluated against the scikit-learn output. The evaluation result is
    asserted to contain no errors; previously it was discarded, so the test
    could not fail on a mismatch.
    """
    feature_names = ("a", "b", "c")

    # Generate some random data
    X = _np.random.random(size=(50, len(feature_names)))

    for param in ("l1", "l2", "max"):
        cur_model = Normalizer(norm=param)
        output = cur_model.fit_transform(X)

        spec = converter.convert(cur_model, list(feature_names), "out")

        metrics = evaluate_transformer(
            spec,
            [dict(zip(feature_names, row)) for row in X],
            [{"out": row} for row in output],
        )
        # Fail loudly when the converted model disagrees with scikit-learn.
        assert metrics["num_errors"] == 0, (
            "converted Normalizer(norm=%r) disagrees with scikit-learn" % param
        )
示例5: test_within_pipeline
# 需要导入模块: from sklearn import preprocessing [as 别名]
# 或者: from sklearn.preprocessing import Normalizer [as 别名]
def test_within_pipeline():
    """Run a ``PliersTransformer`` followed by ``Normalizer`` in a Pipeline.

    Skipped when ``cv2`` or ``sklearn`` is unavailable. Checks the shape and
    the two values of the transformed apple image, and that the transformer
    exposes the expected metadata.
    """
    pytest.importorskip('cv2')
    pytest.importorskip('sklearn')
    from sklearn.pipeline import Pipeline
    from sklearn.preprocessing import Normalizer

    stim = join(get_test_data_path(), 'image', 'apple.jpg')
    extractors = [BrightnessExtractor(), SharpnessExtractor()]
    trans = PliersTransformer(Graph(extractors))
    pipeline = Pipeline([('pliers', trans), ('normalizer', Normalizer())])

    res = pipeline.fit_transform(stim)
    assert res.shape == (1, 2)
    # Expected values, in column order.
    for column, expected in enumerate((0.66393, 0.74780)):
        assert np.isclose(res[0][column], expected, 1e-5)

    meta = trans.metadata_
    assert 'onset' in meta.columns
    assert meta['class'][0] == 'ImageStim'
示例6: __init__
# 需要导入模块: from sklearn import preprocessing [as 别名]
# 或者: from sklearn.preprocessing import Normalizer [as 别名]
def __init__(self, source_model: mx.mod.Module, feature_layer_names, context_function=mx.context.cpu, num_devices=1,
             max_function_evaluations=100, apply_l2_norm=False):
    """Set up the meta-model and its training configuration.

    ``source_model``, ``feature_layer_names``, ``context_function`` and
    ``num_devices`` are forwarded unchanged to the base class constructor.

    :param max_function_evaluations: Stored on the instance as given.
    :param apply_l2_norm: Flag stored on the instance; ``l2_normalizer`` is
        the normalizer to use when it is set.
    """
    # The base class takes the parameters required for meta-models.
    super().__init__(source_model, feature_layer_names, context_function, num_devices)

    self.max_function_evaluations = max_function_evaluations
    self.apply_l2_norm = apply_l2_norm

    # Feature mean used for normalization in both training and prediction;
    # it is computed during the training phase, so it starts out unset.
    self.feature_mean = None

    # Optimizer used to train the GP model.
    self.optimizer = 'lbfgs'

    # How many inducing points the sparse GP uses.
    self.NUM_INDUCING_SPARSE_GP = 100

    # L2 normalizer applied when the apply_l2_norm flag is set.
    self.l2_normalizer = Normalizer(norm='l2')
示例7: test_kneighbors_with_or_without_self_hit
# 需要导入模块: from sklearn import preprocessing [as 别名]
# 或者: from sklearn.preprocessing import Normalizer [as 别名]
def test_kneighbors_with_or_without_self_hit(LSH: callable, metric, n_jobs, verbose):
    """Compare ``kneighbors()`` on the fitted data with ``kneighbors(X)``.

    The index-only variants must return the same indices as the variants that
    also return distances, and the results of the two query styles are
    compared under a 1% tolerance.
    """
    X, y = make_classification(random_state=234)
    X = Normalizer().fit_transform(X)

    lsh = LSH(metric=metric, n_jobs=n_jobs, verbose=verbose)
    lsh.fit(X, y)

    # Queries are issued in a fixed order: with distances first, then indices only.
    neigh_dist, neigh_ind = lsh.kneighbors(return_distance=True)
    neigh_dist_self, neigh_ind_self = lsh.kneighbors(X, return_distance=True)
    ind_only = lsh.kneighbors(return_distance=False)
    ind_only_self = lsh.kneighbors(X, return_distance=False)

    assert_array_equal(neigh_ind, ind_only)
    assert_array_equal(neigh_ind_self, ind_only_self)

    # NOTE(review): the first check averages raw index differences and the
    # second measures the share of slots whose distance gap is *below* 1e-4;
    # confirm that these are the quantities the messages describe.
    mean_index_gap = (neigh_ind - neigh_ind_self).mean()
    assert mean_index_gap <= .01, f'More than 1% of neighbors mismatch'

    small_gap_share = ((neigh_dist - neigh_dist_self) < 0.0001).mean()
    assert small_gap_share <= 0.01, \
        f'Not almost equal to 4 decimals in more than 1% of neighbor slots'
示例8: test_radius_neighbors_with_or_without_self_hit
# 需要导入模块: from sklearn import preprocessing [as 别名]
# 或者: from sklearn.preprocessing import Normalizer [as 别名]
def test_radius_neighbors_with_or_without_self_hit(LSH, metric, n_jobs, verbose):
    """Compare ``radius_neighbors()`` on the fitted data with ``radius_neighbors(X)``.

    The radius is the largest distance found in the third column of
    ``kneighbors(n_candidates=3)``. For every sample the index-only results
    must match the results returned together with distances, and the first
    three neighbors found without an explicit query must equal entries 1..3
    of the neighbors found when querying with ``X`` itself.

    The data generator is seeded, like the sibling tests, so that failures
    are reproducible.
    """
    X, y = make_classification(random_state=234)
    X = Normalizer().fit_transform(X)
    lsh = LSH(metric=metric, n_jobs=n_jobs, verbose=verbose)
    lsh.fit(X, y)

    radius = lsh.kneighbors(n_candidates=3)[0][:, 2].max()

    neigh_dist, neigh_ind = lsh.radius_neighbors(return_distance=True, radius=radius)
    neigh_dist_self, neigh_ind_self = lsh.radius_neighbors(X, return_distance=True, radius=radius)
    ind_only = lsh.radius_neighbors(return_distance=False, radius=radius)
    ind_only_self = lsh.radius_neighbors(X, return_distance=False, radius=radius)

    assert len(neigh_ind) == len(neigh_ind_self) == len(neigh_dist) == len(neigh_dist_self)
    # zip() below would silently truncate, so the index-only results must
    # cover every sample as well.
    assert len(ind_only) == len(ind_only_self) == len(neigh_ind)

    per_sample = zip(neigh_ind, neigh_ind_self, neigh_dist, neigh_dist_self,
                     ind_only, ind_only_self)
    for ind, ind_self, dist, dist_self, only, only_self in per_sample:
        assert_array_equal(ind, only)
        assert_array_equal(ind_self, only_self)
        # Entry 0 of the explicit-query result is skipped in the comparison.
        assert_array_equal(ind[:3], ind_self[1:4])
        assert_array_almost_equal(dist[:3], dist_self[1:4])
示例9: test_squared_euclidean_same_neighbors_as_euclidean
# 需要导入模块: from sklearn import preprocessing [as 别名]
# 或者: from sklearn.preprocessing import Normalizer [as 别名]
def test_squared_euclidean_same_neighbors_as_euclidean(LSH):
    """Check that ``'sqeuclidean'`` agrees with ``'minkowski'`` up to squaring.

    Both metrics must yield the same neighbor indices, and the squared
    ``'minkowski'`` distances must match the ``'sqeuclidean'`` distances. For
    classes listed in ``LSH_WITH_RADIUS`` the same is checked for
    ``radius_neighbors`` (with the radius squared for the second model).
    """
    X, y = make_classification(random_state=234)
    X = Normalizer().fit_transform(X)

    lsh = LSH(metric='minkowski')
    lsh.fit(X, y)
    neigh_dist_eucl, neigh_ind_eucl = lsh.kneighbors()

    lsh_sq = LSH(metric='sqeuclidean')
    lsh_sq.fit(X, y)
    neigh_dist_sqeucl, neigh_ind_sqeucl = lsh_sq.kneighbors()

    assert_array_equal(neigh_ind_eucl, neigh_ind_sqeucl)
    assert_array_almost_equal(neigh_dist_eucl ** 2, neigh_dist_sqeucl)

    if LSH not in LSH_WITH_RADIUS:
        return

    radius = neigh_dist_eucl[:, 2].max()
    rad_dist_eucl, rad_ind_eucl = lsh.radius_neighbors(radius=radius)
    rad_dist_sqeucl, rad_ind_sqeucl = lsh_sq.radius_neighbors(radius=radius**2)
    for i, ind_eucl in enumerate(rad_ind_eucl):
        assert_array_equal(ind_eucl, rad_ind_sqeucl[i])
        assert_array_almost_equal(rad_dist_eucl[i] ** 2, rad_dist_sqeucl[i])
示例10: nbow_model
# 需要导入模块: from sklearn import preprocessing [as 别名]
# 或者: from sklearn.preprocessing import Normalizer [as 别名]
def nbow_model(task, embeddings, word2idx):
    """Build a pipeline: NBOW features, L2 normalizer, then the task estimator.

    :param task: ``"clf"`` selects a balanced ``LogisticRegression``,
        ``"reg"`` selects a linear-kernel ``SVR``.
    :param embeddings: Passed through to ``NBOWVectorizer``.
    :param word2idx: Passed through to ``NBOWVectorizer``.
    :return: The assembled ``Pipeline``.
    :raises ValueError: If ``task`` is neither ``"clf"`` nor ``"reg"``.
    """
    if task not in ("clf", "reg"):
        raise ValueError("invalid task!")

    if task == "clf":
        algo = LogisticRegression(C=0.6, random_state=0,
                                  class_weight='balanced')
    else:
        algo = SVR(kernel='linear', C=0.6)

    vectorizer = NBOWVectorizer(
        aggregation=["mean"],
        embeddings=embeddings,
        word2idx=word2idx,
        stopwords=False,
    )
    steps = [
        ('embeddings-feats', vectorizer),
        ('normalizer', Normalizer(norm='l2')),
        ('clf', algo),
    ]
    return Pipeline(steps)
示例11: test_objectmapper
# 需要导入模块: from sklearn import preprocessing [as 别名]
# 或者: from sklearn.preprocessing import Normalizer [as 别名]
def test_objectmapper(self):
    """Check that ``ModelFrame.preprocessing`` exposes the ``pp`` classes.

    Every listed attribute of ``df.preprocessing`` must be the very same
    object as the attribute of that name on ``pp``.
    """
    df = pdml.ModelFrame([])

    # Checked in this order; the first mismatch fails the test.
    exposed_names = (
        'Binarizer',
        'FunctionTransformer',
        'Imputer',
        'KernelCenterer',
        'LabelBinarizer',
        'LabelEncoder',
        'MultiLabelBinarizer',
        'MaxAbsScaler',
        'MinMaxScaler',
        'Normalizer',
        'OneHotEncoder',
        'PolynomialFeatures',
        'RobustScaler',
        'StandardScaler',
    )
    for name in exposed_names:
        self.assertIs(getattr(df.preprocessing, name), getattr(pp, name))
示例12: __init__
# 需要导入模块: from sklearn import preprocessing [as 别名]
# 或者: from sklearn.preprocessing import Normalizer [as 别名]
def __init__(self, min_df=1, max_df=0.9, tokenizer=LemmaTokenizer, hash=False):
    """
    Build the vectorizer -> tf-idf -> normalizer pipeline.

    `min_df` filters out extremely rare words, which should not be
    allowed to dominate the distance metric.
    `max_df` filters out extremely common words, which carry little
    information.
    Both are only passed to the `CountVectorizer`; they are not used when
    `hash` is true and a `HashingVectorizer` is built instead.
    `tokenizer` is called without arguments and the result is wrapped in
    `Tokenizer`.
    """
    # Wrap the specified tokenizer
    wrapped = Tokenizer(tokenizer())

    # Options common to both vectorizer flavours.
    common = dict(input='content', stop_words='english', lowercase=True, tokenizer=wrapped)
    if hash:
        vectr = HashingVectorizer(**common)
    else:
        vectr = CountVectorizer(min_df=min_df, max_df=max_df, **common)

    self.pipeline = Pipeline([
        ('vectorizer', vectr),
        ('tfidf', TfidfTransformer(norm=None, use_idf=True, smooth_idf=True)),
        ('normalizer', Normalizer(copy=False)),
    ])
    self.trained = False
示例13: __init__
# 需要导入模块: from sklearn import preprocessing [as 别名]
# 或者: from sklearn.preprocessing import Normalizer [as 别名]
def __init__(self):
    """Run the parent initializer, then create a default ``Normalizer``."""
    super().__init__()
    # Stored on the instance under the name ``normalizer``.
    self.normalizer = Normalizer()
示例14: train
# 需要导入模块: from sklearn import preprocessing [as 别名]
# 或者: from sklearn.preprocessing import Normalizer [as 别名]
def train(self, training_data_X, training_data_Y):
    """Fit a fresh normalizer and SVM classifier on the training data.

    New ``Normalizer`` and ``svm.SVC(gamma=0.001, C=100.)`` instances replace
    ``self.normalizer`` and ``self.svc``; the classifier is fitted on the
    normalized ``training_data_X`` with ``training_data_Y`` as targets.
    """
    self.normalizer = Normalizer()
    self.svc = svm.SVC(gamma=0.001, C=100.)
    # The classifier is fitted on the normalizer's output, not the raw samples.
    self.svc.fit(
        self.normalizer.fit_transform(training_data_X),
        training_data_Y,
    )
示例15: data_cleaning_formatting
# 需要导入模块: from sklearn import preprocessing [as 别名]
# 或者: from sklearn.preprocessing import Normalizer [as 别名]
def data_cleaning_formatting(X):
    """Zero-fill missing values, encode the data and apply ``Normalizer``.

    :param X: Object providing ``fillna`` (presumably a pandas DataFrame;
        confirm against the caller).
    :return: Output of ``Normalizer().fit_transform`` on the encoded data.
    """
    # Basic cleaning: replace every missing value with 0.
    # A second call, ``X.fillna('ffill')``, used to follow. It passed the
    # *string* 'ffill' as a fill value (it is not a forward fill) and could
    # never change anything, because no missing values are left after the
    # zero fill. It was removed; the result is unchanged.
    # NOTE(review): if forward filling was intended, it has to run *before*
    # the zero fill (e.g. ``X.ffill()``) - confirm the desired behaviour.
    X = X.fillna(0)
    # Encode data
    X = encode_data(X)
    return Normalizer().fit_transform(X)