本文整理汇总了Python中sklearn.preprocessing.MinMaxScaler.fit_transform方法的典型用法代码示例。如果您正苦于以下问题:Python MinMaxScaler.fit_transform方法的具体用法?Python MinMaxScaler.fit_transform怎么用?Python MinMaxScaler.fit_transform使用的例子?那么恭喜您, 这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在类sklearn.preprocessing.MinMaxScaler
的用法示例。
在下文中一共展示了MinMaxScaler.fit_transform方法的15个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Python代码示例。
示例1: prescale_data
# 需要导入模块: from sklearn.preprocessing import MinMaxScaler [as 别名]
# 或者: from sklearn.preprocessing.MinMaxScaler import fit_transform [as 别名]
def prescale_data(x_train, x_test, method):
    """
    Pre-scale training data (and optionally test data) using the specified method.

    The scaler is fit on the training data only; the test data is transformed
    with the training-fit parameters, so no test statistics leak into scaling.

    :param x_train: The training data to be pre-scaled.
    :param x_test: The (optional) test data to be pre-scaled, or None.
    :param method: The prescaling method: "minmaxscaler" or "standartscaler"
                   (the misspelling is kept for backward compatibility with
                   existing callers), or None for no scaling.
    :return: A tuple (x_train, x_test) of pre-scaled data, or only x_train
             if x_test is None.
    :raises ValueError: if method is neither None nor a recognized name.
    """
    if method is not None:
        scaler = None
        if method == "minmaxscaler":
            from sklearn.preprocessing import MinMaxScaler
            scaler = MinMaxScaler()
        if method == "standartscaler":
            from sklearn.preprocessing import StandardScaler
            scaler = StandardScaler()
        if scaler is None:
            raise ValueError("Invalid pre-scaling method: {}".format(method))
        # fix: the original called fit_transform(), threw the result away, and
        # then re-ran transform() — one fit_transform() does both in one pass
        x_train = scaler.fit_transform(x_train)
        if x_test is not None:
            x_test = scaler.transform(x_test)
    if x_test is not None:
        return x_train, x_test
    else:
        return x_train
示例2: test_min_max_scaler_zero_variance_features
# 需要导入模块: from sklearn.preprocessing import MinMaxScaler [as 别名]
# 或者: from sklearn.preprocessing.MinMaxScaler import fit_transform [as 别名]
def test_min_max_scaler_zero_variance_features():
    """Check MinMaxScaler on toy data with zero-variance features."""
    # first two columns are constant; only the third one varies
    data = [[0., 1., 0.5],
            [0., 1., -0.1],
            [0., 1., 1.1]]
    data_new = [[+0., 2., 0.5],
                [-1., 1., 0.0],
                [+0., 1., 1.5]]
    # default feature range (0, 1)
    default_scaler = MinMaxScaler()
    transformed = default_scaler.fit_transform(data)
    expected_default = [[0., 0., 0.5],
                        [0., 0., 0.0],
                        [0., 0., 1.0]]
    assert_array_almost_equal(transformed, expected_default)
    # previously unseen data may fall outside the fitted range
    transformed_new = default_scaler.transform(data_new)
    expected_new = [[+0., 1., 0.500],
                    [-1., 0., 0.083],
                    [+0., 0., 1.333]]
    assert_array_almost_equal(transformed_new, expected_new, decimal=2)
    # custom feature range (1, 2)
    ranged_scaler = MinMaxScaler(feature_range=(1, 2))
    transformed = ranged_scaler.fit_transform(data)
    expected_ranged = [[1., 1., 1.5],
                       [1., 1., 1.0],
                       [1., 1., 2.0]]
    assert_array_almost_equal(transformed, expected_ranged)
示例3: loaddataset
# 需要导入模块: from sklearn.preprocessing import MinMaxScaler [as 别名]
# 或者: from sklearn.preprocessing.MinMaxScaler import fit_transform [as 别名]
def loaddataset(self, path, module):
    """
    Load the Titanic CSV at *path* and build the feature matrix.

    Fills missing Age/Fare with the column mean, one-hot encodes
    Sex/Embarked/Pclass, and min-max scales Age and Fare.

    :param path: path to the CSV file readable by pandas.read_csv.
    :param module: 'train' sets self.trainset/self.trainlabel;
                   'test' sets self.testset (includes PassengerId).
    """
    df = pd.read_csv(path)
    subdf = df[['PassengerId', 'Pclass', 'Sex', 'Age', 'Embarked', 'Fare', 'SibSp', 'Parch']]
    SibSp = subdf['SibSp']
    Parch = subdf['Parch']
    # supplement missing Age/Fare with the column mean
    Age = subdf['Age'].fillna(value=subdf.Age.mean())
    Fare = subdf['Fare'].fillna(value=subdf.Fare.mean())
    dummies_Sex = pd.get_dummies(subdf['Sex'], prefix='Sex')
    dummies_Embarked = pd.get_dummies(subdf['Embarked'], prefix='Embarked')
    dummies_Pclass = pd.get_dummies(subdf['Pclass'], prefix='Pclass')
    PassengerId = subdf['PassengerId']
    # scale Age & Fare to [0, 1]; fix: sklearn scalers require 2-D input, so
    # the original 1-D .values call raised "Expected 2D array" on modern sklearn
    scaler = MinMaxScaler()
    age_scaled = scaler.fit_transform(Age.values.reshape(-1, 1))
    fare_scaled = scaler.fit_transform(Fare.values.reshape(-1, 1))
    Age_Scaled = pd.DataFrame(age_scaled, columns=['Age_Scaled'])
    Fare_Scaled = pd.DataFrame(fare_scaled, columns=['Fare_Scaled'])
    if module == 'train':
        self.trainlabel = df.Survived
        self.trainset = pd.concat([dummies_Pclass, dummies_Sex, dummies_Embarked, Age_Scaled, Fare_Scaled, SibSp, Parch], axis=1)
    elif module == 'test':
        self.testset = pd.concat([PassengerId, dummies_Pclass, dummies_Sex, dummies_Embarked, Age_Scaled, Fare_Scaled, SibSp, Parch], axis=1)
示例4: test_min_max_scaler_iris
# 需要导入模块: from sklearn.preprocessing import MinMaxScaler [as 别名]
# 或者: from sklearn.preprocessing.MinMaxScaler import fit_transform [as 别名]
def test_min_max_scaler_iris():
    """MinMaxScaler on iris: range endpoints and inverse_transform round-trip.

    Fix: the original asserted the (0, 1) minimum twice (copy-paste
    duplicate); the per-range checks are deduplicated into one loop.
    """
    X = iris.data
    # MinMaxScaler() default is feature_range=(0, 1), so the first iteration
    # is equivalent to the original default-params case
    for feature_range in [(0, 1), (1, 2), (-.5, .6)]:
        scaler = MinMaxScaler(feature_range=feature_range)
        X_trans = scaler.fit_transform(X)
        assert_array_almost_equal(X_trans.min(axis=0), feature_range[0])
        assert_array_almost_equal(X_trans.max(axis=0), feature_range[1])
        # round-trip back to the original data
        X_trans_inv = scaler.inverse_transform(X_trans)
        assert_array_almost_equal(X, X_trans_inv)
    # an inverted range must be rejected at fit time
    scaler = MinMaxScaler(feature_range=(2, 1))
    assert_raises(ValueError, scaler.fit, X)
示例5: normalize_data
# 需要导入模块: from sklearn.preprocessing import MinMaxScaler [as 别名]
# 或者: from sklearn.preprocessing.MinMaxScaler import fit_transform [as 别名]
def normalize_data(tr_x, ts_x, normz=None, axis=0):
    """Normalize train/test feature arrays with the chosen scheme.

    :param tr_x: training array (2-D numpy array for 'minmax'/'sigmoid').
    :param ts_x: test array, normalized consistently with the training data.
    :param normz: None (no-op), 'scale', 'minmax', or 'sigmoid'.
    :param axis: 0 = per-column (feature-wise), 1 = per-row (sample-wise).
    :return: the (possibly modified in place) (tr_x, ts_x) pair.
    """
    # fix: the original compared strings with `is` (identity), which is
    # unreliable and a SyntaxWarning on Python 3.8+; use `==` instead
    if normz == 'scale':
        tr_x = scale(tr_x, axis=axis)
        ts_x = scale(ts_x, axis=axis)
    elif normz == 'minmax':
        minmax_scaler = MinMaxScaler()
        if axis == 0:
            # per-feature scaling: fit on the training column only, then apply
            # to the test column (the original refit on test data — leakage).
            # sklearn scalers need 2-D input, hence the reshape/ravel round-trip.
            for c_i in range(tr_x.shape[1]):
                tr_x[:, c_i] = minmax_scaler.fit_transform(tr_x[:, c_i].reshape(-1, 1)).ravel()
                ts_x[:, c_i] = minmax_scaler.transform(ts_x[:, c_i].reshape(-1, 1)).ravel()
        elif axis == 1:
            # per-sample scaling: each row is fitted independently (no
            # train/test coupling), and each array is iterated over its own
            # row count (the original indexed ts_x with tr_x's row range)
            for r_i in range(tr_x.shape[0]):
                tr_x[r_i, :] = minmax_scaler.fit_transform(tr_x[r_i, :].reshape(-1, 1)).ravel()
            for r_i in range(ts_x.shape[0]):
                ts_x[r_i, :] = minmax_scaler.fit_transform(ts_x[r_i, :].reshape(-1, 1)).ravel()
    elif normz == 'sigmoid':
        if axis == 0:
            # squash columns whose training max exceeds 1 into (-0.5, 0.5)
            col_max = np.max(tr_x, axis=0)
            cols_non_norm = np.argwhere(col_max > 1).tolist()
            tr_x[:, cols_non_norm] = -0.5 + (1 / (1 + np.exp(-tr_x[:, cols_non_norm])))
            # the column selection is derived from the training data and
            # reused for the test set (TODO in the original source)
            ts_x[:, cols_non_norm] = -0.5 + (1 / (1 + np.exp(-ts_x[:, cols_non_norm])))
        elif axis == 1:
            row_max = np.max(tr_x, axis=1)
            rows_non_norm = np.argwhere(row_max > 1).tolist()
            tr_x[rows_non_norm, :] = -0.5 + (1 / (1 + np.exp(-tr_x[rows_non_norm, :])))
            # row selection derived from training rows (TODO in the original)
            ts_x[rows_non_norm, :] = -0.5 + (1 / (1 + np.exp(-ts_x[rows_non_norm, :])))
    return tr_x, ts_x
示例6: NMFReducer
# 需要导入模块: from sklearn.preprocessing import MinMaxScaler [as 别名]
# 或者: from sklearn.preprocessing.MinMaxScaler import fit_transform [as 别名]
class NMFReducer():
    """Dimensionality reduction of a scikit-style dataset via NMF.

    The raw feature matrix is min-max scaled to [0, 1] first (NMF requires
    non-negative input), then factored into `num_components` components.
    """
    def __init__(self, dataset, dataset_name, num_components=10):
        # dataset: object with .data (2-D feature matrix) and .target (labels)
        self.dataset = dataset
        self.dataset_name = dataset_name
        self.labels = dataset.target
        self.scaler = MinMaxScaler()
        # scale to [0, 1] so every entry is non-negative, as NMF requires
        self.data = self.scaler.fit_transform(dataset.data)
        self.n_samples, self.n_features = self.data.shape
        self.reducer = NMF(n_components=num_components, max_iter=5000)
    def reduce(self):
        """Fit NMF on the scaled data; return the rescaled reduced matrix."""
        self.reducer.fit(self.data)
        # NOTE(review): fit_transform here REFITS self.scaler on the reduced
        # matrix, discarding the fit on the raw data from __init__ — any later
        # use of self.scaler (e.g. reduce_crossvalidation_set) sees the new
        # fit. Confirm this is intended.
        self.reduced = self.scaler.fit_transform(self.reducer.transform(self.data))
        return self.reduced
    def benchmark(self, estimator, name, data):
        """Fit a clustering estimator on `data` and print standard metrics
        (homogeneity, completeness, v-measure, ARI, AMI, silhouette)."""
        t0 = time()
        # cap the number of samples used for the silhouette computation
        sample_size = 300
        labels = self.labels
        estimator.fit(data)
        print('% 9s %.2fs %i %.3f %.3f %.3f %.3f %.3f %.3f'
        % (name, (time() - t0), estimator.inertia_,
        metrics.homogeneity_score(labels, estimator.labels_),
        metrics.completeness_score(labels, estimator.labels_),
        metrics.v_measure_score(labels, estimator.labels_),
        metrics.adjusted_rand_score(labels, estimator.labels_),
        metrics.adjusted_mutual_info_score(labels, estimator.labels_),
        metrics.silhouette_score(data, estimator.labels_,
        metric='euclidean',
        sample_size=sample_size)))
    def display_reduced_digits(self):
        """Write reduction stats for the digits dataset to a report file.

        NOTE: reassigns sys.stdout, so ALL subsequent prints in the process
        go to 'out/NMFReduceDigitsOutput.txt'; the file is never closed.
        Assumes reduce() was already called (reads self.reduced).
        """
        sys.stdout = open('out/NMFReduceDigitsOutput.txt', 'w')
        print("NMF Reduction of %s:\n" % self.dataset_name)
        print(40 * '-')
        print(self.reduced)
        print("\nLength of 1 input vector before reduction: %d \n" % len(self.data.tolist()[0]))
        print("Length of 1 input vector after reduction: %d \n" % len(self.reduced.tolist()[0]))
        print(40 * '-')
        print(self.reducer.reconstruction_err_)
    def display_reduced_iris(self):
        """Write reduction stats for the iris dataset to a report file.

        Same caveats as display_reduced_digits (stdout redirection, requires
        a prior reduce() call).
        """
        sys.stdout = open('out/NMFReduceIrisOutput.txt', 'w')
        print("NMF Reduction of %s:\n" % self.dataset_name)
        print(40 * '-')
        print(self.reduced)
        print("\nLength of 1 input vector before reduction: %d \n" % len(self.data.tolist()[0]))
        print("Length of 1 input vector after reduction: %d \n" % len(self.reduced.tolist()[0]))
        print(40 * '-')
        print(self.reducer.reconstruction_err_)
    def reduce_crossvalidation_set(self, X_train, X_test):
        """Scale a train/test split with the current self.scaler fit.

        NOTE(review): this fits self.reducer on X_train but never calls
        reducer.transform — the returned matrices are only scaler-transformed,
        not dimensionality-reduced. Looks like a bug; confirm intent before
        relying on the output shape.
        """
        self.reducer.fit(X_train)
        reduced_X_train = self.scaler.transform(X_train)
        reduced_X_test = self.scaler.transform(X_test)
        return reduced_X_train, reduced_X_test
示例7: _scale
# 需要导入模块: from sklearn.preprocessing import MinMaxScaler [as 别名]
# 或者: from sklearn.preprocessing.MinMaxScaler import fit_transform [as 别名]
def _scale(self, y):
    """Min-max scale *y* to [0, 1].

    Tries to scale *y* directly; if the scaler rejects it (e.g. a plain
    Python list on older sklearn), converts to a numpy array first and
    returns the result as a list.

    :param y: array-like data to scale (presumably 2-D — TODO confirm
              against callers; modern sklearn rejects 1-D input).
    :return: the scaled data (array on the direct path, list on the fallback).
    """
    z = MinMaxScaler()
    try:
        return z.fit_transform(y)
    # fix: bare `except:` also swallowed KeyboardInterrupt/SystemExit;
    # catch Exception so only genuine errors trigger the fallback
    except Exception:
        y = np.array(y)
        y = z.fit_transform(y)
        return y.tolist()
示例8: predict_simple_linear
# 需要导入模块: from sklearn.preprocessing import MinMaxScaler [as 别名]
# 或者: from sklearn.preprocessing.MinMaxScaler import fit_transform [as 别名]
def predict_simple_linear(df_train_clean, df_test_clean):
    """Fit a logistic regression on order-book features and evaluate on test data.

    :param df_train_clean: DataFrame with the 8 price/volume feature columns
                           and a 'labels' column (training split).
    :param df_test_clean: DataFrame with the same columns (test split).
    :return: a flat list alternating metric names and values:
             ["confusion_matrix", cm, "classification_report", report,
              "logloss", ll, "miss_err", err, "Y_hat", predictions]
    """
    feature_cols = ['P_1_bid', 'V_1_bid', 'P_1_ask', 'V_1_ask',
                    'P_2_bid', 'V_2_bid', 'P_2_ask', 'V_2_ask']
    X_train = np.array(df_train_clean[feature_cols])
    Y_train = np.array(df_train_clean[['labels']])[:, 0]
    X_test = np.array(df_test_clean[feature_cols])
    Y_test = np.array(df_test_clean[['labels']])[:, 0]
    # the label set passed to the confusion matrix
    labels = np.unique(Y_train)
    # scale: fit on the training data only, then apply to the test data
    # (fix: the original fit the scaler on X_test first, leaking test-set
    # statistics into the scaling)
    scaler = MinMaxScaler()
    X_train = scaler.fit_transform(X_train)
    X_test = scaler.transform(X_test)
    # weak regularization (large C) logistic regression
    logreg = linear_model.LogisticRegression(C=1e5)
    logreg.fit(X_train, Y_train)
    Y_hat = logreg.predict(X_test)
    Y_probs = logreg.predict_proba(X_test)
    # misclassification error rate
    miss_err = 1 - accuracy_score(Y_test, Y_hat)
    # fix: `10^(-15)` is bitwise XOR in Python (evaluates to -5); the
    # intended probability clipping epsilon is 1e-15
    eps = 1e-15
    logloss = log_loss(Y_test, Y_probs, eps=eps)
    confusion_matrix1 = confusion_matrix(y_true=Y_test, y_pred=Y_hat,
                                         labels=labels)
    classification_report1 = classification_report(y_true=Y_test, y_pred=Y_hat)
    # output results as a flat name/value list (interface kept as-is)
    result = []
    result.append("confusion_matrix")
    result.append(confusion_matrix1)
    result.append("classification_report")
    result.append(classification_report1)
    result.append("logloss")
    result.append(logloss)
    result.append("miss_err")
    result.append(miss_err)
    result.append("Y_hat")
    result.append(Y_hat)
    return result
示例9: feature_scale
# 需要导入模块: from sklearn.preprocessing import MinMaxScaler [as 别名]
# 或者: from sklearn.preprocessing.MinMaxScaler import fit_transform [as 别名]
def feature_scale(self, X_train, X_val, X_test):
    """Min-max scale the train/validation/test feature matrices.

    The scaler is fitted on the training split only and then applied to the
    validation and test splits, so all three use the same training-derived
    scaling (fix: the original refit the scaler on each split independently,
    which leaks split statistics and makes the splits incomparable).

    :return: (X_train_std, X_val_std, X_test_std) scaled arrays.
    """
    from sklearn.preprocessing import MinMaxScaler
    mms = MinMaxScaler()
    X_train_std = mms.fit_transform(X_train)
    X_val_std = mms.transform(X_val)
    X_test_std = mms.transform(X_test)
    return X_train_std, X_val_std, X_test_std
示例10: rescaleSalAndStockValues
# 需要导入模块: from sklearn.preprocessing import MinMaxScaler [as 别名]
# 或者: from sklearn.preprocessing.MinMaxScaler import fit_transform [as 别名]
def rescaleSalAndStockValues():
    """Min-max scale a fixed salary and stock value against dataset extremes.

    Each value is bracketed by the dataset max/min (from findMaxMinValues())
    so the scaler maps the triple onto the full [0, 1] range; index 2 of each
    returned 3x1 array is the rescaled target value.

    :return: (scaledSal, scaledStock) — scaled [max, min, value] columns.
    """
    # fix: removed the unused `import numpy as np`
    from sklearn.preprocessing import MinMaxScaler
    maxStock, maxSal, minStock, minSal = findMaxMinValues()
    # the given salary and stock values to rescale
    salVal = 200000.0
    stockVal = 1000000.0
    scaler = MinMaxScaler()
    scaledSal = scaler.fit_transform([[maxSal], [minSal], [salVal]])
    scaledStock = scaler.fit_transform([[maxStock], [minStock], [stockVal]])
    return scaledSal, scaledStock
示例11: use
# 需要导入模块: from sklearn.preprocessing import MinMaxScaler [as 别名]
# 或者: from sklearn.preprocessing.MinMaxScaler import fit_transform [as 别名]
def use(method):
    """Train, tune, and evaluate one of three classifiers.

    Operates on module-level globals: features_train, features_test,
    labels_train, labels_test, features_list, and
    calculate_precision_recall.

    :param method: 'naive bayes', 'svm', or 'decision tree'. Any other value
                   leaves `pred` undefined and raises NameError at the final
                   accuracy report.

    NOTE(review): Python 2 code (print statements) using the pre-0.18
    sklearn ``grid_search`` module.
    """
    if method == 'naive bayes':
        # SelectKBest -> PCA -> GaussianNB pipeline, grid-searched over
        # the number of selected features (k) and PCA components
        estimators = [("skb", SelectKBest(score_func=f_classif)),('pca', PCA()),
        ('bayes',GaussianNB())]
        clf = Pipeline(estimators)
        parameters = {"skb__k":[8,9,10,11,12],
        "pca__n_components":[2,6,4,8]}
        clf = grid_search.GridSearchCV(clf, parameters)
        # scaler is fit on the training split only, then applied to test
        scaler = MinMaxScaler()
        features_train_scaled = scaler.fit_transform(features_train)
        features_test_scaled = scaler.transform(features_test)
        clf.fit(features_train_scaled, labels_train)
        pred = clf.predict(features_test_scaled)
        print clf.best_params_
        # refit SelectKBest with the best k to report per-feature scores
        # and the names of the selected features
        features_k = clf.best_params_['skb__k']
        SKB_k = SelectKBest(f_classif, k = features_k)
        SKB_k.fit_transform(features_train_scaled, labels_train)
        print "features score: "
        print SKB_k.scores_
        # features_list[0] is presumably the label column — TODO confirm
        features_selected = [features_list[1:][i]for i in SKB_k.get_support(indices=True)]
        print features_selected
    elif method == 'svm':
        # PCA -> SVC pipeline, grid-searched over the SVC penalty C
        estimators = [('reduce_dim', PCA()), ('svc', SVC())]
        clf = Pipeline(estimators)
        parameters = {'svc__C': [1,10]}
        clf = grid_search.GridSearchCV(clf, parameters)
        scaler = MinMaxScaler()
        features_train_scaled = scaler.fit_transform(features_train)
        features_test_scaled = scaler.transform(features_test)
        clf.fit(features_train_scaled, labels_train)
        pred = clf.predict(features_test_scaled)
        print clf.best_estimator_
    elif method == 'decision tree':
        # SelectKBest -> PCA -> DecisionTree pipeline, grid-searched over
        # min_samples_split, k, and PCA components
        estimators = [("skb", SelectKBest(score_func=f_classif)),('pca', PCA()),
        ('tree', tree.DecisionTreeClassifier())]
        clf = Pipeline(estimators)
        parameters = {"tree__min_samples_split": [2,10],"skb__k":[8,9,10,11,12],
        "pca__n_components":[2,4,6,8]}
        clf = grid_search.GridSearchCV(clf, parameters)
        scaler = MinMaxScaler()
        features_train_scaled = scaler.fit_transform(features_train)
        features_test_scaled = scaler.transform(features_test)
        clf.fit(features_train_scaled, labels_train)
        pred = clf.predict(features_test_scaled)
        print clf.best_params_
        features_k = clf.best_params_['skb__k']
        SKB_k = SelectKBest(f_classif, k = features_k)
        # NOTE(review): this branch refits SelectKBest on the UNSCALED
        # training data, whereas the naive-bayes branch uses the scaled
        # data — confirm which is intended
        SKB_k.fit_transform(features_train, labels_train)
        features_selected = [features_list[1:][i]for i in SKB_k.get_support(indices=True)]
        print features_selected
    # final report on the test split (NameError here if no branch matched)
    accuracy = accuracy_score(labels_test, pred)
    print "accuracy score:"
    print accuracy
    calculate_precision_recall(pred, labels_test)
示例12: featureScale
# 需要导入模块: from sklearn.preprocessing import MinMaxScaler [as 别名]
# 或者: from sklearn.preprocessing.MinMaxScaler import fit_transform [as 别名]
def featureScale(df):
    """Min-max scale the numeric loan columns of *df* in place and return it.

    Each column is rescaled to [0, 1] independently (the scaler is refit
    per column via fit_transform).
    """
    scaler = MinMaxScaler()
    #print(df['ApplicantIncome'].head())
    for column in ('ApplicantIncome', 'CoapplicantIncome',
                   'LoanAmount', 'Loan_Amount_Term'):
        df[[column]] = scaler.fit_transform(df[[column]])
    print("Scaling Done")
    #print(df['ApplicantIncome'].head())
    return df
示例13: test_min_max_scaler
# 需要导入模块: from sklearn.preprocessing import MinMaxScaler [as 别名]
# 或者: from sklearn.preprocessing.MinMaxScaler import fit_transform [as 别名]
def test_min_max_scaler():
    """Smoke-test MinMaxScaler endpoints on iris for default and custom ranges."""
    data = iris.data
    # default range (0, 1)
    transformed = MinMaxScaler().fit_transform(data)
    assert_equal(transformed.min(axis=0), 0)
    assert_equal(transformed.max(axis=0), 1)
    # custom range (1, 2)
    transformed = MinMaxScaler(feature_range=(1, 2)).fit_transform(data)
    assert_equal(transformed.min(axis=0), 1)
    assert_equal(transformed.max(axis=0), 2)
示例14: getDNN
# 需要导入模块: from sklearn.preprocessing import MinMaxScaler [as 别名]
# 或者: from sklearn.preprocessing.MinMaxScaler import fit_transform [as 别名]
def getDNN(df, random_split=None):
    """Train a Keras feed-forward binary classifier on *df*.

    Trains on a train/validation split first (reporting AUC on both), then
    refits the scaler and the model on the full dataset.

    :param df: DataFrame with a 'validation' column consumed by split()/to_array().
    :param random_split: optional random split ratio forwarded to split().
    :return: (model, scaler) — the model refit on all rows, and the
             MinMaxScaler last fitted on the full feature matrix.
    """
    df_tr, df_val = split(df, rand_ratio=random_split)
    X, Y = to_array(df.drop("validation", axis=1))
    Xtr, Ytr = to_array(df_tr)
    Xval, Yval = to_array(df_val)
    # scale features to (0, 1); fit on train, apply to validation
    scaler = MinMaxScaler((0, 1))
    Xtr = scaler.fit_transform(Xtr)
    Xval = scaler.transform(Xval)
    # Start create model
    print("Create a DNN Classifier")
    model = Sequential()
    model.add(Dense(100, input_dim=Xtr.shape[1], activation='tanh'))
    model.add(PReLU())
    model.add(Dropout(0.2))
    model.add(Dense(80, activation='linear'))
    model.add(ELU(alpha=0.3))
    model.add(Dropout(0.2))
    model.add(Dense(60, activation='tanh'))
    model.add(PReLU())
    model.add(Dropout(0.2))
    model.add(Dense(40, activation='linear'))
    model.add(ELU(alpha=0.1))
    model.add(Dropout(0.2))
    model.add(Dense(15, activation='linear'))
    model.add(PReLU())
    model.add(Dropout(0.2))
    model.add(Dense(1, activation='sigmoid'))
    # trainer = SGD(lr=0.01, decay=1e-6, momentum=0.9, nesterov=True)
    # fix: Adadelta's decay-rate parameter is `rho`, not `tho`; the typo
    # raised an unexpected-keyword-argument error in Keras
    trainer = Adadelta(lr=0.1, rho=0.98, epsilon=1e-7)
    model.compile(loss='binary_crossentropy', optimizer=trainer)
    print(Ytr, Yval)
    model.fit(Xtr, Ytr, nb_epoch=30, batch_size=32, verbose=1, validation_data=(Xval, Yval))
    pred_tr = model.predict_proba(Xtr)
    pred = model.predict_proba(Xval)
    print("auc on train: {}".format(roc_auc_score(Ytr, pred_tr)))
    print("auc on validation: {}".format(roc_auc_score(Yval, pred)))
    # refit scaler and model on ALL rows for the final returned model
    X = scaler.fit_transform(X)
    model.fit(X, Y, nb_epoch=30, batch_size=32)
    return model, scaler
示例15: get_training_data_by_category
# 需要导入模块: from sklearn.preprocessing import MinMaxScaler [as 别名]
# 或者: from sklearn.preprocessing.MinMaxScaler import fit_transform [as 别名]
def get_training_data_by_category(category, limit=0):
    """Build a scaled, sparse training matrix for one-vs-rest training.

    Collects up to 20% positive / 80% negative samples of *limit* (or a
    1:5 positive:negative ratio when fewer positives exist; no cap at all
    when limit == 0), min-max scales the matrix, and returns it as CSR.

    :return: (sparse scaled samples, target list of 1/0, fitted scaler).
    """
    pos_cap = limit * 0.2
    neg_cap = limit * 0.8
    n_pos = DataDAO.count_training_data_by_category(category)
    if n_pos < pos_cap:
        # fewer positives than requested: shrink both caps, keeping 1:5
        pos_cap = n_pos
        neg_cap = n_pos * 5
    samples = []
    targets = []

    def _collect(rows, label, cap):
        # append rows/labels, honouring the cap unless limit == 0 (no cap)
        for idx, row in enumerate(rows):
            if limit != 0 and idx >= cap:
                break
            samples.append(row)
            targets.append(label)

    _collect(DataDAO.get_training_data_by_category(category), 1, pos_cap)
    _collect(DataDAO.get_training_data_by_other_categories(category), 0, neg_cap)
    scaler = MinMaxScaler()
    scaled = scaler.fit_transform(samples)
    return csr_matrix(scaled), targets, scaler