This page collects typical usage examples of the Python method sklearn.preprocessing.MinMaxScaler.transform. If you have been wondering what MinMaxScaler.transform does, how to use it, or where to find examples of it, the curated code samples below may help. You can also explore further usage examples of the containing class, sklearn.preprocessing.MinMaxScaler.
The following shows 15 code examples of MinMaxScaler.transform, sorted by popularity by default. You can upvote the examples you like or find useful; your votes help the system recommend better Python code samples.
Example 1: Iris
# Required imports: from sklearn.preprocessing import MinMaxScaler [as alias]
# Or: from sklearn.preprocessing.MinMaxScaler import transform [as alias]
def Iris(training_size, test_size, n, PLOT_DATA):
    class_labels = [r'A', r'B', r'C']
    data, target = datasets.load_iris(True)
    sample_train, sample_test, label_train, label_test = train_test_split(
        data, target, test_size=1, random_state=42)

    # Standardize to a Gaussian around 0 with unit variance
    std_scale = StandardScaler().fit(sample_train)
    sample_train = std_scale.transform(sample_train)
    sample_test = std_scale.transform(sample_test)

    # Scale to the range (-1, +1)
    samples = np.append(sample_train, sample_test, axis=0)
    minmax_scale = MinMaxScaler((-1, 1)).fit(samples)
    sample_train = minmax_scale.transform(sample_train)
    sample_test = minmax_scale.transform(sample_test)

    # Pick training_size samples from each class
    # (note that test_input is also drawn from sample_train here)
    training_input = {key: (sample_train[label_train == k, :])[:training_size]
                      for k, key in enumerate(class_labels)}
    test_input = {key: (sample_train[label_train == k, :])[training_size:(
        training_size + test_size)] for k, key in enumerate(class_labels)}

    if PLOT_DATA:
        for k in range(0, 3):
            plt.scatter(sample_train[label_train == k, 0][:training_size],
                        sample_train[label_train == k, 1][:training_size])
        plt.title("Iris dataset")
        plt.show()

    return sample_train, training_input, test_input, class_labels
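A note on the (-1, +1) step: because the scaler is fit on the concatenated train and test samples, both splits are guaranteed to land inside the target range. Below is a minimal standalone sketch of MinMaxScaler's feature_range behavior, using an invented toy array:

import numpy as np
from sklearn.preprocessing import MinMaxScaler

X = np.array([[0.0, 10.0], [5.0, 20.0], [10.0, 40.0]])  # hypothetical toy data

scaler = MinMaxScaler(feature_range=(-1, 1)).fit(X)
X_scaled = scaler.transform(X)
print(X_scaled.min(axis=0))  # [-1. -1.] -- per-feature minima map to -1
print(X_scaled.max(axis=0))  # [ 1.  1.] -- per-feature maxima map to +1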
Example 2: prescale_data
# Required imports: from sklearn.preprocessing import MinMaxScaler [as alias]
# Or: from sklearn.preprocessing.MinMaxScaler import transform [as alias]
def prescale_data(x_train, x_test, method):
    """
    Pre-scales the training data (and, optionally, the test data) using the specified method.

    :param x_train: The training data to be pre-scaled.
    :param x_test: The (optional) test data to be pre-scaled. Beware that the scaler is fit only on the
        training data, never on the test data.
    :param method: The method to be used for pre-scaling. Allowed values are "minmaxscaler" and "standartscaler".
    :return: A tuple of the pre-scaled training and test data, or only the training data if x_test is None.
    """
    if method is not None:
        scaler = None
        if method == "minmaxscaler":
            from sklearn.preprocessing import MinMaxScaler
            scaler = MinMaxScaler()
        if method == "standartscaler":
            from sklearn.preprocessing import StandardScaler
            scaler = StandardScaler()
        if scaler is None:
            raise ValueError("Invalid pre-scaling method: {}".format(method))
        # Fit on the training data only, then transform both splits
        x_train = scaler.fit_transform(x_train)
        if x_test is not None:
            x_test = scaler.transform(x_test)
    if x_test is not None:
        return x_train, x_test
    else:
        return x_train
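A hedged usage sketch for the helper above, with invented toy arrays (note the literal, misspelled "standartscaler" string this helper expects):

import numpy as np

x_train = np.random.rand(100, 5)  # hypothetical training features
x_test = np.random.rand(20, 5)    # hypothetical test features

x_train_s, x_test_s = prescale_data(x_train, x_test, "minmaxscaler")
x_train_only = prescale_data(x_train, None, "standartscaler")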
Example 3: _scaled_data
# Required imports: from sklearn.preprocessing import MinMaxScaler [as alias]
# Or: from sklearn.preprocessing.MinMaxScaler import transform [as alias]
def _scaled_data(self):
    """Load scaled data.

    Args:
        None

    Returns:
        (scaler, train_scaled, test_scaled): Tuple of the fitted scaler
            and the scaled train and test data

    """
    # Initialize key variables
    (_train, _test) = self._data()

    # Fit scaler on the training data
    scaler = MinMaxScaler(feature_range=(-1, 1))
    scaler = scaler.fit(_train)

    # Transform train (the reshape keeps the array explicitly 2D)
    train = _train.reshape(_train.shape[0], _train.shape[1])
    train_scaled = scaler.transform(train)

    # Transform test
    test = _test.reshape(_test.shape[0], _test.shape[1])
    test_scaled = scaler.transform(test)

    # Return
    return scaler, train_scaled, test_scaled
Example 4: NB_coefficients
# Required imports: from sklearn.preprocessing import MinMaxScaler [as alias]
# Or: from sklearn.preprocessing.MinMaxScaler import transform [as alias]
def NB_coefficients(year=2010):
    poi_dist = getFourSquarePOIDistribution(useRatio=False)
    F_taxi = getTaxiFlow(normalization="bydestination")
    W2 = generate_geographical_SpatialLag_ca()
    Y = retrieve_crime_count(year=year)
    C = generate_corina_features()
    D = C[1]

    popul = C[1][:, 0].reshape(C[1].shape[0], 1)
    Y = np.divide(Y, popul) * 10000

    f2 = np.dot(W2, Y)
    ftaxi = np.dot(F_taxi, Y)

    f = np.concatenate((D, f2, ftaxi, poi_dist), axis=1)
    mms = MinMaxScaler(copy=False)
    mms.fit(f)
    mms.transform(f)  # copy=False: f is scaled in place, so the return value can be ignored
    header = C[0] + ['spatiallag', 'taxiflow'] + \
        ['POI food', 'POI residence', 'POI travel', 'POI arts entertainment',
         'POI outdoors recreation', 'POI education', 'POI nightlife',
         'POI professional', 'POI shops', 'POI event']
    df = pd.DataFrame(f, columns=header)
    np.savetxt("Y.csv", Y, delimiter=",")
    df.to_csv("f.csv", sep=",", index=False)

    # NB permute
    nbres = subprocess.check_output(['Rscript', 'nbr_eval.R', 'ca', 'coefficient']).decode()
    print(nbres)

    ls = nbres.strip().split(" ")
    coef = [float(e) for e in ls]
    print(coef)
    return coef, header
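The in-place pattern above (copy=False with the return value discarded) can look like a bug at first glance; here is a minimal standalone sketch of the behavior, with an invented float array:

import numpy as np
from sklearn.preprocessing import MinMaxScaler

f = np.array([[1.0, 100.0], [2.0, 200.0], [3.0, 300.0]])  # hypothetical feature matrix

mms = MinMaxScaler(copy=False)
mms.fit(f)
mms.transform(f)  # with copy=False, f itself is overwritten where possible
print(f)          # already scaled to [0, 1]; no assignment needed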
Example 5: Breast_cancer
# Required imports: from sklearn.preprocessing import MinMaxScaler [as alias]
# Or: from sklearn.preprocessing.MinMaxScaler import transform [as alias]
def Breast_cancer(training_size, test_size, n, PLOT_DATA):
    class_labels = [r'A', r'B']
    data, target = datasets.load_breast_cancer(True)
    sample_train, sample_test, label_train, label_test = train_test_split(
        data, target, test_size=0.3, random_state=12)

    # Standardize to a Gaussian around 0 with unit variance
    std_scale = StandardScaler().fit(sample_train)
    sample_train = std_scale.transform(sample_train)
    sample_test = std_scale.transform(sample_test)

    # Reduce the number of features to the number of qubits
    pca = PCA(n_components=n).fit(sample_train)
    sample_train = pca.transform(sample_train)
    sample_test = pca.transform(sample_test)

    # Scale to the range (-1, +1)
    samples = np.append(sample_train, sample_test, axis=0)
    minmax_scale = MinMaxScaler((-1, 1)).fit(samples)
    sample_train = minmax_scale.transform(sample_train)
    sample_test = minmax_scale.transform(sample_test)

    # Pick training_size samples from each class
    training_input = {key: (sample_train[label_train == k, :])[:training_size]
                      for k, key in enumerate(class_labels)}
    test_input = {key: (sample_train[label_train == k, :])[training_size:(
        training_size + test_size)] for k, key in enumerate(class_labels)}

    if PLOT_DATA:
        for k in range(0, 2):
            plt.scatter(sample_train[label_train == k, 0][:training_size],
                        sample_train[label_train == k, 1][:training_size])
        plt.title("PCA dim. reduced Breast cancer dataset")
        plt.show()

    return sample_train, training_input, test_input, class_labels
Example 6: train
# Required imports: from sklearn.preprocessing import MinMaxScaler [as alias]
# Or: from sklearn.preprocessing.MinMaxScaler import transform [as alias]
def train(mode):
    if mode == "NextWeek":
        DATA = "MLprojectOutput/week34567to8Formated/part-00000"
    else:
        DATA = "MLprojectOutput/week34567to9Formated/part-00000"
    X, Y = readData(DATA, 10000, -1)

    # Fit the scaler on the training features and persist it for prediction time
    X_Scaler = MinMaxScaler().fit(X)
    joblib.dump(X_Scaler, 'Predict{0}_Scaler.pkl'.format(mode))
    X = X_Scaler.transform(X)

    dtrain = xgb.DMatrix(X, label=Y)
    param = {'booster': "gbtree",
             'eta': 0.3,
             'max_depth': 6,
             'subsample': 0.85,
             'colsample_bytree': 0.7,
             'silent': 0,
             'objective': 'reg:linear',
             'nthread': 10,
             'eval_metric': 'rmse'}
    __model = xgb.train(param.items(), dtrain)
    __model.save_model('Predict{0}.model'.format(mode))

    X_TEST, Y_TEST = readData(DATA, 0, 10000)
    X_TEST = X_Scaler.transform(X_TEST)
    dtest = xgb.DMatrix(X_TEST)
    Y_pred = list(map(lambda x: int(x), __model.predict(dtest)))
    evaluate(Y_TEST, Y_pred)
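Because the fitted scaler is persisted with joblib, prediction code can reapply exactly the same transform later. A hedged sketch of the load side, assuming the .pkl path written above for mode="NextWeek":

import numpy as np
import joblib

# Load the scaler that train() saved
X_Scaler = joblib.load('PredictNextWeek_Scaler.pkl')

# Placeholder row with the expected feature count (X_new is hypothetical)
X_new = np.zeros((1, len(X_Scaler.scale_)))
X_new_scaled = X_Scaler.transform(X_new)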
Example 7: train_lr
# Required imports: from sklearn.preprocessing import MinMaxScaler [as alias]
# Or: from sklearn.preprocessing.MinMaxScaler import transform [as alias]
def train_lr(X, y, X_test, y_test, t, col_names=None, sample_weight=None):
    sc = MinMaxScaler().fit(X)
    X = sc.transform(X)
    start = datetime.datetime.now()
    X_test_trans = sc.transform(X_test)
    print("training balanced LR..")
    lr = linear_model.LogisticRegression(class_weight='balanced')
    if sample_weight is not None:
        lr.fit(X, y, sample_weight)
    else:
        lr.fit(X, y)
    print("training mean accuracy = %.2f" % lr.score(X, y))
    print("testing mean accuracy = %.2f" % lr.score(X_test_trans, y_test))
    if col_names is not None:
        c = np.column_stack((col_names, np.round(lr.coef_.flatten(), 2)))
        sorted_c = c[c[:, 1].argsort()]
        print(sorted_c[:10])
        print(sorted_c[-10:])
    y_prob = lr.predict_proba(X_test_trans)
    end = datetime.datetime.now()
    delta = end - start
    # Threshold the positive-class probability at t
    y_pred = y_prob[:, 1] > t
    y_pred = y_pred.astype('uint8')
    print('--- t = %.2f results:' % t)
    print_results(y_test, y_pred)
    print('total time predictions: %f (s)' % delta.total_seconds())
    print('time per query: %f (s)' % (delta.total_seconds() / len(y_pred)))
    # Inspect misclassified test points: compare predictions to the test labels
    false_preds = y_pred != y_test
    false_vectors = np.multiply(lr.coef_, X_test_trans[false_preds, :])
    print(false_vectors.shape)
    return y_pred
Example 8: NMFReducer
# Required imports: from sklearn.preprocessing import MinMaxScaler [as alias]
# Or: from sklearn.preprocessing.MinMaxScaler import transform [as alias]
class NMFReducer():
    def __init__(self, dataset, dataset_name, num_components=10):
        self.dataset = dataset
        self.dataset_name = dataset_name
        self.labels = dataset.target
        self.scaler = MinMaxScaler()
        self.data = self.scaler.fit_transform(dataset.data)
        self.n_samples, self.n_features = self.data.shape
        self.reducer = NMF(n_components=num_components, max_iter=5000)

    def reduce(self):
        self.reducer.fit(self.data)
        self.reduced = self.scaler.fit_transform(self.reducer.transform(self.data))
        return self.reduced

    def benchmark(self, estimator, name, data):
        t0 = time()
        sample_size = 300
        labels = self.labels
        estimator.fit(data)
        print('% 9s %.2fs %i %.3f %.3f %.3f %.3f %.3f %.3f'
              % (name, (time() - t0), estimator.inertia_,
                 metrics.homogeneity_score(labels, estimator.labels_),
                 metrics.completeness_score(labels, estimator.labels_),
                 metrics.v_measure_score(labels, estimator.labels_),
                 metrics.adjusted_rand_score(labels, estimator.labels_),
                 metrics.adjusted_mutual_info_score(labels, estimator.labels_),
                 metrics.silhouette_score(data, estimator.labels_,
                                          metric='euclidean',
                                          sample_size=sample_size)))

    def display_reduced_digits(self):
        sys.stdout = open('out/NMFReduceDigitsOutput.txt', 'w')
        print("NMF Reduction of %s:\n" % self.dataset_name)
        print(40 * '-')
        print(self.reduced)
        print("\nLength of 1 input vector before reduction: %d \n" % len(self.data.tolist()[0]))
        print("Length of 1 input vector after reduction: %d \n" % len(self.reduced.tolist()[0]))
        print(40 * '-')
        print(self.reducer.reconstruction_err_)

    def display_reduced_iris(self):
        sys.stdout = open('out/NMFReduceIrisOutput.txt', 'w')
        print("NMF Reduction of %s:\n" % self.dataset_name)
        print(40 * '-')
        print(self.reduced)
        print("\nLength of 1 input vector before reduction: %d \n" % len(self.data.tolist()[0]))
        print("Length of 1 input vector after reduction: %d \n" % len(self.reduced.tolist()[0]))
        print(40 * '-')
        print(self.reducer.reconstruction_err_)

    def reduce_crossvalidation_set(self, X_train, X_test):
        self.reducer.fit(X_train)
        # Project both splits with the fitted NMF reducer
        # (the original transformed with self.scaler here, which discards the fit above)
        reduced_X_train = self.reducer.transform(X_train)
        reduced_X_test = self.reducer.transform(X_test)
        return reduced_X_train, reduced_X_test
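A hedged usage sketch for the class above, assuming sklearn's digits dataset (whose pixel features are non-negative, as NMF requires; the min-max scaling in __init__ keeps them that way):

from sklearn import datasets

digits = datasets.load_digits()
reducer = NMFReducer(digits, "digits", num_components=10)
reduced = reducer.reduce()
print(reduced.shape)  # (n_samples, 10)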
Example 9: preprocess_datasets
# Required imports: from sklearn.preprocessing import MinMaxScaler [as alias]
# Or: from sklearn.preprocessing.MinMaxScaler import transform [as alias]
def preprocess_datasets(X_train, X_test, args):
    if 'scale' in args.preprocessing:
        print('Scaling features to range [-1,1] ...')
        scaler = MinMaxScaler(feature_range=(-1, 1))
        scaler.fit(np.vstack(X_train))
        X_train = [scaler.transform(X_curr) for X_curr in X_train]
        X_test = [scaler.transform(X_curr) for X_curr in X_test]
    return X_train, X_test
Example 10: preprocess_datasets
# Required imports: from sklearn.preprocessing import MinMaxScaler [as alias]
# Or: from sklearn.preprocessing.MinMaxScaler import transform [as alias]
def preprocess_datasets(train, test, args):
    if 'scale' in args.preprocessing:
        print('Scaling features to range [-1,1] ...')
        scaler = MinMaxScaler(feature_range=(-1, 1))
        scaler.fit(np.vstack(train.X))
        processed_train = Dataset([scaler.transform(X_curr) for X_curr in train.X],
                                  train.y, train.target_names, train.groups)
        processed_test = Dataset([scaler.transform(X_curr) for X_curr in test.X],
                                 test.y, test.target_names, test.groups)
    else:
        processed_train = train
        processed_test = test
    return processed_train, processed_test
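Examples 9 and 10 share a pattern worth noting: the data is a list of variable-length sequences, so the scaler is fit once on all frames stacked with np.vstack and then applied per sequence. A minimal sketch with invented arrays:

import numpy as np
from sklearn.preprocessing import MinMaxScaler

# Hypothetical: three sequences of different lengths, each with the same 2 features
X_train = [np.random.rand(5, 2), np.random.rand(8, 2), np.random.rand(3, 2)]

scaler = MinMaxScaler(feature_range=(-1, 1))
scaler.fit(np.vstack(X_train))                     # fit on all frames at once
X_scaled = [scaler.transform(X) for X in X_train]  # transform each sequence separately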
Example 11: scale
# Required imports: from sklearn.preprocessing import MinMaxScaler [as alias]
# Or: from sklearn.preprocessing.MinMaxScaler import transform [as alias]
def scale(train, test):
    # Fit scaler on the training data
    scaler = MinMaxScaler(feature_range=(-1, 1))
    scaler = scaler.fit(train)
    # Transform train
    train = train.reshape(train.shape[0], train.shape[1])
    train_scaled = scaler.transform(train)
    # Transform test
    test = test.reshape(test.shape[0], test.shape[1])
    test_scaled = scaler.transform(test)
    return scaler, train_scaled, test_scaled
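This helper returns the fitted scaler alongside the scaled splits, which is useful when model outputs later have to be mapped back to the original units. A hedged round-trip sketch with invented toy data:

import numpy as np
from sklearn.preprocessing import MinMaxScaler

train = np.array([[10.0], [20.0], [30.0]])  # hypothetical 2D data
test = np.array([[15.0], [25.0]])

scaler, train_scaled, test_scaled = scale(train, test)

# Map values in scaled space (e.g. predictions) back to the original units
restored = scaler.inverse_transform(test_scaled)
print(np.allclose(restored, test))  # True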
Example 12: use
# Required imports: from sklearn.preprocessing import MinMaxScaler [as alias]
# Or: from sklearn.preprocessing.MinMaxScaler import transform [as alias]
def use(method):
    if method == 'naive bayes':
        estimators = [("skb", SelectKBest(score_func=f_classif)), ('pca', PCA()),
                      ('bayes', GaussianNB())]
        clf = Pipeline(estimators)
        parameters = {"skb__k": [8, 9, 10, 11, 12],
                      "pca__n_components": [2, 6, 4, 8]}
        clf = grid_search.GridSearchCV(clf, parameters)
        scaler = MinMaxScaler()
        features_train_scaled = scaler.fit_transform(features_train)
        features_test_scaled = scaler.transform(features_test)
        clf.fit(features_train_scaled, labels_train)
        pred = clf.predict(features_test_scaled)
        print(clf.best_params_)
        features_k = clf.best_params_['skb__k']
        SKB_k = SelectKBest(f_classif, k=features_k)
        SKB_k.fit_transform(features_train_scaled, labels_train)
        print("features score: ")
        print(SKB_k.scores_)
        features_selected = [features_list[1:][i] for i in SKB_k.get_support(indices=True)]
        print(features_selected)
    elif method == 'svm':
        estimators = [('reduce_dim', PCA()), ('svc', SVC())]
        clf = Pipeline(estimators)
        parameters = {'svc__C': [1, 10]}
        clf = grid_search.GridSearchCV(clf, parameters)
        scaler = MinMaxScaler()
        features_train_scaled = scaler.fit_transform(features_train)
        features_test_scaled = scaler.transform(features_test)
        clf.fit(features_train_scaled, labels_train)
        pred = clf.predict(features_test_scaled)
        print(clf.best_estimator_)
    elif method == 'decision tree':
        estimators = [("skb", SelectKBest(score_func=f_classif)), ('pca', PCA()),
                      ('tree', tree.DecisionTreeClassifier())]
        clf = Pipeline(estimators)
        parameters = {"tree__min_samples_split": [2, 10], "skb__k": [8, 9, 10, 11, 12],
                      "pca__n_components": [2, 4, 6, 8]}
        clf = grid_search.GridSearchCV(clf, parameters)
        scaler = MinMaxScaler()
        features_train_scaled = scaler.fit_transform(features_train)
        features_test_scaled = scaler.transform(features_test)
        clf.fit(features_train_scaled, labels_train)
        pred = clf.predict(features_test_scaled)
        print(clf.best_params_)
        features_k = clf.best_params_['skb__k']
        SKB_k = SelectKBest(f_classif, k=features_k)
        SKB_k.fit_transform(features_train, labels_train)
        features_selected = [features_list[1:][i] for i in SKB_k.get_support(indices=True)]
        print(features_selected)
    accuracy = accuracy_score(labels_test, pred)
    print("accuracy score:")
    print(accuracy)
    calculate_precision_recall(pred, labels_test)
Example 13: scale
# Required imports: from sklearn.preprocessing import MinMaxScaler [as alias]
# Or: from sklearn.preprocessing.MinMaxScaler import transform [as alias]
def scale(X_train, X_test):
    """
    Takes the training and testing samples/features and returns the respective
    normalized/scaled versions.

    :param X_train: Training set samples
    :param X_test: Testing set samples
    :return: tuple of normalized/scaled training and testing set samples
    """
    scaler = MinMaxScaler().fit(X_train)        # scaler fitted on the training samples only
    scaled_X_train = scaler.transform(X_train)  # normalized training samples
    scaled_X_test = scaler.transform(X_test)    # normalized testing samples
    return (scaled_X_train, scaled_X_test)
Example 14: scale_data
# Required imports: from sklearn.preprocessing import MinMaxScaler [as alias]
# Or: from sklearn.preprocessing.MinMaxScaler import transform [as alias]
def scale_data(train, test):
    scaler = MinMaxScaler(feature_range=(-1, 1))
    # Determine per-feature max and min values on the training set and scale with them
    scaler = scaler.fit(train)
    train_scaled = scaler.transform(train)
    # Apply the parameters found on the training set to the test set (do NOT recompute them)
    test_scaled = scaler.transform(test)
    return train_scaled, test_scaled
Example 15: preprocess_split_data
# Required imports: from sklearn.preprocessing import MinMaxScaler [as alias]
# Or: from sklearn.preprocessing.MinMaxScaler import transform [as alias]
def preprocess_split_data(train_data, valid_data, test_data,
                          feature_mode='framewise', non_overlap=False,
                          non_overlap_chunk_size=10, use_min_max=False):
    # NOTE: This function mutates its arguments so there are no extra copies

    # Remove overlapping frames if requested
    if non_overlap:
        remove_data_overlap(train_data, chunk_size=non_overlap_chunk_size)
        if valid_data:
            remove_data_overlap(valid_data, chunk_size=non_overlap_chunk_size)
        remove_data_overlap(test_data, chunk_size=non_overlap_chunk_size)

    # Apply min-max scaling to the data (fit on train, transform valid/test)
    min_max_scaler = MinMaxScaler()
    if use_min_max:
        train_data['features'] = min_max_scaler.fit_transform(
            train_data['features'])
        if valid_data:
            valid_data['features'] = min_max_scaler.transform(valid_data['features'])
        test_data['features'] = min_max_scaler.transform(test_data['features'])

    if feature_mode == 'framewise':
        # Expand training and validation labels to apply to each frame
        expand_framewise_labels(train_data)
        if valid_data:
            expand_framewise_labels(valid_data)
    elif feature_mode == 'stats':
        # Summarize the frames in each file using summary statistics
        framewise_to_stats(train_data)
        if valid_data:
            framewise_to_stats(valid_data)
        framewise_to_stats(test_data)
    else:
        raise ValueError('Invalid feature mode: {}'.format(feature_mode))

    # Standardize features
    stdizer = StandardScaler()
    train_data['features'] = stdizer.fit_transform(train_data['features'])
    if valid_data:
        valid_data['features'] = stdizer.transform(valid_data['features'])
    test_data['features'] = stdizer.transform(test_data['features'])

    # Shuffle the training data, keeping a reverse index so per-file
    # ranges can still be resolved after the shuffle
    num_train_examples = len(train_data['labels'])
    shuffle_idxs = np.random.permutation(num_train_examples)
    reverse_shuffle_idxs = np.argsort(shuffle_idxs)
    train_data['features'] = train_data['features'][shuffle_idxs]
    train_data['labels'] = train_data['labels'][shuffle_idxs]
    train_data['file_idxs'] = [reverse_shuffle_idxs[slice(*pair)]
                               for pair in train_data['file_idxs']]
    return min_max_scaler, stdizer