This article collects typical usage examples of the StandardScaler.fit_transform method from Python's sklearn.preprocessing module. If you are wondering what StandardScaler.fit_transform does, how to call it, or simply want to see it in use, the curated code samples below may help. You can also explore further usage examples for the containing class, sklearn.preprocessing.StandardScaler.
Below are 15 code examples of StandardScaler.fit_transform, sorted by popularity by default. You can upvote the examples you like or find useful; your votes help the system recommend better Python code examples.
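Before the collected examples, here is a minimal, self-contained sketch of the usual pattern (the data and variable names are illustrative, not drawn from any example below): fit_transform learns the per-feature mean and standard deviation from the training data and standardizes it in one step, while transform reuses those fitted statistics on new data.

import numpy as np
from sklearn.preprocessing import StandardScaler

# small illustrative dataset: 4 training rows and 2 test rows, 2 features each
X_train = np.array([[1.0, 200.0], [2.0, 300.0], [3.0, 400.0], [4.0, 500.0]])
X_test = np.array([[2.5, 350.0], [5.0, 600.0]])

scaler = StandardScaler()
X_train_std = scaler.fit_transform(X_train)  # fit on the training data, then standardize it
X_test_std = scaler.transform(X_test)        # reuse the training mean/std; do not refit on test data

print(scaler.mean_, scaler.scale_)           # per-feature mean and standard deviation learned by fit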
Example 1: logisticRegression
# Required import: from sklearn.preprocessing import StandardScaler [as alias]
# Or: from sklearn.preprocessing.StandardScaler import fit_transform [as alias]
def logisticRegression():
    data = loadtxtAndcsv_data("data1.txt", ",", np.float64)
    X = data[:, 0:-1]
    y = data[:, -1]
    # split into training and test sets
    x_train, x_test, y_train, y_test = train_test_split(X, y, test_size=0.2)
    # standardize the features
    scaler = StandardScaler()
    x_train = scaler.fit_transform(x_train)
    x_test = scaler.transform(x_test)  # reuse the training-set statistics rather than refitting on the test set
    # logistic regression
    model = LogisticRegression()
    model.fit(x_train, y_train)
    # prediction
    predict = model.predict(x_test)
    right = sum(predict == y_test)
    predict = np.hstack((predict.reshape(-1, 1), y_test.reshape(-1, 1)))  # put predictions and ground truth side by side for inspection
    print(predict)
    print('Test-set accuracy: %f%%' % (right * 100.0 / predict.shape[0]))
Example 2: test_same_fit_transform
# Required import: from sklearn.preprocessing import StandardScaler [as alias]
# Or: from sklearn.preprocessing.StandardScaler import fit_transform [as alias]
def test_same_fit_transform(self):
    X, X_rdd = self.make_dense_rdd()

    local = StandardScaler()
    dist = SparkStandardScaler()
    X_trans = local.fit_transform(X)
    X_rdd_trans = dist.fit_transform(X_rdd).toarray()
    X_converted = dist.to_scikit().transform(X)
    assert_array_almost_equal(X_trans, X_rdd_trans)
    assert_array_almost_equal(X_trans, X_converted)

    local = StandardScaler(with_mean=False)
    dist = SparkStandardScaler(with_mean=False)
    X_trans = local.fit_transform(X)
    X_rdd_trans = dist.fit_transform(X_rdd).toarray()
    X_converted = dist.to_scikit().transform(X)
    assert_array_almost_equal(X_trans, X_rdd_trans)
    assert_array_almost_equal(X_trans, X_converted)

    local = StandardScaler(with_std=False)
    dist = SparkStandardScaler(with_std=False)
    X_trans = local.fit_transform(X)
    X_rdd_trans = dist.fit_transform(X_rdd).toarray()
    X_converted = dist.to_scikit().transform(X)
    assert_array_almost_equal(X_trans, X_rdd_trans)
    assert_array_almost_equal(X_trans, X_converted)
Example 3: _transform_data
# Required import: from sklearn.preprocessing import StandardScaler [as alias]
# Or: from sklearn.preprocessing.StandardScaler import fit_transform [as alias]
def _transform_data():
    from solaris.run import load_data
    from solaris.models import LocalModel

    data = load_data()
    X = data['X_train']
    y = data['y_train']

    # no shuffle - past-future split
    offset = int(X.shape[0] * 0.5)  # integer index for slicing
    X_train, y_train = X[:offset], y[:offset]
    X_test, y_test = X[offset:], y[offset:]

    print('_' * 80)
    print('transforming data')
    print()

    tf = LocalModel(None)
    print('transforming train')
    X_train, y_train = tf.transform(X_train, y_train)
    print('transforming test')
    X_test, y_test = tf.transform(X_test, y_test)
    print('fin')

    scaler = StandardScaler()
    X_train = scaler.fit_transform(X_train)
    X_test = scaler.transform(X_test)

    scaler = StandardScaler()
    y_train = scaler.fit_transform(y_train)
    y_test = scaler.transform(y_test)

    data = {'X_train': X_train, 'X_test': X_test,
            'y_train': y_train, 'y_test': y_test}
    joblib.dump(data, 'data/dbndata.pkl')
Example 4: generate_dataset
# Required import: from sklearn.preprocessing import StandardScaler [as alias]
# Or: from sklearn.preprocessing.StandardScaler import fit_transform [as alias]
def generate_dataset(n_train, n_test, n_features, noise=0.1, verbose=False):
    """Generate a regression dataset with the given parameters."""
    if verbose:
        print("generating dataset...")

    X, y, coef = make_regression(n_samples=n_train + n_test,
                                 n_features=n_features, noise=noise, coef=True)

    random_seed = 13
    X_train, X_test, y_train, y_test = train_test_split(
        X, y, train_size=n_train, random_state=random_seed)
    X_train, y_train = shuffle(X_train, y_train, random_state=random_seed)

    X_scaler = StandardScaler()
    X_train = X_scaler.fit_transform(X_train)
    X_test = X_scaler.transform(X_test)

    y_scaler = StandardScaler()
    y_train = y_scaler.fit_transform(y_train[:, None])[:, 0]
    y_test = y_scaler.transform(y_test[:, None])[:, 0]

    gc.collect()
    if verbose:
        print("ok")
    return X_train, y_train, X_test, y_test
Example 5: remove_outliers
# Required import: from sklearn.preprocessing import StandardScaler [as alias]
# Or: from sklearn.preprocessing.StandardScaler import fit_transform [as alias]
def remove_outliers(image, mask):
    # apply the mask to the image to check for the presence of a bee
    im = cv2.bitwise_and(image, image, mask=mask)
    ldp_image, _, _ = ldp.ldp(im)
    test_Y = ldp_image.reshape((ldp_image.shape[0] * ldp_image.shape[1], ldp_image.shape[2]))
    test_rgb = im.reshape((im.shape[0] * im.shape[1], im.shape[2]))
    test = np.concatenate((test_Y, test_rgb), axis=1)  # note: this (ldp, rgb) order differs from the (rgb, ldp) order used for the training data below

    mask_not = cv2.bitwise_not(mask)
    ret1, mask_not = cv2.threshold(mask_not, np.mean(mask_not), 255, cv2.THRESH_BINARY)
    im = cv2.bitwise_and(image, image, mask=mask_not)
    ldp_image, _, _ = ldp.ldp(im)
    data_ldp = ldp_image.reshape((ldp_image.shape[0] * ldp_image.shape[1], ldp_image.shape[2]))
    data_rgb = im.reshape((im.shape[0] * im.shape[1], im.shape[2]))
    data = np.concatenate((data_rgb, data_ldp), axis=1)
    data = data[np.any(data != 0, axis=1)]
    print(data.shape)

    data = data.astype('float64')
    data = preprocessing.normalize(data, axis=0)
    ss = StandardScaler()
    data = ss.fit_transform(data)
    clf = svm.OneClassSVM(nu=0.8, kernel="rbf", gamma=0.1)
    clf.fit(data)

    test = test.astype('float64')
    test = preprocessing.normalize(test, axis=0)
    print(test.shape)
    test = ss.fit_transform(test)  # note: this refits the scaler on the test features instead of reusing the training statistics
    test = clf.predict(test)
    test = test.reshape((image.shape[0], image.shape[1]))
    test[test == -1] = 0
    test[test == 1] = 255
    test = test.astype('uint8')
    im = cv2.bitwise_and(image, image, mask=test)
    im = cv2.bitwise_and(im, im, mask=mask)
    # print(test[:, 0], test[:, 1])
    return (im, test)
Example 6: DBScan_Flux
# Required import: from sklearn.preprocessing import StandardScaler [as alias]
# Or: from sklearn.preprocessing.StandardScaler import fit_transform [as alias]
def DBScan_Flux(phots, ycenters, xcenters, dbsClean=0, useTheForce=False):
    """Cluster the standardized (ycenters, xcenters, phots) features with DBSCAN
    and return a boolean mask selecting the points assigned to cluster `dbsClean`.
    """
    dbsPhots = DBSCAN()  # n_jobs=-1
    stdScaler = StandardScaler()

    phots = np.copy(phots.ravel())
    phots[~np.isfinite(phots)] = np.median(phots[np.isfinite(phots)])

    # standardize each feature column separately before clustering
    featuresNow = np.transpose([stdScaler.fit_transform(ycenters[:, None]).ravel(),
                                stdScaler.fit_transform(xcenters[:, None]).ravel(),
                                stdScaler.fit_transform(phots[:, None]).ravel()])
    # print(featuresNow.shape)
    dbsPhotsPred = dbsPhots.fit_predict(featuresNow)

    return dbsPhotsPred == dbsClean
Example 7: batchScaling
# Required import: from sklearn.preprocessing import StandardScaler [as alias]
# Or: from sklearn.preprocessing.StandardScaler import fit_transform [as alias]
def batchScaling(in_root="raw", out_root="data", with_mean=True, with_std=True):
    Xy_files = filter(lambda x: x.endswith(".Xy.npz"), os.listdir(in_root))
    # Xy_files = ["image_rgb_gist.Xy.npz"]
    for Xy_file in Xy_files:
        in_path = os.path.join(in_root, Xy_file)
        out_path = os.path.join(out_root, Xy_file)

        print('> load %s' % (in_path))
        data = np.load(in_path)

        ## detect sparse or dense
        _sparse = True if len(data['X'].shape) == 0 else False

        print('> scaling')
        if _sparse:
            ## Cannot center sparse matrices: pass `with_mean=False` instead.
            print('>> Sparse matrix detected. Use with_mean=False')
            scaler = StandardScaler(with_mean=False, with_std=with_std)
            X = scaler.fit_transform(data['X'].all())
        else:
            scaler = StandardScaler(with_mean=with_mean, with_std=with_std)
            X = scaler.fit_transform(data['X'])

        print('> compressing and dumping to %s' % (out_path))
        np.savez_compressed(out_path, X=X, y=data['y'])
        print('=' * 50)
Example 8: TrainValidSplitter
# Required import: from sklearn.preprocessing import StandardScaler [as alias]
# Or: from sklearn.preprocessing.StandardScaler import fit_transform [as alias]
class TrainValidSplitter(object):
    def __init__(self, standardize=True, few=False):
        self.standardize = standardize
        self.few = few
        self.standa = None

    def __call__(self, X, y, net):
        strati = StratifiedShuffleSplit(y=y, n_iter=1, test_size=0.2, random_state=1234)
        train_indices, valid_indices = next(iter(strati))

        if self.standardize:
            self.standa = StandardScaler()
            if self.few:
                X_train = np.hstack((self.standa.fit_transform(X[train_indices, :23]), X[train_indices, 23:]))
                X_valid = np.hstack((self.standa.transform(X[valid_indices, :23]), X[valid_indices, 23:]))
            else:
                X_train = self.standa.fit_transform(X[train_indices])
                X_valid = self.standa.transform(X[valid_indices])
        else:
            X_train, X_valid = X[train_indices], X[valid_indices]

        y_train, y_valid = y[train_indices], y[valid_indices]
        return X_train, X_valid, y_train, y_valid
Example 9: LinearXGB
# Required import: from sklearn.preprocessing import StandardScaler [as alias]
# Or: from sklearn.preprocessing.StandardScaler import fit_transform [as alias]
class LinearXGB(ClippedMixin):
    trained = set()
    cache = {}

    def __init__(self, params, num_rounds):
        self.params = params
        self.scaler = StandardScaler(with_mean=False)
        self.num_rounds = num_rounds

    def fit(self, dense, svd, sparse, y):
        X_train = np.hstack((dense, svd))
        # X_train = hstack((X_train, sparse))
        train_hash = hash(str(X_train))
        if train_hash not in self.trained:
            X_scaled = self.scaler.fit_transform(X_train)
            X_scaled = normalize(X_scaled)
            dtrain = xgb.DMatrix(X_scaled, label=y)
            watchlist = [(dtrain, 'train')]
            self.bst = xgb.train(self.params, dtrain, self.num_rounds)  # , watchlist)
            self.trained.add(train_hash)

    def predict(self, dense, svd, sparse):
        X_test = np.hstack((dense, svd))
        # X_test = hstack((X_test, sparse))
        test_hash = hash(str(X_test))
        if test_hash not in self.cache:
            # X_scaled = X_test
            X_scaled = self.scaler.fit_transform(X_test)  # note: this refits the scaler at prediction time instead of calling self.scaler.transform
            X_scaled = normalize(X_scaled)
            dtest = xgb.DMatrix(X_scaled)
            # dtest = xgb.DMatrix(X_test)
            y_pred = self.bst.predict(dtest)
            self.cache[test_hash] = y_pred
        return self.cache[test_hash]
Example 10: prep_X_y
# Required import: from sklearn.preprocessing import StandardScaler [as alias]
# Or: from sklearn.preprocessing.StandardScaler import fit_transform [as alias]
def prep_X_y(df, constant=False, split=True):
    cols_to_exclude = ['venue_state', 'venue_name', 'venue_country', 'venue_address', 'ticket_types', 'email_domain', 'description', 'previous_payouts', 'payee_name', 'org_name', 'org_desc', 'object_id', 'name', 'acct_type', 'country', 'listed', 'currency', 'payout_type', 'channels']
    if constant:
        df['const'] = 1
    X = df.drop(cols_to_exclude + ['fraud'], axis=1).values
    y = df['fraud'].values
    print('columns used:\n', df.drop(cols_to_exclude + ['fraud'], axis=1).columns)
    if split:
        X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=.2)
        scaler = StandardScaler()
        X_train = scaler.fit_transform(X_train)
        X_test = scaler.transform(X_test)  # apply the training-set statistics to the test set rather than refitting
        X_smoted, y_smoted = smote(X_train, y_train, target=.5)
        return X_smoted, X_test, y_smoted, y_test
    else:
        scaler = StandardScaler()
        X = scaler.fit_transform(X)
        X_smoted, y_smoted = smote(X, y, target=.5)
        return X_smoted, y_smoted
Example 11: data_fr
# Required import: from sklearn.preprocessing import StandardScaler [as alias]
# Or: from sklearn.preprocessing.StandardScaler import fit_transform [as alias]
def data_fr(novel_num):
    # if csv_file(novel, novel_num) is True:
    nn = str(novel_num)
    df_novel = pd.read_csv('novel_' + nn + 'list_1.csv', header=None)
    try:
        df_novel['wrd_length'] = df_novel[0].apply(wrd_lengths)
        df_novel['total_char'] = [sum(l) for l in df_novel['wrd_length']]
        df_novel['syl_count'] = df_novel[0].apply(syl_count)
        df_novel['syl_sum'] = [sum(l) for l in df_novel['syl_count']]
        df_novel['sentiment'] = df_novel[0].apply(detect_sentiment)
        # create a csv mapping word to syllable count, to improve the syllable function
        d = {}
        for l in df_novel[0]:
            sent = TextBlob(l)
            for x in sent.words:
                w = CountSyllables(x)
                d[x] = w
        with open('novel_' + nn + 'list_1_syl.csv', 'w', newline='') as f:
            writer = csv.writer(f)
            for row in d.items():
                writer.writerow(row)
        # create cluster columns
        df_cluster = df_novel.drop('wrd_length', 1)
        df_cluster = df_cluster.drop('syl_count', 1)
        X = df_cluster.drop(0, axis=1)
        scaler = StandardScaler()
        X_scaled = scaler.fit_transform(X)
        km = KMeans(n_clusters=20, random_state=1)
        km.fit(X_scaled)
        df_cluster_20 = df_cluster.copy()
        df_cluster_20['cluster'] = km.labels_
        df_novel['cluster_20'] = df_cluster_20['cluster']
        # create 3 clusters
        df_cluster_3 = df_cluster.copy()
        X = df_cluster_3.drop(0, axis=1)
        X_scaled = scaler.fit_transform(X)
        km = KMeans(n_clusters=3, random_state=1)
        km.fit(X_scaled)
        df_cluster_3['cluster'] = km.labels_
        df_novel['cluster_3_syl'] = df_cluster_3['cluster']
        # create 3 clusters without syllables
        df_cluster_3no_syl = df_cluster.copy()
        X = df_cluster_3no_syl.drop(0, axis=1)
        X_scaled = scaler.fit_transform(X)
        km = KMeans(n_clusters=3, random_state=1)
        km.fit(X_scaled)
        df_cluster_3no_syl['cluster'] = km.labels_
        df_novel['cluster_3no_syl'] = df_cluster_3no_syl['cluster']
        # create 5 clusters
        df_cluster_5 = df_cluster.copy()
        X = df_cluster_5.drop(0, axis=1)
        X_scaled = scaler.fit_transform(X)
        km = KMeans(n_clusters=5, random_state=1)
        km.fit(X_scaled)
        df_cluster_5['cluster'] = km.labels_
        df_novel['cluster_5'] = df_cluster_5['cluster']
        df_novel.to_csv('novel_' + nn + 'list_1.csv', index=False)
    except Exception:
        rejects_3.append(novel_num)
Example 12: train_test
# Required import: from sklearn.preprocessing import StandardScaler [as alias]
# Or: from sklearn.preprocessing.StandardScaler import fit_transform [as alias]
def train_test(self, X, y, X_test):
    """
    """
    sss = StratifiedShuffleSplit(y, 1, test_size=0.5)
    for train_id, valid_id in sss:
        X0, X1 = X[train_id], X[valid_id]
        y0, y1 = y[train_id], y[valid_id]

        # First half
        w0 = np.zeros(len(y0))
        for i in range(len(w0)):
            w0[i] = self.w[int(y0[i])]
        xg0_train = DMatrix(X0, label=y0, weight=w0)
        xg0_test = DMatrix(X1, label=y1)
        xgt_test = DMatrix(X_test)
        bst0 = my_train_xgboost(self.param, xg0_train, self.num_round)
        y0_pred = bst0.predict(xg0_test).reshape(X1.shape[0], 9)
        yt_pred = bst0.predict(xgt_test).reshape(X_test.shape[0], 9)

        # Calibrated RF
        rf = RandomForestClassifier(n_estimators=600, criterion='gini',
                                    class_weight='auto', max_features='auto')
        cal = CalibratedClassifierCV(rf, method='isotonic', cv=3)
        cal.fit(X0, y0)
        y0_cal = cal.predict_proba(X1)
        yt_cal = cal.predict_proba(X_test)

        # Second half
        ss = StandardScaler()
        y0_pred = ss.fit_transform(y0_pred)
        yt_pred = ss.fit_transform(yt_pred)
        y0_cal = ss.fit_transform(y0_cal)
        yt_cal = ss.fit_transform(yt_cal)
        X1 = np.hstack((X1, y0_pred, y0_cal))
        X_test = np.hstack((X_test, yt_pred, yt_cal))

        w1 = np.zeros(len(y1))
        # self.param['eta'] = 0.01
        self.num_round = 450
        for i in range(len(w1)):
            w1[i] = self.w[int(y1[i])]
        xg1_train = DMatrix(X1, label=y1, weight=w1)
        xg_test = DMatrix(X_test)
        bst1 = my_train_xgboost(self.param, xg1_train, self.num_round)
        y_pred = bst1.predict(xg_test).reshape(X_test.shape[0], 9)
        return y_pred
Example 13: correlation_matching
# Required import: from sklearn.preprocessing import StandardScaler [as alias]
# Or: from sklearn.preprocessing.StandardScaler import fit_transform [as alias]
def correlation_matching(I_tr, T_tr, I_te, T_te, n_comps):
    """Learns correlation matching (CM) over I_tr and T_tr
    and applies it to I_tr, T_tr, I_te, T_te

    Parameters
    ----------
    I_tr: np.ndarray [shape=(n_tr, d_I)]
        image data matrix for training
    T_tr: np.ndarray [shape=(n_tr, d_T)]
        text data matrix for training
    I_te: np.ndarray [shape=(n_te, d_I)]
        image data matrix for testing
    T_te: np.ndarray [shape=(n_te, d_T)]
        text data matrix for testing
    n_comps: int > 0 [scalar]
        number of canonical components to use

    Returns
    -------
    I_tr_cca : np.ndarray [shape=(n_tr, n_comps)]
        image data matrix represented in correlation space
    T_tr_cca : np.ndarray [shape=(n_tr, n_comps)]
        text data matrix represented in correlation space
    I_te_cca : np.ndarray [shape=(n_te, n_comps)]
        image data matrix represented in correlation space
    T_te_cca : np.ndarray [shape=(n_te, n_comps)]
        text data matrix represented in correlation space
    """
    # scale image and text data
    I_scaler = StandardScaler()
    I_tr = I_scaler.fit_transform(I_tr)
    I_te = I_scaler.transform(I_te)

    T_scaler = StandardScaler()
    T_tr = T_scaler.fit_transform(T_tr)
    T_te = T_scaler.transform(T_te)

    cca = PLSCanonical(n_components=n_comps, scale=False)
    cca.fit(I_tr, T_tr)

    I_tr_cca, T_tr_cca = cca.transform(I_tr, T_tr)
    I_te_cca, T_te_cca = cca.transform(I_te, T_te)

    return I_tr_cca, T_tr_cca, I_te_cca, T_te_cca
Example 14: stack_features
# Required import: from sklearn.preprocessing import StandardScaler [as alias]
# Or: from sklearn.preprocessing.StandardScaler import fit_transform [as alias]
def stack_features(params):
    """
    Get local features for all training images together
    """
    # Init detector and extractor
    detector, extractor = init_detect_extract(params)

    # Read image names
    with open(
        os.path.join(params["root"], params["root_save"], params["image_lists"], params["split"] + ".txt"), "r"
    ) as f:
        image_list = f.readlines()

    X = []
    for image_name in image_list:
        # Read image
        im = cv2.imread(
            os.path.join(params["root"], params["database"], params["split"], "images", image_name.rstrip())
        )
        # Resize image
        im = resize_image(params, im)
        feats = image_local_features(im, detector, extractor)
        # Stack all local descriptors together
        if feats is not None:
            if len(X) == 0:
                X = feats
            else:
                X = np.vstack((X, feats))

    if params["normalize_feats"]:
        X = normalize(X)

    if params["whiten"]:
        pca = PCA(whiten=True)
        pca.fit_transform(X)  # fits the PCA model; the transformed output is not captured, so X itself is not whitened here
    else:
        pca = None

    # Scale data to 0 mean and unit variance
    if params["scale"]:
        scaler = StandardScaler()
        scaler.fit_transform(X)  # fits the scaler; the scaled output is not captured, so X is returned unscaled
    else:
        scaler = None

    return X, pca, scaler
Example 15: main
# Required import: from sklearn.preprocessing import StandardScaler [as alias]
# Or: from sklearn.preprocessing.StandardScaler import fit_transform [as alias]
def main():
    df = pd.read_csv('https://archive.ics.uci.edu/ml/machine-learning-databases/housing/housing.data',
                     header=None,
                     sep='\s+')
    df.columns = ['CRIM', 'ZN', 'INDUS', 'CHAS', 'NOX', 'RM',
                  'AGE', 'DIS', 'RAD', 'TAX', 'PTRATIO', 'B',
                  'LSTAT', 'MEDV']
    print(df.head())

    # Select a subset of the features and plot the correlation between features
    cols = ['LSTAT', 'INDUS', 'NOX', 'RM', 'MEDV']
    sns.pairplot(df[cols], size=2.5)
    plt.title('Correlations between 5 features')
    plt.show()

    # Plot a heatmap of the same subset of features
    cm = np.corrcoef(df[cols].values.T)
    sns.set(font_scale=2.5)
    hm = sns.heatmap(cm,
                     cbar=True,
                     annot=True,
                     square=True,
                     fmt='.2f',
                     annot_kws={'size': 15},
                     yticklabels=cols,
                     xticklabels=cols)
    plt.show()

    X = df[['RM']].values
    y = df['MEDV'].values
    sc_x = StandardScaler()
    sc_y = StandardScaler()
    X_std = sc_x.fit_transform(X)
    y_std = sc_y.fit_transform(y)  # note: newer scikit-learn versions require a 2D input here, e.g. sc_y.fit_transform(y[:, np.newaxis]).flatten()

    lr = LinearRegressionGD()
    lr.fit(X_std, y_std)

    plt.plot(range(1, lr.n_iter + 1), lr.cost_)
    plt.ylabel('SSE')
    plt.xlabel('Epoch')
    plt.show()

    lin_regplot(X_std, y_std, lr)
    plt.xlabel('Average number of rooms [RM] (standardized)')
    plt.ylabel('Price in $1000\'s [MEDV] (standardized)')
    plt.show()

    # Example prediction for a house with 5 rooms
    num_rooms_std = sc_x.transform([5.0])  # note: newer scikit-learn versions expect a 2D input here, e.g. sc_x.transform([[5.0]])
    price_std = lr.predict(num_rooms_std)
    print("Price in $1000's: %.3f" % sc_y.inverse_transform(price_std))