本文整理汇总了Python中sklearn.preprocessing.MinMaxScaler.fit方法的典型用法代码示例。如果您正苦于以下问题:Python MinMaxScaler.fit方法的具体用法?Python MinMaxScaler.fit怎么用?Python MinMaxScaler.fit使用的例子?那么恭喜您, 这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在类sklearn.preprocessing.MinMaxScaler
的用法示例。
在下文中一共展示了MinMaxScaler.fit方法的15个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Python代码示例。
示例1: predict_new
# 需要导入模块: from sklearn.preprocessing import MinMaxScaler [as 别名]
# 或者: from sklearn.preprocessing.MinMaxScaler import fit [as 别名]
def predict_new(self, input):
    """Scale a 5-feature input with the training data's min/max, run the
    trained model on it, and print the inverse-transformed prediction.

    Args:
        input: list of exactly 5 numeric feature values.

    Returns:
        None; the prediction is printed.

    Raises:
        ValueError: if `input` is not a list of exactly 5 values.
    """
    # Validate BEFORE the expensive training step (the original trained the
    # model first), and raise explicitly instead of `assert`, which is
    # stripped under `python -O`.
    if type(input) != list or len(input) != 5:
        raise ValueError("input must be a list of exactly 5 values")
    model = self.train_model()
    scaler = MinMaxScaler(feature_range=(0, 1))
    scaler.fit(self.data)
    inp = scaler.transform([input])
    # Model expects shape (batch=1, timesteps=1, features=5); presumably an
    # RNN-style regressor — TODO confirm against train_model().
    print(scaler.inverse_transform(model.predict(numpy.array(inp).reshape(1, 1, 5))))
示例2: sample_from_generator
# 需要导入模块: from sklearn.preprocessing import MinMaxScaler [as 别名]
# 或者: from sklearn.preprocessing.MinMaxScaler import fit [as 别名]
def sample_from_generator(history, nb_samples, latent_dim=12,
                          valid_split=0.3, random_split=True,
                          hidden_dims=None, **kwargs):
    """Fit a generative model on min-max-scaled `history` and draw
    `nb_samples` new rows, returned in the original data scale.

    `valid_split` may be falsy (no validation set), a float, or an explicit
    row count; `random_split` shuffles rows before splitting.
    """
    scaler = MinMaxScaler()
    scaler.fit(history)
    scaled = scaler.transform(history)
    n_rows = history.shape[0]
    # Resolve the validation-set size from the three accepted forms.
    # NOTE(review): for a float split this yields n_rows - floor(n_rows*split)
    # validation rows (70% when split=0.3), which looks inverted — confirm
    # the intended meaning of `valid_split` before changing it.
    if not valid_split:
        n_valid = 0
    elif isinstance(valid_split, float):
        n_valid = n_rows - int(np.floor(n_rows * valid_split))
    else:
        n_valid = valid_split
    if n_valid > 0:
        if random_split:
            order = np.arange(n_rows)
            np.random.shuffle(order)
            x_valid = scaled[order[-n_valid:], :]
            x_train = scaled[order[:-n_valid], :]
        else:
            x_valid = scaled[-n_valid:, :]
            x_train = scaled[:-n_valid, :]
    else:
        x_valid, x_train = None, scaled
    _, generator = build_model(latent_dim, x_train, x_valid=x_valid,
                               hidden_dims=hidden_dims, **kwargs)
    latent_draws = np.random.standard_normal((nb_samples, latent_dim))
    samples = generator.predict(latent_draws)
    return scaler.inverse_transform(samples)
示例3: data_organizer
# 需要导入模块: from sklearn.preprocessing import MinMaxScaler [as 别名]
# 或者: from sklearn.preprocessing.MinMaxScaler import fit [as 别名]
def data_organizer( instances, outcomes ):
    """Organize raw instance/outcome data for modelling.

    Keeps only `included_features`, drops rows whose GPA outcome or any
    selected feature is NaN, median-imputes remaining gaps, and scales
    features to [0, 1].

    Args:
        instances: iterable of feature rows aligned with ALL_LABELS.
        outcomes: iterable of (u1, u2, gpa) tuples aligned with instances.

    Returns:
        (instances, outcomes, scaler): cleaned/scaled feature matrix, the
        matching outcomes, and the fitted MinMaxScaler for reuse on new data.
    """
    excluded_features = set([])
    #print( "Using only SAT subject tests" )
    #included_features = set(["SATCRDG", "SATMATH", "SATWRTG"])
    #print( "Using SAT total and HSGPA" )
    #included_features = set(["SATTotal", "HSGPA"])
    #print( "Using gender, firstgen, famincome, firstlang" )
    #included_features = set(["gender", "Firgen", "famincome", "FirstLang"])
    print( "Using all features" )
    included_features = set(["gender", "Firgen", "famincome", "SATCRDG", "SATMATH", "SATWRTG", "SATTotal", "HSGPA", "ACTRead", "ACTMath", "ACTEngWrit", "APIScore", "FirstLang", "HSGPAunweighted"])
    #print( "SAT subject tests and HSGPA" )
    #included_features = set(["SATCRDG", "SATMATH", "SATWRTG", "HSGPA" ])
    # Remove instances without GPA data or with NaN in a selected feature.
    new_instances = []
    new_outcomes = []
    for instance, outcome in zip(instances, outcomes):
        named = dict(zip(ALL_LABELS, instance))
        u1, u2, gpa = outcome
        if math.isnan(gpa):
            continue
        row = []
        skip = False
        # Iteration follows ALL_LABELS insertion order, keeping the feature
        # column order stable across rows.
        for key in named:
            if key in included_features:
                if math.isnan(named[key]):
                    skip = True
                row.append(named[key])
        if not skip:
            new_outcomes.append(list(outcome))
            new_instances.append(row)
    instances = new_instances
    outcomes = new_outcomes
    # Fill any remaining NaN values with the column median (belt and braces;
    # rows with NaN in selected features were already dropped above).
    instance_list = [list(instance) for instance in instances]
    bandaid = Imputer( strategy='median' )
    instances = bandaid.fit_transform( instance_list )
    # Scale to [0,1]. fit_transform both fits and applies the scaler, so the
    # original's separate scaler.fit() call was a redundant double fit.
    scaler = MinMaxScaler( feature_range=(0,1), copy=False)
    instances = scaler.fit_transform(instances)
    return instances, outcomes, scaler
示例4: NB_coefficients
# 需要导入模块: from sklearn.preprocessing import MinMaxScaler [as 别名]
# 或者: from sklearn.preprocessing.MinMaxScaler import fit [as 别名]
def NB_coefficients(year=2010):
    """Fit a negative-binomial regression (via the nbr_eval.R script) and
    return (coefficients, feature_header).

    Builds the feature matrix from demographics, spatial-lag and taxi-flow
    crime features plus the FourSquare POI distribution, min-max scales it
    in place, writes Y.csv / f.csv for the R script, then parses the
    space-separated coefficients the script prints.
    """
    poi_dist = getFourSquarePOIDistribution(useRatio=False)
    F_taxi = getTaxiFlow(normalization="bydestination")
    W2 = generate_geographical_SpatialLag_ca()
    Y = retrieve_crime_count(year=year)
    C = generate_corina_features()
    D = C[1]
    # Crime counts are normalized to a rate per 10,000 residents; the first
    # demographic column is presumably the population — TODO confirm.
    popul = C[1][:,0].reshape(C[1].shape[0],1)
    Y = np.divide(Y, popul) * 10000
    f2 = np.dot(W2, Y)
    ftaxi = np.dot(F_taxi, Y)
    f = np.concatenate( (D, f2, ftaxi, poi_dist), axis=1 )
    # copy=False makes transform() scale `f` in place, so its return value
    # can be discarded.
    mms = MinMaxScaler(copy=False)
    mms.fit(f)
    mms.transform(f)
    header = C[0] + [ 'spatiallag', 'taxiflow'] + \
        ['POI food', 'POI residence', 'POI travel', 'POI arts entertainment',
         'POI outdoors recreation', 'POI education', 'POI nightlife',
         'POI professional', 'POI shops', 'POI event']
    df = pd.DataFrame(f, columns=header)
    np.savetxt("Y.csv", Y, delimiter=",")
    df.to_csv("f.csv", sep=",", index=False)
    # NB permute
    nbres = subprocess.check_output( ['Rscript', 'nbr_eval.R', 'ca', 'coefficient'] )
    # Parenthesized prints are valid under both Python 2 and 3; the original
    # used Python-2-only print statements.
    print(nbres)
    ls = nbres.strip().split(" ")
    coef = [float(e) for e in ls]
    print(coef)
    return coef, header
示例5: scale_data
# 需要导入模块: from sklearn.preprocessing import MinMaxScaler [as 别名]
# 或者: from sklearn.preprocessing.MinMaxScaler import fit [as 别名]
def scale_data(pitchers):
    """Min-max scale the pitcher stat columns to [0, 1] and return a frame
    of Player_and_Year merged with the scaled stats.

    The merge is on the row index, so `pitchers` is assumed to carry a
    default RangeIndex aligned with the stat rows — TODO confirm at callers.
    """
    # Single source of truth for the stat columns — the original repeated
    # this 13-name list twice, which invites drift.
    stat_columns = ['Decisions', 'Wins_Over_Decisions',
                    'Wins_Over_Starts', 'Relief_Appearances',
                    'Shutout_Percentage', 'Outs_Recorded_Per_Appearance',
                    'Hits_Allowed_Per_Appearance', 'Earned_Runs_Per_Appearance',
                    'Runs_Per_Appearance', 'Home_Runs_Per_Appearance',
                    'Walks_Per_Appearance', 'Strikeouts_Per_Appearance',
                    'ERA']
    num_data = pitchers[stat_columns]
    scaler = MinMaxScaler()
    # fit_transform replaces the original separate fit()/transform() pair.
    num_data = pd.DataFrame(scaler.fit_transform(num_data))
    num_data.columns = stat_columns
    pitchers = pitchers[['Player_and_Year']]
    pitchers = pd.merge(pitchers, num_data, how='inner', left_index=True,
                        right_index=True)
    return pitchers
示例6: NumericColumn
# 需要导入模块: from sklearn.preprocessing import MinMaxScaler [as 别名]
# 或者: from sklearn.preprocessing.MinMaxScaler import fit [as 别名]
class NumericColumn(BaseEstimator, TransformerMixin):
    '''
    Standardize a single numeric column to the [0, 1] range, with missing
    values treated as 0.
    '''

    def __init__(self):
        '''Create the underlying MinMaxScaler.'''
        self._transformer = MinMaxScaler()

    def fit(self, X, y=None):
        '''Learn the column's min/max (NaNs counted as 0).'''
        self._transformer.fit(self._as_column(X))
        return self

    def transform(self, X):
        '''
        Transform a column of data into numerical percentage values.
        Parameters
        ----------
        X : pandas series or numpy array
        '''
        return self._transformer.transform(self._as_column(X)).astype(np.float32)

    @staticmethod
    def _as_column(X):
        '''Reshape X to a single column and replace NaN with 0.'''
        return pd.DataFrame(np.array(X).reshape(-1, 1)).fillna(0)
示例7: preprocess_datasets
# 需要导入模块: from sklearn.preprocessing import MinMaxScaler [as 别名]
# 或者: from sklearn.preprocessing.MinMaxScaler import fit [as 别名]
def preprocess_datasets(X_train, X_test, args):
    """Optionally min-max scale train/test feature arrays to [-1, 1].

    The scaler is fit on the stacked training arrays only, then applied to
    every array in both splits. Returns the (possibly scaled) pair.
    """
    if 'scale' in args.preprocessing:
        print('Scaling features to range [-1,1] ...')
        scaler = MinMaxScaler(feature_range=(-1, 1))
        scaler.fit(np.vstack(X_train))
        X_train = list(map(scaler.transform, X_train))
        X_test = list(map(scaler.transform, X_test))
    return X_train, X_test
示例8: transform
# 需要导入模块: from sklearn.preprocessing import MinMaxScaler [as 别名]
# 或者: from sklearn.preprocessing.MinMaxScaler import fit [as 别名]
def transform(self, fp):
    """Min-max scale every feature in the pool to self.feature_range.

    Yields one transformed feature per input feature, with the pool's
    metadata preserved.
    """
    meta = FeaturePool(fp).meta()
    values = FeaturePool(fp).array()
    scaler = MinMaxScaler(feature_range=self.feature_range)
    scaler.fit(values)
    scaled = scaler.transform(values)
    yield from FeaturePool.from_array(meta, scaled)
示例9: preprocess_datasets
# 需要导入模块: from sklearn.preprocessing import MinMaxScaler [as 别名]
# 或者: from sklearn.preprocessing.MinMaxScaler import fit [as 别名]
def preprocess_datasets(train, test, args):
    """Optionally scale both datasets' feature matrices to [-1, 1].

    The scaler is fit on the stacked training matrices only and applied to
    each matrix in train and test; labels, target names and groups pass
    through untouched.
    """
    # Guard clause: nothing to do unless scaling was requested.
    if 'scale' not in args.preprocessing:
        return train, test
    print('Scaling features to range [-1,1] ...')
    scaler = MinMaxScaler(feature_range=(-1, 1))
    scaler.fit(np.vstack(train.X))
    scaled_train = Dataset([scaler.transform(x) for x in train.X],
                           train.y, train.target_names, train.groups)
    scaled_test = Dataset([scaler.transform(x) for x in test.X],
                          test.y, test.target_names, test.groups)
    return scaled_train, scaled_test
示例10: preprocess_data
# 需要导入模块: from sklearn.preprocessing import MinMaxScaler [as 别名]
# 或者: from sklearn.preprocessing.MinMaxScaler import fit [as 别名]
def preprocess_data(X, scaler=None):
    """Log-compress X and min-max scale it.

    Args:
        X: numeric array of values > -1 (log1p domain).
        scaler: an already-fitted MinMaxScaler to reuse, or None to fit a
            new one on this data.

    Returns:
        (X, scaler): the transformed data and the scaler used.
    """
    # Bug fix: the log transform used to run only when fitting a NEW scaler,
    # so data passed through an existing scaler (e.g. a test set) was scaled
    # on a different basis than the data the scaler was fit on. Apply it
    # unconditionally so both paths see log-space data.
    X = np.log1p(X)  # log1p(X) == log(1 + X), more accurate near 0
    if not scaler:
        scaler = MinMaxScaler()
        scaler.fit(X)
    X = scaler.transform(X)
    # Removed dead gaussian-noise leftovers: a sample was drawn but the
    # addition (`X = X + s`) was commented out, so it only burned RNG state.
    return X, scaler
示例11: __init__
# 需要导入模块: from sklearn.preprocessing import MinMaxScaler [as 别名]
# 或者: from sklearn.preprocessing.MinMaxScaler import fit [as 别名]
class SerialDataScaler:
    """Wrap a MinMaxScaler fitted on a 1-D series.

    Inputs are reshaped to a single float32 column so scalar series can be
    scaled into [0, 1] and mapped back.
    """

    def __init__(self, data):
        column = numpy.reshape(data, (len(data), 1)).astype("float32")
        self.scaler = MinMaxScaler(feature_range=(0, 1))
        self.scaler.fit(column)

    def transform(self, X):
        """Scale the 1-D sequence X into [0, 1] (returned as a column)."""
        return self.scaler.transform(numpy.reshape(X, (len(X), 1)))

    def inverse_transform(self, x):
        """Map scaled column values back to the original range."""
        return self.scaler.inverse_transform(x)
示例12: test_minmaxscaler_vs_sklearn
# 需要导入模块: from sklearn.preprocessing import MinMaxScaler [as 别名]
# 或者: from sklearn.preprocessing.MinMaxScaler import fit [as 别名]
def test_minmaxscaler_vs_sklearn():
    """msmbuilder's trajectory-aware MinMaxScaler must match sklearn's
    MinMaxScaler on the first trajectory."""
    reference = MinMaxScalerR()
    reference.fit(np.concatenate(trajs))
    wrapped = MinMaxScaler()
    wrapped.fit(trajs)
    expected = reference.transform(trajs[0])
    actual = wrapped.transform(trajs)[0]
    np.testing.assert_array_almost_equal(expected, actual)
示例13: organize_data
# 需要导入模块: from sklearn.preprocessing import MinMaxScaler [as 别名]
# 或者: from sklearn.preprocessing.MinMaxScaler import fit [as 别名]
def organize_data(train_size=59872):
    """Load device data, encode categorical labels, scale features to
    [0, 1], and split into train/CV/test sets.

    train_size=59872 is 80% of the data, rounded so large mini-batches
    align in size.

    Returns:
        (X_train, X_cv, y_train, y_cv, X_test)
    """
    # Bug fix: pickle files are binary — opening in text mode ('r') corrupts
    # the stream on Windows and fails outright on Python 3; use 'rb'.
    with open('dev_df.pkl', 'rb') as f:
        dev_df = pd.DataFrame(cPickle.load(f))
    # Training/CV set
    gender_age_train = pd.read_csv('gender_age_train.csv', index_col=0).drop(['gender', 'age'], axis=1)
    gender_age_train = gender_age_train.join(dev_df)
    # Test set
    gender_age_test = pd.read_csv('gender_age_test.csv', index_col=0)
    gender_age_test = gender_age_test.join(dev_df)
    # Labels go in the y array; features in the X matrix. phone_brand,
    # device_model, and group need label encoding.
    X = np.array(gender_age_train)
    X_test = np.array(gender_age_test)
    # Column 0 is the group to be classified, so move it into y then drop it
    # (the original comment said "Row 0", but this slices a column).
    y = X[:,0]
    from sklearn.preprocessing import LabelEncoder
    le_y = LabelEncoder()
    y = le_y.fit_transform(y)
    X = np.delete(X,0,1)
    # Encode categorical columns; fit each encoder on train+test combined so
    # labels appearing only in the test set still transform cleanly.
    le_phone_brand = LabelEncoder()
    le_phone_brand.fit(np.hstack((X[:,0], X_test[:,0])))
    X[:,0] = le_phone_brand.transform(X[:,0])
    X_test[:,0] = le_phone_brand.transform(X_test[:,0])
    le_device_model = LabelEncoder()
    le_device_model.fit(np.hstack((X[:,1], X_test[:,1])))
    X[:,1] = le_device_model.transform(X[:,1])
    X_test[:,1] = le_device_model.transform(X_test[:,1])
    # Scale features to [0, 1]; the scaler is fit on train+test jointly.
    from sklearn.preprocessing import MinMaxScaler
    scaler = MinMaxScaler()
    scaler.fit(np.vstack((X, X_test)))
    X = scaler.transform(X)
    X_test = scaler.transform(X_test)
    # Create CV set
    from sklearn.cross_validation import train_test_split
    X_train, X_cv, y_train, y_cv = train_test_split(X, y, train_size=train_size, random_state=0)
    return X_train, X_cv, y_train, y_cv, X_test
示例14: _scaled_data
# 需要导入模块: from sklearn.preprocessing import MinMaxScaler [as 别名]
# 或者: from sklearn.preprocessing.MinMaxScaler import fit [as 别名]
def _scaled_data(self):
    """Load train/test data scaled to [-1, 1].

    Args:
        None

    Returns:
        (scaler, train_scaled, test_scaled): the fitted MinMaxScaler and
        the scaled train and test arrays.
    """
    raw_train, raw_test = self._data()
    # Fit the scaler on the training data only; fit() returns the scaler.
    scaler = MinMaxScaler(feature_range=(-1, 1)).fit(raw_train)
    # Reshape to (rows, features) before transforming — a no-op for arrays
    # that are already 2-D.
    train_scaled = scaler.transform(
        raw_train.reshape(raw_train.shape[0], raw_train.shape[1]))
    test_scaled = scaler.transform(
        raw_test.reshape(raw_test.shape[0], raw_test.shape[1]))
    return scaler, train_scaled, test_scaled
示例15: log_minmax
# 需要导入模块: from sklearn.preprocessing import MinMaxScaler [as 别名]
# 或者: from sklearn.preprocessing.MinMaxScaler import fit [as 别名]
class log_minmax(sklearn.base.BaseEstimator,
                 sklearn.base.TransformerMixin):
    '''Transformer that first takes log1p(X) then min-max scales the result.'''

    def __init__(self):
        self.mm_tran = MinMaxScaler()

    def fit(self, X, y=None):
        '''Fit the inner MinMaxScaler on log1p(X); y is accepted for
        sklearn-pipeline compatibility (MinMaxScaler.fit ignores it).'''
        self.mm_tran.fit(np.log1p(X), y)
        return self

    def transform(self, X):
        '''Return the fitted min-max scaling of log1p(X).'''
        return self.mm_tran.transform(np.log1p(X))

    def fit_transform(self, X, y=None):
        '''Fit on (X, y) then transform X.

        Fix: y is now forwarded to fit() instead of being silently dropped,
        matching the sklearn fit_transform contract.
        '''
        self.fit(X, y)
        return self.transform(X)