

Python preprocessing.MinMaxScaler Class Code Examples

This article collects typical usage examples of the sklearn.preprocessing.MinMaxScaler class in Python. If you have been wondering how the MinMaxScaler class works, how to use it, or what real-world usage looks like, the curated class code examples below should help.


The following presents 15 code examples of the MinMaxScaler class, sorted by popularity by default. You can upvote the examples you like or find useful; your feedback helps the system recommend better Python code examples.
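Before turning to the collected examples, here is a minimal usage sketch (toy values, not taken from any of the projects below) showing what MinMaxScaler actually computes: each feature is rescaled to the requested feature_range, (0, 1) by default, using the per-feature minimum and maximum seen during fit.

import numpy as np
from sklearn.preprocessing import MinMaxScaler

# Toy data: 3 samples, 2 features (illustrative values only)
X = np.array([[1.0, 10.0],
              [2.0, 20.0],
              [4.0, 40.0]])

# Default feature_range=(0, 1):
#   X_scaled = (X - X.min(axis=0)) / (X.max(axis=0) - X.min(axis=0))
scaler = MinMaxScaler()
X_scaled = scaler.fit_transform(X)
print(X_scaled)                      # each column now spans [0, 1]

# A custom range works the same way
print(MinMaxScaler(feature_range=(-1, 1)).fit_transform(X))

# inverse_transform recovers the original values
print(scaler.inverse_transform(X_scaled))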

Example 1: test_min_max_scaler_iris

def test_min_max_scaler_iris():
    X = iris.data
    scaler = MinMaxScaler()
    # default params
    X_trans = scaler.fit_transform(X)
    assert_array_almost_equal(X_trans.min(axis=0), 0)
    assert_array_almost_equal(X_trans.min(axis=0), 0)
    assert_array_almost_equal(X_trans.max(axis=0), 1)
    X_trans_inv = scaler.inverse_transform(X_trans)
    assert_array_almost_equal(X, X_trans_inv)

    # not default params: min=1, max=2
    scaler = MinMaxScaler(feature_range=(1, 2))
    X_trans = scaler.fit_transform(X)
    assert_array_almost_equal(X_trans.min(axis=0), 1)
    assert_array_almost_equal(X_trans.max(axis=0), 2)
    X_trans_inv = scaler.inverse_transform(X_trans)
    assert_array_almost_equal(X, X_trans_inv)

    # min=-.5, max=.6
    scaler = MinMaxScaler(feature_range=(-.5, .6))
    X_trans = scaler.fit_transform(X)
    assert_array_almost_equal(X_trans.min(axis=0), -.5)
    assert_array_almost_equal(X_trans.max(axis=0), .6)
    X_trans_inv = scaler.inverse_transform(X_trans)
    assert_array_almost_equal(X, X_trans_inv)

    # raises on invalid range
    scaler = MinMaxScaler(feature_range=(2, 1))
    assert_raises(ValueError, scaler.fit, X)
Author: abouaziz, Project: scikit-learn, Lines: 30, Source: test_preprocessing.py

Example 2: loaddataset

    def loaddataset(self, path, module):
        df = pd.read_csv(path)
        subdf = df[['PassengerId','Pclass','Sex','Age','Embarked','Fare','SibSp','Parch']]
        SibSp = subdf['SibSp']
        Parch = subdf['Parch']
        # fill missing Age values with the column mean
        Age = subdf['Age'].fillna(value=subdf.Age.mean())

        Fare = subdf['Fare'].fillna(value=subdf.Fare.mean())

        dummies_Sex = pd.get_dummies(subdf['Sex'], prefix='Sex')

        dummies_Embarked = pd.get_dummies(subdf['Embarked'], prefix='Embarked')

        dummies_Pclass = pd.get_dummies(subdf['Pclass'], prefix='Pclass')

        PassengerId = subdf['PassengerId']

        # scale Age and Fare to [0, 1]
        scaler = MinMaxScaler()
        age_scaled = scaler.fit_transform(Age.values)
        fare_scaled = scaler.fit_transform(Fare.values)

        Age_Scaled = pd.DataFrame(age_scaled, columns=['Age_Scaled'])
        Fare_Scaled = pd.DataFrame(fare_scaled, columns=['Fare_Scaled'])

        if module == 'train':
            self.trainlabel = df.Survived
            self.trainset = pd.concat([dummies_Pclass, dummies_Sex, dummies_Embarked, Age_Scaled, Fare_Scaled, SibSp, Parch], axis=1)
        elif module == 'test':
            self.testset = pd.concat([PassengerId, dummies_Pclass, dummies_Sex, dummies_Embarked, Age_Scaled, Fare_Scaled, SibSp, Parch], axis=1)
Author: NatureBlack, Project: Kaggle, Lines: 31, Source: titanic.py
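A note on this example: scaler.fit_transform(Age.values) passes a 1-D array. Older scikit-learn releases accepted that, but current versions require 2-D input of shape (n_samples, n_features), so the equivalent calls would reshape first (a sketch against the snippet above, not the original author's code):

        # Reshape each 1-D Series to a single-column 2-D array before scaling
        age_scaled = scaler.fit_transform(Age.values.reshape(-1, 1))
        fare_scaled = scaler.fit_transform(Fare.values.reshape(-1, 1))

Also note that calling fit_transform twice on the same scaler simply refits it, so Age and Fare are each scaled by their own minimum and maximum.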

Example 3: NB_coefficients

def NB_coefficients(year=2010):
    poi_dist = getFourSquarePOIDistribution(useRatio=False)
    F_taxi = getTaxiFlow(normalization="bydestination")
    W2 = generate_geographical_SpatialLag_ca()
    Y = retrieve_crime_count(year=year)
    C = generate_corina_features()
    D = C[1]

    popul = C[1][:,0].reshape(C[1].shape[0],1)
    Y = np.divide(Y, popul) * 10000
    
    f2 = np.dot(W2, Y)
    ftaxi = np.dot(F_taxi, Y)
    
    f = np.concatenate( (D, f2, ftaxi, poi_dist), axis=1 )
    mms = MinMaxScaler(copy=False)
    mms.fit(f)
    mms.transform(f)
    header = C[0] + [ 'spatiallag', 'taxiflow'] + \
        ['POI food', 'POI residence', 'POI travel', 'POI arts entertainment', 
                       'POI outdoors recreation', 'POI education', 'POI nightlife', 
                       'POI professional', 'POI shops', 'POI event']
    df = pd.DataFrame(f, columns=header)
    
    np.savetxt("Y.csv", Y, delimiter=",")
    df.to_csv("f.csv", sep=",", index=False)
    
    # NB permute
    nbres = subprocess.check_output( ['Rscript', 'nbr_eval.R', 'ca', 'coefficient'] )
    print nbres
    
    ls = nbres.strip().split(" ")
    coef = [float(e) for e in ls]
    print coef
    return coef, header
Author: thekingofkings, Project: chicago-crime, Lines: 35, Source: NBRegression.py

Example 4: rank_to_dict

def rank_to_dict(ranks, names, order=1, ratio=1):
	minmax = MinMaxScaler()
	ranks = minmax.fit_transform(order*np.array([ranks]).T).T[0]
	if np.mean(ranks) == 0:
		ranks+=1
	ranks = map(lambda x: round(x, 2), ranks)
	return dict(zip(names, ranks ))
Author: Jun321, Project: kaggle_project, Lines: 7, Source: data_analyse.py
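For context, rank_to_dict above is typically called to turn a model's raw feature scores into comparable values in [0, 1]. A small hypothetical usage sketch (the feature names and coefficients are made up, and rank_to_dict is assumed to be defined as in the snippet above):

import numpy as np

names = ['age', 'fare', 'pclass']        # hypothetical feature names
coefs = np.array([0.8, 0.1, 0.5])        # hypothetical model coefficients

# Absolute scores rescaled to [0, 1] and rounded to two decimals
print(rank_to_dict(np.abs(coefs), names))
# -> {'age': 1.0, 'fare': 0.0, 'pclass': 0.57}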

Example 5: vary_border

def vary_border(pred_true,y,num_iter=101):
    mms = MinMaxScaler()
    pred=pred_true.copy()
    pred=mms.fit_transform(pred)
    best_score = 0
    for k1 in range(num_iter):
        c1 = k1/(num_iter-1)
        for k2 in range(num_iter):
            c2 = k2/(num_iter-1)
            for k3 in range(num_iter):
                c3 = k3/(num_iter-1)
                if c1 < c2 and c1 < c3 and c2 < c3 and c1 > 0.25 and c1 < 0.5 and c3 < 0.9:
                    tmp_pred = pred.copy()
                    mask1 = tmp_pred < c1
                    mask2 = (tmp_pred >=c1) * (tmp_pred < c2)
                    mask3 = (tmp_pred >=c2) * (tmp_pred < c3)
                    mask4 = tmp_pred >=c3
                    tmp_pred[mask1] = 1
                    tmp_pred[mask2] = 2
                    tmp_pred[mask3] = 3
                    tmp_pred[mask4] = 4
                    score = quadratic_weighted_kappa(y,tmp_pred)
                    if score > best_score:
                        best_score = score
                        best_coef = [c1,c2,c3]
                        best_pred = tmp_pred.copy()
    #print(best_score,best_coef)
    return best_pred, best_coef
Author: BabelTower, Project: kaggle-crowdflower, Lines: 28, Source: utility.py

Example 6: analysis_7

def analysis_7(df_Coredata):
    """ Multidimensional polynomial model """

    # https://www.jeremyjordan.me/polynomial-regression/

    X = df_Coredata[['d','e','f','g','i']]
    y = df_Coredata['j']

    # Specify the plot style
    sns.set(style = 'whitegrid', context = 'notebook')
    # Plot the pairwise relationships between variables
    #sns.pairplot(df_Coredata)
    #plt.show()


    #X_train, X_test, y_train, y_test  =  train_test_split(X,y,random_state = 0)
    #lr = linear_model.LinearRegression().fit(X_train, y_train)
    #print("Training set score: {:.2f}".format(lr.score(X_train, y_train)))
    #print("Test set score: {:.2f}".format(lr.score(X_test, y_test)))

    ### Rescale the data
    # Standardization
    std_Scaler = StandardScaler()
    data_std = std_Scaler.fit_transform(X)

    mmx_Scaler = MinMaxScaler()
    X_scaled = mmx_Scaler.fit_transform(X)
    #X_test_scaled = scaler.transform(X_test)

    #print(X_train_scaled)

    poly = PolynomialFeatures(degree = 2).fit(data_std)
    print(poly.get_feature_names())
Author: Yotaro723, Project: test_2, Lines: 33, Source: DoE.py

Example 7: scale

 def scale(self):
     # Scaling is an important part of this process: many of our algorithms
     # require our data to be scaled or otherwise standardized. We 
     # do this by scaling features to values between [0,1]. This preserves
     # zero entries in our sparse matrix which is always a desirable 
     # quality when working with this sort of data.
     # Scaling is sort of a convoluted process because Scipy/Scikit
     # doesn't offer a way to do this natively. We transpose the matrix, 
     # convert it to LIL format (which isn't inefficient in this operation),
     # and divide each row (column in the original matrix) by the row's
     # sum before transposing and converting back to CSR. 
     # However, if the matrix is not sparse, we don't have to worry about
     # this and can simply use one of Scikit's utility methods.
     # TODO: Maybe look at profiling to ensure that this strategy really
     # is the least expensive one.
     if self.sparse:
         self.vecs = self.vecs.tolil()
         self.vecs = self.vecs.transpose()
         num_features, _ = self.vecs.shape
         for i in range(num_features):
             self.vecs[i] /= self.vecs[i].sum()
         self.vecs = self.vecs.transpose()
         self.vecs = self.vecs.tocsr()
     else:
         mms = MinMaxScaler(copy = False)
         self.vecs = mms.fit_transform(self.vecs)
Author: clovis, Project: philoyore, Lines: 26, Source: corpus.py
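Worth noting alongside this example: recent scikit-learn versions do offer a scaler that handles sparse input natively. MaxAbsScaler divides each feature by its maximum absolute value, which preserves zero entries in a sparse matrix, so the manual transpose/LIL dance above can often be avoided. A minimal sketch (not the author's code), assuming a SciPy CSR matrix:

from scipy.sparse import csr_matrix
from sklearn.preprocessing import MaxAbsScaler

X = csr_matrix([[1.0, 0.0, 4.0],
                [2.0, 3.0, 0.0]])

# Accepts sparse input and keeps it sparse; zeros stay zeros
scaled = MaxAbsScaler().fit_transform(X)
print(scaled.toarray())   # each column divided by its max absolute value

Note that MaxAbsScaler scales each feature by its maximum absolute value, while the sparse branch above normalizes each feature by its sum, so the two are related but not identical.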

Example 8: fit

    def fit(self, X, y):
        X = np.matrix(X)
        y = np.matrix(y)
        self._outputNormalizer = MinMaxScaler()
        self._inputNormalizer = MinMaxScaler()
        self._outputNormalizer = self._outputNormalizer.fit(y)
        self._inputNormalizer = self._inputNormalizer.fit(X)
        self._inputDimension = X.shape[1]
        self._outputDimension = y.shape[1]#For now, hardcoded to 1-dimensional regression problems.
        if (not self._warmStart or self._weights == None):
            self._initializeWeights()
            self._lastDelta = None
        batchFeatures, batchTargets = self._batchify(np.matrix(self._inputNormalizer.transform(X)), self._batchSize,
                                                     np.matrix(self._outputNormalizer.transform(y)))


        #do for each step until the maximum steps:
        for i in range(self._maxSteps):
            reducedLearningRate = self._learningRate * self._shrinkage ** self._step
            for j in range(len(batchFeatures)):
                deltaW = self._learnFromBatch(batchFeatures[j], batchTargets[j])
                if (self._lastDelta == None):
                    self._lastDelta = deltaW
                for k in range(len(self._weights)):
                    self._lastDelta[k] = ((1-self._momentum) * deltaW[k] + self._momentum * self._lastDelta[k])
                    self._weights[k] = self._weights[k] + reducedLearningRate * self._lastDelta[k]
                #self._positifyWeights()
            self._step += 1
        #print(step)
        return self
Author: fdiehl, Project: NNForSKLearn, Lines: 30, Source: NeuralNetwork.py

Example 9: getips

def getips(conf, net, superpixels_num, layer='inner_product_target'):
    (options, args) = parser.parse_args()
    layer = options.layer
    data = net.blobs[layer].data
    #data = net.blobs['InnerProduct1'].data
    feature_len = data.shape[1]
    try:
        negative_numbers = conf.model['number_of_negatives']
    except:
        negative_numbers = 1
    reps = np.zeros((superpixels_num*negative_numbers, feature_len))
    for i in xrange(superpixels_num):
        if i%1000==1:
            print i
        net.forward()
        reps[i] = np.sum(net.blobs[layer].data, axis=1)
    reps_slice = reps[..., 0]
    from sklearn.preprocessing import MinMaxScaler
    clf = MinMaxScaler()
    reps_slice = clf.fit_transform(reps_slice)
    if negative_numbers > 1:
        reps_slice = np.square(reps_slice)
    #reps_slice[reps_slice<np.mean(reps_slice)] = 0
    for i in xrange(reps_slice.shape[0]):
        reps[i] = reps_slice[i]
        # print net.blobs['inner_product_target'].data[1:10]
    return reps
Author: sri3705, Project: embedding_segmentation, Lines: 27, Source: compute_similarities.py

Example 10: cluster

def cluster(final_data_dict, cluster_range, list_or_dict):
    final_data_list= clustering_module.convert_to_list(final_data_dict) 
    respondent_IDs = np.array(map(int, final_data_dict.keys()))
    feature_names = final_data_dict.values()[0].keys()
    final_data_list_imputed = clustering_module.preprocess(final_data_list)
    Scaler = MinMaxScaler()    
    final_data_list_scaled = Scaler.fit_transform(final_data_list_imputed)
    #Transformed is distance of each respondent from each cluster center
    #Predicted is the cluster membership of each respondent
    merging_list = clustering_module.convert_to_list(final_data_dict,remove_NaN=0 )
    data = list(merging_list)
    ignore_set_added = set(['ids'])
    for num_clusters in cluster_range:    
        transformed, predicted, score = clustering_module.clustering(final_data_list_scaled, num_clusters)
        cluster_name = "%s_clusters" % num_clusters
        ignore_set_added.add(cluster_name)    
        data, feature_names = clustering_module.add_new_data_to_rows(predicted, data, feature_names, [cluster_name])
    data, feature_names = clustering_module.add_new_data_to_rows(respondent_IDs, data, feature_names, ["ids"], "before")
    if list_or_dict == "dict":        
        temp = dictionary_conversion.create_dictionary(data, feature_names)    
        num_converted = dictionary_conversion.convert_values_to_int(temp)    
        #Set of features that should be different due to being categorical
        ignore_set_changed = set(['busgrn', 'peopgrn', 'sex', 'race', 'topprob1', 'topprob2'])    
        verdict = compare_respondent_dicts(respondent_IDs, num_converted, final_data_dict, ignore_set_changed, ignore_set_added)
        return num_converted, verdict
    elif list_or_dict == "list":
        return data, feature_names
Author: dent424, Project: clustering_project, Lines: 27, Source: clustering_suite.py

Example 11: sample_from_generator

def sample_from_generator(history, nb_samples, latent_dim=12, 
                          valid_split=0.3, random_split=True,
                          hidden_dims=None, **kwargs):
    scaler = MinMaxScaler()
    scaler.fit(history)
    scaled = scaler.transform(history)
    
    nb_train = history.shape[0]    
    if not valid_split:
        nb_valid = 0
    elif isinstance(valid_split, float):
        nb_valid = nb_train - int(np.floor(nb_train*valid_split))
    else:
        nb_valid = valid_split
        
    if nb_valid > 0:
        if random_split:
            ind = np.arange(nb_train)
            np.random.shuffle(ind)
            x_valid = scaled[ind[-nb_valid:], :]
            x_train = scaled[ind[:-nb_valid], :]
        else:
            x_valid = scaled[-nb_valid:, :]
            x_train = scaled[:-nb_valid, :]
    else:
        x_valid = None
        x_train = scaled
    
    _, generator = build_model(latent_dim, x_train, x_valid=x_valid, 
                               hidden_dims=hidden_dims, **kwargs)
    
    normal_sample = np.random.standard_normal((nb_samples, latent_dim))
    draws = generator.predict(normal_sample)
    return scaler.inverse_transform(draws)
Author: Andres-Hernandez, Project: CalibrationNN, Lines: 34, Source: variational_autoencoder.py

Example 12: get_training_data_by_category

    def get_training_data_by_category(category, limit=0):
        limit_pos = limit*0.2
        limit_neg = limit*0.8
        N_pos = DataDAO.count_training_data_by_category(category)
        if N_pos < limit_pos:
            limit_pos = N_pos
            limit_neg = N_pos*5

        training_data = []
        training_target = []
        positive = DataDAO.get_training_data_by_category(category)
        for ind, sample in enumerate(positive):
            if limit != 0 and ind >= limit_pos:
                break
            training_data.append(sample)
            training_target.append(1)
        negative = DataDAO.get_training_data_by_other_categories(category)
        for ind, sample in enumerate(negative):
            if limit != 0 and ind >= limit_neg:
                break
            training_data.append(sample)
            training_target.append(0)

        scaler = MinMaxScaler()
        training_data_scaled = scaler.fit_transform(training_data)

        # training_data_scaled = scale(training_data,axis=0)
        tr_data_sparse = csr_matrix(training_data_scaled)

        return tr_data_sparse, training_target, scaler
Author: cginestra, Project: san_francisco_crime, Lines: 30, Source: training_factory.py

Example 13: NumericColumn

class NumericColumn(BaseEstimator, TransformerMixin):
    '''
    Take a numeric value column and standardize it.
    '''

    def __init__(self):
        '''
        Set up the internal transformation.
        '''
        self._transformer = MinMaxScaler()

    def fit(self, X, y=None):
        '''
        Fit the standardization.
        '''
        zeroed = pd.DataFrame(np.array(X).reshape(-1, 1)).fillna(0)
        self._transformer.fit(zeroed)
        return self

    def transform(self, X):
        '''
        Transform a column of data into numerical percentage values.

        Parameters
        ----------
        X : pandas series or numpy array
        '''
        zeroed = pd.DataFrame(np.array(X).reshape(-1, 1)).fillna(0)
        return self._transformer.transform(zeroed).astype(np.float32)
Author: wballard, Project: tableclassifier, Lines: 29, Source: table_model.py
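A hypothetical usage sketch for the NumericColumn transformer above (the column values are made up); missing entries are filled with 0 and then the whole column is scaled to [0, 1]:

import pandas as pd

fares = pd.Series([7.25, 71.83, None, 8.05])   # hypothetical column with one missing value

col = NumericColumn()
print(col.fit(fares).transform(fares))         # float32 values in [0, 1]

Because it implements fit/transform via BaseEstimator and TransformerMixin, the same object can also be dropped into a scikit-learn Pipeline.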

Example 14: Breast_cancer

def Breast_cancer(training_size, test_size, n, PLOT_DATA):
    class_labels = [r'A', r'B']
    data, target = datasets.load_breast_cancer(True)
    sample_train, sample_test, label_train, label_test = train_test_split(data, target, test_size=0.3, random_state=12)

    # Now we standardize for a Gaussian around 0 with unit variance
    std_scale = StandardScaler().fit(sample_train)
    sample_train = std_scale.transform(sample_train)
    sample_test = std_scale.transform(sample_test)

    # Now reduce number of features to number of qubits
    pca = PCA(n_components=n).fit(sample_train)
    sample_train = pca.transform(sample_train)
    sample_test = pca.transform(sample_test)

    # Scale to the range (-1,+1)
    samples = np.append(sample_train, sample_test, axis=0)
    minmax_scale = MinMaxScaler((-1, 1)).fit(samples)
    sample_train = minmax_scale.transform(sample_train)
    sample_test = minmax_scale.transform(sample_test)

    # Pick training size number of samples from each distro
    training_input = {key: (sample_train[label_train == k, :])[:training_size] for k, key in enumerate(class_labels)}
    test_input = {key: (sample_train[label_train == k, :])[training_size:(
        training_size+test_size)] for k, key in enumerate(class_labels)}

    if PLOT_DATA:
        for k in range(0, 2):
            plt.scatter(sample_train[label_train == k, 0][:training_size],
                        sample_train[label_train == k, 1][:training_size])

        plt.title("PCA dim. reduced Breast cancer dataset")
        plt.show()

    return sample_train, training_input, test_input, class_labels
Author: GiuseppeOrlando878776, Project: qiskit-tutorials, Lines: 35, Source: svm_datasets.py
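One caveat worth flagging in this example: the MinMaxScaler is fitted on the concatenation of the training and test samples, so information about the test distribution leaks into preprocessing. A more cautious variant (a sketch, not the tutorial's code) fits the scaler on the training split only:

    # Fit the (-1, 1) scaling on the training data only
    minmax_scale = MinMaxScaler((-1, 1)).fit(sample_train)
    sample_train = minmax_scale.transform(sample_train)
    sample_test = minmax_scale.transform(sample_test)   # test values may fall slightly outside (-1, 1)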

Example 15: predict_new

 def predict_new(self, input):
     model = self.train_model()
     assert len(input) == 5 and type(input) == list
     scaler = MinMaxScaler(feature_range=(0, 1))
     scaler.fit(self.data)
     inp = scaler.transform([input])
     print(scaler.inverse_transform(model.predict(numpy.array(inp).reshape(1, 1, 5))))
Author: at553, Project: golden_touch, Lines: 7, Source: predict.py
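A final note on this pattern: predict_new refits a fresh MinMaxScaler on self.data at prediction time, which only reproduces the training-time scaling if self.data is exactly the data the training scaler saw. A common alternative (a sketch with an illustrative file name, not the author's code) is to persist the fitted scaler and reload it:

import joblib

# At training time: save the scaler fitted on the training data
joblib.dump(scaler, 'scaler.joblib')

# At prediction time: reload it instead of refitting
scaler = joblib.load('scaler.joblib')
inp = scaler.transform([input])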


Note: The sklearn.preprocessing.MinMaxScaler class examples in this article were compiled by 纯净天空 from open-source code and documentation platforms such as GitHub/MSDocs. The code snippets are selected from open-source projects contributed by many developers, and the copyright of the source code belongs to the original authors; please consult the corresponding project's License before distributing or using it. Do not reproduce without permission.