

Python MinMaxScaler.transform Method Code Examples

This article collects typical usage examples of the Python method sklearn.preprocessing.MinMaxScaler.transform. If you are wondering what MinMaxScaler.transform does, how to use it, or what calling it looks like in practice, the curated code examples below should help. You can also explore further usage examples of the class it belongs to, sklearn.preprocessing.MinMaxScaler.


The following presents 15 code examples of MinMaxScaler.transform, ordered by popularity by default.
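Before the examples, here is a minimal sketch of the basic fit/transform workflow (toy arrays invented for illustration): fit learns each feature's minimum and maximum from the training data, and transform applies that same linear mapping to any array.

import numpy as np
from sklearn.preprocessing import MinMaxScaler

X_train = np.array([[1.0, 10.0], [2.0, 20.0], [3.0, 30.0]])  # toy training data
X_test = np.array([[1.5, 25.0]])                             # toy test data

scaler = MinMaxScaler(feature_range=(0, 1)).fit(X_train)  # learn per-feature min/max
print(scaler.transform(X_train))  # each column mapped into [0, 1]
print(scaler.transform(X_test))   # same mapping; test values can fall outside [0, 1]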

Example 1: Iris

# Required import: from sklearn.preprocessing import MinMaxScaler [as alias]
# Or: from sklearn.preprocessing.MinMaxScaler import transform [as alias]
# This snippet also assumes: import numpy as np; import matplotlib.pyplot as plt;
# from sklearn import datasets; from sklearn.model_selection import train_test_split;
# from sklearn.preprocessing import StandardScaler
def Iris(training_size, test_size, n, PLOT_DATA):
    class_labels = [r'A', r'B', r'C']
    data, target = datasets.load_iris(return_X_y=True)
    sample_train, sample_test, label_train, label_test = train_test_split(data, target, test_size=1, random_state=42)

    # Standardize to a Gaussian around 0 with unit variance
    std_scale = StandardScaler().fit(sample_train)
    sample_train = std_scale.transform(sample_train)
    sample_test = std_scale.transform(sample_test)

    # Scale to the range (-1,+1)
    samples = np.append(sample_train, sample_test, axis=0)
    minmax_scale = MinMaxScaler((-1, 1)).fit(samples)
    sample_train = minmax_scale.transform(sample_train)
    sample_test = minmax_scale.transform(sample_test)

    # Pick training_size samples from each class distribution
    training_input = {key: (sample_train[label_train == k, :])[:training_size] for k, key in enumerate(class_labels)}
    test_input = {key: (sample_train[label_train == k, :])[training_size:(
        training_size+test_size)] for k, key in enumerate(class_labels)}

    if PLOT_DATA:
        for k in range(0, 3):
            plt.scatter(sample_train[label_train == k, 0][:training_size],
                        sample_train[label_train == k, 1][:training_size])

        plt.title("Iris dataset")
        plt.show()

    return sample_train, training_input, test_input, class_labels
Developer: GiuseppeOrlando878776, Project: qiskit-tutorials, Lines: 32, Source: svm_datasets.py
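Note that this example fits the MinMaxScaler on the concatenation of the training and test samples, which leaks test-set statistics into the preprocessing. A leakage-free variant of those three lines (a sketch reusing the example's variables, not the original tutorial code) fits on the training split only:

    minmax_scale = MinMaxScaler((-1, 1)).fit(sample_train)  # fit on training data only
    sample_train = minmax_scale.transform(sample_train)
    sample_test = minmax_scale.transform(sample_test)  # test values may fall outside (-1, +1)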

Example 2: prescale_data

# Required import: from sklearn.preprocessing import MinMaxScaler [as alias]
# Or: from sklearn.preprocessing.MinMaxScaler import transform [as alias]
def prescale_data(x_train, x_test, method):
    """
    Pre-scales training data and (optionally test data) using the specified method.
    :param x_train: The training data to be pre-scaled.
    :param x_test: The (optional) test data to be pre-scaled. Beware that the prescaler is only fit to the training
    data and not to the test data.
    :param method: The method to be used for prescaling. Allowed values are "minmaxscaler" and "standartscaler"
    :return: A tuple of the pre-scaled training and test data or only the training data if the test data was set to None
    """
    if method is not None:
        scaler = None
        if method == "minmaxscaler":
            from sklearn.preprocessing import MinMaxScaler

            scaler = MinMaxScaler()
        if method == "standartscaler":
            from sklearn.preprocessing import StandardScaler

            scaler = StandardScaler()

        if scaler is None:
            raise ValueError("Invalid pre-scaling method: {}".format(method))
        scaler.fit_transform(x_train)
        x_train = scaler.transform(x_train)
        if x_test is not None:
            x_test = scaler.transform(x_test)

    if x_test is not None:
        return x_train, x_test
    else:
        return x_train
Developer: SgnJp, Project: mload, Lines: 33, Source: k_nearest_neighbor_kdtree.py
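A hypothetical call (data invented for illustration; prescale_data as defined above):

import numpy as np

x_train = np.random.rand(100, 5)
x_test = np.random.rand(20, 5)

x_train_s, x_test_s = prescale_data(x_train, x_test, "minmaxscaler")
x_train_only = prescale_data(x_train, None, "standartscaler")  # note the function's own spelling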

Example 3: _scaled_data

# Required import: from sklearn.preprocessing import MinMaxScaler [as alias]
# Or: from sklearn.preprocessing.MinMaxScaler import transform [as alias]
    def _scaled_data(self):
        """Load scaled data.

        Args:
            None

        Returns:
            (scaler, train_scaled, test_scaled): Tuple of the fitted scaler and the scaled train and test data

        """
        # Initialize key variables
        (_train, _test) = self._data()

        # Fit scaler
        scaler = MinMaxScaler(feature_range=(-1, 1))
        scaler = scaler.fit(_train)

        # Transform train
        train = _train.reshape(_train.shape[0], _train.shape[1])
        train_scaled = scaler.transform(train)

        # Transform test
        test = _test.reshape(_test.shape[0], _test.shape[1])
        test_scaled = scaler.transform(test)

        # Return
        return scaler, train_scaled, test_scaled
Developer: palisadoes, Project: AI, Lines: 29, Source: forecast-keras-20180214.py

Example 4: NB_coefficients

# Required import: from sklearn.preprocessing import MinMaxScaler [as alias]
# Or: from sklearn.preprocessing.MinMaxScaler import transform [as alias]
# This snippet also assumes: import numpy as np; import pandas as pd; import subprocess;
# plus the project's own helpers (getFourSquarePOIDistribution, getTaxiFlow, etc.)
def NB_coefficients(year=2010):
    poi_dist = getFourSquarePOIDistribution(useRatio=False)
    F_taxi = getTaxiFlow(normalization="bydestination")
    W2 = generate_geographical_SpatialLag_ca()
    Y = retrieve_crime_count(year=year)
    C = generate_corina_features()
    D = C[1]

    popul = C[1][:,0].reshape(C[1].shape[0],1)
    Y = np.divide(Y, popul) * 10000
    
    f2 = np.dot(W2, Y)
    ftaxi = np.dot(F_taxi, Y)
    
    f = np.concatenate( (D, f2, ftaxi, poi_dist), axis=1 )
    mms = MinMaxScaler(copy=False)
    mms.fit(f)
    mms.transform(f)
    header = C[0] + [ 'spatiallag', 'taxiflow'] + \
        ['POI food', 'POI residence', 'POI travel', 'POI arts entertainment', 
                       'POI outdoors recreation', 'POI education', 'POI nightlife', 
                       'POI professional', 'POI shops', 'POI event']
    df = pd.DataFrame(f, columns=header)
    
    np.savetxt("Y.csv", Y, delimiter=",")
    df.to_csv("f.csv", sep=",", index=False)
    
    # NB permute
    nbres = subprocess.check_output(['Rscript', 'nbr_eval.R', 'ca', 'coefficient']).decode()
    print(nbres)
    
    ls = nbres.strip().split(" ")
    coef = [float(e) for e in ls]
    print(coef)
    return coef, header
Developer: thekingofkings, Project: chicago-crime, Lines: 37, Source: NBRegression.py

Example 5: Breast_cancer

# Required import: from sklearn.preprocessing import MinMaxScaler [as alias]
# Or: from sklearn.preprocessing.MinMaxScaler import transform [as alias]
# This snippet also assumes: import numpy as np; import matplotlib.pyplot as plt;
# from sklearn import datasets; from sklearn.model_selection import train_test_split;
# from sklearn.preprocessing import StandardScaler; from sklearn.decomposition import PCA
def Breast_cancer(training_size, test_size, n, PLOT_DATA):
    class_labels = [r'A', r'B']
    data, target = datasets.load_breast_cancer(return_X_y=True)
    sample_train, sample_test, label_train, label_test = train_test_split(data, target, test_size=0.3, random_state=12)

    # Standardize to a Gaussian around 0 with unit variance
    std_scale = StandardScaler().fit(sample_train)
    sample_train = std_scale.transform(sample_train)
    sample_test = std_scale.transform(sample_test)

    # Now reduce number of features to number of qubits
    pca = PCA(n_components=n).fit(sample_train)
    sample_train = pca.transform(sample_train)
    sample_test = pca.transform(sample_test)

    # Scale to the range (-1,+1)
    samples = np.append(sample_train, sample_test, axis=0)
    minmax_scale = MinMaxScaler((-1, 1)).fit(samples)
    sample_train = minmax_scale.transform(sample_train)
    sample_test = minmax_scale.transform(sample_test)

    # Pick training size number of samples from each distro
    training_input = {key: (sample_train[label_train == k, :])[:training_size] for k, key in enumerate(class_labels)}
    test_input = {key: (sample_train[label_train == k, :])[training_size:(
        training_size+test_size)] for k, key in enumerate(class_labels)}

    if PLOT_DATA:
        for k in range(0, 2):
            plt.scatter(sample_train[label_train == k, 0][:training_size],
                        sample_train[label_train == k, 1][:training_size])

        plt.title("PCA dim. reduced Breast cancer dataset")
        plt.show()

    return sample_train, training_input, test_input, class_labels
Developer: GiuseppeOrlando878776, Project: qiskit-tutorials, Lines: 37, Source: svm_datasets.py

Example 6: train

# Required import: from sklearn.preprocessing import MinMaxScaler [as alias]
# Or: from sklearn.preprocessing.MinMaxScaler import transform [as alias]
# This snippet also assumes: import xgboost as xgb; a joblib import (standalone or
# sklearn.externals.joblib); plus the project's readData and evaluate helpers
def train(mode):
    if mode == "NextWeek":
        DATA = "MLprojectOutput/week34567to8Formated/part-00000"
    else:
        DATA = "MLprojectOutput/week34567to9Formated/part-00000"
    X, Y = readData(DATA, 10000, -1)
    X_Scaler = MinMaxScaler().fit(X)
    joblib.dump(X_Scaler, 'Predict{0}_Scaler.pkl'.format(mode))
    X = X_Scaler.transform(X)
    dtrain = xgb.DMatrix(X, label = Y)
    param = { 'booster':"gbtree",
              'eta':0.3,
              'max_depth':6,
              'subsample':0.85,
              'colsample_bytree':0.7,
              'silent':0,
              'objective':'reg:linear',
              'nthread':10,
              'eval_metric':'rmse'}
    __model = xgb.train(param.items(), dtrain)
    __model.save_model('Predict{0}.model'.format(mode))
    X_TEST, Y_TEST = readData(DATA, 0, 10000)
    X_TEST = X_Scaler.transform(X_TEST)
    dtest = xgb.DMatrix(X_TEST)
    Y_pred = list(map(lambda x: int(x), __model.predict(dtest)))
    evaluate(Y_TEST,Y_pred)
Developer: nikitasonthalia, Project: MachineLearningProject, Lines: 28, Source: CreateModel.py

Example 7: train_lr

# Required import: from sklearn.preprocessing import MinMaxScaler [as alias]
# Or: from sklearn.preprocessing.MinMaxScaler import transform [as alias]
# This snippet also assumes: import datetime; import numpy as np;
# from sklearn import linear_model; plus the project's print_results helper
def train_lr( X, y, X_test, y_test, t, col_names = None, sample_weight = None ):
    sc = MinMaxScaler().fit(X)
    X = sc.transform(X)
    start = datetime.datetime.now()
    X_test_trans = sc.transform(X_test)
    print("training balanced LR..")
    lr = linear_model.LogisticRegression(class_weight='balanced')
    if sample_weight is not None:
        lr.fit(X, y, sample_weight)
    else:
        lr.fit(X, y)
    print("training mean accuracy = %.2f" % lr.score(X, y))
    print("testing mean accuracy = %.2f" % lr.score(X_test_trans, y_test))
    if col_names is not None:
        c = np.column_stack((col_names, np.round(lr.coef_.flatten(),2)))
        sorted_c = c[c[:,1].argsort()]
        print(sorted_c[:10])
        print(sorted_c[-10:])
    y_prob = lr.predict_proba(X_test_trans)
    end = datetime.datetime.now()
    delta = end - start
    y_pred = y_prob[:, 1] > t
    y_pred = y_pred.astype('uint8')
    print('--- t = %.2f results:' % t)
    print_results(y_test, y_pred)
    print('total time predictions: %f (s)' % delta.total_seconds())
    print('time per query: %f (s)' % (delta.total_seconds() / len(y_pred)))
    # Element-wise weight the misclassified test vectors by the LR coefficients
    false_preds = y_pred != y_test
    false_vectors = np.multiply(lr.coef_, X_test_trans[false_preds, :])
    print(false_vectors.shape)
    return y_pred
Developer: vahid-g, Project: CommunicationCapacity, Lines: 34, Source: run_swiki.py

Example 8: NMFReducer

# Required import: from sklearn.preprocessing import MinMaxScaler [as alias]
# Or: from sklearn.preprocessing.MinMaxScaler import transform [as alias]
# This snippet also assumes: import sys; from time import time; from sklearn import metrics;
# from sklearn.decomposition import NMF
class NMFReducer():

    def __init__(self, dataset, dataset_name, num_components=10):
        self.dataset = dataset
        self.dataset_name = dataset_name
        self.labels = dataset.target
        self.scaler = MinMaxScaler()
        self.data = self.scaler.fit_transform(dataset.data)
        self.n_samples, self.n_features = self.data.shape

        self.reducer = NMF(n_components=num_components, max_iter=5000)

    def reduce(self):
        self.reducer.fit(self.data)
        self.reduced = self.scaler.fit_transform(self.reducer.transform(self.data))
        return self.reduced

    def benchmark(self, estimator, name, data):
        t0 = time()
        sample_size = 300
        labels = self.labels

        estimator.fit(data)
        print('% 9s   %.2fs    %i   %.3f   %.3f   %.3f   %.3f   %.3f    %.3f'
              % (name, (time() - t0), estimator.inertia_,
                 metrics.homogeneity_score(labels, estimator.labels_),
                 metrics.completeness_score(labels, estimator.labels_),
                 metrics.v_measure_score(labels, estimator.labels_),
                 metrics.adjusted_rand_score(labels, estimator.labels_),
                 metrics.adjusted_mutual_info_score(labels,  estimator.labels_),
                 metrics.silhouette_score(data, estimator.labels_,
                                          metric='euclidean',
                                          sample_size=sample_size)))

    def display_reduced_digits(self):
        sys.stdout = open('out/NMFReduceDigitsOutput.txt', 'w')
        print("NMF Reduction of %s:\n" % self.dataset_name)
        print(40 * '-')
        print(self.reduced)
        print("\nLength of 1 input vector before reduction: %d \n" % len(self.data.tolist()[0]))
        print("Length of 1 input vector after reduction: %d \n" % len(self.reduced.tolist()[0]))
        print(40 * '-')
        print(self.reducer.reconstruction_err_)

    def display_reduced_iris(self):
        sys.stdout = open('out/NMFReduceIrisOutput.txt', 'w')
        print("NMF Reduction of %s:\n" % self.dataset_name)
        print(40 * '-')
        print(self.reduced)
        print("\nLength of 1 input vector before reduction: %d \n" % len(self.data.tolist()[0]))
        print("Length of 1 input vector after reduction: %d \n" % len(self.reduced.tolist()[0]))
        print(40 * '-')
        print(self.reducer.reconstruction_err_)

    def reduce_crossvalidation_set(self, X_train, X_test):
        self.reducer.fit(X_train)
        # Reduce each split with the fitted NMF, then rescale (mirrors reduce())
        reduced_X_train = self.scaler.fit_transform(self.reducer.transform(X_train))
        reduced_X_test = self.scaler.transform(self.reducer.transform(X_test))
        return reduced_X_train, reduced_X_test
Developer: camsmith293, Project: CS4641_HW3, Lines: 61, Source: NMFReducer.py

Example 9: preprocess_datasets

# Required import: from sklearn.preprocessing import MinMaxScaler [as alias]
# Or: from sklearn.preprocessing.MinMaxScaler import transform [as alias]
# This snippet also assumes: import numpy as np
def preprocess_datasets(X_train, X_test, args):
    if 'scale' in args.preprocessing:
        print('Scaling features to range [-1,1] ...')
        scaler = MinMaxScaler(feature_range=(-1, 1))
        scaler.fit(np.vstack(X_train))
        X_train = [scaler.transform(X_curr) for X_curr in X_train]
        X_test = [scaler.transform(X_curr) for X_curr in X_test]
    return X_train, X_test
Developer: caomw, Project: motion-classification, Lines: 10, Source: evaluate_features.py
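This example (like Example 10 below) handles a list of variable-length feature arrays, e.g. one per motion sequence: the scaler is fit once on the row-stacked frames, then applied to each sequence separately. A toy illustration under those assumptions:

import numpy as np
from sklearn.preprocessing import MinMaxScaler

X_train = [np.random.rand(50, 3), np.random.rand(80, 3)]  # two sequences, 3 features each
scaler = MinMaxScaler(feature_range=(-1, 1))
scaler.fit(np.vstack(X_train))                    # pool all frames to learn min/max
X_train = [scaler.transform(X) for X in X_train]  # scale each sequence with the same mapping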

Example 10: preprocess_datasets

# Required import: from sklearn.preprocessing import MinMaxScaler [as alias]
# Or: from sklearn.preprocessing.MinMaxScaler import transform [as alias]
# This snippet also assumes: import numpy as np, plus the project's Dataset class
def preprocess_datasets(train, test, args):
    if 'scale' in args.preprocessing:
        print('Scaling features to range [-1,1] ...')
        scaler = MinMaxScaler(feature_range=(-1, 1))
        scaler.fit(np.vstack(train.X))
        processed_train = Dataset([scaler.transform(X_curr) for X_curr in train.X], train.y, train.target_names, train.groups)
        processed_test = Dataset([scaler.transform(X_curr) for X_curr in test.X], test.y, test.target_names, test.groups)
    else:
        processed_train = train
        processed_test = test
    return processed_train, processed_test
Developer: caomw, Project: motion-classification, Lines: 13, Source: evaluate.py

Example 11: scale

# Required import: from sklearn.preprocessing import MinMaxScaler [as alias]
# Or: from sklearn.preprocessing.MinMaxScaler import transform [as alias]
def scale(train, test):
    # fit scaler
    scaler = MinMaxScaler(feature_range=(-1, 1))
    scaler = scaler.fit(train)
    # transform train
    train = train.reshape(train.shape[0], train.shape[1])
    train_scaled = scaler.transform(train)
    # transform test
    test = test.reshape(test.shape[0], test.shape[1])
    test_scaled = scaler.transform(test)
    return scaler, train_scaled, test_scaled
Developer: russellburdt, Project: data-science, Lines: 13, Source: tmp.py
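Returning the fitted scaler matters in forecasting pipelines like this one: model outputs produced in the scaled space can be mapped back to the original units with inverse_transform. A minimal sketch (toy arrays, assuming the scale function above):

import numpy as np

train = np.array([[10.0], [20.0], [30.0]])
test = np.array([[25.0]])
scaler, train_scaled, test_scaled = scale(train, test)

pred_scaled = np.array([[0.5]])               # e.g. a model output in (-1, 1) space
print(scaler.inverse_transform(pred_scaled))  # [[25.]] -- back in the original units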

Example 12: use

# Required import: from sklearn.preprocessing import MinMaxScaler [as alias]
# Or: from sklearn.preprocessing.MinMaxScaler import transform [as alias]
# This snippet also assumes older scikit-learn imports: Pipeline, SelectKBest, f_classif,
# PCA, GaussianNB, SVC, tree, grid_search, accuracy_score, plus module-level
# features_train/features_test/labels_train/labels_test and features_list variables
def use(method):
    if method == 'naive bayes':
        estimators = [("skb", SelectKBest(score_func=f_classif)),('pca', PCA()),
                      ('bayes',GaussianNB())]
        clf = Pipeline(estimators)
        parameters = {"skb__k":[8,9,10,11,12],
                      "pca__n_components":[2,6,4,8]}
        clf = grid_search.GridSearchCV(clf, parameters)
        scaler = MinMaxScaler()
        features_train_scaled = scaler.fit_transform(features_train)
        features_test_scaled = scaler.transform(features_test)
        clf.fit(features_train_scaled, labels_train)
        pred = clf.predict(features_test_scaled)
        print(clf.best_params_)
        features_k = clf.best_params_['skb__k']
        SKB_k = SelectKBest(f_classif, k=features_k)
        SKB_k.fit_transform(features_train_scaled, labels_train)
        print("features score: ")
        print(SKB_k.scores_)
        features_selected = [features_list[1:][i] for i in SKB_k.get_support(indices=True)]
        print(features_selected)
    elif method == 'svm':
        estimators = [('reduce_dim', PCA()), ('svc', SVC())]
        clf = Pipeline(estimators)
        parameters = {'svc__C': [1,10]}
        clf = grid_search.GridSearchCV(clf, parameters)
        scaler = MinMaxScaler()
        features_train_scaled = scaler.fit_transform(features_train)
        features_test_scaled = scaler.transform(features_test)
        clf.fit(features_train_scaled, labels_train)
        pred = clf.predict(features_test_scaled)
        print(clf.best_estimator_)
    elif method == 'decision tree':
        estimators = [("skb", SelectKBest(score_func=f_classif)),('pca', PCA()),
                      ('tree', tree.DecisionTreeClassifier())]
        clf = Pipeline(estimators)
        parameters = {"tree__min_samples_split": [2,10],"skb__k":[8,9,10,11,12],
                      "pca__n_components":[2,4,6,8]}
        clf = grid_search.GridSearchCV(clf, parameters)
        scaler = MinMaxScaler()
        features_train_scaled = scaler.fit_transform(features_train)
        features_test_scaled = scaler.transform(features_test)
        clf.fit(features_train_scaled, labels_train)
        pred = clf.predict(features_test_scaled)
        print(clf.best_params_)
        features_k = clf.best_params_['skb__k']
        SKB_k = SelectKBest(f_classif, k=features_k)
        SKB_k.fit_transform(features_train_scaled, labels_train)
        features_selected = [features_list[1:][i] for i in SKB_k.get_support(indices=True)]
        print(features_selected)
    accuracy = accuracy_score(labels_test, pred)
    print("accuracy score:")
    print(accuracy)
    calculate_precision_recall(pred, labels_test)
Developer: hcxyeah, Project: Machine-Learning, Lines: 56, Source: poi_id.py

Example 13: scale

# Required import: from sklearn.preprocessing import MinMaxScaler [as alias]
# Or: from sklearn.preprocessing.MinMaxScaler import transform [as alias]
def scale(X_train, X_test):
    """
    This function takes two parameters, the Training & Testing samples/features and returns the respective normalized/scaled
    versions.
    :param X_train: Training set samples
    :param X_test: Testing set samples
    :return: tuple of normalized/scaled Training & Testing sets samples
    """
    scaler = MinMaxScaler().fit(X_train) #scaler object fitted on training set of samples
    scaled_X_train = scaler.transform(X_train) #transformed normalized data - Training set samples
    scaled_X_test = scaler.transform(X_test) #transformed normalized data - Testing set samples
    return (scaled_X_train, scaled_X_test)
Developer: cpnguyen5, Project: hs698-flask_ml, Lines: 14, Source: views.py

Example 14: scale_data

# Required import: from sklearn.preprocessing import MinMaxScaler [as alias]
# Or: from sklearn.preprocessing.MinMaxScaler import transform [as alias]
def scale_data(train, test):

    scaler = MinMaxScaler(feature_range=(-1, 1))

    # determine per-feature min and max on the training set and scale it
    scaler = scaler.fit(train)
    train_scaled = scaler.transform(train)

    # apply the found parameters to test set (DO NOT compute them again)
    test_scaled = scaler.transform(test)

    return train_scaled, test_scaled
Developer: PaulZhutovsky, Project: rsn_analysis, Lines: 14, Source: ml_utils.py
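The comments above describe exactly what fit stores and transform applies: for feature_range = (a, b), transform computes X_scaled = (X - data_min_) / (data_max_ - data_min_) * (b - a) + a, using the per-feature minima and maxima learned from the training set. A quick sketch (toy data) verifying this against the fitted attributes:

import numpy as np
from sklearn.preprocessing import MinMaxScaler

train = np.array([[0.0, 5.0], [10.0, 15.0]])
scaler = MinMaxScaler(feature_range=(-1, 1)).fit(train)

X = np.array([[5.0, 10.0]])
manual = (X - scaler.data_min_) / (scaler.data_max_ - scaler.data_min_) * 2 + (-1)
print(np.allclose(scaler.transform(X), manual))  # True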

Example 15: preprocess_split_data

# Required import: from sklearn.preprocessing import MinMaxScaler [as alias]
# Or: from sklearn.preprocessing.MinMaxScaler import transform [as alias]
# This snippet also assumes: import numpy as np; from sklearn.preprocessing import StandardScaler;
# plus the project's remove_data_overlap, expand_framewise_labels, and framewise_to_stats helpers
def preprocess_split_data(train_data, valid_data, test_data,
                          feature_mode='framewise', non_overlap=False,
                          non_overlap_chunk_size=10, use_min_max=False):
    # NOTE: This function mutates data so there aren't extra copies

    # Remove overlapping frames if no overlap
    if non_overlap:
        remove_data_overlap(train_data, chunk_size=non_overlap_chunk_size)
        if valid_data:
            remove_data_overlap(valid_data, chunk_size=non_overlap_chunk_size)
        remove_data_overlap(test_data, chunk_size=non_overlap_chunk_size)

    # Apply min max scaling to data
    min_max_scaler = MinMaxScaler()
    if use_min_max:
        train_data['features'] = min_max_scaler.fit_transform(
            train_data['features'])
        if valid_data:
            valid_data['features'] = min_max_scaler.transform(valid_data['features'])
        test_data['features'] = min_max_scaler.transform(test_data['features'])

    if feature_mode == 'framewise':
        # Expand training and validation labels to apply to each frame
        expand_framewise_labels(train_data)
        if valid_data:
            expand_framewise_labels(valid_data)
    elif feature_mode == 'stats':
        # Summarize frames in each file using summary statistics
        framewise_to_stats(train_data)
        if valid_data:
            framewise_to_stats(valid_data)
        framewise_to_stats(test_data)
    else:
        raise ValueError('Invalid feature mode: {}'.format(feature_mode))

    # Standardize features
    stdizer = StandardScaler()
    train_data['features'] = stdizer.fit_transform(train_data['features'])
    if valid_data:
        valid_data['features'] = stdizer.transform(valid_data['features'])
    test_data['features'] = stdizer.transform(test_data['features'])

    # Shuffle training data
    num_train_examples = len(train_data['labels'])
    shuffle_idxs = np.random.permutation(num_train_examples)
    reverse_shuffle_idxs = np.argsort(shuffle_idxs)
    train_data['features'] = train_data['features'][shuffle_idxs]
    train_data['labels'] = train_data['labels'][shuffle_idxs]
    train_data['file_idxs'] = [reverse_shuffle_idxs[slice(*pair)]
                               for pair in train_data['file_idxs']]

    return min_max_scaler, stdizer
Developer: Mohitsharma44, Project: l3embedding, Lines: 54, Source: features.py
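Because the function returns both fitted scalers, the same preprocessing can be replayed on unseen data at inference time. A hedged sketch of that reuse (assumes new_features is an array with the same columns as the training features and that use_min_max was enabled):

# Hypothetical reuse of the returned scalers on unseen data
new_features = min_max_scaler.transform(new_features)  # same min/max as training
new_features = stdizer.transform(new_features)         # same mean/std as training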


Note: The sklearn.preprocessing.MinMaxScaler.transform examples in this article were compiled by 纯净天空 from open-source code and documentation platforms such as GitHub and MSDocs. The snippets were selected from open-source projects contributed by their respective authors, who retain copyright; consult each project's License before redistributing or reusing the code. Do not reproduce without permission.