

Python preprocessing.scale Function Code Examples

This article collects typical usage examples of Python's sklearn.preprocessing.scale function. If you are wondering what the scale function does, how to call it, or what real-world usage looks like, the curated examples below should help.


Fifteen code examples of the scale function are shown below, sorted roughly by popularity.
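Before the examples, here is a minimal, self-contained illustration (not taken from any of the projects below) of the function's default behaviour: each column is shifted to zero mean and rescaled to unit variance.

import numpy as np
from sklearn import preprocessing

X = np.array([[1.0, -1.0,  2.0],
              [2.0,  0.0,  0.0],
              [0.0,  1.0, -1.0]])

X_scaled = preprocessing.scale(X)
print(X_scaled.mean(axis=0))   # approximately [0. 0. 0.]
print(X_scaled.std(axis=0))    # [1. 1. 1.]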

Example 1: classify

def classify():
    # read training data
    lbls1, X, y = readCsv(TRAIN_CSV, True)
    # read test data
    lbls2, Y, z = readTestCsv(TEST_CSV)
    
    # Conversion to numpy arrays
    X = np.array(X)
    X = X.astype(float)
    y = np.array(y)
    
    Y = np.array(Y)
    Y = Y.astype(float)
    
    # perform feature scaling for zero mean and unit variance;
    # scale() returns a new array, so the result must be assigned back
    X = scale(X, with_mean=True, with_std=True)
    Y = scale(Y, with_mean=True, with_std=True)
    
    lin_svc = svm.LinearSVC(C = 4.0, dual = False)
    lin_svc.fit(X, y)
    
    bestmodel = lin_svc
    preds = bestmodel.predict(Y)
    
    writePredictions(lbls2, preds)
Author: godofwharf, Project: ImageClassification, Lines: 25, Source file: model.py
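Note that the snippet above standardizes the training matrix X and the test matrix Y independently. A common alternative, sketched below with placeholder data (this is not part of the original project), is to fit a StandardScaler on the training features and reuse its statistics on the test set so both share the same transformation.

import numpy as np
from sklearn import svm
from sklearn.preprocessing import StandardScaler

X_train = np.random.rand(100, 5)          # placeholder training features
y_train = np.random.randint(0, 2, 100)    # placeholder labels
X_test = np.random.rand(20, 5)            # placeholder test features

scaler = StandardScaler().fit(X_train)    # learn mean/std on training data only
clf = svm.LinearSVC(C=4.0, dual=False)
clf.fit(scaler.transform(X_train), y_train)
preds = clf.predict(scaler.transform(X_test))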

Example 2: create_data_provider

def create_data_provider(dataset, force_write_cache = False, center_data = True,
                         scale_data = True, add_bias_feature = True, normalize_datapoints = False,
                         center_labels = False, scale_labels = False,
                         transform_labels_to_plus_minus_one = True, test_size=0.0):
    data, labels = dataset.get_data(force_write_cache=force_write_cache)
    copy = False
    if scale_data:
        data = preprocessing.scale(data, copy=copy)
    elif center_data:
        data = preprocessing.scale(data, with_std=False, copy=copy)
    if scale_labels:
        labels = preprocessing.scale(labels, copy=copy)
    elif center_labels:
        labels = preprocessing.scale(labels, with_std=False, copy=copy)
    if add_bias_feature:
        data = np.hstack((data, np.ones((data.shape[0], 1))))
    if normalize_datapoints:
        data /= np.linalg.norm(data, axis=1)[:, np.newaxis]
    if transform_labels_to_plus_minus_one:
        labels = labels * 2.0 - 1.0
    test_provider = None
    if test_size > 0.0:
        data, data_test, labels, labels_test = cross_validation.train_test_split(data, labels, test_size=test_size)
        test_provider = DataProvider(data_test, labels_test)
    return DataProvider(data, labels, test_provider=test_provider)
Author: yk, Project: mldatasets, Lines: 25, Source file: mldatasets.py
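For reference, the two preprocessing paths used above differ only in the with_std flag: scale(data) standardizes each column, while scale(data, with_std=False) merely centers it. A small illustration with made-up values:

import numpy as np
from sklearn import preprocessing

data = np.array([[1.0, 10.0],
                 [2.0, 20.0],
                 [3.0, 30.0]])

standardized = preprocessing.scale(data)              # zero mean, unit variance
centered = preprocessing.scale(data, with_std=False)  # zero mean, original spread

print(standardized.std(axis=0))   # [1. 1.]
print(centered.std(axis=0))       # original per-column standard deviations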

Example 3: extractFeatures

def extractFeatures(data, n):
    logging.info('Features: extracting {0}...'.format(n))

    # create DF
    columns = []
    col_names = ['open', 'high', 'low', 'close', 'volume']
    for col_name in col_names:
        for m in xrange(1, n+1):
            columns.append('{0}_{1}'.format(col_name, m))
    # pprint(columns)
    df = pd.DataFrame(dtype=float, columns=columns)

    pb = ProgressBar(maxval=len(data)).start()
    for i in xrange(n, len(data)+1):
        pb.update(i)
        slice = data.ix[i-n:i]
        # print slice
        scale(slice, axis=0, copy=False)
        # print slice
        cntr = 0
        item = {}
        for slice_index, slice_row in slice.iterrows():
            cntr += 1
            # print slice_index
            # print slice_row
            for col in slice.columns:
                item['{0}_{1}'.format(col, cntr)] = slice_row[col]
        # pprint(item)
        df.loc[i] = item
        # break
    pb.finish()

    logging.info('Features: extracted')
    return df
Author: vishnuvr, Project: trading, Lines: 34, Source file: generator.py

Example 4: split_into_chunks

def split_into_chunks(data, train, predict, step, binary=True, scale=True):
    X, Y = [], []
    for i in range(0, len(data), step):
        try:
            x_i = data[i:i+train]
            y_i = data[i+train+predict]
            
            # Use it only for daily return time series
            if binary:
                if y_i > 0.:
                    y_i = [1., 0.]
                else:
                    y_i = [0., 1.]

                if scale: x_i = preprocessing.scale(x_i)
                
            else:
                timeseries = np.array(data[i:i+train+predict])
                if scale: timeseries = preprocessing.scale(timeseries)
                x_i = timeseries[:-1]
                y_i = timeseries[-1]
            
        except IndexError:
            # stop once the window runs past the end of the series
            break

        X.append(x_i)
        Y.append(y_i)

    return X, Y
Author: Rachnog, Project: Deep-Trading, Lines: 29, Source file: processing.py
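As a quick usage sketch (the window sizes below are illustrative, not taken from the original project), the function can be applied to a daily-return series like this:

import numpy as np

returns = np.random.randn(1000) * 0.01     # placeholder daily returns
X, Y = split_into_chunks(returns, train=30, predict=3, step=5, binary=True, scale=True)
print(len(X), len(Y))                      # number of (window, label) pairs produced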

Example 5: standardize

    def standardize(self):
        """
        Standardize the training and test sets to zero mean and unit variance.
        """
        print('Standardization')
        self.tr = scale(self.tr)
        self.te = scale(self.te)
Author: Hossein-Noroozpour, Project: PyHDM, Lines: 7, Source file: HDataManager.py

Example 6: buildModel

def buildModel(size):
	with open('Sentiment Analysis Dataset.csv', 'rb') as csvfile:
		pos_tweets =[]
		neg_tweets =[]
		spamreader = csv.reader(csvfile, delimiter=',')
		for row in spamreader:
			if row[1] == '1':
				if not (len(pos_tweets) > size):
					pos_tweets.append(_cleanTweet(row[3]))
			else:
				if not (len(neg_tweets) > size):
					neg_tweets.append(_cleanTweet(row[3]))
	y = np.concatenate((np.ones(len(pos_tweets[0:size])), np.zeros(len(neg_tweets[0:size]))))
	x_train, x_test, y_train, y_test = train_test_split(np.concatenate((pos_tweets[0:size], neg_tweets[0:size])), y, test_size=0.2)
	x_train = _cleanText(x_train)
	x_test = _cleanText(x_test)
	n_dim = 100
	#Initialize model and build vocab
	imdb_w2v = Word2Vec(size=n_dim, min_count=10)
	imdb_w2v.build_vocab(x_train)
	imdb_w2v.train(x_train)
	train_vecs = np.concatenate([buildWordVector(z, n_dim,imdb_w2v) for z in x_train])
	train_vecs = scale(train_vecs)
	#Train word2vec on test tweets
	imdb_w2v.train(x_test)
	#Build test tweet vectors then scale
	test_vecs = np.concatenate([buildWordVector(z, n_dim,imdb_w2v) for z in x_test])
	test_vecs = scale(test_vecs)
	lr = SGDClassifier(loss='log', penalty='l1')
	lr.fit(train_vecs, y_train)
	imdb_w2v.save("imdb_w2v")
	f = open("Accuracy.txt","w")
	f.write(str(lr.score(test_vecs, y_test))+" "+str(size*2))
	f.close()
Author: phugiadang, Project: CSCI-4308-Open-Sources-Data-Analytics, Lines: 34, Source file: TweetAnalWord2Vec.py

Example 7: trainModel

def trainModel():
    # Model parameters
    W = tf.Variable([.1000], tf.float32)
    b = tf.Variable([-.1000], tf.float32)
    # Model input and output
    x = tf.placeholder(tf.float32, shape=None)
    linear_model = W * x + b
    y = tf.placeholder(tf.float32)
    # loss
    loss = tf.reduce_sum(tf.square(linear_model - y))  # sum of the squares
    # optimizer
    optimizer = tf.train.GradientDescentOptimizer(0.01)
    train = optimizer.minimize(loss)
    # training data
    x_train = preprocessing.scale(mouseClickX)
    y_train = preprocessing.scale(mouseClickY)
    init = tf.global_variables_initializer()
    sess = tf.Session()
    sess.run(init)  # initialize W and b to their starting values
    for i in range(500):
        sess.run([train], {x: x_train, y: y_train})
        if i % 50 == 0:
            # to visualize the result and improvement
            try:
                ax.lines.remove(lines[0])
            except Exception:
                pass
            print(x_train, y_train, i)
            prediction_value = sess.run(linear_model, feed_dict={x: mouseClickX})
            # plot the prediction
            lines = ax.plot(mouseClickX, prediction_value, 'r-', lw=5)
            plt.pause(1)
Author: allamtb, Project: neural-networks-and-deep-learning, Lines: 32, Source file: 2+实时回归用户在界面输入的散点.py

Example 8: scale

    def scale(self):
        # FIXME: this cannot work this way, scaling must be done with
        # the joined set.
        if self.X is not None:
            self.X = preprocessing.scale(self.X)
        if self.X_test is not None:
            self.X_test = preprocessing.scale(self.X_test)
Author: aydindemircioglu, Project: MixMex, Lines: 7, Source file: DataSet.py

Example 9: main

def main():

	X, Y, X_test = import_data()

	X_n = preprocessing.scale(X)
	X_t_n = preprocessing.scale(X_test)

	X_train, X_test, y_train, y_test = cross_validation.train_test_split( \
	X_n, Y, test_size=0.2, random_state=0)

	alpha = np.arange(0.001, 2.0, 0.001, np.float)

	best_alpha = 0
	best_score = 0

	for a in alpha:
		clf = linear_model.Ridge (alpha = a)
		clf.fit(X_train, y_train)
		sc = clf.score(X_test, y_test)
		if sc > best_score:
			best_alpha = a
			best_score = sc

	
	clf = linear_model.Ridge (alpha = best_alpha)
	clf.fit(X_train, y_train)
	res = clf.predict(X_t_n)

	for var in res:
		print(var[0])
Author: ravediamond, Project: HackerRank_DataAnalysis, Lines: 30, Source file: code.py

Example 10: main

def main():
    """TODO: Docstring for main.
    :returns: TODO

    """
    alpha = 1.
    decay = 0.0006
    iter_num = 600
    finetune_iter = 220
    hyper_params = {
            'hidden_layers_sizes':[196,], 'iter_nums':[400,],
            'alphas':[1.,], 'decays':[0.003,],
            'betas':[3,], 'rhos':[0.1,]
            }

    enc = OneHotEncoder(sparse=False)
    mnist = fetch_mldata('MNIST original', data_home='./')
    x_train, x_test, y_train, y_test = \
            train_test_split(scale(mnist.data.astype(float)).astype('float32'),
                             mnist.target.astype('float32'),
                             test_size=0.5, random_state=0)
    x_unlabeled = scale(mnist.data[mnist.target>=5,:].astype(float)).astype('float32')
    y_train = enc.fit_transform(y_train.reshape(y_train.shape[0],1)).astype('float32')

    t_x = T.matrix()
    params, extracted = pretrain_sae(x_unlabeled, hyper_params)
    extracted = function(inputs=[t_x], outputs=[sae_extract(t_x, params)])(x_train)[0]
    params.append(train_softmax(extracted, y_train, iter_num, alpha, decay))
    weights = finetune_sae(x_train, y_train, params, finetune_iter, alpha, decay)

    all_label = np.array(range(0, 10))
    pred = all_label[softmax2class_max(sae_predict(x_test, weights))]
    print accuracy_score(y_test, pred)
    print classification_report(y_test, pred)
    print confusion_matrix(y_test, pred)
Author: ShiehShieh, Project: UFLDL-Solution, Lines: 35, Source file: sae.py

Example 11: get_correlation_data

    def get_correlation_data(self, round_number, liste_id, dataset):
        points = []

        # first retrieve the vote percentages for the given list
        poll_data = self.retrieve_total_votes_for_liste(round_number, liste_id)

        # arrange the data into a clean dict
        data_x, data_y = [],[]
        for dept_data in poll_data:
            data_x.append(dept_data["vote_percentage"])
            data_y.append(dataset[dept_data["_id"]] / 100)
            points.append({"dept_id" : dept_data["_id"],
                           "votes_percentage" : dept_data["vote_percentage"],
                           "other_percentage" : dataset[dept_data["_id"]] / 100})

        array_x, array_y = array(data_x), array(data_y)

        # normalize the vote data and the dataset values
        rescaled_x, rescaled_y  = preprocessing.scale(array_x), preprocessing.scale(array_y)

        # compute a color for each department
        colors, max_val = self._compute_colors(rescaled_x, rescaled_y)

        # compute the regression line coefficients on the non-normalized data
        reg_slope, reg_y_intercept = self._linear_regression(array_x, array_y)

        for i, x in enumerate(rescaled_x):
            points[i]["votes_normalized"] = rescaled_x[i]
            points[i]["other_normalized"] = rescaled_y[i]
            points[i]["color"] = colors[i]

        return {"points" : points,
                "graph_metadata": {"max" : max_val,
                                   "regression": {"slope" : reg_slope,
                                                  "intercept" : reg_y_intercept}}}
Author: ThomasPoncet, Project: Ocre, Lines: 35, Source file: correlations.py

Example 12: permutation_cross_validation

def permutation_cross_validation(estimator, X, y, n_fold=3, isshuffle=True, cvmeth='shufflesplit', score_type='r2', n_perm=1000):
    """
    An easy way to evaluate the significance of a cross-validated score by permutations
    -------------------------------------------------
    Parameters:
        estimator: linear model estimator
        X: IV
        y: DV
        n_fold: number of folds for cross-validation
        cvmeth: kfold or shufflesplit. 
                shufflesplit is the random permutation cross-validation iterator
        score_type: scoring type, 'r2' as default
        n_perm: permutation numbers
    Return:
        score: model scores
        permutation_scores: model scores when permutation labels
        pvalues: p value of permutation scores
    """
    try:
        from sklearn import cross_validation, preprocessing
    except ImportError:
        raise Exception('To call this function, please install sklearn')
    if X.ndim == 1:
        X = np.expand_dims(X, axis = 1)
    if y.ndim == 1:
        y = np.expand_dims(y, axis = 1)
    X = preprocessing.scale(X)
    y = preprocessing.scale(y)
    if cvmeth == 'kfold':
        cvmethod = cross_validation.KFold(y.shape[0], n_fold, shuffle = isshuffle)
    elif cvmeth == 'shufflesplit':
        testsize = 1.0/n_fold
        cvmethod = cross_validation.ShuffleSplit(y.shape[0], n_iter = 100, test_size = testsize, random_state = 0)
    score, permutation_scores, pvalues = cross_validation.permutation_test_score(estimator, X, y, scoring = score_type, cv = cvmethod, n_permutations = n_perm)
    return score, permutation_scores, pvalues
Author: helloTC, Project: ATT, Lines: 35, Source file: tools.py
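The snippet above imports sklearn.cross_validation, which was removed in scikit-learn 0.20. A minimal sketch of the same idea with the current sklearn.model_selection API follows (the estimator and data are placeholders, not from the original project):

import numpy as np
from sklearn import preprocessing
from sklearn.linear_model import LinearRegression
from sklearn.model_selection import ShuffleSplit, permutation_test_score

rng = np.random.RandomState(0)
X = preprocessing.scale(rng.randn(100, 3))
y = preprocessing.scale(X @ np.array([1.0, -2.0, 0.5]) + 0.1 * rng.randn(100))

cv = ShuffleSplit(n_splits=100, test_size=1.0 / 3, random_state=0)
score, perm_scores, pvalue = permutation_test_score(
    LinearRegression(), X, y, scoring='r2', cv=cv, n_permutations=100)
print(score, pvalue)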

Example 13: load_all_data

def load_all_data(f_name, scale=True, rnd=False):
    """Get data with labels, split into training, validation and test set."""
    data_file = h5py.File(f_name, 'r')
    x_test = data_file['x_test'][:]
    x_dev = data_file['x_dev'][:]
    x_train = data_file['x_train'][:]
    data_file.close()
    if scale:
        print "scaling..."
        x_test = preprocessing.scale(x_test, with_mean=False)
        x_dev = preprocessing.scale(x_dev, with_mean=False)
        x_train = preprocessing.scale(x_train, with_mean=False)
    print "Total dataset size:"
    print "n train samples: %d" % x_train.shape[0]
    print "n test samples: %d" % x_test.shape[0]
    print "n dev samples: %d" % x_dev.shape[0]
    print "n features: %d" % x_test.shape[1]
    if rnd:
        print "Radomizing training set..."
        np.random.shuffle(x_train)

    return dict(
        x_train=x_train,
        x_test=x_test,
        x_dev=x_dev,
    )
Author: mikimaus78, Project: groupNMF, Lines: 26, Source file: base.py
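A side note on the with_mean=False flag used above: disabling centering is what allows scale() to accept scipy.sparse input, because subtracting the column means would densify a sparse matrix (whether that was the motivation in this particular project is not stated in the source). A tiny illustration:

import numpy as np
from scipy import sparse
from sklearn import preprocessing

X = sparse.csr_matrix(np.array([[0.0, 2.0],
                                [0.0, 4.0],
                                [3.0, 0.0]]))

X_scaled = preprocessing.scale(X, with_mean=False)   # variance scaling only; stays sparse
# preprocessing.scale(X)  # would raise ValueError: sparse matrices cannot be centered
print(X_scaled.toarray())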

Example 14: get_feature_importances

def get_feature_importances(data_table, obs_metadata, lines_table, use_con_flux=False):
    feature_importances_list = []
    X_colnames = None
    for line_name, line_wavelength in lines_table['source', 'wavelength_target']:
        subset = data_table[(data_table['source'] == line_name) & (data_table['wavelength_target'] == line_wavelength)]
        X, y, labels = get_X_and_y(subset, obs_metadata, use_con_flux)
        if X_colnames is None:
            X_colnames = X.colnames

        params = {'n_estimators': 500, 'max_depth': 4, 'min_samples_split': 1,
                'learning_rate': 0.01, 'loss': 'lad'}
        clf = ensemble.GradientBoostingRegressor(**params)
        X = ndarrayidze(X)

        # Scaling is optional, but it is applied here for all methods so that the
        # values stay comparable with e.g. the ICA results (fewer differences to explain)
        X = skpp.scale(X)
        y = skpp.scale(y)

        clf.fit(X, y)
        feature_importances_list.append(clf.feature_importances_)

    fi = np.array(feature_importances_list)
    fi_table = Table(fi, names = X_colnames)
    fi_table.add_column(lines_table['source'])
    fi_table.add_column(lines_table['wavelength_target'])

    return fi_table
Author: dcunning11235, Project: skyflux, Lines: 28, Source file: gradient_boost_peaks.py

Example 15: run

    def run(self):
        roi_data = []
        seg_data = []

        provider_roi = self.roi_layer.dataProvider()
        provider_seg = self.seg_layer.dataProvider()

        feat_seg = QgsFeature()

        self.status.emit('building spatial index')
        time.sleep(0.3)
        index = QgsSpatialIndex()
        piter = 0
        feat_count = provider_seg.featureCount()
        for f in provider_seg.getFeatures():
            seg_data.append(f.attributes()[1:])
            index.insertFeature(f)
            piter += 1
            self.progress.emit(piter * 15 / feat_count)


        self.status.emit('extracting attributes')
        self.log.emit('extracting attributes from roi segments intersection')
        time.sleep(0.3)
        # intersect roi with segments and extract attributes
        piter = 0
        feat_count = provider_roi.featureCount()
        for feat_roi in provider_roi.getFeatures():
            geom = feat_roi.geometry()
            attr_roi = feat_roi.attributes()
            intersects = index.intersects(geom.boundingBox())
            for fid in intersects:
                ffilter = QgsFeatureRequest().setFilterFid(int(fid))
                provider_seg.getFeatures(ffilter).nextFeature(feat_seg)
                # skip geometries that do not intersect
                if geom.intersects(feat_seg.geometry()):
                    attr_seg = feat_seg.attributes()
                    roi_data.append(attr_seg[1:] + attr_roi)
            # emit progress
            piter += 1
            self.progress.emit(15 + (piter * 55 / feat_count))

        # read train data
        roi_data = np.array(roi_data)
        samples = roi_data[:,:-1]
        labels = roi_data[:,-1].astype(int)
        # svm fit and predict
        self.status.emit('svm: fitting data')
        time.sleep(0.3)
        classifier = svm.SVC(**self.svm_dict)
        classifier.fit(preprocessing.scale(samples), labels)
        self.progress.emit(85)

        self.status.emit('svm: predicting labels')
        time.sleep(0.3)
        seg_data = preprocessing.scale(seg_data)
        predictions = classifier.predict(seg_data).tolist()
        self.progress.emit(100)

        self.output = pickle.dumps(predictions)
Author: vitorhirota, Project: QgisImageAnalysis, Lines: 60, Source file: classifier.py


Note: the sklearn.preprocessing.scale examples in this article were compiled by 纯净天空 from open-source code and documentation platforms such as GitHub and MSDocs. The snippets are selected from open-source projects contributed by various developers, and copyright of the source code remains with the original authors; please consult each project's license before distributing or using the code. Do not reproduce without permission.