

Python RandomizedPCA.transform Method Code Examples

This article collects typical usage examples of the Python method sklearn.decomposition.RandomizedPCA.transform. If you are wondering what RandomizedPCA.transform does, how to call it, or what real-world uses look like, the curated code samples below should help. You can also explore further examples of the containing class, sklearn.decomposition.RandomizedPCA.


The following presents 15 code examples of RandomizedPCA.transform, sorted by popularity by default.
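Before diving into the examples, here is a minimal sketch of the fit/transform pattern they all share (the data and parameter values below are illustrative, not taken from any example). Note that RandomizedPCA was deprecated in scikit-learn 0.18 and removed in 0.20, so these examples require an older release:

import numpy as np
from sklearn.decomposition import RandomizedPCA  # removed in scikit-learn 0.20

X = np.random.RandomState(0).randn(100, 50)  # synthetic data: 100 samples, 50 features
pca = RandomizedPCA(n_components=10, whiten=True)
pca.fit(X)                    # estimate the top 10 principal components
X_reduced = pca.transform(X)  # project X onto those components
print(X_reduced.shape)        # (100, 10)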

Example 1: tryLinearDiscriminantAnalysis

# Required import: from sklearn.decomposition import RandomizedPCA [as alias]
# Or: from sklearn.decomposition.RandomizedPCA import transform [as alias]
def tryLinearDiscriminantAnalysis(goFast):
  from sklearn.datasets import dump_svmlight_file, load_svmlight_file
  if goFast:
    training_data, training_labels = load_svmlight_file("dt1_1500.trn.svm", n_features=253659, zero_based=True)
    validation_data, validation_labels = load_svmlight_file("dt1_1500.vld.svm", n_features=253659, zero_based=True)
    testing_data, testing_labels = load_svmlight_file("dt1_1500.tst.svm", n_features=253659, zero_based=True)
  else:
    training_data, training_labels = load_svmlight_file("dt1.trn.svm", n_features=253659, zero_based=True)
    validation_data, validation_labels = load_svmlight_file("dt1.vld.svm", n_features=253659, zero_based=True)
    testing_data, testing_labels = load_svmlight_file("dt1.tst.svm", n_features=253659, zero_based=True)

  from sklearn.lda import LDA
  from sklearn.metrics import accuracy_score
  from sklearn.grid_search import ParameterGrid
  from sklearn.decomposition import RandomizedPCA

  rpcaDataGrid = [{"n_components": [10,45,70,100],
                    "iterated_power": [2, 3, 4],
                    "whiten": [True]}]

  for rpca_parameter_set in ParameterGrid(rpcaDataGrid):
    rpcaOperator = RandomizedPCA(**rpca_parameter_set)
    rpcaOperator.fit(training_data)  # PCA is unsupervised; labels are not used
    new_training_data = rpcaOperator.transform(training_data)
    new_validation_data = rpcaOperator.transform(validation_data)
    ldaOperator = LDA()
    ldaOperator.fit(new_training_data,training_labels)
    print "Score = " + str(accuracy_score(validation_labels,ldaOperator.predict(new_validation_data)))
Author: Ikram, Project: DUMLS14, Lines: 30, Source: dataset_one_learner.py

Example 2: pca_estimator

# Required import: from sklearn.decomposition import RandomizedPCA [as alias]
# Or: from sklearn.decomposition.RandomizedPCA import transform [as alias]
def pca_estimator(data, targets, estimator, components_number=DEFAULT_COMPONENTS_NUMBER,
                  folds_number=DEFAULT_FOLDS_NUMBER):

    kf = KFold(len(targets), n_folds=folds_number)

    # 'scores' is a numpy array: the index is the fold number and the value is
    # the fraction of correctly predicted test samples for that fold.
    scores = np.zeros(folds_number)

    start = time()

    index = 0
    for train, test in kf:
        x_train, x_test, y_train, y_test = data[train], data[test], targets[train], targets[test]

        pca = RandomizedPCA(n_components=components_number, whiten=True).fit(x_train)
        x_train_pca = pca.transform(x_train)
        x_test_pca = pca.transform(x_test)

        clf = estimator.fit(x_train_pca, y_train)
        scores[index] = clf.score(x_test_pca, y_test)
        index += 1
        # print("Iteration %d from %d has done! Score: %f" % (index, folds_number,
        #                                                     scores[index - 1]))
    finish = time()

    return scores.mean(), scores.std() * 2, (finish - start)
Author: himl, Project: boson, Lines: 29, Source: PCA.py

Example 3: main

# Required import: from sklearn.decomposition import RandomizedPCA [as alias]
# Or: from sklearn.decomposition.RandomizedPCA import transform [as alias]
def main():
    #create the training & test sets, skipping the header row with [1:]
    dataset = genfromtxt(open('data/train.csv','r'), delimiter=',', dtype='u1')[1:]    
    target = [x[0] for x in dataset]
    train = [x[1:] for x in dataset]
    test = genfromtxt(open('data/test.csv','r'), delimiter=',', dtype='u1')[1:]

    #build crossvalidation training set
    train_train, train_test, target_train, target_test = cross_validation.train_test_split(train, target, test_size=0.2, random_state=0)
    print train_train.shape
    print train_test.shape

    #PCA
    pca = RandomizedPCA(n_components=40)
    pca.fit(train_train)
    
    #create and train the random forest
    rf = RandomForestClassifier(n_estimators=1000, n_jobs=4)
    rf.fit(hstack((train_train, pca.transform(train_train))), target_train)
    print "crossval score is: ", rf.score(hstack((train_test, pca.transform(train_test))), target_test)

    labelid = np.array(range(1,28001))

    output = rf.predict(hstack((test, pca.transform(test))))
    savetxt('data/submission.csv', np.column_stack((labelid, output)), delimiter=',', header="ImageId,Label", fmt='%u', comments='')
Author: columbiadatascience, Project: digit-recognizer, Lines: 27, Source: benchmark.py

Example 4: SVM

# Required import: from sklearn.decomposition import RandomizedPCA [as alias]
# Or: from sklearn.decomposition.RandomizedPCA import transform [as alias]
def SVM(X_data, y_data):

	X_data = equalize_hist(X_data)
	# normalize and scale return new arrays (copy=True by default), so keep the results
	X_data = preprocessing.normalize(X_data, 'max')
	X_data = preprocessing.scale(X_data, axis=1)
	# preprocessing.normalize(X_data, 'max')
	# X_data = equalize_hist(X_data) 

	# divide our data set into a training set and a test set
	X_train, X_test, y_train, y_test = cross_validation.train_test_split(X_data, y_data, test_size=TRAIN_TEST_SPLIT_RATIO)

	n_components = 120

	print("Extracting the top %d eigenfaces from %d faces"
		% (n_components, X_train.shape[0]))
	pca = RandomizedPCA(n_components=n_components, whiten=True).fit(X_train)

	print("Projecting the input data on the eigenfaces orthonormal basis")
	X_train_pca = pca.transform(X_train)
	X_test_pca = pca.transform(X_test)
	print("done ")

	param_grid = {'C': [1e3, 5e3, 1e4, 5e4, 1e5],
	'gamma': [0.0001, 0.0005, 0.001, 0.005, 0.01, 0.1], }
	classifier = GridSearchCV(SVC(kernel='rbf', class_weight='balanced'), param_grid)
	classifier.fit(X_train_pca, y_train)



	print("====== PCA 150 ========")
	print('TRAIN SCORE', classifier.score(X_train_pca, y_train))
	print('TEST SCORE', classifier.score(X_test_pca, y_test))
Author: lionheartX, Project: Kaggle_uoft, Lines: 34, Source: SVM_best.py

Example 5: pca_data

# Required import: from sklearn.decomposition import RandomizedPCA [as alias]
# Or: from sklearn.decomposition.RandomizedPCA import transform [as alias]
def pca_data(test_x, train_x, params):
    print 'applying PCA to data ...'
    components = int(params['components'])
    pca = RandomizedPCA(components, whiten=True).fit(train_x)
    pca_train_x = pca.transform(train_x)
    pca_test_x  = pca.transform(test_x)
    return pca_test_x, pca_train_x
Author: 123fengye741, Project: FaceRetrieval, Lines: 9, Source: pre_process.py

Example 6: do_nbnn

# Required import: from sklearn.decomposition import RandomizedPCA [as alias]
# Or: from sklearn.decomposition.RandomizedPCA import transform [as alias]
def do_nbnn(train_folder, test_folder):
    train = load_patches(args.train_folder)
    test = load_patches(args.test_folder)
    if options.relu:
        get_logger().info("Applying RELU")
        for class_data in train:
            class_data.patches = class_data.patches.clip(min=0)
        for class_data in test:
            class_data.patches = class_data.patches.clip(min=0)
    if options.scale:
        get_logger().info("Applying standardization")
        scaler = StandardScaler(copy=False)
        scaler.fit(np.vstack([t.patches for t in train]))
        for class_data in train:
            class_data.patches = scaler.transform(class_data.patches)
        for class_data in test:
            class_data.patches = scaler.transform(class_data.patches)
    if options.pca:
        get_logger().info("Calculating PCA")
        pca = RandomizedPCA(n_components=options.pca)
        pca.fit(np.vstack([t.patches for t in train]))
        #for class_data in train:
            #get_logger().info("Fitting class " + class_data.name)
            #pca.partial_fit(class_data.patches)
        get_logger().info("Keeping " + str(pca.explained_variance_ratio_.sum()) + " variance (" + str(options.pca) +
             ") components\nApplying PCA")
        for class_data in train:
            class_data.patches = pca.transform(class_data.patches)
        for class_data in test:
            class_data.patches = pca.transform(class_data.patches)
    nbnn(train, test, NN_Engine())
Author: enoonIT, Project: nbnn-nbnl, Lines: 33, Source: nbnn.py

Example 7: test_sparse_randomized_pca_inverse

# Required import: from sklearn.decomposition import RandomizedPCA [as alias]
# Or: from sklearn.decomposition.RandomizedPCA import transform [as alias]
def test_sparse_randomized_pca_inverse():
    """Test that RandomizedPCA is inversible on sparse data"""
    rng = np.random.RandomState(0)
    n, p = 50, 3
    X = rng.randn(n, p)  # spherical data
    X[:, 1] *= 0.00001  # make middle component relatively small
    # no large means because the sparse version of randomized pca does not do
    # centering to avoid breaking the sparsity
    X = csr_matrix(X)

    # same check that we can find the original data from the transformed signal
    # (since the data is almost of rank n_components)
    with warnings.catch_warnings(record=True) as w:
        warnings.simplefilter("always", DeprecationWarning)
        pca = RandomizedPCA(n_components=2, random_state=0).fit(X)
        assert_equal(len(w), 1)
        assert_equal(w[0].category, DeprecationWarning)

    Y = pca.transform(X)

    Y_inverse = pca.inverse_transform(Y)
    assert_almost_equal(X.todense(), Y_inverse, decimal=2)

    # same as above with whitening (approximate reconstruction)
    with warnings.catch_warnings(record=True) as w:
        warnings.simplefilter("always", DeprecationWarning)
        pca = RandomizedPCA(n_components=2, whiten=True, random_state=0).fit(X)
        assert_equal(len(w), 1)
        assert_equal(w[0].category, DeprecationWarning)

    Y = pca.transform(X)
    Y_inverse = pca.inverse_transform(Y)
    relative_max_delta = (np.abs(X.todense() - Y_inverse) / np.abs(X).mean()).max()
    # XXX: this does not seem to work as expected:
    assert_almost_equal(relative_max_delta, 0.91, decimal=2)
Author: mugiro, Project: elm-python, Lines: 37, Source: test_pca.py

Example 8: SVM

# Required import: from sklearn.decomposition import RandomizedPCA [as alias]
# Or: from sklearn.decomposition.RandomizedPCA import transform [as alias]
def SVM(X_train, y_train, X_test):
    print("SVM with PCA of rbf, writening all on, no normalize")
    preprocessing.normalize(X_train, 'max')
    preprocessing.normalize(X_test, 'max')
    #preprocessing.robust_scale(X, axis=1, with_centering = True) #bad
    X_train = equalize_hist(X_train)
    X_test = equalize_hist(X_test)
    '''X_train, X_test, y_train, y_test = cross_validation.train_test_split(X, y, test_size=TRAIN_TEST_SPLIT_RATIO)'''

    n_components = 147

    print("Extracting the top %d eigenfaces from %d faces"
          % (n_components, X_train.shape[0]))
    pca = RandomizedPCA(n_components=n_components, whiten=False).fit(X_train)

    print("Projecting the input data on the eigenfaces orthonormal basis")
    X_train_pca = pca.transform(X_train)
    X_test_pca = pca.transform(X_test)
    print("done ")

    param_grid = {'C': [1e3, 5e3, 1e4, 5e4, 1e5],
              'gamma': [0.0001, 0.0005, 0.001, 0.005, 0.01, 0.1], }
    classifier13 = GridSearchCV(SVC(kernel='rbf', class_weight='balanced'), param_grid)
    classifier13.fit(X_train_pca, y_train)
    return list(classifier13.predict(X_test_pca))
开发者ID:lionheartX,项目名称:Kaggle_uoft,代码行数:27,代码来源:SVM_filter.py

Example 9: getPrincipleComponents

# Required import: from sklearn.decomposition import RandomizedPCA [as alias]
# Or: from sklearn.decomposition.RandomizedPCA import transform [as alias]
def getPrincipleComponents(xtr, xte, n_components=50):
    train = np.array(xtr)
    test = np.array(xte)
    pca = RandomizedPCA(n_components=n_components).fit(train)
    xtrain = pca.transform(train)
    xtest = pca.transform(test)
    return xtrain, xtest
Author: MachineLearningProjectS16, Project: MNIST_Project, Lines: 9, Source: util.py

Example 10: LogisticRegressionPCA

# Required import: from sklearn.decomposition import RandomizedPCA [as alias]
# Or: from sklearn.decomposition.RandomizedPCA import transform [as alias]
def LogisticRegressionPCA(X, y):

	# divide our data set into a training set and a test set
	X_train, X_test, y_train, y_test = cross_validation.train_test_split(
    									X, y, test_size=TRAIN_TEST_SPLIT_RATIO)

	# get randomized PCA model
	num_components = 147
	print("Extracting the top %d eigenfaces from %d faces"
          % (num_components, X_train.shape[0]))
	pca = RandomizedPCA(n_components=num_components, whiten=True).fit(X_train)

    # use the PCA model on our training set and test set.
	print("Projecting the input data on the eigenfaces orthonormal basis")
	X_train_pca = pca.transform(X_train)
	X_test_pca = pca.transform(X_test)
	print("done ")

	logistic_regression = linear_model.LogisticRegression(C=1e5)

	# fit the classifier on the PCA-projected training data
	logistic_regression.fit(X_train_pca, y_train)

	# print the performance of logistic regression on the PCA features
	print("====== Logistic Regression with PCA ========")
	print('TRAIN SCORE', logistic_regression.score(X_train_pca, y_train))
	print('TEST SCORE', logistic_regression.score(X_test_pca, y_test))
Author: lionheartX, Project: Kaggle_uoft, Lines: 31, Source: logistic_regression_with_PCA.py

Example 11: SVM

# Required import: from sklearn.decomposition import RandomizedPCA [as alias]
# Or: from sklearn.decomposition.RandomizedPCA import transform [as alias]
def SVM(X, y):

	X_train, X_test, y_train, y_test = cross_validation.train_test_split(X, y, test_size=TRAIN_TEST_SPLIT_RATIO)
	print(len(X_train))

    # Compute a PCA (eigenfaces) on the face dataset (treated as unlabeled
    # dataset): unsupervised feature extraction / dimensionality reduction
	n_components = 150
	pca = RandomizedPCA(n_components=n_components, whiten=True).fit(X_train)


	print("Projecting the input data on the eigenfaces orthonormal basis")
	X_train_pca = pca.transform(X_train)
	X_test_pca = pca.transform(X_test)
	print("done ")

	X_train_pca = equalize_hist(X_train_pca)
	X_train_pca = preprocessing.scale(X_train_pca * 1.0, axis=1)  # scale returns a new array
	X_test_pca = equalize_hist(X_test_pca)
	X_test_pca = preprocessing.scale(X_test_pca * 1.0, axis=1)

    # classifier = svm.SVC(kernel='poly', degree = 3)
    # classifier.fit(X_train, y_train)
    # # print("======",3,"========")
    # print('TRAIN SCORE', classifier.score(X_train, y_train))
    # print('TEST SCORE', classifier.score(X_test, y_test))


	param_grid = {'C': [1e3, 5e3, 1e4, 5e4, 1e5],
              'gamma': [0.0001, 0.0005, 0.001, 0.005, 0.01, 0.1], }
	classifier2 = GridSearchCV(SVC(kernel='rbf', class_weight='balanced'), param_grid)
	classifier2.fit(X_train_pca, y_train)
	# print("======",3,"========")
	print('TRAIN SCORE', classifier2.score(X_train_pca, y_train))
	print('TEST SCORE', classifier2.score(X_test_pca, y_test))
Author: lionheartX, Project: Kaggle_uoft, Lines: 37, Source: yue.py

Example 12: rpca

# Required import: from sklearn.decomposition import RandomizedPCA [as alias]
# Or: from sklearn.decomposition.RandomizedPCA import transform [as alias]
def rpca(train_X, test_X, n):
	start_time = time.time()
	pca = RandomizedPCA(n_components=n)
	pca.fit(train_X.toarray())
	train_X_pca = pca.transform(train_X.toarray())
	test_X_pca = pca.transform(test_X.toarray())
	print("--- %s seconds ---" % (time.time() - start_time))
	return pca, train_X_pca, test_X_pca
Author: purblue10, Project: si650_project, Lines: 10, Source: fs.py

Example 13: Cluster

# Required import: from sklearn.decomposition import RandomizedPCA [as alias]
# Or: from sklearn.decomposition.RandomizedPCA import transform [as alias]
class Cluster(object):

    def __init__(self, name):
        self.name = name
        self.raw_dataset = []
        self.dataset = []
        self.dataset_red = []
    
    def get_featurevec(self, data):
        '''Takes in data in the form of an array of EmoPackets, and outputs
        a list of feature vectors.'''
        # CHECKED, all good :)
        num_bins = (len(data)/int(dsp.SAMPLE_RATE*dsp.STAGGER) -
                    int(dsp.BIN_SIZE / dsp.STAGGER) + 1)
        size = int(dsp.BIN_SIZE*dsp.SAMPLE_RATE)
        starts = int(dsp.SAMPLE_RATE*dsp.STAGGER)
        points = []
        for i in range(num_bins):
            points.append(dsp.get_features(data[i*starts:i*starts+size]))
        return points

    def add_data(self, raw):
        '''Allows the addition of new data. Will retrain upon addition.
            Expects a list of EmoPackets.'''
        self.dataset.extend(self.get_featurevec(raw))

    def extract_features(self):
        '''Does feature extraction for all of the datasets.'''
        self.dataset = []
        for sess in self.raw_dataset:
            self.dataset.extend(self.get_featurevec(sess))

    def reduce_dim(self, NDIM=5):
        '''Reduces the dimension of the extracted feature vectors.'''
        X = np.array(self.dataset)
        self.pca = RandomizedPCA(n_components=NDIM).fit(X)
        self.dataset_red = self.pca.transform(X)
        
    def train(self):
        '''Trains the classifier.'''
        self.svm = OneClassSVM()
        self.svm.fit(self.dataset_red)

    def is_novel(self, pt):
        '''Says whether or not the bin is novel. Expects an array of EmoPackets.'''
        X = self.pca.transform(np.array(self.get_featurevec(pt)[0]))
        ans = self.svm.predict(X)
        # dataset_red is a numpy array after transform, so stack rather than append
        self.dataset_red = np.vstack([self.dataset_red, X])
        self.train()
        return ans
                    
    def save(self):
        '''Saves this classifier to a data directory.'''
        this_dir, this_filename = os.path.split(__file__)
        DATA_PATH = os.path.join(this_dir, "data", self.name+'.pkl')
        dumpfile = open(DATA_PATH, "wb")
        pickle.dump(self, dumpfile, pickle.HIGHEST_PROTOCOL)
        dumpfile.close()
Author: cmcneil, Project: openepoc, Lines: 60, Source: learn.py

Example 14: reduce_dim

# Required import: from sklearn.decomposition import RandomizedPCA [as alias]
# Or: from sklearn.decomposition.RandomizedPCA import transform [as alias]
    def reduce_dim(self, NDIM=5):
        '''Reduces the dimension of the extracted feature vectors.'''
        X = np.array(self.neutral)
        pca = RandomizedPCA(n_components=NDIM).fit(X)
        print pca.explained_variance_ratio_
        self.pca = pca
        self.neutral_red = pca.transform(X)
        for label in self.labelled:
            X = np.array(self.labelled[label])
            self.labelled_red[label] = pca.transform(X)
Author: cmcneil, Project: openepoc, Lines: 12, Source: learn.py

Example 15: compute_pca

# Required import: from sklearn.decomposition import RandomizedPCA [as alias]
# Or: from sklearn.decomposition.RandomizedPCA import transform [as alias]
    def compute_pca(self):
#        print 'We have ', self.x.shape[1], 'features. Reducing dimensionality.'
        pca_count = 200
        pca = RandomizedPCA(pca_count, copy = False, whiten=True)
        pca.fit(self.x_train)
        self.x_train = pca.transform(self.x_train)
        if self.do_submission:
            self.x_test = pca.transform(self.x_test)

        if self.do_validation():
            self.x_validate = pca.transform(self.x_validate)
Author: blazej-wieliczko, Project: kaggle-emc, Lines: 13, Source: main.py
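For readers on scikit-learn 0.18 or newer, where RandomizedPCA is deprecated and later removed, the same randomized solver is available through the PCA class. Below is a minimal equivalent sketch of the fit-on-train / transform-both pattern used throughout the examples above; the data shapes and parameter values are placeholders, not taken from any example:

import numpy as np
from sklearn.decomposition import PCA

X_train = np.random.RandomState(0).randn(200, 64)  # placeholder training data
X_test = np.random.RandomState(1).randn(50, 64)    # placeholder test data

# PCA(svd_solver='randomized') is the modern replacement for RandomizedPCA
pca = PCA(n_components=10, whiten=True, svd_solver='randomized').fit(X_train)
X_train_red = pca.transform(X_train)  # fit on the training split only,
X_test_red = pca.transform(X_test)    # then transform both splits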


Note: The sklearn.decomposition.RandomizedPCA.transform examples in this article were compiled by 纯净天空 from open-source code and documentation platforms such as GitHub and MSDocs. The snippets were selected from open-source projects contributed by various developers; copyright in the source code remains with the original authors. For distribution and use, refer to the corresponding project's License. Do not reproduce without permission.