当前位置: 首页>>代码示例>>Python>>正文


Python decomposition.RandomizedPCA类代码示例

本文整理汇总了Python中sklearn.decomposition.RandomizedPCA的典型用法代码示例。如果您正苦于以下问题:Python RandomizedPCA类的具体用法?Python RandomizedPCA怎么用?Python RandomizedPCA使用的例子?那么, 这里精选的类代码示例或许可以为您提供帮助。


在下文中一共展示了RandomizedPCA类的15个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Python代码示例。

示例1: tryLinearDiscriminantAnalysis

def tryLinearDiscriminantAnalysis(goFast):
  from sklearn.datasets import dump_svmlight_file, load_svmlight_file
  if goFast:
    training_data, training_labels = load_svmlight_file("dt1_1500.trn.svm", n_features=253659, zero_based=True)
    validation_data, validation_labels = load_svmlight_file("dt1_1500.vld.svm", n_features=253659, zero_based=True)
    testing_data, testing_labels = load_svmlight_file("dt1_1500.tst.svm", n_features=253659, zero_based=True)
  else:
    training_data, training_labels = load_svmlight_file("dt1.trn.svm", n_features=253659, zero_based=True)
    validation_data, validation_labels = load_svmlight_file("dt1.vld.svm", n_features=253659, zero_based=True)
    testing_data, testing_labels = load_svmlight_file("dt1.tst.svm", n_features=253659, zero_based=True)

  from sklearn.lda import LDA
  from sklearn.metrics import accuracy_score
  from sklearn.grid_search import ParameterGrid
  from sklearn.decomposition import RandomizedPCA

  rpcaDataGrid = [{"n_components": [10,45,70,100],
                    "iterated_power": [2, 3, 4],
                    "whiten": [True]}]

  for rpca_parameter_set in ParameterGrid(rpcaDataGrid):
    rpcaOperator = RandomizedPCA(**rpca_parameter_set)
    rpcaOperator.fit(training_data,training_labels)
    new_training_data = rpcaOperator.transform(training_data,training_labels)
    new_validation_data = rpcaOperator.transform(validation_data,validation_labels)
    ldaOperator = LDA()
    ldaOperator.fit(new_training_data,training_labels)
    print "Score = " + str(accuracy_score(validation_labels,ldaOperator.predict(new_validation_data)))
开发者ID:Ikram,项目名称:DUMLS14,代码行数:28,代码来源:dataset_one_learner.py

示例2: scatter

def scatter(data, labels=None, title=None, name=None):
    """2d PCA scatter plot with optional class info

    Return the pca model to be able to introspect the components or transform
    new data with the same model.
    """
    data = atleast2d_or_csr(data)

    if data.shape[1] == 2:
        # No need for a PCA:
        data_2d = data
    else:
        pca = RandomizedPCA(n_components=2)
        data_2d = pca.fit_transform(data)

    for i, c, m in zip(np.unique(labels), cycle(COLORS), cycle(MARKERS)):
        plt.scatter(data_2d[labels == i, 0], data_2d[labels == i, 1],
                    c=c, marker=m, label=i, alpha=0.5)

    plt.legend(loc='best')
    if title is None:
        title = "2D PCA scatter plot"
        if name is not None:
            title += " for " + name
    plt.xlabel('First Principal Component')
    plt.ylabel('Second Principal Component')
    plt.title(title)

    return pca
开发者ID:chrinide,项目名称:oglearn,代码行数:29,代码来源:visualization.py

示例3: LogisticRegressionPCA

def LogisticRegressionPCA(X, y):

	# divide our data set into a training set and a test set
	X_train, X_test, y_train, y_test = cross_validation.train_test_split(
    									X, y, test_size=TRAIN_TEST_SPLIT_RATIO)

	# get randomized PCA model
	num_components = 147
	print("Extracting the top %d eigenfaces from %d faces"
          % (num_components, X_train.shape[0]))
	pca = RandomizedPCA(n_components=num_components, whiten=True).fit(X_train)

    # use the PCA model on our training set and test set.
	print("Projecting the input data on the eigenfaces orthonormal basis")
	X_train_pca = pca.transform(X_train)
	X_test_pca = pca.transform(X_test)
	print("done ")

	h = .02  # step size in the mesh

	logistic_regression = linear_model.LogisticRegression(C=1e5)

	# we create an instance of Neighbours Classifier and fit the data.
	logistic_regression.fit(X, y)

	# print the performance of logistic regression 
	print("====== Logistic Regression with PCA ========")
	print('TRAIN SCORE', logistic_regression.score(X_train, y_train))
	print('TEST SCORE', logistic_regression.score(X_test, y_test))
开发者ID:lionheartX,项目名称:Kaggle_uoft,代码行数:29,代码来源:logistic_regression_with_PCA.py

示例4: SVM

def SVM(X, y):

	X_train, X_test, y_train, y_test = cross_validation.train_test_split(X, y, test_size=TRAIN_TEST_SPLIT_RATIO)
	print(len(X_train))

    # Compute a PCA (eigenfaces) on the face dataset (treated as unlabeled
    # dataset): unsupervised feature extraction / dimensionality reduction
	n_components = 150
	pca = RandomizedPCA(n_components=n_components, whiten=True).fit(X_train)


	print("Projecting the input data on the eigenfaces orthonormal basis")
	X_train_pca = pca.transform(X_train)
	X_test_pca = pca.transform(X_test)
	print("done ")

	X_train_pca = equalize_hist(X_train_pca)
	preprocessing.scale(X_train_pca * 1.0, axis=1)
	X_test_pca = equalize_hist(X_test_pca)
	preprocessing.scale(X_test_pca * 1.0, axis=1)

    # classifier = svm.SVC(kernel='poly', degree = 3)
    # classifier.fit(X_train, y_train)
    # # print("======",3,"========")
    # print('TRAIN SCORE', classifier.score(X_train, y_train))
    # print('TEST SCORE', classifier.score(X_test, y_test))


	param_grid = {'C': [1e3, 5e3, 1e4, 5e4, 1e5],
              'gamma': [0.0001, 0.0005, 0.001, 0.005, 0.01, 0.1], }
	classifier2 = GridSearchCV(SVC(kernel='rbf', class_weight='balanced'), param_grid)
	classifier2.fit(X_train_pca, y_train)
	# print("======",3,"========")
	print('TRAIN SCORE', classifier2.score(X_train_pca, y_train))
	print('TEST SCORE', classifier2.score(X_test_pca, y_test))
开发者ID:lionheartX,项目名称:Kaggle_uoft,代码行数:35,代码来源:yue.py

示例5: pca_estimator

def pca_estimator(data, targets, estimator, components_number=DEFAULT_COMPONENTS_NUMBER,
                  folds_number=DEFAULT_FOLDS_NUMBER):

    kf = KFold(len(targets), n_folds=folds_number)

    # 'scores' is numpy array. An index is a number of a fold. A value is a percent of right
    # predicted samples from a test.
    scores = np.zeros(folds_number)

    start = time()

    index = 0
    for train, test in kf:
        x_train, x_test, y_train, y_test = data[train], data[test], targets[train], targets[test]

        pca = RandomizedPCA(n_components=components_number, whiten=True).fit(x_train)
        x_train_pca = pca.transform(x_train)
        x_test_pca = pca.transform(x_test)

        clf = estimator.fit(x_train_pca, y_train)
        scores[index] = clf.score(x_test_pca, y_test)
        index += 1
        # print("Iteration %d from %d has done! Score: %f" % (index, folds_number,
        #                                                     scores[index - 1]))
    finish = time()

    return scores.mean(), scores.std() * 2, (finish - start)
开发者ID:himl,项目名称:boson,代码行数:27,代码来源:PCA.py

示例6: test_explained_variance

def test_explained_variance():
    # Check that PCA output has unit-variance
    rng = np.random.RandomState(0)
    n_samples = 100
    n_features = 80

    X = rng.randn(n_samples, n_features)

    pca = PCA(n_components=2).fit(X)
    rpca = RandomizedPCA(n_components=2, random_state=rng).fit(X)
    assert_array_almost_equal(pca.explained_variance_ratio_,
                              rpca.explained_variance_ratio_, 1)

    # compare to empirical variances
    X_pca = pca.transform(X)
    assert_array_almost_equal(pca.explained_variance_,
                              np.var(X_pca, axis=0))

    X_rpca = rpca.transform(X)
    assert_array_almost_equal(rpca.explained_variance_, np.var(X_rpca, axis=0),
                              decimal=1)

    # Same with correlated data
    X = datasets.make_classification(n_samples, n_features,
                                     n_informative=n_features-2,
                                     random_state=rng)[0]

    pca = PCA(n_components=2).fit(X)
    rpca = RandomizedPCA(n_components=2, random_state=rng).fit(X)
    assert_array_almost_equal(pca.explained_variance_ratio_,
                              rpca.explained_variance_ratio_, 5)
开发者ID:AppliedArtificialIntelligence,项目名称:scikit-learn,代码行数:31,代码来源:test_pca.py

示例7: dimentionality_reduction

def dimentionality_reduction(train_x , test_x):
	print "Dimentionality reduction to 10D on training and test data...."
	pca = RandomizedPCA(n_components=10)
	train_x = pca.fit_transform(train_x)
	test_x = pca.transform(test_x)
	print "Done."
	return train_x , test_x
开发者ID:shreyanshd,项目名称:Image-Classifier,代码行数:7,代码来源:image_classifier.py

示例8: reduce_features

def reduce_features(features, var_explained=0.9, n_components=0, verbose=False):
	"""
	Performs feature reduction using PCA. Automatically selects nr. components
	for explaining min_var_explained variance.
	:param features: Features.
	:param var_explained: Minimal variance explained.
	:param n_components: Nr. of components.
	:param exclude_columns: Columns to exclude.
	:param verbose: Verbosity.
	:return: Reduced feature set.
	"""
	if n_components == 0:
		# Run full PCA to estimate nr. components for explaining given
		# percentage of variance.
		estimator = RandomizedPCA()
		estimator.fit_transform(features)
		variance = 0.0
		for i in range(len(estimator.explained_variance_ratio_)):
			variance += estimator.explained_variance_ratio_[i]
			if variance > var_explained:
				n_components = i + 1
				if verbose:
					print('{} % of variance explained using {} components'.format(var_explained, n_components))
				break
	# Re-run PCA with only estimated nr. components
	estimator = RandomizedPCA(n_components=n_components)
	features = estimator.fit_transform(features)
	return features
开发者ID:rbrecheisen,项目名称:scripts,代码行数:28,代码来源:prepare.py

示例9: do_nbnn

def do_nbnn(train_folder, test_folder):
    train = load_patches(args.train_folder)
    test = load_patches(args.test_folder)
    if options.relu:
        get_logger().info("Applying RELU")
        for class_data in train:
            class_data.patches = class_data.patches.clip(min=0)
        for class_data in test:
            class_data.patches = class_data.patches.clip(min=0)
    if options.scale:
        get_logger().info("Applying standardization")
        scaler = StandardScaler(copy=False)
        scaler.fit(np.vstack([t.patches for t in train]))
        for class_data in train:
            class_data.patches = scaler.transform(class_data.patches)
        for class_data in test:
            class_data.patches = scaler.transform(class_data.patches)
    if options.pca:
        get_logger().info("Calculating PCA")
        pca = RandomizedPCA(n_components=options.pca)
        pca.fit(np.vstack([t.patches for t in train]))
        #for class_data in train:
            #get_logger().info("Fitting class " + class_data.name)
            #pca.partial_fit(class_data.patches)
        get_logger().info("Keeping " + str(pca.explained_variance_ratio_.sum()) + " variance (" + str(options.pca) +
             ") components\nApplying PCA")
        for class_data in train:
            class_data.patches = pca.transform(class_data.patches)
        for class_data in test:
            class_data.patches = pca.transform(class_data.patches)
    nbnn(train, test, NN_Engine())
开发者ID:enoonIT,项目名称:nbnn-nbnl,代码行数:31,代码来源:nbnn.py

示例10: build_classifier

def build_classifier(train_data_x_in, train_data_y, classifier_in="svc_basic"):
    print "Attempting to build classifier."
    train_data_x = train_data_x_in
    transformer = ""
    # classifier = grid_search.GridSearchCV(svm.SVC(), parameters).fit(train_data_x, train_data_y)
    if classifier_in == "svc_basic":
        classifier = svm.SVC()
        print "Selection was basic svm.SVC."
    elif classifier_in == "svc_extensive":
        classifier = svm.SVC(kernel="linear", C=0.025, gamma=0.01)
        print "Selection was extensive svm.SVC, with linear kernel, C==0.025 and gamma==0.01."
    elif classifier_in == "kneighbors_basic":
        transformer = RandomizedPCA(n_components=2000)
        train_data_x = transformer.fit_transform(train_data_x)
        classifier = KNeighborsClassifier()
        print "Selection was KNeighbors basic, using RandomizedPCA to transform data first. n_components==2000."
    elif classifier_in == "bagging_basic":
        classifier = BaggingClassifier(KNeighborsClassifier(), max_samples=0.5, max_features=0.5)
        print "Selection was Bagging basic, with max_samples==0.5 and max_features==0.5."
    elif classifier_in == "spectral_basic":
        transformer = SpectralEmbedding(n_components=2000)
        train_data_x = transformer.fit_transform(train_data_x)
        classifier = KNeighborsClassifier()
        print "Selection was Spectral basic, using svm.SVC with Spectral data fitting. n_components==2000."
    # default to SVC in case of any sort of parsing error.
    else:
        print "Error in selecting classifier class. Reverting to SVC."
        classifier = svm.SVC()
    classifier.fit(train_data_x, train_data_y)
    print "Doing classifier estimation."
    return classifier, train_data_x, transformer
开发者ID:RAMichel,项目名称:image_classifier,代码行数:31,代码来源:build_classifier.py

示例11: calc_hog

def calc_hog(fpaths, save=False):
    '''
    Compute histogram of gradients (HOG). Saves in batches to prevent memory issues.
    Input:
        fpaths : files on which HOG will be computed
        save : if true, output is saved to disk
    '''

    hogs = np.empty((len(fpaths), 15876))

    for i, fpath in enumerate(fpaths):
        img = imread(os.path.join(imgdir, fpath))
        if len(img.shape)==3:
            img = rgb2gray(img)
        # rescale so all feature vectors are the same length
        img_resize = resize(img, (128, 128))
        img_hog = hog(img_resize)

        hogs[i, :] = img_hog

    hogs_sc = scale(hogs)
    n_components = 15
    pca = RandomizedPCA(n_components=n_components)
    hogs_decomp = pca.fit_transform(hogs_sc)

    df = pd.DataFrame(hogs_decomp, index=[os.path.split(i)[1] for i in fpaths])
    df.index.name='fpath'
    df.columns = ['feat_hog_%2.2u' % i for i in range(1, n_components+1)]
    if save: df.to_csv('hog.csv')
    
    return df
开发者ID:r-b-g-b,项目名称:AY250_HW,代码行数:31,代码来源:calcFeatures.py

示例12: fit

    def fit(self):

        wordids_map = NameToIndex()
        labs_map = NameToIndex()

        wordscount = self._word_cluster.get_words_count()
        print "start compute_tfidf ..."
        #计算文档的词袋模型
        docs = self._word_cluster.get_samples()
        count =0
        bow = []
        labs = []

        for k,v in docs.iteritems():
            vec = numpy.zeros(wordscount).tolist()
            for i in v:
                vec[wordids_map.map(i)]+=1
            bow.append(vec)
            labs.append(labs_map.map(k[0]))

        labs = numpy.array(labs)

        tfidf = TfidfTransformer(smooth_idf=True, sublinear_tf=True,use_idf=True)
        datas = numpy.array(tfidf.fit_transform(bow).toarray())

        print "compute_tfidf done"
        pca = RandomizedPCA(n_components=20, whiten=True).fit(datas)
        svc = train_svc(numpy.array(labs_map.names), labs, pca.transform(datas))

        self._tfidf = tfidf
        self._svc = svc
        self._labs_map = labs_map
        self._wordids_map = wordids_map
        self._pca = pca
开发者ID:caoym,项目名称:odr,代码行数:34,代码来源:odr.py

示例13: test_randomized_pca_check_list

def test_randomized_pca_check_list():
    """Test that the projection by RandomizedPCA on list data is correct"""
    X = [[1.0, 0.0], [0.0, 1.0]]
    X_transformed = RandomizedPCA(n_components=1, random_state=0).fit(X).transform(X)
    assert_equal(X_transformed.shape, (2, 1))
    assert_almost_equal(X_transformed.mean(), 0.00, 2)
    assert_almost_equal(X_transformed.std(), 0.71, 2)
开发者ID:Garrett-R,项目名称:scikit-learn,代码行数:7,代码来源:test_pca.py

示例14: pca_data

def pca_data(test_x, train_x, params):
    print 'pcaing data ...'
    components = int(params['components'])
    pca = RandomizedPCA(components, whiten=True).fit(train_x)
    pca_train_x = pca.transform(train_x)
    pca_test_x  = pca.transform(test_x)
    return pca_test_x, pca_train_x
开发者ID:123fengye741,项目名称:FaceRetrieval,代码行数:7,代码来源:pre_process.py

示例15: compute_pca

def compute_pca(reception_stats,n_components=5):
    reception_mean = reception_stats.mean(axis=0)
    pca = RandomizedPCA(n_components-1)
    pca.fit(reception_stats)
    pca_components = np.vstack([reception_mean,pca.components_])

    return pca,pca_components
开发者ID:AndrewRook,项目名称:phdfootball,代码行数:7,代码来源:wr_pca.py


注:本文中的sklearn.decomposition.RandomizedPCA类示例由纯净天空整理自Github/MSDocs等开源代码及文档管理平台,相关代码片段筛选自各路编程大神贡献的开源项目,源码版权归原作者所有,传播和使用请参考对应项目的License;未经允许,请勿转载。