

Python RandomizedPCA.fit_transform Method Code Examples

This article collects and summarizes typical code examples of the Python method sklearn.decomposition.RandomizedPCA.fit_transform. If you are wondering what RandomizedPCA.fit_transform does, how to call it, or what real-world usage looks like, the curated examples below should help. You can also explore further usage examples of sklearn.decomposition.RandomizedPCA, the class this method belongs to.


The following presents 15 code examples of RandomizedPCA.fit_transform, ordered by popularity by default.
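A note before the examples: RandomizedPCA was deprecated in scikit-learn 0.18 and removed in 0.20; its randomized solver now lives inside sklearn.decomposition.PCA. A minimal sketch of the modern equivalent (the data shape here is an illustrative assumption, not taken from the examples below):

import numpy as np
from sklearn.decomposition import PCA

X = np.random.RandomState(0).rand(100, 50)  # placeholder data
# svd_solver='randomized' selects the algorithm RandomizedPCA implemented
pca = PCA(n_components=10, svd_solver='randomized', random_state=0)
X_reduced = pca.fit_transform(X)  # X_reduced has shape (100, 10)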

Example 1: reduce_features

# Required import: from sklearn.decomposition import RandomizedPCA [as alias]
# Or: from sklearn.decomposition.RandomizedPCA import fit_transform [as alias]
def reduce_features(features, var_explained=0.9, n_components=0, verbose=False):
	"""
	Performs feature reduction using PCA. Automatically selects nr. components
	for explaining min_var_explained variance.
	:param features: Features.
	:param var_explained: Minimal variance explained.
	:param n_components: Nr. of components.
	:param exclude_columns: Columns to exclude.
	:param verbose: Verbosity.
	:return: Reduced feature set.
	"""
	if n_components == 0:
		# Run full PCA to estimate nr. components for explaining given
		# percentage of variance.
		estimator = RandomizedPCA()
		estimator.fit_transform(features)
		variance = 0.0
		for i in range(len(estimator.explained_variance_ratio_)):
			variance += estimator.explained_variance_ratio_[i]
			if variance > var_explained:
				n_components = i + 1
				if verbose:
					print('{:.0%} of variance explained using {} components'.format(var_explained, n_components))
				break
	# Re-run PCA with only estimated nr. components
	estimator = RandomizedPCA(n_components=n_components)
	features = estimator.fit_transform(features)
	return features
Author: rbrecheisen, Project: scripts, Lines: 30, Source: prepare.py
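As an aside, newer scikit-learn can do this selection in one step: passing a float in (0, 1) as n_components to PCA keeps just enough components to explain that fraction of the variance. A hedged sketch, not part of the original project (assumes features is a 2-D NumPy array):

from sklearn.decomposition import PCA

# One-step equivalent of the estimate-then-refit loop above
estimator = PCA(n_components=0.9, svd_solver='full')
features = estimator.fit_transform(features)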

Example 2: build_classifier

# Required import: from sklearn.decomposition import RandomizedPCA [as alias]
# Or: from sklearn.decomposition.RandomizedPCA import fit_transform [as alias]
def build_classifier(train_data_x_in, train_data_y, classifier_in="svc_basic"):
    print "Attempting to build classifier."
    train_data_x = train_data_x_in
    transformer = ""
    # classifier = grid_search.GridSearchCV(svm.SVC(), parameters).fit(train_data_x, train_data_y)
    if classifier_in == "svc_basic":
        classifier = svm.SVC()
        print "Selection was basic svm.SVC."
    elif classifier_in == "svc_extensive":
        classifier = svm.SVC(kernel="linear", C=0.025, gamma=0.01)
        print "Selection was extensive svm.SVC, with linear kernel, C==0.025 and gamma==0.01."
    elif classifier_in == "kneighbors_basic":
        transformer = RandomizedPCA(n_components=2000)
        train_data_x = transformer.fit_transform(train_data_x)
        classifier = KNeighborsClassifier()
        print "Selection was KNeighbors basic, using RandomizedPCA to transform data first. n_components==2000."
    elif classifier_in == "bagging_basic":
        classifier = BaggingClassifier(KNeighborsClassifier(), max_samples=0.5, max_features=0.5)
        print "Selection was Bagging basic, with max_samples==0.5 and max_features==0.5."
    elif classifier_in == "spectral_basic":
        transformer = SpectralEmbedding(n_components=2000)
        train_data_x = transformer.fit_transform(train_data_x)
        classifier = KNeighborsClassifier()
        print "Selection was Spectral basic, using svm.SVC with Spectral data fitting. n_components==2000."
    # default to SVC in case of any sort of parsing error.
    else:
        print "Error in selecting classifier class. Reverting to SVC."
        classifier = svm.SVC()
    classifier.fit(train_data_x, train_data_y)
    print "Doing classifier estimation."
    return classifier, train_data_x, transformer
Author: RAMichel, Project: image_classifier, Lines: 33, Source: build_classifier.py

Example 3: test_feature_union_weights

# Required import: from sklearn.decomposition import RandomizedPCA [as alias]
# Or: from sklearn.decomposition.RandomizedPCA import fit_transform [as alias]
def test_feature_union_weights():
    # test feature union with transformer weights
    iris = load_iris()
    X = iris.data
    y = iris.target
    pca = RandomizedPCA(n_components=2, random_state=0)
    select = SelectKBest(k=1)
    # test using fit followed by transform
    fs = FeatureUnion([("pca", pca), ("select", select)],
                      transformer_weights={"pca": 10})
    fs.fit(X, y)
    X_transformed = fs.transform(X)
    # test using fit_transform
    fs = FeatureUnion([("pca", pca), ("select", select)],
                      transformer_weights={"pca": 10})
    X_fit_transformed = fs.fit_transform(X, y)
    # test it works with transformers missing fit_transform
    fs = FeatureUnion([("mock", TransfT()), ("pca", pca), ("select", select)],
                      transformer_weights={"mock": 10})
    X_fit_transformed_wo_method = fs.fit_transform(X, y)
    # check against expected result

    # We use a different pca object to control the random_state stream
    assert_array_almost_equal(X_transformed[:, :-1], 10 * pca.fit_transform(X))
    assert_array_equal(X_transformed[:, -1],
                       select.fit_transform(X, y).ravel())
    assert_array_almost_equal(X_fit_transformed[:, :-1],
                              10 * pca.fit_transform(X))
    assert_array_equal(X_fit_transformed[:, -1],
                       select.fit_transform(X, y).ravel())
    assert_equal(X_fit_transformed_wo_method.shape, (X.shape[0], 7))
Author: Givonaldo, Project: scikit-learn, Lines: 33, Source: test_pipeline.py

Example 4: pcaAndPlot

# Required import: from sklearn.decomposition import RandomizedPCA [as alias]
# Or: from sklearn.decomposition.RandomizedPCA import fit_transform [as alias]
def pcaAndPlot(X, x_to_centroids, centroids, no_dims = 2):
    pca = RandomizedPCA(n_components=no_dims)
    x_trans = pca.fit_transform(X)
    x_sizes = np.full((x_trans.shape[0]), 30, dtype=np.int)
    plt.scatter(x_trans[:, 0], x_trans[:, 1], s=x_sizes, c=x_to_centroids)
    # Project centroids with the already-fitted PCA; refitting here would
    # place points and centroids in two unrelated component spaces.
    centroids_trans = pca.transform(centroids)
    centroids_col = np.arange(centroids.shape[0])
    centroids_sizes = np.full((centroids.shape[0]), 70, dtype=np.int)
    plt.scatter(centroids_trans[:, 0], centroids_trans[:, 1], s=centroids_sizes, c=centroids_col)
    plt.show()
Author: fabi92, Project: deeplearning, Lines: 12, Source: Plotter.py
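The transform call above is the point of the fix: fit_transform would re-estimate a fresh basis from the centroids alone. A minimal illustration of the pattern on synthetic data (shapes are illustrative assumptions):

import numpy as np
from sklearn.decomposition import PCA

rng = np.random.RandomState(0)
X = rng.rand(200, 8)
centroids = X[:3]                        # stand-in cluster centres
pca = PCA(n_components=2).fit(X)         # one basis, fitted once on the data
X_2d = pca.transform(X)
centroids_2d = pca.transform(centroids)  # same basis for both scatters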

Example 5: read_data_sets

# Required import: from sklearn.decomposition import RandomizedPCA [as alias]
# Or: from sklearn.decomposition.RandomizedPCA import fit_transform [as alias]
def read_data_sets():
	class DataSets(object):
		pass

	NUM_CLASSES = 7
	start = time.time()
	data_sets = DataSets()

	# Load the training data
	mat_contents = sio.loadmat('labeled_images.mat')
	train_labels = mat_contents['tr_labels']
	train_identities = mat_contents['tr_identity']
	train_images = mat_contents['tr_images']

	# Load the test data
	mat_contents = sio.loadmat('public_test_images.mat')
	test_images = mat_contents['public_test_images']
	test_set_length = len(test_images[0][0])

	# Flatten images
	test_images = flattenImages(test_images)
	train_images = flattenImages(train_images)

	# Split train into validation set of size ~ test_set_length
	train_images, train_labels, validation_images, validation_labels = splitSet(
		train_images,
		train_labels,
		train_identities,
		test_set_length)

	# Convert labels to one hot vectors
	train_labels = convertToOneHot(train_labels, NUM_CLASSES)
	validation_labels = convertToOneHot(validation_labels, NUM_CLASSES)

	# Normalize the images
	sd = np.sqrt(np.var(train_images) + 0.01)
	train_images = (train_images - np.mean(train_images)) / sd
	sd = np.sqrt(np.var(validation_images) + 0.01)
	validation_images = (validation_images - np.mean(validation_images)) / sd

	pca = RandomizedPCA(n_components=15)
	train_images = pca.fit_transform(train_images)
	# Project the validation and test sets with the PCA fitted on the
	# training data; refitting would give each set its own, incompatible
	# component space.
	validation_images = pca.transform(validation_images)
	test_images = pca.transform(test_images)

	# Setup the matrices into an accessible data set class
	data_sets.train_set = DataSet(train_images, train_labels)
	data_sets.validation_set = DataSet(validation_images, validation_labels)
	data_sets.test_set = DataSet(test_images, np.zeros((len(test_images), NUM_CLASSES)))


	print('Finished setting up data! Took {} seconds'.format(time.time() - start))

	return data_sets
Author: lijian8, Project: Emotion-Recognition-, Lines: 55, Source: input_data.py

Example 6: get_features_from_images_PCA

# Required import: from sklearn.decomposition import RandomizedPCA [as alias]
# Or: from sklearn.decomposition.RandomizedPCA import fit_transform [as alias]
def get_features_from_images_PCA(img_dir,data_set):
    
    """
    Takes in a directory and gets all the images from
    it and extracts the pixel values, flattens the matrix
    into an array and performs principal component analysis
    to get representative subset of features from the pixel
    values of the image.
    """
    
    print "\nExtracting features from given images..."
    img_names = [f for f in os.listdir(img_dir)]
    images = [img_dir+ f for f in os.listdir(img_dir)]
    #print images
    
    print "\nConverting images to vectors"
    data = []
    for image in images:
#        print image
        img = img_to_matrix(image)
        img = flatten_image(img)
        data.append(img)
    
    print "Converting image data to numpy array"
    
    time.sleep(5)
    data = np.array(data)
    print "Finished Conversion"
    time.sleep(5)
    
    print "\nPerforming PCA to get reqd features"
    features = []
    pca = RandomizedPCA(n_components=14)
    for i in xrange(len(data)/100):
        if features == []:
            split = data[0:100]
            features = pca.fit_transform(split)
        else:
            split = data[100*i:100*(i+1)]
            features = np.concatenate((features,pca.fit_transform(split)),axis=0)
    
    print "Writing feature data to file"
    f = open(data_set+"_extracted_features.txt","w")  
    for i in xrange(len(img_names)):
        s = str(img_names[i])
        for value in features[i]:
            s += " "+str(value)
        s += "\n"
        f.write(s)
    
    f.close()
    print "Write completed"
Author: dipanjanS, Project: tag-me, Lines: 54, Source: img_processing_utils.py
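If memory was the reason for the original 100-image batching, scikit-learn's IncrementalPCA supports batching directly while keeping one shared basis. A sketch under that assumption (the batch size is illustrative, and each batch must contain at least n_components samples):

from sklearn.decomposition import IncrementalPCA

ipca = IncrementalPCA(n_components=14, batch_size=100)
for i in xrange(0, len(data), 100):
    ipca.partial_fit(data[i:i+100])  # learn the basis incrementally
features = ipca.transform(data)      # one consistent projection for all images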

Example 7: main

# Required import: from sklearn.decomposition import RandomizedPCA [as alias]
# Or: from sklearn.decomposition.RandomizedPCA import fit_transform [as alias]
def main():
    img_dir = 'images/'
    images = [img_dir + f for f in os.listdir(img_dir)]
    labels = [f.split('/')[-1].split('_')[0] for f in images]
    label2ids = {v: i for i, v in enumerate(sorted(set(labels),
                                                   key=labels.index))}
    y = np.array([label2ids[l] for l in labels])

    data = []
    for image_file in images:
        img = img_to_matrix(image_file)
        img = flatten_image(img)
        data.append(img)
    data = np.array(data)

    # training samples
    is_train = np.random.uniform(0, 1, len(data)) <= 0.7
    train_X, train_y = data[is_train], y[is_train]

    # training a classifier
    pca = RandomizedPCA(n_components=5)
    train_X = pca.fit_transform(train_X)
    multi_svm = OneVsRestClassifier(LinearSVC())
    multi_svm.fit(train_X, train_y)

    # evaluating the model
    test_X, test_y = data[is_train == False], y[is_train == False]
    test_X = pca.transform(test_X)
    print pd.crosstab(test_y, multi_svm.predict(test_X),
                      rownames=['Actual'], colnames=['Predicted'])
Author: satojkovic, Project: AutoGeoTagging, Lines: 32, Source: train_model.py

Example 8: _prepare_pca

# Required import: from sklearn.decomposition import RandomizedPCA [as alias]
# Or: from sklearn.decomposition.RandomizedPCA import fit_transform [as alias]
    def _prepare_pca(self, data, max_n_components):
        """ Helper Function """
        from sklearn.decomposition import RandomizedPCA

        # sklearn < 0.11 does not support random_state argument
        kwargs = {'n_components': max_n_components, 'whiten': False}

        aspec = inspect.getargspec(RandomizedPCA.__init__)
        if 'random_state' not in aspec.args:
            warnings.warn('RandomizedPCA does not support the random_state '
                          'argument. Update scikit-learn to version 0.11 '
                          'or newer to get reproducible results.')
        else:
            kwargs['random_state'] = 0

        pca = RandomizedPCA(**kwargs)
        pca_data = pca.fit_transform(data.T)

        if self._explained_var > 1.0:
            if self.n_components is not None:  # normal n case
                self._comp_idx = np.arange(self.n_components)
                to_ica = pca_data[:, self._comp_idx]
            else:  # None case
                to_ica = pca_data
                self.n_components = pca_data.shape[1]
                self._comp_idx = np.arange(self.n_components)
        else:  # float case
            expl_var = pca.explained_variance_ratio_
            self._comp_idx = (np.where(expl_var.cumsum() <
                                      self._explained_var)[0])
            to_ica = pca_data[:, self._comp_idx]
            self.n_components = len(self._comp_idx)

        return to_ica, pca
Author: starzynski, Project: mne-python, Lines: 36, Source: ica.py

Example 9: do_pca

# Required import: from sklearn.decomposition import RandomizedPCA [as alias]
# Or: from sklearn.decomposition.RandomizedPCA import fit_transform [as alias]
def do_pca(corr_matrix: _nested_ndarray, num_dim: int,
           min_var_explanation: float = 0.7) -> _nested_ndarray:
    '''
    This method performs PCA on a self-correlation matrix, reducing the number of columns to `num_dim`.
    If such analysis does not sufficiently explain the underlying variance in the data, an exception is
    thrown.
    
    Args:

    * `corr_matrix` - a square matrix of correlations
    * `num_dim` - the number of dimensions to which the data should be reduced
    * `min_var_explanation` - the minimum fraction of the underlying data variance that should be explained

    Returns:

    > A matrix of the PCA result on `corr_matrix`.
    '''

    num_dim = int(num_dim)
    pca = PCA(n_components=num_dim, random_state=0)
    pca_result = pca.fit_transform(corr_matrix)
    var_ratio = pca.explained_variance_ratio_
    if sum(var_ratio) < min_var_explanation:
        raise PcaAccuracyException(
            'PCA doesn\'t explain enough of the variance in the data')

    return pca_result
Author: abhinavrk, Project: arkstock, Lines: 29, Source: helper.py

Example 10: rpca

# Required import: from sklearn.decomposition import RandomizedPCA [as alias]
# Or: from sklearn.decomposition.RandomizedPCA import fit_transform [as alias]
def rpca(numpy_file='../data/Paintings/two_class/Paintings_train.csv'):
    """ Performs randomized PCA on given numpy file.

    Given a numpy file of n-rows and n-cols, where the last column is
    the label and rest are features,n-rows are the samples.

    :type numpy_file: string
    :param numpy_file: The file name of numpy file to be analyzed.
    """
    import numpy as np
    import matplotlib.pyplot as pl
    import pandas as pd
    from sklearn.decomposition import RandomizedPCA

    all_data = np.loadtxt(numpy_file,delimiter=',')
    data = all_data[:,:-1]
    y = all_data[:,-1]
    pca = RandomizedPCA(n_components=2)
    X = pca.fit_transform(data)
    df = pd.DataFrame({"x": X[:, 0], "y": X[:, 1],\
                    "label":np.where(y==1, "realism", "abstract")})
    colors = ["red", "yellow"]
    for label, color in zip(df['label'].unique(), colors):
        mask = df['label']==label
        pl.scatter(df[mask]['x'], df[mask]['y'], c=color, label=label)
    pl.legend()
    pl.title('Randomized PCA analysis')
    pl.show()
Author: abhishekraok, Project: promising-patterns, Lines: 30, Source: utils_abhi.py

Example 11: calc_hog

# Required import: from sklearn.decomposition import RandomizedPCA [as alias]
# Or: from sklearn.decomposition.RandomizedPCA import fit_transform [as alias]
def calc_hog(fpaths, save=False):
    '''
    Compute histogram of gradients (HOG). Saves in batches to prevent memory issues.
    Input:
        fpaths : files on which HOG will be computed
        save : if true, output is saved to disk
    '''

    hogs = np.empty((len(fpaths), 15876))

    for i, fpath in enumerate(fpaths):
        img = imread(os.path.join(imgdir, fpath))
        if len(img.shape)==3:
            img = rgb2gray(img)
        # rescale so all feature vectors are the same length
        img_resize = resize(img, (128, 128))
        img_hog = hog(img_resize)

        hogs[i, :] = img_hog

    hogs_sc = scale(hogs)
    n_components = 15
    pca = RandomizedPCA(n_components=n_components)
    hogs_decomp = pca.fit_transform(hogs_sc)

    df = pd.DataFrame(hogs_decomp, index=[os.path.split(i)[1] for i in fpaths])
    df.index.name='fpath'
    df.columns = ['feat_hog_%2.2u' % i for i in range(1, n_components+1)]
    if save: df.to_csv('hog.csv')
    
    return df
Author: r-b-g-b, Project: AY250_HW, Lines: 33, Source: calcFeatures.py

Example 12: scatter

# Required import: from sklearn.decomposition import RandomizedPCA [as alias]
# Or: from sklearn.decomposition.RandomizedPCA import fit_transform [as alias]
def scatter(data, labels=None, title=None, name=None):
    """2d PCA scatter plot with optional class info

    Return the pca model to be able to introspect the components or transform
    new data with the same model.
    """
    data = atleast2d_or_csr(data)

    if data.shape[1] == 2:
        # No need for a PCA; set pca to None so the return statement below
        # still has a value to hand back in this branch.
        pca = None
        data_2d = data
    else:
        pca = RandomizedPCA(n_components=2)
        data_2d = pca.fit_transform(data)

    for i, c, m in zip(np.unique(labels), cycle(COLORS), cycle(MARKERS)):
        plt.scatter(data_2d[labels == i, 0], data_2d[labels == i, 1],
                    c=c, marker=m, label=i, alpha=0.5)

    plt.legend(loc='best')
    if title is None:
        title = "2D PCA scatter plot"
        if name is not None:
            title += " for " + name
    plt.xlabel('First Principal Component')
    plt.ylabel('Second Principal Component')
    plt.title(title)

    return pca
Author: chrinide, Project: oglearn, Lines: 31, Source: visualization.py

Example 13: main

# Required import: from sklearn.decomposition import RandomizedPCA [as alias]
# Or: from sklearn.decomposition.RandomizedPCA import fit_transform [as alias]
def main():
    ap = argparse.ArgumentParser()
    ap.add_argument("-i", "--image", required = True, help = "Path to the image")
    args = vars(ap.parse_args())

    image = cv2.imread(args["image"])
    rects, img = detect(image)

    cropped = []

    for idx, (x1, y1, x2, y2) in enumerate(rects):
        crop_img = image[y1:y1 + (y2 - y1), x1:x1 + (x2 - x1)]
        crop_img = cv2.resize(crop_img, (100,100), interpolation = cv2.INTER_AREA)
        cv2.imshow("image" + str(idx), crop_img)
        new_img = crop_img.reshape(crop_img.shape[0] * crop_img.shape[1], 3)
        cropped.append(new_img.flatten())

    # reduce feature size
    cropped_pca = []
    pca = RandomizedPCA(n_components=100)
    cropped_pca = pca.fit_transform(cropped)

    # training (hardcoded for now)
    clf   = SVC(probability=True)
    train = cropped_pca[:7]
    test  = cropped_pca[7:13]
    # clf.fit([[0,0],[1,1]], [1, 2])
    clf.fit(train, [1,2,2,1,2,1,1])

    for item in test:
        print clf.predict_proba(item)
        print clf.predict(item)

    cv2.waitKey(0)
Author: shulhi, Project: opencv-playground, Lines: 36, Source: crop_faces_ml.py
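One caveat on the prediction loop above: scikit-learn versions from 0.19 onward reject 1-D inputs to predict and predict_proba, so each item would need an explicit reshape. A hedged sketch of the adjustment, kept in the example's own Python 2 style:

for item in test:
    item = item.reshape(1, -1)  # newer scikit-learn requires 2-D input
    print clf.predict_proba(item)
    print clf.predict(item)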

Example 14: dimentionality_reduction

# Required import: from sklearn.decomposition import RandomizedPCA [as alias]
# Or: from sklearn.decomposition.RandomizedPCA import fit_transform [as alias]
def dimentionality_reduction(train_x , test_x):
	print "Dimentionality reduction to 10D on training and test data...."
	pca = RandomizedPCA(n_components=10)
	train_x = pca.fit_transform(train_x)
	test_x = pca.transform(test_x)
	print "Done."
	return train_x , test_x
Author: shreyanshd, Project: Image-Classifier, Lines: 9, Source: image_classifier.py
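This example shows the canonical split that several snippets above get wrong: fit_transform on the training data, plain transform on the test data, so both end up in one component space and no test information leaks into the fit. A self-contained check of the pattern on synthetic data (shapes are illustrative assumptions):

import numpy as np
from sklearn.decomposition import PCA  # drop-in for the removed RandomizedPCA

rng = np.random.RandomState(0)
train_x, test_x = rng.rand(100, 50), rng.rand(20, 50)
pca = PCA(n_components=10, svd_solver='randomized', random_state=0)
train_x = pca.fit_transform(train_x)  # learn the basis on training data only
test_x = pca.transform(test_x)        # project test data onto the same basis
assert train_x.shape == (100, 10) and test_x.shape == (20, 10)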

Example 15: detect

# Required import: from sklearn.decomposition import RandomizedPCA [as alias]
# Or: from sklearn.decomposition.RandomizedPCA import fit_transform [as alias]
    def detect(self, imageURLs, params):

        array = []
        for param in params:
            img = self.img_to_matrix(param['imageURL'])
            data = self.flatten_image(img)
            array.append(data)
        array = np.array(array)

        pca = RandomizedPCA(n_components=5)
        n_data = pca.fit_transform(array)

        clf = joblib.load('src/resource/models/model.pkl')
        result = clf.predict(n_data).tolist()

        for param, r in zip(params, result):
            raw_img = urllib2.urlopen(param['imageURL']).read()
            if r == 1:
                cntr = len([i for i in os.listdir("test/images/rain/") if 'rain' in i]) + 1
                path = "static/images/rain_" + str(cntr) + '.jpg'
                f = open(path, 'wb')
                f.write(raw_img)
                f.close()
                # Build the event record
                when = {'type': 'timestamp', 'time':param['time']}
                where = { "type": "Point", "coordinates": [param['longitude'], param['latitude']]}
                what = {'topic': {'value':u'雨'}, 'tweet': param['value']}
                who = [{"type": "url", "value": param['imageURL']},
                       {"value": "evwh <[email protected]>", "type": "author"}]
                event = {'observation':{'what': what, 'when': when, 'where': where, 'who': who}}
                self.connection['event']['TwitterImageRainSensor'].insert(event)
开发者ID:bulbulpaul,项目名称:image-detector,代码行数:33,代码来源:detect.py


Note: The sklearn.decomposition.RandomizedPCA.fit_transform examples in this article were compiled by 纯净天空 from open-source code and documentation platforms such as GitHub and MSDocs. The snippets are drawn from community open-source projects, and copyright remains with the original authors; consult each project's license before using or redistributing the code, and do not reproduce without permission.