本文整理汇总了Python中sklearn.ensemble.RandomTreesEmbedding.fit_transform方法的典型用法代码示例。如果您正苦于以下问题:Python RandomTreesEmbedding.fit_transform方法的具体用法?Python RandomTreesEmbedding.fit_transform怎么用?Python RandomTreesEmbedding.fit_transform使用的例子?那么恭喜您, 这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在类sklearn.ensemble.RandomTreesEmbedding
的用法示例。
在下文中一共展示了RandomTreesEmbedding.fit_transform方法的10个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Python代码示例。
示例1: test_random_hasher_sparse_data
# 需要导入模块: from sklearn.ensemble import RandomTreesEmbedding [as 别名]
# 或者: from sklearn.ensemble.RandomTreesEmbedding import fit_transform [as 别名]
def test_random_hasher_sparse_data():
    # RandomTreesEmbedding must yield the same leaf encoding whether X is
    # given dense or as a scipy CSC sparse matrix (same random_state, so the
    # two fits build identical forests).
    X, y = datasets.make_multilabel_classification(return_indicator=True,
                                                   random_state=0)
    hasher = RandomTreesEmbedding(n_estimators=30, random_state=1)
    X_transformed = hasher.fit_transform(X)
    X_transformed_sparse = hasher.fit_transform(csc_matrix(X))
    assert_array_equal(X_transformed_sparse.toarray(), X_transformed.toarray())
示例2: test_random_trees_dense_equal
# 需要导入模块: from sklearn.ensemble import RandomTreesEmbedding [as 别名]
# 或者: from sklearn.ensemble.RandomTreesEmbedding import fit_transform [as 别名]
def test_random_trees_dense_equal():
    # Test that the `sparse_output` parameter of RandomTreesEmbedding
    # works by returning the same array for both argument values.
    # Create the RTEs with identical seeds so the forests match.
    hasher_dense = RandomTreesEmbedding(n_estimators=10, sparse_output=False, random_state=0)
    hasher_sparse = RandomTreesEmbedding(n_estimators=10, sparse_output=True, random_state=0)
    X, y = datasets.make_circles(factor=0.5)
    X_transformed_dense = hasher_dense.fit_transform(X)
    X_transformed_sparse = hasher_sparse.fit_transform(X)
    # Assert that dense and sparse hashers produce the same array.
    assert_array_equal(X_transformed_sparse.toarray(), X_transformed_dense)
示例3: rt_embedding
# 需要导入模块: from sklearn.ensemble import RandomTreesEmbedding [as 别名]
# 或者: from sklearn.ensemble.RandomTreesEmbedding import fit_transform [as 别名]
def rt_embedding(X, n_estimators=100, max_depth=10, n_jobs=-1):
    """Embed data matrix X in a random forest.

    Parameters
    ----------
    X : array, shape (n_samples, n_features)
        The data matrix.
    n_estimators : int, optional
        The number of trees in the embedding.
    max_depth : int, optional
        The maximum depth of each tree.
    n_jobs : int, optional
        Number of compute jobs when fitting the trees. -1 means number
        of processors on the current computer.

    Returns
    -------
    rt : RandomTreesEmbedding object
        The embedding object.
    X_transformed : sparse matrix
        The transformed data.
    """
    rt = RandomTreesEmbedding(n_estimators=n_estimators, max_depth=max_depth,
                              n_jobs=n_jobs)
    X_transformed = rt.fit_transform(X)
    return rt, X_transformed
示例4: test_random_trees_dense_type
# 需要导入模块: from sklearn.ensemble import RandomTreesEmbedding [as 别名]
# 或者: from sklearn.ensemble.RandomTreesEmbedding import fit_transform [as 别名]
def test_random_trees_dense_type():
    # Test that the `sparse_output` parameter of RandomTreesEmbedding
    # works by returning a dense array.
    # Create the RTE with sparse=False
    hasher = RandomTreesEmbedding(n_estimators=10, sparse_output=False)
    X, y = datasets.make_circles(factor=0.5)
    X_transformed = hasher.fit_transform(X)
    # Assert that type is ndarray, not scipy.sparse.csr.csr_matrix
    assert_equal(type(X_transformed), np.ndarray)
示例5: do_TRT
# 需要导入模块: from sklearn.ensemble import RandomTreesEmbedding [as 别名]
# 或者: from sklearn.ensemble.RandomTreesEmbedding import fit_transform [as 别名]
def do_TRT(ne=10, md=3):
    """Hash the glass dataset with RandomTreesEmbedding and score BernoulliNB.

    Parameters
    ----------
    ne : int, optional
        Number of trees in the embedding.
    md : int, optional
        Maximum depth of each tree.

    Returns
    -------
    float
        BernoulliNB accuracy on the hashed test split.
    """
    from sklearn.ensemble import RandomTreesEmbedding
    from sklearn.naive_bayes import BernoulliNB
    train_X, train_Y, test_X, test_Y = analysis_glass()
    # Fit the hasher on train+test together so both splits share one encoding.
    all_X = np.vstack((train_X, test_X))
    hasher = RandomTreesEmbedding(n_estimators=ne,
                                  random_state=0, max_depth=md)
    all_X_trans = hasher.fit_transform(all_X)
    # BUG FIX: the original sliced the *raw* `all_X` (so the embedding was
    # never used) and hard-coded the split at row 149. Slice the transformed
    # matrix at the actual training-set size instead.
    n_train = train_X.shape[0]
    train_X_trans = all_X_trans[:n_train, :]
    test_X_trans = all_X_trans[n_train:, :]
    nb = BernoulliNB()
    nb.fit(train_X_trans, train_Y)
    return nb.score(test_X_trans, test_Y)
示例6: test_random_hasher
# 需要导入模块: from sklearn.ensemble import RandomTreesEmbedding [as 别名]
# 或者: from sklearn.ensemble.RandomTreesEmbedding import fit_transform [as 别名]
def test_random_hasher():
    # test random forest hashing on circles dataset
    # make sure that it is linearly separable.
    # even after projected to two pca dimensions
    hasher = RandomTreesEmbedding(n_estimators=30, random_state=0)
    X, y = datasets.make_circles(factor=0.5)
    X_transformed = hasher.fit_transform(X)
    # test fit and transform: a fresh hasher with the same seed must give
    # the same encoding via fit().transform() as via fit_transform().
    hasher = RandomTreesEmbedding(n_estimators=30, random_state=0)
    assert_array_equal(hasher.fit(X).transform(X).toarray(), X_transformed.toarray())
    # one leaf active per data point per forest
    assert_equal(X_transformed.shape[0], X.shape[0])
    assert_array_equal(X_transformed.sum(axis=1), hasher.n_estimators)
    pca = RandomizedPCA(n_components=2)
    X_reduced = pca.fit_transform(X_transformed)
    linear_clf = LinearSVC()
    linear_clf.fit(X_reduced, y)
    assert_equal(linear_clf.score(X_reduced, y), 1.0)
示例7: make_circles
# 需要导入模块: from sklearn.ensemble import RandomTreesEmbedding [as 别名]
# 或者: from sklearn.ensemble.RandomTreesEmbedding import fit_transform [as 别名]
original data.
"""
# NOTE(review): this example is a truncated excerpt -- the enclosing function
# definition and the start of its docstring were lost when the page was
# scraped (only the docstring tail "original data." survives above), so the
# lines below are kept byte-for-byte as top-level statements.
import pylab as pl
import numpy as np
from sklearn.datasets import make_circles
from sklearn.ensemble import RandomTreesEmbedding, ExtraTreesClassifier
# NOTE(review): RandomizedPCA was removed from modern scikit-learn; this
# excerpt presumably targets an old sklearn version -- confirm before reuse.
from sklearn.decomposition import RandomizedPCA
from sklearn.naive_bayes import BernoulliNB
# make a synthetic dataset
X, y = make_circles(factor=0.5, random_state=0, noise=0.05)
# use RandomTreesEmbedding to transform data
hasher = RandomTreesEmbedding(n_estimators=10, random_state=0, max_depth=3)
X_transformed = hasher.fit_transform(X)
# Visualize result using PCA
pca = RandomizedPCA(n_components=2)
X_reduced = pca.fit_transform(X_transformed)
# Learn a Naive Bayes classifier on the transformed data
nb = BernoulliNB()
nb.fit(X_transformed, y)
# Learn an ExtraTreesClassifier for comparison
trees = ExtraTreesClassifier(max_depth=3, n_estimators=10, random_state=0)
trees.fit(X, y)
示例8: random_forest_embedding
# 需要导入模块: from sklearn.ensemble import RandomTreesEmbedding [as 别名]
# 或者: from sklearn.ensemble.RandomTreesEmbedding import fit_transform [as 别名]
def random_forest_embedding():
    """Visualize a RandomTreesEmbedding of the circles dataset.

    Embeds a 2-D two-circles dataset into a high-dimensional sparse space
    with totally-random trees, reduces it back to 2-D with truncated SVD for
    plotting, and compares a BernoulliNB trained on the embedding against an
    ExtraTreesClassifier trained on the raw data. Shows a 2x2 figure.
    """
    import numpy as np
    import matplotlib.pyplot as plt
    from sklearn.datasets import make_circles
    from sklearn.ensemble import RandomTreesEmbedding, ExtraTreesClassifier
    from sklearn.decomposition import TruncatedSVD
    from sklearn.naive_bayes import BernoulliNB
    # Build the dataset: X is (100, 2), y is a 100-element 0/1 label array.
    X, y = make_circles(factor=0.5, random_state=0, noise=0.05)
    # Transform the data. RandomTreesEmbedding maps each sample to the leaves
    # it reaches, giving a very high-dimensional, sparse representation that
    # can be beneficial for classification.
    hasher = RandomTreesEmbedding(n_estimators=10, random_state=0, max_depth=3)
    X_transformed = hasher.fit_transform(X)
    # Reduce the sparse embedding to 2-D for visualization.
    pca = TruncatedSVD(n_components=2)
    X_reduced = pca.fit_transform(X_transformed)
    # Learn a naive Bayes classifier on the high-dimensional sparse embedding.
    nb = BernoulliNB()
    nb.fit(X_transformed, y)
    # Learn an ExtraTreesClassifier on the original 2-D data for comparison.
    trees = ExtraTreesClassifier(max_depth=3, n_estimators=10, random_state=0)
    trees.fit(X, y)
    # Scatter plots of the original and reduced data.
    fig = plt.figure(figsize=(9, 8))
    ax = plt.subplot(221)
    ax.scatter(X[:, 0], X[:, 1], c=y, s=50)
    ax.set_title("Original Data(2d)")
    ax.set_xticks(())
    ax.set_yticks(())
    ax = plt.subplot(222)
    # The labels are unchanged by the transform, so coloring by y shows how
    # well the embedding separates the two classes.
    ax.scatter(X_reduced[:, 0], X_reduced[:, 1], c=y, s=50)
    ax.set_title("pca reduction (2d) of transformed data (%dd)" % X_transformed.shape[1])
    ax.set_xticks(())
    ax.set_yticks(())
    # Plot the decision surfaces in the original space on a dense grid.
    h = 0.01
    x_min, x_max = X[:, 0].min() - 0.5, X[:, 0].max() + 0.5
    y_min, y_max = X[:, 1].min() - 0.5, X[:, 1].max() + 0.5
    xx, yy = np.meshgrid(np.arange(x_min, x_max, h), np.arange(y_min, y_max, h))
    # Transform the grid with the fitted embedding, then predict with nb.
    transformed_grid = hasher.transform(np.c_[xx.ravel(), yy.ravel()])
    y_grid_pred = nb.predict_proba(transformed_grid)[:, 1]
    ax = plt.subplot(223)
    ax.set_title("Naive Bayes on Transformed data")
    ax.pcolormesh(xx, yy, y_grid_pred.reshape(xx.shape))
    ax.scatter(X[:, 0], X[:, 1], c=y, s=50)
    ax.set_ylim(-1.4, 1.4)
    ax.set_xlim(-1.4, 1.4)
    ax.set_xticks(())
    ax.set_yticks(())
    # Predict on the same grid with the extra-trees model for comparison.
    y_grid_pred = trees.predict_proba(np.c_[xx.ravel(), yy.ravel()])[:, 1]
    ax = plt.subplot(224)
    ax.set_title("ExtraTrees predictions")
    ax.pcolormesh(xx, yy, y_grid_pred.reshape(xx.shape))
    ax.scatter(X[:, 0], X[:, 1], c=y, s=50)
    ax.set_ylim(-1.4, 1.4)
    ax.set_xlim(-1.4, 1.4)
    ax.set_xticks(())
    ax.set_yticks(())
    plt.tight_layout()
    plt.show()
示例9: Clustering
# 需要导入模块: from sklearn.ensemble import RandomTreesEmbedding [as 别名]
# 或者: from sklearn.ensemble.RandomTreesEmbedding import fit_transform [as 别名]
class Clustering():
    """Cluster compounds via a random-trees embedding.

    Pipeline: RandomTreesEmbedding -> TruncatedSVD(2 components) ->
    silhouette-based choice of cluster count -> per-cluster one-class SVM
    decision boundaries used to accept/reject testing compounds.
    """

    def __init__(self, compounds, output=False, seed=False):
        # Seed numpy's global RNG so the embedding is reproducible.
        np.random.seed(seed=seed)
        self.seed = seed
        self.compounds = compounds
        self.count = 0
        self.count_1 = 0
        self.output = output
        self.tools = clustertools()
        if self.output is not False:
            self.figures = clusterfigures(self.compounds)
        self.testcompound = []

    def cluster_training(self, train, distance=False):
        """Cluster the training set and build per-cluster one-class SVMs.

        Parameters
        ----------
        train : object
            Carries the training feature matrix in its ``train`` attribute.
        distance : array-like or False, optional
            Precomputed pairwise distance matrix. When False, a Jaccard
            distance matrix is computed from the training data.
        """
        self.train_matrix = train.train
        # Step 1: make sure a distance matrix is in place. It is best to feed
        # an existing distance matrix if one is available.
        if distance is False:
            self.p_feat_matrix = self.tools.pairwise_distance_matrix(train.train, 'jaccard')
        else:
            self.p_feat_matrix = distance
        # Step 2: embed the data with a random ensemble of trees -- a
        # transformation into a high-dimensional, sparse space.
        self.clf = RandomTreesEmbedding(n_estimators=512, random_state=self.seed, max_depth=5)
        X_transformed = self.clf.fit_transform(self.train_matrix)
        # Step 3: truncated SVD (similar to PCA, but operating on the sample
        # vectors directly rather than the covariance matrix) reduces the
        # sparse embedding to its first two components.
        self.svd = TruncatedSVD(n_components=2)
        self.svd.clf = self.svd.fit(X_transformed)
        self.model = self.svd.clf.transform(X_transformed)
        # Step 4: determine the cluster count that maximizes the silhouette
        # score on the reduced representation.
        (self.cluster_assignment,
         self.cluster_num,
         self.cluster_score) = self.tools.identify_accurate_number_of_clusters(self.model, self.compounds)
        self.individualclusters = []
        # Group the reduced data points by their assigned cluster.
        for i in range(self.cluster_num):
            self.individualclusters.append([])
            for j in range(len(self.cluster_assignment)):
                if self.cluster_assignment[j] == i:
                    self.individualclusters[i].append(self.model[j, :])
            self.individualclusters[i] = np.array(self.individualclusters[i])
        # Step 5: use the clustering to generate one-class SVM decision
        # boundaries, one per cluster.
        (self.clf_OCSVM,
         self.OCSVM_model) = self.tools.determine_test_similarity(self.individualclusters)

    def cluster_testing(self, testing):
        """Project testing compounds and keep those inside the OCSVM margin.

        Returns
        -------
        list of int
            Indices of testing samples accepted by the one-class SVM.
        """
        # Create a RandomTreesEmbedding of the testing data.
        clf = RandomTreesEmbedding(n_estimators=512, random_state=self.seed, max_depth=5)
        # NOTE(review): this refits self.clf on the *testing* data and stashes
        # the fitted estimator on clf.fit, clobbering the training-time
        # embedding. Preserved as-is -- confirm this is intentional upstream.
        clf.fit = self.clf.fit(testing)
        X_transformed = self.clf.fit_transform(testing)
        n_components = 2
        # SVD-transform the testing embedding.
        svd = TruncatedSVD(n_components=n_components)
        svd.clf = svd.fit(X_transformed)
        svd.model = svd.clf.transform(X_transformed)
        # Re-project the training data through the freshly fitted models.
        train_transformed = clf.fit.transform(self.train_matrix)
        train_model = svd.clf.transform(train_transformed)
        # Generate the one-class SVM rejection criterion from training data.
        (clf_OCSVM_t, OCSVMmodel_t) = self.tools.determine_testing_data_similarity(train_model)
        predicted = []
        # Keep only testing compounds inside the rejection margin.
        for i in range(len(svd.model)):
            p = OCSVMmodel_t.predict(svd.model[i, :].reshape(1, -1))
            pred = OCSVMmodel_t.decision_function(svd.model[i, :].reshape(1, -1)).ravel()
            if (p == 1):
                predicted.append(i)
        return predicted
示例10: UnsupervisedVisualBagClassifier
# 需要导入模块: from sklearn.ensemble import RandomTreesEmbedding [as 别名]
# 或者: from sklearn.ensemble.RandomTreesEmbedding import fit_transform [as 别名]
class UnsupervisedVisualBagClassifier(Classifier):
"""
===============================
UnsupervisedVisualBagClassifier
===============================
1. Unsupervised
2. Binary bag of words
3. Totally random trees
"""
def __init__(self, coordinator, base_classifier, n_estimators=10,
             max_depth=5, min_samples_split=2, min_samples_leaf=1,
             n_jobs=-1, random_state=None, verbose=0, min_density=None):
    """Set up the visual bagger (a RandomTreesEmbedding) and base classifier.

    The tree parameters are forwarded verbatim to RandomTreesEmbedding;
    coordinator and base_classifier are handed to the Classifier base class.
    """
    Classifier.__init__(self, coordinator, base_classifier)
    # Number of histogram bins; set once the bagger is fitted in _preprocess.
    self.histoSize = 0
    # NOTE(review): `min_density` was removed from modern scikit-learn, so
    # this constructor assumes an old sklearn version -- confirm.
    self._visualBagger = RandomTreesEmbedding(n_estimators=n_estimators,
                                              max_depth=max_depth,
                                              min_samples_split=min_samples_split,
                                              min_samples_leaf=min_samples_leaf,
                                              n_jobs=n_jobs,
                                              random_state=random_state,
                                              verbose=verbose,
                                              min_density=min_density)
def _preprocess(self, image_buffer, learningPhase):
    """Extract image features and turn them into bag-of-words data.

    Parameters
    ----------
    image_buffer : ImageBuffer
        The images to process.
    learningPhase : bool
        True to fit the visual bagger on the extracted features,
        False to only transform with the already-fitted bagger.

    Returns
    -------
    scipy CSR sparse matrix
        Per-image histograms, or the raw leaf encoding when each image
        produced exactly one feature vector.
    """
    if learningPhase:
        self.setTask(1, "Extracting the features (model creation)")
    else:
        self.setTask(1, "Extracting the features (prediction)")
    X_pred, y = self._coord.process(image_buffer,
                                    learningPhase=learningPhase)
    y_user = self._convertLabel(y)
    # Cleaning up
    self._coord.clean(y)
    del y
    self.endTask()
    # Bag-of-word transformation
    self.setTask(1, "Transforming data into bag-of-words (Tree part)")
    X2 = None
    if learningPhase:
        X2 = self._visualBagger.fit_transform(X_pred, y_user)
        self.histoSize = X2.shape[1]
    else:
        X2 = self._visualBagger.transform(X_pred)
    # Cleaning up
    self._coord.clean(X_pred)
    del X_pred
    del y_user
    self.endTask()
    # Number of feature vectors produced per image.
    nbFactor = X2.shape[0] // len(image_buffer)
    if not sps.isspmatrix_csr(X2):
        X2 = X2.tocsr()
    if nbFactor == 1:
        return X2
    # Several vectors per image: collapse them into one histogram per image.
    self.setTask(len(image_buffer), "Transforming data into bag-of-words (Histogram part)")
    nbTrees = self._visualBagger.n_estimators
    X3 = computeHistogram(len(image_buffer), nbFactor, nbTrees, X2)
    self.endTask()
    # Cleaning up
    del X2  # Should be useless
    return X3
def fit_histogram(self, hist, y):
    """Fit the underlying classifier on precomputed histograms.

    Returns self so calls can be chained.
    """
    # Delegating the classification to the wrapped base classifier.
    self.setTask(1, "Learning the model")
    self._classifier.fit(hist, y)
    self.endTask()
    return self
def fit(self, image_buffer):
"""
Fits the data contained in the :class:`ImageBuffer` instance
Parameters
-----------
image_buffer : :class:`ImageBuffer`
The data to learn from
Return
-------
self : :class:`Classifier`
This instance
#.........这里部分代码省略.........