本文整理汇总了Python中sklearn.ensemble.RandomTreesEmbedding.transform方法的典型用法代码示例。如果您正苦于以下问题:Python RandomTreesEmbedding.transform方法的具体用法?Python RandomTreesEmbedding.transform怎么用?Python RandomTreesEmbedding.transform使用的例子?那么恭喜您, 这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在类sklearn.ensemble.RandomTreesEmbedding
的用法示例。
在下文中一共展示了RandomTreesEmbedding.transform方法的4个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Python代码示例。
示例1: reduction
# Required module: from sklearn.ensemble import RandomTreesEmbedding [as alias]
# Method under demonstration: RandomTreesEmbedding.transform [as alias]
# NOTE(review): this snippet is a fragment of a larger script -- `pl` (pylab),
# `np`, `X`, `y`, `X_reduced`, `X_transformed`, `hasher` (presumably a fitted
# RandomTreesEmbedding), `nb` (a fitted naive Bayes model) and `trees` (a
# fitted tree classifier) are all defined earlier in the original example and
# are not visible here -- confirm against the full source.
# Scatter plot of the dimensionality-reduced embedding, colored by class label.
ax = pl.subplot(222)
ax.scatter(X_reduced[:, 0], X_reduced[:, 1], c=y, s=50)
ax.set_title("PCA reduction (2d) of transformed data (%dd)" %
             X_transformed.shape[1])
ax.set_xticks(())
ax.set_yticks(())
# Plot the decision in original space. For that, we will assign a color to each
# point in the mesh [x_min, m_max] x [y_min, y_max].
h = .01
x_min, x_max = X[:, 0].min() - .5, X[:, 0].max() + .5
y_min, y_max = X[:, 1].min() - .5, X[:, 1].max() + .5
xx, yy = np.meshgrid(np.arange(x_min, x_max, h), np.arange(y_min, y_max, h))
# transform grid using RandomTreesEmbedding, then score it with the
# naive Bayes model that was trained on the embedded features
transformed_grid = hasher.transform(np.c_[xx.ravel(), yy.ravel()])
y_grid_pred = nb.predict_proba(transformed_grid)[:, 1]
ax = pl.subplot(223)
ax.set_title("Naive Bayes on Transformed data")
ax.pcolormesh(xx, yy, y_grid_pred.reshape(xx.shape))
ax.scatter(X[:, 0], X[:, 1], c=y, s=50)
ax.set_ylim(-1.4, 1.4)
ax.set_xlim(-1.4, 1.4)
ax.set_xticks(())
ax.set_yticks(())
# transform grid using ExtraTreesClassifier, which works directly on the
# raw 2-D coordinates (no embedding step needed)
y_grid_pred = trees.predict_proba(np.c_[xx.ravel(), yy.ravel()])[:, 1]
ax = pl.subplot(224)
示例2: make_classification
# 需要导入模块: from sklearn.ensemble import RandomTreesEmbedding [as 别名]
# 或者: from sklearn.ensemble.RandomTreesEmbedding import transform [as 别名]
# Number of trees shared by both ensemble pipelines.
n_estimator = 10
X, y = make_classification(n_samples=80000)
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.5)

# The tree ensembles and the downstream logistic regressions are trained on
# disjoint halves of the training data: when the total number of leaves is
# close to the number of training samples, reusing the same half would
# overfit the stacked model.
X_train, X_train_lr, y_train, y_train_lr = train_test_split(
    X_train, y_train, test_size=0.5)

# Pipeline 1: unsupervised leaf-indicator features from totally random
# trees, fed into a logistic regression.
rt = RandomTreesEmbedding(max_depth=3, n_estimators=n_estimator)
rt_lm = LogisticRegression()
rt.fit(X_train, y_train)
rt_lm.fit(rt.transform(X_train_lr), y_train_lr)
y_pred_rt = rt_lm.predict_proba(rt.transform(X_test))[:, 1]
fpr_rt_lm, tpr_rt_lm, _ = roc_curve(y_test, y_pred_rt)

# Pipeline 2: supervised random forest; the leaf index reached in each
# tree is one-hot encoded, then a logistic regression is fit on top.
rf = RandomForestClassifier(max_depth=3, n_estimators=n_estimator)
rf_enc = OneHotEncoder()
rf_lm = LogisticRegression()
rf.fit(X_train, y_train)
rf_enc.fit(rf.apply(X_train))
leaves_lr = rf.apply(X_train_lr)
rf_lm.fit(rf_enc.transform(leaves_lr), y_train_lr)
leaves_test = rf.apply(X_test)
y_pred_rf_lm = rf_lm.predict_proba(rf_enc.transform(leaves_test))[:, 1]
fpr_rf_lm, tpr_rf_lm, _ = roc_curve(y_test, y_pred_rf_lm)
示例3: random_forest_embedding
# 需要导入模块: from sklearn.ensemble import RandomTreesEmbedding [as 别名]
# 或者: from sklearn.ensemble.RandomTreesEmbedding import transform [as 别名]
def random_forest_embedding():
    """Compare BernoulliNB on RandomTreesEmbedding features with ExtraTrees.

    Two concentric circles are mapped to a high-dimensional sparse binary
    encoding by totally random trees; a Bernoulli naive Bayes classifier is
    trained on that encoding, while an ExtraTreesClassifier is trained on the
    raw 2-D coordinates for comparison.  A 2x2 figure shows the original
    data, a TruncatedSVD projection of the embedding, and both decision
    surfaces.
    """
    import numpy as np
    import matplotlib.pyplot as plt
    from sklearn.datasets import make_circles
    from sklearn.ensemble import RandomTreesEmbedding, ExtraTreesClassifier
    from sklearn.decomposition import TruncatedSVD
    from sklearn.naive_bayes import BernoulliNB

    # Toy data set: two concentric circles, labels y in {0, 1}.
    X, y = make_circles(factor=0.5, random_state=0, noise=0.05)

    # Embed the 2-D points into a sparse, high-dimensional binary space:
    # one indicator per leaf reached in each of the 10 random trees.
    embedder = RandomTreesEmbedding(n_estimators=10, random_state=0,
                                    max_depth=3)
    X_embedded = embedder.fit_transform(X)

    # 2-D projection of the embedding, used only for visualization.
    svd = TruncatedSVD(n_components=2)
    X_projected = svd.fit_transform(X_embedded)

    # Naive Bayes learns from the sparse binary features ...
    bayes = BernoulliNB()
    bayes.fit(X_embedded, y)
    # ... while the ExtraTrees baseline learns from the raw coordinates.
    forest = ExtraTreesClassifier(max_depth=3, n_estimators=10,
                                  random_state=0)
    forest.fit(X, y)

    # Panel 1: the original data, colored by class label.
    fig = plt.figure(figsize=(9, 8))
    ax = plt.subplot(221)
    ax.scatter(X[:, 0], X[:, 1], c=y, s=50)
    ax.set_title("Original Data(2d)")
    ax.set_xticks(())
    ax.set_yticks(())

    # Panel 2: the projected embedding.  Labels are unchanged by the
    # transform, so coloring by y shows how well the embedding separates
    # the two circles.
    ax = plt.subplot(222)
    ax.scatter(X_projected[:, 0], X_projected[:, 1], c=y, s=50)
    ax.set_title("pca reduction (2d) of transformed data (%dd)"
                 % X_embedded.shape[1])
    ax.set_xticks(())
    ax.set_yticks(())

    # Dense grid over the original space on which to draw the decision
    # surfaces of both models.
    step = 0.01
    x_min, x_max = X[:, 0].min() - 0.5, X[:, 0].max() + 0.5
    y_min, y_max = X[:, 1].min() - 0.5, X[:, 1].max() + 0.5
    grid_x, grid_y = np.meshgrid(np.arange(x_min, x_max, step),
                                 np.arange(y_min, y_max, step))
    grid_points = np.c_[grid_x.ravel(), grid_y.ravel()]

    # Panel 3: naive Bayes predictions -- the grid must be embedded first.
    grid_embedded = embedder.transform(grid_points)
    proba = bayes.predict_proba(grid_embedded)[:, 1]
    ax = plt.subplot(223)
    ax.set_title("Naive Bayes on Transformed data")
    ax.pcolormesh(grid_x, grid_y, proba.reshape(grid_x.shape))
    ax.scatter(X[:, 0], X[:, 1], c=y, s=50)
    ax.set_ylim(-1.4, 1.4)
    ax.set_xlim(-1.4, 1.4)
    ax.set_xticks(())
    ax.set_yticks(())

    # Panel 4: ExtraTrees predictions, computed directly on the raw grid.
    proba = forest.predict_proba(grid_points)[:, 1]
    ax = plt.subplot(224)
    ax.set_title("ExtraTrees predictions")
    ax.pcolormesh(grid_x, grid_y, proba.reshape(grid_x.shape))
    ax.scatter(X[:, 0], X[:, 1], c=y, s=50)
    ax.set_ylim(-1.4, 1.4)
    ax.set_xlim(-1.4, 1.4)
    ax.set_xticks(())
    ax.set_yticks(())

    plt.tight_layout()
    plt.show()
示例4: UnsupervisedVisualBagClassifier
# 需要导入模块: from sklearn.ensemble import RandomTreesEmbedding [as 别名]
# 或者: from sklearn.ensemble.RandomTreesEmbedding import transform [as 别名]
class UnsupervisedVisualBagClassifier(Classifier):
"""
===============================
UnsupervisedVisualBagClassifier
===============================
1. Unsupervised
2. Binary bag of words
3. Totally random trees
"""
def __init__(self, coordinator, base_classifier, n_estimators=10,
             max_depth=5, min_samples_split=2, min_samples_leaf=1,
             n_jobs=-1, random_state=None, verbose=0, min_density=None):
    """Build the classifier and its random-trees visual bagger.

    Parameters
    ----------
    coordinator : object
        Feature-extraction coordinator, forwarded to ``Classifier.__init__``.
    base_classifier : object
        Final estimator trained on the bag-of-words histograms, forwarded
        to ``Classifier.__init__``.
    n_estimators, max_depth, min_samples_split, min_samples_leaf,
    n_jobs, random_state, verbose, min_density :
        Passed through unchanged to
        :class:`sklearn.ensemble.RandomTreesEmbedding`.

    NOTE(review): ``min_density`` was deprecated and then removed from
    scikit-learn's tree ensembles (gone since ~0.16); on a modern sklearn
    this constructor call likely raises ``TypeError`` -- confirm the
    pinned scikit-learn version before reuse.
    """
    Classifier.__init__(self, coordinator, base_classifier)
    # Width of the bag-of-words histogram; set during the first
    # fit_transform in _preprocess (0 until then).
    self.histoSize = 0
    # Unsupervised embedding that maps feature vectors to leaf-indicator
    # codes ("visual words").
    self._visualBagger = RandomTreesEmbedding(n_estimators=n_estimators,
                                              max_depth=max_depth,
                                              min_samples_split=min_samples_split,
                                              min_samples_leaf=min_samples_leaf,
                                              n_jobs=n_jobs,
                                              random_state=random_state,
                                              verbose=verbose,
                                              min_density=min_density)
def _preprocess(self, image_buffer, learningPhase):
    """Extract features and turn them into bag-of-words histograms.

    Pipeline: (1) run the coordinator's feature extraction on the image
    buffer, (2) embed the feature vectors with the random-trees visual
    bagger (fit_transform when learning, transform otherwise), (3) if
    several feature rows were produced per image, aggregate them into one
    histogram per image via ``computeHistogram``.

    Parameters
    ----------
    image_buffer : ImageBuffer-like
        The images to process.
    learningPhase : bool
        True while fitting (the bagger is fit here); False at prediction.

    Returns
    -------
    sparse matrix
        CSR matrix with one row per image: either the raw embedding (when
        there is exactly one feature row per image) or the aggregated
        bag-of-words histograms.
    """
    if learningPhase:
        self.setTask(1, "Extracting the features (model creation)")
    else:
        self.setTask(1, "Extracting the features (prediction)")
    X_pred, y = self._coord.process(image_buffer,
                                    learningPhase=learningPhase)
    # Convert labels to the user-facing representation before discarding
    # the coordinator's internal label object.
    y_user = self._convertLabel(y)
    #Cleaning up
    self._coord.clean(y)
    del y
    self.endTask()
    #Bag-of-word transformation
    self.setTask(1, "Transforming data into bag-of-words (Tree part)")
    X2 = None
    if learningPhase:
        # Fitting pass: learn the embedding and record the histogram width.
        X2 = self._visualBagger.fit_transform(X_pred, y_user)
        self.histoSize = X2.shape[1]
    else:
        X2 = self._visualBagger.transform(X_pred)
    #Cleaning up
    self._coord.clean(X_pred)
    del X_pred
    del y_user
    self.endTask()
    # Number of embedded rows produced per image; presumably the
    # coordinator emits the same count for every image -- TODO confirm.
    nbFactor = X2.shape[0] // len(image_buffer)
    # Downstream code (computeHistogram, the classifier) expects CSR.
    if not sps.isspmatrix_csr(X2):
        X2 = X2.tocsr()
    if nbFactor == 1:
        # One row per image already: the embedding is the final result.
        return X2
    self.setTask(len(image_buffer), "Transforming data into bag-of-words (Histogram part)")
    nbTrees = self._visualBagger.n_estimators
    X3 = computeHistogram(len(image_buffer), nbFactor, nbTrees, X2)
    self.endTask()
    #Cleaning up
    del X2 # Should be useless
    return X3
def fit_histogram(self, hist, y):
    """Fit the wrapped base classifier on precomputed histograms.

    Parameters
    ----------
    hist : array-like / sparse matrix
        Bag-of-words histograms, one row per image (as produced by
        ``_preprocess``).
    y : array-like
        Target labels, one per row of ``hist``.

    Returns
    -------
    self : Classifier
        This instance, for call chaining.
    """
    #Delegating the classification to the underlying base classifier
    self.setTask(1, "Learning the model")
    self._classifier.fit(hist, y)
    self.endTask()
    return self
def fit(self, image_buffer):
"""
Fits the data contained in the :class:`ImageBuffer` instance
Parameters
-----------
image_buffer : :class:`ImageBuffer`
The data to learn from
Return
-------
self : :class:`Classifier`
This instance
#.........这里部分代码省略.........