当前位置: 首页>>代码示例>>Python>>正文


Python RandomTreesEmbedding.fit方法代码示例

本文整理汇总了Python中sklearn.ensemble.RandomTreesEmbedding.fit方法的典型用法代码示例。如果您正苦于以下问题:Python RandomTreesEmbedding.fit方法的具体用法?Python RandomTreesEmbedding.fit怎么用?Python RandomTreesEmbedding.fit使用的例子?那么恭喜您, 这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在sklearn.ensemble.RandomTreesEmbedding的用法示例。


在下文中一共展示了RandomTreesEmbedding.fit方法的9个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Python代码示例。

示例1: random_forest_embedding

# 需要导入模块: from sklearn.ensemble import RandomTreesEmbedding [as 别名]
# 或者: from sklearn.ensemble.RandomTreesEmbedding import fit [as 别名]
    def random_forest_embedding(self, data, n_estimators=30, random_state=0, max_depth=3, min_samples_leaf=1):
        """
        learn a density with random forest representation
        """
        """
        scikit-learn only supports axis-align sepration, let's first stick to this and see how it works
        """
        # n_estimators = 400
        # random_state = 0
        # max_depth = 5
        rf_mdl = RandomTreesEmbedding(
            n_estimators=n_estimators,
            random_state=random_state,
            max_depth=max_depth,
            min_samples_leaf=min_samples_leaf)
        rf_mdl.fit(data)

        indices = rf_mdl.apply(data)
        samples_by_node = defaultdict(list)
        idx_by_node = defaultdict(list)
        #kde_by_node = defaultdict(KernelDensity)

        for idx, sample, est_data in zip(range(len(data)), data, indices):
            for est_ind, leaf in enumerate(est_data):
                samples_by_node[ est_ind, leaf ].append(sample)
                idx_by_node[ est_ind, leaf ].append(idx)

        res_mdl = dict()
        res_mdl['rf_mdl'] = rf_mdl
        res_mdl['samples_dict'] = samples_by_node
        res_mdl['idx_dict'] = idx_by_node
        # res_mdl['kde_dict'] = kde_by_node
        return res_mdl
开发者ID:navigator8972,项目名称:pytrajkin,代码行数:35,代码来源:pytrajkin_randemb.py

示例2: random_forest_embedding

# 需要导入模块: from sklearn.ensemble import RandomTreesEmbedding [as 别名]
# 或者: from sklearn.ensemble.RandomTreesEmbedding import fit [as 别名]
def random_forest_embedding(data, n_estimators=400, random_state=0, max_depth=5, min_samples_leaf=1):
    """
    learn a density with random forest representation
    """
    """
    scikit-learn only supports axis-align sepration, let's first stick to this and see how it works
    """
    # n_estimators = 400
    # random_state = 0
    # max_depth = 5
    rf_mdl = RandomTreesEmbedding(
        n_estimators=n_estimators, 
        random_state=random_state, 
        max_depth=max_depth,
        min_samples_leaf=min_samples_leaf)
    rf_mdl.fit(data)
    
    # forestClf.fit(trainingData, trainingLabels)
    # indices = forestClf.apply(trainingData)
    # samples_by_node = defaultdict(list)
    # for est_ind, est_data in enumerate(indices.T):
    # for sample_ind, leaf in enumerate(est_data):
    # samples_by_node[ est_ind, leaf ].append(sample_ind)
    # indexOfSamples = samples_by_node[0,10]
    # # samples_by_node[treeIndex, leafIndex within that tree]
    # leafNodeSamples = trainingAngles[indexOfSamples]
    # kde = KernelDensity(kernel='gaussian', bandwidth=0.2).fit(leafNodeSamples)

    indices = rf_mdl.apply(data)
    samples_by_node = defaultdict(list)
    idx_by_node = defaultdict(list)
    kde_by_node = defaultdict(KernelDensity)

    for idx, sample, est_data in zip(range(len(data)), data, indices):
        for est_ind, leaf in enumerate(est_data):
            samples_by_node[ est_ind, leaf ].append(sample)
            idx_by_node[ est_ind, leaf ].append(idx)

        
    #Kernel Density Estimation for each leaf node
    # for k,v in samples_by_node.iteritems():
    #     est_ind, leaf = k
          # params = {'bandwidth': np.logspace(-1, 1, 20)}
          # grid = GridSearchCV(KernelDensity(), params)
          # grid.fit(v)

    #     kde_by_node[ est_ind, leaf ] = grid.best_estimator_

    res_mdl = dict()
    res_mdl['rf_mdl'] = rf_mdl
    res_mdl['samples_dict'] = samples_by_node
    res_mdl['idx_dict'] = idx_by_node
    # res_mdl['kde_dict'] = kde_by_node
    return res_mdl
开发者ID:navigator8972,项目名称:nao_writing,代码行数:56,代码来源:utils.py

示例3: test_random_hasher

# 需要导入模块: from sklearn.ensemble import RandomTreesEmbedding [as 别名]
# 或者: from sklearn.ensemble.RandomTreesEmbedding import fit [as 别名]
def test_random_hasher():
    # test random forest hashing on circles dataset
    # make sure that it is linearly separable.
    # even after projected to two pca dimensions
    hasher = RandomTreesEmbedding(n_estimators=30, random_state=0)
    X, y = datasets.make_circles(factor=0.5)
    X_transformed = hasher.fit_transform(X)

    # test fit and transform:
    hasher = RandomTreesEmbedding(n_estimators=30, random_state=0)
    assert_array_equal(hasher.fit(X).transform(X).toarray(), X_transformed.toarray())

    # one leaf active per data point per forest
    assert_equal(X_transformed.shape[0], X.shape[0])
    assert_array_equal(X_transformed.sum(axis=1), hasher.n_estimators)
    pca = RandomizedPCA(n_components=2)
    X_reduced = pca.fit_transform(X_transformed)
    linear_clf = LinearSVC()
    linear_clf.fit(X_reduced, y)
    assert_equal(linear_clf.score(X_reduced, y), 1.0)
开发者ID:neufang,项目名称:scikit-learn,代码行数:22,代码来源:test_forest.py

示例4: cluster_testing

# 需要导入模块: from sklearn.ensemble import RandomTreesEmbedding [as 别名]
# 或者: from sklearn.ensemble.RandomTreesEmbedding import fit [as 别名]
 def cluster_testing(self, testing):
     '''Create RandomTreesEmbedding of data'''
     clf = RandomTreesEmbedding(n_estimators=512, random_state=self.seed, max_depth=5)
     '''Fit testing data to training model'''
     clf.fit = self.clf.fit(testing)
     X_transformed = self.clf.fit_transform(testing)
     n_components = 2
     '''SVD transform data'''
     svd = TruncatedSVD(n_components=n_components)
     svd.clf = svd.fit(X_transformed)
     svd.model = svd.clf.transform(X_transformed)
     '''Train transformed data using original model'''
     train_transformed = clf.fit.transform(self.train_matrix)
     train_model = svd.clf.transform(train_transformed)
     '''Generate One Class SVM rejection criteria'''
     (clf_OCSVM_t, OCSVMmodel_t) = self.tools.determine_testing_data_similarity(train_model)
     predicted = []
     '''Remove testing compounds outside rejection margin'''
     for i in range(len(svd.model)):
         p = OCSVMmodel_t.predict(svd.model[i, :].reshape(1, -1))
         pred = OCSVMmodel_t.decision_function(svd.model[i, :].reshape(1, -1)).ravel()
         if (p == 1):
             predicted.append(i)
     return predicted
开发者ID:sandialabs,项目名称:BioCompoundML,代码行数:26,代码来源:cluster.py

示例5: docopt

# 需要导入模块: from sklearn.ensemble import RandomTreesEmbedding [as 别名]
# 或者: from sklearn.ensemble.RandomTreesEmbedding import fit [as 别名]
    --n_estimators=<n>    Number of trees in the forest [default:10]
"""


import pandas as pd
import sys
import numpy as np
import cPickle
from sklearn.ensemble import RandomTreesEmbedding
from docopt import docopt

arguments = docopt(__doc__)
input_path = arguments["<training_set>"]
n = int(arguments["--n_estimators"])
output_path = arguments["<mapper_path>"]

print "Reading Data"
data = pd.read_csv(input_path,header=None).values[:,1:]


print "Constructing Mapper"
mapper = RandomTreesEmbedding(n_estimators=n)
mapper.fit(data)

print "Saving Mapper to {}".format(output_path)
with open(output_path,"w") as f:
    cPickle.dump(mapper,f)

    

开发者ID:celestrist,项目名称:image_retrieval,代码行数:29,代码来源:make_mapper.py

示例6: make_classification

# 需要导入模块: from sklearn.ensemble import RandomTreesEmbedding [as 别名]
# 或者: from sklearn.ensemble.RandomTreesEmbedding import fit [as 别名]
n_estimator = 10
X, y = make_classification(n_samples=80000)
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.5)
# It is important to train the ensemble of trees on a different subset
# of the training data than the linear regression model to avoid
# overfitting, in particular if the total number of leaves is
# similar to the number of training samples
X_train, X_train_lr, y_train, y_train_lr = train_test_split(X_train,
                                                            y_train,
                                                            test_size=0.5)

# Unsupervised transformation based on totally random trees
rt = RandomTreesEmbedding(max_depth=3, n_estimators=n_estimator)
rt_lm = LogisticRegression()
rt.fit(X_train, y_train)
rt_lm.fit(rt.transform(X_train_lr), y_train_lr)

y_pred_rt = rt_lm.predict_proba(rt.transform(X_test))[:, 1]
fpr_rt_lm, tpr_rt_lm, _ = roc_curve(y_test, y_pred_rt)


# Supervised transformation based on random forests
rf = RandomForestClassifier(max_depth=3, n_estimators=n_estimator)
rf_enc = OneHotEncoder()
rf_lm = LogisticRegression()
rf.fit(X_train, y_train)
rf_enc.fit(rf.apply(X_train))
rf_lm.fit(rf_enc.transform(rf.apply(X_train_lr)), y_train_lr)

y_pred_rf_lm = rf_lm.predict_proba(rf_enc.transform(rf.apply(X_test)))[:, 1]
开发者ID:bwignall,项目名称:scikit-learn,代码行数:32,代码来源:plot_feature_transformation.py

示例7: make_circles

# 需要导入模块: from sklearn.ensemble import RandomTreesEmbedding [as 别名]
# 或者: from sklearn.ensemble.RandomTreesEmbedding import fit [as 别名]
"""
import numpy as np
import matplotlib.pyplot as plt

from sklearn.datasets import make_circles
from sklearn.ensemble import RandomTreesEmbedding, ExtraTreesClassifier
from sklearn.decomposition import TruncatedSVD
from sklearn.feature_selection import SelectFromModel
from sklearn.naive_bayes import BernoulliNB

# make a synthetic dataset
X, y = make_circles(factor=0.5, random_state=0, noise=0.05)

# use RandomTreesEmbedding to transform data
hasher = RandomTreesEmbedding(n_estimators=10, random_state=0, max_depth=3)
hasher.fit(X)
model = SelectFromModel(hasher, prefit=True)
X_transformed = model.transform(X)

# Visualize result using PCA
pca = TruncatedSVD(n_components=2)
X_reduced = pca.fit_transform(X_transformed)

# Learn a Naive Bayes classifier on the transformed data
nb = BernoulliNB()
nb.fit(X_transformed, y)


# Learn an ExtraTreesClassifier for comparison
trees = ExtraTreesClassifier(max_depth=3, n_estimators=10, random_state=0)
trees.fit(X, y)
开发者ID:ramja,项目名称:scikit-learn,代码行数:33,代码来源:plot_random_forest_embedding.py

示例8: EnsembleIOC

# 需要导入模块: from sklearn.ensemble import RandomTreesEmbedding [as 别名]
# 或者: from sklearn.ensemble.RandomTreesEmbedding import fit [as 别名]
class EnsembleIOC(BaseEstimator, RegressorMixin):

    def __init__(self,  n_estimators=20, 
                        max_depth=5, min_samples_split=10, min_samples_leaf=10,
                        random_state=0,
                        em_itrs=5,
                        regularization=0.05,
                        passive_dyn_func=None,
                        passive_dyn_ctrl=None,
                        passive_dyn_noise=None,
                        verbose=False):
        '''
        n_estimators        - number of ensembled models
        ...                 - a batch of parameters used for RandomTreesEmbedding, see relevant documents
        em_itrs             - maximum number of EM iterations to take
        regularization      - small positive scalar to prevent singularity of matrix inversion
        passive_dyn_func    - function to evaluate passive dynamics; None for MaxEnt model
        passive_dyn_ctrl    - function to return the control matrix which might depend on the state...
        passive_dyn_noise   - covariance of a Gaussian noise; only applicable when passive_dyn is Gaussian; None for MaxEnt model
                                note this implies a dynamical system with constant input gain. It is extendable to have state dependent
                                input gain then we need covariance for each data point
        verbose             - output training information
        '''
        BaseEstimator.__init__(self)

        self.n_estimators=n_estimators
        self.max_depth=max_depth
        self.min_samples_split=min_samples_split
        self.min_samples_leaf=min_samples_leaf
        self.random_state=random_state
        self.em_itrs=em_itrs
        self.reg=regularization
        self.passive_dyn_func=passive_dyn_func
        self.passive_dyn_ctrl=passive_dyn_ctrl
        self.passive_dyn_noise=passive_dyn_noise
        self.verbose=verbose
        return

    def fit(self, X, y=None):
        '''
        y could be the array of starting state of the demonstrated trajectories/policies
        if it is None, it implicitly implies a MaxEnt model. Other wise, it serves as the feature mapping
        of the starting state. This data might also be potentially used for learning the passive dynamics
        for a pure model-free learning with some regressors and regularization.
        '''
        #check parameters...
        assert(type(self.n_estimators)==int)
        assert(self.n_estimators > 0)
        assert(type(self.max_depth)==int)
        assert(self.max_depth > 0)
        assert(type(self.min_samples_split)==int)
        assert(self.min_samples_split > 0)
        assert(type(self.min_samples_leaf)==int)
        assert(self.min_samples_leaf > 0)
        assert(type(self.em_itrs)==int)

        #an initial partitioning of data with random forest embedding
        self.random_embedding_mdl_ = RandomTreesEmbedding(
            n_estimators=self.n_estimators,
            max_depth=self.max_depth,
            min_samples_split=self.min_samples_split,
            min_samples_leaf=self.min_samples_leaf,
            random_state=self.random_state
            )

        #we probably do not need the data type to differentiate it is a demonstration
        #of trajectory or commanded state, do we?
        if self.passive_dyn_func is not None and self.passive_dyn_ctrl is not None and self.passive_dyn_noise is not None:
            self.random_embedding_mdl_.fit(X[:, X.shape[1]/2:])
            indices = self.random_embedding_mdl_.apply(X[:, X.shape[1]/2:])
            # X_tmp = np.array(X)
            # X_tmp[:, X.shape[1]/2:] = X_tmp[:, X.shape[1]/2:] - X_tmp[:, :X.shape[1]/2]
            # self.random_embedding_mdl_.fit(X_tmp)

            # indices = self.random_embedding_mdl_.apply(X_tmp)
        else:
            self.random_embedding_mdl_.fit(X)
            #figure out indices
            indices = self.random_embedding_mdl_.apply(X)

        partitioned_data = defaultdict(list)

        leaf_idx = defaultdict(set)
        weight_idx = defaultdict(float)
        #group data belongs to the same partition and have the weights...
        #is weight really necessary for EM steps? Hmm, seems to be for the initialization
        #d_idx: data index; p_idx: partition index (comprised of estimator index and leaf index)
        for d_idx, d, p_idx in zip(range(len(X)), X, indices):
            for e_idx, l_idx in enumerate(p_idx):
                partitioned_data[e_idx, l_idx].append(d)
                leaf_idx[e_idx] |= {l_idx}

            for e_idx, l_idx in enumerate(p_idx):
                weight_idx[e_idx, l_idx] = float(len(partitioned_data[e_idx, l_idx])) / len(X)
                # weight_idx[e_idx, l_idx] = 1. / len(p_idx)

        #for each grouped data, solve an easy IOC problem by assuming quadratic cost-to-go function
        #note that, if the passive dynamics need to be learned, extra steps is needed to train a regressor with weighted data
        #otherwise, just a simply gaussian for each conditional probability distribution model
        self.estimators_ = []
#.........这里部分代码省略.........
开发者ID:KlasKronander,项目名称:ensemble_ioc,代码行数:103,代码来源:ensemble_ioc.py

示例9: Clustering

# 需要导入模块: from sklearn.ensemble import RandomTreesEmbedding [as 别名]
# 或者: from sklearn.ensemble.RandomTreesEmbedding import fit [as 别名]
class Clustering():
    def __init__(self, compounds, output=False, seed=False):
        np.random.seed(seed=seed)
        self.seed = seed
        self.compounds = compounds
        self.count = 0
        self.count_1 = 0
        self.output = output
        self.tools = clustertools()
        if self.output is not False:
            self.figures = clusterfigures(self.compounds)
        self.testcompound = []

    def cluster_training(self, train, distance=False):
        '''
        This is the basic clustering function
        '''
        self.train_matrix = train.train
        '''
        Step one is to make sure that their is a distance matrix in place.
        It is best to feed an existing distance matrix if one is available.
        '''
        if distance is False:
            self.p_feat_matrix = self.tools.pairwise_distance_matrix(train.train, 'jaccard')
        else:
            self.p_feat_matrix = distance
        '''
        Step two is to cluster your data using a random trees embedding. This a
        random ensemble of trees. This is a transformation on the data, into a
        high dimensional, sparse space
        '''
        self.clf = RandomTreesEmbedding(n_estimators=512, random_state=self.seed, max_depth=5)
        #self.clf.fit(self.train_matrix)
        X_transformed = self.clf.fit_transform(self.train_matrix)
        '''
        Step three performs truncated SVD (similar to PCA). It operates on the sample
        vectors directly, rather than the covariance matrix. It takes the first two
        components. Essentially this reduces the sparse embedding to a low dimensional
        representation.
        '''
        self.svd = TruncatedSVD(n_components=2)
        self.svd.clf = self.svd.fit(X_transformed)
        self.model = self.svd.clf.transform(X_transformed)
        '''
        The next step is to take the transformed model and the original dataset and
        determine the max silhouette_score of clusters
        '''
        (self.cluster_assignment,
         self.cluster_num,
         self.cluster_score) = self.tools.identify_accurate_number_of_clusters(self.model, self.compounds)
        self.individualclusters = []
        '''
        The individual datapoints are assessed with regard to the best clustering scheme
        '''
        for i in range(self.cluster_num):
            self.individualclusters.append([])
            for j in range(len(self.cluster_assignment)):
                if self.cluster_assignment[j] == i:
                    self.individualclusters[i].append(self.model[j, :])
            self.individualclusters[i] = np.array(self.individualclusters[i])
        '''
        Finally, this clustering scheme is used to generate a one class Support
        Vector Machine decision boundary.
        '''
        (self.clf_OCSVM,
         self.OCSVM_model) = self.tools.determine_test_similarity(self.individualclusters)

    def cluster_testing(self, testing):
        '''Create RandomTreesEmbedding of data'''
        clf = RandomTreesEmbedding(n_estimators=512, random_state=self.seed, max_depth=5)
        '''Fit testing data to training model'''
        clf.fit = self.clf.fit(testing)
        X_transformed = self.clf.fit_transform(testing)
        n_components = 2
        '''SVD transform data'''
        svd = TruncatedSVD(n_components=n_components)
        svd.clf = svd.fit(X_transformed)
        svd.model = svd.clf.transform(X_transformed)
        '''Train transformed data using original model'''
        train_transformed = clf.fit.transform(self.train_matrix)
        train_model = svd.clf.transform(train_transformed)
        '''Generate One Class SVM rejection criteria'''
        (clf_OCSVM_t, OCSVMmodel_t) = self.tools.determine_testing_data_similarity(train_model)
        predicted = []
        '''Remove testing compounds outside rejection margin'''
        for i in range(len(svd.model)):
            p = OCSVMmodel_t.predict(svd.model[i, :].reshape(1, -1))
            pred = OCSVMmodel_t.decision_function(svd.model[i, :].reshape(1, -1)).ravel()
            if (p == 1):
                predicted.append(i)
        return predicted
开发者ID:sandialabs,项目名称:BioCompoundML,代码行数:93,代码来源:cluster.py


注:本文中的sklearn.ensemble.RandomTreesEmbedding.fit方法示例由纯净天空整理自Github/MSDocs等开源代码及文档管理平台,相关代码片段筛选自各路编程大神贡献的开源项目,源码版权归原作者所有,传播和使用请参考对应项目的License;未经允许,请勿转载。