

Python sklearn.clone Function Code Examples

This article collects typical usage examples of the sklearn.clone function in Python. If you are wondering what clone does, how to call it, or what it looks like in real code, the curated examples below should help.


The sections below present 15 code examples of the clone function, sorted by popularity by default.
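Before the examples, here is a minimal, self-contained sketch (not taken from any of the projects below) of what sklearn.clone does: it returns a new, unfitted estimator with the same hyperparameters, so one configured estimator can act as a template that is fitted independently on different data. The estimator, its parameters, and the random data are purely illustrative.

import numpy as np
from sklearn import clone
from sklearn.linear_model import LogisticRegression

# A configured but unfitted "template" estimator (hypothetical parameters).
base = LogisticRegression(C=10.0, max_iter=200)

rng = np.random.RandomState(0)
X1, y1 = rng.randn(50, 3), rng.randint(0, 2, 50)
X2, y2 = rng.randn(50, 3), rng.randint(0, 2, 50)

# clone() builds fresh, unfitted copies with identical hyperparameters,
# so each copy can be fitted independently on its own data.
model_a = clone(base).fit(X1, y1)
model_b = clone(base).fit(X2, y2)

assert model_a.get_params() == base.get_params()  # parameters are copied
assert not hasattr(base, "coef_")                  # the template itself stays unfitted

This is exactly the pattern the examples below rely on: cloning a base estimator before each fit avoids reusing fitted state across folds, ensembles, or parameter settings.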

Example 1: test_auto_init

def test_auto_init(n_samples, n_features, n_classes, n_components):
    # Test that auto choose the init as expected with every configuration
    # of order of n_samples, n_features, n_classes and n_components.
    rng = np.random.RandomState(42)
    nca_base = NeighborhoodComponentsAnalysis(init='auto',
                                              n_components=n_components,
                                              max_iter=1,
                                              random_state=rng)
    if n_classes >= n_samples:
        pass
        # n_classes > n_samples is impossible, and n_classes == n_samples
        # throws an error from lda but is an absurd case
    else:
        X = rng.randn(n_samples, n_features)
        y = np.tile(range(n_classes), n_samples // n_classes + 1)[:n_samples]
        if n_components > n_features:
            # this would return a ValueError, which is already tested in
            # test_params_validation
            pass
        else:
            nca = clone(nca_base)
            nca.fit(X, y)
            if n_components <= min(n_classes - 1, n_features):
                nca_other = clone(nca_base).set_params(init='lda')
            elif n_components < min(n_features, n_samples):
                nca_other = clone(nca_base).set_params(init='pca')
            else:
                nca_other = clone(nca_base).set_params(init='identity')
            nca_other.fit(X, y)
            assert_array_almost_equal(nca.components_, nca_other.components_)
Developer: allefpablo, Project: scikit-learn, Lines: 30, Source: test_nca.py

Example 2: calibrate_probs

def calibrate_probs(labels, weights, probs, logistic=False, random_state=11, threshold=0., return_calibrator=False, symmetrize=False):
    """
    Calibrate classifier output to probabilities, using 2-folding so that every sample receives an out-of-fold calibrated prediction
    
    :param probs: probabilities, numpy.array of shape [n_samples]
    :param labels: numpy.array of shape [n_samples] with labels 
    :param weights: numpy.array of shape [n_samples]
    :param threshold: float, to set labels 0/1 
    :param logistic: bool, use logistic or isotonic regression
    :param symmetrize: bool, do symmetric calibration, e.g. for B+, B-
    
    :return: calibrated probabilities
    """
    labels = (labels > threshold) * 1
    ind = numpy.arange(len(probs))
    ind_1, ind_2 = train_test_split(ind, random_state=random_state, train_size=0.5)
    
    calibrator = LogisticRegression(C=100) if logistic else IsotonicRegression(y_min=0, y_max=1, out_of_bounds='clip')
    est_calib_1, est_calib_2 = clone(calibrator), clone(calibrator)
    probs_1 = probs[ind_1]
    probs_2 = probs[ind_2]
    
    if logistic:
        probs_1 = numpy.clip(probs_1, 0.001, 0.999)
        probs_2 = numpy.clip(probs_2, 0.001, 0.999)
        probs_1 = logit(probs_1)[:, numpy.newaxis]
        probs_2 = logit(probs_2)[:, numpy.newaxis]
        if symmetrize:
            est_calib_1.fit(numpy.r_[probs_1, 1-probs_1], 
                            numpy.r_[labels[ind_1] > 0, labels[ind_1] <= 0])
            est_calib_2.fit(numpy.r_[probs_2, 1-probs_2], 
                            numpy.r_[labels[ind_2] > 0, labels[ind_2] <= 0])
        else:
            est_calib_1.fit(probs_1, labels[ind_1])
            est_calib_2.fit(probs_2, labels[ind_2])
    else:
        if symmetrize:
            est_calib_1.fit(numpy.r_[probs_1, 1-probs_1], 
                            numpy.r_[labels[ind_1] > 0, labels[ind_1] <= 0],
                            numpy.r_[weights[ind_1], weights[ind_1]])
            est_calib_2.fit(numpy.r_[probs_2, 1-probs_2], 
                            numpy.r_[labels[ind_2] > 0, labels[ind_2] <= 0],
                            numpy.r_[weights[ind_2], weights[ind_2]])
        else:
            est_calib_1.fit(probs_1, labels[ind_1], weights[ind_1])
            est_calib_2.fit(probs_2, labels[ind_2], weights[ind_2])
        
    calibrated_probs = numpy.zeros(len(probs))
    if logistic:
        calibrated_probs[ind_1] = est_calib_2.predict_proba(probs_1)[:, 1]
        calibrated_probs[ind_2] = est_calib_1.predict_proba(probs_2)[:, 1]
    else:
        calibrated_probs[ind_1] = est_calib_2.transform(probs_1)
        calibrated_probs[ind_2] = est_calib_1.transform(probs_2)
    if return_calibrator:
        return calibrated_probs, (est_calib_1, est_calib_2)
    else:
        return calibrated_probs
Developer: anuwish, Project: tagging_LHCb, Lines: 58, Source: utils.py

Example 3: __init__

 def __init__(
     self,
     estimator=LinearSVC(),
     masker=NiftiMasker(),
     labelizer=LabelEncoder(),
     reporter=Reporter(),
     estimated_name="coef_",
 ):
     self.estimator = clone(estimator)
     self.masker = clone(masker)
     self.labelizer = clone(labelizer)
     self.reporter = reporter
     self.estimated_name = estimated_name
Developer: GaelVaroquaux, Project: nignore, Lines: 13, Source: decoding.py

Example 4: classification_metrics

    def classification_metrics(self, X, y, n_iter=10, test_size=0.25, random_state=0):
        """
        Returns the ROC AUC of the classifier (binary labels only) and the proportion of correct predictions, estimated via CV
        @param y: all non-zero will be set to 1
        @param n_iter, test_size: StratifiedShuffleSplit parameters
        @param random_state: random state used for StratifiedShuffleSplit
        @return: roc, accuracy, accuracy_zero, accuracy_one
        """

        roc = 0
        accuracy = 0
        accuracy_zero = 0  # portion of zeros correctly predicted
        accuracy_one = 0  # portion of ones correctly predicted

        y = np.array([0 if d == 0 else 1 for d in y])
        prePipe = clone(self.common_preprocessing_pipe)
        pipeToUse = clone(self.classifier_pipe)
        cvObj = StratifiedShuffleSplit(y, n_iter=n_iter, test_size=test_size, random_state=random_state)

        for trainInds, testInds in cvObj:  # all cv data
            trainX = X[trainInds]
            trainY = y[trainInds]
            testX = X[testInds]
            testY = y[testInds]

            trainX = prePipe.fit_transform(trainX)
            testX = prePipe.transform(testX)
            pipeToUse.fit(trainX, trainY)
            y_scores = pipeToUse.predict_proba(testX)
            y_pred = pipeToUse.predict(testX)

            temp = next((i for i in range(len(testY)) if y_pred[i] == 1), None)

            roc += roc_auc_score(testY, y_scores[:, 1])
            accuracy += sum(y_pred == testY) * 1.0 / len(testY)
            accuracy_zero += 1.0 * sum(np.logical_and(y_pred == testY, testY == 0)) / sum(testY == 0)
            accuracy_one += 1.0 * sum(np.logical_and(y_pred == testY, testY == 1)) / sum(testY == 1)

        roc /= n_iter
        accuracy_zero /= n_iter
        accuracy_one /= n_iter
        accuracy /= n_iter

        print ">>> The classifier has roc = %0.3f, zero-accuracy = %0.3f, " "one-accuracy = %0.3f, overall accuracy = %0.3f." % (
            roc,
            accuracy_zero,
            accuracy_one,
            accuracy,
        )

        return roc, accuracy, accuracy_zero, accuracy_one
Developer: jennyyuejin, Project: Kaggle, Lines: 51, Source: BinThenReg.py

Example 5: _fit_stage

    def _fit_stage(self, X, y, rmTolerance):
        """
        fits one stage of gradient boosting
        @param X:
        @param y:
        @param rmTolerance: tolerance for 1D optimization
        @return: nothing
        """

        residuals = self.lossFunction.negative_gradient(y, self._currentPrediction)
        trainX, trainY, _, _ = splitTrainTest(X, residuals, 1-self.subsample)   # stochastic boosting. train only on a portion of the data

        if len(np.unique(trainY))==1:
            hm = MajorityPredictor().fit(trainY)
        else:
            cvObj = KFold(n=len(trainX), n_folds=self.cvNumFolds, indices=False, shuffle=True, random_state=self.randomState)

            # find the h that best mimics the negative gradient
            if self.n_jobs > 1:  # parallel
                n_jobs = max(1, self.n_jobs/len(self.learners), self.cvNumFolds)
                # n_jobs = 1
                pool = MyPool(processes=self.n_jobs, initializer=gbjjInit, initargs=(trainX, trainY, self.lossFunction, n_jobs, cvObj))
                temp = pool.map_async(gbjjInnerLoop, self.learners)
                temp.wait()
                h_res = temp.get()
                pool.close()
                pool.join()

            else:   # single thread
                h_res = []

                for learner in self.learners:
                    if self.verbosity >= 2:
                        print('Fitting learner:', learner)
                    l = clone(learner)
                    scores = jjcross_val_score(l, trainX, trainY, score_func=self.lossFunction, n_jobs=1, cv=cvObj)
                    h_res.append(scores.mean())

            hm = clone(self.learners[np.argsort(h_res)[0]])

        if self.verbosity>=1:
            print "The best classifier is", hm.__class__

        # find rm
        hm.fit(trainX, trainY)
        hmx = hm.predict(X)
        rm = minimize_scalar(lambda r: self.lossFunction(y, self._currentPrediction + r*hmx), tol=rmTolerance).x

        # append estimator and weight
        self._estimators.append((hm, rm))
Developer: jennyyuejin, Project: Kaggle, Lines: 50, Source: GradientBoost_JJ.py

Example 6: fit

    def fit(self, X, y, sample_weight=None):
        assert isinstance(self.base_estimators, dict), 'Estimators should be passed in a dictionary'
        assert len(X) == len(y), 'the lengths are different'
        assert sample_weight is None or len(sample_weight) == len(y), 'the lengths are different'
        if sample_weight is None:
            sample_weight = numpy.ones(len(y))
        assert self.feature_name in X.columns, 'there is no feature %s' % self.feature_name
        self.columns_order = X.columns

        column = numpy.array(X[self.feature_name])
        self.column_values = list(set(column))
        self.stayed_columns = dict()        # value -> list of columns
        self.common_features = dict()       # (value_from, value_to) -> list of columns
        self.classifiers = dict()           # (value_from, value_to, classifier_name) -> classifier
        self.final_classifiers = dict()     # (value, classifier_name) -> classifier
        rows_dict = dict()                  # (value) -> boolean list of rows
        self.final_columns_orders = dict()  # (value) -> list of features
        for value in self.column_values:
            rows = numpy.array(X[self.feature_name] == value)
            rows_dict[value] = rows
            x_part = X.loc[rows, :]
            cols = pandas.notnull(x_part).all()
            self.stayed_columns[value] = cols[cols==True].keys()

        for value_to, rows_to in rows_dict.items():
            columns_to = self.stayed_columns[value_to]
            new_features = pandas.DataFrame()
            for value_from, rows_from in rows_dict.items():
                if value_from == value_to:
                    continue
                common_columns = list(set(self.stayed_columns[value_from]).union(set(self.stayed_columns[value_to])))
                common_columns.remove(self.feature_name)
                self.common_features[value_from, value_to] = common_columns
                for name, estimator in self.base_estimators.items():
                    rows_from = rows_dict[value_from]
                    new_classifier = sklearn.clone(estimator)\
                        .fit(X.loc[rows_from, common_columns], y[rows_from], sample_weight=sample_weight[rows_from])

                    self.classifiers[value_from, value_to, name] = new_classifier
                    new_feature = new_classifier.predict_proba(X.loc[rows_to, common_columns])[:, 1]
                    new_features[str(value_from) + "_" + name] = new_feature
            X_to_part = X.loc[rows_to, columns_to]
            new_features = new_features.set_index(X_to_part.index)
            X_to_part = pandas.concat([X_to_part, new_features], axis=1)
            final_classifier = sklearn.clone(self.final_estimator)
            final_classifier.fit(X_to_part, y[rows_to], sample_weight=sample_weight[rows_to])
            self.final_columns_orders[value_to] = X_to_part.columns
            self.final_classifiers[value_to] = final_classifier
        return self
Developer: particleist, Project: lhcb_trigger_ml, Lines: 49, Source: metaclassifiers.py

Example 7: fit

    def fit(self, X, feature):
        try:
            feature = int(feature)
        except Exception:
            self.logger("feature should be int")
            raise TypeError("feature should be int")

        X = X.view(np.ndarray)
        self.input_col_count = X.shape[1]
        self.feature = feature
        my_X = Misc.exclude_cols(X, self.feature)
        my_y = X[:, self.feature]
        y_mean = np.mean(my_y)
        y_std = np.std(my_y)

        # ref: http://www.sciencedirect.com/science/article/pii/S0893608004002102
        self._learner.C = max(abs(y_mean + 3 * y_std), abs(y_mean - 3 * y_std))

        cvs = cv.KFold(len(X), 10, shuffle=True)
        output_errors = np.empty(0)
        for train, test in cvs:
            tmp_l = sklearn.clone(self._learner)
            tmp_l.fit(my_X[train, :], X[train, self.feature])
            output_errors = np.hstack((output_errors, tmp_l.predict(my_X[test, :]) - X[test, self.feature]))

        self.error_std = np.std(output_errors)
        self.error_mean = np.mean(output_errors)

        self._learner.fit(my_X, X[:, self.feature])

        return self
Developer: adrinjalali, Project: Network-Classifier, Lines: 31, Source: FCE.py

Example 8: lambda_choice

def lambda_choice(penalty, lambdas, n_folds, K, y, n_iter=10000, verbose=0, n_jobs=-1):
    estimator = fista.Fista(penalty=penalty, n_iter=n_iter)
    infos = Parallel(n_jobs=n_jobs, verbose=verbose)(
            delayed(_sub_info)(clone(estimator), K, y, K, y, lambda_)
           for lambda_ in lambdas)

    return infos
Developer: JeanKossaifi, Project: FISTA, Lines: 7, Source: cross_val.py

Example 9: run

 def run(self, directory="datasets/"):
     loader = ArffLoader("{}/{}.arff".format(directory, self.dataset))
     inputs, labels = loader.get_dataset()
     n_features = inputs.shape[1]
     if self.subset_size >= n_features:
         return None
     if self.normalize:
         preprocessing.normalize(inputs, copy=False)
     results = {
         "experiment": self,
         "scores": {scorer_name: numpy.zeros(self.n_runs) for scorer_name, _ in self.scorers},
         "score_times": {scorer_name: numpy.zeros(self.n_runs) for scorer_name, _ in self.scorers},
         "errors": {classifier_name: numpy.zeros(self.n_runs) for classifier_name, _ in self.classifiers},
         "classifier_times": {classifier_name: numpy.zeros(self.n_runs) for classifier_name, _ in self.classifiers}
     }
     for run in range(self.n_runs):
         numpy.random.seed(run)
         indices = numpy.random.choice(n_features, size=self.subset_size, replace=False)
         inputs_subset = inputs[:, indices].copy()
         for scorer_name, scorer in self.scorers:
             score, t = self._execute_score_run(run, scorer, inputs_subset, labels)
             results["scores"][scorer_name][run] = score
             results["score_times"][scorer_name][run] = t
         for classifier_name, classifier in self.classifiers:
             error, t = self._execute_classifier_run(run, sklearn.clone(classifier), inputs_subset, labels)
             results["errors"][classifier_name][run] = error
             results["classifier_times"][classifier_name][run] = t
     return results
Developer: etamponi, Project: emetrics, Lines: 28, Source: random_subsets_experiment.py

Example 10: param_search

def param_search(estimator, param_dict, n_iter=None, seed=None):
    """
    Generator for cloned copies of `estimator` set with parameters
    as specified by `param_dict`. `param_dict` can contain either lists
    of parameter values (grid search) or a scipy distribution function
    to be sampled from. If distributions, you must specify `n_iter`.

    Parameters:
    ___________

    estimator:
        sklearn-like estimator

    param_dict:
        dict of parameter name: values, where values can be an iterable
        or a distribution function

    n_iter:
        number of draws to take from parameter distributions
    """

    if n_iter is None:
        param_iter = ParameterGrid(param_dict)
    else:
        param_iter = ParameterSampler(param_dict,
                                      n_iter,
                                      random_state=seed)

    estimators = []
    for params in param_iter:
        new_estimator = sklearn.clone(estimator)
        new_estimator.set_params(**params)
        estimators.append(new_estimator)
    return estimators
Developer: Afey, Project: ramp, Lines: 34, Source: shortcuts.py

Example 11: make_per_customer_submission

def make_per_customer_submission(
    dataset_name, estimator, x_transformation=identity, y_transformation=identity, include_variables=None
):
    """BROKEN FOR NOW!"""
    with open(j(DATA_DIR, dataset_name, "per-customer-train.pickle"), "rb") as f:
        dv, train_customers, train_y, train_x, train_weights = pickle.load(f)

    choose_columnns = lambda x: x[
        :, np.array(get_feature_indexi(dv.get_feature_names(), include_variables), dtype="int")
    ]
    x_all_transforms = lambda x: x_transformation(choose_columnns(x))

    model = clone(estimator)
    try:
        model.fit(x_all_transforms(train_x), y_transformation(train_y), sample_weight=train_weights)
    except TypeError:
        print("%s doesn't support `sample_weight`. Ignoring it." % str(model))
        model.fit(x_all_transforms(train_x), y_transformation(train_y))

    with open(j(DATA_DIR, dataset_name, "per-customer-test.pickle"), "rb") as f:
        _, test_customers, test_y, test_x, test_weights = pickle.load(f)

    with open(j("submissions", "%s.csv" % datetime.datetime.now().strftime("%Y-%m-%dT%H-%M-%S")), "w") as f:
        f.write("customer_ID,plan\n")
        for c, ps in zip(test_customers, model.predict(x_all_transforms(test_x))):
            f.write("%s,%s\n" % (c, "".join(str(pp) for pp in ps)))
Developer: kalaidin, Project: kaggle-rainicorn, Lines: 26, Source: per_customer_eval.py

Example 12: fit

    def fit(self, original, target, original_weight=None, target_weight=None):
        """
        Prepare reweighting formula by training a sequence of trees.

        :param original: values from original distribution, array-like of shape [n_samples, n_features]
        :param target: values from target distribution, array-like of shape [n_samples, n_features]
        :param original_weight: weights for samples of original distributions
        :param target_weight: weights for samples of original distributions
        :return: self
        """
        original, original_weight = self._normalize_input(original, original_weight, normalize=False)
        target, target_weight = self._normalize_input(target, target_weight, normalize=False)

        folds_original = self._get_folds_column(len(original))
        folds_target = self._get_folds_column(len(target))
        for _ in range(self.n_folds):
            self.reweighters.append(clone(self.base_reweighter))

        original = numpy.array(original)
        target = numpy.array(target)

        for i in range(self.n_folds):
            self.reweighters[i].fit(original[folds_original != i, :], target[folds_target != i, :],
                                    original_weight=original_weight[folds_original != i],
                                    target_weight=target_weight[folds_target != i])
        self.train_length = len(original)
        return self
Developer: arogozhnikov, Project: hep_ml, Lines: 27, Source: reweight.py

Example 13: train

    def train(self,num_examples,deltas=list(range(1,6)),use_transformations=False,use_weights=True,verbosity=0):
        '''Train ensemble of classifiers using newly generated data for every
        member of the ensemble.

        '''
        time_start = time.time()
        
        self.classifiers = dict()

        for delta in deltas:
            self.classifiers[delta] = [] # list for ensemble of classifiers
            for i in range(self.n_estimators):
                # base classifier
                clf = clone(self.base_classifier)
                
                if use_weights:
                    train_x,train_y,train_w = self.make_weighted_training_data(create_examples(num_examples=num_examples,deltas=[delta]),use_transformations=use_transformations)
                    if verbosity>1:
                        print('delta={0}, #{1}: training with {2} weighted examples'.format(delta, i, len(train_x)))
                    # fit
                    clf.fit(train_x,train_y,sample_weight=train_w)
                else:
                    train_x,train_y = self.make_training_data(create_examples(num_examples=num_examples,deltas=[delta]),use_transformations=use_transformations)
                    if verbosity>1:
                        print('delta={0}, #{1}: training with {2} examples'.format(delta, i, len(train_x)))
                    clf.fit(train_x,train_y)
                    
                self.classifiers[delta].append(clf)
        
        time_end = time.time()
        if verbosity>0:
            print('training completed in {0:.1f} seconds'.format(time_end-time_start))
Developer: valisc, Project: reverse-game-of-life, Lines: 32, Source: local_classifier.py

Example 14: test_no_attributes_set_in_init

def test_no_attributes_set_in_init(estimator, preprocessor):
  """Check setting during init. Adapted from scikit-learn."""
  estimator = clone(estimator)
  estimator.set_params(preprocessor=preprocessor)
  if hasattr(type(estimator).__init__, "deprecated_original"):
      return

  init_params = _get_args(type(estimator).__init__)
  parents_init_params = [param for params_parent in
                         (_get_args(parent) for parent in
                          type(estimator).__mro__)
                         for param in params_parent]

  # Test for no setting apart from parameters during init
  invalid_attr = (set(vars(estimator)) - set(init_params) -
                  set(parents_init_params))
  assert not invalid_attr, \
      ("Estimator %s should not set any attribute apart"
       " from parameters during init. Found attributes %s."
       % (type(estimator).__name__, sorted(invalid_attr)))
  # Ensure that each parameter is set in init
  invalid_attr = (set(init_params) - set(vars(estimator)) -
                  set(["self"]))
  assert not invalid_attr, \
      ("Estimator %s should store all parameters"
       " as an attribute during init. Did not find "
       "attributes %s." % (type(estimator).__name__, sorted(invalid_attr)))
Developer: all-umass, Project: metric-learn, Lines: 27, Source: test_sklearn_compat.py

Example 15: test_various_scoring_on_tuples_learners

def test_various_scoring_on_tuples_learners(estimator, build_dataset,
                                            with_preprocessor):
  """Tests that scikit-learn's scoring returns something finite,
  for other scoring than default scoring. (List of scikit-learn's scores can be
  found in sklearn.metrics.scorer). For each type of output (predict,
  predict_proba, decision_function), we test a bunch of scores.
  We only test on pairs learners because quadruplets don't have a y argument.
  """
  input_data, labels, preprocessor, _ = build_dataset(with_preprocessor)
  estimator = clone(estimator)
  estimator.set_params(preprocessor=preprocessor)
  set_random_state(estimator)

  # scores that need a predict function: every tuples learner should have a
  # predict function (whether the pair is of positive samples or negative
  # samples)
  for scoring in ['accuracy', 'f1']:
    check_score_is_finite(scoring, estimator, input_data, labels)
  # scores that need a predict_proba:
  if hasattr(estimator, "predict_proba"):
    for scoring in ['neg_log_loss', 'brier_score']:
      check_score_is_finite(scoring, estimator, input_data, labels)
  # scores that need a decision_function: every tuples learner should have a
  # decision function (the metric between points)
  for scoring in ['roc_auc', 'average_precision', 'precision', 'recall']:
    check_score_is_finite(scoring, estimator, input_data, labels)
Developer: all-umass, Project: metric-learn, Lines: 26, Source: test_sklearn_compat.py


Note: The sklearn.clone examples in this article were compiled by 纯净天空 from open-source code and documentation platforms such as GitHub and MSDocs. The snippets are taken from open-source projects contributed by their respective authors, and copyright of the source code remains with those authors; please consult each project's license before redistributing or reusing the code.