This article collects typical usage examples of the Python function sklearn.base.clone. If you are unsure what clone does, how to call it, or what it looks like in real code, the curated examples below should help.
The 15 code examples of the clone function shown below are ordered by popularity.
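As context for the examples, clone(estimator) builds a new estimator of the same class with the same constructor parameters but none of the fitted state, which is why the snippets below call it before every fresh fit. Here is a minimal sketch; the LogisticRegression and its parameter values are illustrative assumptions, not taken from the examples:

from sklearn.base import clone
from sklearn.linear_model import LogisticRegression

original = LogisticRegression(C=0.5, max_iter=200)  # illustrative parameter values
copy = clone(original)                               # fresh, unfitted estimator

assert copy is not original                          # a new object ...
assert copy.get_params() == original.get_params()    # ... with identical constructor parameters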
Example 1: _fit_X_y
def _fit_X_y(self, X_clf, y_clf, X_cal, y_cal):
    clf = clone(self.base_estimator)

    if isinstance(clf, RegressorMixin):
        clf = as_classifier(clf)

    clf.fit(X_clf, y_clf)

    if self.calibration is None:
        return clf, None, None
    else:
        if self.calibration == "kde":
            cal_num = KernelDensity()
            cal_den = KernelDensity()
        elif self.calibration == "histogram":
            cal_num = Histogram(bins=100, range=[(0.0, 1.0)])
            cal_den = Histogram(bins=100, range=[(0.0, 1.0)])
        else:
            cal_num = clone(self.calibration)
            cal_den = clone(self.calibration)

        X_num = clf.predict_proba(X_cal[y_cal == 0])[:, 0]
        X_den = clf.predict_proba(X_cal[y_cal == 1])[:, 0]
        cal_num.fit(X_num.reshape(-1, 1))
        cal_den.fit(X_den.reshape(-1, 1))

        return clf, cal_num, cal_den
Example 2: pool_entropy_h
def pool_entropy_h(X, y, candidate_mask, train_mask, classifier, n_candidates,
                   pool_n, n_jobs=-1, **kwargs):
    """ Return the candidates that will minimise the expected entropy of the predictions.

        Parameters
        ----------
        X : array
            The feature matrix of all the data points.

        y : array
            The labels of the data points.

        candidate_mask : boolean array
            Mask selecting the potential training candidates.

        train_mask : boolean array
            Mask selecting the examples already in the training set.

        classifier : Classifier object
            A classifier already fitted on the training data.

        n_candidates : int
            The number of best candidates to return.

        pool_n : int
            The size of the sample pool used in estimating the entropy.

        n_jobs : int
            The number of parallel jobs (-1 to use all cores).

        Returns
        -------
        best_candidates : array of int
            The indices of the best candidates.
    """

    classes = classifier.classes_  # sorted lexicographically
    n_classes = len(classes)
    candidate_size = np.sum(train_mask)
    n_features = X.shape[1]
    entropy = np.empty(len(candidate_mask))
    entropy[:] = np.inf

    # the probabilities used to calculate expected value of pool
    probs = classifier.predict_proba(X[candidate_mask])

    # copy the classifier (avoid modifying the original classifier)
    classifier_plus = clone(classifier)

    # construct the sample pool (used to estimate the entropy)
    unlabelled_indices = np.where(~train_mask)[0]
    pool_indices = permutation(unlabelled_indices)[:pool_n]
    pool_mask = np.zeros(len(candidate_mask), dtype=bool)
    pool_mask[pool_indices] = True

    # look at each candidate in parallel
    candidate_indices = np.where(candidate_mask)[0]
    results = Parallel(n_jobs=n_jobs)(delayed(_parallel_entropy_estimate)(
        X, y.copy(), train_mask.copy(), pool_mask,
        clone(classifier_plus), classes, n_classes, probs, i, index)
        for i, index in enumerate(candidate_indices))

    indices, expected = zip(*results)
    indices, expected = np.asarray(indices), np.asarray(expected)
    assert not np.isnan(expected).any(), 'Some expected values are undefined.'
    entropy[indices] = expected

    # pick the candidates with the smallest expected entropy
    best_candidates = np.argsort(entropy)[:n_candidates]
    return best_candidates
Example 3: run_classifier
def run_classifier(out_folder, trend_probs, referrers, y, train, test):
    F = referrers  # static features

    etree = create_grid_search('lr', n_jobs=1)

    y_pred = trend_probs[test].argmax(axis=1)
    save_results(out_folder, 'tl-base-lr', y_pred, y[test])

    aux = clone(etree)
    aux.fit(F[train], y[train])
    y_pred = aux.predict(F[test])
    save_results(out_folder, 'tree-feats', y_pred, y[test])

    aux = clone(etree)
    aux.fit(trend_probs[train], y[train])
    y_pred = aux.predict(trend_probs[test])
    save_results(out_folder, 'tree-probs', y_pred, y[test])

    C = np.hstack((F, trend_probs))
    aux = clone(etree)
    aux.fit(C[train], y[train])
    y_pred = aux.predict(C[test])
    save_results(out_folder, 'meta-combine', y_pred, y[test])

    # stack_clf = stacking.Stacking(3, [etree], 'tree')
    # stack_clf.fit(F[train], y[train], trend_probs[train])
    # y_pred = stack_clf.predict(F[test], trend_probs[test])
    # save_results(out_folder, 'meta-stack-tree', y_pred)

    stack_clf = stacking.Stacking(3, [etree], 'linear')
    stack_clf.fit(F[train], y[train], trend_probs[train])
    y_pred = stack_clf.predict(F[test], trend_probs[test])
    save_results(out_folder, 'meta-stack-linear', y_pred, y[test])
Example 4: _validate_estimator
def _validate_estimator(self):
    "Private function to validate SMOTE and ENN objects"
    if self.smote is not None:
        if isinstance(self.smote, SMOTE):
            self.smote_ = clone(self.smote)
        else:
            raise ValueError('smote needs to be a SMOTE object.'
                             ' Got {} instead.'.format(type(self.smote)))
    # Otherwise create a default SMOTE
    else:
        self.smote_ = SMOTE(
            sampling_strategy=self.sampling_strategy,
            random_state=self.random_state,
            n_jobs=self.n_jobs,
            ratio=self.ratio)

    if self.enn is not None:
        if isinstance(self.enn, EditedNearestNeighbours):
            self.enn_ = clone(self.enn)
        else:
            raise ValueError('enn needs to be an EditedNearestNeighbours.'
                             ' Got {} instead.'.format(type(self.enn)))
    # Otherwise create a default EditedNearestNeighbours
    else:
        self.enn_ = EditedNearestNeighbours(
            sampling_strategy='all',
            n_jobs=self.n_jobs)
Example 5: _validate_estimator
def _validate_estimator(self):
    "Private function to validate SMOTE and Tomek objects"
    if self.smote is not None:
        if isinstance(self.smote, SMOTE):
            self.smote_ = clone(self.smote)
        else:
            raise ValueError('smote needs to be a SMOTE object.'
                             ' Got {} instead.'.format(type(self.smote)))
    # Otherwise create a default SMOTE
    else:
        self.smote_ = SMOTE(
            sampling_strategy=self.sampling_strategy,
            random_state=self.random_state,
            ratio=self.ratio)

    if self.tomek is not None:
        if isinstance(self.tomek, TomekLinks):
            self.tomek_ = clone(self.tomek)
        else:
            raise ValueError('tomek needs to be a TomekLinks object.'
                             ' Got {} instead.'.format(type(self.tomek)))
    # Otherwise create a default TomekLinks
    else:
        self.tomek_ = TomekLinks(sampling_strategy='all')
Example 6: test_sklearn_clone
def test_sklearn_clone():
    tm._skip_if_no_sklearn()
    from sklearn.base import clone

    clf = xgb.XGBClassifier(n_jobs=2, nthread=3)
    clf.n_jobs = -1
    clone(clf)
Example 7: _fit_calibrators
def _fit_calibrators(self, df0, df1):
    df0 = df0.reshape(-1, 1)
    df1 = df1.reshape(-1, 1)

    if self.method == "kde":
        calibrator0 = KernelDensity()
        calibrator1 = KernelDensity()
    elif self.method == "histogram":
        eps = 0.05
        df_min = max(0, min(np.min(df0), np.min(df1)) - eps)
        df_max = min(1, max(np.max(df0), np.max(df1)) + eps)

        calibrator0 = Histogram(bins=10 + int(len(df0) ** (1. / 3.)),
                                range=[(df_min, df_max)],
                                interpolation="linear")
        calibrator1 = Histogram(bins=10 + int(len(df0) ** (1. / 3.)),
                                range=[(df_min, df_max)],
                                interpolation="linear")
    else:
        calibrator0 = clone(self.method)
        calibrator1 = clone(self.method)

    calibrator0.fit(df0)
    calibrator1.fit(df1)

    return calibrator0, calibrator1
Example 8: test_kernel_clone_after_set_params
def test_kernel_clone_after_set_params():
    # This test is to verify that using set_params does not
    # break clone on kernels.
    # This used to break because in kernels such as the RBF, non-trivial
    # logic that modified the length scale used to be in the constructor
    # See https://github.com/scikit-learn/scikit-learn/issues/6961
    # for more details.
    bounds = (1e-5, 1e5)
    for kernel in kernels:
        kernel_cloned = clone(kernel)
        params = kernel.get_params()
        # RationalQuadratic kernel is isotropic.
        isotropic_kernels = (ExpSineSquared, RationalQuadratic)
        if 'length_scale' in params and not isinstance(kernel,
                                                        isotropic_kernels):
            length_scale = params['length_scale']
            if np.iterable(length_scale):
                params['length_scale'] = length_scale[0]
                params['length_scale_bounds'] = bounds
            else:
                params['length_scale'] = [length_scale] * 2
                params['length_scale_bounds'] = bounds * 2

            kernel_cloned.set_params(**params)
            kernel_cloned_clone = clone(kernel_cloned)
            assert_equal(kernel_cloned_clone.get_params(),
                         kernel_cloned.get_params())
            assert_not_equal(id(kernel_cloned_clone), id(kernel_cloned))
            yield (check_hyperparameters_equal, kernel_cloned,
                   kernel_cloned_clone)
Example 9: nn_embedding_translate
def nn_embedding_translate(words=en_2_es.keys(), embedding1=en_embedding, embedding2=es_embedding,
                           constraint=es_2_en.keys(), k=5,
                           pre_transform=None, log=False):
    if pre_transform is not None:
        pre_transform_1 = clone(pre_transform)
        pre_transform_2 = clone(pre_transform)
        embedding1 = transform(embedding1, pre_transform_1)
        embedding2 = transform(embedding2, pre_transform_2)

    if constraint is not None:
        embedding2 = sub_embedding(embedding2, constraint)

    in_vocab_words = [word for word in words if embedding1.normalize(word) is not None]
    if log:
        print("{} of {} words in vocab".format(len(in_vocab_words), len(words)))

    output = {}
    for i, word in enumerate(in_vocab_words):
        if log and i % 100 == 0:
            print("{} of {} words".format(i, len(words)))
        emb = embedding1.word_to_embedding(word)
        if emb is not None:
            trans = embedding2.words_closest_to_point(emb, k=k)
            trans = softmax(trans)
            output[word] = trans
    return output
Example 10: make_classifiers
def make_classifiers(method, balanced, labels, selectors=None, columns=None, random_state=None):
    estimators = {}
    class_weight = None
    if balanced:
        class_weight = 'balanced'

    # Make appropriate delegation
    if 'lr' in method:
        estimator = LogisticRegression(n_jobs=1)
    elif 'svm' in method:
        estimator = SVC(probability=False)
    elif 'rf' in method:
        estimator = RandomForestClassifier(n_jobs=1)
    else:
        raise ValueError("Not implemented for method {}".format(method))

    estimator = estimator.set_params(**{'class_weight': class_weight, 'random_state': random_state})
    if hasattr(estimator, 'n_jobs'):
        estimator.set_params(**{'n_jobs': 1})

    if 'bagged' in method:
        for l in labels:
            named_estimators = zip(columns, [clone(estimator) for _ in columns])
            weights = [1] * len(columns)
            estimators[l] = HybridFeatureVotingClassifier(
                named_estimators, selectors, voting='soft', weights=weights, n_jobs=4
            )
    else:
        for l in labels:
            estimators[l] = clone(estimator)
    return estimators
Example 11: RunExp
def RunExp(StrModel: str, Param: str, FeaUsed: list, DataPath: str, Label: str, StrMeasure: str, std: bool = False, N: int = 0):
    Data = np.genfromtxt(DataPath + Label, delimiter=',', dtype=int)
    Data = Data[:, np.newaxis]

    for f in FeaUsed:
        T = np.genfromtxt(DataPath + Features[f], delimiter=',', dtype=float)
        if len(T.shape) < 2:
            T = T[:, np.newaxis]
        Data = np.concatenate((Data, T), axis=1)

    if N > 0:
        Data = Data[:N, :]

    Lbl = Data[:, 0]
    Fea = Data[:, 1:]

    if std:
        scaler = preprocessing.StandardScaler()
        Fea = scaler.fit_transform(Fea)

    Model = base.clone(Models[StrModel])
    SetParam(Model, Param)
    Model.fit(Fea, Lbl)
    Pred = Model.predict(Fea)
    st = Measures[StrMeasure](Lbl, Pred)

    sv = cross_validation.cross_val_score(base.clone(Models[StrModel]), Fea, Lbl,
                                          metrics.make_scorer(Measures[StrMeasure]), cv=5, n_jobs=5)

    return st, np.mean(sv)
Example 12: fit
def fit(self, X):
    param_grid = list(ParameterGrid(self.param_grid))
    n_folds = len(self.cv)
    n_grid = len(param_grid)
    scores = np.zeros((n_folds, n_grid), dtype=np.float64)

    for i, (X_tr, X_te) in enumerate(self.cv.split(X)):
        for j, params in enumerate(param_grid):
            estimator = clone(self.estimator)
            estimator.set_params(**params)
            estimator.fit(X_tr)
            scores[i, j] = estimator.score(X_te)

    # FIXME: handle higher is better as well.
    best = scores.mean(axis=0).argmin()
    self.best_params_ = param_grid[best]

    # Refit
    if self.refit:
        self.best_estimator_ = clone(self.estimator)
        self.best_estimator_.set_params(**self.best_params_)
        self.best_estimator_.fit(X)

    return self
Example 13: train
def train(self,
          training_trackers,  # type: List[DialogueStateTracker]
          domain,  # type: Domain
          **kwargs  # type: Any
          ):
    # type: (...) -> Dict[Text, Any]

    training_data = self.featurize_for_training(training_trackers,
                                                domain,
                                                **kwargs)

    X, y = self._extract_training_data(training_data)
    model = self.model_architecture(**kwargs)
    score = None
    # Note: clone is called throughout to avoid mutating default
    # arguments.
    self.label_encoder = clone(self.label_encoder).fit(y)
    Xt, yt = self._preprocess_data(X, y)

    if self.cv is None:
        model = clone(model).fit(Xt, yt)
    else:
        param_grid = self.param_grid or {}
        model, score = self._search_and_score(
            model, Xt, yt, param_grid)

    self.model = model
    logger.info("Done fitting sklearn policy model")
    if score is not None:
        logger.info("Cross validation score: {:.5f}".format(score))
Example 14: test_missing_value_handling
def test_missing_value_handling(est, func, support_sparse):
    # check that the preprocessing method lets NaN pass through
    rng = np.random.RandomState(42)
    X = iris.data.copy()
    n_missing = 50
    X[rng.randint(X.shape[0], size=n_missing),
      rng.randint(X.shape[1], size=n_missing)] = np.nan
    X_train, X_test = train_test_split(X, random_state=1)
    # sanity check
    assert not np.all(np.isnan(X_train), axis=0).any()
    assert np.any(np.isnan(X_train), axis=0).all()
    assert np.any(np.isnan(X_test), axis=0).all()
    X_test[:, 0] = np.nan  # make sure this boundary case is tested

    Xt = est.fit(X_train).transform(X_test)
    # missing values should still be missing, and only them
    assert_array_equal(np.isnan(Xt), np.isnan(X_test))

    # check that the function leads to the same results as the class
    Xt_class = est.transform(X_train)
    Xt_func = func(X_train, **est.get_params())
    assert_array_equal(np.isnan(Xt_func), np.isnan(Xt_class))
    assert_allclose(Xt_func[~np.isnan(Xt_func)], Xt_class[~np.isnan(Xt_class)])

    # check that the inverse transform keeps NaN
    Xt_inv = est.inverse_transform(Xt)
    assert_array_equal(np.isnan(Xt_inv), np.isnan(X_test))
    # FIXME: we can introduce equal_nan=True in recent versions of numpy.
    # For the moment we just check that non-NaN values are almost equal.
    assert_allclose(Xt_inv[~np.isnan(Xt_inv)], X_test[~np.isnan(X_test)])

    for i in range(X.shape[1]):
        # train only on non-NaN
        est.fit(_get_valid_samples_by_column(X_train, i))
        # check transforming with NaN works even when training without NaN
        Xt_col = est.transform(X_test[:, [i]])
        assert_array_equal(Xt_col, Xt[:, [i]])
        # check non-NaN is handled as before - the 1st column is all nan
        if not np.isnan(X_test[:, i]).all():
            Xt_col_nonan = est.transform(
                _get_valid_samples_by_column(X_test, i))
            assert_array_equal(Xt_col_nonan,
                               Xt_col[~np.isnan(Xt_col.squeeze())])

    if support_sparse:
        est_dense = clone(est)
        est_sparse = clone(est)

        Xt_dense = est_dense.fit(X_train).transform(X_test)
        Xt_inv_dense = est_dense.inverse_transform(Xt_dense)

        for sparse_constructor in (sparse.csr_matrix, sparse.csc_matrix,
                                   sparse.bsr_matrix, sparse.coo_matrix,
                                   sparse.dia_matrix, sparse.dok_matrix,
                                   sparse.lil_matrix):
            # check that the dense and sparse inputs lead to the same results
            Xt_sparse = (est_sparse.fit(sparse_constructor(X_train))
                         .transform(sparse_constructor(X_test)))
            assert_allclose(Xt_sparse.A, Xt_dense)
            Xt_inv_sparse = est_sparse.inverse_transform(Xt_sparse)
            assert_allclose(Xt_inv_sparse.A, Xt_inv_dense)
Example 15: RunExp
def RunExp(StrModel: str, Param: str, FeaUsed: list, DataPath: str, Label: str, std: bool = False, N: int = 0):
    Data = np.genfromtxt(DataPath + Label, delimiter=',', dtype=int)
    Data = Data[:, np.newaxis]

    for f in FeaUsed:
        T = np.genfromtxt(DataPath + Features[f], delimiter=',', dtype=float)
        if len(T.shape) < 2:
            T = T[:, np.newaxis]
        Data = np.concatenate((Data, T), axis=1)

    if N > 0:
        Data = Data[:N, :]

    Lbl = Data[:, 0]
    Fea = Data[:, 1:]

    if std:
        scaler = preprocessing.StandardScaler()
        Fea = scaler.fit_transform(Fea)

    Model = base.clone(Models[StrModel])
    SetParam(Model, Param)
    Model.fit(Fea, Lbl)
    Pred = Model.predict_proba(Fea)[:, 1]
    st = metrics.precision_recall_curve(Lbl, Pred)

    Folds = cross_validation.KFold(Fea.shape[0], n_folds=5)
    for train, valid in Folds:
        Model = base.clone(Models[StrModel])
        SetParam(Model, Param)
        Model.fit(Fea[train], Lbl[train])
        Pred[valid] = Model.predict_proba(Fea[valid])[:, 1]
    sv = metrics.precision_recall_curve(Lbl, Pred)

    return st, sv