本文整理汇总了Python中sklearn.model_selection.learning_curve方法的典型用法代码示例。如果您正苦于以下问题:Python model_selection.learning_curve方法的具体用法?Python model_selection.learning_curve怎么用?Python model_selection.learning_curve使用的例子?那么，这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在模块sklearn.model_selection的用法示例。
在下文中一共展示了model_selection.learning_curve方法的15个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Python代码示例。
示例1: test_learning_curve_verbose
# 需要导入模块: from sklearn import model_selection [as 别名]
# 或者: from sklearn.model_selection import learning_curve [as 别名]
def test_learning_curve_verbose():
    """Check that a verbose ``learning_curve`` call writes its tag to stdout.

    ``sys.stdout`` is temporarily replaced by an in-memory buffer so that
    everything printed during the call can be inspected afterwards.
    """
    X, y = make_classification(
        n_samples=30, n_features=1, n_informative=1, n_redundant=0,
        n_classes=2, n_clusters_per_class=1, random_state=0)
    estimator = MockImprovingEstimator(20)

    saved_stdout = sys.stdout
    sys.stdout = StringIO()
    try:
        train_sizes, train_scores, test_scores = learning_curve(
            estimator, X, y, cv=3, verbose=1)
    finally:
        # Read the captured text before closing the buffer, and restore the
        # real stdout whether or not the call above succeeded.
        out = sys.stdout.getvalue()
        sys.stdout.close()
        sys.stdout = saved_stdout

    assert "[learning_curve]" in out
示例2: test_learning_curve_batch_and_incremental_learning_are_equal
# 需要导入模块: from sklearn import model_selection [as 别名]
# 或者: from sklearn.model_selection import learning_curve [as 别名]
def test_learning_curve_batch_and_incremental_learning_are_equal():
    """Compare the incremental and the batch code paths of ``learning_curve``.

    The function is run twice with the same estimator, data, folds and
    training-set fractions; only ``exploit_incremental_learning`` differs.
    The absolute training sizes must be equal and the fold-averaged scores
    must agree approximately.
    """
    X, y = make_classification(
        n_samples=30, n_features=1, n_informative=1, n_redundant=0,
        n_classes=2, n_clusters_per_class=1, random_state=0)
    fractions = np.linspace(0.2, 1.0, 5)
    estimator = PassiveAggressiveClassifier(
        max_iter=1, tol=None, shuffle=False)

    sizes_inc, train_inc, test_inc = learning_curve(
        estimator, X, y, train_sizes=fractions, cv=3,
        exploit_incremental_learning=True)
    sizes_batch, train_batch, test_batch = learning_curve(
        estimator, X, y, cv=3, train_sizes=fractions,
        exploit_incremental_learning=False)

    assert_array_equal(sizes_inc, sizes_batch)
    # Train scores first, then test scores, each averaged over the folds.
    for inc_scores, batch_scores in ((train_inc, train_batch),
                                     (test_inc, test_batch)):
        assert_array_almost_equal(inc_scores.mean(axis=1),
                                  batch_scores.mean(axis=1))
示例3: test_learning_curve_with_boolean_indices
# 需要导入模块: from sklearn import model_selection [as 别名]
# 或者: from sklearn.model_selection import learning_curve [as 别名]
def test_learning_curve_with_boolean_indices():
    """Run ``learning_curve`` with an explicit ``KFold`` splitter.

    Checks the absolute training sizes and the fold-averaged train/test
    scores produced with the mock estimator.
    """
    X, y = make_classification(
        n_samples=30, n_features=1, n_informative=1, n_redundant=0,
        n_classes=2, n_clusters_per_class=1, random_state=0)
    estimator = MockImprovingEstimator(20)
    splitter = KFold(n_splits=3)

    train_sizes, train_scores, test_scores = learning_curve(
        estimator, X, y, cv=splitter,
        train_sizes=np.linspace(0.1, 1.0, 10))

    assert_array_equal(train_sizes, np.linspace(2, 20, 10))
    mean_train = train_scores.mean(axis=1)
    assert_array_almost_equal(mean_train, np.linspace(1.9, 1.0, 10))
    mean_test = test_scores.mean(axis=1)
    assert_array_almost_equal(mean_test, np.linspace(0.1, 1.0, 10))
# 0.23. warning about tol not having its correct default value.
示例4: learning_curve
# 需要导入模块: from sklearn import model_selection [as 别名]
# 或者: from sklearn.model_selection import learning_curve [as 别名]
def learning_curve(self, graphs, targets,
                   cv=5, n_steps=10, start_fraction=0.1,
                   scoring='roc_auc'):
    """Compute a cross-validated learning curve for ``self.model``.

    The graphs and targets are shuffled together, the graphs are converted
    with ``self.transform`` and the result is handed to the module-level
    ``learning_curve`` function.

    Parameters
    ----------
    graphs : input objects accepted by ``self.transform``.
    targets : target values paired with ``graphs``.
    cv : cross-validation specification forwarded unchanged.
    n_steps : number of training-set fractions to evaluate.
    start_fraction : smallest training-set fraction; the fractions are
        spaced linearly from this value up to 1.0.
    scoring : scorer forwarded unchanged. Defaults to ``'roc_auc'``, the
        value that was previously hard-coded, so existing callers are
        unaffected; other tasks can now pass a different scorer.

    Returns
    -------
    (train_sizes, train_scores, test_scores) as returned by the underlying
    ``learning_curve`` call.
    """
    graphs, targets = paired_shuffle(graphs, targets)
    x = self.transform(graphs)
    fractions = np.linspace(start_fraction, 1.0, n_steps)
    # NOTE(review): assuming this def lives in a class body, the bare name
    # below resolves to the module-level function, not to this method.
    train_sizes, train_scores, test_scores = learning_curve(
        self.model, x, targets,
        cv=cv, train_sizes=fractions,
        scoring=scoring)
    return train_sizes, train_scores, test_scores
示例5: __calc_learning_curve
# 需要导入模块: from sklearn import model_selection [as 别名]
# 或者: from sklearn.model_selection import learning_curve [as 别名]
def __calc_learning_curve(self, algorithm):
    """Return fold-averaged learning-curve data for ``algorithm.estimator``.

    The curve is computed on ``self.data.X`` / ``self.data.y`` using the
    instance's ``cv``, ``scoring`` and ``n_jobs`` settings.

    Returns a dict with the training sizes under ``'x'`` and the mean
    train / cross-validation scores under ``'y_train'`` / ``'y_cv'``.
    """
    sizes, train_scores, cv_scores = learning_curve(
        algorithm.estimator,
        self.data.X,
        self.data.y,
        cv=self.cv,
        scoring=self.scoring,
        n_jobs=self.n_jobs)  # n_jobs is forwarded to learning_curve

    # Average over axis 1 so each training size gets a single value.
    return {
        'x': sizes,
        'y_train': np.mean(train_scores, axis=1),
        'y_cv': np.mean(cv_scores, axis=1),
    }
示例6: test_learning_curve
# 需要导入模块: from sklearn import model_selection [as 别名]
# 或者: from sklearn.model_selection import learning_curve [as 别名]
def test_learning_curve():
    """Check shapes, sizes and mean scores of ``learning_curve`` output.

    The check is repeated with and without shuffling. In each round the
    curve is computed once with ``KFold`` and once with a splitter that can
    be iterated only once; both must yield the same scores, and neither
    call may emit a warning.
    """
    n_samples = 30
    n_splits = 3
    X, y = make_classification(
        n_samples=n_samples, n_features=1, n_informative=1, n_redundant=0,
        n_classes=2, n_clusters_per_class=1, random_state=0)
    estimator = MockImprovingEstimator(n_samples * ((n_splits - 1) / n_splits))

    def run_without_warnings(make_cv, shuffle):
        # The splitter is built inside the recording context so that any
        # warning raised while constructing it is caught as well.
        with warnings.catch_warnings(record=True) as caught:
            sizes, train, test = learning_curve(
                estimator, X, y, cv=make_cv(),
                train_sizes=np.linspace(0.1, 1.0, 10),
                shuffle=shuffle)
        if caught:
            raise RuntimeError("Unexpected warning: %r" % caught[0].message)
        return sizes, train, test

    for shuffle_train in (False, True):
        train_sizes, train_scores, test_scores = run_without_warnings(
            lambda: KFold(n_splits=n_splits), shuffle_train)
        assert_equal(train_scores.shape, (10, 3))
        assert_equal(test_scores.shape, (10, 3))
        assert_array_equal(train_sizes, np.linspace(2, 20, 10))
        assert_array_almost_equal(train_scores.mean(axis=1),
                                  np.linspace(1.9, 1.0, 10))
        assert_array_almost_equal(test_scores.mean(axis=1),
                                  np.linspace(0.1, 1.0, 10))

        # Test a custom cv splitter that can iterate only once
        train_sizes2, train_scores2, test_scores2 = run_without_warnings(
            lambda: OneTimeSplitter(n_splits=n_splits, n_samples=n_samples),
            shuffle_train)
        assert_array_almost_equal(train_scores2, train_scores)
        assert_array_almost_equal(test_scores2, test_scores)
示例7: test_learning_curve_unsupervised
# 需要导入模块: from sklearn import model_selection [as 别名]
# 或者: from sklearn.model_selection import learning_curve [as 别名]
def test_learning_curve_unsupervised():
    """Run ``learning_curve`` with ``y=None`` and check sizes and mean scores."""
    X, _ = make_classification(
        n_samples=30, n_features=1, n_informative=1, n_redundant=0,
        n_classes=2, n_clusters_per_class=1, random_state=0)
    estimator = MockImprovingEstimator(20)

    train_sizes, train_scores, test_scores = learning_curve(
        estimator, X, y=None, cv=3,
        train_sizes=np.linspace(0.1, 1.0, 10))

    assert_array_equal(train_sizes, np.linspace(2, 20, 10))
    # Each pair is (scores per fold, expected fold-averaged values).
    expectations = (
        (train_scores, np.linspace(1.9, 1.0, 10)),
        (test_scores, np.linspace(0.1, 1.0, 10)),
    )
    for scores, expected_mean in expectations:
        assert_array_almost_equal(scores.mean(axis=1), expected_mean)
示例8: test_learning_curve_incremental_learning_not_possible
# 需要导入模块: from sklearn import model_selection [as 别名]
# 或者: from sklearn.model_selection import learning_curve [as 别名]
def test_learning_curve_incremental_learning_not_possible():
    """Expect ``ValueError`` when incremental learning is requested for the mock.

    Per the original comment, the mock estimator used here has no
    ``partial_fit()``.
    """
    X, y = make_classification(
        n_samples=2, n_features=1, n_informative=1, n_redundant=0,
        n_classes=2, n_clusters_per_class=1, random_state=0)
    # The mockup does not have partial_fit()
    mock = MockImprovingEstimator(1)
    assert_raises(
        ValueError,
        learning_curve, mock, X, y,
        exploit_incremental_learning=True)
示例9: test_learning_curve_incremental_learning
# 需要导入模块: from sklearn import model_selection [as 别名]
# 或者: from sklearn.model_selection import learning_curve [as 别名]
def test_learning_curve_incremental_learning():
    """Check ``learning_curve`` with ``exploit_incremental_learning=True``.

    The check runs once without and once with shuffling, and verifies the
    absolute training sizes and the fold-averaged train/test scores.
    """
    X, y = make_classification(
        n_samples=30, n_features=1, n_informative=1, n_redundant=0,
        n_classes=2, n_clusters_per_class=1, random_state=0)
    estimator = MockIncrementalImprovingEstimator(20)

    for shuffle_train in (False, True):
        train_sizes, train_scores, test_scores = learning_curve(
            estimator, X, y, cv=3,
            exploit_incremental_learning=True,
            train_sizes=np.linspace(0.1, 1.0, 10),
            shuffle=shuffle_train)

        assert_array_equal(train_sizes, np.linspace(2, 20, 10))
        mean_train = train_scores.mean(axis=1)
        assert_array_almost_equal(mean_train, np.linspace(1.9, 1.0, 10))
        mean_test = test_scores.mean(axis=1)
        assert_array_almost_equal(mean_test, np.linspace(0.1, 1.0, 10))
示例10: test_learning_curve_n_sample_range_out_of_bounds
# 需要导入模块: from sklearn import model_selection [as 别名]
# 或者: from sklearn.model_selection import learning_curve [as 别名]
def test_learning_curve_n_sample_range_out_of_bounds():
    """Expect ``ValueError`` for each of several ``train_sizes`` specifications."""
    X, y = make_classification(
        n_samples=30, n_features=1, n_informative=1, n_redundant=0,
        n_classes=2, n_clusters_per_class=1, random_state=0)
    estimator = MockImprovingEstimator(20)

    # Checked in this order; the first spec that fails to raise aborts
    # the test.
    rejected_specs = (
        [0, 1],
        [0.0, 1.0],
        [0.1, 1.1],
        [0, 20],
        [1, 21],
    )
    for spec in rejected_specs:
        assert_raises(ValueError, learning_curve, estimator, X, y, cv=3,
                      train_sizes=spec)
示例11: test_learning_curve_remove_duplicate_sample_sizes
# 需要导入模块: from sklearn import model_selection [as 别名]
# 或者: from sklearn.model_selection import learning_curve [as 别名]
def test_learning_curve_remove_duplicate_sample_sizes():
    """Expect a ``RuntimeWarning`` and training sizes ``[1, 2]``.

    Three fractions are requested on a three-sample dataset, and the call
    is expected to return only two distinct absolute sizes.
    """
    X, y = make_classification(
        n_samples=3, n_features=1, n_informative=1, n_redundant=0,
        n_classes=2, n_clusters_per_class=1, random_state=0)
    estimator = MockImprovingEstimator(2)

    curve = assert_warns(
        RuntimeWarning, learning_curve, estimator, X, y, cv=3,
        train_sizes=np.linspace(0.33, 1.0, 3))
    train_sizes, _, _ = curve
    assert_array_equal(train_sizes, [1, 2])
示例12: test_learning_curve_with_shuffle
# 需要导入模块: from sklearn import model_selection [as 别名]
# 或者: from sklearn.model_selection import learning_curve [as 别名]
def test_learning_curve_with_shuffle():
    """Check ``learning_curve`` with grouped folds and ``shuffle=True``.

    Three things are verified: the fold-averaged scores of a shuffled batch
    run, that an unshuffled run with ``error_score='raise'`` raises
    ``ValueError``, and that a shuffled incremental run reproduces the
    batch scores.
    """
    # This data layout was designed to verify the code changes made in
    # pull request #7506.
    X = np.array([[1, 2], [3, 4], [5, 6], [7, 8], [11, 12], [13, 14], [15, 16],
                  [17, 18], [19, 20], [7, 8], [9, 10], [11, 12], [13, 14],
                  [15, 16], [17, 18]])
    y = np.array([1, 1, 1, 2, 3, 4, 1, 1, 2, 3, 4, 1, 2, 3, 4])
    groups = np.array([1, 1, 1, 1, 1, 1, 3, 3, 3, 3, 3, 4, 4, 4, 4])
    # Splits on these groups fail without shuffle as the first iteration
    # of the learning curve doesn't contain label 4 in the training set.
    estimator = PassiveAggressiveClassifier(max_iter=5, tol=None,
                                            shuffle=False)
    cv = GroupKFold(n_splits=2)
    # Keyword arguments common to every learning_curve call below.
    shared = dict(cv=cv, n_jobs=1, groups=groups)

    _, batch_train, batch_test = learning_curve(
        estimator, X, y, train_sizes=np.linspace(0.3, 1.0, 3),
        shuffle=True, random_state=2, **shared)
    assert_array_almost_equal(batch_train.mean(axis=1),
                              np.array([0.75, 0.3, 0.36111111]))
    assert_array_almost_equal(batch_test.mean(axis=1),
                              np.array([0.36111111, 0.25, 0.25]))

    assert_raises(ValueError, learning_curve, estimator, X, y,
                  train_sizes=np.linspace(0.3, 1.0, 3),
                  error_score='raise', **shared)

    _, inc_train, inc_test = learning_curve(
        estimator, X, y, train_sizes=np.linspace(0.3, 1.0, 3),
        shuffle=True, random_state=2,
        exploit_incremental_learning=True, **shared)
    assert_array_almost_equal(inc_train.mean(axis=1),
                              batch_train.mean(axis=1))
    assert_array_almost_equal(inc_test.mean(axis=1),
                              batch_test.mean(axis=1))
示例13: plot_learning_curve
# 需要导入模块: from sklearn import model_selection [as 别名]
# 或者: from sklearn.model_selection import learning_curve [as 别名]
def plot_learning_curve(self, estimator, title, X, y, ylim=None, cv=None,
                        n_jobs=None, train_sizes=np.linspace(.1, 1.0, 5)):
    """Compute a learning curve for ``estimator`` and display it.

    A new pyplot figure is created, the mean train and cross-validation
    scores are drawn with a shaded band of one standard deviation around
    each, and the figure is shown with ``plt.show()``.

    Parameters
    ----------
    estimator, X, y, cv, n_jobs, train_sizes : forwarded to
        ``learning_curve`` unchanged.
    title : figure title.
    ylim : optional sequence unpacked into ``plt.ylim``.
    """
    # From https://scikit-learn.org/stable/auto_examples/model_selection/plot_learning_curve.html
    print('Drawing curve, depending on your datasets size, this may take several minutes to several hours.')

    plt.figure()
    plt.title(title)
    if ylim is not None:
        plt.ylim(*ylim)
    plt.xlabel("Training examples")
    plt.ylabel("Score")

    train_sizes, train_scores, test_scores = learning_curve(
        estimator, X, y, cv=cv, n_jobs=n_jobs, train_sizes=train_sizes)

    # One entry per curve: (mean, std, colour, legend label).
    curves = (
        (np.mean(train_scores, axis=1), np.std(train_scores, axis=1),
         "r", "Training score"),
        (np.mean(test_scores, axis=1), np.std(test_scores, axis=1),
         "g", "Cross-validation score"),
    )

    plt.grid()
    # Both bands are drawn before either line, as in the reference example.
    for mean, std, colour, _ in curves:
        plt.fill_between(train_sizes, mean - std, mean + std,
                         alpha=0.1, color=colour)
    for mean, _, colour, label in curves:
        plt.plot(train_sizes, mean, 'o-', color=colour, label=label)

    plt.legend(loc="best")
    plt.show()
示例14: plot_learning_curve
# 需要导入模块: from sklearn import model_selection [as 别名]
# 或者: from sklearn.model_selection import learning_curve [as 别名]
def plot_learning_curve(estimator, title, X, y, ylim=None, cv=None,
                        train_sizes=np.linspace(.1, 1.0, 5), n_jobs=1, figure_path=None):
    """Plot a learning curve for ``estimator`` and optionally save it.

    A new pyplot figure is created and the mean train and cross-validation
    scores are drawn with a shaded band of one standard deviation around
    each.

    Parameters
    ----------
    estimator, X, y, cv, n_jobs, train_sizes : forwarded to
        ``learning_curve`` unchanged.
    title : figure title.
    ylim : optional sequence unpacked into ``plt.ylim``.
    figure_path : destination passed to ``plt.savefig``. When ``None``
        (the default) the figure is not written to disk; previously the
        default made the call fail because ``savefig`` was invoked with
        ``None``.

    Returns
    -------
    The ``plt`` module, so the caller can show or further edit the figure.
    """
    plt.figure()
    plt.title(title)
    if ylim is not None:
        plt.ylim(*ylim)
    plt.xlabel("Training examples")
    plt.ylabel("Score")
    train_sizes, train_scores, test_scores = learning_curve(
        estimator, X, y, cv=cv, n_jobs=n_jobs, train_sizes=train_sizes)
    train_scores_mean = np.mean(train_scores, axis=1)
    train_scores_std = np.std(train_scores, axis=1)
    test_scores_mean = np.mean(test_scores, axis=1)
    test_scores_std = np.std(test_scores, axis=1)
    plt.grid()

    plt.fill_between(train_sizes, train_scores_mean - train_scores_std,
                     train_scores_mean + train_scores_std, alpha=0.1,
                     color="r")
    plt.fill_between(train_sizes, test_scores_mean - test_scores_std,
                     test_scores_mean + test_scores_std, alpha=0.1, color="g")
    plt.plot(train_sizes, train_scores_mean, 'o-', color="r",
             label="Training score")
    plt.plot(train_sizes, test_scores_mean, 'o-', color="g",
             label="Cross-validation score")

    plt.legend(loc="best")
    # Only write a file when the caller actually supplied a destination.
    if figure_path is not None:
        plt.savefig(figure_path)
    return plt
示例15: _set_description
# 需要导入模块: from sklearn import model_selection [as 别名]
# 或者: from sklearn.model_selection import learning_curve [as 别名]
def _set_description(self, dfe):
    """Build ``self.description`` with a feature-importance / learning-curve figure.

    The left subplot is a bar chart of ``self.model.feature_importances_``
    sorted in descending order; the right subplot is the learning curve of
    ``self.model`` on ``dfe.df``. The same figure is attached to both the
    Japanese and the English description.
    """
    raw_importances = self.model.feature_importances_
    feature_names = dfe.get_features().columns
    importances = pd.Series(raw_importances, index=feature_names)
    importances = importances.sort_values(ascending=False)

    y = dfe.df[dfe.target]
    X = dfe.df.drop(dfe.target, axis=1)
    train_sizes, train_scores, test_scores = learning_curve(
        self.model, X, y, n_jobs=self.n_jobs)
    train_scores_mean = np.mean(train_scores, axis=1)
    train_scores_std = np.std(train_scores, axis=1)
    test_scores_mean = np.mean(test_scores, axis=1)
    test_scores_std = np.std(test_scores, axis=1)

    def localized(ja_text, en_text):
        # Pick the label matching the instance language.
        return ja_text if self.lang == "ja" else en_text

    pic = ImageFile.create()
    # NOTE(review): the source lost its indentation; the extent of this
    # ``with`` body (through the legend call) is inferred - confirm.
    with pic.plot() as (plt, fig):
        fig.set_figwidth(12)

        plt.subplot(121)
        importances.plot(kind="bar")

        ax2 = plt.subplot(122)
        ax2.fill_between(
            train_sizes,
            train_scores_mean - train_scores_std,
            train_scores_mean + train_scores_std,
            alpha=0.1, color="r")
        ax2.fill_between(
            train_sizes,
            test_scores_mean - test_scores_std,
            test_scores_mean + test_scores_std,
            alpha=0.1, color="g")
        ax2.plot(train_sizes, train_scores_mean, "o-", color="r",
                 label=localized("学習精度", "Training score"))
        ax2.plot(train_sizes, test_scores_mean, 'o-', color="g",
                 label=localized("評価精度", "Cross-validation score"))
        ax2.set_xlabel(localized("学習データ量(行数)", "data records"))
        ax2.set_ylabel(localized("精度", "accuracy"))
        ax2.set_ylim(0, 1)
        ax2.legend(loc="best")

    params = (self.score, self.model.__class__.__name__)
    ja_message = "モデルの精度は{:.3f}です(利用モデル:{})。各項目の貢献度は図のようになっています。".format(*params)
    en_message = "The model accuracy is {:.3f}(model is {}). The contributions of each features are here.".format(*params)
    self.description = {
        "ja": Description(ja_message, pic),
        "en": Description(en_message, pic),
    }