本文整理汇总了Python中sklearn.datasets.load_breast_cancer函数的典型用法代码示例。如果您正苦于以下问题：Python load_breast_cancer函数的具体用法？Python load_breast_cancer怎么用？Python load_breast_cancer使用的例子？那么恭喜您，这里精选的函数代码示例或许可以为您提供帮助。
在下文中一共展示了load_breast_cancer函数的15个代码示例，这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞，您的评价将有助于系统推荐出更棒的Python代码示例。
示例1: test_load_breast_cancer
def test_load_breast_cancer():
    """Check the fields returned by ``load_breast_cancer`` and its ``return_X_y`` mode."""
    dataset = load_breast_cancer()
    assert_equal(dataset.data.shape, (569, 30))
    assert_equal(dataset.target.size, 569)
    assert_equal(dataset.target_names.size, 2)
    assert_true(dataset.DESCR)

    # With return_X_y=True the loader must hand back a plain tuple whose two
    # entries match the data and target of a freshly loaded dataset.
    as_tuple = load_breast_cancer(return_X_y=True)
    reference = load_breast_cancer()
    assert_true(isinstance(as_tuple, tuple))
    for position, expected in enumerate((reference.data, reference.target)):
        assert_array_equal(as_tuple[position], expected)
示例2: Breast_cancer
def Breast_cancer(training_size, test_size, n, PLOT_DATA):
    """Prepare a PCA-reduced, rescaled breast cancer dataset grouped by class.

    The data is split 70/30 with a fixed seed, standardized, projected onto
    ``n`` principal components and finally rescaled to the range (-1, +1).

    Parameters
    ----------
    training_size : int
        Number of samples per class placed in ``training_input``.
    test_size : int
        Number of samples per class placed in ``test_input``.
    n : int
        Number of PCA components to keep.
    PLOT_DATA : bool
        When truthy, scatter-plot the first two components of the selected
        training samples and show the figure.

    Returns
    -------
    tuple
        ``(sample_train, training_input, test_input, class_labels)``; the two
        ``*_input`` values are dicts keyed by class label.
    """
    class_labels = [r'A', r'B']
    # return_X_y is passed by keyword: recent scikit-learn releases make this
    # flag keyword-only, so a bare positional True raises a TypeError.
    data, target = datasets.load_breast_cancer(return_X_y=True)
    sample_train, sample_test, label_train, label_test = train_test_split(
        data, target, test_size=0.3, random_state=12)

    # Standardize around 0 with unit variance; the scaler is fitted on the
    # training split only and then applied to both splits.
    std_scale = StandardScaler().fit(sample_train)
    sample_train = std_scale.transform(sample_train)
    sample_test = std_scale.transform(sample_test)

    # Reduce the number of features to n (the number of qubits)
    pca = PCA(n_components=n).fit(sample_train)
    sample_train = pca.transform(sample_train)
    sample_test = pca.transform(sample_test)

    # Scale to the range (-1,+1); the scaler is fitted on train and test together
    samples = np.append(sample_train, sample_test, axis=0)
    minmax_scale = MinMaxScaler((-1, 1)).fit(samples)
    sample_train = minmax_scale.transform(sample_train)
    sample_test = minmax_scale.transform(sample_test)

    # Pick the first training_size samples of each class
    training_input = {key: (sample_train[label_train == k, :])[:training_size]
                      for k, key in enumerate(class_labels)}
    # NOTE: the test samples are also sliced from the training split (the rows
    # that follow the training samples of each class), not from sample_test.
    test_input = {key: (sample_train[label_train == k, :])[training_size:(training_size + test_size)]
                  for k, key in enumerate(class_labels)}

    if PLOT_DATA:
        for k in range(0, 2):
            plt.scatter(sample_train[label_train == k, 0][:training_size],
                        sample_train[label_train == k, 1][:training_size])
        plt.title("PCA dim. reduced Breast cancer dataset")
        plt.show()

    return sample_train, training_input, test_input, class_labels
示例3: test_dt
def test_dt():
    """Check that ClassificationTree agrees with the SKDT reference and that its explanations render."""
    dataset = load_breast_cancer()
    features = dataset.data
    targets = dataset.target

    reference_tree = SKDT(random_state=1, max_depth=3)
    tree_under_test = ClassificationTree(
        feature_names=dataset.feature_names, random_state=1
    )
    reference_tree.fit(features, targets)
    tree_under_test.fit(features, targets)

    # Class probabilities first, then hard predictions, must match the reference.
    assert np.allclose(
        reference_tree.predict_proba(features),
        tree_under_test.predict_proba(features),
    )
    assert np.allclose(
        reference_tree.predict(features),
        tree_under_test.predict(features),
    )

    # Local explanations: once with labels, once without.
    for explain_args in ((features, targets), (features,)):
        local_explanation = tree_under_test.explain_local(*explain_args)
        assert local_explanation.visualize(0) is not None

    global_explanation = tree_under_test.explain_global()
    assert global_explanation.visualize() is not None
示例4: test_RFECV
def test_RFECV():
    """Smoke-test RFECV with gblinear XGBoost estimators.

    Runs recursive feature elimination with 3-fold cross-validation for a
    regression, a binary classification and a multi-class classification
    problem; the test passes when every ``fit`` call completes.
    """
    from sklearn.datasets import load_boston
    from sklearn.datasets import load_breast_cancer
    from sklearn.datasets import load_iris
    from sklearn.feature_selection import RFECV

    # Regression
    X, y = load_boston(return_X_y=True)
    # The target is continuous and the objective is a regression one, so the
    # regressor wrapper is required here rather than the classifier.
    bst = xgb.XGBRegressor(booster='gblinear', learning_rate=0.1,
                           n_estimators=10, n_jobs=1,
                           objective='reg:squarederror',
                           random_state=0, verbosity=0)
    rfecv = RFECV(
        estimator=bst, step=1, cv=3, scoring='neg_mean_squared_error')
    rfecv.fit(X, y)

    # Binary classification
    X, y = load_breast_cancer(return_X_y=True)
    bst = xgb.XGBClassifier(booster='gblinear', learning_rate=0.1,
                            n_estimators=10, n_jobs=1,
                            objective='binary:logistic',
                            random_state=0, verbosity=0)
    rfecv = RFECV(estimator=bst, step=1, cv=3, scoring='roc_auc')
    rfecv.fit(X, y)

    # Multi-class classification
    X, y = load_iris(return_X_y=True)
    bst = xgb.XGBClassifier(base_score=0.4, booster='gblinear',
                            learning_rate=0.1,
                            n_estimators=10, n_jobs=1,
                            objective='multi:softprob',
                            random_state=0, reg_alpha=0.001, reg_lambda=0.01,
                            scale_pos_weight=0.5, verbosity=0)
    rfecv = RFECV(estimator=bst, step=1, cv=3, scoring='neg_log_loss')
    rfecv.fit(X, y)
示例5: main
def main():
    """Grid-search a LinearSVC and an MLPClassifier on the breast cancer data and print the scores."""
    bunch = datasets.load_breast_cancer()
    all_features = bunch.data
    all_labels = bunch.target

    # 70/30 split, stratified on the labels.
    train_features, test_features, train_labels, test_labels = train_test_split(
        all_features,
        all_labels,
        test_size=0.3,
        stratify=all_labels,
    )

    # --- Linear SVM ---
    svm_grid = {
        'loss': ('hinge', 'squared_hinge'),
        'C': [1, 10, 100, 1000, 5, 50, 500, 5000],
    }
    svm = LinearSVC()
    grid_scores, best_score, best_params, test_score = validate_model(
        model=svm,
        parameter_set=svm_grid,
        train_data=[train_features, train_labels],
        test_data=[test_features, test_labels],
    )
    print(grid_scores)
    print('SVM best score: {}'.format(best_score))
    print('SVM best params : {}'.format(best_params))
    print('SVM test score : {}'.format(test_score))

    # --- Multi-layer perceptron ---
    mlp_grid = {
        'activation': ['identity', 'logistic', 'tanh', 'relu'],
        'solver': ['sgd', 'adam'],
        'batch_size': [16, 32, 64, 128],
    }
    mlp = MLPClassifier()
    grid_scores, best_score, best_params, test_score = validate_model(
        model=mlp,
        parameter_set=mlp_grid,
        train_data=[train_features, train_labels],
        test_data=[test_features, test_labels],
    )
    print(grid_scores)
    print('MLP best score: {}'.format(best_score))
    print('MLP best params : {}'.format(best_params))
    print('MLP test score : {}'.format(test_score))
示例6: test_early_stopping
def test_early_stopping(self):
    """Check ``best_iteration`` and ``best_score`` with ``early_stopping_rounds`` set.

    Trains twice on the same 90/10 split: once capped at 10 boosting rounds,
    where all 10 rounds are expected to be used, and once without an explicit
    cap, where the best iteration must not exceed 100.
    """
    # return_X_y is passed by keyword: recent scikit-learn releases make this
    # flag keyword-only, so a bare positional True raises a TypeError.
    X, y = load_breast_cancer(return_X_y=True)
    params = {
        'objective': 'binary',
        'metric': 'binary_logloss',
        'verbose': -1
    }
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.1, random_state=42)
    lgb_train = lgb.Dataset(X_train, y_train)
    lgb_eval = lgb.Dataset(X_test, y_test, reference=lgb_train)
    valid_set_name = 'valid_set'
    # no early stopping
    gbm = lgb.train(params, lgb_train,
                    num_boost_round=10,
                    valid_sets=lgb_eval,
                    valid_names=valid_set_name,
                    verbose_eval=False,
                    early_stopping_rounds=5)
    self.assertEqual(gbm.best_iteration, 10)
    self.assertIn(valid_set_name, gbm.best_score)
    self.assertIn('binary_logloss', gbm.best_score[valid_set_name])
    # early stopping occurs
    gbm = lgb.train(params, lgb_train,
                    valid_sets=lgb_eval,
                    valid_names=valid_set_name,
                    verbose_eval=False,
                    early_stopping_rounds=5)
    self.assertLessEqual(gbm.best_iteration, 100)
    self.assertIn(valid_set_name, gbm.best_score)
    self.assertIn('binary_logloss', gbm.best_score[valid_set_name])
示例7: load_breast_cancer_df
def load_breast_cancer_df(include_tgt=True, tgt_name="target", shuffle=False):
    """Return the breast cancer dataset as a pandas DataFrame.

    The feature columns are named after the dataset's ``feature_names``; the
    target can optionally be appended as one more column.

    Parameters
    ----------
    include_tgt : bool, optional (default=True)
        Whether to add the target as a column.

    tgt_name : str, optional (default="target")
        Column name used for the target when it is included.

    shuffle : bool, optional (default=False)
        Whether to pass the frame through ``shuffle_dataframe`` before
        returning it.

    Returns
    -------
    X : pd.DataFrame, shape=(n_samples, n_features)
        The loaded dataset
    """
    dataset = load_breast_cancer()
    frame = pd.DataFrame.from_records(
        data=dataset.data,
        columns=dataset.feature_names,
    )

    if include_tgt:
        frame[tgt_name] = dataset.target

    if shuffle:
        return shuffle_dataframe(frame)
    return frame
示例8: setUp
def setUp(self):
    """Build the shared fixtures: a 90/10 data split, the training Dataset and base parameters."""
    # return_X_y is passed by keyword: recent scikit-learn releases make this
    # flag keyword-only, so a bare positional True raises a TypeError.
    X, y = load_breast_cancer(return_X_y=True)
    self.X_train, self.X_test, self.y_train, self.y_test = train_test_split(
        X, y, test_size=0.1, random_state=1)
    self.train_data = lgb.Dataset(self.X_train, self.y_train)
    self.params = {
        "objective": "binary",
        "verbose": -1,
        "num_leaves": 3
    }
示例9: test_binary
def test_binary(self):
    """Fit an LGBMClassifier with early stopping and check the held-out log loss.

    The log loss on the 10% held-out split must be below 0.15 and must agree
    (to 5 decimal places) with the value recorded at the best iteration.
    """
    # return_X_y is passed by keyword: recent scikit-learn releases make this
    # flag keyword-only, so a bare positional True raises a TypeError.
    X, y = load_breast_cancer(return_X_y=True)
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.1, random_state=42)
    gbm = lgb.LGBMClassifier(n_estimators=50, silent=True)
    gbm.fit(X_train, y_train, eval_set=[(X_test, y_test)], early_stopping_rounds=5, verbose=False)
    ret = log_loss(y_test, gbm.predict_proba(X_test))
    self.assertLess(ret, 0.15)
    # best_iteration_ is used as a 1-based index into the recorded metric list
    self.assertAlmostEqual(ret, gbm.evals_result_['valid_0']['binary_logloss'][gbm.best_iteration_ - 1], places=5)
示例10: main
def main(arguments):
    """Train the SVM model on the standardized breast cancer data and print test metrics.

    Parameters
    ----------
    arguments : object
        Must expose ``svm_c``, ``num_epochs``, ``log_path`` and
        ``result_path`` attributes.
    """
    # load the dataset once; features and labels both come from this object
    dataset = datasets.load_breast_cancer()

    # standardize the features
    features = StandardScaler().fit_transform(dataset.data)

    # get the number of features
    num_features = features.shape[1]

    # load the corresponding labels for the features
    labels = dataset.target

    # transform the labels to {-1, +1}
    labels[labels == 0] = -1

    # split the dataset to 70/30 partition: 70% train, 30% test
    train_features, test_features, train_labels, test_labels = train_test_split(features, labels,
                                                                                test_size=0.3, stratify=labels)

    train_size = train_features.shape[0]
    test_size = test_features.shape[0]

    # slice the dataset as per the batch size, dropping the trailing samples
    # that do not fill a whole batch
    train_cutoff = train_size - (train_size % BATCH_SIZE)
    test_cutoff = test_size - (test_size % BATCH_SIZE)
    train_features = train_features[:train_cutoff]
    train_labels = train_labels[:train_cutoff]
    test_features = test_features[:test_cutoff]
    test_labels = test_labels[:test_cutoff]

    # instantiate the SVM class
    model = SVM(alpha=LEARNING_RATE, batch_size=BATCH_SIZE, svm_c=arguments.svm_c, num_classes=NUM_CLASSES,
                num_features=num_features)

    # train the instantiated model
    model.train(epochs=arguments.num_epochs, log_path=arguments.log_path, train_data=[train_features, train_labels],
                train_size=train_features.shape[0], validation_data=[test_features, test_labels],
                validation_size=test_features.shape[0], result_path=arguments.result_path)

    test_conf, test_accuracy = utils.plot_confusion_matrix(phase='testing', path=arguments.result_path,
                                                           class_names=['benign', 'malignant'])

    print('True negatives : {}'.format(test_conf[0][0]))
    print('False negatives : {}'.format(test_conf[1][0]))
    print('True positives : {}'.format(test_conf[1][1]))
    print('False positives : {}'.format(test_conf[0][1]))
    print('Testing accuracy : {}'.format(test_accuracy))
示例11: load_binary_data
def load_binary_data(self, shuffled=True):
    """Load the breast cancer data into ``self.X`` / ``self.y`` and record the feature count.

    Parameters
    ----------
    shuffled : bool, optional (default=True)
        When true, rows and labels are shuffled using ``self.SEED`` as the
        random state; otherwise they are stored in their original order.
    """
    samples = load_breast_cancer()
    if shuffled:
        # Shuffle both arrays in one call so rows and labels always receive
        # the same permutation. Two separate calls only stay aligned when
        # SEED is an integer. Assumes `shuffle` is sklearn.utils.shuffle,
        # which accepts several arrays at once -- TODO confirm.
        self.X, self.y = shuffle(samples.data, samples.target, random_state=self.SEED)
    else:
        self.X, self.y = samples.data, samples.target
    self.n_features = len(self.X[0])
示例12: test_binary
def test_binary(self):
    """Run the shared test template on a binary objective and check the log loss.

    The returned score must be below 0.15 and must equal (to 5 decimal
    places) the smallest ``binary_logloss`` recorded for the ``eval`` set.
    """
    # return_X_y is passed by keyword: recent scikit-learn releases make this
    # flag keyword-only, so a bare positional True raises a TypeError.
    X_y = load_breast_cancer(return_X_y=True)
    params = {
        'objective': 'binary',
        'metric': 'binary_logloss'
    }
    evals_result, ret = template.test_template(params, X_y, log_loss)
    self.assertLess(ret, 0.15)
    self.assertAlmostEqual(min(evals_result['eval']['binary_logloss']), ret, places=5)
示例13: test_issues_161_and_189
def test_issues_161_and_189(self):
    """
    ensure DataManager(data).data == data

    Wraps the ``predict_proba`` of a k-nearest-neighbours model fitted on 25
    rows in an InMemoryModel and checks it is recognised as a probabilistic
    classifier.
    """
    # return_X_y is passed by keyword: recent scikit-learn releases make this
    # flag keyword-only, so a bare positional True raises a TypeError.
    X, y = load_breast_cancer(return_X_y=True)
    X, y = X[15:40], y[15:40]
    model = KNeighborsClassifier(weights='distance', p=2, n_neighbors=10).fit(X, y)
    skater_model = InMemoryModel(model.predict_proba, examples=X, probability=True)
    assert skater_model.probability is True
    assert skater_model.model_type == StaticTypes.model_types.classifier
示例14: train_breast_cancer
def train_breast_cancer(param_in):
    """Train an XGBoost binary classifier on the breast cancer data and check its accuracy.

    ``param_in`` is merged over the default ``binary:logistic`` objective.
    The booster is evaluated on the same data it was trained on, and the
    rounded predictions must reach an accuracy of at least 0.8.
    """
    dataset = datasets.load_breast_cancer()
    scaled_features = scale(dataset.data)
    train_matrix = xgb.DMatrix(scaled_features, label=dataset.target)

    booster_params = {'objective': 'binary:logistic'}
    booster_params.update(param_in)
    booster = xgb.train(booster_params, train_matrix, num_rounds)

    # Predictions are probabilities; round them to get hard 0/1 labels.
    predicted_labels = np.round(booster.predict(train_matrix))
    accuracy = metrics.accuracy_score(dataset.target, predicted_labels)
    assert accuracy >= 0.8
示例15: test_load_breast_cancer
def test_load_breast_cancer():
    """Check the loader's sizes, description and source file, then its ``return_X_y`` option."""
    dataset = load_breast_cancer()

    size_checks = (
        (dataset.data.shape, (569, 30)),
        (dataset.target.size, 569),
        (dataset.target_names.size, 2),
    )
    for actual, expected in size_checks:
        assert_equal(actual, expected)

    assert_true(dataset.DESCR)
    assert_true(os.path.exists(dataset.filename))

    # test return_X_y option
    loader = partial(load_breast_cancer)
    check_return_X_y(dataset, loader)