本文整理汇总了Python中sklearn.datasets.load_boston函数的典型用法代码示例。如果您正苦于以下问题:Python load_boston函数的具体用法?Python load_boston怎么用?Python load_boston使用的例子?那么恭喜您, 这里精选的函数代码示例或许可以为您提供帮助。
在下文中一共展示了load_boston函数的15个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Python代码示例。
示例1: test_load_boston
def test_load_boston():
    """Check the Bunch returned by load_boston and its return_X_y variant."""
    dataset = load_boston()

    # Expected dimensions: 506 samples, 13 features.
    assert_equal(dataset.data.shape, (506, 13))
    assert_equal(dataset.target.size, 506)
    assert_equal(dataset.feature_names.size, 13)
    # The description must be present and non-empty.
    assert_true(dataset.DESCR)

    # With return_X_y=True the loader must hand back a plain (X, y) tuple
    # whose members match the Bunch attributes of a regular load.
    features_and_target = load_boston(return_X_y=True)
    reference = load_boston()
    assert_true(isinstance(features_and_target, tuple))
    assert_array_equal(features_and_target[0], reference.data)
    assert_array_equal(features_and_target[1], reference.target)
示例2: get_cmap_scatter_plot
def get_cmap_scatter_plot():
    """Build a colormapped scatter plot of the Boston data.

    The last data column is used as the index (x) axis, the target as the
    value (y) axis, and data column 4 drives the marker colour through a
    reversed RdYlGn colormap spanning that column's min..max.

    Returns the configured ColormappedScatterPlot with axes attached.
    """
    boston = datasets.load_boston()
    prices = boston['target']
    features = boston['data']
    lower_status = features[:, -1]
    nox = features[:, 4]

    x, y = get_data_sources(x=lower_status, y=prices)
    x_mapper, y_mapper = get_mappers(x, y)

    # Colour channel: data source plus a mapper over the observed range.
    color_source = ArrayDataSource(nox)
    make_color_mapper = dc.reverse(dc.RdYlGn)
    nox_range = DataRange1D(low=nox.min(), high=nox.max())
    color_mapper = make_color_mapper(nox_range)

    scatter_plot = ColormappedScatterPlot(
        index=x,
        value=y,
        index_mapper=x_mapper,
        value_mapper=y_mapper,
        color_data=color_source,
        color_mapper=color_mapper,
        marker='circle',
        title='Color represents nitric oxides concentration',
        render_method='bruteforce',
        **PLOT_DEFAULTS
    )

    add_axes(
        scatter_plot,
        x_label='Percent lower status in the population',
        y_label='Median house prices',
    )
    return scatter_plot
示例3: test_regressors_int
def test_regressors_int():
    """Check that regressors give the same predictions for int and float y.

    Every regressor returned by ``all_estimators`` (minus the skipped ones)
    is fitted twice on the scaled Boston features: once with integer labels
    and once with the same labels cast to float. The two sets of predictions
    must agree to 2 decimals.
    """
    regressors = all_estimators(type_filter='regressor')
    boston = load_boston()
    X, y = boston.data, boston.target
    X, y = shuffle(X, y, random_state=0)
    X = StandardScaler().fit_transform(X)
    # Replace the real target with random 0/1 integer labels.
    y = np.random.randint(2, size=X.shape[0])
    for name, Reg in regressors:
        if Reg in dont_test or Reg in (CCA,):
            continue
        # catch deprecation warnings
        with warnings.catch_warnings(record=True):
            # separate estimators to control random seeds
            reg1 = Reg()
            reg2 = Reg()
        set_random_state(reg1)
        set_random_state(reg2)

        if Reg in (_PLS, PLSCanonical, PLSRegression):
            # These estimators are given a two-column target here.
            y_ = np.vstack([y, 2 * y + np.random.randint(2, size=len(y))])
            y_ = y_.T
        else:
            y_ = y

        # fit
        reg1.fit(X, y_)
        pred1 = reg1.predict(X)
        # ``np.float`` was only an alias of the builtin ``float`` and has
        # been removed from recent NumPy releases; use the builtin directly.
        reg2.fit(X, y_.astype(float))
        pred2 = reg2.predict(X)
        assert_array_almost_equal(pred1, pred2, 2, name)
示例4: load_data
def load_data():
    """Return the Boston dataset exactly as ``datasets.load_boston`` gives it."""
    return datasets.load_boston()
示例5: test_regressors_train
def test_regressors_train():
    """Smoke-test fitting of every regressor on the scaled Boston data.

    For each RegressorMixin subclass (except skipped and meta estimators):
    fitting with a target one element too short must raise ValueError, a
    regular fit/predict must succeed, and the training score must exceed 0.5.
    """
    regressors = [
        (est_name, est_cls)
        for est_name, est_cls in all_estimators()
        if issubclass(est_cls, RegressorMixin)
    ]

    boston = load_boston()
    X, y = shuffle(boston.data, boston.target, random_state=0)
    # TODO: test with intercept
    # TODO: test with multiple responses
    X = Scaler().fit_transform(X)
    y = Scaler().fit_transform(y)

    for name, Reg in regressors:
        skipped = Reg in dont_test or Reg in meta_estimators
        if skipped:
            continue

        # Instantiate under a warnings recorder so deprecation warnings
        # emitted by constructors do not surface.
        with warnings.catch_warnings(record=True):
            reg = Reg()
        if hasattr(reg, 'alpha'):
            reg.set_params(alpha=0.01)

        # Malformed input (length mismatch) must be rejected by fit.
        assert_raises(ValueError, reg.fit, X, y[:-1])

        # Regular fit / predict / score round trip.
        reg.fit(X, y)
        reg.predict(X)
        assert_greater(reg.score(X, y), 0.5)
示例6: demo
def demo(X = None, y = None, test_size = 0.1):
    """Fit a BasicSegmenterEG_FEMPO on a train/test split and report scores.

    Parameters
    ----------
    X, y : optional
        Features and target. When ``X`` is None both are replaced by the
        Boston data and target wrapped in DataFrames.
    test_size :
        Forwarded to ``train_test_split``.

    Returns
    -------
    tuple
        ``(clf, X_test, y_test)``: the fitted model and the held-out split.
    """
    # Identity check: ``X == None`` is evaluated elementwise for arrays and
    # DataFrames, and using that result in ``if`` raises instead of branching.
    if X is None:
        boston = load_boston()
        X = pd.DataFrame(boston.data)
        y = pd.DataFrame(boston.target)

    base_estimator = DecisionTreeRegressor(max_depth = 5)
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=test_size)
    # Single-argument print(...) behaves the same on Python 2 and Python 3.
    print(X_train.shape)

    # If you want to compare with BaggingRegressor.
    # bench = BaggingRegressor(base_estimator = base_estimator, n_estimators = 10, max_samples = 1, oob_score = True).fit(X_train, y_train)
    # print(bench.score(X_test, y_test))
    # print(mean_squared_error(bench.predict(X_test), y_test))

    clf = BasicSegmenterEG_FEMPO(ngen=30,init_sample_percentage = 1, n_votes=10, n = 10, base_estimator = base_estimator,
                                 unseen_x = X_test, unseen_y = y_test)
    clf.fit(X_train, y_train)
    print(clf.score(X_test,y_test))

    # Keep predictions in their own name instead of rebinding the ``y`` argument.
    y_pred = clf.predict(X_test)
    print(mean_squared_error(y_pred, y_test))
    print(y_pred.shape)
    return clf, X_test, y_test
示例7: test_rrf_vs_sklearn_reg
def test_rrf_vs_sklearn_reg(self):
    """Test R vs. sklearn on boston housing dataset.

    Fits RRFEstimatorR and sklearn's RandomForestRegressor with 100 trees
    each on the same 80/20 split and asserts that their test-set mean
    squared errors agree at ``decimal=0`` precision.
    """
    from sklearn.datasets import load_boston
    # ``sklearn.cross_validation`` no longer exists in current scikit-learn;
    # prefer ``model_selection`` and fall back for old installations.
    try:
        from sklearn.model_selection import train_test_split
    except ImportError:
        from sklearn.cross_validation import train_test_split
    from sklearn.metrics import mean_squared_error
    from sklearn.ensemble import RandomForestRegressor

    boston = load_boston()
    X_train, X_test, y_train, y_test = train_test_split(boston.data, boston.target,
                                                        test_size=0.2, random_state=13)

    n_samples, n_features = X_train.shape
    # Number of candidate features per split: 30% of the features, rounded down.
    mtry = int(np.floor(0.3 * n_features))

    # do 100 trees
    r_rf = RRFEstimatorR(**{'ntree': 100, 'nodesize': 1, 'replace': 0,
                            'mtry': mtry, 'corr.bias': False,
                            'sampsize': n_samples, 'random_state': 1234})
    r_rf.fit(X_train, y_train)
    y_pred = r_rf.predict(X_test)
    r_mse = mean_squared_error(y_test, y_pred)

    p_rf = RandomForestRegressor(n_estimators=100, min_samples_leaf=1, bootstrap=False,
                                 max_features=mtry, random_state=1)
    p_rf.fit(X_train, y_train)
    y_pred = p_rf.predict(X_test)
    p_mse = mean_squared_error(y_test, y_pred)

    print('%.4f vs %.4f' % (r_mse, p_mse))
    # should be roughly the same (7.6 vs. 7.2)
    np.testing.assert_almost_equal(r_mse, p_mse, decimal=0)
示例8: test_template
def test_template(params={'objective': 'regression', 'metric': 'l2'},
                  X_y=load_boston(True), feval=mean_squared_error,
                  num_round=100, init_model=None, custom_eval=None,
                  early_stopping_rounds=10,
                  return_data=False, return_model=False):
    """Train a LightGBM model on a fixed 90/10 split and evaluate it.

    Parameters
    ----------
    params : dict
        Training parameters. ``verbose`` is forced to -1 and ``seed`` to 42
        on a private copy; the dict passed in is left untouched.
    X_y : tuple
        ``(X, y)`` pair unpacked into ``train_test_split``. The default is
        evaluated once, when the function is defined.
    feval : callable
        Called as ``feval(y_test, predictions)`` for the final score.
    num_round, init_model, custom_eval, early_stopping_rounds :
        Forwarded to ``lgb.train`` (``custom_eval`` as its ``feval``).
    return_data : bool
        If true, return ``(lgb_train, lgb_eval)`` without training.
    return_model : bool
        If true, return the trained booster instead of the evaluation.

    Returns
    -------
    ``(lgb_train, lgb_eval)``, the booster, or
    ``(evals_result, feval(y_test, predictions))`` depending on the flags.
    """
    # Work on a copy: writing into ``params`` directly would modify the
    # shared default dict (and any dict supplied by the caller) in place.
    params = dict(params)
    params['verbose'], params['seed'] = -1, 42

    X_train, X_test, y_train, y_test = train_test_split(*X_y, test_size=0.1, random_state=42)
    lgb_train = lgb.Dataset(X_train, y_train, params=params)
    lgb_eval = lgb.Dataset(X_test, y_test, reference=lgb_train, params=params)
    if return_data:
        return lgb_train, lgb_eval

    evals_result = {}
    gbm = lgb.train(params, lgb_train,
                    num_boost_round=num_round,
                    valid_sets=lgb_eval,
                    valid_names='eval',
                    verbose_eval=False,
                    feval=custom_eval,
                    evals_result=evals_result,
                    early_stopping_rounds=early_stopping_rounds,
                    init_model=init_model)
    if return_model:
        return gbm
    else:
        return evals_result, feval(y_test, gbm.predict(X_test, gbm.best_iteration))
示例9: load_boston
def load_boston():
    """Load the Boston data and return it shuffled.

    The features and target from sklearn's ``load_boston`` are passed to
    ``SklearnDataGenerator.shuffle`` and its result is returned as is.

    Feature columns, as described by the original author's notes:

    CRIM    : number of crimes per capita
    ZN      : proportion of residential lots of 25,000 sq. ft. or more
    INDUS   : proportion of area occupied by non-retail business
    CHAS    : Charles River dummy variable (1: near the river, 0: otherwise)
    NOX     : NOx concentration
    RM      : average number of rooms per dwelling
    AGE     : proportion of properties built before 1940
    DIS     : distance to five Boston employment centres (weighted)
    RAD     : ease of access to ring/radial highways
    TAX     : total property-tax rate per $10,000
    PTRATIO : pupil-teacher ratio by town
    B       : 1000(Bk - 0.63)^2, where Bk is the proportion of Black
              residents by town
    LSTAT   : percentage of the population in low-paying occupations (%)
    """
    # The local import shadows this function's own name inside the body,
    # so the call below reaches sklearn's loader.
    from sklearn.datasets import load_boston

    boston = load_boston()

    # Handy inspection calls while debugging:
    # print(boston.DESCR)
    # print(boston.feature_names)
    # pp.pprint(boston.data)
    # print(np.array(boston.data).shape)
    # pp.pprint(boston.target)  # house prices

    return SklearnDataGenerator.shuffle(boston.data, boston.target)
示例10: get_data
def get_data():
    """Render a true-vs-predicted price scatter plot into a memory buffer.

    A LinearRegression is fitted on the full Boston data and its predictions
    on that same data are plotted against the true targets, with a dashed
    reference line from (0, 0) to (50, 50).

    Returns
    -------
    io.BytesIO
        Buffer holding the saved figure, rewound to position 0.
    """
    # ``savefig`` emits binary image data, so a bytes buffer is required.
    # ``io.BytesIO`` exists on both Python 2 and 3, unlike the Python 2-only
    # ``StringIO`` module.
    import io

    data = load_boston()
    clf = LinearRegression()
    clf.fit(data.data, data.target)
    predicted = clf.predict(data.data)

    plt.figure(num=None, figsize=(14, 6), dpi=80, facecolor='w', edgecolor='k')
    plt.scatter(data.target, predicted)
    plt.plot([0, 50], [0, 50], '--k')
    plt.axis('tight')
    plt.xlabel('True price of Houses ($1000s)')
    plt.ylabel('Predicted price of Houses ($1000s)')

    img = io.BytesIO()
    plt.savefig(img,bbox_inches='tight')
    # Rewind so the caller can read the image from the start.
    img.seek(0)
    plt.close()
    return img
示例11: generate_data
def generate_data(case, sparse=False):
    """Generate regression / classification data.

    Parameters
    ----------
    case : str
        ``'regression'`` loads the Boston data, ``'classification'`` loads
        the vectorized 20 newsgroups data (``subset='all'``).
    sparse : bool
        If true the feature splits are converted with ``csr_matrix``,
        otherwise with ``np.array``.

    Returns
    -------
    dict
        Keys ``'X_train'``, ``'X_test'``, ``'y_train'``, ``'y_test'``; the
        first 80% of the shuffled samples form the training split.

    Raises
    ------
    ValueError
        If ``case`` is neither ``'regression'`` nor ``'classification'``.
    """
    if case == 'regression':
        bunch = datasets.load_boston()
    elif case == 'classification':
        bunch = datasets.fetch_20newsgroups_vectorized(subset='all')
    else:
        # Fail with a clear message rather than an attribute error on None.
        raise ValueError(
            "case must be 'regression' or 'classification', got %r" % (case,))

    X, y = shuffle(bunch.data, bunch.target)
    offset = int(X.shape[0] * 0.8)
    X_train, y_train = X[:offset], y[:offset]
    X_test, y_test = X[offset:], y[offset:]

    if sparse:
        X_train = csr_matrix(X_train)
        X_test = csr_matrix(X_test)
    else:
        X_train = np.array(X_train)
        X_test = np.array(X_test)
    y_test = np.array(y_test)
    y_train = np.array(y_train)

    data = {
        'X_train': X_train,
        'X_test': X_test,
        'y_train': y_train,
        'y_test': y_test,
    }
    return data
示例12: get_bar_plot
def get_bar_plot():
    """Build a bar plot of the relative frequency of the Boston targets.

    The targets are binned into 10 histogram bins; bar positions are the bin
    centres and bar heights are the bin counts divided by the total count.

    Returns the configured BarPlot with axes attached.
    """
    prices = datasets.load_boston()['target']

    counts, bin_edges = np.histogram(prices, bins=10)
    ys = counts.astype('d') / counts.sum()
    left_edges, right_edges = bin_edges[:-1], bin_edges[1:]
    xs = (left_edges + right_edges) / 2.0

    x, y = get_data_sources(x=xs, y=ys)
    x_mapper, y_mapper = get_mappers(x, y)

    # Widen the x range by half a bin on each side; otherwise half of the
    # first and last bar would be cut off.
    delta = bin_edges[1] - bin_edges[0]
    x_range = x_mapper.range
    x_range.low = xs[0] - delta / 2.
    x_range.high = xs[-1] + delta / 2.

    # Add a little headroom above the tallest bar.
    y_mapper.range.high += 0.02

    bar_plot = BarPlot(
        index=x,
        value=y,
        index_mapper=x_mapper,
        value_mapper=y_mapper,
        fill_color='blue',
        bar_width=3.0,
        **PLOT_DEFAULTS
    )

    add_axes(bar_plot, x_label='Median house prices', y_label='Frequency')
    return bar_plot
示例13: get_jitter_plot
def get_jitter_plot():
    """Build a jitter plot of the Boston targets with a bottom axis.

    Returns the configured JitterPlot.
    """
    prices = datasets.load_boston()['target']

    x, y = get_data_sources(y=prices)
    # Only the y mapper is used by the plot below.
    x_mapper, y_mapper = get_mappers(x, y)

    jitter_plot = JitterPlot(
        index=y,
        mapper=y_mapper,
        marker='circle',
        jitter_width=100,
        **PLOT_DEFAULTS
    )
    jitter_plot.line_width = 1.

    # Attach a bottom axis that shares the plot's own mapper.
    x_axis = PlotAxis(
        orientation='bottom',
        title='Median house prices',
        mapper=jitter_plot.mapper,
        component=jitter_plot,
        **AXIS_DEFAULTS
    )
    jitter_plot.underlays.append(x_axis)

    return jitter_plot
示例14: get_variable_size_scatter_plot
def get_variable_size_scatter_plot():
    """Build a scatter plot whose marker size follows data column 9.

    The last data column is the index (x) axis and the target is the value
    (y) axis. Marker sizes are column 9 divided by its maximum and
    multiplied by 10.

    Returns the configured ScatterPlot with axes attached.
    """
    boston = datasets.load_boston()
    prices = boston['target']
    features = boston['data']
    lower_status = features[:, -1]
    tax = features[:, 9]

    x, y = get_data_sources(x=lower_status, y=prices)
    x_mapper, y_mapper = get_mappers(x, y)

    # Scale so the largest marker has size 10.
    marker_size = tax / tax.max() * 10.

    scatter_plot = ScatterPlot(
        index=x,
        value=y,
        index_mapper=x_mapper,
        value_mapper=y_mapper,
        marker='circle',
        marker_size=marker_size,
        title='Size represents property-tax rate',
        **PLOT_DEFAULTS
    )
    scatter_plot.color = (0.0, 1.0, 0.3, 0.4)

    add_axes(
        scatter_plot,
        x_label='Percent lower status in the population',
        y_label='Median house prices',
    )
    return scatter_plot
示例15: load_extended_boston
def load_extended_boston():
    """Return the Boston data expanded with degree-2 polynomial features.

    The features are first rescaled with ``MinMaxScaler`` and then expanded
    with ``PolynomialFeatures(degree=2, include_bias=False)``.

    Returns
    -------
    tuple
        ``(X, target)``: the expanded feature matrix and the unchanged target.
    """
    boston = load_boston()
    # Scale before expanding; the earlier plain ``X = boston.data``
    # assignment was overwritten immediately and has been dropped.
    X = MinMaxScaler().fit_transform(boston.data)
    X = PolynomialFeatures(degree=2, include_bias=False).fit_transform(X)
    return X, boston.target