Python datasets.load_boston函数代码示例

本文整理汇总了Python中sklearn.datasets.load_boston函数的典型用法代码示例。如果您正苦于以下问题：Python load_boston函数的具体用法？Python load_boston怎么用？Python load_boston使用的例子？那么恭喜您，这里精选的函数代码示例或许可以为您提供帮助。

在下文中一共展示了load_boston函数的15个代码示例，这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞，您的评价将有助于系统推荐出更棒的Python代码示例。

示例1: test_load_boston

def test_load_boston():
    """Check the Boston bunch's dimensions and the ``return_X_y`` variant.

    The bunch is expected to expose a (506, 13) data array, 506 targets,
    13 feature names and a non-empty description.
    """
    dataset = load_boston()
    expected_shape = (506, 13)
    assert_equal(dataset.data.shape, expected_shape)
    assert_equal(dataset.target.size, expected_shape[0])
    assert_equal(dataset.feature_names.size, expected_shape[1])
    assert_true(dataset.DESCR)

    # With return_X_y=True the loader must hand back a tuple whose two
    # entries equal the data/target of a freshly loaded bunch.
    as_tuple = load_boston(return_X_y=True)
    reference = load_boston()
    assert_true(isinstance(as_tuple, tuple))
    assert_array_equal(as_tuple[0], reference.data)
    assert_array_equal(as_tuple[1], reference.target)

开发者ID:NazBen，项目名称:scikit-learn，代码行数:13，代码来源:test_base.py

示例2: get_cmap_scatter_plot

def get_cmap_scatter_plot():
    """Build a colour-mapped scatter plot of house price against lower status.

    The last data column is plotted on the index axis, the target on the
    value axis, and data column 4 (nitric oxides, per the plot title)
    drives the marker colour through a reversed ``RdYlGn`` colour map.

    Returns the configured ``ColormappedScatterPlot``.
    """
    boston = datasets.load_boston()
    prices = boston['target']
    features = boston['data']
    lower_status = features[:, -1]
    nox = features[:, 4]

    index_source, value_source = get_data_sources(x=lower_status, y=prices)
    index_mapper, value_mapper = get_mappers(index_source, value_source)

    # The colour scale spans exactly the observed NOX range.
    nox_source = ArrayDataSource(nox)
    reversed_palette = dc.reverse(dc.RdYlGn)
    nox_range = DataRange1D(low=nox.min(), high=nox.max())
    nox_mapper = reversed_palette(nox_range)

    plot = ColormappedScatterPlot(
        index=index_source,
        value=value_source,
        index_mapper=index_mapper,
        value_mapper=value_mapper,
        color_data=nox_source,
        color_mapper=nox_mapper,
        marker='circle',
        title='Color represents nitric oxides concentration',
        render_method='bruteforce',
        **PLOT_DEFAULTS
    )

    add_axes(
        plot,
        x_label='Percent lower status in the population',
        y_label='Median house prices',
    )
    return plot

开发者ID:5n1p，项目名称:chaco，代码行数:29，代码来源:create_plot_snapshots.py

示例3: test_regressors_int

def test_regressors_int():
    """Check that regressors cope with integer targets.

    Every regressor returned by ``all_estimators`` (minus the skipped
    ones) is fitted twice on the scaled Boston features: once with random
    integer labels and once with the same labels cast to float. The two
    sets of predictions must agree to 2 decimals.
    """
    regressors = all_estimators(type_filter='regressor')
    boston = load_boston()
    X, y = boston.data, boston.target
    X, y = shuffle(X, y, random_state=0)
    X = StandardScaler().fit_transform(X)
    # The real targets are discarded in favour of random 0/1 integers.
    y = np.random.randint(2, size=X.shape[0])
    for name, Reg in regressors:
        if Reg in dont_test or Reg in (CCA,):
            continue
        # catch deprecation warnings
        with warnings.catch_warnings(record=True):
            # separate estimators to control random seeds
            reg1 = Reg()
            reg2 = Reg()
        set_random_state(reg1)
        set_random_state(reg2)

        if Reg in (_PLS, PLSCanonical, PLSRegression):
            # These estimators are fed a two-column target.
            y_ = np.vstack([y, 2 * y + np.random.randint(2, size=len(y))])
            y_ = y_.T
        else:
            y_ = y

        # Fit once on the integer targets and once on their float copy.
        reg1.fit(X, y_)
        pred1 = reg1.predict(X)
        # np.float64 instead of the ``np.float`` alias, which newer NumPy
        # releases no longer provide; the resulting dtype is the same.
        reg2.fit(X, y_.astype(np.float64))
        pred2 = reg2.predict(X)
        assert_array_almost_equal(pred1, pred2, 2, name)

开发者ID:nwf5d，项目名称:scikit-learn，代码行数:32，代码来源:test_common.py

示例4: load_data

def load_data():
    """Return the object produced by ``datasets.load_boston()``."""
    return datasets.load_boston()

开发者ID:shoc2005，项目名称:P1，代码行数:7，代码来源:boston_housing.py

示例5: test_regressors_train

def test_regressors_train():
    """Smoke-test fit / predict / score for every available regressor.

    Each estimator subclassing ``RegressorMixin`` (except the skipped
    ones) must reject a target that is one sample short, then fit the
    scaled Boston data and reach a score above 0.5 on it.
    """
    regressors = []
    for est_name, est_cls in all_estimators():
        if issubclass(est_cls, RegressorMixin):
            regressors.append((est_name, est_cls))

    boston = load_boston()
    X, y = shuffle(boston.data, boston.target, random_state=0)
    # TODO: test with intercept
    # TODO: test with multiple responses
    X = Scaler().fit_transform(X)
    y = Scaler().fit_transform(y)

    for _, regressor_cls in regressors:
        if regressor_cls in dont_test or regressor_cls in meta_estimators:
            continue
        # Instantiate under a recorder so deprecation warnings are captured.
        with warnings.catch_warnings(record=True):
            model = regressor_cls()
        if hasattr(model, 'alpha'):
            model.set_params(alpha=0.01)

        # A target one element shorter than X must be rejected.
        assert_raises(ValueError, model.fit, X, y[:-1])

        model.fit(X, y)
        model.predict(X)
        assert_greater(model.score(X, y), 0.5)

开发者ID:arunchaganty，项目名称:scikit-learn，代码行数:26，代码来源:test_common.py

示例6: demo

def demo(X=None, y=None, test_size=0.1):
    """Train a ``BasicSegmenterEG_FEMPO`` model and print how it performs.

    Args:
        X: Feature data. When ``None``, the Boston data is loaded and both
            ``X`` and ``y`` are replaced by DataFrames built from it.
        y: Target data matching ``X``.
        test_size: Forwarded to ``train_test_split``.

    Returns:
        Tuple ``(clf, X_test, y_test)``: the fitted model and the held-out
        split.

    Prints the training-set shape, the held-out score, the mean squared
    error of the held-out predictions and the shape of those predictions.
    """
    # Identity test: ``X == None`` is element-wise on DataFrames / arrays
    # and cannot be used as a plain truth value.
    if X is None:
        boston = load_boston()
        X = pd.DataFrame(boston.data)
        y = pd.DataFrame(boston.target)

    base_estimator = DecisionTreeRegressor(max_depth=5)

    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=test_size)
    print(X_train.shape)

    # If you want to compare with BaggingRegressor.
    # bench = BaggingRegressor(base_estimator = base_estimator, n_estimators = 10, max_samples = 1, oob_score = True).fit(X_train, y_train)
    # print(bench.score(X_test, y_test))
    # print(mean_squared_error(bench.predict(X_test), y_test))

    clf = BasicSegmenterEG_FEMPO(ngen=30, init_sample_percentage=1, n_votes=10, n=10,
                                 base_estimator=base_estimator,
                                 unseen_x=X_test, unseen_y=y_test)
    clf.fit(X_train, y_train)
    print(clf.score(X_test, y_test))
    # Held in its own name so the ``y`` parameter is not shadowed.
    y_pred = clf.predict(X_test)
    print(mean_squared_error(y_pred, y_test))
    print(y_pred.shape)

    return clf, X_test, y_test

开发者ID:bhanu-mnit，项目名称:EvoML，代码行数:29，代码来源:test_auto_segmentEG_FEMPO.py

示例7: test_rrf_vs_sklearn_reg

    def test_rrf_vs_sklearn_reg(self):
        """Compare the R-backed forest with sklearn's on the Boston data.

        Both models are trained with 100 trees on the same 80/20 split and
        their test-set mean squared errors must agree to 0 decimals.
        """
        from sklearn.datasets import load_boston
        from sklearn.cross_validation import train_test_split
        from sklearn.metrics import mean_squared_error
        from sklearn.ensemble import RandomForestRegressor

        boston = load_boston()
        X_train, X_test, y_train, y_test = train_test_split(
            boston.data, boston.target, test_size=0.2, random_state=13)

        n_samples, n_features = X_train.shape
        # Features tried per split: 30% of the feature count, rounded down.
        mtry = int(np.floor(0.3 * n_features))

        # R side: 100 trees. Kept as a dict because 'corr.bias' is not a
        # valid Python keyword name.
        r_options = {'ntree': 100, 'nodesize': 1, 'replace': 0,
                     'mtry': mtry, 'corr.bias': False,
                     'sampsize': n_samples, 'random_state': 1234}
        r_rf = RRFEstimatorR(**r_options)
        r_rf.fit(X_train, y_train)
        r_pred = r_rf.predict(X_test)
        r_mse = mean_squared_error(y_test, r_pred)

        # sklearn side, configured with the corresponding settings.
        p_rf = RandomForestRegressor(
            n_estimators=100,
            min_samples_leaf=1,
            bootstrap=False,
            max_features=mtry,
            random_state=1,
        )
        p_rf.fit(X_train, y_train)
        p_pred = p_rf.predict(X_test)
        p_mse = mean_squared_error(y_test, p_pred)

        print('%.4f vs %.4f' % (r_mse, p_mse))
        # should be roughly the same (7.6 vs. 7.2)
        np.testing.assert_almost_equal(r_mse, p_mse, decimal=0)

开发者ID:tkincaid，项目名称:tkincaid.github.com，代码行数:29，代码来源:test_rrf_bm.py

示例8: test_template

 def test_template(params={'objective': 'regression', 'metric': 'l2'},
                   X_y=load_boston(True), feval=mean_squared_error,
                   num_round=100, init_model=None, custom_eval=None,
                   early_stopping_rounds=10,
                   return_data=False, return_model=False):
     """Train a LightGBM model on a 90/10 split and report the outcome.

     Args:
         params: Training parameters; ``verbose=-1`` and ``seed=42`` are
             always applied on top of them.
         X_y: ``(X, y)`` pair unpacked into ``train_test_split``. The
             default is evaluated once, when the function is defined.
         feval: Metric called as ``feval(y_test, predictions)``.
         num_round: Passed to ``lgb.train`` as ``num_boost_round``.
         init_model: Passed to ``lgb.train`` as ``init_model``.
         custom_eval: Passed to ``lgb.train`` as ``feval``.
         early_stopping_rounds: Passed to ``lgb.train``.
         return_data: If true, return ``(lgb_train, lgb_eval)`` without
             training.
         return_model: If true, return the trained booster.

     Returns:
         The two datasets, the booster, or ``(evals_result, score)``,
         depending on the flags above.
     """
     # Copy before adding the fixed settings: the default dict is shared
     # across calls and a caller's dict must not be modified in place.
     params = dict(params, verbose=-1, seed=42)
     X_train, X_test, y_train, y_test = train_test_split(*X_y, test_size=0.1, random_state=42)
     lgb_train = lgb.Dataset(X_train, y_train, params=params)
     lgb_eval = lgb.Dataset(X_test, y_test, reference=lgb_train, params=params)
     if return_data:
         return lgb_train, lgb_eval
     evals_result = {}
     gbm = lgb.train(params, lgb_train,
                     num_boost_round=num_round,
                     valid_sets=lgb_eval,
                     valid_names='eval',
                     verbose_eval=False,
                     feval=custom_eval,
                     evals_result=evals_result,
                     early_stopping_rounds=early_stopping_rounds,
                     init_model=init_model)
     if return_model:
         return gbm
     # Score the held-out split using the booster's best iteration.
     return evals_result, feval(y_test, gbm.predict(X_test, gbm.best_iteration))

开发者ID:kqdmqx，项目名称:LightGBM，代码行数:25，代码来源:test_engine.py

示例9: load_boston

    def load_boston():
        """Load the Boston housing data and return it shuffled.

        The features and targets are passed through
        ``SklearnDataGenerator.shuffle`` and its result is returned.
        """
        from sklearn.datasets import load_boston
        boston = load_boston()

        # Debugging aids: print(boston.DESCR), print(boston.feature_names),
        # pp.pprint(boston.data), print(np.array(boston.data).shape),
        # pp.pprint(boston.target)  # house prices
        #
        # Feature notes:
        # CRIM      : crimes per capita
        # ZN        : share of residential lots larger than 25,000 sq. ft.
        # INDUS     : share of area occupied by non-retail business
        # CHAS      : Charles River dummy variable (1: near the river, 0: otherwise)
        # NOX       : NOx concentration
        # RM        : average number of rooms per dwelling
        # AGE       : share of properties built before 1940
        # DIS       : weighted distance to five Boston employment centres
        # RAD       : ease of access to radial highways
        # TAX       : total property-tax rate per $10,000
        # PTRATIO   : pupil-teacher ratio by town
        # B         : 1000(Bk - 0.63)^2, where Bk is the share of Black residents by town
        # LSTAT     : share of the population in low-paid occupations (%)
        return SklearnDataGenerator.shuffle(boston.data, boston.target)

开发者ID:Munetaka，项目名称:labo，代码行数:26，代码来源:sklearn_data_generator.py

示例10: get_data

def get_data():
    """Plot predicted against true Boston prices and return the image.

    A ``LinearRegression`` is fitted on the full dataset and its
    predictions for that same data are scattered against the true
    targets, with a dashed diagonal from (0, 0) to (50, 50) for reference.

    Returns:
        An in-memory binary buffer, rewound to position 0, holding the
        figure as written by ``plt.savefig`` (format follows matplotlib's
        default, since none is given here).
    """
    # Imported locally so the block does not depend on a file-level import.
    import io

    data = load_boston()
    clf = LinearRegression()
    clf.fit(data.data, data.target)
    predicted = clf.predict(data.data)

    fig = plt.figure(num=None, figsize=(14, 6), dpi=80, facecolor='w', edgecolor='k')
    try:
        plt.scatter(data.target, predicted)
        plt.plot([0, 50], [0, 50], '--k')
        plt.axis('tight')
        plt.xlabel('True price of Houses ($1000s)')
        plt.ylabel('Predicted price of Houses ($1000s)')

        # savefig emits bytes, so a binary buffer is required; BytesIO
        # exists on both Python 2 and 3, unlike the StringIO module.
        img = io.BytesIO()
        plt.savefig(img, bbox_inches='tight')
    finally:
        # Release the figure even when plotting or saving fails.
        plt.close(fig)

    img.seek(0)
    return img

开发者ID:abhishekgahlot，项目名称:ml-project，代码行数:31，代码来源:linear_regression.py

示例11: generate_data

    def generate_data(case, sparse=False):
        """Generate regression / classification data split into train and test.

        Args:
            case: ``'regression'`` (Boston data) or ``'classification'``
                (vectorized 20 newsgroups, ``subset='all'``).
            sparse: If true, the feature blocks are wrapped in
                ``csr_matrix``; otherwise they go through ``np.array``.

        Returns:
            Dict with keys ``'X_train'``, ``'X_test'``, ``'y_train'`` and
            ``'y_test'``. The first 80% of the shuffled samples form the
            training part.

        Raises:
            ValueError: If ``case`` is not one of the two supported values.
        """
        if case == 'regression':
            bunch = datasets.load_boston()
        elif case == 'classification':
            bunch = datasets.fetch_20newsgroups_vectorized(subset='all')
        else:
            # Fail with a clear message rather than an attribute error on None.
            raise ValueError(
                "case must be 'regression' or 'classification', got %r" % (case,))

        X, y = shuffle(bunch.data, bunch.target)
        offset = int(X.shape[0] * 0.8)
        X_train, y_train = X[:offset], y[:offset]
        X_test, y_test = X[offset:], y[offset:]

        # Same conversion for both feature blocks.
        convert = csr_matrix if sparse else np.array
        return {
            'X_train': convert(X_train),
            'X_test': convert(X_test),
            'y_train': np.array(y_train),
            'y_test': np.array(y_test),
        }

开发者ID:0x0all，项目名称:machineLearning，代码行数:27，代码来源:plot_model_complexity_influence.py

示例12: get_bar_plot

def get_bar_plot():
    """Build a bar plot of the relative frequency of Boston house prices.

    The targets are binned into 10 histogram bins, the counts are
    normalised to sum to 1, and the bars sit at the bin centres.

    Returns the configured ``BarPlot``.
    """
    prices = datasets.load_boston()['target']

    counts, bin_edges = np.histogram(prices, bins=10)
    frequencies = counts.astype('d') / counts.sum()
    centers = (bin_edges[:-1] + bin_edges[1:]) / 2.0

    index_source, value_source = get_data_sources(x=centers, y=frequencies)
    index_mapper, value_mapper = get_mappers(index_source, value_source)

    # Widen the x range by half a bin on each side, otherwise half of the
    # first and last bar are cut off.
    bin_width = bin_edges[1] - bin_edges[0]
    index_mapper.range.low = centers[0] - bin_width / 2.
    index_mapper.range.high = centers[-1] + bin_width / 2.

    # Small headroom above the tallest bar.
    value_mapper.range.high += 0.02

    plot = BarPlot(
        index=index_source,
        value=value_source,
        index_mapper=index_mapper,
        value_mapper=value_mapper,
        fill_color='blue',
        bar_width=3.0,
        **PLOT_DEFAULTS
    )
    add_axes(plot, x_label='Median house prices', y_label='Frequency')
    return plot

开发者ID:5n1p，项目名称:chaco，代码行数:32，代码来源:create_plot_snapshots.py

示例13: get_jitter_plot

def get_jitter_plot():
    """Build a jitter plot of the Boston house prices.

    Returns the ``JitterPlot`` with a bottom axis titled
    'Median house prices' appended to its underlays.
    """
    prices = datasets.load_boston()['target']

    index_source, value_source = get_data_sources(y=prices)
    _, value_mapper = get_mappers(index_source, value_source)

    # Only the value-side source and mapper feed the plot itself.
    plot = JitterPlot(
        index=value_source,
        mapper=value_mapper,
        marker='circle',
        jitter_width=100,
        **PLOT_DEFAULTS
    )
    plot.line_width = 1.

    bottom_axis = PlotAxis(
        orientation='bottom',
        title='Median house prices',
        mapper=plot.mapper,
        component=plot,
        **AXIS_DEFAULTS
    )
    plot.underlays.append(bottom_axis)
    return plot

开发者ID:5n1p，项目名称:chaco，代码行数:25，代码来源:create_plot_snapshots.py

示例14: get_variable_size_scatter_plot

def get_variable_size_scatter_plot():
    """Build a price vs. lower-status scatter plot with tax-scaled markers.

    The last data column goes on the index axis, the target on the value
    axis, and data column 9 (property-tax rate, per the plot title) sets
    each marker's size.

    Returns the configured ``ScatterPlot``.
    """
    boston = datasets.load_boston()
    prices = boston['target']
    features = boston['data']
    lower_status = features[:, -1]
    tax = features[:, 9]

    index_source, value_source = get_data_sources(x=lower_status, y=prices)
    index_mapper, value_mapper = get_mappers(index_source, value_source)

    # Scale so the largest tax value maps to a marker size of 10.
    sizes = tax / tax.max() * 10.

    plot = ScatterPlot(
        index=index_source,
        value=value_source,
        index_mapper=index_mapper,
        value_mapper=value_mapper,
        marker='circle',
        marker_size=sizes,
        title='Size represents property-tax rate',
        **PLOT_DEFAULTS
    )
    plot.color = (0.0, 1.0, 0.3, 0.4)

    add_axes(
        plot,
        x_label='Percent lower status in the population',
        y_label='Median house prices',
    )
    return plot

开发者ID:5n1p，项目名称:chaco，代码行数:26，代码来源:create_plot_snapshots.py

示例15: load_extended_boston

def load_extended_boston():
    """Return the Boston features expanded with degree-2 polynomial terms.

    The raw features are first passed through ``MinMaxScaler`` and then
    through ``PolynomialFeatures(degree=2, include_bias=False)``.

    Returns:
        Tuple ``(X, y)`` of the expanded feature matrix and the
        unmodified targets.
    """
    boston = load_boston()
    # Scale first, then expand; the unused raw-data assignment was dropped.
    scaled = MinMaxScaler().fit_transform(boston.data)
    X = PolynomialFeatures(degree=2, include_bias=False).fit_transform(scaled)
    return X, boston.target

开发者ID:MrGreenRubato，项目名称:notebooks，代码行数:7，代码来源:datasets.py

注：本文中的sklearn.datasets.load_boston函数示例由纯净天空整理自Github/MSDocs等开源代码及文档管理平台，相关代码片段筛选自各路编程大神贡献的开源项目，源码版权归原作者所有，传播和使用请参考对应项目的License；未经允许，请勿转载。