This article collects typical usage examples of the OLS class from statsmodels.regression.linear_model in Python. If you are unsure how the OLS class is used in practice, the curated examples below should help.
The following shows 15 code examples of the OLS class, ordered by popularity.
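Before the individual examples, here is a minimal, self-contained sketch of the basic fit-and-summarize pattern that most of the snippets below build on. The simulated data and variable names are illustrative additions, not taken from any of the examples.

import numpy as np
from statsmodels.regression.linear_model import OLS
from statsmodels.tools import add_constant

# Simulated data: y depends linearly on x plus noise.
rng = np.random.default_rng(0)
x = rng.normal(size=100)
y = 1.0 + 2.0 * x + rng.normal(size=100)

exog = add_constant(x)      # prepend an intercept column
res = OLS(y, exog).fit()    # ordinary least squares fit
print(res.params)           # approximately [1.0, 2.0]
print(res.summary())        # full regression table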
Example 1: test_regularized_weights
def test_regularized_weights(self):
    np.random.seed(1432)
    exog1 = np.random.normal(size=(100, 3))
    endog1 = exog1[:, 0] + exog1[:, 1] + np.random.normal(size=100)
    exog2 = np.random.normal(size=(100, 3))
    endog2 = exog2[:, 0] + exog2[:, 1] + np.random.normal(size=100)

    exog_a = np.vstack((exog1, exog1, exog2))
    endog_a = np.concatenate((endog1, endog1, endog2))

    # Should be equivalent to exog_a, endog_a.
    exog_b = np.vstack((exog1, exog2))
    endog_b = np.concatenate((endog1, endog2))
    wgts = np.ones(200)
    wgts[0:100] = 2
    sigma = np.diag(1 / wgts)

    for L1_wt in 0, 0.5, 1:
        for alpha in 0, 1:
            mod1 = OLS(endog_a, exog_a)
            rslt1 = mod1.fit_regularized(L1_wt=L1_wt, alpha=alpha)

            mod2 = WLS(endog_b, exog_b, weights=wgts)
            rslt2 = mod2.fit_regularized(L1_wt=L1_wt, alpha=alpha)

            mod3 = GLS(endog_b, exog_b, sigma=sigma)
            rslt3 = mod3.fit_regularized(L1_wt=L1_wt, alpha=alpha)

            assert_almost_equal(rslt1.params, rslt2.params, decimal=3)
            assert_almost_equal(rslt1.params, rslt3.params, decimal=3)
Example 2: test_regression_with_tuples
def test_regression_with_tuples(self):
    i = pandas.Series([1, 2, 3, 4] * 10, name="i")
    y = pandas.Series([1, 2, 3, 4, 5] * 8, name="y")
    x = pandas.Series([1, 2, 3, 4, 5, 6, 7, 8] * 5, name="x")

    df = pandas.DataFrame(index=i.index)
    df = df.join(i)
    endo = df.join(y)
    exo = df.join(x)
    endo_groups = endo.groupby("i")
    exo_groups = exo.groupby("i")
    exo_df = exo_groups.agg([np.sum, np.max])
    endo_df = endo_groups.agg([np.sum, np.max])
    reg = OLS(exo_df[[("x", "sum")]], endo_df).fit()

    interesting_lines = []
    import warnings
    with warnings.catch_warnings():
        # Ignore the omnibus normality warning; it is not relevant here.
        warnings.simplefilter("ignore")
        for line in str(reg.summary()).splitlines():
            if "_" in line:
                interesting_lines.append(line[:38])

    desired = ["Dep. Variable: x_sum ",
               "y_sum 1.4595 0.209 ",
               "y_amax 0.2432 0.035 "]
    assert_equal(sorted(desired), sorted(interesting_lines))
Example 3: reset_ramsey
def reset_ramsey(res, degree=5):
    '''Ramsey's RESET specification test for linear models

    This is a general specification test for additional non-linear effects
    in a model.

    Notes
    -----
    The test fits an auxiliary OLS regression where the design matrix, exog,
    is augmented by powers 2 to degree of the fitted values. Then it performs
    an F-test of whether these additional terms are significant.

    If the p-value of the F-test is below a threshold, e.g. 0.1, this
    indicates that there might be additional non-linear effects in the model
    and that the linear model is mis-specified.

    References
    ----------
    http://en.wikipedia.org/wiki/Ramsey_RESET_test
    '''
    order = degree + 1
    k_vars = res.model.exog.shape[1]
    # Vandermonde matrix of the fitted values, without the constant and
    # linear columns:
    y_fitted_vander = np.vander(res.fittedvalues, order)[:, :-2]
    exog = np.column_stack((res.model.exog, y_fitted_vander))
    res_aux = OLS(res.model.endog, exog).fit()
    # Jointly test the degree - 1 added powers of the fitted values.
    r_matrix = np.eye(degree - 1, exog.shape[1], k_vars)
    return res_aux.f_test(r_matrix)
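A short usage sketch for reset_ramsey follows; the simulated data and the deliberately misspecified fit are illustrative assumptions, not part of the example above.

import numpy as np
from statsmodels.regression.linear_model import OLS
from statsmodels.tools import add_constant

# Simulate data with a quadratic term, then fit a purely linear model.
rng = np.random.default_rng(0)
x = rng.normal(size=200)
y = 1.0 + x + 0.5 * x**2 + rng.normal(size=200)
res = OLS(y, add_constant(x)).fit()    # misspecified: omits x**2

ftest = reset_ramsey(res, degree=3)    # augments with fitted**2, fitted**3
print(ftest)                           # a small p-value flags misspecification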
Example 4: setupClass
def setupClass(cls):
    R = np.zeros(7)
    R[4:6] = [1, -1]
    data = longley.load()
    data.exog = add_constant(data.exog, prepend=False)
    res1 = OLS(data.endog, data.exog).fit()
    cls.Ttest1 = res1.t_test(R)
Example 5: fit_dlogM_mw
def fit_dlogM_mw(tab, sfrsd_tab, mltype='ring', mlb='i'):
    merge_tab = t.join(tab, sfrsd_tab, 'plateifu')
    is_agn = m.mask_from_maskbits(merge_tab['mngtarg3'], [1, 2, 3, 4])

    mlb_ix = totalmass.StellarMass.bands_ixs[mlb]
    absmag_sun_mlb = totalmass.StellarMass.absmag_sun[mlb_ix]

    logmass_in_ifu = merge_tab['mass_in_ifu'].to(u.dex(u.Msun))
    logmass_in_ifu_lw = merge_tab['ml_fluxwt'] + merge_tab['ifu_absmag'][:, mlb_ix].to(
        u.dex(m.bandpass_sol_l_unit), totalmass.bandpass_flux_to_solarunits(absmag_sun_mlb))
    merge_tab['dlogmass_lw'] = logmass_in_ifu - logmass_in_ifu_lw

    ha_corr = np.exp(merge_tab['mean_atten_mwtd'] * (6563 / 5500)**-1.3)
    sfrsd = merge_tab['sigma_sfr'] * ha_corr * u.Msun / u.yr / u.pc**2
    mass_pca = merge_tab['mass_in_ifu'] + merge_tab['outer_mass_{}'.format(mltype)]
    ssfrsd = sfrsd / mass_pca
    merge_tab['log_ssfrsd'] = ssfrsd.to(u.dex(ssfrsd.unit))
    merge_tab['log_ssfrsd'][~np.isfinite(merge_tab['log_ssfrsd'])] = np.nan * merge_tab['log_ssfrsd'].unit

    ols = OLS(
        endog=np.array(merge_tab['dlogmass_lw'][~is_agn]),
        exog=sm_add_constant(
            t.Table(merge_tab['mean_atten_mwtd', 'std_atten_mwtd', 'log_ssfrsd'])[~is_agn].to_pandas(),
            prepend=False),
        hasconst=True, missing='drop')
    olsfit = ols.fit()

    return olsfit
Example 6: test_regularized
def test_regularized(self):
    import os
    from . import glmnet_r_results

    cur_dir = os.path.dirname(os.path.abspath(__file__))
    data = np.loadtxt(os.path.join(cur_dir, "results", "lasso_data.csv"),
                      delimiter=",")

    tests = [x for x in dir(glmnet_r_results) if x.startswith("rslt_")]

    for test in tests:
        vec = getattr(glmnet_r_results, test)

        n = vec[0]
        p = vec[1]
        L1_wt = float(vec[2])
        lam = float(vec[3])
        params = vec[4:].astype(np.float64)

        endog = data[0:int(n), 0]
        exog = data[0:int(n), 1:(int(p) + 1)]

        endog = endog - endog.mean()
        endog /= endog.std(ddof=1)
        exog = exog - exog.mean(0)
        exog /= exog.std(0, ddof=1)

        mod = OLS(endog, exog)
        rslt = mod.fit_regularized(L1_wt=L1_wt, alpha=lam)
        assert_almost_equal(rslt.params, params, decimal=3)

        # Smoke test for summary
        smry = rslt.summary()
Example 7: test_f_score_with_covars_and_normalized_design_withcovar
def test_f_score_with_covars_and_normalized_design_withcovar(random_state=0):
    """
    This test has a statsmodels dependence. There seems to be no simple
    alternative way to perform an F-test on a linear model including
    covariates.
    """
    try:
        from statsmodels.regression.linear_model import OLS
    except ImportError:
        warnings.warn("Statsmodels is required to run this test")
        raise nose.SkipTest

    rng = check_random_state(random_state)

    # Normalized data
    n_samples = 50

    # generate data
    var1 = np.ones((n_samples, 1)) / np.sqrt(n_samples)  # normalized
    var2 = rng.randn(n_samples, 1)
    var2 = var2 / np.sqrt(np.sum(var2 ** 2, 0))  # normalize
    covars = np.eye(n_samples, 3)  # covars is orthogonal
    covars[3] = -1  # covars is orthogonal to var1
    covars = orthonormalize_matrix(covars)

    # own f_score
    f_val_own = _f_score_with_covars_and_normalized_design(var1, var2, covars)[0]

    # statsmodels f_score
    test_matrix = np.array([[1.0, 0.0, 0.0, 0.0]])
    statsmodels_ols = OLS(var2, np.hstack((var1, covars))).fit()
    f_val_statsmodels = statsmodels_ols.f_test(test_matrix).fvalue[0]
    assert_array_almost_equal(f_val_own, f_val_statsmodels)
Example 8: test_permuted_ols_intercept_statsmodels_withcovar
def test_permuted_ols_intercept_statsmodels_withcovar(random_state=0):
    """
    This test has a statsmodels dependence. There seems to be no simple
    alternative way to perform an F-test on a linear model including
    covariates.
    """
    try:
        from statsmodels.regression.linear_model import OLS
    except ImportError:
        warnings.warn("Statsmodels is required to run this test")
        raise nose.SkipTest

    rng = check_random_state(random_state)

    # design parameters
    n_samples = 50

    # create design
    target_var = rng.randn(n_samples, 1)
    tested_var = np.ones((n_samples, 1))
    confounding_vars = rng.randn(n_samples, 2)

    # statsmodels OLS
    ols = OLS(target_var, np.hstack((tested_var, confounding_vars))).fit()
    fvals = ols.f_test([[1.0, 0.0, 0.0]]).fvalue

    # permuted OLS
    _, orig_scores, _ = permuted_ols(tested_var, target_var, confounding_vars,
                                     n_perm=0, random_state=random_state)
    # same thing but with model_intercept=True to check it has no effect
    _, orig_scores_addintercept, _ = permuted_ols(
        tested_var, target_var, confounding_vars, model_intercept=True,
        n_perm=0, random_state=random_state)

    assert_array_almost_equal(fvals, orig_scores, decimal=6)
    assert_array_almost_equal(orig_scores, orig_scores_addintercept, decimal=6)
Example 9: test_repeat_partition
def test_repeat_partition():
    # tests that if we use identical partitions the average is the same
    # as the estimate for the full data

    np.random.seed(435265)
    N = 200
    p = 10
    m = 1

    beta = np.random.normal(size=p)
    beta = beta * np.random.randint(0, 2, p)
    X = np.random.normal(size=(N, p))
    y = X.dot(beta) + np.random.normal(size=N)

    def _rep_data_gen(endog, exog, partitions):
        """partitions data"""
        n_exog = exog.shape[0]
        n_part = np.ceil(n_exog / partitions)
        ii = 0
        while ii < n_exog:
            yield endog, exog
            ii += int(n_part)

    nv_mod = DistributedModel(m, estimation_method=_est_regularized_naive,
                              join_method=_join_naive)
    fitOLSnv = nv_mod.fit(_rep_data_gen(y, X, m), fit_kwds={"alpha": 0.1})

    ols_mod = OLS(y, X)
    fitOLS = ols_mod.fit_regularized(alpha=0.1)

    assert_allclose(fitOLSnv.params, fitOLS.params)
Example 10: setup_class
def setup_class(cls):
    cls.cov_type = 'HC0'

    mod1 = GLM(endog, exog, family=families.Gaussian())
    cls.res1 = mod1.fit(cov_type='HC0')

    mod2 = OLS(endog, exog)
    cls.res2 = mod2.fit(cov_type='HC0')
Example 11: test_filter
def test_filter():
    # Basic test for filtering
    mod = RecursiveLS(endog, exog)
    res = mod.filter()

    # Test the RLS estimates against OLS estimates
    mod_ols = OLS(endog, exog)
    res_ols = mod_ols.fit()
    assert_allclose(res.params, res_ols.params)
Example 12: test_conf_int_single_regressor
def test_conf_int_single_regressor():
    # GH#706 single-regressor model (i.e. no intercept) with 1D exog
    # should get passed to DataFrame for conf_int
    y = pandas.Series(np.random.randn(10))
    x = pandas.Series(np.ones(10))
    res = OLS(y, x).fit()
    conf_int = res.conf_int()

    np.testing.assert_equal(conf_int.shape, (1, 2))
    np.testing.assert_(isinstance(conf_int, pandas.DataFrame))
Example 13: qqline
def qqline(ax, line, x=None, y=None, dist=None, fmt='r-'):
    """
    Plot a reference line for a qqplot.

    Parameters
    ----------
    ax : matplotlib axes instance
        The axes on which to plot the line
    line : str {'45', 'r', 's', 'q'}
        Options for the reference line to which the data is compared:

        - '45' - 45-degree line
        - 's' - standardized line; the expected order statistics are
          scaled by the standard deviation of the given sample and have
          the mean added to them
        - 'r' - A regression line is fit
        - 'q' - A line is fit through the quartiles
        - None - By default no reference line is added to the plot
    x : array
        X data for plot. Not needed if line is '45'.
    y : array
        Y data for plot. Not needed if line is '45'.
    dist : scipy.stats.distribution
        A scipy.stats distribution; needed if line is 'q'.

    Notes
    -----
    There is no return value. The line is plotted on the given `ax`.
    """
    if line == '45':
        # list() so the result is subscriptable under Python 3
        end_pts = list(zip(ax.get_xlim(), ax.get_ylim()))
        end_pts[0] = min(end_pts[0])
        end_pts[1] = max(end_pts[1])
        ax.plot(end_pts, end_pts, fmt)
        ax.set_xlim(end_pts)
        ax.set_ylim(end_pts)
        return  # does this have any side effects?
    if x is None and y is None:
        raise ValueError("If line is not 45, x and y cannot be None.")
    elif line == 'r':
        # could use ax.lines[0].get_xdata(), get_ydata(),
        # but don't know axes are 'clean'
        y = OLS(y, add_constant(x)).fit().fittedvalues
        ax.plot(x, y, fmt)
    elif line == 's':
        m, b = y.std(), y.mean()
        ref_line = x * m + b
        ax.plot(x, ref_line, fmt)
    elif line == 'q':
        _check_for_ppf(dist)
        q25 = stats.scoreatpercentile(y, 25)
        q75 = stats.scoreatpercentile(y, 75)
        theoretical_quartiles = dist.ppf([0.25, 0.75])
        m = (q75 - q25) / np.diff(theoretical_quartiles)
        b = q25 - m * theoretical_quartiles[0]
        ax.plot(x, m * x + b, fmt)
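A hedged sketch of how qqline might be called; the matplotlib figure and the simulated sample are assumptions added for illustration, using the 's' (standardized line) option from the function above.

import numpy as np
import matplotlib.pyplot as plt
from scipy import stats

# Simulated sample plotted against theoretical normal quantiles.
rng = np.random.default_rng(0)
sample = np.sort(rng.normal(size=100))
theoretical = stats.norm.ppf((np.arange(100) + 0.5) / 100)

fig, ax = plt.subplots()
ax.plot(theoretical, sample, 'bo')
qqline(ax, 's', x=theoretical, y=sample)  # standardized reference line
plt.show()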
Example 14: test_706
def test_706():
    # make sure one-regressor pandas Series gets passed to DataFrame
    # for conf_int.
    y = pandas.Series(np.random.randn(10))
    x = pandas.Series(np.ones(10))
    res = OLS(y, x).fit()
    conf_int = res.conf_int()

    np.testing.assert_equal(conf_int.shape, (1, 2))
    np.testing.assert_(isinstance(conf_int, pandas.DataFrame))
Example 15: test_summary_as_latex
def test_summary_as_latex():
    # GH#734
    import re
    dta = longley.load_pandas()
    X = dta.exog
    X["constant"] = 1
    y = dta.endog
    res = OLS(y, X).fit()
    with pytest.warns(UserWarning):
        table = res.summary().as_latex()

    # replace the (run-dependent) date and time with fixed values
    table = re.sub("(?<=\n\\\\textbf\\{Date:\\} &).+?&",
                   " Sun, 07 Apr 2013 &", table)
    table = re.sub("(?<=\n\\\\textbf\\{Time:\\} &).+?&",
                   " 13:46:07 &", table)

    expected = """\\begin{center}
\\begin{tabular}{lclc}
\\toprule
\\textbf{Dep. Variable:} & TOTEMP & \\textbf{ R-squared: } & 0.995 \\\\
\\textbf{Model:} & OLS & \\textbf{ Adj. R-squared: } & 0.992 \\\\
\\textbf{Method:} & Least Squares & \\textbf{ F-statistic: } & 330.3 \\\\
\\textbf{Date:} & Sun, 07 Apr 2013 & \\textbf{ Prob (F-statistic):} & 4.98e-10 \\\\
\\textbf{Time:} & 13:46:07 & \\textbf{ Log-Likelihood: } & -109.62 \\\\
\\textbf{No. Observations:} & 16 & \\textbf{ AIC: } & 233.2 \\\\
\\textbf{Df Residuals:} & 9 & \\textbf{ BIC: } & 238.6 \\\\
\\textbf{Df Model:} & 6 & \\textbf{ } & \\\\
\\bottomrule
\\end{tabular}
\\begin{tabular}{lcccccc}
 & \\textbf{coef} & \\textbf{std err} & \\textbf{t} & \\textbf{P$> |$t$|$} & \\textbf{[0.025} & \\textbf{0.975]} \\\\
\\midrule
\\textbf{GNPDEFL} & 15.0619 & 84.915 & 0.177 & 0.863 & -177.029 & 207.153 \\\\
\\textbf{GNP} & -0.0358 & 0.033 & -1.070 & 0.313 & -0.112 & 0.040 \\\\
\\textbf{UNEMP} & -2.0202 & 0.488 & -4.136 & 0.003 & -3.125 & -0.915 \\\\
\\textbf{ARMED} & -1.0332 & 0.214 & -4.822 & 0.001 & -1.518 & -0.549 \\\\
\\textbf{POP} & -0.0511 & 0.226 & -0.226 & 0.826 & -0.563 & 0.460 \\\\
\\textbf{YEAR} & 1829.1515 & 455.478 & 4.016 & 0.003 & 798.788 & 2859.515 \\\\
\\textbf{constant} & -3.482e+06 & 8.9e+05 & -3.911 & 0.004 & -5.5e+06 & -1.47e+06 \\\\
\\bottomrule
\\end{tabular}
\\begin{tabular}{lclc}
\\textbf{Omnibus:} & 0.749 & \\textbf{ Durbin-Watson: } & 2.559 \\\\
\\textbf{Prob(Omnibus):} & 0.688 & \\textbf{ Jarque-Bera (JB): } & 0.684 \\\\
\\textbf{Skew:} & 0.420 & \\textbf{ Prob(JB): } & 0.710 \\\\
\\textbf{Kurtosis:} & 2.434 & \\textbf{ Cond. No. } & 4.86e+09 \\\\
\\bottomrule
\\end{tabular}
%\\caption{OLS Regression Results}
\\end{center}
Warnings: \\newline
[1] Standard Errors assume that the covariance matrix of the errors is correctly specified. \\newline
[2] The condition number is large, 4.86e+09. This might indicate that there are \\newline
strong multicollinearity or other numerical problems."""

    assert_equal(table, expected)