Python anova.anova_lm函数代码示例

本文整理汇总了Python中statsmodels.stats.anova.anova_lm函数的典型用法代码示例。如果您正苦于以下问题：Python anova_lm函数的具体用法？Python anova_lm怎么用？Python anova_lm使用的例子？那么恭喜您，这里精选的函数代码示例或许可以为您提供帮助。

在下文中一共展示了anova_lm函数的15个代码示例，这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞，您的评价将有助于系统推荐出更棒的Python代码示例。

示例1: model_formulas

def model_formulas():
    """Fit three OLS models defined through formulas and print their ANOVAs.

    Reads ``swim100m.csv`` from the working directory, fits ``time`` against
    ``sex``, ``sex + year`` and ``sex * year``, and prints every model summary
    followed by every ANOVA table.

    Returns:
        The ``F`` value in the first row of the ANOVA table of the
        interaction model (``time ~ sex * year``).
    """

    # Get the data:
    # Development of world record times for the 100m Freestyle, for men and women.
    data = pd.read_csv('swim100m.csv')

    # Different models
    model1 = ols("time ~ sex", data).fit()  # one factor
    model2 = ols("time ~ sex + year", data).fit()   # two factors
    model3 = ols("time ~ sex * year", data).fit()   # two factors with interaction

    # Model information
    print((model1.summary()))
    print((model2.summary()))
    print((model3.summary()))

    # ANOVAs
    print('----------------- Results ANOVAs: Model 1 -----------------------')
    print((anova_lm(model1)))

    print('--------------------- Model 2 -----------------------------------')
    print((anova_lm(model2)))

    print('--------------------- Model 3 -----------------------------------')
    model3Results = anova_lm(model3)
    print(model3Results)

    # Just to check the correct run.
    # The ANOVA table is indexed by term name, so the first row has to be
    # selected by position with .iloc; a plain [0] on a label-indexed Series
    # relies on a deprecated positional fallback.
    return model3Results['F'].iloc[0]  # should be 156.1407931415788

开发者ID:ChengduoZhao，项目名称:statsintro_python，代码行数:30，代码来源:ISP_simpleModels.py

示例2: anova_oneway

def anova_oneway():
    """One-way ANOVA: test if results from 3 groups are equal.

    Loads ``altman_910.txt`` through ``getData``, checks equality of variances
    with the Levene test, runs ``scipy.stats.f_oneway`` on the three groups and
    finally repeats the analysis with a statsmodels formula. All results are
    printed; nothing is returned.
    """

    # Get the data
    data = getData('altman_910.txt')

    # Sort them into groups, according to column 1
    group1 = data[data[:, 1] == 1, 0]
    group2 = data[data[:, 1] == 2, 0]
    group3 = data[data[:, 1] == 3, 0]

    # First, check if the variances are equal, with the "Levene"-test
    # (only the p-value is needed, the test statistic is discarded)
    _, p = stats.levene(group1, group2, group3)
    if p < 0.05:
        print('Warning: the p-value of the Levene test is <0.05: p={0}'.format(p))

    # Do the one-way ANOVA
    F_statistic, pVal = stats.f_oneway(group1, group2, group3)

    # Print the results.
    # print() with a single argument behaves the same on Python 2 and 3; the
    # tuple is wrapped in its own parentheses so it is still shown as a tuple.
    print('Altman 910:')
    print((F_statistic, pVal))
    if pVal < 0.05:
        print('One of the groups is significantly different.')

    # Elegant alternative implementation, with pandas & statsmodels
    df = pd.DataFrame(data, columns=['value', 'treatment'])
    model = ols('value ~ C(treatment)', df).fit()
    print(anova_lm(model))

开发者ID:josef-pkt，项目名称:statsintro，代码行数:29，代码来源:anovaOneway.py

示例3: model_formulas

def model_formulas():
    """Fit three OLS models defined through formulas and print their ANOVAs.

    Reads the swim100m data set, fits ``time`` against ``sex``, ``sex + year``
    and ``sex * year``, and prints every model summary followed by every
    ANOVA table.

    Returns:
        The ``F`` value in the first row of the ANOVA table of the
        interaction model (``time ~ sex * year``).
    """
    # Get the data
    data = read_csv(r'..\Data\data_kaplan\swim100m.csv')

    # Different models
    model1 = ols("time ~ sex", data).fit()  # one factor
    model2 = ols("time ~ sex + year", data).fit()   # two factors
    model3 = ols("time ~ sex * year", data).fit()   # two factors with interaction

    # Model information
    print((model1.summary()))
    print((model2.summary()))
    print((model3.summary()))

    # ANOVAs
    print('-----------------------------------------------------------------')
    print((anova_lm(model1)))

    print('-----------------------------------------------------------------')
    print((anova_lm(model2)))

    print('-----------------------------------------------------------------')
    model3Results = anova_lm(model3)
    print(model3Results)

    # Just to check the correct run.
    # The ANOVA table is indexed by term name, so the first row has to be
    # selected by position with .iloc; a plain [0] on a label-indexed Series
    # relies on a deprecated positional fallback.
    return model3Results['F'].iloc[0]  # should be 156.1407931415788

开发者ID:nsonnad，项目名称:statsintro，代码行数:28，代码来源:modeling.py

示例4: run_anova

    def run_anova(self):
        """Run the Area-by-parameter ANOVAs and publish their F and p values.

        Restricts ``self.ps_table`` to the areas listed in
        ``self.params.anova_areas`` and fits ``prob_diff`` against
        ``C(Area)`` crossed with a second factor:

        * ``Pulse_Frequency`` on the whole restricted table (suffix ``rf``);
        * ``Duration`` and ``Amplitude`` on the rows with pulse frequency
          10 or 25 (suffixes ``rd_low`` / ``ra_low``);
        * ``Duration`` and ``Amplitude`` on the rows with pulse frequency
          100 or 200 (suffixes ``rd_high`` / ``ra_high``).

        For every fit the first three entries of the ``F`` and ``PR(>F)``
        columns are handed to ``self.pass_object`` under ``fvalue_<suffix>``
        and ``pvalue_<suffix>``.
        """
        ps_table_for_anova = self.ps_table[self.ps_table['Area'].isin(self.params.anova_areas)]

        # NOTE: a mixed model grouped by Subject (mixedlm) was tried for the
        # Pulse_Frequency analysis and is disabled; plain OLS is used throughout.
        def publish_anova(second_factor, table, suffix):
            # Fit prob_diff ~ C(Area) * C(<second_factor>) and publish the
            # first three rows of the ANOVA table (presumably the two main
            # effects and their interaction - confirm against the table).
            ps_lm = ols('prob_diff ~ C(Area) * C(%s)' % second_factor, data=table).fit()
            anova = anova_lm(ps_lm)
            self.pass_object('fvalue_' + suffix, anova['F'].values[0:3])
            self.pass_object('pvalue_' + suffix, anova['PR(>F)'].values[0:3])

        publish_anova('Pulse_Frequency', ps_table_for_anova, 'rf')

        for label, frequencies in (('low', [10, 25]), ('high', [100, 200])):
            subset = ps_table_for_anova[ps_table_for_anova['Pulse_Frequency'].isin(frequencies)]
            # Single-argument print() gives the same output on Python 2 and 3.
            print('nsamples = {0}'.format(len(subset)))

            publish_anova('Duration', subset, 'rd_' + label)
            publish_anova('Amplitude', subset, 'ra_' + label)

开发者ID:maciekswat，项目名称:ram_utils，代码行数:34，代码来源:RunAnalysis.py

示例5: anova_interaction

def anova_interaction(data_lastDV):
    """
    Two-way ANOVA and interaction analysis of given data
    http://statsmodels.sourceforge.net/devel/examples/generated/example_interactions.html

    Note: 2way ANOVAs are for 2+ categorical independent/causal variables, with 2+ levels each

    Prints the summary of the additive model, then three model comparisons:
    additive vs. interaction, first factor alone vs. both factors, and second
    factor alone vs. both factors. If fewer than three columns are supplied an
    error message is printed and nothing else is done.

    :param data_lastDV: data frame containing the independent variables in first two columns, dependent in the third
    :return: None
    """

    col_names = data_lastDV.columns.values  # get the columns' names
    if len(col_names) < 3:
        # len() is an int and must be converted before concatenation; return
        # early because the code below indexes the first three columns.
        print("ERROR in statsMOOC.py: Not enough columns in dataframe to do interaction analysis: " + str(len(col_names)))
        return

    factor_groups = data_lastDV[col_names].dropna()
    first_iv, second_iv, outcome = col_names[0], col_names[1], col_names[2]

    # two-way anova: additive model and the same model with interaction
    formula = outcome + " ~ C(" + first_iv + ") + C(" + second_iv + ")"
    formula_interaction = outcome + " ~ C(" + first_iv + ") * C(" + second_iv + ")"
    additive_lm = ols(formula, data=factor_groups).fit()  # linear model
    print(additive_lm.summary())

    print(FORMAT_LINE)
    print("- " + outcome + " = " + first_iv + " * " + second_iv + " Interaction -")
    # anova_lm compares each model with the one before it, so the restricted
    # (additive) model goes first and the interaction model second.
    print(anova_lm(additive_lm, ols(formula_interaction, data=factor_groups).fit()))

    # Both remaining comparisons use the same two-factor model (sum contrast
    # on the second factor), so it is fitted once and reused.
    both_factors_lm = ols(outcome + " ~ C(" + first_iv + ") + C(" + second_iv + ", Sum)", data=factor_groups).fit()

    print(FORMAT_LINE)
    print("- " + outcome + " = " + first_iv + " + " + second_iv + " ANOVA -")
    print(anova_lm(ols(outcome + " ~ C(" + first_iv + ")", data=factor_groups).fit(), both_factors_lm))

    print(FORMAT_LINE)
    print("- " + outcome + " = " + second_iv + " + " + first_iv + " ANOVA -")
    print(anova_lm(ols(outcome + " ~ C(" + second_iv + ")", data=factor_groups).fit(), both_factors_lm))

开发者ID:UberHowley，项目名称:spoc-file-processing，代码行数:32，代码来源:statsSPOC.py

示例6: anova_interaction

def anova_interaction():
    """ANOVA with interaction: Measurement of fetal head circumference,
    by four observers in three fetuses.

    Loads ``altman_12_6.txt`` through ``getData``, fits ``hs`` against fetus,
    observer and their interaction, and prints the ANOVA table.
    """

    # Get the data
    data = getData('altman_12_6.txt')

    # Bring them in dataframe-format
    df = pd.DataFrame(data, columns=['hs', 'fetus', 'observer'])

    # Determine the ANOVA with interaction
    formula = 'hs ~ C(fetus) + C(observer) + C(fetus):C(observer)'
    lm = ols(formula, df).fit()
    # print() call instead of the Python 2 print statement
    print(anova_lm(lm))

开发者ID:josef-pkt，项目名称:statsintro，代码行数:14，代码来源:anovaTwoway.py

示例7: anova

def anova(df, fmla, typ=3):
    """Fit an OLS model from a formula and collect its ANOVA results.

    :param df: data frame the formula is evaluated against
    :param fmla: model formula passed to ``ols``
    :param typ: ANOVA type forwarded to ``anova_lm`` (defaults to 3)
    :return: dict with the keys
        ``lm`` (fitted model), ``aov`` (ANOVA table),
        ``pvals`` (``PR(>F)`` column, labels prefixed with ``p_``),
        ``ess`` (sum of squares of every row except ``Residual``, normalised
        to sum to one, labels prefixed with ``ess_``) and
        ``fit`` (model parameters, labels prefixed with ``fit_``)
    :raises ZeroDivisionError: if the ANOVA table contains a negative sum of
        squares
    """
    from statsmodels.formula.api import ols
    from statsmodels.stats.anova import anova_lm

    # Anova/OLS
    lm = ols(fmla, df).fit()  # 'data' <==> 'df' keyword change with version

    # Grab the pvalues (Type III unless the caller chose another typ).
    # add_prefix returns a relabelled copy, so neither the ANOVA table nor the
    # fitted model is modified by the renaming done below.
    aov = anova_lm(lm, typ=typ)
    pvals = aov["PR(>F)"].add_prefix("p_")

    # Grab the explainable sum of squares
    ess = aov.drop("Residual").sum_sq
    ess = (ess / ess.sum()).add_prefix("ess_")

    # Grab the fit; relabel a copy so that lm.params keeps its original labels
    fit = lm.params.add_prefix("fit_")

    # I think this happens with pathological inputs.
    # The exception type is kept as ZeroDivisionError for existing callers.
    if np.any(aov["sum_sq"] < 0):
        raise ZeroDivisionError("negative sum of squares in ANOVA table")

    return {"lm": lm, "aov": aov, "pvals": pvals, "ess": ess, "fit": fit}

开发者ID:cxrodgers，项目名称:my，代码行数:26，代码来源:stats.py

示例8: test_results

    def test_results(self):
        """Check the two-model comparison table against reference values.

        A model without interaction terms is fitted on ``self.data`` and
        compared with ``self.kidney_lm`` through ``anova_lm``; every column of
        the resulting table is checked.
        """
        additive_fit = ols("np.log(Days+1) ~ C(Duration) + C(Weight)",
                           self.data).fit()
        table = anova_lm(additive_fit, self.kidney_lm)

        # Reference values, one entry per compared model
        expected_df_resid = np.array([56, 54])
        expected_ssr = np.array([29.62486, 28.9892])
        default_precision_columns = (
            ("df_diff", np.array([0, 2])),
            ("ss_diff", np.array([np.nan, 0.6356584])),
            ("F", np.array([np.nan, 0.5920404])),
            ("Pr(>F)", np.array([np.nan, 0.5567479])),
        )

        np.testing.assert_equal(table["df_resid"].values, expected_df_resid)
        np.testing.assert_almost_equal(table["ssr"].values, expected_ssr, 4)
        for column, expected in default_precision_columns:
            np.testing.assert_almost_equal(table[column].values, expected)

开发者ID:Tskatom，项目名称:Embers_VT，代码行数:30，代码来源:test_anova.py

示例9: anova

def anova(dv):
    """Perform ANOVA.

    Fits ``dv`` against ``C(group) * age * iq`` on the frame returned by
    ``make_summary`` and prints a header line followed by the type-2 ANOVA
    table computed with the ``hc3`` robust covariance.

    :param dv: name of the dependent variable, inserted into the formula
    """

    df = make_summary()
    lm = ols('%s ~ C(group) * age * iq' % dv, data=df).fit()
    divider = '---------'
    # Two single-argument print() calls: identical on Python 2 and 3, and the
    # table starts at column 0 of its own line.
    print('{0} {1} {0}'.format(divider, dv))
    print(anova_lm(lm, typ=2, robust='hc3'))

开发者ID:sammosummo，项目名称:PsychosisDDM，代码行数:7，代码来源:conventional.py

示例10: startanova

    def startanova(self):
        """Run a one-way ANOVA of ``Time`` by ``Fitness`` on the rehab data.

        The data is read from the local cache file ``rehab.table``; if that
        fails it is downloaded and the cache file is written. The method then
        prints the table, draws a box plot, fits ``Time ~ C(Fitness)`` and
        prints the ANOVA table (using ``self.test`` and ``self.robust``), the
        design matrix and the model summary before showing the plot.
        """
        import pandas
        import matplotlib.pyplot as plt
        from statsmodels.formula.api import ols
        from statsmodels.stats.anova import anova_lm

        try:
            rehab_table = pandas.read_csv('rehab.table')
        except (OSError, ValueError):
            # Cache file missing (OSError) or unparsable (pandas parser errors
            # derive from ValueError): fetch the data and rebuild the cache.
            url = 'http://stats191.stanford.edu/data/rehab.csv'
            # pandas opens URLs itself, so no urllib2/urllib.request is needed
            rehab_table = pandas.read_csv(url)
            # index=False keeps the row index out of the cache file, so a
            # later read of the cache has the same columns as the download
            rehab_table.to_csv('rehab.table', index=False)

        print(rehab_table)

        plt.figure(figsize=(6, 6))
        rehab_table.boxplot('Time', 'Fitness', ax=plt.gca())
        rehab_lm = ols('Time ~ C(Fitness)', data=rehab_table).fit()
        table9 = anova_lm(rehab_lm, test=self.test, robust=self.robust)
        print(table9)
        print(rehab_lm.model.data.orig_exog)
        print(rehab_lm.summary())
        plt.show()

开发者ID:vishnumani2009，项目名称:OpenSource-Open-Ended-Statistical-toolkit，代码行数:29，代码来源:anovafront.py

示例11: doAnova

def doAnova(data):
    """One-way ANOVA of ``StressReduction`` by ``Treatment``.

    Builds a data frame from ``data``, fits ``StressReduction ~ C(Treatment)``,
    prints the ANOVA table and reports when the p-value in the table's first
    row is below 0.05.
    """

    df = pd.DataFrame(data)
    model = ols('StressReduction ~ C(Treatment)', df).fit()

    anovaResults = anova_lm(model)
    print(anovaResults)
    # The table is indexed by term name; select the first row by position
    # instead of relying on the deprecated integer fallback of Series[0].
    if anovaResults['PR(>F)'].iloc[0] < 0.05:
        print('One of the groups is different.')

开发者ID:ejmurray，项目名称:statsintro_python，代码行数:10，代码来源:C8_3_multipleTesting.py

示例12: anova_statsmodels

def anova_statsmodels():
    """Do the ANOVA with a function.

    Reads the galton data set, fits ``height ~ 1 + sex`` and prints the ANOVA
    table.

    Returns:
        The ``F`` value in the first row of the ANOVA table.
    """

    # Get the data
    data = pd.read_csv(r'..\Data\data_kaplan\galton.csv')

    anova_results = anova_lm(ols('height ~ 1 + sex', data).fit())
    print('\nANOVA with "statsmodels" ------------------------------')
    print(anova_results)

    # The table is indexed by term name; select the first row by position
    # instead of relying on the deprecated integer fallback of Series[0].
    return anova_results['F'].iloc[0]

开发者ID:ing7t，项目名称:kod，代码行数:11，代码来源:anovaOneway.py

示例13: anova_statsmodels

def anova_statsmodels():
    """Do the ANOVA with a function.

    Reads the galton data set, fits ``height ~ 1 + sex`` and prints the ANOVA
    table.

    Returns:
        The ``F`` value in the first row of the ANOVA table.
    """

    # Get the data
    data = pd.read_csv(r"..\Data\data_kaplan\galton.csv")

    anova_results = anova_lm(ols("height ~ 1 + sex", data).fit())
    print('\nANOVA with "statsmodels" ------------------------------')
    print(anova_results)

    # The table is indexed by term name; select the first row by position
    # instead of relying on the deprecated integer fallback of Series[0].
    return anova_results["F"].iloc[0]

开发者ID:phaustin，项目名称:statsintro，代码行数:11，代码来源:anovaOneway.py

示例14: test_results

    def test_results(self):
        """Check the single-model ANOVA table of ``self.kidney_lm``."""
        nan = np.nan
        expected_df = np.array([2, 2, 2, 54])
        expected_sum_sq = np.array([158.6415227, 16.97129, 0.6356584, 28.9892])
        # Reference mean squares; kept for completeness, not asserted below.
        mean_sq = np.array([79.3207613, 8.485645, 0.3178292, 0.536837])
        expected_f = np.array([147.7557648, 15.80674, 0.5920404, nan])
        expected_pr_f = np.array([1.262324e-22, 3.944502e-06, 0.5567479, nan])

        table = anova_lm(self.kidney_lm)

        np.testing.assert_equal(table["df"].values, expected_df)
        # Sum of squares and F are compared to four decimals
        for column, expected in (("sum_sq", expected_sum_sq), ("F", expected_f)):
            np.testing.assert_almost_equal(table[column].values, expected, 4)
        np.testing.assert_almost_equal(table["PR(>F)"].values, expected_pr_f)

开发者ID:JerWatson，项目名称:statsmodels，代码行数:12，代码来源:test_anova.py

示例15: one_stats

def one_stats(data_lastDV):
    """
    Do basic analysis of one IV onto one DV

    Prints descriptive statistics of the first column, shows two bar charts
    (instances per condition and mean outcome per condition), prints a one-way
    ANOVA with the model summary, shows a box plot, and finally runs
    ``anova_interaction`` and ``plot_interaction`` once for every remaining
    independent variable.

    :param data_lastDV: pandas dataframe we are exploring (IV-of-interest in first column, followed by IVs, and DV in last index)
    :return: None
    """
    remaining_ivs = data_lastDV.columns.values.tolist()
    iv_name = remaining_ivs.pop(0)  # IV of interest: first column
    dv_name = remaining_ivs.pop()   # outcome: last column
    pair_frame = data_lastDV[[iv_name, dv_name]]

    # --- descriptive statistics of the IV ---
    print(FORMAT_LINE)
    print(pair_frame[iv_name].describe())
    print(FORMAT_LINE)

    figure = plt.figure()

    # left panel: number of instances assigned to each condition
    left_axes = figure.add_subplot(121)
    per_condition_count = pair_frame.groupby(iv_name)[iv_name].count()
    left_axes = per_condition_count.plot(kind='bar', title=iv_name)
    left_axes.set_xlabel(iv_name)
    left_axes.set_ylabel("count instances")

    # right panel: mean outcome within each condition
    right_axes = figure.add_subplot(122)
    per_condition_mean = data_lastDV.groupby(iv_name)[dv_name].mean()
    right_axes = per_condition_mean.plot(kind='bar', title=iv_name)
    right_axes.set_xlabel(iv_name)
    right_axes.set_ylabel("mean " + dv_name)
    plt.show()

    # --- one-way ANOVA of the outcome by condition ---
    one_way_fit = ols(dv_name + " ~ C(" + iv_name + ")", data=pair_frame).fit()
    one_way_table = anova_lm(one_way_fit)

    print("\n"+FORMAT_LINE)
    print("One-Way ANOVA: " + iv_name + " --> " + dv_name)
    print(FORMAT_LINE)
    print(one_way_table)
    print(one_way_fit.summary())

    # --- box plot of the outcome grouped by condition ---
    figure = plt.figure()
    box_axes = figure.add_subplot(111)
    box_axes = pair_frame.boxplot(dv_name, iv_name, ax=plt.gca())
    box_axes.set_xlabel(iv_name)
    box_axes.set_ylabel(dv_name)
    plt.show()

    # --- pair the IV of interest with every other IV ---
    for other_iv in remaining_ivs:
        anova_interaction(data_lastDV[[iv_name, other_iv, dv_name]])
        plot_interaction(data_lastDV[[iv_name, other_iv, dv_name]])

开发者ID:StrategicC，项目名称:mooc-file-processing，代码行数:53，代码来源:statsMOOC.py

注：本文中的statsmodels.stats.anova.anova_lm函数示例由纯净天空整理自Github/MSDocs等开源代码及文档管理平台，相关代码片段筛选自各路编程大神贡献的开源项目，源码版权归原作者所有，传播和使用请参考对应项目的License；未经允许，请勿转载。