本文整理汇总了Python中statsmodels.stats.multicomp.MultiComparison类的典型用法代码示例。如果您正苦于以下问题：Python MultiComparison类的具体用法？Python MultiComparison怎么用？Python MultiComparison使用的例子？那么，这里精选的类代码示例或许可以为您提供帮助。
在下文中一共展示了MultiComparison类的10个代码示例，这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞，您的评价将有助于系统推荐出更棒的Python代码示例。
示例1: test_incorrect_output
def test_incorrect_output(self):
    """Check input validation and ``group_order`` subsetting.

    The first part asserts that ``MultiComparison`` raises ``ValueError``
    for inconsistent inputs.  The second part asserts that restricting the
    comparison with ``group_order`` yields the same model attributes and
    the same Tukey HSD result attributes as passing only the selected
    observations directly.
    """
    # fewer group labels (8) than observations (10)
    assert_raises(ValueError, MultiComparison, np.array([1] * 10), [1, 2] * 4)
    # more group labels (12) than observations (10)
    assert_raises(ValueError, MultiComparison, np.array([1] * 10), [1, 2] * 6)
    # just one group
    assert_raises(ValueError, MultiComparison, np.array([1] * 10), [1] * 10)
    # group_order doesn't select all observations, only one group left
    assert_raises(ValueError, MultiComparison, np.array([1] * 10),
                  [1, 2] * 5, group_order=[1])

    # group_order doesn't select all observations,
    # we do tukey_hsd with reduced set of observations
    data = np.arange(15)
    groups = np.repeat([1, 2, 3], 5)

    mod1 = MultiComparison(np.array(data), groups, group_order=[1, 2])
    res1 = mod1.tukeyhsd(alpha=0.01)
    mod2 = MultiComparison(np.array(data[:10]), groups[:10])
    res2 = mod2.tukeyhsd(alpha=0.01)

    result_attributes = ['confint', 'data', 'df_total', 'groups',
                         'groupsunique', 'meandiffs', 'q_crit', 'reject',
                         'reject2', 'std_pairs', 'variance']
    model_attributes = ['data', 'datali', 'groupintlab', 'groups',
                        'groupsunique', 'ngroups', 'nobs', 'pairindices']

    # Results are compared first, then the models, attribute by attribute.
    comparisons = ((res1, res2, result_attributes),
                   (mod1, mod2, model_attributes))
    for actual, desired, attributes in comparisons:
        for att in attributes:
            # The separating space keeps the message readable
            # ("confint failed" rather than "confintfailed").
            assert_allclose(getattr(actual, att), getattr(desired, att),
                            rtol=1e-14, err_msg=att + ' failed')
示例2: test_table_names_custom_group_order
def test_table_names_custom_group_order(self):
    """Check that the Tukey HSD table lists pairs in the given group order.

    When ``group_order`` is passed to ``MultiComparison`` the pairs in the
    result table are expected to follow that order.
    """
    requested_order = [b'physical', b'medical', b'mental']
    comparison = MultiComparison(self.endog, self.groups,
                                 group_order=requested_order)
    table = comparison.tukeyhsd(alpha=self.alpha)._results_table

    expected_order = [(b'physical', b'medical'),
                      (b'physical', b'mental'),
                      (b'medical', b'mental')]

    # Table rows 1..3 are checked; row 0 is skipped (presumably the header
    # row -- confirm against the table implementation).
    for row_number, expected_pair in enumerate(expected_order, start=1):
        first_group = table[row_number][0].data
        second_group = table[row_number][1].data
        assert_((first_group, second_group) == expected_pair)
示例3: CheckTuckeyHSD
class CheckTuckeyHSD(object):
    """Shared Tukey HSD checks for test classes that supply the data.

    The methods read ``endog``, ``groups``, ``alpha``, ``meandiff2``,
    ``confint2`` and ``reject2``; none of these is defined in this class,
    so they have to be provided by whoever uses it.
    """

    @classmethod
    def setup_class_(cls):
        """Store the comparison object and its Tukey HSD result on the class."""
        cls.mc = MultiComparison(cls.endog, cls.groups)
        cls.res = cls.mc.tukeyhsd(alpha=cls.alpha)

    def test_multicomptukey(self):
        """Compare entries of ``self.res[1]`` with the reference values."""
        # NOTE(review): indices 2, 4 and 1 of ``self.res[1]`` are presumably
        # mean differences, confidence intervals and reject flags, judging
        # by the local names -- confirm against the tukeyhsd return layout.
        meandiff1 = self.res[1][2]
        assert_almost_equal(meandiff1, self.meandiff2, decimal=14)

        confint1 = self.res[1][4]
        assert_almost_equal(confint1, self.confint2, decimal=2)

        reject1 = self.res[1][1]
        assert_equal(reject1, self.reject2)

    def test_group_tukey(self):
        """Compare element 4 of the ``get_thsd`` result with ``confint2``."""
        res_t = get_thsd(self.mc, alpha=self.alpha)
        assert_almost_equal(res_t[4], self.confint2, decimal=2)

    def test_shortcut_function(self):
        """Check that ``pairwise_tukeyhsd`` agrees with the stored result."""
        # check wrapper function
        res = pairwise_tukeyhsd(self.endog, self.groups, alpha=self.alpha)
        assert_almost_equal(res[1][4], self.res[1][4], decimal=14)
示例4: ols
( 29, 'medical', 3 ),
( 30, 'medical', 1 )], dtype=[('idx', '<i4'),
('Treatment', '|S8'),
('StressReduction', '<i4')])
# First, do a one-way ANOVA
df = pd.DataFrame(dta2)
model = ols('StressReduction ~ C(Treatment)', df).fit()
anovaResults = anova_lm(model)
print(anovaResults)
# .iloc[0] selects the first row by position; plain [0] on a label-indexed
# Series relies on a positional fallback.
if anovaResults['PR(>F)'].iloc[0] < 0.05:
    print('One of the groups is different.')

# Then, do the multiple testing
mod = MultiComparison(dta2['StressReduction'], dta2['Treatment'])
print(mod.tukeyhsd()[0])

# The following code produces the same printout
res2 = pairwise_tukeyhsd(dta2['StressReduction'], dta2['Treatment'])

# Show the group names
print(mod.groupsunique)

# Generate a plot
import matplotlib.pyplot as plt
plt.plot([0, 1, 2], res2[1][2], 'o')
# The markers are already drawn by plt.plot above; 'None' suppresses the
# connecting line so only the error bars are added ('o' is a marker code,
# not a line style).
plt.errorbar([0, 1, 2], res2[1][2],
             yerr=np.abs(res2[1][4].T - res2[1][2]), ls='None')
xlim = -0.5, 2.5
plt.hlines(0, *xlim)
示例5: position_stats
def position_stats(df, name_mapping=None):
    """Return corrected pairwise group p-values for every position.

    For each unique value of ``df.position`` the rows are averaged per
    ``sid``, passed through ``resample_data`` and all pairs of groups are
    compared with ``scipy.stats.ttest_ind(..., equal_var=False)``.  The
    uncorrected p-values of all positions are then doubled, Bonferroni
    corrected together and returned.

    Args:
        df: DataFrame accessed through its ``position``, ``sid``, ``group``
            and ``value`` columns.
        name_mapping: Optional mapping from ``str(group)`` to the name used
            in the column headers.  Group values are used as-is when
            omitted.

    Returns:
        DataFrame with one row per (sorted) position and the first three
        "<name1> - <name2>" columns of corrected p-values.
    """
    from functools import partial, wraps

    # wraps(original)(wrapper) copies __name__ etc. from the original onto
    # the partial and returns the partial.  With the arguments the other way
    # round the untouched original function is returned and
    # ``equal_var=False`` is silently dropped.
    stats_test = wraps(stats.ttest_ind)(
        partial(stats.ttest_ind, equal_var=False))

    positions = df.position.unique()
    positions.sort()

    header = None
    pval_frames = []
    for pos in positions:
        data = df[df.position == pos]
        data = data.groupby(['sid']).mean()
        data = resample_data(data, num_sample_per_pos=BOOTSTRAP_NUM)

        mcp = MultiComparison(data.value, data.group.astype(int))
        rtp = mcp.allpairtest(stats_test, method='bonf')

        mheader = []
        for itest in rtp[2]:
            name1, name2 = itest[0], itest[1]
            if name_mapping is not None:
                name1 = name_mapping[str(name1)]
                name2 = name_mapping[str(name2)]
            mheader.append("{} - {}".format(name1, name2))

        # Keep the longest header seen; it labels the final columns.
        if not header or len(mheader) > len(header):
            header = mheader

        # get the uncorrected pvals
        pvals = rtp[1][0][:, 1]
        pval_frames.append(pd.DataFrame(data=[pvals], columns=mheader))

    # One concat at the end instead of re-copying the frame per position.
    allpvals = pd.concat(pval_frames)

    # NOTE(review): the raw p-values are doubled before the correction; the
    # reason is not visible in this function -- confirm it is intended.
    flatten = allpvals.values.ravel() * 2
    mcpres = multipletests(flatten, alpha=0.05, method='bonf')
    corr_pvals = np.reshape(np.array(mcpres[1]), (len(positions), -1))

    data = pd.DataFrame(data=corr_pvals, columns=header)
    return data[data.columns[:3]]
示例6: main
def main():
    """Run ANOVA, Tukey HSD and pairwise t-tests on a small treatment data set.

    Prints the ANOVA table, the Tukey HSD summary, the group names and two
    pairwise t-test tables, saves a plot of the pairwise mean differences to
    'MultComp.png' and shows it.

    NOTE: the listing this function comes from is truncated after the last
    statement below, so the trailing assignments feed code that is not
    visible here.
    """
    # Note: the statsmodels module is required here.
    from statsmodels.stats.multicomp import (pairwise_tukeyhsd,
                                             MultiComparison)
    from statsmodels.formula.api import ols
    from statsmodels.stats.anova import anova_lm

    # Set up the data, as a structured array.
    # The first and last field are 32-bit integers; the second field is an
    # 8-byte string. Note that here we can also give names to the individual
    # fields!
    dta2 = np.rec.array([
        (1, 'mental', 2),
        (2, 'mental', 2),
        (3, 'mental', 3),
        (4, 'mental', 4),
        (5, 'mental', 4),
        (6, 'mental', 5),
        (7, 'mental', 3),
        (8, 'mental', 4),
        (9, 'mental', 4),
        (10, 'mental', 4),
        (11, 'physical', 4),
        (12, 'physical', 4),
        (13, 'physical', 3),
        (14, 'physical', 5),
        (15, 'physical', 4),
        (16, 'physical', 1),
        (17, 'physical', 1),
        (18, 'physical', 2),
        (19, 'physical', 3),
        (20, 'physical', 3),
        (21, 'medical', 1),
        (22, 'medical', 2),
        (23, 'medical', 2),
        (24, 'medical', 2),
        (25, 'medical', 3),
        (26, 'medical', 2),
        (27, 'medical', 3),
        (28, 'medical', 1),
        (29, 'medical', 3),
        (30, 'medical', 1)], dtype=[('idx', '<i4'),
                                    ('Treatment', '|S8'),
                                    ('StressReduction', '<i4')])

    # First, do a one-way ANOVA
    df = pd.DataFrame(dta2)
    model = ols('StressReduction ~ C(Treatment)', df).fit()
    anovaResults = anova_lm(model)
    print(anovaResults)
    # .iloc[0] selects the first row by position; plain [0] on a
    # label-indexed Series relies on a positional fallback.
    if anovaResults['PR(>F)'].iloc[0] < 0.05:
        print('One of the groups is different.')

    # Then, do the multiple testing
    mod = MultiComparison(dta2['StressReduction'], dta2['Treatment'])
    print(mod.tukeyhsd().summary())

    # The following code produces the same printout
    res2 = pairwise_tukeyhsd(dta2['StressReduction'], dta2['Treatment'])

    # Show the group names
    print(mod.groupsunique)

    # Generate a plot
    import matplotlib.pyplot as plt

    xvals = np.arange(3)
    plt.plot(xvals, res2.meandiffs, 'o')
    errors = np.ravel(np.diff(res2.confint) / 2)
    # The markers are drawn by plt.plot above; 'None' suppresses the
    # connecting line so only the error bars are added ('o' is a marker
    # code, not a line style).
    plt.errorbar(xvals, res2.meandiffs, yerr=errors, ls='None')

    xlim = -0.5, 2.5
    plt.hlines(0, *xlim)
    plt.xlim(*xlim)
    pair_labels = mod.groupsunique[np.column_stack(res2._multicomp.pairindices)]
    plt.xticks(xvals, pair_labels)
    plt.title('Multiple Comparison of Means - Tukey HSD, FWER=0.05' +
              '\n Pairwise Mean Differences')

    # Save to outfile; the same name is used for saving and for the message.
    outFile = 'MultComp.png'
    plt.savefig(outFile, dpi=200)
    print('Figure written to {0}'.format(outFile))
    plt.show()

    # Instead of the Tukey's test, we can do pairwise t-test
    # First, with the "Holm" correction
    rtp = mod.allpairtest(stats.ttest_rel, method='Holm')
    print(rtp[0])

    # and then with the Bonferroni correction
    print(mod.allpairtest(stats.ttest_rel, method='b')[0])

    # Done this way, the variance is calculated at each comparison.
    # If you want the joint variance across all samples, you have to
    # use a few tricks:(http://jpktd.blogspot.co.at/2013/03/multiple-comparison-and-tukey-hsd-or_25.html)
    res2 = pairwise_tukeyhsd(dta2['StressReduction'], dta2['Treatment'])
    studentized_mean = res2.meandiffs
#.........这里部分代码省略.........
示例7: setup_class_
def setup_class_(self):
    """Store a MultiComparison of the fixture data and its Tukey HSD result.

    Reads ``self.endog``, ``self.groups`` and ``self.alpha``; writes
    ``self.mc`` and ``self.res``.
    """
    comparison = MultiComparison(self.endog, self.groups)
    self.mc = comparison
    self.res = comparison.tukeyhsd(alpha=self.alpha)
示例8: print
('Pat', 9),
('Pat', 4),
('Jack', 4),
('Jack', 8),
('Jack', 7),
('Jack', 5),
('Jack', 1),
('Jack', 5),
('Alex', 9),
('Alex', 8),
('Alex', 8),
('Alex', 10),
('Alex', 5),
('Alex', 10)], dtype = [('Archer','|U5'),('Score', '<i8')])
# One-way ANOVA over the scores of the three archers, one sample per archer.
scores_by_archer = [data[data['Archer'] == archer].Score
                    for archer in ('Pat', 'Jack', 'Alex')]
f, p = stats.f_oneway(*scores_by_archer)

print('One-way ANOVA')
print('=============')
print('F value:', f)
print('P value:', p, '\n')

# Pairwise follow-up: Tukey HSD on the same scores, grouped by archer.
mc = MultiComparison(data['Score'], data['Archer'])
result = mc.tukeyhsd()

print(result)
print(mc.groupsunique)
示例9: run_stats
def run_stats(experiment):
    '''Run independent T-test or one-way ANOVA dependent on number of groups.

    Sample columns are assigned to a group when their id matches
    ``<group>_<digits>``.  Every row of ``experiment.df`` is tested
    separately and the p values are adjusted with the 'fdr_bh' method.

    Args:
        experiment (Experiment instance): An instance of the Experiment class.
            Only ``get_groups()``, ``get_sampleids()`` and ``df`` are used.

    Returns:
        A new Pandas data frame (the first five columns of ``experiment.df``)
        with p values, adjusted p values and Tukey HSD post-hoc results
        (``significant_<groupA>_<groupB>`` columns) if there are > 2 groups.
    '''
    groups = experiment.get_groups()
    samples = experiment.get_sampleids()
    df = experiment.df

    ## Get values for each group, ready for T-test or ANOVA.
    all_vals = []
    for group in groups:
        # Escape the group name so regex metacharacters in it are matched
        # literally; raw string avoids the invalid "\d" escape warning.
        sample_re = re.compile(re.escape(group) + r"_\d+$")
        ids = [sample for sample in samples if sample_re.match(sample)]
        all_vals.append(list(map(list, df[ids].values)))

    ## Decide whether to use T-test or ANOVA dependent on number of groups.
    ## zip(*all_vals) yields, per data frame row, one value list per group.
    if len(groups) == 2:
        p_vals = [ttest_ind(first, second)[1]
                  for first, second in zip(*all_vals)]
    else:
        p_vals = [f_oneway(*row_vals)[1] for row_vals in zip(*all_vals)]

    ## Adjust the p values and create a new data frame with them in.
    p_val_adj = list(multipletests(p_vals, method='fdr_bh')[1])
    # .iloc replaces the removed .ix indexer (positional selection).
    new_df = df.iloc[:, :5].copy()
    new_df['p_val'] = pd.Series(p_vals, index=new_df.index)
    new_df['p_val_adj'] = pd.Series(p_val_adj, index=new_df.index)

    ## Post-hoc test.
    ## Only do the post-hoc test if there are more than 2 groups.
    if len(groups) > 2:
        vals_df = df[samples]
        # NOTE(review): the group is taken as the text before the first
        # underscore, which differs from the regex above for group names
        # that themselves contain an underscore -- confirm naming scheme.
        group_ids = [sample.split('_')[0] for sample in vals_df.columns.values]
        posthoc_results = {}

        ## Run the post-hoc test on each row.
        for row in range(len(vals_df)):
            row_vals = vals_df.iloc[row]
            mc = MultiComparison(row_vals, group_ids)
            mc_groups = mc.groupsunique
            significant = mc.tukeyhsd().reject
            pairs = zip(*[x.tolist() for x in mc.pairindices])

            ## Go through each pair and add results to the posthoc_results dictionary.
            for pair, is_significant in zip(pairs, significant):
                low, high = sorted(pair)
                pair_name = str(mc_groups[low]) + '_' + str(mc_groups[high])
                posthoc_results.setdefault(pair_name, []).append(is_significant)

        ## Add the post-hoc results to the data frame.
        for pair_name, flags in posthoc_results.items():
            new_df['significant_' + pair_name] = flags

    return new_df
示例10: MultiComparison
spectraTransform[np.where(dominant == listDominant[10])[0], w],
spectraTransform[np.where(dominant == listDominant[11])[0], w],
spectraTransform[np.where(dominant == listDominant[12])[0], w],
spectraTransform[np.where(dominant == listDominant[13])[0], w],
spectraTransform[np.where(dominant == listDominant[14])[0], w],
spectraTransform[np.where(dominant == listDominant[15])[0], w],
spectraTransform[np.where(dominant == listDominant[16])[0], w],
spectraTransform[np.where(dominant == listDominant[17])[0], w],
spectraTransform[np.where(dominant == listDominant[18])[0], w],
spectraTransform[np.where(dominant == listDominant[19])[0], w],
spectraTransform[np.where(dominant == listDominant[20])[0], w],
spectraTransform[np.where(dominant == listDominant[21])[0], w],
spectraTransform[np.where(dominant == listDominant[22])[0], w])
# If the anova turns back a pvalue < 0.05, do multicomparison to figure out what samples are different
# NOTE(review): `w` presumably comes from an enclosing loop over wavelengths
# that is not visible in this listing -- confirm the nesting when merging.
if anovaResults[w, 1] < 0.05:
    mc = MultiComparison(spectraTransform[:, w], dominant)  # http://statsmodels.sourceforge.net/0.6.0/_modules/statsmodels/stats/multicomp.html
    result = mc.tukeyhsd()  # http://statsmodels.sourceforge.net/devel/generated/statsmodels.sandbox.stats.multicomp.MultiComparison.tukeyhsd.html
    # One row per compared pair: both group names followed by the statistics.
    inResults = np.array([mc.groupsunique[mc.pairindices[0]],
                          mc.groupsunique[mc.pairindices[1]],
                          result.meandiffs,
                          result.confint[:, 0],
                          result.confint[:, 1],
                          result.std_pairs,
                          result.reject]).T
    # Prefix every row with the wavelength the comparison belongs to.
    inResults = np.column_stack((np.repeat(wavelengths[w], len(result.reject)), inResults))
    tukeyResults = np.vstack((tukeyResults, inResults))

# Set up csv file to output statistical results
# open() replaces the Python 2-only file() builtin; mode 'wb' creates or
# truncates the file for binary writing (it does not append).
outStats = open(outLocation + dateTag + '_statistical_analysis.csv', 'wb')
row1 = np.hstack(('normal distribution p value for original spectra', normalStats))
row2 = np.hstack(('kurtosis p value for original spectra', kurtosisStats))
row3 = np.hstack(('skew p value for original spectra', skewStats))
row4 = np.hstack(('normal distribution p value for transformed spectra', normalTransformStats))
row5 = np.hstack(('kurtosis p value for transformed spectra', kurtosisTransformStats))
row6 = np.hstack(('skew p value for transformed spectra', skewTransformStats))
row7 = np.hstack(('anova results for transformed spectra', anovaResults[:, 1]))
inRows = np.vstack((row1, row2, row3, row4, row5, row6, row7))