This article collects typical usage examples of the Python function statsmodels.stats.proportion.proportion_confint. If you have been wondering what proportion_confint does, how to call it, or what real-world usage looks like, the curated examples below should help.
Fifteen code examples of proportion_confint are shown below, ordered by popularity by default.
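Before the project examples, here is a minimal, self-contained sketch of calling proportion_confint directly; the counts (13 successes out of 100 trials) are invented purely for illustration. Note that several examples below pass method='jeffrey'; current statsmodels documentation spells this option 'jeffreys', so that spelling may need adjusting depending on your installed version.

from statsmodels.stats.proportion import proportion_confint

# 13 successes out of 100 trials -- illustrative numbers only
count, nobs = 13, 100

# Wilson score interval; other methods include 'normal', 'beta' (Clopper-Pearson),
# 'agresti_coull' and the Jeffreys interval.
ci_low, ci_upp = proportion_confint(count, nobs, alpha=0.05, method='wilson')
print(ci_low, ci_upp)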
Example 1: get_pos_stats
def get_pos_stats(df, nIdx, cutoff=1, expt=1, letterorder=['C', 'A', 'T', 'G']):
    # Get row of interest
    data = df[[c for c in df.columns if not c == 'sequence']].iloc[nIdx]
    nt = df['sequence'].iloc[nIdx]
    total_n = float(data.sum())
    # Set up dataframe
    ntCols = ['N->'+c for c in letterorder] + ['N->!N']
    outsCols = ['ct', '%', '%lb', '%ub']
    cols = [x+'_'+out for x in ntCols for out in outsCols] + ['total_n', 'sequence']
    out_df = pd.DataFrame(index=[expt], columns=cols)
    out_df['sequence'] = nt
    out_df['total_n'] = total_n
    # Do individual nucleotide stats
    for n in letterorder:
        ct = data[nt+'->'+n]
        rate = ct / total_n
        lb, ub = proportion.proportion_confint(ct, total_n, method='jeffrey')
        out_df['N->'+n+'_ct'] = ct
        out_df['N->'+n+'_%'] = rate
        out_df['N->'+n+'_%lb'] = lb
        out_df['N->'+n+'_%ub'] = ub
    # Do aggregate misincorporation stats
    misinc_n = total_n - out_df['N->%c_ct' % nt]
    lb, ub = proportion.proportion_confint(misinc_n, total_n, method='jeffrey')
    out_df['N->!N_ct'] = misinc_n
    out_df['N->!N_%'] = misinc_n / total_n
    out_df['N->!N_%lb'] = lb
    out_df['N->!N_%ub'] = ub
    return out_df
Example 2: test_binom_test
def test_binom_test():
    #> bt = binom.test(51,235,(1/6),alternative="less")
    #> cat_items(bt, "binom_test_less.")
    binom_test_less = Holder()
    binom_test_less.statistic = 51
    binom_test_less.parameter = 235
    binom_test_less.p_value = 0.982022657605858
    binom_test_less.conf_int = [0, 0.2659460862574313]
    binom_test_less.estimate = 0.2170212765957447
    binom_test_less.null_value = 1. / 6
    binom_test_less.alternative = 'less'
    binom_test_less.method = 'Exact binomial test'
    binom_test_less.data_name = '51 and 235'

    #> bt = binom.test(51,235,(1/6),alternative="greater")
    #> cat_items(bt, "binom_test_greater.")
    binom_test_greater = Holder()
    binom_test_greater.statistic = 51
    binom_test_greater.parameter = 235
    binom_test_greater.p_value = 0.02654424571169085
    binom_test_greater.conf_int = [0.1735252778065201, 1]
    binom_test_greater.estimate = 0.2170212765957447
    binom_test_greater.null_value = 1. / 6
    binom_test_greater.alternative = 'greater'
    binom_test_greater.method = 'Exact binomial test'
    binom_test_greater.data_name = '51 and 235'

    #> bt = binom.test(51,235,(1/6),alternative="t")
    #> cat_items(bt, "binom_test_2sided.")
    binom_test_2sided = Holder()
    binom_test_2sided.statistic = 51
    binom_test_2sided.parameter = 235
    binom_test_2sided.p_value = 0.0437479701823997
    binom_test_2sided.conf_int = [0.1660633298083073, 0.2752683640289254]
    binom_test_2sided.estimate = 0.2170212765957447
    binom_test_2sided.null_value = 1. / 6
    binom_test_2sided.alternative = 'two.sided'
    binom_test_2sided.method = 'Exact binomial test'
    binom_test_2sided.data_name = '51 and 235'

    alltests = [('larger', binom_test_greater),
                ('smaller', binom_test_less),
                ('two-sided', binom_test_2sided)]

    for alt, res0 in alltests:
        # only p-value is returned
        res = smprop.binom_test(51, 235, prop=1. / 6, alternative=alt)
        #assert_almost_equal(res[0], res0.statistic)
        assert_almost_equal(res, res0.p_value, decimal=13)

    # R binom_test returns Clopper-Pearson confint
    ci_2s = smprop.proportion_confint(51, 235, alpha=0.05, method='beta')
    ci_low, ci_upp = smprop.proportion_confint(51, 235, alpha=0.1,
                                               method='beta')
    assert_almost_equal(ci_2s, binom_test_2sided.conf_int, decimal=13)
    assert_almost_equal(ci_upp, binom_test_less.conf_int[1], decimal=13)
    assert_almost_equal(ci_low, binom_test_greater.conf_int[0], decimal=13)
Example 3: print_survival_rate
def print_survival_rate(df):
    for domain_path, domain_group in df.groupby(["domainPath"]):
        survival_results = DataFrame(columns="actionDuration algorithmName survival lbound rbound".split())
        domain_name = re.search("[^/]+$", domain_path).group(0).rstrip(".track")

        for fields, action_group in domain_group.groupby(['algorithmName', 'actionDuration']):
            total_trials = len(action_group)
            error_experiments = action_group[action_group["errorMessage"].notnull()]
            deaths = len(error_experiments[error_experiments["errorMessage"] != "Timeout"])
            timeouts = len(error_experiments) - deaths
            successes = len(action_group[~action_group["errorMessage"].notnull()])

            survival_confint = proportion_confint(successes, total_trials, 0.05)
            survival_rate = (successes / (successes + deaths))
            survival_results = add_row(survival_results,
                                       [fields[1], fields[0], survival_rate, survival_confint[0], survival_confint[1]])

        fig, ax = plt.subplots()
        errors = []
        for alg, alg_group in survival_results.groupby('algorithmName'):
            errors.append([(alg_group['lbound'] - alg_group['survival']).values,
                           (alg_group['rbound'].values - alg_group['survival']).values])
        errors = np.abs(errors)
        print(errors)

        survival = survival_results.pivot(index='actionDuration', columns='algorithmName', values='survival')
        survival.plot(ax=ax, yerr=errors,
                      xlim=[0, 7000], ylim=[0, 1.0],
                      capsize=4, capthick=1, ecolor='black', cmap=plt.get_cmap("rainbow"), elinewidth=1)
        plt.savefig('test.png', format='png')
Example 4: test_binom_tost
def test_binom_tost():
    # consistency check with two different implementations,
    # proportion_confint is tested against R
    # no reference case from other package available
    ci = smprop.proportion_confint(10, 20, method='beta', alpha=0.1)
    bt = smprop.binom_tost(10, 20, *ci)
    assert_almost_equal(bt, [0.05] * 3, decimal=12)

    ci = smprop.proportion_confint(5, 20, method='beta', alpha=0.1)
    bt = smprop.binom_tost(5, 20, *ci)
    assert_almost_equal(bt, [0.05] * 3, decimal=12)

    # vectorized, TODO: observed proportion = 0 returns nan
    ci = smprop.proportion_confint(np.arange(1, 20), 20, method='beta',
                                   alpha=0.05)
    bt = smprop.binom_tost(np.arange(1, 20), 20, *ci)
    bt = np.asarray(bt)
    assert_almost_equal(bt, 0.025 * np.ones(bt.shape), decimal=12)
Example 5: main
def main():
    parser = argparse.ArgumentParser(description='extract and combine the kmer stats of multiple files')
    parser.add_argument('alpha', type=float, nargs='?', default=0.05, help='alpha of confidence interval')
    args = parser.parse_args()

    for line in sys.stdin:
        fields = line.split()
        values = map(int, fields[-4:])
        total = sum(values) * 1.0
        ci = proportion_confint(values[-1], total, args.alpha, method="wilson")
        print line[:-1], values[-1] / total, ci[0], ci[1]
Example 6: test_confidence_interval_estimation
def test_confidence_interval_estimation(self):
    if "ci" not in self.config["modes"]:
        print("Skipping CI")
        return
    runner = SingleProcessExperimentRunner()
    sample_length = self.config["sample_length"]
    samples = self.config["samples"]
    alpha = self.config["alpha"]
    method = "agresti_coull"
    estimation_tolerance = 0.1
    confidence_intervals = []
    all_successes = 0
    report_lines = []
    """:type : list[dict]"""
    fname = "smctest02_ci_{}.csv".format(datetime.now().strftime("%Y%m%d-%H_%M_%S_%f"))
    with open(fname, "w") as f:
        f.write("I;SUCCESSES;TRIALS\n")
        f.flush()
        for i in range(0, samples):
            _, res, trial_infos = runner.run_trials(self.experiment,
                                                    number_of_trials=sample_length,
                                                    max_retrials=0)
            print(trial_infos)
            self.assertEqual(sample_length, len(res))
            self.assertEqual(sample_length, len(trial_infos))
            successes = sum(res)
            all_successes += successes
            ci_low, ci_up = proportion.proportion_confint(successes, len(res), alpha=alpha,
                                                          method=method)
            confidence_intervals.append((ci_low, ci_up))
            line = dict(i=i+1, successes=successes, trials=len(res))
            f.write("{i};{successes};{trials}\n".format(**line))
            f.flush()
            print("Run #{}: {} successes, CI: [{}..{}]".format(i + 1, successes, ci_low, ci_up))
            # self.experiment.world.printState()

    estimated_prob = all_successes / (samples * sample_length)
    real_prob = self.calc_real_prob()
    print("estimated probability: {}".format(estimated_prob))
    print("real probability: {}".format(real_prob))
    interval_hit = 0
    for cl, cu in confidence_intervals:
        if cl <= real_prob <= cu:
            interval_hit += 1
    interval_hit_ratio = interval_hit / len(confidence_intervals)
    print("interval hits: {} of {} = {} %".format(interval_hit, len(confidence_intervals),
                                                  interval_hit_ratio * 100.0))
    self.assertAlmostEqual(real_prob, estimated_prob, delta=estimation_tolerance)
    self.assertTrue(interval_hit_ratio >= (1.0 - alpha))
Example 7: test_confint_proportion_ndim
def test_confint_proportion_ndim(method):
    # check that it works with 1-D, 2-D and pandas
    count = np.arange(6).reshape(2, 3)
    nobs = 10 * np.ones((2, 3))

    count_pd = pd.DataFrame(count)
    nobs_pd = pd.DataFrame(nobs)

    ci_arr = proportion_confint(count, nobs, alpha=0.05, method=method)
    ci_pd = proportion_confint(count_pd, nobs_pd, alpha=0.05,
                               method=method)
    assert_allclose(ci_arr, (ci_pd[0].values, ci_pd[1].values), rtol=1e-13)

    # spot checking one value
    ci12 = proportion_confint(count[1, 2], nobs[1, 2], alpha=0.05,
                              method=method)
    assert_allclose((ci_pd[0].values[1, 2], ci_pd[1].values[1, 2]), ci12,
                    rtol=1e-13)
    assert_allclose((ci_arr[0][1, 2], ci_arr[1][1, 2]), ci12, rtol=1e-13)

    # check that lists work as input
    ci_li = proportion_confint(count.tolist(), nobs.tolist(), alpha=0.05,
                               method=method)
    assert_allclose(ci_arr, (ci_li[0], ci_li[1]), rtol=1e-13)

    # check pandas Series, 1-D
    ci_pds = proportion_confint(count_pd.iloc[0], nobs_pd.iloc[0],
                                alpha=0.05, method=method)
    assert_allclose((ci_pds[0].values, ci_pds[1].values),
                    (ci_pd[0].values[0], ci_pd[1].values[0]), rtol=1e-13)

    # check scalar nobs, verifying one value
    ci_arr2 = proportion_confint(count, nobs[1, 2], alpha=0.05,
                                 method=method)
    assert_allclose((ci_arr2[0][1, 2], ci_arr[1][1, 2]), ci12, rtol=1e-13)
Example 8: create_confint_df
def create_confint_df(count_df, ignored_cols=['Time']):
    """Return Jeffreys confidence bounds for each word's proportion of the row totals."""
    words = [c for c in count_df.columns if c not in ignored_cols]
    ci_df = pd.DataFrame()
    for w in words:
        lb, ub = proportion.proportion_confint(count_df[w], count_df.sum(axis=1), method='jeffrey')
        ci_df['%s_lb' % w] = lb
        ci_df['%s_ub' % w] = ub
    return ci_df
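A hedged usage sketch for the helper above: the toy word-count DataFrame (with the ignored 'Time' column) is invented only to show the expected input shape and is not from the original project.

import pandas as pd
from statsmodels.stats import proportion

counts = pd.DataFrame({
    'Time': [0, 1, 2],   # ignored when selecting word columns
    'cat':  [5, 8, 2],
    'dog':  [3, 1, 7],
})
ci_df = create_confint_df(counts)   # produces columns cat_lb, cat_ub, dog_lb, dog_ub
print(ci_df)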
Example 9: test_confidence_interval_estimation
def test_confidence_interval_estimation(self):
    runner = SingleProcessExperimentRunner()
    sample_length = self.config["sample_length"]
    samples = self.config["samples"]
    alpha = self.config["alpha"]
    method = "agresti_coull"
    estimation_tolerance = 0.1
    confidence_intervals = []
    all_successes = 0
    report_lines = []
    """:type : list[dict]"""
    for i in range(0, samples):
        _, res, trial_infos = runner.run_trials(self.experiment,
                                                number_of_trials=sample_length,
                                                step_listeners=[report_step])
        print(trial_infos)
        self.assertEqual(sample_length, len(res))
        self.assertEqual(sample_length, len(trial_infos))
        successes = sum(res)
        all_successes += successes
        ci_low, ci_up = proportion.proportion_confint(successes, len(res), alpha=alpha,
                                                      method=method)
        confidence_intervals.append((ci_low, ci_up))
        report_lines.append(dict(i=i+1, successes=successes, trials=len(res)))
        print("Run #{}: {} successes, CI: [{}..{}]".format(i + 1, successes, ci_low, ci_up))

    estimated_prob = all_successes / (samples * sample_length)
    real_prob = self.calc_real_prob()
    print("estimated probability: {}".format(estimated_prob))
    print("real probability: {}".format(real_prob))
    interval_hit = 0
    for cl, cu in confidence_intervals:
        if cl <= real_prob <= cu:
            interval_hit += 1
    interval_hit_ratio = interval_hit / len(confidence_intervals)
    print("interval hits: {} of {} = {} %".format(interval_hit, len(confidence_intervals),
                                                  interval_hit_ratio * 100.0))

    with open("smctest01_ci.csv", "w") as f:
        f.write("I;SUCCESSES;TRIALS\n")
        for line in report_lines:
            f.write("{i};{successes};{trials}\n".format(**line))
Example 10: BinomialErrors
def BinomialErrors(nobs, Nsamp, alpha=0.05, method='jeffrey'):
    """
    This is basically just statsmodels.stats.proportion.proportion_confint
    with a different default method. It also returns the proportion nobs/Nsamp.

    Parameters:
    ===========
    - nobs: integer
        The number of "successes"
    - Nsamp: integer
        The total number of trials. Should be >= nobs.
    - alpha: float in (0, 1)
        Probability that the true value lies outside the
        resulting error (or something like that).
        alpha=0.05 is about 2-sigma.
    - method: string
        The calculation method. This is just passed to
        `statsmodels.stats.proportion.proportion_confint`

    Returns:
    ========
    - prob: float
        The estimate for the probability. prob = nobs / Nsamp
    - low: float
        The lower bound on the probability
    - high: float
        The upper bound on the probability
    """
    low, high = proportion_confint(nobs, Nsamp, method=method, alpha=alpha)

    if nobs == 0:
        low = 0.0
        p = 0.0
    elif nobs == Nsamp:
        high = 1.0
        p = 1.0
    else:
        p = float(nobs) / float(Nsamp)

    return p, low, high
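A short usage sketch of the wrapper above, with invented numbers; it assumes proportion_confint is already imported in the enclosing module (from statsmodels.stats.proportion import proportion_confint), as the function body requires.

# 8 "successes" out of 40 trials -- illustrative values only
p, low, high = BinomialErrors(8, 40)
print("p = {:.3f}, bounds = [{:.3f}, {:.3f}]".format(p, low, high))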
Example 11: plot
def plot():
    dic = pkl.load(open(cnst.network_data + "figs/LSTA-bullshit/scales/new/dominant_scales_save/scales.p", "rb"))

    bin = np.array(dic['bin'])
    center = bin[0:-1] + (bin[1::] - bin[0:-1])

    data = dic['blob'] - (np.sum(dic['blobc'], axis=0) / np.sum(dic['blobc']))  # alternative: dic['scale']
    db = dic['blobc']

    filler = np.zeros_like(db)
    for i in range(db.shape[0]):
        for j in range(db.shape[1]):
            low, up = prop.proportion_confint(db[i, j], np.nansum(db[i, :]))
            unrange = (db[i, j] / np.nansum(db[i, :])) - low
            filler[i, j] = unrange

    mask = np.zeros_like(db)
    mask[filler > np.abs(data)] = 1
    data[np.where(mask)] = 0

    f = plt.figure()
    ax = plt.subplot(111)
    pmap = ax.pcolormesh(data * 100, vmin=-2, vmax=2, cmap='RdBu_r')
    ax.set_xticks(np.arange(dic['blob'].shape[1]) + 1, minor=False)
    ax.set_xticklabels(center)
    cbar = plt.colorbar(pmap)
    cbar.set_label('Difference in scale frequency | Blobs')
    ax.set_yticks(np.arange(dic['blob'].shape[0]) + 1, minor=False)
    ax.set_yticklabels(np.arange(0, 24))
    ax.set_xlabel('Surface Scales of pos/neg deviation to surroundings')
    ax.set_ylabel('Hours')
    ax1 = ax.twinx()
    ax1.set_yticks(np.arange(dic['blob'].shape[0]) + 1, minor=False)
    ax1.set_yticklabels(dic['nblobs'])
    plt.show()

    print(np.sum(dic['blobc'] > 0) / np.sum(dic['nblobs']))
    print(np.sum(np.isfinite(dic['blobc'])))
    print(np.sum(data, axis=0))
Example 12: determine_p_est_by_interval
def determine_p_est_by_interval(df, key, max_delay, search_confidence, search_max_p_rel_interval_len):
    num_sample_cases = df.shape[0]

    # Estimate probability of getting a max delay path from the entire MC sim
    num_max_delay_cases = df.value_counts()[max_delay]
    p_est = num_max_delay_cases / float(num_sample_cases)
    print "P_est: {}".format(p_est)

    # Calculate the interval to see if it has converged
    #
    # The 'beta' (Clopper-Pearson) method is a pessimistic interval which guarantees
    # to cover the alpha-significant interval, but it may be conservative (i.e. it may
    # cover a more significant (smaller alpha) interval)
    alpha = 1. - search_confidence
    ci = sms_sp.proportion_confint(num_max_delay_cases, num_sample_cases, alpha=alpha, method="beta")

    # Convert tuple to array
    ci = [ci[0], ci[1]]

    if max_delay == 0 and math.isnan(ci[1]):
        print "Warning: end of confidence interval was nan for max_delay 0; forcing to 1."
        ci[1] = 1.

    assert not math.isnan(ci[0])
    assert not math.isnan(ci[1])

    ci_len = ci[1] - ci[0]
    ci_len_ratio = ci_len / p_est

    print "P_est CI: [{:g}, {:g}] @ alpha={} ci_len/P_est={}".format(ci[0], ci[1], alpha, ci_len_ratio)

    if p_est < ci[0] or p_est > ci[1]:
        msg = "Estimate {:g} falls outside confidence interval [{:g}, {:g}]: NOT CONVERGED".format(p_est, ci[0], ci[1])
        raise NotConvergedException(msg, num_sample_cases)

    if ci_len_ratio > search_max_p_rel_interval_len:
        msg = "Normalized CI delta (ci[1] - ci[0])/p_est={:g} exceeds target={:g}: NOT CONVERGED".format(ci_len_ratio, search_max_p_rel_interval_len)
        raise NotConvergedException(msg, num_sample_cases)

    return p_est, ci
Example 13: BinomialErrors
def BinomialErrors(nobs, Nsamp, alpha=0.05, method='jeffrey'):
    """
    This is basically just statsmodels.stats.proportion.proportion_confint
    with a different default method. It also returns the proportion nobs/Nsamp.
    """
    low, high = proportion_confint(nobs, Nsamp, method=method, alpha=alpha)

    if nobs == 0:
        low = 0.0
        p = 0.0
    elif nobs == Nsamp:
        high = 1.0
        p = 1.0
    else:
        p = float(nobs) / float(Nsamp)

    return p, low, high
Example 14: get_stats
def get_stats(df, cutoff=1):
    data = df[[c for c in df.columns if not c == 'sequence']]
    total_n = data.sum(axis=1)  # All non-corresponding data should be zero
    correct_n = data[[n+'->'+n for n in ['A', 'C', 'T', 'G']]]  # Get the columns that correspond to correct incorporations
    misinc_n = total_n - correct_n.sum(axis=1)  # Same as above.

    rate = misinc_n / total_n
    lb, ub = proportion.proportion_confint(misinc_n, total_n, method='jeffrey')

    # Assemble output dataframe
    simp_df = pd.DataFrame()
    simp_df['rate'] = rate
    simp_df['lb'] = lb
    simp_df['ub'] = ub
    simp_df['n'] = total_n
    simp_df['sequence'] = df.sequence
    return simp_df
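For orientation, a hedged sketch of the kind of input get_stats appears to expect: a per-position table of 'X->Y' incorporation counts plus a 'sequence' column. The toy numbers and column subset below are invented for illustration and are not from the original project.

import pandas as pd
from statsmodels.stats import proportion

# Two positions: an 'A' position with some A->G misincorporation,
# and a 'C' position with some C->T misincorporation.
toy = pd.DataFrame({
    'A->A': [95, 0], 'A->G': [5, 0],
    'C->C': [0, 90], 'C->T': [0, 10],
    'T->T': [0, 0],  'G->G': [0, 0],
    'sequence': ['A', 'C'],
})
print(get_stats(toy))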
Example 15: plot_band_psychometric
def plot_band_psychometric(validPerSNR, rightPerSNR, possibleSNRs, colour='k', linestyle='-', xlabel=True, ylabel=True):
    from statsmodels.stats.proportion import proportion_confint
    performance = []
    upper = []
    lower = []
    for inds in range(len(possibleSNRs)):
        CIthisSNR = np.array(proportion_confint(rightPerSNR[inds], validPerSNR[inds], method='wilson'))
        performance.append(100.0 * rightPerSNR[inds] / validPerSNR[inds])
        upper.append(100.0 * CIthisSNR[1] - performance[-1])
        lower.append(performance[-1] - 100.0 * CIthisSNR[0])
    plt.plot(np.arange(len(possibleSNRs)), performance, linestyle, marker='o', color=colour, mec=colour, lw=3, ms=10)
    plt.errorbar(np.arange(len(possibleSNRs)), performance, yerr=[lower, upper], color=colour, lw=2, ls=linestyle)
    if ylabel:
        plt.ylabel("% rightward", fontsize=16)
    if xlabel:
        plt.xlabel('SNR (dB)', fontsize=16)
    plt.xticks(np.arange(len(possibleSNRs)), possibleSNRs)
    plt.ylim((0, 100))
    ax = plt.gca()
    extraplots.boxoff(ax)