本文整理汇总了Python中matplotlib.pyplot.boxplot函数的典型用法代码示例。如果您正苦于以下问题：Python boxplot函数的具体用法？Python boxplot怎么用？Python boxplot使用的例子？那么恭喜您，这里精选的函数代码示例或许可以为您提供帮助。
在下文中一共展示了boxplot函数的15个代码示例，这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞，您的评价将有助于系统推荐出更棒的Python代码示例。
示例1: plot_difficulties
def plot_difficulties(difficulties, bins=10):
    """Show a box plot and a log-scale histogram of the four (y, c) groups.

    For every combination of ``y`` in {0, 1} and ``c`` in {0, 1} the group is
    read from ``difficulties[2 * y + c]``; its mean and standard deviation are
    printed.  A box plot of the four groups is shown first, then a histogram
    of the same groups with a logarithmic count axis.

    Args:
        difficulties: Indexable with one collection of values per group.
        bins: Passed through to ``plt.hist`` as ``bins``.
    """
    groups = []
    group_labels = []
    for y_true in (0, 1):
        for c_val in (0, 1):
            values = difficulties[2 * y_true + c_val]
            groups.append(values)
            group_labels.append('y=%d, c=%d' % (y_true, c_val))
            print("y=%d, c=%d, mean=%.5f, std=%.5f"
                  % (y_true, c_val, np.mean(values), np.std(values)))

    # Box plot: one box per group, labelled along the x axis.
    _, box_axes = plt.subplots()
    plt.boxplot(groups)
    plt.setp(box_axes, xticklabels=group_labels)
    box_axes.set_ylim([-.01, 1.01])
    box_axes.set_ylabel('Difficulty')
    plt.show()

    # Histogram of the same groups, counts on a log scale.
    # NOTE(review): the `nonposy` keyword appears to have been renamed in
    # newer matplotlib releases -- confirm against the targeted version.
    _, hist_axes = plt.subplots()
    plt.yscale('log', nonposy='clip')
    plt.hist(groups, label=group_labels, bins=bins)
    plt.legend()
    hist_axes.set_xlabel('Difficulty')
    hist_axes.set_ylabel('Count (log-scale)')
    plt.show()
示例2: bivariate_analysis_catg_cont
def bivariate_analysis_catg_cont(catg_cont_list,df,target_name,sub_len,COUNTER,PLOT_ROW_SIZE,PLOT_COLUMNS_SIZE):
    """Draw one box plot per column, split by the classes of the target.

    Each column in ``catg_cont_list`` gets its own subplot.  The subplot title
    carries the column's mean and standard deviation, the x label carries the
    first F value and p value returned by ``evaluate_anova``, and the boxes
    show the column's values for each class of ``target_name``.

    Args:
        catg_cont_list: Column names to plot, used as given.
        df: Source frame.  Rows containing any missing value are dropped
            before statistics and plotting; the class list is still taken
            from the unfiltered frame.
        target_name: Name of the column holding the class labels.
        sub_len: Not used by this function.
        COUNTER: Subplot index for the first column; incremented by one for
            every column plotted.
        PLOT_ROW_SIZE: Number of rows in the subplot grid.
        PLOT_COLUMNS_SIZE: Number of columns in the subplot grid.

    Returns:
        Tuple ``(plt, COUNTER)`` with the counter advanced past the last
        subplot used.
    """
    clean_df = df.dropna()

    # These do not depend on the column being plotted, so compute them once.
    # NOTE(review): classes come from `df`, not `clean_df`; a class whose rows
    # were all dropped yields an empty box -- confirm that this is intended.
    col_classes = df[target_name].unique()
    y = clean_df[target_name]

    for col in catg_cont_list:
        summary = clean_df[col].describe()
        # Read the statistics by label: describe() returns a label-indexed
        # Series, and integer keys on such a Series are deprecated in pandas.
        mean = summary["mean"]
        std = summary["std"]

        plt.subplot(PLOT_ROW_SIZE, PLOT_COLUMNS_SIZE, COUNTER)
        plt.title("mean "+str(np.float32(mean))+" std "+str(np.float32(std)),fontsize=10)

        x = [np.array(clean_df.loc[y == i, col]) for i in col_classes]
        f_value, p_val = evaluate_anova(np.array(clean_df[col]).reshape(-1, 1), y)

        plt.xlabel(target_name+"\n f_value: "+str(np.float32(f_value[0]))+" / p_val: "+str(p_val[0]), fontsize=10)
        plt.ylabel(col, fontsize=10)
        plt.boxplot(x)
        print (col+" vs "+target_name+" plotted....")
        COUNTER += 1
    return plt, COUNTER
示例3: plot
def plot(lookup):
    """Box-plot the value collections of *lookup*, ordered by sorted key.

    The sorted keys are used as x tick labels, flier markers are suppressed
    (``sym=''``) and the figure is shown.
    """
    ordered_keys = sorted(lookup.keys())
    samples = [lookup[key] for key in ordered_keys]
    plt.boxplot(samples, sym='')
    plt.setp(plt.gca(), 'xticklabels', ordered_keys)
    plt.show()
示例4: handle
def handle(self, *args, **options):
    """Print complexity values for ``django-*`` projects and box-plot one release.

    First, every ``SourceLine`` whose project name starts with ``django-`` is
    fetched as ``(project, progradon__complexity)`` rows ordered by project;
    the rows are grouped and each group is printed.  Then the complexity
    values of project ``django-1.0.1`` are drawn as a box plot, which is
    written to ``z.png`` and shown.

    Args:
        *args: Ignored.
        **options: Ignored.
    """
    versions = models.SourceLine.objects.filter(
        project__startswith='django-').order_by(
            'project').values_list(
                'project', 'progradon__complexity')
    # NOTE(review): rows are ordered by project but grouped on element 1 (the
    # complexity), and each group item is a whole (project, complexity) row.
    # Grouping on element 0 may have been intended -- confirm before changing.
    for vers, complexity_iter in itertools.groupby(
            versions, key=operator.itemgetter(1)):
        # Single-argument print calls produce the same output on Python 2 and 3.
        print('%s :' % (vers,))
        print('- ' + ', '.join(str(x) for x in complexity_iter))

    data = models.SourceLine.objects.filter(
        project='django-1.0.1').values_list(
            'progradon__complexity', flat=True)
    plt.boxplot(data)
    # Save before showing: once show() returns the figure may already be
    # closed, in which case savefig() would write an empty image.
    plt.savefig('z.png')
    plt.show()
示例5: visualize_performance
def visualize_performance(self):
    """Plot how intra and inter comparison scores are distributed.

    Reads ``self._intra`` and ``self._inter`` and:

    1. forwards labels (``1`` per intra score, ``-1`` per inter score) and
       the concatenated scores to ``self._common_visualize_performance``;
    2. draws a two-box box plot and saves it to
       ``comparison_score_distribution.pdf``;
    3. draws, in a new figure, the normalised 50-bin histograms of both
       score sets over their common value range.

    NOTE(review): ``intra + inter`` is used as concatenation, so both are
    assumed to be plain lists -- confirm against the code that fills them.
    """
    intra = self._intra
    inter = self._inter
    labels = [1] * len(intra) + [-1] * len(inter)
    scores = intra + inter
    self._common_visualize_performance(labels, scores)

    plt.figure()
    plt.boxplot([intra, inter])
    plt.xticks([1, 2], ['intra', 'inter'])
    plt.title('Distribution of scores')
    plt.savefig('comparison_score_distribution.pdf')

    plt.figure()
    # np.min/np.max take `axis` as their second positional argument, so the
    # two per-group extrema must be combined with the builtins instead.
    start = min(np.min(intra), np.min(inter))
    end = max(np.max(intra), np.max(inter))
    intra_hist, intra_bin = np.histogram(intra, 50, (start, end))
    inter_hist, inter_bin = np.histogram(inter, 50, (start, end))

    # Normalise the counts so each curve sums to one; x is the left bin edge.
    plt.plot(intra_bin[:-1], intra_hist/float(intra_hist.sum()), label='intra', color='blue')
    plt.plot(inter_bin[:-1], inter_hist/float(inter_hist.sum()), label='inter', color='red')
    plt.legend()
    plt.xlabel('Comparison scores')
    plt.ylabel('Probability')
    plt.title('Score distribution')
示例6: stats_fn
def stats_fn(data_frame):
    """Write pairwise Wilcoxon results to a text file and save a box plot.

    For every unordered pair of keys in ``data_frame`` the result of
    ``stats.wilcoxon`` and the medians of both samples are written to
    ``Stat_tests_<scene without its last four characters>.txt`` in the current
    directory.  A box plot of all samples is then saved as ``<scene>_.png``
    inside the hard-coded boxplots directory for that scene.

    Args:
        data_frame: Mapping from algorithm name to its sample of values.

    Uses the module-level global ``scene``.
    """
    global scene
    scene_stem = scene[:-4]
    algorithms = list(data_frame)

    # The context manager guarantees the report is closed even if a test raises.
    with open("Stat_tests_" + scene_stem + ".txt", "w") as stat_file:

        def emit(*parts):
            # Same layout as a print statement: items joined by one space.
            stat_file.write(" ".join(str(part) for part in parts) + "\n")

        # Visit each unordered pair exactly once, in first-seen order.
        for index, algorithm in enumerate(algorithms):
            for algorithm2 in algorithms[index + 1:]:
                statistical_significance = stats.wilcoxon(data_frame[algorithm], data_frame[algorithm2])
                emit(algorithm, " VS ", algorithm2, " -->", statistical_significance)
                emit(algorithm, " median = ", np.median(data_frame[algorithm]))
                emit(algorithm2, " median = ", np.median(data_frame[algorithm2]))
                emit("----------------------------------------------------------")

    # Draw the box plots.  The target path is built explicitly instead of
    # changing the working directory, so a failure cannot leave the process
    # stranded in another directory.
    figure_name = scene + "_.png"
    output_dir = "/home/omohamme/INRIA/experiments/moop_sim_comparison/boxplots/" + scene_stem + "/"
    plt.figure(figsize=(15.0, 11.0))
    plt.boxplot(list(data_frame.values()))
    plt.xticks(range(1, len(algorithms) + 1), algorithms)
    plt.title(figure_name)
    plt.savefig(os.path.join(output_dir, figure_name))
示例7: create_boxplot
def create_boxplot(data, save_dir, correct_entropy=1):
    """
    Box-plot mutant entropies with the repaired program's entropy drawn as a line.

    data - entropy values for the lines added by the mutant files; must be non-empty
    save_dir - directory to save the plot in, not including the name of the plot itself
    correct_entropy - the entropy of the lines added by the repair program

    Returns the randomly generated base name (without ".png") of the saved plot.
    """
    print("CREATE BOXPLOT")
    print(data)
    assert len(data) > 0

    # plot mutant entropy
    plt.boxplot(data)

    # plot correct entropy; plt.plot returns a list of lines, so unpack the
    # single line -- the legend needs the line itself as its handle
    repaired_line, = plt.plot([0, 2], [correct_entropy, correct_entropy], color="g")
    plt.legend([repaired_line], ["repaired program"])

    # annotate the plot
    plt.ylabel("Entropy (bits)")
    plt.title("Entropy of lines added in mutant programs")

    # generate a random number as the name of the plot
    # (sys.maxint only exists on Python 2; fall back to sys.maxsize elsewhere)
    name = str(random.randint(0, getattr(sys, "maxint", sys.maxsize)))
    plot_path = os.path.join(save_dir, name + ".png")
    # NOTE(review): bbox_inches=0 is kept from the original; it is not one of
    # the documented values ('tight', a Bbox or None) -- confirm the intent.
    plt.savefig(plot_path, bbox_inches=0)
    print(plot_path)
    return name
示例8: plot
def plot(revisions, benchmarks, subdir='.', baseurl='https://github.com/idaholab/moose/commit/'):
    """Save an SVG box plot of benchmark run times, one box per revision.

    Each box shows ``bench.realruns`` and is labelled with the first seven
    characters of its revision; every tick label links to ``baseurl`` joined
    with that label.  Horizontal reference lines mark the middle element of
    the first revision's sorted runs and +/-1% and +/-5% of it.  The figure
    is written to ``<subdir>/<benchmarks[0].name>.svg`` and then cleared.
    """
    paired = list(zip(revisions, benchmarks))
    data = [bench.realruns for _, bench in paired]
    labels = [rev[:7] for rev, _ in paired]

    # Reference value: the middle element of the first revision's sorted runs.
    first_runs = data[0]
    median = sorted(first_runs)[int(len(first_runs)/2)]

    reference_lines = (
        (median*1.05, dict(linestyle='--', linewidth=2, color='red', alpha=.5, label='+5%')),
        (median*1.01, dict(linestyle=':', linewidth=2, color='red', label='+1%')),
        (median, dict(dashes=[48, 4, 12, 4], color='black', alpha=.5)),
        (median*.99, dict(linestyle=':', linewidth=2, color='green', label='-1%')),
        (median*.95, dict(linestyle='--', linewidth=2, color='green', alpha=.5, label='-5%')),
    )
    for level, line_style in reference_lines:
        plt.axhline(y=level, **line_style)

    plt.boxplot(data, labels=labels, whis=1.5)
    plt.xticks(rotation=90)
    plt.ylabel('Time (seconds)')

    fig = plt.gcf()
    ax = fig.axes[0]
    # Attach a URL to every tick label.
    for tick_label in ax.get_xticklabels():
        tick_label.set_url(urlparse.urljoin(baseurl, tick_label.get_text()))

    ax.legend(loc='upper right')
    fig.subplots_adjust(bottom=.15)
    fig.savefig(os.path.join(subdir, benchmarks[0].name + '.svg'))
    plt.clf()
示例9: plot
def plot(work_time_deltas_hours):
    """Show a box plot of daily working hours with a summary text box.

    The y axis is ticked every quarter hour and labelled as hours and
    minutes.  Mean, max, min, median, accumulated overtime and the number of
    tracked days are printed and also placed in a text box on the plot.

    Args:
        work_time_deltas_hours: Non-empty sequence of daily working times,
            in hours.
    """
    # 45 minutes break is assumed, so 8.75 h per day counts as no overtime.
    work_overtime = sum(w - 8.75 for w in work_time_deltas_hours)

    mean_hours = numpy.mean(work_time_deltas_hours)
    min_hours = numpy.min(work_time_deltas_hours)
    max_hours = numpy.max(work_time_deltas_hours)
    median_hours = numpy.median(work_time_deltas_hours)
    days_tracked = len(work_time_deltas_hours)

    plt.boxplot(work_time_deltas_hours)
    plt.ylabel("Working Hours")
    plt.xticks([0,1,2],())
    yvalues = numpy.arange(numpy.floor(min_hours), numpy.ceil(max_hours), 0.25)
    plt.yticks(yvalues, [_format_hours(x) for x in yvalues], rotation=0)

    # Debug
    print("Mean: "+str(mean_hours))
    print("Min: "+str(min_hours))
    print("Max: "+str(max_hours))
    print("Median: "+str(median_hours))
    print("Work overtime: "+ str(work_overtime))
    print("Days tracked: "+str(days_tracked))

    summary = "\n".join([
        "Mean: " + _format_hours(mean_hours),
        "Max: " + _format_hours(max_hours),
        "Min: " + _format_hours(min_hours),
        "Median: " + _format_hours(median_hours),
        "Overtime: " + _format_hours(work_overtime),
        "Days: " + str(days_tracked),
    ])
    plt.text(1.35, 10, summary,
             bbox=dict(boxstyle='round', facecolor='white', alpha=0.5))
    plt.title("Working Hours Boxplot")
    plt.show()


def _format_hours(hours):
    """Format a possibly negative number of hours as ``'<h>h <m>min'``.

    The magnitude is rounded to the nearest minute and split into hours and
    minutes; the sign is applied to the whole value.  This way -1.5 renders
    as ``-1h 30min`` rather than the ``-2h 30min`` that floor/modulo gives.
    """
    total_minutes = int(round(abs(hours) * 60))
    whole_hours, minutes = divmod(total_minutes, 60)
    sign = "-" if hours < 0 and total_minutes else ""
    return sign + str(whole_hours) + "h " + str(minutes) + "min"
示例10: sale_price_per_sq_foot_boxplot
def sale_price_per_sq_foot_boxplot(self, groupby, title):
    """Box-plot ``sale_price_per_sqft`` for every level of the *groupby* column.

    *title* becomes the plot title.  Returns the result of passing the
    figure to ``fig_to_svg``.
    """
    price_column = "sale_price_per_sqft"

    fig = init_fig()
    # Extra-wide canvas for this figure.
    fig.set_size_inches(10, 4)

    # Keep only the two columns in use and drop rows with missing values.
    frame = self.data[[groupby, price_column]].dropna()

    # One Series of prices per level, in the order value_counts() reports them.
    levels = frame[groupby].value_counts().index
    per_level = [
        frame.loc[frame[groupby] == level, price_column]
        for level in levels
    ]

    # The empty symbol string suppresses the outlier markers, which would
    # otherwise stretch the axis scale.
    plt.boxplot(per_level, 0, "")
    plt.ylabel("Sale Price per Sq. Ft.")
    plt.title(title)
    plt.xticks(np.arange(len(levels)) + 1, levels)
    return fig_to_svg(fig)
示例11: distance_distribution_plot
def distance_distribution_plot(learner,box_kwargs=None,**kwargs):
    """
    Box-plots the training and validation event-distance distributions of a
    learner on one log-scaled axis.

    Args:
        learner: the learner object to use
        box_kwargs: keyword arguments for plt.boxplot; defaults to
            dict(whis=[5,95]) when None
        kwargs: passed to event_distance_distribution (ie: to_true=T/F)
    """
    train_scores = learner._scores_by_params(train=True)
    valid_scores = learner._scores_by_params(train=False)
    if box_kwargs is None:
        box_kwargs = dict(whis=[5,95])
    name = learner.description.lower()
    # Not used below; the call is retained exactly as in the original.
    x_values = learner.param_values()
    train_dist = Learning.event_distance_distribution(train_scores,**kwargs)
    valid_dist = Learning.event_distance_distribution(valid_scores,**kwargs)
    # Materialise both distributions as lists before plotting.
    train_plot = list(train_dist)
    valid_plot = list(valid_dist)
    for distances in (train_plot, valid_plot):
        plt.boxplot(x=distances,**box_kwargs)
    plt.gca().set_yscale('log')
    PlotUtilities.lazyLabel("Tuning parameter","Distance Distribution (idx)",
                            "Event distributions for {:s}".format(name),
                            frameon=False)
示例12: boxplot_by_pft
def boxplot_by_pft(var, timestep, cmtnum, stages, ref_veg_map, ref_run_status):
    '''
    Work in progress: box plot of a stitched variable, one box per PFT.

    The data returned by stitch_stages() is masked by community type and by
    failed run status, averaged over axes 2 and 3, and shown as a box plot
    with ten boxes labelled "PFT 0" .. "PFT 9".  Sizes and counts are printed
    along the way.
    '''
    data, units = stitch_stages(var, timestep, stages)
    print("data size: %s" % (data.size,))
    print(data.shape)

    # Summing across compartments is currently disabled; the stitched data is
    # masked directly.
    masked = mask_by_cmt(data, cmtnum, ref_veg_map)
    print("data size after masking cmt: %s" % (masked.count(),))

    masked = mask_by_failed_run_status(masked, ref_run_status)
    print("data count after masking run status: %s" % (masked.count(),))

    pft_avg = np.ma.average(masked, axis=(2,3))

    pft_labels = ["PFT {}".format(i) for i in range(10)]
    # NOTE(review): whis='range' may not be accepted by newer matplotlib
    # releases -- confirm against the targeted version.
    box_style = dict(
        labels=pft_labels,
        whis='range',
        showfliers=False,
        patch_artist=True,
        boxprops=dict(color='blue', alpha=0.25),
        whiskerprops=dict(color='red'),
        capprops=dict(color='blue'),
    )
    plt.boxplot(pft_avg, **box_style)

    plt.ylabel(units)
    plt.show(block=True)
示例13: make_plot_lfw_reorder_other
def make_plot_lfw_reorder_other(save=False):
    """Box-plot ``1 - loss`` per operation ordering for one hyperopt experiment.

    Job losses are grouped by ``spec.order`` in the ``hyperopt.jobs``
    collection, rearranged to follow ``params.order_choices``, and drawn as
    box plots with their means overlaid as a line and as scatter points.

    Args:
        save: Not used by this function.
    """
    conn = pm.Connection()
    Jobs = conn['hyperopt']['jobs']
    exp_key = 'thor_model_exploration.model_exploration_bandits.LFWBanditModelExplorationOther/hyperopt.Random'
    condition = {'exp_key': exp_key, 'state':2, 'spec.preproc.size.0':250}
    H = Jobs.group(['spec.order'],
                   condition,
                   {'losses': []},
                   'function(d, o){o.losses.push(d.result.loss);}')

    # Rearrange the grouped results to follow params.order_choices.
    order_choices = params.order_choices
    ords = pluck(H, 'spec.order')
    H = [H[ords.index(choice)] for choice in order_choices]

    # Abbreviate every ordering as "<before ops>|<after ops>".
    od = {'lpool': 'p', 'activ': 'a', 'lnorm': 'n'}

    def abbreviate(ops):
        return ','.join([od[op] for op in ops])

    order_labels = [abbreviate(before) + '|' + abbreviate(after)
                    for (before, after) in order_choices]

    import matplotlib.pyplot as plt
    plt.figure(figsize=(18,8))
    plt.boxplot([1-np.array(h['losses']) for h in H])
    means = [1-np.array(h['losses']).mean() for h in H]
    positions = range(1,len(H)+1)
    plt.plot(positions, means, color='green')
    plt.scatter(positions, means)
    plt.xticks(range(1,len(ords)+1), order_labels, rotation=60)
    plt.ylabel('Absolute performance')
    plt.xlabel('Architecture tag')
示例14: main
def main():
    """Load per-day sensor readings from the database and box-plot one day.

    Rows of ``(datetime, temp_actual)`` are read from table ``sensor1`` for
    every day-of-month/month combination and stored as ``data[mm][dd]``.
    The values in column 1 of ``data[7-1][11]`` are then shown as a box plot.
    """
    data = []

    # Connect to the database (read-only use below).
    con = mdb.connect(host='192.168.1.143', db='monitor', user='crblackw')

    # Format of data structure
    # [mm][dd][data]
    # mm: Month of the dataset, zero-based: data[0] holds month 1.
    # dd: Day within the month, zero-based: data[mm][0] holds day 1.
    # data: Array of the rows fetched for that day, each (datetime, value).
    with con:
        cur = con.cursor()
        # range() excludes its stop value, so 13 and 32 are needed to include
        # December and the 31st.  Days that do not exist in a month simply
        # return no rows.
        for m in range(1, 13):
            data_month = []
            for d in range(1, 32):
                cur.execute("SELECT datetime,temp_actual FROM sensor1 WHERE DAY(datetime) = %i AND MONTH(datetime) = %i" %(d,m))
                data_month.append(np.array(cur.fetchall()))
            data.append(data_month)
    con.close()

    plt.boxplot(data[7-1][11][:,1])
    plt.show()
'''
示例15: __create_num_threads_vs_jct_graph
def __create_num_threads_vs_jct_graph(num_threads_to_jcts, output_dir, phase):
    """
    Write a PDF box plot of JCT against num threads per disk for one phase.

    phase must be either "write" or "read". num_threads_to_jcts should be a dictionary of the
    form:
      { num threads : ( list of write JCTs, list of read JCTs ) }
    The graph is saved to "<phase>_phase_num_threads_vs_jct.pdf" in output_dir.
    """
    assert phase in ["write", "read"]
    use_write = phase == "write"

    # One extra tick on each side of the boxes.
    num_ticks = len(num_threads_to_jcts) + 2
    xmax = num_ticks - 1

    phase_jcts = []
    for write_jcts, read_jcts in num_threads_to_jcts.values():
        phase_jcts.extend(write_jcts if use_write else read_jcts)
    ymax = max(phase_jcts) * 1.1

    pyplot.title("Num threads per disk vs. JCT ({} phase)".format(phase))
    pyplot.xlabel("Num threads per disk")
    pyplot.ylabel("JCT (s)")
    pyplot.grid(b=True)
    pyplot.xlim(xmin=0, xmax=xmax)
    pyplot.ylim(ymin=0, ymax=ymax)

    # One list of JCTs per entry, ordered by num threads per disk.
    all_jcts = []
    for _, (write_jcts, read_jcts) in sorted(num_threads_to_jcts.items()):
        all_jcts.append(write_jcts if use_write else read_jcts)
    pyplot.boxplot(all_jcts, whis=[0, 100])

    # Label the box positions with the actual thread counts; the padding
    # ticks at both ends stay blank.
    tick_labels = [""] + sorted(num_threads_to_jcts.keys()) + [""]
    pyplot.xticks(range(num_ticks), tick_labels)

    # Save the graph as a PDF.
    output_filepath = path.join(output_dir, "{}_phase_num_threads_vs_jct.pdf".format(phase))
    with backend_pdf.PdfPages(output_filepath) as pdf:
        pdf.savefig()
    pyplot.close()