本文整理匯總了Python中pandas.core.frame.DataFrame.groupby方法的典型用法代碼示例。如果您正苦於以下問題:Python DataFrame.groupby方法的具體用法?Python DataFrame.groupby怎麼用?Python DataFrame.groupby使用的例子?那麼,這裡精選的方法代碼示例或許可以為您提供幫助。您也可以進一步了解該方法所在類pandas.core.frame.DataFrame的用法示例。
在下文中一共展示了DataFrame.groupby方法的4個代碼示例,這些例子默認根據受歡迎程度排序。您可以為喜歡或者感覺有用的代碼點讚,您的評價將有助於系統推薦出更棒的Python代碼示例。
示例1: get_daily_normals
# 需要導入模塊: from pandas.core.frame import DataFrame [as 別名]
# 或者: from pandas.core.frame.DataFrame import groupby [as 別名]
def get_daily_normals(self, start_date=None, end_date=None, stamp_year=2001):
    """
    Compute the mean of ``self.data`` for every calendar day (day, month)
    over the period ``[start_date, end_date]`` (both ends inclusive).

    Reads ``self.time`` and ``self.data`` (assumed to be equally long
    sequences of datetimes and values -- confirm against the enclosing
    class) and, as a side effect, stores the daily dates of ``stamp_year``
    in ``self.stamp_day_dates``.

    :type start_date: datetime.datetime
    :param start_date: first date to include; defaults to ``self.time[0]``
    :type end_date: datetime.datetime
    :param end_date: last date to include; defaults to ``self.time[-1]``
    :type stamp_year: int
    :param stamp_year: year used to build the daily "stamp" dates the
        normals are aligned to
    :rtype: (pandas.DatetimeIndex, pandas.Series)
    :return: the stamp dates and, in the same order, the mean value for each
        stamp day; days with no data in the selected period are NaN
    """
    # pandas.date_range replaces the removed DatetimeIndex(start=, end=, freq=)
    # constructor form and the removed pandas.datetools module.
    self.stamp_day_dates = pandas.date_range(
        start=datetime(stamp_year, 1, 1),
        end=datetime(stamp_year, 12, 31),
        freq="D",
    )

    if start_date is None:
        start_date = self.time[0]
    if end_date is None:
        end_date = self.time[-1]

    di = pandas.DatetimeIndex(self.time)
    df = DataFrame(data=self.data, index=di, columns=["values"])

    # Boolean mask on the index replaces the removed DataFrame.select().
    in_period = (df.index >= start_date) & (df.index <= end_date)
    df = df[in_period]

    # Group on explicit (day, month) arrays so the result carries a
    # two-level index that can be aligned with the stamp days below.
    df_mean = df.groupby([df.index.day, df.index.month]).mean()

    # reindex (instead of the removed .ix) yields NaN for stamp days that
    # have no data rather than raising KeyError.
    stamp_keys = pandas.MultiIndex.from_arrays(
        [self.stamp_day_dates.day, self.stamp_day_dates.month]
    )
    return self.stamp_day_dates, df_mean["values"].reindex(stamp_keys)
示例2: main
# 需要導入模塊: from pandas.core.frame import DataFrame [as 別名]
# 或者: from pandas.core.frame.DataFrame import groupby [as 別名]
#.........這裏部分代碼省略.........
if shared_ax is None:
ax = fig.add_subplot(gs[row, col])
shared_ax = ax
assert isinstance(shared_ax, Axes)
else:
ax = fig.add_subplot(gs[row, col])
ax.xaxis.set_major_locator(locator)
ax.yaxis.set_major_locator(MaxNLocator(nbins=4))
ax.xaxis.set_major_formatter(fmt)
sfmt = ScalarFormatter(useMathText=True)
sfmt.set_powerlimits((-3, 4))
ax.yaxis.set_major_formatter(sfmt)
assert isinstance(ax, Axes)
axes.append(ax)
# generate daily stamp dates
d0 = datetime(2001, 1, 1)
stamp_dates = [d0 + timedelta(days=i) for i in range(365)]
# plot a panel for each station
for s, ax, row, col in zip(stations, axes, row_indices, col_indices):
assert isinstance(s, Station)
assert isinstance(ax, Axes)
if s.grdc_monthly_clim_max is not None:
ax.fill_between(monthly_dates, s.grdc_monthly_clim_min, s.grdc_monthly_clim_max, color="0.6", alpha=0.5)
avail_years = s.get_list_of_complete_years()
print("{}: {}".format(s.id, ",".join([str(y) for y in avail_years])))
years = [y for y in avail_years if start_year <= y <= end_year]
_, obs_clim_stfl = s.get_daily_climatology_for_complete_years_with_pandas(stamp_dates=stamp_dates, years=years)
if obs_clim_stfl is None:
continue
ax.plot(stamp_dates, obs_clim_stfl, "k", lw=3, label="Obs")
if s.river_name is not None and s.river_name != "":
ax.set_title(s.river_name)
else:
ax.set_title(s.id)
for path, sim_label, color in zip(paths, labels, colors):
ds = Dataset(path)
if stations_to_mp is None:
acc_area_2d = ds.variables["accumulation_area"][:]
lons2d, lats2d = ds.variables["longitude"][:], ds.variables["latitude"][:]
x_index, y_index = ds.variables["x_index"][:], ds.variables["y_index"][:]
stations_to_mp = get_dataless_model_points_for_stations(stations, acc_area_2d,
lons2d, lats2d, x_index, y_index)
# read dates only once for a given simulation
if sim_label not in sim_to_time:
time_str = ds.variables["time"][:].astype(str)
times = [datetime.strptime("".join(t_s), TIME_FORMAT) for t_s in time_str]
sim_to_time[sim_label] = times
mp = stations_to_mp[s]
data = ds.variables["water_discharge_accumulated"][:, mp.cell_index]
print(path)
df = DataFrame(data=data, index=sim_to_time[sim_label], columns=["value"])
df["year"] = df.index.map(lambda d: d.year)
df = df.ix[df.year.isin(years), :]
df = df.select(lambda d: not (d.month == 2 and d.day == 29))
df = df.groupby(lambda d: datetime(stamp_dates[0].year, d.month, d.day)).mean()
daily_model_data = [df.ix[d, "value"] for d in stamp_dates]
# print np.mean( monthly_model ), s.river_name, sim_label
ax.plot(stamp_dates, daily_model_data, color, lw=3, label=sim_label + "(C)")
if plot_future:
ax.plot(stamp_dates, daily_model_data, color + "--", lw=3, label=sim_label + "(F2)")
ds.close()
if row < nrows - 1:
ax.set_xticklabels([])
axes[0].legend(fontsize=17, loc=2)
plt.tight_layout()
plt.savefig("offline_validation.png", dpi=400)
plt.close(fig)
r = RPN("/RESCUE/skynet3_rech1/huziy/CNRCWP/C3/Depth_to_bedrock_WestNA_0.25")
r.get_first_record_for_name("8L")
proj_params = r.get_proj_parameters_for_the_last_read_rec()
lons, lats = r.get_longitudes_and_latitudes_for_the_last_read_rec()
bsmp = RotatedLatLon(**proj_params).get_basemap_object_for_lons_lats(lons2d=lons, lats2d=lats)
plot_utils.apply_plot_params(width_pt=None, width_cm=19, height_cm=19, font_size=12)
plot_station_positions(manager=None, station_list=stations, bsmp=bsmp)
示例3: arrays_add
# 需要導入模塊: from pandas.core.frame import DataFrame [as 別名]
# 或者: from pandas.core.frame.DataFrame import groupby [as 別名]
all.add(id)
arrays_add(id, "[" + data.test_build.branch + "][" + data.testrun.suite + "]", data)
output_file.write(str(id) + "\t" + json)
except Exception, e:
Log.warning("can not process line:\n\t" + line, e)
smallest = min(*all)
Log.println("First id >= date: {{min}}", {"min": smallest})
df = DataFrame(arrays, columns=["id", "path", "length", "count"])
colNames = [str(p) + " to " + str(parts[i + 1] - 1) for i, p in enumerate(parts[0:-1])]
# http://pandas.pydata.org/pandas-docs/stable/groupby.html#na-group-handling
length_dim = pandas.cut(df.length, parts, labels=colNames, right=False)
summary = df.groupby(["path", length_dim], sort=False).size()
#summary=summary.reindex(length_dim, level="length")
table = summary.unstack("length")
s = CNV.DataFrame2string(table)#, columns=colNames)
Log.println("\n" + s)
with open("talos_big_array_summary.tab", "w") as output_file:
output_file.write(s)
sum2 = df.groupby(["path", "length"]).size()
tab2 = sum2.unstack("length")
s = CNV.DataFrame2string(tab2)#, columns=colNames)
Log.println("\n" + s)
with open("talos_every_population.tab", "w") as output_file:
output_file.write(s)
biggest = df[df.length == 63000]
示例4: main
# 需要導入模塊: from pandas.core.frame import DataFrame [as 別名]
# 或者: from pandas.core.frame.DataFrame import groupby [as 別名]
#.........這裏部分代碼省略.........
continue
print(obs_clim_stfl.head())
obs_clim_stfl.plot(color="k", lw=3, label="Obs", ax=ax)
if s.river_name is not None and s.river_name != "":
ax.set_title(s.river_name)
else:
ax.set_title(s.id)
for path, sim_label, color in zip(paths, labels, colors):
ds = Dataset(path)
if stations_to_mp is None:
acc_area_2d = ds.variables["accumulation_area"][:]
lons2d, lats2d = ds.variables["longitude"][:], ds.variables["latitude"][:]
x_index, y_index = ds.variables["x_index"][:], ds.variables["y_index"][:]
stations_to_mp = get_dataless_model_points_for_stations(stations, acc_area_2d,
lons2d, lats2d, x_index, y_index)
# read dates only once for a given simulation
if sim_label not in sim_to_time:
time_str = ds.variables["time"][:].astype(str)
times = [datetime.strptime("".join(t_s), TIME_FORMAT) for t_s in time_str]
sim_to_time[sim_label] = times
mp = stations_to_mp[s]
data = ds.variables["water_discharge_accumulated"][:, mp.cell_index]
print(path)
df = DataFrame(data=data, index=sim_to_time[sim_label], columns=["value"])
df["year"] = df.index.map(lambda d: d.year)
df = df.ix[df.year.isin(years), :]
df = df.groupby(lambda d: datetime(2001, d.month, 15)).mean()
# print np.mean( monthly_model ), s.river_name, sim_label
df.plot(color=color, lw=3, label=sim_label, ax=ax, y="value")
ds.close()
if row < nrows - 1:
ax.set_xticklabels([])
axes[0].legend(fontsize=17, loc=2)
plt.tight_layout()
plt.savefig("mh/offline_validation_mh.png", dpi=400)
plt.close(fig)
with Dataset(infocell_path) as ds:
fldir = ds.variables["flow_direction_value"][:]
faa = ds.variables["accumulation_area"][:]
lon, lat = [ds.variables[k][:] for k in ["lon", "lat"]]
# plot station positions and upstream areas
cell_manager = CellManager(fldir, nx=fldir.shape[0], ny=fldir.shape[1],
lons2d=lon, lats2d=lat, accumulation_area_km2=faa)