當前位置: 首頁>>代碼示例>>Python>>正文


Python DataFrame.groupby方法代碼示例

本文整理匯總了Python中pandas.core.frame.DataFrame.groupby方法的典型用法代碼示例。如果您正苦於以下問題:Python DataFrame.groupby方法的具體用法?Python DataFrame.groupby怎麼用?Python DataFrame.groupby使用的例子?那麼, 這裡精選的方法代碼示例或許可以為您提供幫助。您也可以進一步了解該方法所在pandas.core.frame.DataFrame的用法示例。


在下文中一共展示了DataFrame.groupby方法的4個代碼示例,這些例子默認根據受歡迎程度排序。您可以為喜歡或者感覺有用的代碼點讚,您的評價將有助於系統推薦出更棒的Python代碼示例。

示例1: get_daily_normals

# 需要導入模塊: from pandas.core.frame import DataFrame [as 別名]
# 或者: from pandas.core.frame.DataFrame import groupby [as 別名]
    def get_daily_normals(self, start_date=None, end_date=None, stamp_year=2001):
        """
        Average the series over each calendar day of the year.

        The samples are read from ``self.time`` (timestamps) and ``self.data``
        (one value per timestamp).  Samples outside ``[start_date, end_date]``
        are dropped, the remaining ones are grouped by (day, month) and
        averaged, and the means are laid out along every day of ``stamp_year``.

        As a side effect the daily dates of ``stamp_year`` are stored in
        ``self.stamp_day_dates``.

        :param start_date: first instant to keep (inclusive); defaults to ``self.time[0]``
        :type start_date: datetime.datetime
        :param end_date: last instant to keep (inclusive); defaults to ``self.time[-1]``
        :type end_date: datetime.datetime
        :param stamp_year: year used to build the dates attached to the normals
        :type stamp_year: int
        :return: the daily stamp dates and the matching mean values; the values
            are indexed by (day, month) and are NaN for days without samples
        :rtype: pandas.DatetimeIndex, pandas.Series
        """
        # pandas.date_range replaces the removed DatetimeIndex(start=..., end=...)
        # constructor and the removed pandas.datetools module; both bounds are
        # datetimes so the two ends are of the same type.
        self.stamp_day_dates = pandas.date_range(start=datetime(stamp_year, 1, 1),
                                                 end=datetime(stamp_year, 12, 31),
                                                 freq="D")

        if start_date is None:
            start_date = self.time[0]

        if end_date is None:
            end_date = self.time[-1]

        di = pandas.DatetimeIndex(self.time)
        df = DataFrame(data=self.data, index=di, columns=["values"])

        # Boolean mask instead of the removed DataFrame.select(callable).
        in_period = (di >= start_date) & (di <= end_date)
        df = df.loc[in_period]

        # Group on the (day, month) arrays of the index rather than on a lambda
        # returning tuples, so the result has a well-defined two-level index.
        df_mean = df.groupby([df.index.day, df.index.month]).mean()

        # reindex (rather than the removed .ix) yields NaN for days of the
        # stamp year that have no samples instead of raising KeyError.
        stamp_keys = pandas.MultiIndex.from_arrays(
            [self.stamp_day_dates.day, self.stamp_day_dates.month])
        return self.stamp_day_dates, df_mean["values"].reindex(stamp_keys)
開發者ID:guziy,項目名稱:RPN,代碼行數:27,代碼來源:timeseries.py

示例2: main

# 需要導入模塊: from pandas.core.frame import DataFrame [as 別名]
# 或者: from pandas.core.frame.DataFrame import groupby [as 別名]

#.........這裏部分代碼省略.........

        if shared_ax is None:
            ax = fig.add_subplot(gs[row, col])
            shared_ax = ax
            assert isinstance(shared_ax, Axes)

        else:
            ax = fig.add_subplot(gs[row, col])

        ax.xaxis.set_major_locator(locator)
        ax.yaxis.set_major_locator(MaxNLocator(nbins=4))

        ax.xaxis.set_major_formatter(fmt)
        sfmt = ScalarFormatter(useMathText=True)
        sfmt.set_powerlimits((-3, 4))
        ax.yaxis.set_major_formatter(sfmt)
        assert isinstance(ax, Axes)

        axes.append(ax)

    # generate daily stamp dates
    d0 = datetime(2001, 1, 1)
    stamp_dates = [d0 + timedelta(days=i) for i in range(365)]



    # plot a panel for each station
    for s, ax, row, col in zip(stations, axes, row_indices, col_indices):

        assert isinstance(s, Station)
        assert isinstance(ax, Axes)
        if s.grdc_monthly_clim_max is not None:
            ax.fill_between(monthly_dates, s.grdc_monthly_clim_min, s.grdc_monthly_clim_max, color="0.6", alpha=0.5)

        avail_years = s.get_list_of_complete_years()
        print("{}: {}".format(s.id, ",".join([str(y) for y in avail_years])))
        years = [y for y in avail_years if start_year <= y <= end_year]
        _, obs_clim_stfl = s.get_daily_climatology_for_complete_years_with_pandas(stamp_dates=stamp_dates, years=years)

        if obs_clim_stfl is None:
            continue

        ax.plot(stamp_dates, obs_clim_stfl, "k", lw=3, label="Obs")

        if s.river_name is not None and s.river_name != "":
            ax.set_title(s.river_name)
        else:
            ax.set_title(s.id)

        for path, sim_label, color in zip(paths, labels, colors):
            ds = Dataset(path)

            if stations_to_mp is None:
                acc_area_2d = ds.variables["accumulation_area"][:]
                lons2d, lats2d = ds.variables["longitude"][:], ds.variables["latitude"][:]
                x_index, y_index = ds.variables["x_index"][:], ds.variables["y_index"][:]
                stations_to_mp = get_dataless_model_points_for_stations(stations, acc_area_2d,
                                                                       lons2d, lats2d, x_index, y_index)

            # read dates only once for a given simulation
            if sim_label not in sim_to_time:
                time_str = ds.variables["time"][:].astype(str)
                times = [datetime.strptime("".join(t_s), TIME_FORMAT) for t_s in time_str]
                sim_to_time[sim_label] = times

            mp = stations_to_mp[s]
            data = ds.variables["water_discharge_accumulated"][:, mp.cell_index]
            print(path)
            df = DataFrame(data=data, index=sim_to_time[sim_label], columns=["value"])
            df["year"] = df.index.map(lambda d: d.year)
            df = df.ix[df.year.isin(years), :]
            df = df.select(lambda d: not (d.month == 2 and d.day == 29))
            df = df.groupby(lambda d: datetime(stamp_dates[0].year, d.month, d.day)).mean()

            daily_model_data = [df.ix[d, "value"] for d in stamp_dates]

            # print np.mean( monthly_model ), s.river_name, sim_label
            ax.plot(stamp_dates, daily_model_data, color, lw=3, label=sim_label + "(C)")

            if plot_future:
                ax.plot(stamp_dates, daily_model_data, color + "--", lw=3, label=sim_label + "(F2)")

            ds.close()

        if row < nrows - 1:
            ax.set_xticklabels([])

    axes[0].legend(fontsize=17, loc=2)
    plt.tight_layout()
    plt.savefig("offline_validation.png", dpi=400)
    plt.close(fig)


    r = RPN("/RESCUE/skynet3_rech1/huziy/CNRCWP/C3/Depth_to_bedrock_WestNA_0.25")
    r.get_first_record_for_name("8L")
    proj_params = r.get_proj_parameters_for_the_last_read_rec()
    lons, lats = r.get_longitudes_and_latitudes_for_the_last_read_rec()
    bsmp = RotatedLatLon(**proj_params).get_basemap_object_for_lons_lats(lons2d=lons, lats2d=lats)
    plot_utils.apply_plot_params(width_pt=None, width_cm=19, height_cm=19, font_size=12)
    plot_station_positions(manager=None, station_list=stations, bsmp=bsmp)
開發者ID:guziy,項目名稱:RPN,代碼行數:104,代碼來源:validate_streamflow_with_obs.py

示例3: arrays_add

# 需要導入模塊: from pandas.core.frame import DataFrame [as 別名]
# 或者: from pandas.core.frame.DataFrame import groupby [as 別名]
                all.add(id)

                arrays_add(id, "[" + data.test_build.branch + "][" + data.testrun.suite + "]", data)
                output_file.write(str(id) + "\t" + json)
            except Exception, e:
                Log.warning("can not process line:\n\t" + line, e)

        smallest = min(*all)
        Log.println("First id >= date: {{min}}", {"min": smallest})

# Tabulate the collected records, one row per (id, path, length, count).
df = DataFrame(arrays, columns=["id", "path", "length", "count"])
# One label per bin: "<lower edge> to <next edge - 1>" for consecutive entries of parts.
colNames = [str(lower) + " to " + str(upper - 1) for lower, upper in zip(parts[0:-1], parts[1:])]

# Bin the lengths into the half-open intervals given by parts, then count rows
# per (path, bin).  See
# http://pandas.pydata.org/pandas-docs/stable/groupby.html#na-group-handling
length_dim = pandas.cut(df["length"], parts, labels=colNames, right=False)
summary = df.groupby(["path", length_dim], sort=False).size()
# Pivot the bins into columns and emit the table to the log and to a file.
table = summary.unstack("length")
s = CNV.DataFrame2string(table)
Log.println("\n" + s)
with open("talos_big_array_summary.tab", "w") as output_file:
    output_file.write(s)

# Same report, but with one column per exact length instead of per bin.
sum2 = df.groupby(["path", "length"]).size()
tab2 = sum2.unstack("length")
s = CNV.DataFrame2string(tab2)
Log.println("\n" + s)
with open("talos_every_population.tab", "w") as output_file:
    output_file.write(s)

# Rows whose length is exactly 63000.
biggest = df[df["length"] == 63000]
開發者ID:klahnakoski,項目名稱:Datazilla2ElasticSearch,代碼行數:33,代碼來源:summarize_talos.py

示例4: main

# 需要導入模塊: from pandas.core.frame import DataFrame [as 別名]
# 或者: from pandas.core.frame.DataFrame import groupby [as 別名]

#.........這裏部分代碼省略.........
            continue

        print(obs_clim_stfl.head())

        obs_clim_stfl.plot(color="k", lw=3, label="Obs", ax=ax)

        if s.river_name is not None and s.river_name != "":
            ax.set_title(s.river_name)
        else:
            ax.set_title(s.id)

        for path, sim_label, color in zip(paths, labels, colors):
            ds = Dataset(path)

            if stations_to_mp is None:
                acc_area_2d = ds.variables["accumulation_area"][:]
                lons2d, lats2d = ds.variables["longitude"][:], ds.variables["latitude"][:]
                x_index, y_index = ds.variables["x_index"][:], ds.variables["y_index"][:]
                stations_to_mp = get_dataless_model_points_for_stations(stations, acc_area_2d,
                                                                       lons2d, lats2d, x_index, y_index)

            # read dates only once for a given simulation
            if sim_label not in sim_to_time:
                time_str = ds.variables["time"][:].astype(str)
                times = [datetime.strptime("".join(t_s), TIME_FORMAT) for t_s in time_str]
                sim_to_time[sim_label] = times

            mp = stations_to_mp[s]
            data = ds.variables["water_discharge_accumulated"][:, mp.cell_index]
            print(path)
            df = DataFrame(data=data, index=sim_to_time[sim_label], columns=["value"])
            df["year"] = df.index.map(lambda d: d.year)
            df = df.ix[df.year.isin(years), :]
            df = df.groupby(lambda d: datetime(2001, d.month, 15)).mean()


            # print np.mean( monthly_model ), s.river_name, sim_label
            df.plot(color=color, lw=3, label=sim_label, ax=ax, y="value")


            ds.close()

        if row < nrows - 1:
            ax.set_xticklabels([])

    axes[0].legend(fontsize=17, loc=2)
    plt.tight_layout()
    plt.savefig("mh/offline_validation_mh.png", dpi=400)
    plt.close(fig)






    with Dataset(infocell_path) as ds:

        fldir = ds.variables["flow_direction_value"][:]
        faa = ds.variables["accumulation_area"][:]

        lon, lat = [ds.variables[k][:] for k in ["lon", "lat"]]

        # plot station positions and upstream areas
        cell_manager = CellManager(fldir, nx=fldir.shape[0], ny=fldir.shape[1],
                                   lons2d=lon, lats2d=lat, accumulation_area_km2=faa)
開發者ID:guziy,項目名稱:RPN,代碼行數:69,代碼來源:validate_streamflow_with_obs_mh_edition.py


注:本文中的pandas.core.frame.DataFrame.groupby方法示例由純淨天空整理自Github/MSDocs等開源代碼及文檔管理平台,相關代碼片段篩選自各路編程大神貢獻的開源項目,源碼版權歸原作者所有,傳播和使用請參考對應項目的License;未經允許,請勿轉載。