Python pandas.StringDtype方法代码示例

本文整理汇总了Python中pandas.StringDtype类的典型用法代码示例。如果您正苦于以下问题：Python pandas.StringDtype类的具体用法？Python pandas.StringDtype怎么用？Python pandas.StringDtype使用的例子？那么，这里精选的代码示例或许可以为您提供帮助。您也可以进一步了解该类所在模块pandas的用法示例。

在下文中一共展示了pandas.StringDtype方法的8个代码示例，这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞，您的评价将有助于系统推荐出更棒的Python代码示例。

示例1: table_type

# 需要导入模块: import pandas [as 别名]
# 或者: from pandas import StringDtype [as 别名]
def table_type(df_column):
    """Map a column's pandas extension dtype to a table column type name.

    Note: this only works with pandas >= 1.0.0, which provides the extension
    dtypes referenced below.

    Args:
        df_column: Object exposing a ``dtype`` attribute, e.g. a
            ``pandas.Series``.

    Returns:
        str: ``'datetime'`` for timezone-aware datetimes; ``'text'`` for
        string, boolean, categorical and period dtypes; ``'numeric'`` for
        sparse, interval and nullable integer dtypes; ``'any'`` otherwise
        (and always under Python 2).

    """
    if sys.version_info < (3, 0):  # pandas 1.0.0 does not support Python 2
        return 'any'

    dtype = df_column.dtype

    if isinstance(dtype, pd.DatetimeTZDtype):
        # Must be a bare string like every other branch -- a trailing comma
        # here would turn the result into the 1-tuple ('datetime',).
        return 'datetime'

    text_dtypes = (
        pd.StringDtype,
        pd.BooleanDtype,
        pd.CategoricalDtype,
        pd.PeriodDtype,
    )
    if isinstance(dtype, text_dtypes):
        return 'text'

    numeric_dtypes = (
        pd.SparseDtype,
        pd.IntervalDtype,
        pd.Int8Dtype,
        pd.Int16Dtype,
        pd.Int32Dtype,
        pd.Int64Dtype,
    )
    if isinstance(dtype, numeric_dtypes):
        return 'numeric'

    return 'any'

开发者ID:plotly，项目名称:dash-docs，代码行数:24，代码来源:filtering_fe_autotype.py

示例2: _load_plant_utc_offset

# 需要导入模块: import pandas [as 别名]
# 或者: from pandas import StringDtype [as 别名]
def _load_plant_utc_offset(datapkg_dir):
    """Load the UTC offset each EIA plant.

    CEMS times don't change for DST, so we get get the UTC offset by using the
    offset for the plants' timezones in January.

    Args:
        datapkg_dir (path-like) : Path to the directory of the datapackage
            which is currently being assembled.

    Returns:
        pandas.DataFrame: With columns plant_id_eia and utc_offset

    """
    import pytz

    jan1 = datetime.datetime(2011, 1, 1)  # year doesn't matter
    timezones = (
        pd.read_csv(
            pathlib.Path(datapkg_dir, 'data/plants_entity_eia.csv'),
            usecols=["plant_id_eia", "timezone"],
            dtype={"plant_id_eia": "Int64", "timezone": pd.StringDtype()})
        .replace(to_replace="None", value=pd.NA)
        .dropna()
    )

    timezones["utc_offset"] = (
        timezones["timezone"]
        .apply(lambda tz: pytz.timezone(tz).localize(jan1).utcoffset())
    )
    del timezones["timezone"]
    return timezones

开发者ID:catalyst-cooperative，项目名称:pudl，代码行数:34，代码来源:epacems.py

示例3: _pandas_string_type

# 需要导入模块: import pandas [as 别名]
# 或者: from pandas import StringDtype [as 别名]
def _pandas_string_type():
    try:
        return pd.StringDtype()
    except AttributeError:
        return np.object

开发者ID:mlflow，项目名称:mlflow，代码行数:7，代码来源:schema.py

示例4: _add_schema

# 需要导入模块: import pandas [as 别名]
# 或者: from pandas import StringDtype [as 别名]
def _add_schema(df):
    """Cast the username and message columns of a chat dataframe to strings.

    Args:
        df (pandas.DataFrame): Chat dataframe.

    Returns:
        pandas.DataFrame: Result of ``df.astype`` with the
        ``COLNAMES_DF.USERNAME`` and ``COLNAMES_DF.MESSAGE`` columns using
        the pandas string extension dtype.

    """
    text_columns = (COLNAMES_DF.USERNAME, COLNAMES_DF.MESSAGE)
    schema = {column: pd.StringDtype() for column in text_columns}
    return df.astype(schema)

开发者ID:lucasrodes，项目名称:whatstk，代码行数:17，代码来源:parser.py

示例5: test_pandas_extension_types

# 需要导入模块: import pandas [as 别名]
# 或者: from pandas import StringDtype [as 别名]
def test_pandas_extension_types():
    """Check that each pandas extension dtype validates a matching series."""
    # pylint: disable=no-member

    # Sample data, one series per extension dtype under test.
    categorical = pd.Series(["a", "a", "b", "b", "c", "c"], dtype="category")
    tz_datetimes = pd.Series(
        pd.date_range(start="20200101", end="20200301"),
        dtype="datetime64[ns, utc]"
    )
    nullable_ints = pd.Series(range(10), dtype="Int64")
    strings = pd.Series(["foo", "bar", "baz"], dtype="string")
    periods = pd.Series(pd.period_range('1/1/2019', '1/1/2020', freq='D'))
    # Values >= 5 become NaN before the sparse cast, hence the nullable
    # schema for this case below.
    sparse_floats = pd.Series(range(100)).where(
        lambda s: s < 5, other=np.nan).astype("Sparse[float]")
    booleans = pd.Series([1, 0, 0, 1, 1], dtype="boolean")
    intervals = pd.Series(pd.IntervalIndex.from_breaks([0, 1, 2, 3, 4]))

    # (dtype passed to the schema, series to validate, extra schema kwargs)
    cases = [
        (pd.CategoricalDtype(), categorical, {}),
        (pd.DatetimeTZDtype(tz='UTC'), tz_datetimes, {}),
        (pd.Int64Dtype(), nullable_ints, {}),
        (pd.StringDtype(), strings, {}),
        (pd.PeriodDtype(freq='D'), periods, {}),
        (pd.SparseDtype("float"), sparse_floats, {"nullable": True}),
        (pd.BooleanDtype(), booleans, {}),
        (pd.IntervalDtype(subtype="int64"), intervals, {}),
    ]

    for pandas_dtype, sample, schema_kwargs in cases:
        schema = SeriesSchema(pandas_dtype=pandas_dtype, **schema_kwargs)
        validated = schema.validate(sample)
        assert isinstance(validated, pd.Series)

开发者ID:pandera-dev，项目名称:pandera，代码行数:47，代码来源:test_dtypes.py

示例6: electricity_planning_areas

# 需要导入模块: import pandas [as 别名]
# 或者: from pandas import StringDtype [as 别名]
def electricity_planning_areas(pudl_settings):
    """Load the HIFLD Electric Planning Area geometries with cleaned dtypes.

    Args:
        pudl_settings (dict): Settings mapping. Only ``"data_dir"`` is read;
            the geodatabase is loaded from
            ``local/hifld/electric_planning_areas.gdb`` beneath it.

    Returns:
        The frame returned by ``geopandas.read_file`` with date and numeric
        columns parsed, columns cast to explicit dtypes, and four ``ID``
        values remapped.

    """
    def _parsed(column, parser):
        # Build an assign() callable that runs ``parser`` over one column.
        return lambda frame: parser(getattr(frame, column))

    def _cast(column, dtype):
        # Build an assign() callable that casts one column to ``dtype``.
        return lambda frame: getattr(frame, column).astype(dtype)

    gdb_path = pathlib.Path(
        pudl_settings["data_dir"],
        "local/hifld/electric_planning_areas.gdb"
    )

    column_parsers = {
        "SOURCEDATE": pd.to_datetime,
        "VAL_DATE": pd.to_datetime,
        "ID": pd.to_numeric,
        "NAICS_CODE": pd.to_numeric,
        "YEAR": pd.to_numeric,
    }
    column_dtypes = {
        "ID": pd.Int64Dtype(),
        "NAME": pd.StringDtype(),
        "COUNTRY": pd.StringDtype(),
        "NAICS_CODE": pd.Int64Dtype(),
        "NAICS_DESC": pd.StringDtype(),
        "SOURCE": pd.StringDtype(),
        "VAL_METHOD": pd.StringDtype(),
        "WEBSITE": pd.StringDtype(),
        "ABBRV": pd.StringDtype(),
        "YEAR": pd.Int64Dtype(),
        "PEAK_LOAD": float,
        "PEAK_RANGE": float,
        "SHAPE_Length": float,
        "SHAPE_Area": float,
    }

    gdf = (
        geopandas.read_file(gdb_path)
        .assign(**{
            column: _parsed(column, parser)
            for column, parser in column_parsers.items()
        })
        # Columns are cast one at a time through assign() as a hack to work
        # around a geopandas issue fixed as of v0.8.0:
        # https://github.com/geopandas/geopandas/issues/1366
        .assign(**{
            column: _cast(column, dtype)
            for column, dtype in column_dtypes.items()
        })
    )

    # Need to set these IDs b/c HIFLD geometry uses EIA Balancing Authority IDs
    # (maybe?) FERC 714 is using EIA Utility IDs. This isn't totally resolved
    # and we need to figure out which set of IDs is getting used where.
    id_replacements = (
        (2775, 229),  # CAISO
        (59504, 17690),  # Southwest Power Pool
        (14379, 14354),  # PacifiCorp East + West
        (13670, 39347),  # Northeast TX Electric Co-op
    )
    for hifld_id, replacement_id in id_replacements:
        gdf.loc[gdf.ID == hifld_id, "ID"] = replacement_id
    return gdf

开发者ID:catalyst-cooperative，项目名称:pudl，代码行数:45，代码来源:ferc714.py

示例7: ownership

# 需要导入模块: import pandas [as 别名]
# 或者: from pandas import StringDtype [as 别名]
def ownership(eia860_dfs, eia860_transformed_dfs):
    """
    Clean the EIA 860 ownership page and add it to the transformed tables.

    Args:
        eia860_dfs (dict): Each entry in this dictionary of DataFrame objects
            corresponds to a page from the EIA860 form, as reported in the
            Excel spreadsheets they distribute. Only the ``'ownership'``
            entry is read, and it is copied rather than modified.
        eia860_transformed_dfs (dict): A dictionary of DataFrame objects in
            which pages from EIA860 form (keys) correspond to normalized
            DataFrames of values from that page (values).

    Returns:
        dict: eia860_transformed_dfs, with the cleaned ownership table stored
        under the ``'ownership_eia860'`` key.

    Raises:
        ValueError: If the earliest report year found in the data is before
            the earliest year listed in ``pc.working_years["eia860"]``.

    """
    raw_ownership = eia860_dfs['ownership'].copy()
    o_df = pudl.helpers.convert_to_date(
        pudl.helpers.fix_eia_na(raw_ownership)
    )

    # The cleaning below is only known to be valid back to the earliest
    # working year. Older data means the cleaning steps need to be revisited
    # before they can be trusted.
    earliest_reported = min(o_df.report_date.dt.year)
    earliest_supported = min(pc.working_years["eia860"])
    if earliest_reported < earliest_supported:
        raise ValueError(
            f"EIA 860 transform step is only known to work for "
            f"year {min(pc.working_years['eia860'])} and later, but found data "
            f"from year {min(o_df.report_date.dt.year)}."
        )

    # Prior to 2012, ownership was reported as a percentage, rather than
    # as a proportion, so those values are divided by 100.
    reported_as_percent = o_df.report_date.dt.year < 2012
    o_df.loc[reported_as_percent, 'fraction_owned'] = (
        o_df.loc[reported_as_percent, 'fraction_owned'] / 100
    )

    column_dtypes = {
        "owner_utility_id_eia": pd.Int64Dtype(),
        "utility_id_eia": pd.Int64Dtype(),
        "plant_id_eia": pd.Int64Dtype(),
        "owner_state": pd.StringDtype()
    }
    eia860_transformed_dfs['ownership_eia860'] = o_df.astype(column_dtypes)

    return eia860_transformed_dfs

开发者ID:catalyst-cooperative，项目名称:pudl，代码行数:53，代码来源:eia860.py

示例8: create_in_dtypes

# 需要导入模块: import pandas [as 别名]
# 或者: from pandas import StringDtype [as 别名]
def create_in_dtypes():
    """
    Build the mapping of input column names to their data types.

    Spelling the dtypes out explicitly is necessary for some cases where,
    e.g., a column is always NaN.

    Returns:
        dict: mapping columns names to :mod:`pandas` data types.

    """
    # Measurement codes shared by all four of the measurement variables.
    shared_codes = (
        "LME",
        "Measured",
        "Measured and Substitute",
        "Other",
        "Substitute",
        "Undetermined",
        "Unknown Code",
        "",
    )
    # The NOx measurement columns allow one extra code.
    nox_codes = shared_codes + ("Calculated",)

    measurement_dtype = pd.CategoricalDtype(
        categories=shared_codes, ordered=False)
    nox_measurement_dtype = pd.CategoricalDtype(
        categories=nox_codes, ordered=False)
    state_dtype = pd.CategoricalDtype(
        categories=pc.cems_states.keys(), ordered=False)

    float32 = "float32"
    # NOTE: "operating_datetime_utc" has no entry in this mapping.
    return dict([
        ("state", state_dtype),
        ("plant_id_eia", "int32"),
        ("unitid", pd.StringDtype()),
        ("operating_time_hours", float32),
        ("gross_load_mw", float32),
        ("steam_load_1000_lbs", float32),
        ("so2_mass_lbs", float32),
        ("so2_mass_measurement_code", measurement_dtype),
        ("nox_rate_lbs_mmbtu", float32),
        ("nox_rate_measurement_code", nox_measurement_dtype),
        ("nox_mass_lbs", float32),
        ("nox_mass_measurement_code", nox_measurement_dtype),
        ("co2_mass_tons", float32),
        ("co2_mass_measurement_code", measurement_dtype),
        ("heat_content_mmbtu", float32),
        ("facility_id", pd.Int32Dtype()),
        ("unit_id_epa", pd.Int32Dtype()),
    ])

开发者ID:catalyst-cooperative，项目名称:pudl，代码行数:51，代码来源:epacems_to_parquet.py

注：本文中的pandas.StringDtype方法示例由纯净天空整理自Github/MSDocs等开源代码及文档管理平台，相关代码片段筛选自各路编程大神贡献的开源项目，源码版权归原作者所有，传播和使用请参考对应项目的License；未经允许，请勿转载。