当前位置: 首页>>代码示例>>Python>>正文


Python pandas.Int64Dtype方法代码示例

本文整理汇总了Python中pandas.Int64Dtype方法的典型用法代码示例。如果您正苦于以下问题：Python pandas.Int64Dtype方法的具体用法？Python pandas.Int64Dtype怎么用？Python pandas.Int64Dtype使用的例子？那么恭喜您，这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在模块pandas的用法示例。


在下文中一共展示了pandas.Int64Dtype方法的10个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Python代码示例。

示例1: _dtypes

# 需要导入模块: import pandas [as 别名]
# 或者: from pandas import Int64Dtype [as 别名]
def _dtypes(self):
    """Return the mapping from SQL-style column type names to pandas dtypes.

    The mapping is built on first access and cached on ``self``; later
    calls return the very same dict object.

    Integer type names map to the nullable ``pd.Int64Dtype()``, floating
    point type names map to ``float``, and every remaining listed type name
    maps to ``str``.

    Returns:
        dict: type name (``str``) -> dtype accepted by pandas.
    """
    # EAFP instead of ``hasattr(self, "__dtypes")``: inside a class body the
    # attribute access ``self.__dtypes`` is name-mangled while the string
    # passed to ``hasattr`` is not, so the old check could never find the
    # cached value. Reading and writing through the same spelling keeps the
    # two in sync whether or not mangling applies.
    try:
        return self.__dtypes
    except AttributeError:
        pass

    # Imported lazily so pandas is only required when this mapping is used.
    import pandas as pd

    self.__dtypes = {
        "tinyint": pd.Int64Dtype(),
        "smallint": pd.Int64Dtype(),
        "integer": pd.Int64Dtype(),
        "bigint": pd.Int64Dtype(),
        "float": float,
        "real": float,
        "double": float,
        "char": str,
        "varchar": str,
        "string": str,
        "array": str,
        "map": str,
        "row": str,
    }
    return self.__dtypes
开发者ID:laughingman7743,项目名称:PyAthena,代码行数:22,代码来源:converter.py

示例2: table_type

# 需要导入模块: import pandas [as 别名]
# 或者: from pandas import Int64Dtype [as 别名]
def table_type(df_column):
    """Classify a pandas column by its extension dtype.

    Args:
        df_column: object exposing a ``dtype`` attribute (e.g. a
            ``pandas.Series``).

    Returns:
        str: ``'datetime'`` for timezone-aware datetime columns, ``'text'``
        for string, boolean, categorical and period columns, ``'numeric'``
        for sparse, interval and nullable-integer columns, and ``'any'``
        for everything else (and always on Python 2).
    """
    # Note - this only works with Pandas >= 1.0.0

    if sys.version_info < (3, 0):  # Pandas 1.0.0 does not support Python 2
        return 'any'

    dtype = df_column.dtype

    if isinstance(dtype, pd.DatetimeTZDtype):
        # A trailing comma here used to turn the result into the tuple
        # ('datetime',) instead of the string every other branch returns.
        return 'datetime'

    text_dtypes = (
        pd.StringDtype,
        pd.BooleanDtype,
        pd.CategoricalDtype,
        pd.PeriodDtype,
    )
    if isinstance(dtype, text_dtypes):
        return 'text'

    numeric_dtypes = (
        pd.SparseDtype,
        pd.IntervalDtype,
        pd.Int8Dtype,
        pd.Int16Dtype,
        pd.Int32Dtype,
        pd.Int64Dtype,
    )
    if isinstance(dtype, numeric_dtypes):
        return 'numeric'

    return 'any'
开发者ID:plotly,项目名称:dash-docs,代码行数:24,代码来源:filtering_fe_autotype.py

示例3: _fix_int_dtypes

# 需要导入模块: import pandas [as 别名]
# 或者: from pandas import Int64Dtype [as 别名]
def _fix_int_dtypes(self, df):
    """
    Mutate DataFrame to set dtypes for int columns containing NaN values.

    A float column that contains NaN but whose non-null values are all
    (numerically close to) integers is converted in place to the nullable
    ``Int64`` extension dtype, so the integers are kept and the missing
    entries become ``<NA>``. Other columns are left untouched.

    :param df: DataFrame to fix; modified in place.
    :return: None
    """
    for col in df:
        column = df[col]
        if "float" in column.dtype.name and column.hasnans:
            # inspect values to determine if dtype of non-null values is int or float
            notna_values = column.dropna().values
            if np.isclose(notna_values, notna_values.astype(int)).all():
                # Set to a dtype that retains integers and supports NaNs.
                # The cast goes through pandas with a dtype *instance*;
                # handing the Int64Dtype class to ndarray.astype on an
                # object array did not yield a nullable integer column.
                # round() removes the sub-tolerance noise np.isclose
                # accepted, which the safe cast to Int64 would reject.
                df[col] = column.round().astype(pd.Int64Dtype())
开发者ID:apache,项目名称:airflow,代码行数:13,代码来源:mysql_to_s3.py

示例4: get_dtypes

# 需要导入模块: import pandas [as 别名]
# 或者: from pandas import Int64Dtype [as 别名]
def get_dtypes(year, page):
    """Return the dtypes to apply to the plant id columns.

    Both spellings of the plant id column header are mapped to the nullable
    ``Int64`` dtype. ``year`` and ``page`` are accepted but do not affect
    the result.
    """
    plant_id_headers = ("Plant ID", "Plant Id")
    return {header: pd.Int64Dtype() for header in plant_id_headers}
开发者ID:catalyst-cooperative,项目名称:pudl,代码行数:8,代码来源:eia923.py

示例5: links

# 需要导入模块: import pandas [as 别名]
# 或者: from pandas import Int64Dtype [as 别名]
def links(self):
    """
    The movie link table, which maps movie IDs to external identifiers.
    The returned frame is indexed by movie ID (index name ``item``).

    >>> mlsmall = MovieLens('data/ml-latest-small')
    >>> mlsmall.links
             imdbId  tmdbId
    item
    1        114709     862
    2        113497    8844
    3        113228   15602
    4        114885   31357
    5        113041   11862
    ...
    [9125 rows x 2 columns]
    """

    csv_path = self.path / 'links.csv'
    # tmdbId is read with the nullable integer dtype; the other two columns
    # use plain NumPy integers.
    column_types = {
        'movieId': np.int32,
        'imdbId': np.int64,
        'tmdbId': pd.Int64Dtype(),
    }
    table = pd.read_csv(csv_path, dtype=column_types)
    table = table.rename(columns={'movieId': 'item'}).set_index('item')
    _log.debug('loaded %s, takes %d bytes', csv_path, table.memory_usage().sum())
    return table
开发者ID:lenskit,项目名称:lkpy,代码行数:30,代码来源:datasets.py

示例6: test_nullable_int_unsupported

# 需要导入模块: import pandas [as 别名]
# 或者: from pandas import Int64Dtype [as 别名]
def test_nullable_int_unsupported(self):
    """A nullable-integer column must be rejected with "unsupported dtype"."""
    # We don't support nullable integer columns ... yet
    nullable_ints = pd.Series([1, np.nan], dtype=pd.Int64Dtype())
    dataframe = pd.DataFrame({"A": nullable_ints})

    with self.assertRaisesRegex(ValueError, "unsupported dtype"):
        validate_dataframe(dataframe)
开发者ID:CJWorkbench,项目名称:cjworkbench,代码行数:11,代码来源:test_validate.py

示例7: test_to_pandas_nullable_int

# 需要导入模块: import pandas [as 别名]
# 或者: from pandas import Int64Dtype [as 别名]
def test_to_pandas_nullable_int(self):
    """The first column of the converted frame must use the nullable Int64 dtype.

    Checked for both values of ``camel_case``, with one asset that has a
    ``parent_id`` and one whose ``parent_id`` is ``None``.
    """
    import pandas as pd

    for camel_case in [False, True]:
        assets = AssetList([Asset(parent_id=123), Asset(parent_id=None)])
        frame = assets.to_pandas(camel_case=camel_case)
        # ``dtypes`` is indexed by column label, so take the first entry by
        # position explicitly; integer ``[]`` lookups on a label-indexed
        # Series are deprecated in pandas.
        first_dtype = frame.dtypes.iloc[0]
        assert pd.Int64Dtype() == first_dtype
开发者ID:cognitedata,项目名称:cognite-sdk-python,代码行数:10,代码来源:test_assets.py

示例8: test_pandas_extension_types

# 需要导入模块: import pandas [as 别名]
# 或者: from pandas import Int64Dtype [as 别名]
def test_pandas_extension_types():
    """Test pandas extension data type happy path.

    Each case pairs an extension dtype with a series of that dtype and
    optional extra ``SeriesSchema`` keyword arguments; validating the series
    against the schema must hand back a ``pd.Series``.
    """
    # pylint: disable=no-member
    cases = [
        (
            pd.CategoricalDtype(),
            pd.Series(["a", "a", "b", "b", "c", "c"], dtype="category"),
            None,
        ),
        (
            pd.DatetimeTZDtype(tz='UTC'),
            pd.Series(
                pd.date_range(start="20200101", end="20200301"),
                dtype="datetime64[ns, utc]",
            ),
            None,
        ),
        (
            pd.Int64Dtype(),
            pd.Series(range(10), dtype="Int64"),
            None,
        ),
        (
            pd.StringDtype(),
            pd.Series(["foo", "bar", "baz"], dtype="string"),
            None,
        ),
        (
            pd.PeriodDtype(freq='D'),
            pd.Series(pd.period_range('1/1/2019', '1/1/2020', freq='D')),
            None,
        ),
        (
            pd.SparseDtype("float"),
            # Only the values below 5 are kept; the rest become NaN, hence
            # the schema for this case is declared nullable.
            pd.Series(range(100))
            .where(lambda s: s < 5, other=np.nan)
            .astype("Sparse[float]"),
            {"nullable": True},
        ),
        (
            pd.BooleanDtype(),
            pd.Series([1, 0, 0, 1, 1], dtype="boolean"),
            None,
        ),
        (
            pd.IntervalDtype(subtype="int64"),
            pd.Series(pd.IntervalIndex.from_breaks([0, 1, 2, 3, 4])),
            None,
        ),
    ]
    for pandas_dtype, series, extra_kwargs in cases:
        if extra_kwargs is None:
            extra_kwargs = {}
        schema = SeriesSchema(pandas_dtype=pandas_dtype, **extra_kwargs)
        validated = schema.validate(series)
        assert isinstance(validated, pd.Series)
开发者ID:pandera-dev,项目名称:pandera,代码行数:47,代码来源:test_dtypes.py

示例9: electricity_planning_areas

# 需要导入模块: import pandas [as 别名]
# 或者: from pandas import Int64Dtype [as 别名]
def electricity_planning_areas(pudl_settings):
    """Electric Planning Area geometries from HIFLD.

    Reads ``local/hifld/electric_planning_areas.gdb`` under
    ``pudl_settings["data_dir"]``, parses the date and numeric columns,
    casts the attribute columns to explicit dtypes and remaps a few ``ID``
    values.

    Args:
        pudl_settings (dict): settings mapping; only ``"data_dir"`` is read.

    Returns:
        The frame returned by ``geopandas.read_file`` after the conversions
        above (presumably a GeoDataFrame).
    """
    gdb_path = (
        pathlib.Path(pudl_settings["data_dir"])
        / "local/hifld/electric_planning_areas.gdb"
    )

    # First pass: parse raw values into datetimes / numbers.
    parsers = {
        "SOURCEDATE": pd.to_datetime,
        "VAL_DATE": pd.to_datetime,
        "ID": pd.to_numeric,
        "NAICS_CODE": pd.to_numeric,
        "YEAR": pd.to_numeric,
    }
    # Second pass: pin every attribute column to an explicit dtype.
    target_dtypes = {
        "ID": pd.Int64Dtype(),
        "NAME": pd.StringDtype(),
        "COUNTRY": pd.StringDtype(),
        "NAICS_CODE": pd.Int64Dtype(),
        "NAICS_DESC": pd.StringDtype(),
        "SOURCE": pd.StringDtype(),
        "VAL_METHOD": pd.StringDtype(),
        "WEBSITE": pd.StringDtype(),
        "ABBRV": pd.StringDtype(),
        "YEAR": pd.Int64Dtype(),
        "PEAK_LOAD": float,
        "PEAK_RANGE": float,
        "SHAPE_Length": float,
        "SHAPE_Area": float,
    }

    # Column name and converter are bound as lambda defaults so that each
    # callable keeps its own values rather than the last loop iteration's.
    parse_steps = {
        col: (lambda frame, col=col, parse=parse: parse(frame[col]))
        for col, parse in parsers.items()
    }
    cast_steps = {
        col: (lambda frame, col=col, dtype=dtype: frame[col].astype(dtype))
        for col, dtype in target_dtypes.items()
    }

    parsed = geopandas.read_file(gdb_path).assign(**parse_steps)
    # Hack to work around geopanda issue fixed as of v0.8.0
    # https://github.com/geopandas/geopandas/issues/1366
    # (columns are cast one at a time through assign())
    gdf = parsed.assign(**cast_steps)

    # Need to set these IDs b/c HIFLD geometry uses EIA Balancing Authority IDs
    # (maybe?) FERC 714 is using EIA Utility IDs. This isn't totally resolved
    # and we need to figure out which set of IDs is getting used where.
    id_replacements = {
        2775: 229,  # CAISO
        59504: 17690,  # Southwest Power Pool
        14379: 14354,  # PacifiCorp East + West
        13670: 39347,  # Northeast TX Electric Co-op
    }
    for old_id, new_id in id_replacements.items():
        gdf.loc[gdf.ID == old_id, "ID"] = new_id
    return gdf
开发者ID:catalyst-cooperative,项目名称:pudl,代码行数:45,代码来源:ferc714.py

示例10: ownership

# 需要导入模块: import pandas [as 别名]
# 或者: from pandas import Int64Dtype [as 别名]
def ownership(eia860_dfs, eia860_transformed_dfs):
    """
    Clean the EIA 860 ownership page and file it with the transformed tables.

    A copy of ``eia860_dfs['ownership']`` is passed through the NA and date
    helpers, ``fraction_owned`` is rescaled for report years before 2012,
    the ID and state columns are cast to nullable dtypes, and the result is
    stored under the ``'ownership_eia860'`` key.

    Args:
        eia860_dfs (dict): Each entry in this dictionary of DataFrame objects
            corresponds to a page from the EIA860 form, as reported in the
            Excel spreadsheets they distribute
        eia860_transformed_dfs (dict): A dictionary of DataFrame objects in
            which pages from EIA860 form (keys) correspond to normalized
            DataFrames of values from that page (values). Updated in place.

    Returns:
        dict: eia860_transformed_dfs, a dictionary of DataFrame objects in
        which pages from EIA860 form (keys) correspond to normalized
        DataFrames of values from that page (values)

    Raises:
        ValueError: if the data contains a report year earlier than the
            earliest entry of ``pc.working_years["eia860"]``.

    """
    cleaned = pudl.helpers.convert_to_date(
        pudl.helpers.fix_eia_na(eia860_dfs['ownership'].copy())
    )

    # The fix we're making here is only known to be valid for 2011 -- if we
    # get older data... then we need to to revisit the cleaning function and
    # make sure it also applies to those earlier years.
    earliest_found = min(cleaned.report_date.dt.year)
    earliest_supported = min(pc.working_years["eia860"])
    if earliest_found < earliest_supported:
        raise ValueError(
            f"EIA 860 transform step is only known to work for "
            f"year {earliest_supported} and later, but found data "
            f"from year {earliest_found}."
        )

    # Prior to 2012, ownership was reported as a percentage, rather than
    # as a proportion, so we need to divide those values by 100.
    reported_as_percent = cleaned.report_date.dt.year < 2012
    cleaned.loc[reported_as_percent, 'fraction_owned'] = (
        cleaned.loc[reported_as_percent, 'fraction_owned'] / 100
    )

    column_dtypes = {
        "owner_utility_id_eia": pd.Int64Dtype(),
        "utility_id_eia": pd.Int64Dtype(),
        "plant_id_eia": pd.Int64Dtype(),
        "owner_state": pd.StringDtype(),
    }
    eia860_transformed_dfs['ownership_eia860'] = cleaned.astype(column_dtypes)

    return eia860_transformed_dfs
开发者ID:catalyst-cooperative,项目名称:pudl,代码行数:53,代码来源:eia860.py


注:本文中的pandas.Int64Dtype方法示例由纯净天空整理自Github/MSDocs等开源代码及文档管理平台,相关代码片段筛选自各路编程大神贡献的开源项目,源码版权归原作者所有,传播和使用请参考对应项目的License;未经允许,请勿转载。