本文整理汇总了 Python 中 pandas.Int64Dtype 的典型用法代码示例。如果您正苦于以下问题：pandas.Int64Dtype 的具体用法是什么？pandas.Int64Dtype 怎么用？有哪些 pandas.Int64Dtype 的使用例子？那么，这里精选的代码示例或许可以为您提供帮助。您也可以进一步了解该类所在模块 pandas 的用法示例。
在下文中一共展示了pandas.Int64Dtype方法的10个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Python代码示例。
示例1: _dtypes
# 需要导入模块: import pandas [as 别名]
# 或者: from pandas import Int64Dtype [as 别名]
def _dtypes(self):
if not hasattr(self, "__dtypes"):
import pandas as pd
self.__dtypes = {
"tinyint": pd.Int64Dtype(),
"smallint": pd.Int64Dtype(),
"integer": pd.Int64Dtype(),
"bigint": pd.Int64Dtype(),
"float": float,
"real": float,
"double": float,
"char": str,
"varchar": str,
"string": str,
"array": str,
"map": str,
"row": str,
}
return self.__dtypes
示例2: table_type
# 需要导入模块: import pandas [as 别名]
# 或者: from pandas import Int64Dtype [as 别名]
def table_type(df_column):
    """Classify a pandas column by its dtype into a table column type name.

    Args:
        df_column: an object exposing a ``dtype`` attribute (e.g. a
            ``pd.Series``).

    Returns:
        str: ``'datetime'`` for timezone-aware datetime columns, ``'text'``
        for string/boolean/categorical/period columns, ``'numeric'`` for
        sparse/interval/nullable-integer columns, and ``'any'`` otherwise
        (including always on Python 2).
    """
    # Note - this only works with Pandas >= 1.0.0
    if sys.version_info < (3, 0):  # Pandas 1.0.0 does not support Python 2
        return 'any'

    dtype = df_column.dtype

    if isinstance(dtype, pd.DatetimeTZDtype):
        # Must be a plain string: a trailing comma here would turn the
        # return value into the one-element tuple ('datetime',).
        return 'datetime'

    text_dtypes = (
        pd.StringDtype,
        pd.BooleanDtype,
        pd.CategoricalDtype,
        pd.PeriodDtype,
    )
    if isinstance(dtype, text_dtypes):
        return 'text'

    numeric_dtypes = (
        pd.SparseDtype,
        pd.IntervalDtype,
        pd.Int8Dtype,
        pd.Int16Dtype,
        pd.Int32Dtype,
        pd.Int64Dtype,
    )
    if isinstance(dtype, numeric_dtypes):
        return 'numeric'

    return 'any'
示例3: _fix_int_dtypes
# 需要导入模块: import pandas [as 别名]
# 或者: from pandas import Int64Dtype [as 别名]
def _fix_int_dtypes(self, df):
"""
Mutate DataFrame to set dtypes for int columns containing NaN values."
"""
for col in df:
if "float" in df[col].dtype.name and df[col].hasnans:
# inspect values to determine if dtype of non-null values is int or float
notna_series = df[col].dropna().values
if np.isclose(notna_series, notna_series.astype(int)).all():
# set to dtype that retains integers and supports NaNs
df[col] = np.where(df[col].isnull(), None, df[col]).astype(pd.Int64Dtype)
示例4: get_dtypes
# 需要导入模块: import pandas [as 别名]
# 或者: from pandas import Int64Dtype [as 别名]
def get_dtypes(year, page):
    """Build the dtype mapping for the plant id columns.

    Both spellings of the plant id column header are mapped to the nullable
    ``Int64`` dtype. ``year`` and ``page`` are accepted but not used here.

    Returns:
        dict: column header -> ``pd.Int64Dtype()`` instance.
    """
    plant_id_headers = ("Plant ID", "Plant Id")
    return {header: pd.Int64Dtype() for header in plant_id_headers}
示例5: links
# 需要导入模块: import pandas [as 别名]
# 或者: from pandas import Int64Dtype [as 别名]
def links(self):
    """
    Load the movie link table, which connects movie IDs to external
    identifiers. The result is indexed by movie ID (index name ``item``).

    The table is read from ``links.csv`` under ``self.path``; ``tmdbId`` is
    read with the nullable ``Int64`` dtype.

    >>> mlsmall = MovieLens('data/ml-latest-small')
    >>> mlsmall.links
          imdbId  tmdbId
    item
    1     114709     862
    2     113497    8844
    3     113228   15602
    4     114885   31357
    5     113041   11862
    ...
    [9125 rows x 2 columns]
    """
    csv_path = self.path / 'links.csv'
    column_types = {
        'movieId': np.int32,
        'imdbId': np.int64,
        'tmdbId': pd.Int64Dtype(),
    }
    table = (
        pd.read_csv(csv_path, dtype=column_types)
        .rename(columns={'movieId': 'item'})
        .set_index('item')
    )
    _log.debug('loaded %s, takes %d bytes', csv_path, table.memory_usage().sum())
    return table
示例6: test_nullable_int_unsupported
# 需要导入模块: import pandas [as 别名]
# 或者: from pandas import Int64Dtype [as 别名]
def test_nullable_int_unsupported(self):
    """Check that ``validate_dataframe`` rejects a nullable-integer column.

    The call is expected to raise ``ValueError`` with a message matching
    "unsupported dtype".
    """
    # We don't support nullable integer columns ... yet
    nullable_ints = pd.Series([1, np.nan], dtype=pd.Int64Dtype())
    dataframe = pd.DataFrame({"A": nullable_ints})

    with self.assertRaisesRegex(ValueError, "unsupported dtype"):
        validate_dataframe(dataframe)
示例7: test_to_pandas_nullable_int
# 需要导入模块: import pandas [as 别名]
# 或者: from pandas import Int64Dtype [as 别名]
def test_to_pandas_nullable_int(self):
    """Check that ``to_pandas`` yields a nullable ``Int64`` first column.

    An ``AssetList`` with one set and one missing ``parent_id`` is converted
    for both values of ``camel_case``; the dtype of the first column of the
    resulting frame must equal ``pd.Int64Dtype()``.
    """
    import pandas as pd

    for camel_case in [False, True]:
        assets = AssetList([Asset(parent_id=123), Asset(parent_id=None)])
        frame = assets.to_pandas(camel_case=camel_case)
        # ``dtypes`` is a Series indexed by column label, so select the first
        # entry by position with ``.iloc``; integer keys passed to ``[]`` on
        # a label-indexed Series are deprecated as positional lookups.
        assert pd.Int64Dtype() == frame.dtypes.iloc[0]
示例8: test_pandas_extension_types
# 需要导入模块: import pandas [as 别名]
# 或者: from pandas import Int64Dtype [as 别名]
def test_pandas_extension_types():
    """Test pandas extension data type happy path.

    Each case pairs an extension dtype with a Series of that dtype and
    optional extra ``SeriesSchema`` keyword arguments; validating the Series
    against the schema must return a ``pd.Series``.
    """
    # pylint: disable=no-member
    categorical_case = (
        pd.CategoricalDtype(),
        pd.Series(["a", "a", "b", "b", "c", "c"], dtype="category"),
        None,
    )
    datetime_tz_case = (
        pd.DatetimeTZDtype(tz='UTC'),
        pd.Series(
            pd.date_range(start="20200101", end="20200301"),
            dtype="datetime64[ns, utc]",
        ),
        None,
    )
    int64_case = (pd.Int64Dtype(), pd.Series(range(10), dtype="Int64"), None)
    string_case = (
        pd.StringDtype(),
        pd.Series(["foo", "bar", "baz"], dtype="string"),
        None,
    )
    period_case = (
        pd.PeriodDtype(freq='D'),
        pd.Series(pd.period_range('1/1/2019', '1/1/2020', freq='D')),
        None,
    )
    sparse_case = (
        pd.SparseDtype("float"),
        pd.Series(range(100)).where(
            lambda s: s < 5, other=np.nan).astype("Sparse[float]"),
        {"nullable": True},
    )
    boolean_case = (
        pd.BooleanDtype(),
        pd.Series([1, 0, 0, 1, 1], dtype="boolean"),
        None,
    )
    interval_case = (
        pd.IntervalDtype(subtype="int64"),
        pd.Series(pd.IntervalIndex.from_breaks([0, 1, 2, 3, 4])),
        None,
    )

    cases = [
        categorical_case,
        datetime_tz_case,
        int64_case,
        string_case,
        period_case,
        sparse_case,
        boolean_case,
        interval_case,
    ]
    for expected_dtype, sample, extra_kwargs in cases:
        # A missing kwargs entry means "no extra schema options".
        schema = SeriesSchema(pandas_dtype=expected_dtype, **(extra_kwargs or {}))
        validated = schema.validate(sample)
        assert isinstance(validated, pd.Series)
示例9: electricity_planning_areas
# 需要导入模块: import pandas [as 别名]
# 或者: from pandas import Int64Dtype [as 别名]
def electricity_planning_areas(pudl_settings):
    """Electric Planning Area geometries from HIFLD.

    Reads ``local/hifld/electric_planning_areas.gdb`` under
    ``pudl_settings["data_dir"]``, parses the date and numeric columns,
    casts columns to explicit dtypes, and rewrites a few ID values.

    Args:
        pudl_settings: mapping that provides the ``"data_dir"`` path.

    Returns:
        The frame returned by ``geopandas.read_file`` after the column
        conversions and ID replacements.
    """
    gdb_path = pathlib.Path(
        pudl_settings["data_dir"],
        "local/hifld/electric_planning_areas.gdb"
    )
    raw = geopandas.read_file(gdb_path)

    parsed = raw.assign(
        SOURCEDATE=lambda frame: pd.to_datetime(frame.SOURCEDATE),
        VAL_DATE=lambda frame: pd.to_datetime(frame.VAL_DATE),
        ID=lambda frame: pd.to_numeric(frame.ID),
        NAICS_CODE=lambda frame: pd.to_numeric(frame.NAICS_CODE),
        YEAR=lambda frame: pd.to_numeric(frame.YEAR),
    )

    # Hack to work around geopanda issue fixed as of v0.8.0
    # https://github.com/geopandas/geopandas/issues/1366
    # Target dtype per column, in the order the casts are applied.
    column_dtypes = {
        "ID": pd.Int64Dtype(),
        "NAME": pd.StringDtype(),
        "COUNTRY": pd.StringDtype(),
        "NAICS_CODE": pd.Int64Dtype(),
        "NAICS_DESC": pd.StringDtype(),
        "SOURCE": pd.StringDtype(),
        "VAL_METHOD": pd.StringDtype(),
        "WEBSITE": pd.StringDtype(),
        "ABBRV": pd.StringDtype(),
        "YEAR": pd.Int64Dtype(),
        "PEAK_LOAD": float,
        "PEAK_RANGE": float,
        "SHAPE_Length": float,
        "SHAPE_Area": float,
    }
    # Bind the column name and dtype as lambda defaults so each callable
    # keeps its own pair rather than the loop's last values.
    casts = {
        column: (
            lambda frame, column=column, dtype=dtype:
            getattr(frame, column).astype(dtype)
        )
        for column, dtype in column_dtypes.items()
    }
    gdf = parsed.assign(**casts)

    # Need to set these IDs b/c HIFLD geometry uses EIA Balancing Authority IDs
    # (maybe?) FERC 714 is using EIA Utility IDs. This isn't totally resolved
    # and we need to figure out which set of IDs is getting used where.
    id_replacements = (
        (2775, 229),      # CAISO
        (59504, 17690),   # Southwest Power Pool
        (14379, 14354),   # PacifiCorp East + West
        (13670, 39347),   # Northeast TX Electric Co-op
    )
    for current_id, replacement_id in id_replacements:
        gdf.loc[gdf.ID == current_id, "ID"] = replacement_id
    return gdf
示例10: ownership
# 需要导入模块: import pandas [as 别名]
# 或者: from pandas import Int64Dtype [as 别名]
def ownership(eia860_dfs, eia860_transformed_dfs):
    """
    Pull and transform the ownership table.

    Args:
        eia860_dfs (dict): Each entry in this dictionary of DataFrame objects
            corresponds to a page from the EIA860 form, as reported in the
            Excel spreadsheets they distribute
        eia860_transformed_dfs (dict): A dictionary of DataFrame objects in
            which pages from EIA860 form (keys) correspond to normalized
            DataFrames of values from that page (values)

    Returns:
        dict: eia860_transformed_dfs, a dictionary of DataFrame objects in
        which pages from EIA860 form (keys) correspond to normalized
        DataFrames of values from that page (values). The transformed table
        is stored under the key ``'ownership_eia860'``.

    Raises:
        ValueError: if the data contains a report year earlier than the
            first EIA 860 working year.
    """
    o_df = eia860_dfs['ownership'].copy()
    o_df = o_df.pipe(pudl.helpers.fix_eia_na)
    o_df = o_df.pipe(pudl.helpers.convert_to_date)

    # The fix we're making here is only known to be valid for 2011 -- if we
    # get older data... then we need to revisit the cleaning function and
    # make sure it also applies to those earlier years.
    earliest_year = min(o_df.report_date.dt.year)
    first_supported_year = min(pc.working_years["eia860"])
    if earliest_year < first_supported_year:
        raise ValueError(
            f"EIA 860 transform step is only known to work for "
            f"year {first_supported_year} and later, but found data "
            f"from year {earliest_year}."
        )

    # Prior to 2012, ownership was reported as a percentage, rather than
    # as a proportion, so we need to divide those values by 100.
    reported_as_percent = o_df.report_date.dt.year < 2012
    o_df.loc[reported_as_percent, 'fraction_owned'] = (
        o_df.loc[reported_as_percent, 'fraction_owned'] / 100
    )

    # IDs use nullable integers and the state uses the string dtype.
    target_dtypes = {
        "owner_utility_id_eia": pd.Int64Dtype(),
        "utility_id_eia": pd.Int64Dtype(),
        "plant_id_eia": pd.Int64Dtype(),
        "owner_state": pd.StringDtype()
    }
    o_df = o_df.astype(target_dtypes)

    eia860_transformed_dfs['ownership_eia860'] = o_df
    return eia860_transformed_dfs