本文整理汇总了 Python 中 pandas.NA 属性的典型用法代码示例。如果您正苦于以下问题：Python pandas.NA 属性的具体用法？Python pandas.NA 怎么用？Python pandas.NA 使用的例子？那么，这里精选的属性代码示例或许可以为您提供帮助。您也可以进一步了解该属性所在模块 pandas 的用法示例。
在下文中一共展示了 pandas.NA 属性的 15 个代码示例，这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞，您的评价将有助于系统推荐出更棒的 Python 代码示例。
示例1: add_fips_ids
# 需要导入模块: import pandas [as 别名]
# 或者: from pandas import NA [as 别名]
def add_fips_ids(df, state_col="state", county_col="county", vintage=2015):
    """Add State and County FIPS IDs to a dataframe.

    Two columns, ``state_id_fips`` and ``county_id_fips``, are written onto
    ``df`` itself (the input is modified in place) and the same object is
    returned. The share of rows that received a non-null ID is logged after
    each lookup.

    Args:
        df (pandas.DataFrame): The dataframe to annotate.
        state_col (str): Label of the column whose values are passed to the
            FIPS lookup as ``state``.
        county_col (str): Label of the column whose values are passed to the
            FIPS lookup as ``county``.
        vintage (int): Forwarded to ``addfips.AddFIPS(vintage=...)``.

    Returns:
        pandas.DataFrame: ``df`` with the two FIPS ID columns added.
    """
    af = addfips.AddFIPS(vintage=vintage)
    # Look the IDs up row by row. The column labels come from the
    # ``state_col`` / ``county_col`` arguments so that dataframes which do not
    # use the default "state" / "county" names are handled as well.
    df["state_id_fips"] = df.apply(
        lambda x: af.get_state_fips(state=x[state_col]), axis=1)
    logger.info(
        f"Assigned state FIPS codes for "
        f"{len(df[df.state_id_fips.notnull()])/len(df):.2%} of records."
    )
    df["county_id_fips"] = df.apply(
        lambda x: af.get_county_fips(
            state=x[state_col], county=x[county_col]),
        axis=1)
    # Normalise whatever null marker the lookup produced to pandas.NA.
    df["county_id_fips"] = df.county_id_fips.fillna(pd.NA)
    logger.info(
        f"Assigned county FIPS codes for "
        f"{len(df[df.county_id_fips.notnull()])/len(df):.2%} of records."
    )
    return df
示例2: fix_eia_na
# 需要导入模块: import pandas [as 别名]
# 或者: from pandas import NA [as 别名]
def fix_eia_na(df):
    """Replace placeholder "missing" strings from EIA spreadsheets with NaN.

    A cell is replaced when its whole content is a single period, a single
    whitespace character, or the empty string.

    Args:
        df (pandas.DataFrame): The DataFrame to clean.

    Returns:
        pandas.DataFrame: A DataFrame in which the matching cells hold
        ``np.nan``.
    """
    placeholder_patterns = [r'^\.$', r'^\s$', r'^$']
    cleaned = df.replace(
        to_replace=placeholder_patterns,
        value=np.nan,
        regex=True,
    )
    return cleaned
示例3: __or__
# 需要导入模块: import pandas [as 别名]
# 或者: from pandas import NA [as 别名]
def __or__(self, other):
    """Compute the element-wise ``or`` of this boolean array with ``other``.

    ``other`` may be a missing scalar, a plain ``bool``, a
    ``FletcherBaseArray`` or anything ``or_vectorised`` accepts.

    Raises:
        NotImplementedError: If the underlying arrow type is not boolean.
    """
    if not pa.types.is_boolean(self.dtype.arrow_dtype):
        raise NotImplementedError("__or__ is only supported for boolean arrays yet")

    wrap = type(self)

    other_is_missing = other is pd.NA or (
        pd.api.types.is_scalar(other) and pd.isna(other)
    )
    if other_is_missing:
        # True entries stay True; every other entry becomes NA.
        return wrap(or_na(self.data))

    if isinstance(other, bool):
        # ``x | True`` is True everywhere, ``x | False`` leaves x unchanged.
        return wrap(all_true(self.data)) if other else self

    # Unwrap fletcher arrays so the kernel receives the underlying data.
    operand = other.data if isinstance(other, FletcherBaseArray) else other
    return wrap(or_vectorised(self.data, operand))
示例4: is_null
# 需要导入模块: import pandas [as 别名]
# 或者: from pandas import NA [as 别名]
def is_null(item: Any) -> bool:
    """
    Check whether a given item is null or corresponds to null.

    The following are treated as null: ``None``, any NaN value (not only the
    ``numpy.nan`` object itself), ``pandas.NA``, ``pandas.NaT`` and other
    scalars that ``pandas.isna`` reports as missing, plus the strings ``""``
    and ``" "``.

    Non-scalar inputs such as lists, dicts or arrays are never considered
    null; they return ``False`` rather than raising.

    Parameters
    ----------
    item: Any
        The item to check

    Returns
    -------
    bool
        Whether the given item is null or not
    """
    if isinstance(item, str):
        # Only the empty string and a single space count as null strings.
        return item in ("", " ")
    if not pd.api.types.is_scalar(item):
        # Containers are unhashable and pd.isna() would answer element-wise,
        # so a single boolean answer only makes sense for scalars.
        return False
    # pd.isna() compares by value, so NaN objects other than ``np.nan``
    # (e.g. ``float("nan")``) are recognised too.
    return bool(pd.isna(item))
示例5: test_to_parquet_file_dtype
# 需要导入模块: import pandas [as 别名]
# 或者: from pandas import NA [as 别名]
def test_to_parquet_file_dtype(path):
    """Write a frame with explicit column dtypes and verify the read-back."""
    source = pd.DataFrame({"c0": [1.0, None, 2.0], "c1": [pd.NA, pd.NA, pd.NA]})
    target = f"{path}0.parquet"
    requested_types = {"c0": "bigint", "c1": "string"}

    wr.s3.to_parquet(source, target, dtype=requested_types)
    wr.s3.wait_objects_exist(paths=[target])
    loaded = wr.s3.read_parquet(target)

    assert loaded.shape == source.shape
    assert loaded.c0.sum() == 3
    # The requested types should come back as the nullable pandas dtypes.
    assert str(loaded.c0.dtype) == "Int64"
    assert str(loaded.c1.dtype) == "string"
示例6: test_integer_na_values
# 需要导入模块: import pandas [as 别名]
# 或者: from pandas import NA [as 别名]
def test_integer_na_values(self, cursor):
    """Check how missing integers in ``integer_na_values`` reach pandas.

    The expected missing marker is ``pd.NA`` when the installed pandas
    reports version 1.0 or later and ``np.nan`` otherwise.
    """
    frame = cursor.execute("\nSELECT * FROM integer_na_values\n").as_pandas()
    rows = [(record["a"], record["b"]) for _, record in frame.iterrows()]

    major_minor = re.search(r"^([\d]+\.[\d]+)\..+", pd.__version__).group(1)
    # The conditional expression is lazy, so pd.NA is only touched on pandas
    # versions that report 1.0 or later.
    missing = pd.NA if float(major_minor) >= 1.0 else np.nan
    self.assertEqual(rows, [(1, 2), (1, missing), (missing, missing)])
示例7: _load_plant_utc_offset
# 需要导入模块: import pandas [as 别名]
# 或者: from pandas import NA [as 别名]
def _load_plant_utc_offset(datapkg_dir):
    """Load the UTC offset of each EIA plant.

    CEMS times don't change for DST, so the offset is taken from each plant's
    timezone as of January.

    Args:
        datapkg_dir (path-like): Path to the directory of the datapackage
            which is currently being assembled.

    Returns:
        pandas.DataFrame: With columns plant_id_eia and utc_offset.
    """
    import pytz

    csv_path = pathlib.Path(datapkg_dir, 'data/plants_entity_eia.csv')
    plants = pd.read_csv(
        csv_path,
        usecols=["plant_id_eia", "timezone"],
        dtype={"plant_id_eia": "Int64", "timezone": pd.StringDtype()},
    )
    # Treat the literal string "None" as missing, then drop incomplete rows.
    plants = plants.replace(to_replace="None", value=pd.NA).dropna()

    reference_time = datetime.datetime(2011, 1, 1)  # year doesn't matter

    def january_offset(tz_name):
        return pytz.timezone(tz_name).localize(reference_time).utcoffset()

    # Remove the timezone column while converting it to the offset column.
    tz_names = plants.pop("timezone")
    plants["utc_offset"] = tz_names.apply(january_offset)
    return plants
示例8: respondent_id
# 需要导入模块: import pandas [as 别名]
# 或者: from pandas import NA [as 别名]
def respondent_id(tfr_dfs):
    """
    Transform the FERC 714 respondent IDs, names, and EIA utility IDs.

    Respondent names are stripped of surrounding whitespace, EIA utility IDs
    equal to 0 become NA, respondents listed in ``BAD_RESPONDENTS`` are
    dropped, and the EIA utility IDs listed in ``MISSING_UTILITY_ID_EIA`` are
    filled in for respondents whose IDs FERC does not provide.

    Args:
        tfr_dfs (dict): A dictionary of (partially) transformed dataframes,
            to be cleaned up.

    Returns:
        dict: The input dictionary of dataframes, but with a finished
        respondent_id_ferc714 dataframe.
    """
    raw = tfr_dfs["respondent_id_ferc714"]
    tidied = raw.assign(
        utility_name_ferc714=raw.utility_name_ferc714.str.strip(),
        utility_id_eia=raw.utility_id_eia.replace(to_replace=0, value=pd.NA),
    )
    # Exclude the fake test IDs -- they are not real planning areas.
    df = tidied.query("utility_id_ferc714 not in @BAD_RESPONDENTS")

    # A few utilities seem mappable but have no EIA ID; assign them by hand.
    for rid in MISSING_UTILITY_ID_EIA:
        is_respondent = df.utility_id_ferc714 == rid
        df.loc[is_respondent, "utility_id_eia"] = MISSING_UTILITY_ID_EIA[rid]

    tfr_dfs["respondent_id_ferc714"] = df
    return tfr_dfs
示例9: fix_int_na
# 需要导入模块: import pandas [as 别名]
# 或者: from pandas import NA [as 别名]
def fix_int_na(df, columns, float_na=np.nan, int_na=-1, str_na=''):
    """Render NA-containing integer columns as integer-looking strings.

    Integer data with missing values is stored by pandas as floats, with
    np.nan marking the gaps. For output we want integer-formatted text with a
    chosen NA string instead. Each selected column goes through four steps:
    swap ``float_na`` for the ``int_na`` sentinel, cast to int, cast to str,
    and finally swap the sentinel's text for ``str_na``.

    This is an interim solution; nullable integer types such as Int64 are the
    intended long-term replacement.

    Args:
        df (pandas.DataFrame): The dataframe to be fixed. Taking it as the
            first argument allows method chaining with pipe().
        columns (iterable of strings): Labels of the columns to reformat.
        float_na (float): The floating point value to be interpreted as NA.
        int_na (int): Sentinel substituted for float_na before the integer
            conversion.
        str_na (str): Text substituted for the sentinel after the string
            conversion.

    Returns:
        pandas.DataFrame: A new DataFrame with the selected columns converted
        to strings that look like integers.
    """
    na_by_column = dict.fromkeys(columns, float_na)
    with_sentinel = df.replace(na_by_column, int_na)

    int_types = dict.fromkeys(columns, int)
    as_integers = with_sentinel.astype(int_types)

    str_types = dict.fromkeys(columns, str)
    as_strings = as_integers.astype(str_types)

    sentinel_text_by_column = dict.fromkeys(columns, str(int_na))
    return as_strings.replace(sentinel_text_by_column, str_na)
示例10: data_for_grouping
# 需要导入模块: import pandas [as 别名]
# 或者: from pandas import NA [as 别名]
def data_for_grouping(fletcher_type, fletcher_array):
    """Provide data for factorization, grouping, and unique tests.

    The values are expected to look like [B, B, NA, NA, A, A, B, C],
    where A < B < C and NA is missing.
    """
    values = fletcher_type.data_for_grouping
    return fletcher_array(values, dtype=fletcher_type.dtype)
示例11: data_missing_for_sorting
# 需要导入模块: import pandas [as 别名]
# 或者: from pandas import NA [as 别名]
def data_missing_for_sorting(fletcher_type, fletcher_array):
    """Provide a length-3 array with a known sort order.

    The values are expected to be the three items [B, NA, A],
    with A < B and NA missing.
    """
    values = fletcher_type.data_missing_for_sorting
    return fletcher_array(values, dtype=fletcher_type.dtype)
示例12: test_np_any
# 需要导入模块: import pandas [as 别名]
# 或者: from pandas import NA [as 别名]
def test_np_any(fletcher_array):
    """Check ``np.any`` on boolean fletcher arrays, with and without nulls."""
    # Any array holding at least one True must be truthy under np.any.
    for values in ([True, False, None], [True, False, True]):
        assert np.any(fletcher_array(values))

    # TODO(pandas-0.26): Enable the check below once BooleanArray has landed;
    # the behaviour changes then.
    # arr = fr.FletcherChunkedArray([False, False, None])
    # assert np.any(arr) is pd.NA

    assert not np.any(fletcher_array([False, False, False]))
示例13: test_or
# 需要导入模块: import pandas [as 别名]
# 或者: from pandas import NA [as 别名]
def test_or(fletcher_array):
    """Check ``|`` on boolean fletcher arrays for scalar and array operands."""

    def check(left_values, right, wanted_values):
        result = fletcher_array(left_values) | right
        expected = fletcher_array(wanted_values)
        pdt.assert_extension_array_equal(result, expected)

    # Scalar operand pd.NA: True stays True, everything else becomes null.
    check([True, False], pd.NA, [True, None])
    check([True, False, None], pd.NA, [True, None, None])

    # Scalar operand True / False.
    check([True, False, None], True, [True, True, True])
    check([True, False, None], False, [True, False, None])

    # Array operand, no nulls on either side.
    check(
        [True, False, False],
        fletcher_array([False, True, False]),
        [True, True, False],
    )

    # The remaining cases share the left operand and the expected result.
    with_nulls = [True, False, None, None]
    wanted = [True, True, None, True]
    # Only the left side has nulls.
    check(with_nulls, fletcher_array([False, True, False, True]), wanted)
    # Both sides have nulls.
    check(with_nulls, fletcher_array([None, True, False, True]), wanted)
    # The right side is a plain numpy array.
    check(with_nulls, np.array([False, True, False, True]), wanted)
示例14: isfinite
# 需要导入模块: import pandas [as 别名]
# 或者: from pandas import NA [as 别名]
def isfinite(val):
    """
    Determine whether a scalar or array value is finite.

    Extends np.isfinite with handling for None, dask arrays, string and
    bytes values, and datetime/timedelta types. Array inputs give an
    element-wise result; None gives False and string or bytes scalars give
    True.
    """
    dask_backed = is_dask_array(val)
    if not (np.isscalar(val) or dask_backed):
        val = asarray(val, strict=False)

    if val is None:
        return False

    if dask_backed:
        import dask.array as da
        return da.isfinite(val)

    if isinstance(val, np.ndarray):
        kind = val.dtype.kind
        if kind == 'M':
            # Datetime arrays: an entry is finite when it is not NaT.
            return ~isnat(val)
        if kind == 'O':
            # Object arrays: decide element by element.
            return np.array([isfinite(v) for v in val], dtype=bool)
        if kind in 'US':
            # String arrays: finite unless pandas reports a missing value.
            return ~pd.isna(val) if pd else np.ones_like(val, dtype=bool)
        finite = np.isfinite(val)
        if pd and pandas_version >= '1.0.0':
            finite &= ~pd.isna(val)
        return finite

    if isinstance(val, datetime_types+timedelta_types):
        return not isnat(val)
    if isinstance(val, (basestring, bytes)):
        return True

    finite = np.isfinite(val)
    if pd and pandas_version >= '1.0.0':
        # np.isfinite may hand back pd.NA itself, which cannot be combined
        # with ``&`` into a plain boolean.
        if finite is pd.NA:
            return False
        return finite & (~pd.isna(val))
    return finite
示例15: argmax
# 需要导入模块: import pandas [as 别名]
# 或者: from pandas import NA [as 别名]
def argmax(self, axis=None, skipna=True, *args, **kwargs):
    """Return the result of ``idxmax``, or -1 when that result is missing.

    Args:
        axis: Forwarded to ``self.idxmax``.
        skipna: Forwarded to ``self.idxmax``.
        *args: Extra positional arguments forwarded to ``self.idxmax``.
        **kwargs: Extra keyword arguments forwarded to ``self.idxmax``.

    Returns:
        Whatever ``self.idxmax`` returned, unless that value is missing
        (NaN, None or pandas.NA), in which case -1 is returned.
    """
    result = self.idxmax(*args, axis=axis, skipna=skipna, **kwargs)
    # pandas.isna() answers with a plain boolean for every scalar. Calling
    # np.isnan() here instead would return pandas.NA for a pandas.NA input
    # (whose truth value is ambiguous and raises) and would raise on
    # non-numeric results such as string labels.
    if pandas.isna(result):
        result = -1
    return result