当前位置: 首页>>代码示例>>Python>>正文


Python pandas.NA属性代码示例

本文整理汇总了Python中pandas.NA属性的典型用法代码示例。如果您正苦于以下问题：Python pandas.NA属性的具体用法是什么？Python pandas.NA怎么用？有哪些Python pandas.NA的使用例子？那么，这里精选的属性代码示例或许可以为您提供帮助。您也可以进一步了解该属性所在模块pandas的用法示例。


在下文中一共展示了pandas.NA属性的15个代码示例，这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞，您的评价将有助于系统推荐出更棒的Python代码示例。

示例1: add_fips_ids

# 需要导入模块: import pandas [as 别名]
# 或者: from pandas import NA [as 别名]
def add_fips_ids(df, state_col="state", county_col="county", vintage=2015):
    """Add State and County FIPS IDs to a dataframe.

    Args:
        df (pandas.DataFrame): Dataframe holding one column that names the
            state and one that names the county of each record. The new
            columns are added to this dataframe in place.
        state_col (str): Label of the column passed to the state lookup.
        county_col (str): Label of the column passed to the county lookup.
        vintage (int): Passed through to ``addfips.AddFIPS``.

    Returns:
        pandas.DataFrame: The input dataframe, with ``state_id_fips`` and
        ``county_id_fips`` columns added.

    """
    af = addfips.AddFIPS(vintage=vintage)
    # Look the values up through the caller-supplied column labels instead of
    # hard-coded ``state`` / ``county`` attributes, so that non-default
    # ``state_col`` / ``county_col`` arguments are actually honored.
    df["state_id_fips"] = df.apply(
        lambda x: af.get_state_fips(state=x[state_col]), axis=1)
    logger.info(
        f"Assigned state FIPS codes for "
        f"{len(df[df.state_id_fips.notnull()])/len(df):.2%} of records."
    )
    df["county_id_fips"] = df.apply(
        lambda x: af.get_county_fips(
            state=x[state_col], county=x[county_col]),
        axis=1)
    # Normalize whatever missing marker the lookup produced to pd.NA.
    df["county_id_fips"] = df.county_id_fips.fillna(pd.NA)
    logger.info(
        f"Assigned county FIPS codes for "
        f"{len(df[df.county_id_fips.notnull()])/len(df):.2%} of records."
    )
    return df
开发者ID:catalyst-cooperative,项目名称:pudl,代码行数:20,代码来源:helpers.py

示例2: fix_eia_na

# 需要导入模块: import pandas [as 别名]
# 或者: from pandas import NA [as 别名]
def fix_eia_na(df):
    """
    Replace common ill-posed EIA NA spreadsheet values with np.nan.

    A string cell is treated as missing when it consists of nothing but a
    single period, or when it is empty or made up entirely of whitespace
    (any number of spaces, tabs, etc.). Non-string cells are left untouched.

    Args:
        df (pandas.DataFrame): The DataFrame to clean.

    Returns:
        pandas.DataFrame: A new DataFrame in which the matching cells have
        been replaced by np.nan.

    """
    na_patterns = [
        r'^\.$',   # nothing but a decimal point
        r'^\s*$',  # the empty string and whitespace-only strings of any length
    ]
    return df.replace(to_replace=na_patterns, value=np.nan, regex=True)
开发者ID:catalyst-cooperative,项目名称:pudl,代码行数:18,代码来源:helpers.py

示例3: __or__

# 需要导入模块: import pandas [as 别名]
# 或者: from pandas import NA [as 别名]
def __or__(self, other):
        """Compute the element-wise ``or`` of this boolean array with ``other``.

        ``other`` may be a missing scalar, a Python ``bool``, another
        ``FletcherBaseArray`` or anything ``or_vectorised`` accepts. Arrays
        whose arrow dtype is not boolean raise ``NotImplementedError``.
        """
        if not pa.types.is_boolean(self.dtype.arrow_dtype):
            raise NotImplementedError("__or__ is only supported for boolean arrays yet")

        cls = type(self)

        other_is_missing = other is pd.NA or (
            pd.api.types.is_scalar(other) and pd.isna(other)
        )
        if other_is_missing:
            # True entries stay True; every other entry becomes NA.
            return cls(or_na(self.data))

        if isinstance(other, bool):
            # ``x | True`` is always True, ``x | False`` leaves x unchanged.
            return cls(all_true(self.data)) if other else self

        rhs = other.data if isinstance(other, FletcherBaseArray) else other
        return cls(or_vectorised(self.data, rhs))
开发者ID:xhochy,项目名称:fletcher,代码行数:20,代码来源:base.py

示例4: is_null

# 需要导入模块: import pandas [as 别名]
# 或者: from pandas import NA [as 别名]
def is_null(item: Any) -> bool:
        """
        Checks if a given item is null or corresponds to null.

        This method checks for: None, any NaN float (not only the
        ``numpy.nan`` object itself), pandas.NA, pandas.NaT, "", and " ".
        Containers and other non-scalar values are never considered null.

        Parameters
        ----------
        item: Any
            The item to check

        Returns
        -------
        bool
            Whether the given item is null or not

        """
        if isinstance(item, str):
            # Only the empty string and a single space count as null strings.
            return item in ("", " ")
        # A set lookup would only find NaN by object identity and would raise
        # TypeError for unhashable items. pd.isna handles every NaN/NA/NaT
        # scalar; the is_scalar guard keeps list-likes from being evaluated
        # element-wise.
        return bool(pd.api.types.is_scalar(item) and pd.isna(item))
开发者ID:NCATS-Tangerine,项目名称:kgx,代码行数:22,代码来源:pandas_transformer.py

示例5: test_to_parquet_file_dtype

# 需要导入模块: import pandas [as 别名]
# 或者: from pandas import NA [as 别名]
def test_to_parquet_file_dtype(path):
    """Write a frame with explicit dtype overrides and check the round trip."""
    target = f"{path}0.parquet"
    original = pd.DataFrame(
        {"c0": [1.0, None, 2.0], "c1": [pd.NA, pd.NA, pd.NA]}
    )

    wr.s3.to_parquet(original, target, dtype={"c0": "bigint", "c1": "string"})
    wr.s3.wait_objects_exist(paths=[target])
    roundtrip = wr.s3.read_parquet(target)

    assert roundtrip.shape == original.shape
    assert roundtrip.c0.sum() == 3
    # The requested casts should come back as the nullable pandas dtypes.
    assert str(roundtrip.c0.dtype) == "Int64"
    assert str(roundtrip.c1.dtype) == "string"
开发者ID:awslabs,项目名称:aws-data-wrangler,代码行数:12,代码来源:test_s3.py

示例6: test_integer_na_values

# 需要导入模块: import pandas [as 别名]
# 或者: from pandas import NA [as 别名]
def test_integer_na_values(self, cursor):
        """Check how missing integers come back from ``as_pandas()``.

        From pandas 1.0 on the test expects ``pd.NA``; on older versions it
        expects ``np.nan``.
        """
        frame = cursor.execute(
            """
            SELECT * FROM integer_na_values
            """
        ).as_pandas()
        observed = [(record["a"], record["b"]) for _, record in frame.iterrows()]

        major_minor = re.search(r"^([\d]+\.[\d]+)\..+", pd.__version__).group(1)
        if float(major_minor) >= 1.0:
            expected = [(1, 2), (1, pd.NA), (pd.NA, pd.NA)]
        else:
            expected = [(1, 2), (1, np.nan), (np.nan, np.nan)]
        self.assertEqual(observed, expected)
开发者ID:laughingman7743,项目名称:PyAthena,代码行数:14,代码来源:test_pandas_cursor.py

示例7: _load_plant_utc_offset

# 需要导入模块: import pandas [as 别名]
# 或者: from pandas import NA [as 别名]
def _load_plant_utc_offset(datapkg_dir):
    """Load the UTC offset of each EIA plant.

    CEMS times don't change for DST, so we get the UTC offset by using the
    offset for the plants' timezones in January.

    Args:
        datapkg_dir (path-like) : Path to the directory of the datapackage
            which is currently being assembled.

    Returns:
        pandas.DataFrame: With columns plant_id_eia and utc_offset

    """
    import pytz

    csv_path = pathlib.Path(datapkg_dir, 'data/plants_entity_eia.csv')
    plant_tz = pd.read_csv(
        csv_path,
        usecols=["plant_id_eia", "timezone"],
        dtype={"plant_id_eia": "Int64", "timezone": pd.StringDtype()},
    )
    # Plants without a usable timezone (literal "None" or missing) are dropped.
    plant_tz = plant_tz.replace(to_replace="None", value=pd.NA).dropna()

    reference = datetime.datetime(2011, 1, 1)  # year doesn't matter

    def _january_offset(tz_name):
        return pytz.timezone(tz_name).localize(reference).utcoffset()

    # pop() removes the timezone column while handing it over for conversion.
    plant_tz["utc_offset"] = plant_tz.pop("timezone").apply(_january_offset)
    return plant_tz
开发者ID:catalyst-cooperative,项目名称:pudl,代码行数:34,代码来源:epacems.py

示例8: respondent_id

# 需要导入模块: import pandas [as 别名]
# 或者: from pandas import NA [as 别名]
def respondent_id(tfr_dfs):
    """
    Transform the FERC 714 respondent IDs, names, and EIA utility IDs.

    Respondent names are stripped of surrounding whitespace, EIA utility IDs
    of 0 become NA, respondents listed in ``BAD_RESPONDENTS`` are dropped, and
    the EIA utility IDs listed in ``MISSING_UTILITY_ID_EIA`` are filled in by
    hand for respondents whose IDs FERC does not provide.

    Args:
        tfr_dfs (dict): A dictionary of (partially) transformed dataframes,
            to be cleaned up.

    Returns:
        dict: The input dictionary of dataframes, but with a finished
        respondent_id_ferc714 dataframe.

    """
    raw = tfr_dfs["respondent_id_ferc714"]
    cleaned = raw.assign(
        utility_name_ferc714=raw.utility_name_ferc714.str.strip(),
        utility_id_eia=raw.utility_id_eia.replace(to_replace=0, value=pd.NA),
    )
    # This excludes fake Test IDs -- not real planning areas
    cleaned = cleaned.query("utility_id_ferc714 not in @BAD_RESPONDENTS")

    # There are a few utilities that seem mappable, but missing:
    for rid in MISSING_UTILITY_ID_EIA:
        is_respondent = cleaned.utility_id_ferc714 == rid
        cleaned.loc[is_respondent, "utility_id_eia"] = MISSING_UTILITY_ID_EIA[rid]

    tfr_dfs["respondent_id_ferc714"] = cleaned
    return tfr_dfs
开发者ID:catalyst-cooperative,项目名称:pudl,代码行数:35,代码来源:ferc714.py

示例9: fix_int_na

# 需要导入模块: import pandas [as 别名]
# 或者: from pandas import NA [as 别名]
def fix_int_na(df, columns, float_na=np.nan, int_na=-1, str_na=''):
    """Convert NA containing integer columns from float to string.

    Numpy doesn't have a real NA value for integers. When pandas stores integer
    data which has NA values, it thus upcasts integers to floating point
    values, using np.nan values for NA. However, in order to dump some of our
    dataframes to CSV files for use in data packages, we need to write out
    integer formatted numbers, with empty strings as the NA value. This
    function replaces np.nan values with a sentinel value, converts the column
    to integers, and then to strings, finally replacing the sentinel value with
    the desired NA string.

    This is an interim solution -- now that pandas extension arrays have been
    implemented, we need to go back through and convert all of these integer
    columns that contain NA values to Nullable Integer types like Int64.

    Note that a genuine value equal to ``int_na`` in one of the selected
    columns is indistinguishable from the sentinel and also becomes ``str_na``.

    Args:
        df (pandas.DataFrame): The dataframe to be fixed. This argument allows
            method chaining with the pipe() method.
        columns (iterable of strings): DataFrame column labels indicating
            which columns need to be reformatted for output. Any iterable is
            accepted, including one-shot iterators such as generators.
        float_na (float): The floating point value to be interpreted as NA and
            replaced in col.
        int_na (int): Sentinel value to substitute for float_na prior to
            conversion of the column to integers.
        str_na (str): String value to substitute for int_na after the column
            has been converted to strings.

    Returns:
        df (pandas.DataFrame): a new DataFrame, with the selected columns
        converted to strings that look like integers, compatible with
        the postgresql COPY FROM command.

    """
    # The labels are walked four times below; materialize them once so that a
    # generator is not exhausted after the first step.
    columns = list(columns)
    return (
        df.replace({c: float_na for c in columns}, int_na)
          .astype({c: int for c in columns})
          .astype({c: str for c in columns})
          .replace({c: str(int_na) for c in columns}, str_na)
    )
开发者ID:catalyst-cooperative,项目名称:pudl,代码行数:42,代码来源:helpers.py

示例10: data_for_grouping

# 需要导入模块: import pandas [as 别名]
# 或者: from pandas import NA [as 别名]
def data_for_grouping(fletcher_type, fletcher_array):
    """Build the array used by factorization, grouping, and unique tests.

    The values are expected to look like [B, B, NA, NA, A, A, B, C],
    where A < B < C and NA is missing.
    """
    build = fletcher_array
    values = fletcher_type.data_for_grouping
    return build(values, dtype=fletcher_type.dtype)
开发者ID:xhochy,项目名称:fletcher,代码行数:10,代码来源:test_pandas_extension.py

示例11: data_missing_for_sorting

# 需要导入模块: import pandas [as 别名]
# 或者: from pandas import NA [as 别名]
def data_missing_for_sorting(fletcher_type, fletcher_array):
    """Build a length-3 array with a known sort order.

    The three items should be [B, NA, A], with A < B and NA missing.
    """
    build = fletcher_array
    values = fletcher_type.data_missing_for_sorting
    return build(values, dtype=fletcher_type.dtype)
开发者ID:xhochy,项目名称:fletcher,代码行数:11,代码来源:test_pandas_extension.py

示例12: test_np_any

# 需要导入模块: import pandas [as 别名]
# 或者: from pandas import NA [as 别名]
def test_np_any(fletcher_array):
    """Check ``np.any`` on boolean arrays with and without a null entry."""
    # Any array holding at least one True is truthy under np.any.
    for values in ([True, False, None], [True, False, True]):
        assert np.any(fletcher_array(values))

    # TODO(pandas-0.26): Uncomment this when BooleanArray landed.
    #   Then we change the behaviour.
    # arr = fr.FletcherChunkedArray([False, False, None])
    # assert np.any(arr) is pd.NA

    all_false = fletcher_array([False, False, False])
    assert not np.any(all_false)
开发者ID:xhochy,项目名称:fletcher,代码行数:16,代码来源:test_boolean.py

示例13: test_or

# 需要导入模块: import pandas [as 别名]
# 或者: from pandas import NA [as 别名]
def test_or(fletcher_array):
    """Check ``|`` on boolean arrays against NA, bool scalars and arrays."""

    def check(left, right, expected):
        # Build the left operand, apply ``|`` and compare with the expectation.
        result = fletcher_array(left) | right
        pdt.assert_extension_array_equal(result, fletcher_array(expected))

    # Scalar right-hand operands: NA first, then True and False.
    check([True, False], pd.NA, [True, None])
    check([True, False, None], pd.NA, [True, None, None])
    check([True, False, None], True, [True, True, True])
    check([True, False, None], False, [True, False, None])

    # Array right-hand operand, no nulls on either side.
    check(
        [True, False, False],
        fletcher_array([False, True, False]),
        [True, True, False],
    )

    # The remaining cases share the same left operand and expected result:
    # nulls on the left only, nulls on both sides, and a plain numpy operand.
    with_nulls = [True, False, None, None]
    shared_expected = [True, True, None, True]
    check(with_nulls, fletcher_array([False, True, False, True]), shared_expected)
    check(with_nulls, fletcher_array([None, True, False, True]), shared_expected)
    check(with_nulls, np.array([False, True, False, True]), shared_expected)
开发者ID:xhochy,项目名称:fletcher,代码行数:42,代码来源:test_boolean.py

示例14: isfinite

# 需要导入模块: import pandas [as 别名]
# 或者: from pandas import NA [as 别名]
def isfinite(val):
    """
    Helper function to determine if scalar or array value is finite extending
    np.isfinite with support for None, string, datetime types.

    Returns False for None, a dask result for dask arrays, a boolean array
    for numpy arrays, and a scalar result otherwise. Strings and bytes
    scalars are always reported as finite.
    """
    is_dask = is_dask_array(val)
    # Non-scalar, non-dask input goes through ``asarray`` (defined elsewhere;
    # presumably it coerces array-likes to ndarray -- TODO confirm).
    if not np.isscalar(val) and not is_dask:
        val = asarray(val, strict=False)

    if val is None:
        return False
    elif is_dask:
        # Imported lazily so dask is only needed when a dask array is passed.
        import dask.array as da
        return da.isfinite(val)
    elif isinstance(val, np.ndarray):
        if val.dtype.kind == 'M':
            # datetime64 arrays: finite wherever the value is not NaT.
            return ~isnat(val)
        elif val.dtype.kind == 'O':
            # Object arrays: decide element by element via recursion.
            return np.array([isfinite(v) for v in val], dtype=bool)
        elif val.dtype.kind in 'US':
            # Unicode / bytes string arrays: np.isfinite does not apply, so
            # only missing values are flagged. ``pd`` is tested for truthiness;
            # presumably it is None when pandas is unavailable -- TODO confirm.
            return ~pd.isna(val) if pd else np.ones_like(val, dtype=bool)
        finite = np.isfinite(val)
        # NOTE(review): ``pandas_version`` is compared against a string here;
        # its type is defined elsewhere -- confirm the comparison is
        # version-aware rather than lexicographic.
        if pd and pandas_version >= '1.0.0':
            finite &= ~pd.isna(val)
        return finite
    elif isinstance(val, datetime_types+timedelta_types):
        return not isnat(val)
    elif isinstance(val, (basestring, bytes)):
        return True
    # Remaining scalars.
    finite = np.isfinite(val)
    if pd and pandas_version >= '1.0.0':
        # Guard against np.isfinite handing back pd.NA itself (presumably what
        # happens for a pd.NA input), which cannot be combined with ``&`` into
        # a plain boolean.
        if finite is pd.NA:
            return False
        return finite & (~pd.isna(val))
    return finite
开发者ID:holoviz,项目名称:holoviews,代码行数:37,代码来源:util.py

示例15: argmax

# 需要导入模块: import pandas [as 别名]
# 或者: from pandas import NA [as 别名]
def argmax(self, axis=None, skipna=True, *args, **kwargs):
        """Return the result of ``idxmax``, or -1 when that result is missing.

        Parameters
        ----------
        axis, skipna, *args, **kwargs
            Forwarded unchanged to ``self.idxmax``.

        Returns
        -------
        The value returned by ``self.idxmax``, or ``-1`` if that value is
        NaN or ``pandas.NA``.
        """
        result = self.idxmax(*args, axis=axis, skipna=skipna, **kwargs)
        # ``np.isnan(result) or ...`` raised TypeError both for pandas.NA (its
        # truth value is ambiguous) and for non-numeric labels. pandas.isna
        # handles every scalar, and the is_scalar guard leaves non-scalar
        # labels such as tuples untouched.
        if pandas.api.types.is_scalar(result) and pandas.isna(result):
            result = -1
        return result
开发者ID:modin-project,项目名称:modin,代码行数:7,代码来源:series.py


注:本文中的pandas.NA属性示例由纯净天空整理自Github/MSDocs等开源代码及文档管理平台,相关代码片段筛选自各路编程大神贡献的开源项目,源码版权归原作者所有,传播和使用请参考对应项目的License;未经允许,请勿转载。