當前位置: 首頁>>代碼示例>>Python>>正文


Python pandas.NA屬性代碼示例

本文整理匯總了Python中pandas.NA屬性的典型用法代碼示例。如果您正苦於以下問題：Python pandas.NA屬性的具體用法？Python pandas.NA怎麼用？Python pandas.NA使用的例子？那麼，這裏精選的屬性代碼示例或許可以為您提供幫助。您也可以進一步了解該屬性所在pandas的用法示例。


在下文中一共展示了pandas.NA屬性的15個代碼示例，這些例子默認根據受歡迎程度排序。您可以為喜歡或者感覺有用的代碼點讚，您的評價將有助於系統推薦出更棒的Python代碼示例。

示例1: add_fips_ids

# 需要導入模塊: import pandas [as 別名]
# 或者: from pandas import NA [as 別名]
def add_fips_ids(df, state_col="state", county_col="county", vintage=2015):
    """Add State and County FIPS IDs to a dataframe.

    Args:
        df (pandas.DataFrame): Records to annotate. The two new columns are
            assigned onto this object, which is also the return value.
        state_col (str): Label of the column whose values are passed to the
            lookups as ``state``.
        county_col (str): Label of the column whose values are passed to the
            county lookup as ``county``.
        vintage (int): Forwarded to ``addfips.AddFIPS``.

    Returns:
        pandas.DataFrame: ``df`` with ``state_id_fips`` and ``county_id_fips``
        columns added.

    """
    af = addfips.AddFIPS(vintage=vintage)
    # Look the inputs up by the configured column labels; hard-coding
    # ``x.state`` / ``x.county`` silently ignored state_col and county_col.
    df["state_id_fips"] = df.apply(
        lambda x: af.get_state_fips(state=x[state_col]), axis=1)
    logger.info(
        f"Assigned state FIPS codes for "
        f"{len(df[df.state_id_fips.notnull()])/len(df):.2%} of records."
    )
    df["county_id_fips"] = df.apply(
        lambda x: af.get_county_fips(
            state=x[state_col], county=x[county_col]),
        axis=1)
    # Normalise whatever missing marker the lookup produced to pd.NA.
    df["county_id_fips"] = df.county_id_fips.fillna(pd.NA)
    logger.info(
        f"Assigned county FIPS codes for "
        f"{len(df[df.county_id_fips.notnull()])/len(df):.2%} of records."
    )
    return df
開發者ID:catalyst-cooperative,項目名稱:pudl,代碼行數:20,代碼來源:helpers.py

示例2: fix_eia_na

# 需要導入模塊: import pandas [as 別名]
# 或者: from pandas import NA [as 別名]
def fix_eia_na(df):
    """
    Replace common ill-posed EIA NA spreadsheet values with np.nan.

    A cell is treated as missing when its entire content is a single period,
    or when it is empty or consists only of whitespace (any number of
    whitespace characters, not just one).

    Args:
        df (pandas.DataFrame): The DataFrame to clean.

    Returns:
        pandas.DataFrame: The cleaned DataFrame. The input is not modified.

    """
    # r'^\s*$' covers both the empty string and whitespace runs of any
    # length; the previous r'^\s$' only matched exactly one character.
    return df.replace(to_replace=[r'^\.$', r'^\s*$'],
                      value=np.nan, regex=True)
開發者ID:catalyst-cooperative,項目名稱:pudl,代碼行數:18,代碼來源:helpers.py

示例3: __or__

# 需要導入模塊: import pandas [as 別名]
# 或者: from pandas import NA [as 別名]
def __or__(self, other):
        """Element-wise ``|`` between this boolean array and ``other``.

        ``other`` may be a missing scalar, a Python ``bool``, a
        ``FletcherBaseArray`` (its ``.data`` is used) or anything else that
        ``or_vectorised`` accepts as right-hand operand.

        Raises
        ------
        NotImplementedError
            If the underlying arrow dtype is not boolean.
        """
        if not pa.types.is_boolean(self.dtype.arrow_dtype):
            raise NotImplementedError("__or__ is only supported for boolean arrays yet")

        wrap = type(self)

        other_is_missing = other is pd.NA or (
            pd.api.types.is_scalar(other) and pd.isna(other)
        )
        if other_is_missing:
            # True entries stay True; everything else becomes NA.
            return wrap(or_na(self.data))

        if isinstance(other, bool):
            # ``x | True`` is all-True, ``x | False`` leaves the array as is.
            return wrap(all_true(self.data)) if other else self

        rhs = other.data if isinstance(other, FletcherBaseArray) else other
        return wrap(or_vectorised(self.data, rhs))
開發者ID:xhochy,項目名稱:fletcher,代碼行數:20,代碼來源:base.py

示例4: is_null

# 需要導入模塊: import pandas [as 別名]
# 或者: from pandas import NA [as 別名]
def is_null(item: Any) -> bool:
        """
        Checks if a given item is null or correspond to null.

        This method checks for: None, NaN (any NaN scalar, not only the
        ``numpy.nan`` object itself), pandas.NA, pandas.NaT, "", and " ".
        Non-scalar items such as lists or dicts are never considered null.

        Parameters
        ----------
        item: Any
            The item to check

        Returns
        -------
        bool
            Whether the given item is null or not

        """
        if isinstance(item, str):
            return item in ("", " ")
        # Containers are not scalars; a set-membership test would raise
        # TypeError for the unhashable ones, so rule them out explicitly.
        if not pd.api.types.is_scalar(item):
            return False
        # pd.isna recognises None, pd.NA, pd.NaT and every NaN value, whereas
        # ``nan in {np.nan}`` only succeeds for the identical np.nan object.
        return bool(pd.isna(item))
開發者ID:NCATS-Tangerine,項目名稱:kgx,代碼行數:22,代碼來源:pandas_transformer.py

示例5: test_to_parquet_file_dtype

# 需要導入模塊: import pandas [as 別名]
# 或者: from pandas import NA [as 別名]
def test_to_parquet_file_dtype(path):
    """Write a frame to ``{path}0.parquet`` with explicit dtypes and read it back.

    Checks that the shape survives the round trip, that ``c0`` sums to 3 and
    is reported as ``Int64``, and that the all-NA column ``c1`` is reported
    as ``string``.
    """
    target = f"{path}0.parquet"
    written = pd.DataFrame(
        {
            "c0": [1.0, None, 2.0],
            "c1": [pd.NA, pd.NA, pd.NA],
        }
    )
    requested_dtypes = {"c0": "bigint", "c1": "string"}

    wr.s3.to_parquet(written, target, dtype=requested_dtypes)
    wr.s3.wait_objects_exist(paths=[target])
    loaded = wr.s3.read_parquet(target)

    assert loaded.shape == written.shape
    assert loaded.c0.sum() == 3
    assert str(loaded.c0.dtype) == "Int64"
    assert str(loaded.c1.dtype) == "string"
開發者ID:awslabs,項目名稱:aws-data-wrangler,代碼行數:12,代碼來源:test_s3.py

示例6: test_integer_na_values

# 需要導入模塊: import pandas [as 別名]
# 或者: from pandas import NA [as 別名]
def test_integer_na_values(self, cursor):
        """Check how missing integers come back from ``integer_na_values``.

        With pandas 1.0 or newer the missing cells are expected to be
        ``pd.NA``; with older versions they are expected to be ``np.nan``.
        """
        df = cursor.execute(
            """
            SELECT * FROM integer_na_values
            """
        ).as_pandas()
        rows = [(row["a"], row["b"]) for _, row in df.iterrows()]
        # Compare (major, minor) as integers. Parsing "major.minor" as a float
        # mis-orders releases: "1.10" would become 1.1 and sort before 1.2.
        major, minor = (
            int(part)
            for part in re.match(r"(\d+)\.(\d+)", pd.__version__).groups()
        )
        if (major, minor) >= (1, 0):
            self.assertEqual(rows, [(1, 2), (1, pd.NA), (pd.NA, pd.NA)])
        else:
            self.assertEqual(rows, [(1, 2), (1, np.nan), (np.nan, np.nan)])
開發者ID:laughingman7743,項目名稱:PyAthena,代碼行數:14,代碼來源:test_pandas_cursor.py

示例7: _load_plant_utc_offset

# 需要導入模塊: import pandas [as 別名]
# 或者: from pandas import NA [as 別名]
def _load_plant_utc_offset(datapkg_dir):
    """Load the UTC offset of each EIA plant.

    CEMS times don't change for DST, so the offset is taken from each plant's
    timezone as of January.

    Args:
        datapkg_dir (path-like) : Path to the directory of the datapackage
            which is currently being assembled.

    Returns:
        pandas.DataFrame: With columns plant_id_eia and utc_offset

    """
    import pytz

    # Any January date works; the year is irrelevant.
    reference_dt = datetime.datetime(2011, 1, 1)

    def _january_offset(tz_name):
        return pytz.timezone(tz_name).localize(reference_dt).utcoffset()

    csv_path = pathlib.Path(datapkg_dir, 'data/plants_entity_eia.csv')
    plants = pd.read_csv(
        csv_path,
        usecols=["plant_id_eia", "timezone"],
        dtype={"plant_id_eia": "Int64", "timezone": pd.StringDtype()},
    )
    # The literal string "None" stands for a missing value; drop such rows.
    plants = plants.replace(to_replace="None", value=pd.NA).dropna()

    # Swap the timezone name column for the computed offset column.
    tz_names = plants.pop("timezone")
    plants["utc_offset"] = tz_names.apply(_january_offset)
    return plants
開發者ID:catalyst-cooperative,項目名稱:pudl,代碼行數:34,代碼來源:epacems.py

示例8: respondent_id

# 需要導入模塊: import pandas [as 別名]
# 或者: from pandas import NA [as 別名]
def respondent_id(tfr_dfs):
    """
    Clean up the FERC 714 respondent ID table.

    Respondent names are stripped of surrounding whitespace, EIA utility IDs
    of 0 are replaced by NA, respondents listed in ``BAD_RESPONDENTS`` are
    removed, and the EIA utility IDs listed in ``MISSING_UTILITY_ID_EIA`` are
    filled in for the matching FERC 714 respondents.

    Args:
        tfr_dfs (dict): A dictionary of (partially) transformed dataframes,
            to be cleaned up.

    Returns:
        dict: The input dictionary of dataframes, but with a finished
        respondent_id_ferc714 dataframe.

    """
    raw = tfr_dfs["respondent_id_ferc714"]
    cleaned = raw.assign(
        utility_name_ferc714=raw.utility_name_ferc714.str.strip(),
        utility_id_eia=raw.utility_id_eia.replace(to_replace=0, value=pd.NA),
    )
    # Exclude the fake test IDs -- they are not real planning areas.
    df = cleaned.query("utility_id_ferc714 not in @BAD_RESPONDENTS")

    # A few utilities seem mappable but have no EIA ID; assign them by hand.
    for rid, eia_id in MISSING_UTILITY_ID_EIA.items():
        is_respondent = df.utility_id_ferc714 == rid
        df.loc[is_respondent, "utility_id_eia"] = eia_id

    tfr_dfs["respondent_id_ferc714"] = df
    return tfr_dfs
開發者ID:catalyst-cooperative,項目名稱:pudl,代碼行數:35,代碼來源:ferc714.py

示例9: fix_int_na

# 需要導入模塊: import pandas [as 別名]
# 或者: from pandas import NA [as 別名]
def fix_int_na(df, columns, float_na=np.nan, int_na=-1, str_na=''):
    """Render NA-containing integer columns as integer-looking strings.

    The selected columns go through four steps: ``float_na`` values are
    replaced by the ``int_na`` sentinel, the column is cast to ``int``, then
    to ``str``, and finally the stringified sentinel is replaced by
    ``str_na``. Columns not listed in ``columns`` are left alone.

    This is an interim solution; nullable integer types such as Int64 make
    the sentinel round trip unnecessary.

    Args:
        df (pandas.DataFrame): The dataframe to be fixed. Taking it as the
            first argument allows method chaining with pipe().
        columns (iterable of strings): Labels of the columns to reformat.
        float_na (float): The value to be interpreted as NA and replaced.
        int_na (int): Sentinel substituted for float_na before the integer
            conversion.
        str_na (str): Value substituted for the sentinel once the column has
            been converted to strings.

    Returns:
        pandas.DataFrame: a new DataFrame in which the selected columns hold
        strings that look like integers.

    """
    # Step 1: NA marker -> integer sentinel, so the int cast cannot fail.
    na_per_column = dict.fromkeys(columns, float_na)
    with_sentinel = df.replace(na_per_column, int_na)

    # Steps 2 and 3: float -> int drops the ".0", int -> str fixes the text.
    as_integers = with_sentinel.astype(dict.fromkeys(columns, int))
    as_strings = as_integers.astype(dict.fromkeys(columns, str))

    # Step 4: stringified sentinel -> requested NA text.
    sentinel_per_column = dict.fromkeys(columns, str(int_na))
    return as_strings.replace(sentinel_per_column, str_na)
開發者ID:catalyst-cooperative,項目名稱:pudl,代碼行數:42,代碼來源:helpers.py

示例10: data_for_grouping

# 需要導入模塊: import pandas [as 別名]
# 或者: from pandas import NA [as 別名]
def data_for_grouping(fletcher_type, fletcher_array):
    """Build the array used by factorization, grouping, and unique tests.

    The values come from ``fletcher_type.data_for_grouping`` and are expected
    to look like [B, B, NA, NA, A, A, B, C], where A < B < C and NA is
    missing.
    """
    values = fletcher_type.data_for_grouping
    return fletcher_array(values, dtype=fletcher_type.dtype)
開發者ID:xhochy,項目名稱:fletcher,代碼行數:10,代碼來源:test_pandas_extension.py

示例11: data_missing_for_sorting

# 需要導入模塊: import pandas [as 別名]
# 或者: from pandas import NA [as 別名]
def data_missing_for_sorting(fletcher_type, fletcher_array):
    """Build a length-3 array with a known sort order and one missing value.

    The values come from ``fletcher_type.data_missing_for_sorting`` and
    should be three items [B, NA, A] with A < B and NA missing.
    """
    values = fletcher_type.data_missing_for_sorting
    return fletcher_array(values, dtype=fletcher_type.dtype)
開發者ID:xhochy,項目名稱:fletcher,代碼行數:11,代碼來源:test_pandas_extension.py

示例12: test_np_any

# 需要導入模塊: import pandas [as 別名]
# 或者: from pandas import NA [as 別名]
def test_np_any(fletcher_array):
    """``np.any`` is truthy when a True is present and falsy for all-False."""
    truthy_inputs = (
        [True, False, None],
        [True, False, True],
    )
    for values in truthy_inputs:
        assert np.any(fletcher_array(values))

    # TODO(pandas-0.26): Uncomment this when BooleanArray landed.
    #   Then we change the behaviour.
    # arr = fr.FletcherChunkedArray([False, False, None])
    # assert np.any(arr) is pd.NA

    all_false = fletcher_array([False, False, False])
    assert not np.any(all_false)
開發者ID:xhochy,項目名稱:fletcher,代碼行數:16,代碼來源:test_boolean.py

示例13: test_or

# 需要導入模塊: import pandas [as 別名]
# 或者: from pandas import NA [as 別名]
def test_or(fletcher_array):
    """Check ``|`` against NA, bool scalars, fletcher arrays and numpy arrays."""

    def check(left, right, expected):
        # ``left`` and ``expected`` are plain lists; ``right`` is used as is.
        result = fletcher_array(left) | right
        pdt.assert_extension_array_equal(result, fletcher_array(expected))

    # Scalar right-hand operands.
    check([True, False], pd.NA, [True, None])
    check([True, False, None], pd.NA, [True, None, None])
    check([True, False, None], True, [True, True, True])
    check([True, False, None], False, [True, False, None])

    # Array right-hand operand, no nulls on either side.
    check(
        [True, False, False],
        fletcher_array([False, True, False]),
        [True, True, False],
    )

    # The remaining cases share the left operand and the expected result.
    left = [True, False, None, None]
    expected = [True, True, None, True]
    # Only the left side has nulls.
    check(left, fletcher_array([False, True, False, True]), expected)
    # Both sides have nulls.
    check(left, fletcher_array([None, True, False, True]), expected)
    # Right side is a plain numpy array.
    check(left, np.array([False, True, False, True]), expected)
開發者ID:xhochy,項目名稱:fletcher,代碼行數:42,代碼來源:test_boolean.py

示例14: isfinite

# 需要導入模塊: import pandas [as 別名]
# 或者: from pandas import NA [as 別名]
def isfinite(val):
    """
    Helper function to determine if scalar or array value is finite extending
    np.isfinite with support for None, string, datetime types.

    The branches are tried in this order:

    * ``None`` -> ``False``
    * dask array -> ``dask.array.isfinite(val)``
    * ``np.ndarray`` -> a boolean mask chosen by dtype kind (datetime,
      object, string, or anything else)
    * datetime / timedelta scalar -> ``not isnat(val)``
    * string / bytes scalar -> ``True``
    * anything else -> ``np.isfinite(val)``, additionally treating values
      pandas considers missing as non-finite when pandas is available and
      ``pandas_version >= '1.0.0'``
    """
    is_dask = is_dask_array(val)
    # Non-scalar, non-dask inputs are passed through asarray first.
    # NOTE(review): None is not a numpy scalar, so it also goes through
    # asarray here; presumably asarray(None, strict=False) hands back None,
    # since the next check tests for it -- confirm against asarray.
    if not np.isscalar(val) and not is_dask:
        val = asarray(val, strict=False)

    if val is None:
        return False
    elif is_dask:
        # Imported lazily, only on the dask path.
        import dask.array as da
        return da.isfinite(val)
    elif isinstance(val, np.ndarray):
        if val.dtype.kind == 'M':
            # datetime64 arrays: finite means "not NaT".
            return ~isnat(val)
        elif val.dtype.kind == 'O':
            # Object arrays: decide element by element via recursion.
            return np.array([isfinite(v) for v in val], dtype=bool)
        elif val.dtype.kind in 'US':
            # String arrays: every entry counts as finite unless pandas
            # (when available) reports it as missing.
            return ~pd.isna(val) if pd else np.ones_like(val, dtype=bool)
        finite = np.isfinite(val)
        if pd and pandas_version >= '1.0.0':
            # Also mask out entries that pandas reports as missing.
            finite &= ~pd.isna(val)
        return finite
    elif isinstance(val, datetime_types+timedelta_types):
        return not isnat(val)
    elif isinstance(val, (basestring, bytes)):
        return True
    finite = np.isfinite(val)
    if pd and pandas_version >= '1.0.0':
        # If np.isfinite handed back pd.NA itself, report non-finite rather
        # than combining NA with the mask below.
        if finite is pd.NA:
            return False
        return finite & (~pd.isna(val))
    return finite 
開發者ID:holoviz,項目名稱:holoviews,代碼行數:37,代碼來源:util.py

示例15: argmax

# 需要導入模塊: import pandas [as 別名]
# 或者: from pandas import NA [as 別名]
def argmax(self, axis=None, skipna=True, *args, **kwargs):
        """Return the result of ``idxmax``, or -1 when that result is missing.

        Parameters
        ----------
        axis, skipna, *args, **kwargs
            Forwarded unchanged to ``self.idxmax``.

        Returns
        -------
        The value returned by ``self.idxmax``, or ``-1`` if that value is a
        missing scalar (NaN, ``pandas.NA``, ``None`` or ``NaT``).
        """
        result = self.idxmax(*args, axis=axis, skipna=skipna, **kwargs)
        # ``np.isnan(result) or result is pandas.NA`` raised TypeError both
        # for pandas.NA (np.isnan returns NA, whose truth value is ambiguous)
        # and for non-numeric results such as strings. pandas.isna handles
        # every scalar; the is_scalar guard keeps array-likes out of ``if``.
        if pandas.api.types.is_scalar(result) and pandas.isna(result):
            result = -1
        return result
開發者ID:modin-project,項目名稱:modin,代碼行數:7,代碼來源:series.py


注:本文中的pandas.NA屬性示例由純淨天空整理自Github/MSDocs等開源代碼及文檔管理平台,相關代碼片段篩選自各路編程大神貢獻的開源項目,源碼版權歸原作者所有,傳播和使用請參考對應項目的License;未經允許,請勿轉載。