當前位置: 首頁>>代碼示例>>Python>>正文


Python types.is_categorical_dtype方法代碼示例

本文整理匯總了Python中pandas.api.types.is_categorical_dtype方法的典型用法代碼示例。如果您正苦於以下問題:Python types.is_categorical_dtype方法的具體用法?Python types.is_categorical_dtype怎麽用?Python types.is_categorical_dtype使用的例子?那麽, 這裏精選的方法代碼示例或許可以為您提供幫助。您也可以進一步了解該方法所在pandas.api.types的用法示例。


在下文中一共展示了types.is_categorical_dtype方法的15個代碼示例,這些例子默認根據受歡迎程度排序。您可以為喜歡或者感覺有用的代碼點讚,您的評價將有助於係統推薦出更棒的Python代碼示例。

示例1: write_series

# 需要導入模塊: from pandas.api import types [as 別名]
# 或者: from pandas.api.types import is_categorical_dtype [as 別名]
def write_series(group, key, series, dataset_kwargs=MappingProxyType({})):
    """Write a pandas Series (or Index) into an HDF5 group.

    Object-dtype columns are stored as variable-length strings, categorical
    columns are split into a codes array plus a referenced categories array,
    and every other dtype is written directly.

    NOTE(review): ``group`` must be an h5py Group — the categorical branch
    relies on h5py object references.
    """
    if series.dtype == object:
        # Object dtype is assumed to hold strings; store as vlen str.
        group.create_dataset(
            key,
            data=series.values,
            dtype=h5py.special_dtype(vlen=str),
            **dataset_kwargs,
        )
    elif is_categorical_dtype(series):
        # Works for both a categorical Series and a categorical Index.
        categorical: pd.Categorical = series.values
        cats: np.ndarray = categorical.categories.values
        cat_codes: np.ndarray = categorical.codes
        cat_key = f"__categories/{key}"

        write_array(group, cat_key, cats, dataset_kwargs=dataset_kwargs)
        write_array(group, key, cat_codes, dataset_kwargs=dataset_kwargs)

        # Link the codes dataset to its categories via an object reference,
        # and record orderedness alongside the categories.
        group[key].attrs["categories"] = group[cat_key].ref
        group[cat_key].attrs["ordered"] = categorical.ordered
    else:
        group[key] = series.values
開發者ID:theislab,項目名稱:anndata,代碼行數:25,代碼來源:h5ad.py

示例2: write_series

# 需要導入模塊: from pandas.api import types [as 別名]
# 或者: from pandas.api.types import is_categorical_dtype [as 別名]
def write_series(group, key, series, dataset_kwargs=MappingProxyType({})):
    """Write a pandas Series (or Index) into a zarr group.

    Object-dtype columns become variable-length UTF-8 strings, categoricals
    are stored as a codes array plus a separate categories array, and all
    other dtypes are written as-is.
    """
    if series.dtype == object:
        # Create the dataset first so the VLenUTF8 codec is attached,
        # then fill it with the string values.
        group.create_dataset(
            key,
            shape=series.shape,
            dtype=object,
            object_codec=numcodecs.VLenUTF8(),
            **dataset_kwargs,
        )
        group[key][:] = series.values
    elif is_categorical_dtype(series):
        # Works for both a categorical Series and a categorical Index.
        categorical: pd.Categorical = series.values
        cats: np.ndarray = categorical.categories.values
        cat_codes: np.ndarray = categorical.codes
        cat_key = f"__categories/{key}"

        write_array(group, cat_key, cats, dataset_kwargs=dataset_kwargs)
        write_array(group, key, cat_codes, dataset_kwargs=dataset_kwargs)

        # zarr attrs are JSON: store the categories key as a plain string
        # and coerce np.bool_ to bool so it serializes.
        group[key].attrs["categories"] = cat_key
        group[cat_key].attrs["ordered"] = bool(categorical.ordered)
    else:
        group[key] = series.values
開發者ID:theislab,項目名稱:anndata,代碼行數:27,代碼來源:zarr.py

示例3: _fit

# 需要導入模塊: from pandas.api import types [as 別名]
# 或者: from pandas.api.types import is_categorical_dtype [as 別名]
def _fit(self, X: DataFrameType):
    """Collect the columns to categorize and their category metadata.

    Returns a ``(columns, categories)`` tuple where ``categories`` maps each
    column name to its CategoricalDtype (when pandas supports it) or to a
    ``(categories, ordered)`` pair otherwise.
    """
    if self.columns is None:
        columns = X.select_dtypes(include=["object", "category"]).columns
    else:
        columns = self.columns

    categories = {}
    for name in columns:
        column = X[name]
        if not is_categorical_dtype(column):
            # dask.arrays never reach this point: their object columns
            # were already converted to known categoricals.
            column = pd.Series(column, index=X.index).astype("category")

        if _HAS_CTD:
            categories[name] = column.dtype
        else:
            categories[name] = (column.cat.categories, column.cat.ordered)

    return columns, categories
開發者ID:dask,項目名稱:dask-ml,代碼行數:22,代碼來源:data.py

示例4: _add_group_columns

# 需要導入模塊: from pandas.api import types [as 別名]
# 或者: from pandas.api.types import is_categorical_dtype [as 別名]
def _add_group_columns(data, gdf):
    """
    Add group columns to data with a value from the grouped dataframe

    It is assumed that the grouped dataframe contains a single group

    >>> data = pd.DataFrame({
    ...     'x': [5, 6, 7]})
    >>> gdf = GroupedDataFrame({
    ...     'g': list('aaa'),
    ...     'x': range(3)}, groups=['g'])
    >>> _add_group_columns(data, gdf)
       g  x
    0  a  5
    1  a  6
    2  a  7
    """
    n = len(data)
    # Nothing to add when there is no grouping information.
    if not isinstance(gdf, GroupedDataFrame):
        return data

    for i, col in enumerate(gdf.plydata_groups):
        if col in data:
            continue
        repeated = [gdf[col].iloc[0]] * n
        # Preserve the dtype of the group column: categoricals keep their
        # categories and orderedness, everything else keeps its dtype.
        if pdtypes.is_categorical_dtype(gdf[col]):
            values = pd.Categorical(
                repeated,
                categories=gdf[col].cat.categories,
                ordered=gdf[col].cat.ordered,
            )
        else:
            values = pd.Series(
                repeated,
                index=data.index,
                dtype=gdf[col].dtype,
            )
        # Group columns come first
        data.insert(i, col, values)
    return data
開發者ID:has2k1,項目名稱:plydata,代碼行數:41,代碼來源:common.py

示例5: test_summarize

# 需要導入模塊: from pandas.api import types [as 別名]
# 或者: from pandas.api.types import is_categorical_dtype [as 別名]
def test_summarize():
    """summarize() aggregates, respects groups, and preserves categoricals."""
    df = pd.DataFrame({'x': [1, 5, 2, 2, 4, 0, 4],
                       'y': [1, 2, 3, 4, 5, 6, 5],
                       'z': [1, 3, 3, 4, 5, 5, 5]})

    result = df >> summarize('np.sum(x)', max='np.max(x)')
    assert result.loc[0, 'max'] == np.max(df['x'])
    assert result.loc[0, 'np.sum(x)'] == np.sum(df['x'])

    result = df >> group_by('y', 'z') >> summarize(mean_x='np.mean(x)')
    assert 'y' in result
    assert 'z' in result
    assert all(result['mean_x'] == [1, 5, 2, 2, 4, 0])

    # (Name, Expression) tuples
    result = df >> summarize(('sum', 'np.sum(x)'), ('max', 'np.max(x)'))
    assert 'sum' in result
    assert 'max' in result

    # Branches
    result = df >> group_by('y') >> summarize('np.sum(z)', constant=1)
    assert 'y' in result
    assert result.loc[0, 'constant'] == 1

    # Category stays category
    df1 = df.copy()
    df1['z'] = pd.Categorical(df1['z'])
    result = df1 >> group_by('y', 'z') >> summarize(mean_x='np.mean(x)')
    # BUG FIX: ``np.int`` was removed in NumPy 1.24; the builtin ``int``
    # compares equivalently against the default integer dtype.
    assert result['y'].dtype == np.dtype(int)
    assert pdtypes.is_categorical_dtype(result['z'])
開發者ID:has2k1,項目名稱:plydata,代碼行數:32,代碼來源:test_dataframe.py

示例6: test_group_by_all

# 需要導入模塊: from pandas.api import types [as 別名]
# 或者: from pandas.api.types import is_categorical_dtype [as 別名]
def test_group_by_all():
    """group_by_all() groups every column, optionally converting/renaming."""
    df = pd.DataFrame({
        'alpha': list('aaabbb'),
        'beta': list('babruq'),
        'theta': list('cdecde'),
        'x': [1, 2, 3, 4, 5, 6],
        'y': [6, 5, 4, 3, 2, 1],
        'z': [7, 9, 11, 8, 10, 12]
    })
    n_cols = len(df.columns)

    # All columns become groups, none added
    result = df >> group_by_all()
    assert n_cols == len(result.columns)
    assert n_cols == len(result.plydata_groups)

    # Converting in place keeps the column count unchanged
    result = df >> group_by_all(pd.Categorical)
    assert n_cols == len(result.columns)
    assert n_cols == len(result.plydata_groups)

    # A dict of converters appends suffixed copies of every column
    result = df >> group_by_all(dict(cat=pd.Categorical))
    assert n_cols * 2 == len(result.columns)
    for col in df.columns:
        suffixed = '{}_cat'.format(col)
        assert not pdtypes.is_categorical_dtype(result[col])
        assert pdtypes.is_categorical_dtype(result[suffixed])

    # Pre-existing groups are excluded from conversion
    result = (df
              >> group_by('x')
              >> group_by_all(dict(cat=pd.Categorical)))
    assert result.plydata_groups == [
        '{}_cat'.format(col) for col in df.columns if col != 'x']
    assert n_cols * 2 - 1 == len(result.columns)
    assert 'x_cat' not in result
開發者ID:has2k1,項目名稱:plydata,代碼行數:34,代碼來源:test_dataframe.py

示例7: get_errors

# 需要導入模塊: from pandas.api import types [as 別名]
# 或者: from pandas.api.types import is_categorical_dtype [as 別名]
def get_errors(self, series: pd.Series, column: 'column.Column'):
    """Return a list of ValidationWarning for entries that fail validation.

    When the column allows empty entries, empties are excluded from the
    failures: emptiness is tested with isnull() for categorical/numeric
    series and with string length for everything else.
    """
    # Rows that fail the subclass's validate() check
    failing = ~self.validate(series)

    if column.allow_empty:
        # Keep only the non-empty failures. Categorical/numeric series are
        # checked explicitly because np.issubdtype FAILS on categoricals.
        if is_categorical_dtype(series) or is_numeric_dtype(series):
            failing = ~series.isnull() & failing
        else:
            failing = (series.str.len() > 0) & failing

    # Report each failing element with its index (usually a row number)
    return [
        ValidationWarning(
            message=self.message,
            value=series[i],
            row=i,
            column=series.name,
        )
        for i in series.index[failing]
    ]
開發者ID:TMiguelT,項目名稱:PandasSchema,代碼行數:34,代碼來源:validation.py

示例8: _id_var

# 需要導入模塊: from pandas.api import types [as 別名]
# 或者: from pandas.api.types import is_categorical_dtype [as 別名]
def _id_var(x, drop=False):
    """
    Assign ids to items in x. If two items
    are the same, they get the same id.

    Parameters
    ----------
    x : array-like
        items to associate ids with
    drop : bool
        Whether to drop unused factor levels
    """
    if len(x) == 0:
        return []

    categorical = pdtypes.is_categorical_dtype(x)

    if categorical:
        if drop:
            x = x.cat.remove_unused_categories()
            lst = list(x.cat.codes + 1)
        else:
            has_nan = any(np.isnan(i) for i in x if isinstance(i, float))
            if has_nan:
                # NaNs are -1, we give them the highest code
                nan_code = -1
                new_nan_code = np.max(x.cat.codes) + 1
                lst = [val if val != nan_code else new_nan_code for val in x]
            else:
                lst = list(x.cat.codes + 1)
    else:
        try:
            levels = np.sort(np.unique(x))
        except TypeError:
            # x probably has NANs
            levels = multitype_sort(set(x))

        lst = match(x, levels)
        lst = [item + 1 for item in lst]

    return lst 
開發者ID:has2k1,項目名稱:plotnine,代碼行數:43,代碼來源:utils.py

示例9: get_var_type

# 需要導入模塊: from pandas.api import types [as 別名]
# 或者: from pandas.api.types import is_categorical_dtype [as 別名]
def get_var_type(col):
    """
    Return var_type (for KDEMultivariate) of the column

    Parameters
    ----------
    col : pandas.Series
        A dataframe column.

    Returns
    -------
    out : str
        One of ['c', 'o', 'u'].

    See Also
    --------
    The origin of the character codes is
    :class:`statsmodels.nonparametric.kernel_density.KDEMultivariate`.
    """
    if pdtypes.is_numeric_dtype(col):
        return 'c'  # continuous
    if pdtypes.is_categorical_dtype(col):
        # ordered categoricals -> 'o', unordered -> 'u'
        return 'o' if col.cat.ordered else 'u'
    # Anything else (e.g. plain string columns that are not
    # categorical) is treated as unordered.
    return 'u'
開發者ID:has2k1,項目名稱:plotnine,代碼行數:31,代碼來源:density.py

示例10: fix_known_differences

# 需要導入模塊: from pandas.api import types [as 別名]
# 或者: from pandas.api.types import is_categorical_dtype [as 別名]
def fix_known_differences(orig, result):
    """
    Helper function for reducing anndata's to only the elements we expect to be
    equivalent after concatenation.

    Only for the case where orig is the ground truth result of what concatenation should be.
    """
    orig, result = orig.copy(), result.copy()

    result.obs.drop(columns=["batch"], inplace=True)
    result.strings_to_categoricals()  # Should this be implicit in concatenation?

    # TODO
    # * merge varm, varp similar to uns
    # * merge obsp, but some information should be lost
    del orig.varm
    del orig.varp
    del orig.obsp  # TODO

    # Concatenation loses the orderedness of ordered categoricals;
    # restore it from the ground-truth dtypes.
    for key, dtype in orig.obs.dtypes.items():
        if is_categorical_dtype(dtype) and dtype.ordered:
            result.obs[key] = result.obs[key].astype(dtype)

    return orig, result
開發者ID:theislab,項目名稱:anndata,代碼行數:28,代碼來源:test_concatenate.py

示例11: describe

# 需要導入模塊: from pandas.api import types [as 別名]
# 或者: from pandas.api.types import is_categorical_dtype [as 別名]
def describe(data):
    """Compute summary statistics for every column of ``data``.

    For each column the resulting frame has the rows:
        dtype:        detected variable type
        max:          max value / most frequent level
        min:          min value / least frequent level
        mean:         mean / the level with median frequency
        missing_pct:  fraction of missing values
        std/nuniue:   standard deviation / number of unique levels
        (row label kept as-is for backward compatibility with consumers)
    """
    data = pd.DataFrame(data)
    n_sample = len(data)
    var_type = type_of_var(data, copy=True)
    summary = pd.DataFrame(
        columns=data.columns,
        index=['dtype', 'max', 'min', 'mean', 'missing_pct', 'std/nuniue'],
    )
    for c in data.columns:
        missing_pct = 1 - data[c].count() / n_sample
        if var_type[c] == 'number':
            max_value, min_value, mean_value = data[c].max(), data[c].min(), data[c].mean()
            std_value = data[c].std()
            summary.loc[:, c] = [var_type[c], max_value, min_value, mean_value, missing_pct, std_value]
        elif var_type[c] == 'category' or is_categorical_dtype(data[c].dtype):
            tmp = data[c].value_counts()
            # BUG FIX: use idxmax/idxmin to get the *level labels*;
            # Series.argmax/argmin return integer positions in modern pandas.
            max_value, min_value = tmp.idxmax(), tmp.idxmin()
            mean_value_index = tmp[tmp == tmp.median()].index
            mean_value = mean_value_index[0] if len(mean_value_index) > 0 else np.nan
            summary.loc[:, c] = [var_type[c], max_value, min_value, mean_value, missing_pct, len(tmp)]
        elif var_type[c] == 'datetime':
            max_value, min_value = data[c].max(), data[c].min()
            summary.loc[:, c] = [var_type[c], max_value, min_value, np.nan, missing_pct, np.nan]
        else:
            summary.loc[:, c] = [var_type[c], np.nan, np.nan, np.nan, missing_pct, np.nan]
    return summary
開發者ID:gasongjian,項目名稱:reportgen,代碼行數:36,代碼來源:analysis.py

示例12: fit

# 需要導入模塊: from pandas.api import types [as 別名]
# 或者: from pandas.api.types import is_categorical_dtype [as 別名]
def fit(
    self, X: DataFrameType, y: Optional[Union[ArrayLike, SeriesType]] = None
) -> "OrdinalEncoder":
    """Determine the categorical columns to be encoded.

    Parameters
    ----------
    X : pandas.DataFrame or dask.dataframe.DataFrame
    y : ignored

    Returns
    -------
    self

    Raises
    ------
    ValueError
        If an explicitly requested column is not categorical.
    """
    self.columns_ = X.columns
    columns = self.columns
    if columns is None:
        columns = X.select_dtypes(include=["category"]).columns
    else:
        for column in columns:
            # BUG FIX: a bare assert is stripped under ``python -O``;
            # raise explicitly so validation always runs.
            if not is_categorical_dtype(X[column]):
                raise ValueError("Must be categorical")

    self.categorical_columns_ = columns
    self.non_categorical_columns_ = X.columns.drop(self.categorical_columns_)

    if _HAS_CTD:
        self.dtypes_ = {col: X[col].dtype for col in self.categorical_columns_}
    else:
        self.dtypes_ = {
            col: (X[col].cat.categories, X[col].cat.ordered)
            for col in self.categorical_columns_
        }

    return self
開發者ID:dask,項目名稱:dask-ml,代碼行數:36,代碼來源:data.py

示例13: test_ce

# 需要導入模塊: from pandas.api import types [as 別名]
# 或者: from pandas.api.types import is_categorical_dtype [as 別名]
def test_ce(self):
    """Categorizer converts object columns to categorical, leaves ints alone."""
    ce = dpp.Categorizer()
    original = raw.copy()
    transformed = ce.fit_transform(raw)
    # Object columns A, B, C become categorical; numeric D is untouched.
    for col in ("A", "B", "C"):
        assert is_categorical_dtype(transformed[col])
    assert transformed["D"].dtype == np.dtype("int64")
    tm.assert_index_equal(ce.columns_, pd.Index(["A", "B", "C"]))
    # The input frame must not be mutated.
    tm.assert_frame_equal(raw, original)
開發者ID:dask,項目名稱:dask-ml,代碼行數:12,代碼來源:test_data.py

示例14: test_dask

# 需要導入模塊: from pandas.api import types [as 別名]
# 或者: from pandas.api.types import is_categorical_dtype [as 別名]
def test_dask(self):
    """Categorizer behaves the same on a dask DataFrame as on pandas."""
    ddf = dd.from_pandas(raw, npartitions=2)
    ce = dpp.Categorizer()
    transformed = ce.fit_transform(ddf)
    # Object columns A, B, C become categorical; numeric D is untouched.
    for col in ("A", "B", "C"):
        assert is_categorical_dtype(transformed[col])
    assert transformed["D"].dtype == np.dtype("int64")
    tm.assert_index_equal(ce.columns_, pd.Index(["A", "B", "C"]))
開發者ID:dask,項目名稱:dask-ml,代碼行數:11,代碼來源:test_data.py

示例15: test_upload_pandas_categorical_ipc

# 需要導入模塊: from pandas.api import types [as 別名]
# 或者: from pandas.api.types import is_categorical_dtype [as 別名]
def test_upload_pandas_categorical_ipc(self, con):
    """Round-trip a DataFrame with object and categorical columns
    through every load method, verifying schema, data, and that the
    input frame is not mutated."""
    con.execute("DROP TABLE IF EXISTS test_categorical;")

    df = pd.DataFrame({"A": ["a", "b", "c", "a"]})
    df["B"] = df["A"].astype('category')

    # test that table created correctly when it doesn't exist on server
    con.load_table("test_categorical", df)
    ans = con.execute("select * from test_categorical").fetchall()

    assert ans == [('a', 'a'), ('b', 'b'), ('c', 'c'), ('a', 'a')]

    assert con.get_table_details("test_categorical") == [
        ColumnDetails(
            name='A',
            type='STR',
            nullable=True,
            precision=0,
            scale=0,
            comp_param=32,
            encoding='DICT',
            is_array=False,
        ),
        ColumnDetails(
            name='B',
            type='STR',
            nullable=True,
            precision=0,
            scale=0,
            comp_param=32,
            encoding='DICT',
            is_array=False,
        ),
    ]

    # load row-wise
    con.load_table("test_categorical", df, method="rows")

    # load columnar
    con.load_table("test_categorical", df, method="columnar")

    # load arrow
    con.load_table("test_categorical", df, method="arrow")

    # test end result: 4 loads of 4 rows each
    df_ipc = con.select_ipc("select * from test_categorical")
    assert df_ipc.shape == (16, 2)

    # BUG FIX: DataFrame.append was removed in pandas 2.0; build the
    # four-fold expected frame with pd.concat instead.
    res = pd.concat([df] * 4).reset_index(drop=True)
    res["A"] = res["A"].astype('category')
    res["B"] = res["B"].astype('category')
    assert pd.DataFrame.equals(df_ipc, res)

    # test that input df wasn't mutated
    # original input is object, categorical
    # to load via Arrow, converted internally to object, object
    assert is_object_dtype(df["A"])
    assert is_categorical_dtype(df["B"])
    con.execute("DROP TABLE IF EXISTS test_categorical;")
開發者ID:omnisci,項目名稱:pymapd,代碼行數:62,代碼來源:test_integration.py


注:本文中的pandas.api.types.is_categorical_dtype方法示例由純淨天空整理自Github/MSDocs等開源代碼及文檔管理平台,相關代碼片段篩選自各路編程大神貢獻的開源項目,源碼版權歸原作者所有,傳播和使用請參考對應項目的License;未經允許,請勿轉載。