当前位置: 首页>>代码示例>>Python>>正文


Python types.is_categorical_dtype方法代码示例

本文整理汇总了Python中pandas.api.types.is_categorical_dtype方法的典型用法代码示例。如果您正苦于以下问题:Python types.is_categorical_dtype方法的具体用法?Python types.is_categorical_dtype怎么用?Python types.is_categorical_dtype使用的例子?那么恭喜您, 这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在pandas.api.types的用法示例。


在下文中一共展示了types.is_categorical_dtype方法的15个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Python代码示例。

示例1: write_series

# 需要导入模块: from pandas.api import types [as 别名]
# 或者: from pandas.api.types import is_categorical_dtype [as 别名]
def write_series(group, key, series, dataset_kwargs=MappingProxyType({})):
    """Write a pandas Series (or Index) into an h5py group under ``key``.

    Object-dtype data is stored as variable-length strings; categorical data
    is stored as integer codes plus a referenced categories array; any other
    dtype is written directly.
    """
    # group must be an h5py type, otherwise categoricals won't write.
    if series.dtype == object:
        # Object dtype is assumed to hold strings; use h5py's vlen str dtype.
        group.create_dataset(
            key,
            data=series.values,
            dtype=h5py.special_dtype(vlen=str),
            **dataset_kwargs,
        )
    elif is_categorical_dtype(series):
        # Works for both a categorical Series and a categorical Index.
        cat: pd.Categorical = series.values
        cats_key = f"__categories/{key}"

        write_array(group, cats_key, cat.categories.values, dataset_kwargs=dataset_kwargs)
        write_array(group, key, cat.codes, dataset_kwargs=dataset_kwargs)

        # Link the codes dataset to its categories via an h5py object reference.
        group[key].attrs["categories"] = group[cats_key].ref
        group[cats_key].attrs["ordered"] = cat.ordered
    else:
        group[key] = series.values
开发者ID:theislab,项目名称:anndata,代码行数:25,代码来源:h5ad.py

示例2: write_series

# 需要导入模块: from pandas.api import types [as 别名]
# 或者: from pandas.api.types import is_categorical_dtype [as 别名]
def write_series(group, key, series, dataset_kwargs=MappingProxyType({})):
    """Write a pandas Series (or Index) into a zarr group under ``key``.

    Object-dtype data is stored with a variable-length UTF-8 codec;
    categorical data is stored as integer codes plus a categories array;
    any other dtype is written directly.
    """
    if series.dtype == object:
        # zarr needs an explicit object codec for string data.
        group.create_dataset(
            key,
            shape=series.shape,
            dtype=object,
            object_codec=numcodecs.VLenUTF8(),
            **dataset_kwargs,
        )
        group[key][:] = series.values
    elif is_categorical_dtype(series):
        # Works for both a categorical Series and a categorical Index.
        cat: pd.Categorical = series.values
        cats_key = f"__categories/{key}"

        write_array(group, cats_key, cat.categories.values, dataset_kwargs=dataset_kwargs)
        write_array(group, key, cat.codes, dataset_kwargs=dataset_kwargs)

        # Unlike h5py, zarr stores the link as a plain path string.
        group[key].attrs["categories"] = cats_key
        # np.bool_ is not JSON-serializable; coerce to a plain bool.
        group[cats_key].attrs["ordered"] = bool(cat.ordered)
    else:
        group[key] = series.values
开发者ID:theislab,项目名称:anndata,代码行数:27,代码来源:zarr.py

示例3: _fit

# 需要导入模块: from pandas.api import types [as 别名]
# 或者: from pandas.api.types import is_categorical_dtype [as 别名]
def _fit(self, X: DataFrameType):
    """Determine the columns to categorize and record their category dtypes.

    Returns a tuple of (columns, categories) where ``categories`` maps each
    column name to either its CategoricalDtype (when available) or a
    ``(categories, ordered)`` pair.
    """
    if self.columns is not None:
        columns = self.columns
    else:
        columns = X.select_dtypes(include=["object", "category"]).columns

    def _as_categorical(values):
        # Coerce a non-categorical column to category dtype.
        if is_categorical_dtype(values):
            return values
        # This branch is never hit on a dask.array, since the object
        # columns would have been converted to known cats already.
        return pd.Series(values, index=X.index).astype("category")

    categories = {}
    for name in columns:
        col = _as_categorical(X[name])
        categories[name] = (
            col.dtype if _HAS_CTD else (col.cat.categories, col.cat.ordered)
        )

    return columns, categories
开发者ID:dask,项目名称:dask-ml,代码行数:22,代码来源:data.py

示例4: _add_group_columns

# 需要导入模块: from pandas.api import types [as 别名]
# 或者: from pandas.api.types import is_categorical_dtype [as 别名]
def _add_group_columns(data, gdf):
    """
    Add group columns to data with a value from the grouped dataframe

    It is assumed that the grouped dataframe contains a single group

    >>> data = pd.DataFrame({
    ...     'x': [5, 6, 7]})
    >>> gdf = GroupedDataFrame({
    ...     'g': list('aaa'),
    ...     'x': range(3)}, groups=['g'])
    >>> _add_group_columns(data, gdf)
       g  x
    0  a  5
    1  a  6
    2  a  7
    """
    if not isinstance(gdf, GroupedDataFrame):
        return data

    n_rows = len(data)
    for position, name in enumerate(gdf.plydata_groups):
        if name in data:
            continue
        repeated = [gdf[name].iloc[0]] * n_rows
        # Broadcast the group value while preserving the column's dtype.
        if pdtypes.is_categorical_dtype(gdf[name]):
            values = pd.Categorical(
                repeated,
                categories=gdf[name].cat.categories,
                ordered=gdf[name].cat.ordered,
            )
        else:
            values = pd.Series(
                repeated,
                index=data.index,
                dtype=gdf[name].dtype,
            )
        # Group columns come first.
        data.insert(position, name, values)
    return data
开发者ID:has2k1,项目名称:plydata,代码行数:41,代码来源:common.py

示例5: test_summarize

# 需要导入模块: from pandas.api import types [as 别名]
# 或者: from pandas.api.types import is_categorical_dtype [as 别名]
def test_summarize():
    """Exercise summarize(): expression strings, keyword names,
    (name, expression) tuples, grouped input, and dtype preservation."""
    df = pd.DataFrame({'x': [1, 5, 2, 2, 4, 0, 4],
                       'y': [1, 2, 3, 4, 5, 6, 5],
                       'z': [1, 3, 3, 4, 5, 5, 5]})

    result = df >> summarize('np.sum(x)', max='np.max(x)')
    assert result.loc[0, 'max'] == np.max(df['x'])
    assert result.loc[0, 'np.sum(x)'] == np.sum(df['x'])

    result = df >> group_by('y', 'z') >> summarize(mean_x='np.mean(x)')
    assert 'y' in result
    assert 'z' in result
    assert all(result['mean_x'] == [1, 5, 2, 2, 4, 0])

    # (Name, Expression) tuples
    result = df >> summarize(('sum', 'np.sum(x)'), ('max', 'np.max(x)'))
    assert 'sum' in result
    assert 'max' in result

    # Branches
    result = df >> group_by('y') >> summarize('np.sum(z)', constant=1)
    assert 'y' in result
    assert result.loc[0, 'constant'] == 1

    # Category stays category
    df1 = df.copy()
    df1['z'] = pd.Categorical(df1['z'])
    result = df1 >> group_by('y', 'z') >> summarize(mean_x='np.mean(x)')
    # np.int was removed in NumPy 1.24; it was an alias for the builtin
    # int, so comparing against int checks exactly the same thing.
    assert result['y'].dtype == int
    assert pdtypes.is_categorical_dtype(result['z'])
开发者ID:has2k1,项目名称:plydata,代码行数:32,代码来源:test_dataframe.py

示例6: test_group_by_all

# 需要导入模块: from pandas.api import types [as 别名]
# 或者: from pandas.api.types import is_categorical_dtype [as 别名]
def test_group_by_all():
    """group_by_all(): grouping every column, with and without a converter,
    and interaction with a pre-existing group."""
    df = pd.DataFrame({
        'alpha': list('aaabbb'),
        'beta': list('babruq'),
        'theta': list('cdecde'),
        'x': [1, 2, 3, 4, 5, 6],
        'y': [6, 5, 4, 3, 2, 1],
        'z': [7, 9, 11, 8, 10, 12]
    })

    # No converter: every column becomes a group column.
    result = df >> group_by_all()
    assert len(result.columns) == len(df.columns)
    assert len(result.plydata_groups) == len(df.columns)

    # Bare converter: columns are converted in place.
    result = df >> group_by_all(pd.Categorical)
    assert len(result.columns) == len(df.columns)
    assert len(result.plydata_groups) == len(df.columns)

    # dict converter: converted copies are added with a suffix.
    result = df >> group_by_all(dict(cat=pd.Categorical))
    assert len(result.columns) == 2 * len(df.columns)
    for name in df.columns:
        assert not pdtypes.is_categorical_dtype(result[name])
        assert pdtypes.is_categorical_dtype(result['{}_cat'.format(name)])

    # An existing group column ('x') is excluded from the conversion.
    result = (df
              >> group_by('x')
              >> group_by_all(dict(cat=pd.Categorical)))
    assert result.plydata_groups == [
        '{}_cat'.format(name) for name in df.columns if name != 'x']
    assert 2 * len(df.columns) - 1 == len(result.columns)
    assert 'x_cat' not in result
开发者ID:has2k1,项目名称:plydata,代码行数:34,代码来源:test_dataframe.py

示例7: get_errors

# 需要导入模块: from pandas.api import types [as 别名]
# 或者: from pandas.api.types import is_categorical_dtype [as 别名]
def get_errors(self, series: pd.Series, column: 'column.Column'):
    """Return a list of ValidationWarnings, one per entry of ``series``
    that fails this validation, honouring the column's allow_empty flag."""
    # Entries for which the child class's validate() returned False.
    failing = ~self.validate(series)

    if column.allow_empty:
        # Only flag entries that are non-empty AND failing.
        # Explicitly check for category/numeric dtypes because issubdtype
        # (via .str accessors) will FAIL on a categorical series.
        if is_categorical_dtype(series) or is_numeric_dtype(series):
            failing = ~series.isnull() & failing
        else:
            failing = (series.str.len() > 0) & failing

    # Build one warning per failing row; the series index doubles as the
    # (probable) row number reported to the user.
    return [
        ValidationWarning(
            message=self.message,
            value=series[i],
            row=i,
            column=series.name,
        )
        for i in series.index[failing]
    ]
开发者ID:TMiguelT,项目名称:PandasSchema,代码行数:34,代码来源:validation.py

示例8: _id_var

# 需要导入模块: from pandas.api import types [as 别名]
# 或者: from pandas.api.types import is_categorical_dtype [as 别名]
def _id_var(x, drop=False):
    """
    Assign ids to items in x. If two items
    are the same, they get the same id.

    Parameters
    ----------
    x : array-like
        items to associate ids with
    drop : bool
        Whether to drop unused factor levels
    """
    if len(x) == 0:
        return []

    categorical = pdtypes.is_categorical_dtype(x)

    if categorical:
        if drop:
            x = x.cat.remove_unused_categories()
            lst = list(x.cat.codes + 1)
        else:
            has_nan = any(np.isnan(i) for i in x if isinstance(i, float))
            if has_nan:
                # NaNs are -1, we give them the highest code
                nan_code = -1
                new_nan_code = np.max(x.cat.codes) + 1
                lst = [val if val != nan_code else new_nan_code for val in x]
            else:
                lst = list(x.cat.codes + 1)
    else:
        try:
            levels = np.sort(np.unique(x))
        except TypeError:
            # x probably has NANs
            levels = multitype_sort(set(x))

        lst = match(x, levels)
        lst = [item + 1 for item in lst]

    return lst 
开发者ID:has2k1,项目名称:plotnine,代码行数:43,代码来源:utils.py

示例9: get_var_type

# 需要导入模块: from pandas.api import types [as 别名]
# 或者: from pandas.api.types import is_categorical_dtype [as 别名]
def get_var_type(col):
    """
    Return var_type (for KDEMultivariate) of the column

    Parameters
    ----------
    col : pandas.Series
        A dataframe column.

    Returns
    -------
    out : str
        One of ['c', 'o', 'u'].

    See Also
    --------
    The origin of the character codes is
    :class:`statsmodels.nonparametric.kernel_density.KDEMultivariate`.
    """
    # Numeric columns are continuous.
    if pdtypes.is_numeric_dtype(col):
        return 'c'
    # Categoricals are ordered ('o') or unordered ('u').
    if pdtypes.is_categorical_dtype(col):
        return 'o' if col.cat.ordered else 'u'
    # Default to unordered when unsure, e.g. string columns that are
    # not categorical.
    return 'u'
开发者ID:has2k1,项目名称:plotnine,代码行数:31,代码来源:density.py

示例10: fix_known_differences

# 需要导入模块: from pandas.api import types [as 别名]
# 或者: from pandas.api.types import is_categorical_dtype [as 别名]
def fix_known_differences(orig, result):
    """
    Helper function for reducing anndata's to only the elements we expect to be
    equivalent after concatenation.

    Only for the case where orig is the ground truth result of what concatenation should be.
    """
    orig, result = orig.copy(), result.copy()

    result.obs.drop(columns=["batch"], inplace=True)
    result.strings_to_categoricals()  # Should this be implicit in concatenation?

    # TODO
    # * merge varm, varp similar to uns
    # * merge obsp, but some information should be lost
    del orig.varm
    del orig.varp
    del orig.obsp  # TODO

    # Concatenation loses the orderedness of ordered categoricals;
    # restore it from the ground-truth dtypes so comparisons succeed.
    for name, dtype in orig.obs.dtypes.items():
        if is_categorical_dtype(dtype) and dtype.ordered:
            result.obs[name] = result.obs[name].astype(dtype)

    return orig, result
开发者ID:theislab,项目名称:anndata,代码行数:28,代码来源:test_concatenate.py

示例11: describe

# 需要导入模块: from pandas.api import types [as 别名]
# 或者: from pandas.api.types import is_categorical_dtype [as 别名]
def describe(data):
    '''Generate summary statistics for every variable in ``data``.

    For each column the summary rows are:
        dtype: the inferred variable type
        max: max value / most frequent value
        min: min value / least frequent value
        mean: mean / the median-frequency value
        missing_pct: fraction of missing entries
        std/nuniue: standard deviation / number of unique values
    '''
    data = pd.DataFrame(data)
    n_sample = len(data)
    # type_of_var classifies columns as 'number'/'category'/'datetime'/other.
    var_type = type_of_var(data, copy=True)
    summary = pd.DataFrame(
        columns=data.columns,
        index=['dtype', 'max', 'min', 'mean', 'missing_pct', 'std/nuniue'],
    )
    for c in data.columns:
        missing_pct = 1 - data[c].count() / n_sample
        if var_type[c] == 'number':
            max_value, min_value, mean_value = data[c].max(), data[c].min(), data[c].mean()
            std_value = data[c].std()
            summary.loc[:, c] = [var_type[c], max_value, min_value, mean_value, missing_pct, std_value]
        elif var_type[c] == 'category' or is_categorical_dtype(data[c].dtype):
            tmp = data[c].value_counts()
            # Bug fix: use idxmax/idxmin to get the most/least frequent
            # *value* (the index label). Series.argmax/argmin return
            # positional integers in modern pandas, not labels.
            max_value, min_value = tmp.idxmax(), tmp.idxmin()
            mean_value_index = tmp[tmp == tmp.median()].index
            mean_value = mean_value_index[0] if len(mean_value_index) > 0 else np.nan
            summary.loc[:, c] = [var_type[c], max_value, min_value, mean_value, missing_pct, len(tmp)]
        elif var_type[c] == 'datetime':
            max_value, min_value = data[c].max(), data[c].min()
            summary.loc[:, c] = [var_type[c], max_value, min_value, np.nan, missing_pct, np.nan]
        else:
            summary.loc[:, c] = [var_type[c], np.nan, np.nan, np.nan, missing_pct, np.nan]
    return summary
开发者ID:gasongjian,项目名称:reportgen,代码行数:36,代码来源:analysis.py

示例12: fit

# 需要导入模块: from pandas.api import types [as 别名]
# 或者: from pandas.api.types import is_categorical_dtype [as 别名]
def fit(
    self, X: DataFrameType, y: Optional[Union[ArrayLike, SeriesType]] = None
) -> "OrdinalEncoder":
    """Determine the categorical columns to be encoded.

    Parameters
    ----------
    X : pandas.DataFrame or dask.dataframe.DataFrame
    y : ignored

    Returns
    -------
    self
    """
    self.columns_ = X.columns

    if self.columns is None:
        # Auto-detect: every category-dtype column gets encoded.
        columns = X.select_dtypes(include=["category"]).columns
    else:
        # User-specified columns must already be categorical.
        columns = self.columns
        for column in columns:
            if not is_categorical_dtype(X[column]):
                # Bug fix: `assert` is stripped under `python -O`; raise a
                # real error. AssertionError subclass keeps callers that
                # caught AssertionError working.
                raise AssertionError("Must be categorical")

    self.categorical_columns_ = columns
    self.non_categorical_columns_ = X.columns.drop(columns)

    if _HAS_CTD:
        self.dtypes_ = {name: X[name].dtype for name in columns}
    else:
        self.dtypes_ = {
            name: (X[name].cat.categories, X[name].cat.ordered)
            for name in columns
        }

    return self
开发者ID:dask,项目名称:dask-ml,代码行数:36,代码来源:data.py

示例13: test_ce

# 需要导入模块: from pandas.api import types [as 别名]
# 或者: from pandas.api.types import is_categorical_dtype [as 别名]
def test_ce(self):
    """Categorizer converts the object columns A-C to category dtype,
    leaves the int column D alone, and does not mutate its input."""
    ce = dpp.Categorizer()
    original = raw.copy()
    transformed = ce.fit_transform(raw)
    for name in ("A", "B", "C"):
        assert is_categorical_dtype(transformed[name])
    assert transformed["D"].dtype == np.dtype("int64")
    tm.assert_index_equal(ce.columns_, pd.Index(["A", "B", "C"]))
    # The input frame must be left untouched.
    tm.assert_frame_equal(raw, original)
开发者ID:dask,项目名称:dask-ml,代码行数:12,代码来源:test_data.py

示例14: test_dask

# 需要导入模块: from pandas.api import types [as 别名]
# 或者: from pandas.api.types import is_categorical_dtype [as 别名]
def test_dask(self):
    """Same conversion behaviour on a two-partition dask DataFrame."""
    ddf = dd.from_pandas(raw, npartitions=2)
    ce = dpp.Categorizer()
    transformed = ce.fit_transform(ddf)
    for name in ("A", "B", "C"):
        assert is_categorical_dtype(transformed[name])
    assert transformed["D"].dtype == np.dtype("int64")
    tm.assert_index_equal(ce.columns_, pd.Index(["A", "B", "C"]))
开发者ID:dask,项目名称:dask-ml,代码行数:11,代码来源:test_data.py

示例15: test_upload_pandas_categorical_ipc

# 需要导入模块: from pandas.api import types [as 别名]
# 或者: from pandas.api.types import is_categorical_dtype [as 别名]
def test_upload_pandas_categorical_ipc(self, con):
    """Round-trip a DataFrame with an object column and a categorical
    column through every load method, checking the server-side schema,
    the selected data, and that the input frame is not mutated."""
    con.execute("DROP TABLE IF EXISTS test_categorical;")

    df = pd.DataFrame({"A": ["a", "b", "c", "a"]})
    df["B"] = df["A"].astype('category')

    # test that table created correctly when it doesn't exist on server
    con.load_table("test_categorical", df)
    ans = con.execute("select * from test_categorical").fetchall()

    assert ans == [('a', 'a'), ('b', 'b'), ('c', 'c'), ('a', 'a')]

    assert con.get_table_details("test_categorical") == [
        ColumnDetails(
            name='A',
            type='STR',
            nullable=True,
            precision=0,
            scale=0,
            comp_param=32,
            encoding='DICT',
            is_array=False,
        ),
        ColumnDetails(
            name='B',
            type='STR',
            nullable=True,
            precision=0,
            scale=0,
            comp_param=32,
            encoding='DICT',
            is_array=False,
        ),
    ]

    # load row-wise
    con.load_table("test_categorical", df, method="rows")

    # load columnar
    con.load_table("test_categorical", df, method="columnar")

    # load arrow
    con.load_table("test_categorical", df, method="arrow")

    # test end result
    df_ipc = con.select_ipc("select * from test_categorical")
    assert df_ipc.shape == (16, 2)

    # Bug fix: DataFrame.append was removed in pandas 2.0; pd.concat of
    # four copies produces the identical 16-row frame.
    res = pd.concat([df, df, df, df]).reset_index(drop=True)
    res["A"] = res["A"].astype('category')
    res["B"] = res["B"].astype('category')
    assert pd.DataFrame.equals(df_ipc, res)

    # test that input df wasn't mutated
    # original input is object, categorical
    # to load via Arrow, converted internally to object, object
    assert is_object_dtype(df["A"])
    assert is_categorical_dtype(df["B"])
    con.execute("DROP TABLE IF EXISTS test_categorical;")
开发者ID:omnisci,项目名称:pymapd,代码行数:62,代码来源:test_integration.py


注:本文中的pandas.api.types.is_categorical_dtype方法示例由纯净天空整理自Github/MSDocs等开源代码及文档管理平台,相关代码片段筛选自各路编程大神贡献的开源项目,源码版权归原作者所有,传播和使用请参考对应项目的License;未经允许,请勿转载。