当前位置: 首页>>代码示例>>Python>>正文


Python types.is_numeric_dtype方法代码示例

本文整理汇总了Python中pandas.api.types.is_numeric_dtype方法的典型用法代码示例。如果您正苦于以下问题:Python types.is_numeric_dtype方法的具体用法?Python types.is_numeric_dtype怎么用?Python types.is_numeric_dtype使用的例子?那么恭喜您, 这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在pandas.api.types的用法示例。


在下文中一共展示了types.is_numeric_dtype方法的15个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Python代码示例。

示例1: oob_dependences

# 需要导入模块: from pandas.api import types [as 别名]
# 或者: from pandas.api.types import is_numeric_dtype [as 别名]
def oob_dependences(rf, X_train, n_samples=5000):
    """
    Score how well each numeric feature is predicted by the other features.

    For every numeric column of X_train, ``rf`` is refit with that column as
    the target and all remaining columns as predictors; the resulting
    ``rf.oob_score_`` is stored as that column's dependence.

    The numeric columns are picked before the frame is passed through
    ``sample_rows(X_train, n_samples)``; all fitting happens on the frame
    returned by that call.

    :param rf: estimator exposing ``fit(X, y)`` and ``oob_score_``. It is
               refit in place once per numeric column.
    :param X_train: dataframe of independent variables.
    :param n_samples: value forwarded to ``sample_rows`` (5000 by default).
    :return: DataFrame indexed by 'Feature' with a single 'Dependence' column,
             sorted from the highest to the lowest dependence.
    """
    numeric_features = [name for name in X_train if is_numeric_dtype(X_train[name])]

    sampled = sample_rows(X_train, n_samples)

    scores = pd.DataFrame(columns=['Feature', 'Dependence']).set_index('Feature')
    for feature in numeric_features:
        predictors = sampled.drop(feature, axis=1)
        target = sampled[feature]
        rf.fit(predictors, target)
        # one row per feature, added by label
        scores.loc[feature] = rf.oob_score_
    return scores.sort_values('Dependence', ascending=False)
开发者ID:canard0328,项目名称:malss,代码行数:24,代码来源:rfpimp.py

示例2: _get_columns_info

# 需要导入模块: from pandas.api import types [as 别名]
# 或者: from pandas.api.types import is_numeric_dtype [as 别名]
def _get_columns_info(self, stats):
        """Sort the columns of ``self.df`` into the ``self.TYPE_*`` buckets.

        Buckets are filled in this order, each one drawing only from the
        columns left over by the previous ones:

        1. constant    - exactly one unique value
        2. bool        - exactly two unique values
        3. numeric     - numeric dtype
        4. date        - datetime64 dtype
        5. unique      - number of unique values equals the value count
        6. categorical - everything else

        :param stats: object indexable by ``'uniques'`` and ``'counts'``, each
            giving a per-column Series indexed by column name.
        :return: dict mapping each ``self.TYPE_*`` key to a pandas Index of
            column names.
        """
        uniques = stats['uniques']
        column_info = {}
        column_info[self.TYPE_CONSTANT] = uniques[uniques == 1].index
        column_info[self.TYPE_BOOL] = uniques[uniques == 2].index

        # Always read the buckets back through the same TYPE_* constants that
        # were used to store them, never through hard-coded literals.
        already_typed = column_info[self.TYPE_CONSTANT].union(column_info[self.TYPE_BOOL])
        rest_columns = self.get_columns(self.df, self.EXCLUDE, already_typed)

        column_info[self.TYPE_NUMERIC] = pd.Index(
            [c for c in rest_columns if types.is_numeric_dtype(self.df[c])])
        rest_columns = self.get_columns(
            self.df[rest_columns], self.EXCLUDE, column_info[self.TYPE_NUMERIC])

        column_info[self.TYPE_DATE] = pd.Index(
            [c for c in rest_columns if types.is_datetime64_dtype(self.df[c])])
        rest_columns = self.get_columns(
            self.df[rest_columns], self.EXCLUDE, column_info[self.TYPE_DATE])

        # A remaining column whose every counted value is distinct is "unique";
        # whatever is left after that is treated as categorical.
        rest_uniques = uniques[rest_columns]
        unique_columns = rest_uniques == stats['counts'][rest_columns]
        column_info[self.TYPE_UNIQUE] = rest_uniques[unique_columns].index
        column_info[self.TYPE_CATEGORICAL] = rest_uniques[~unique_columns].index
        return column_info
开发者ID:mouradmourafiq,项目名称:pandas-summary,代码行数:21,代码来源:__init__.py

示例3: generate_plotly_dim_dict

# 需要导入模块: from pandas.api import types [as 别名]
# 或者: from pandas.api.types import is_numeric_dtype [as 别名]
def generate_plotly_dim_dict(df, field):
    """Build the dimension dict describing column ``field`` of ``df``.

    Numeric columns are passed through unchanged under ``"values"``.
    String columns are encoded as integer codes (the position of each value
    in ``column.unique()``), with ``"tickvals"``/``"ticktext"`` carrying the
    code-to-label mapping.

    :param df: dataframe holding the column.
    :param field: column name; also used as the dimension label.
    :return: dict with ``"label"`` and ``"values"``, plus ``"tickvals"`` and
        ``"ticktext"`` for string columns.
    :raises Exception: if the column is neither numeric nor string typed.
    """
    column = df[field]
    dim_dict = {"label": field}
    if is_numeric_dtype(column):
        dim_dict["values"] = column
    elif is_string_dtype(column):
        texts = column.unique()
        # One dict lookup per row instead of scanning every unique label for
        # every row; this also yields plain Python ints.
        code_of = {text: code for code, text in enumerate(texts)}
        dim_dict["values"] = [code_of[value] for value in column]
        dim_dict["tickvals"] = list(range(len(texts)))
        dim_dict["ticktext"] = texts
    else:
        raise Exception("Unidentifiable Type")

    return dim_dict
开发者ID:ray-project,项目名称:ray,代码行数:19,代码来源:visual_utils.py

示例4: merger_data

# 需要导入模块: from pandas.api import types [as 别名]
# 或者: from pandas.api.types import is_numeric_dtype [as 别名]
def merger_data(data, var, unique_num, is_merge_high=True):
    """Bin a numeric column that has more than ``unique_num`` distinct values.

    Rows whose ``var`` equals -9999999 are set aside, the remaining values
    are optionally clipped with ``toad.utils.clip`` and then merged with
    ``toad.merge(method='step', n_bins=unique_num)``. The set-aside rows are
    put back after the merged ones.

    :param data: dataframe containing ``var`` and a ``'target'`` column.
    :param var: name of the column to bin.
    :param unique_num: a column is binned only when it is numeric and has more
        than this many distinct values; also used as the number of bins.
    :param is_merge_high: when true, clip with ``quantile=(None, .99)`` before
        merging.
    :return: ``(values, target, bins)``. For a binned column, ``values`` is the
        merge result followed by the set-aside rows, ``target`` is the
        ``'target'`` column in that same row order and ``bins`` is the second
        value returned by ``toad.merge``. Otherwise ``(data[var], None, None)``.
    """
    if not (is_numeric_dtype(data[var]) and data[var].nunique() > unique_num):
        return data[var], None, None

    missing_marker = -9999999  # NOTE(review): looks like a "missing value" sentinel - confirm
    data_miss = data[data[var] == missing_marker]
    data_nomiss = data[data[var] != missing_marker]

    merge_high_data = data_nomiss[var]
    if is_merge_high:
        merge_high_data = toad.utils.clip(data_nomiss[var], quantile=(None, .99))
    data_index, bins = toad.merge(merge_high_data, method='step', return_splits=True, n_bins=unique_num)

    # DataFrame.append was removed in pandas 2.0; pd.concat is the equivalent.
    # Both results use the same order (non-missing rows first, then missing
    # rows) so that values and target stay aligned.
    temp = pd.DataFrame(data_index, columns=[var])
    temp = pd.concat([temp, data_miss[[var]]], ignore_index=True)[var]
    target = pd.concat([data_nomiss, data_miss], ignore_index=True)['target']
    return temp, target, bins


# 两两指标做透视表 
开发者ID:amphibian-dev,项目名称:toad,代码行数:19,代码来源:evaluate.py

示例5: check_cateCols_uniqueValues

# 需要导入模块: from pandas.api import types [as 别名]
# 或者: from pandas.api.types import is_numeric_dtype [as 别名]
def check_cateCols_uniqueValues(dat, var_skip = None):
    """Warn about non-numeric columns that have 50 or more unique values.

    When such columns exist, their names are printed and the user is asked
    on stdin whether to continue. The prompt repeats until the answer is
    ``1`` (continue) or ``2`` (abort).

    :param dat: dataframe to inspect.
    :param var_skip: column name(s) to leave out of the check; passed through
        ``str_to_list`` before use. ``None`` checks every non-numeric column.
    :return: None.
    :raises SystemExit: with code 0 when the user answers ``2``.
    """
    # character columns with too many unique values
    char_cols = [i for i in list(dat) if not is_numeric_dtype(dat[i])]
    if var_skip is not None:
        # filter in place so the column order (and the printed message) stays
        # deterministic
        skipped = set(str_to_list(var_skip))
        char_cols = [i for i in char_cols if i not in skipped]
    char_cols_too_many_unique = [i for i in char_cols if len(dat[i].unique()) >= 50]
    if not char_cols_too_many_unique:
        return None

    # map(str, ...) keeps the message working for non-string column labels
    print('>>> There are {} variables have too many unique non-numberic values, which might cause the binning process slow. Please double check the following variables: \n{}'.format(len(char_cols_too_many_unique), ', '.join(map(str, char_cols_too_many_unique))))
    print('>>> Continue the binning process?')
    print('1: yes \n2: no')
    cont = None
    while cont not in (1, 2):
        try:
            cont = int(input("Selection: "))
        except ValueError:
            # non-numeric answer: ask again instead of crashing
            cont = None
    if cont == 2:
        raise SystemExit(0)
    return None


# replace blank by NA
#' @import data.table
#' 
开发者ID:ShichenXie,项目名称:scorecardpy,代码行数:23,代码来源:condition_fun.py

示例6: check_data_types

# 需要导入模块: from pandas.api import types [as 别名]
# 或者: from pandas.api.types import is_numeric_dtype [as 别名]
def check_data_types(self, data_frame: pd.DataFrame) -> None:
        """
        Split ``self.input_columns`` into numeric and string columns and
        record the type of the output column.

        Sets ``self.numeric_columns``, ``self.string_columns`` (every input
        column that is not numeric, without duplicates, in the order of
        ``self.input_columns``) and ``self.output_type`` (``'numeric'`` or
        ``'string'``).

        :param data_frame: frame containing the input columns and the output column
        :return: None
        """
        self.numeric_columns = [c for c in self.input_columns if is_numeric_dtype(data_frame[c])]
        # A set difference would leave the order of string_columns up to hash
        # ordering; filtering the de-duplicated input list keeps it stable.
        numeric = set(self.numeric_columns)
        self.string_columns = [c for c in dict.fromkeys(self.input_columns) if c not in numeric]
        self.output_type = 'numeric' if is_numeric_dtype(data_frame[self.output_column]) else 'string'

        logger.debug(
            "Assuming {} numeric input columns: {}".format(len(self.numeric_columns),
                                                           ", ".join(self.numeric_columns)))
        logger.debug("Assuming {} string input columns: {}".format(len(self.string_columns),
                                                                  ", ".join(self.string_columns)))
开发者ID:awslabs,项目名称:datawig,代码行数:19,代码来源:simple_imputer.py

示例7: prepare_data

# 需要导入模块: from pandas.api import types [as 别名]
# 或者: from pandas.api.types import is_numeric_dtype [as 别名]
def prepare_data(self, candles_df, ohlc):
        """Validate the candle data and store a numeric copy on the instance.

        On success sets ``self.open_column``, ``self.high_column``,
        ``self.low_column`` and ``self.close_column`` from ``ohlc`` (in that
        order), stores a copy of ``candles_df`` in ``self.data`` with those
        four columns converted to numeric where needed, and sets
        ``self.is_data_prepared`` to True. ``candles_df`` itself is not
        modified.

        :param candles_df: pandas DataFrame with at least
            ``self.required_count`` rows.
        :param ohlc: sequence of four column names: open, high, low, close.
        :raises Exception: if ``candles_df`` is not a DataFrame, has too few
            rows, ``ohlc`` does not hold exactly four entries, or any of them
            is not a column of ``candles_df``.
        """
        if not isinstance(candles_df, pd.DataFrame):
            raise Exception('Candles must be in Panda data frame type')

        if not len(candles_df) >= self.required_count:
            raise Exception('{0} requires at least {1} data'.format(self.name,
                                                                    self.required_count))

        if not (ohlc and len(ohlc) == 4):
            raise Exception('Provide list of four elements indicating columns in strings. '
                            'Default: [open, high, low, close]')

        if not set(ohlc).issubset(candles_df.columns):
            raise Exception('Provided columns does not exist in given data frame')

        self.open_column = ohlc[0]
        self.high_column = ohlc[1]
        self.low_column = ohlc[2]
        self.close_column = ohlc[3]

        # Work on a copy so the caller's frame keeps its original dtypes.
        self.data = candles_df.copy()

        for column in (self.close_column, self.open_column,
                       self.low_column, self.high_column):
            if not is_numeric_dtype(self.data[column]):
                self.data[column] = pd.to_numeric(self.data[column])

        self.is_data_prepared = True
开发者ID:SpiralDevelopment,项目名称:candlestick-patterns,代码行数:39,代码来源:candlestick_finder.py

示例8: feature_dependence_matrix

# 需要导入模块: from pandas.api import types [as 别名]
# 或者: from pandas.api.types import is_numeric_dtype [as 别名]
def feature_dependence_matrix(rf, X_train, n_samples=5000):
    """
    Given training observation independent variables in X_train (a dataframe),
    compute the feature importance using each var as a dependent variable.
    ``rf`` is refit for each var as target using the others as independent
    vars.  Only numeric columns are used as targets; the row of any other
    column is left empty.

    The frame is first passed through ``sample_rows(X_train, n_samples)``.

    :param rf: estimator exposing ``fit(X, y)`` and ``oob_score_``; refit in
               place once per numeric column.
    :param X_train: dataframe of independent variables.
    :param n_samples: forwarded to ``sample_rows`` and to
                      ``permutation_importances_raw`` (5000 by default).
    :return: a non-symmetric data frame with the dependence matrix where each
             row is the importance of each var to the row's var used as a
             model target. The first column, 'Dependence', holds the OOB score
             of the fit; a var's importance to itself is set to 1.0.
    """
    numeric = {col for col in X_train if is_numeric_dtype(X_train[col])}

    X_train = sample_rows(X_train, n_samples)

    all_columns = X_train.columns.tolist()
    df_dep = pd.DataFrame(index=X_train.columns, columns=['Dependence'] + all_columns)
    # Walk over *all* columns so that `position` is the column's position in
    # the matrix. Counting only the numeric columns would write into the wrong
    # row/slot as soon as a non-numeric column is present.
    for position, col in enumerate(all_columns):
        if col not in numeric:
            continue
        X, y = X_train.drop(col, axis=1), X_train[col]
        rf.fit(X, y)
        # assumes one importance per column of X, in column order - TODO confirm
        imp = permutation_importances_raw(rf, X, y, oob_regression_r2_score, n_samples)
        imp = np.insert(imp, position, 1.0)  # the target's importance to itself
        df_dep.iloc[position] = np.insert(imp, 0, rf.oob_score_)  # add overall dependence

    return df_dep
开发者ID:canard0328,项目名称:malss,代码行数:28,代码来源:rfpimp.py

示例9: is_numeric_dtype

# 需要导入模块: from pandas.api import types [as 别名]
# 或者: from pandas.api.types import is_numeric_dtype [as 别名]
def is_numeric_dtype(arr_or_dtype):
    """Crude numeric check, only suitable for array-like objects.

    Looks at ``arr_or_dtype.dtype.type``. Objects lacking that attribute
    chain are reported as non-numeric. NumPy number and boolean scalar types
    count as numeric; datetime64 and timedelta64 never do.
    """
    dtype = getattr(arr_or_dtype, "dtype", None)
    scalar_type = getattr(dtype, "type", type(None))
    if not issubclass(scalar_type, (np.number, np.bool_)):
        return False
    # explicit exclusion of the time types
    return not issubclass(scalar_type, (np.datetime64, np.timedelta64))
开发者ID:birforce,项目名称:vnpy_crypto,代码行数:10,代码来源:pandas.py

示例10: get_errors

# 需要导入模块: from pandas.api import types [as 别名]
# 或者: from pandas.api.types import is_numeric_dtype [as 别名]
def get_errors(self, series: pd.Series, column: 'column.Column'):
        """Return a ``ValidationWarning`` for every element that fails validation.

        ``self.validate(series)`` supplies a boolean result per element. When
        ``column.allow_empty`` is set, empty elements are never reported:
        null values for categorical and numeric series, zero-length strings
        otherwise.

        :param series: the data to validate.
        :param column: object whose ``allow_empty`` flag controls whether
            empty elements are skipped.
        :return: list of ``ValidationWarning`` built from ``self.message``, the
            failing value, its index label and ``series.name``; empty when
            nothing fails.
        """
        # True where the child class's validate function rejects the element
        simple_validation = ~self.validate(series)

        if column.allow_empty:
            # Categorical and numeric series have no .str accessor semantics
            # for "empty", so emptiness means null there. The isinstance check
            # replaces is_categorical_dtype, deprecated since pandas 2.1.
            if isinstance(series.dtype, pd.CategoricalDtype) or is_numeric_dtype(series):
                failed = ~series.isnull() & simple_validation
            else:
                failed = (series.str.len() > 0) & simple_validation
        else:
            failed = simple_validation

        # Index labels of the failing items (probably row numbers)
        indices = series.index[failed]

        return [
            ValidationWarning(
                message=self.message,
                value=series[i],
                row=i,
                column=series.name
            )
            for i in indices
        ]
开发者ID:TMiguelT,项目名称:PandasSchema,代码行数:34,代码来源:validation.py

示例11: fit

# 需要导入模块: from pandas.api import types [as 别名]
# 或者: from pandas.api.types import is_numeric_dtype [as 别名]
def fit(self, X, y):
        """Fit the Imputer to the dataset and determine the right approach.

        The dtype of a single reference series picks the imputer: ``X`` when
        ``y`` is None (univariate method), otherwise ``y`` (predictive
        method). A numeric reference uses ``self.num_imputer``, a string
        reference uses ``self.cat_imputer``; any other dtype leaves both
        statistics as None. The chosen imputer is always fit with ``(X, y)``.

        Args:
            X (pd.Series): Dataset to fit the imputer, or predictors
            y (pd.Series): None, or dataset to fit predictors

        Returns:
            self. Instance of the class, with ``statistics_`` set to a dict
            holding ``"param"`` and ``"strategy"``.
        """
        # the series whose dtype decides which imputer applies
        reference = X if y is None else y

        # blank unless one of the dtype checks below matches
        fitted = {"param": None, "strategy": None}
        if is_numeric_dtype(reference):
            fitted = {"param": self.num_imputer.fit(X, y),
                      "strategy": self.num_imputer.strategy}
        if is_string_dtype(reference):
            fitted = {"param": self.cat_imputer.fit(X, y),
                      "strategy": self.cat_imputer.strategy}

        self.statistics_ = fitted
        return self
开发者ID:kearnz,项目名称:autoimpute,代码行数:36,代码来源:default.py

示例12: _not_num_series

# 需要导入模块: from pandas.api import types [as 别名]
# 或者: from pandas.api.types import is_numeric_dtype [as 别名]
def _not_num_series(m, s):
    """Private check that raises when Series ``s`` is not numeric.

    Args:
        m: label placed at the start of the error message.
        s: Series to check; its ``name`` and ``dtype`` appear in the message.

    Raises:
        TypeError: if ``s`` does not have a numeric dtype.
    """
    if is_numeric_dtype(s):
        return
    raise TypeError(f"{m} not appropriate for Series {s.name} of type {s.dtype}.")
开发者ID:kearnz,项目名称:autoimpute,代码行数:8,代码来源:errors.py

示例13: _process_chart_y

# 需要导入模块: from pandas.api import types [as 别名]
# 或者: from pandas.api.types import is_numeric_dtype [as 别名]
def _process_chart_y(self, data, x, y, single_y):
        """This should happen after _process_chart_x

        Resolve which column(s) to plot on the y axis. An explicit ``y`` (or
        else ``self.y``) wins. Otherwise the candidates are all columns of
        ``data`` except ``x`` and those listed in ``self.by``,
        ``self.groupby`` and ``self.grid``; when there are several and at
        least one is numeric, only the numeric ones are kept.

        Returns a single column when exactly one candidate remains or
        ``single_y`` is set (the first candidate), otherwise the list of
        candidates.
        """
        y = y or self.y
        if y is not None:
            return y

        excluded = [x] + self.by + self.groupby + self.grid
        candidates = [col for col in data.columns if col not in excluded]
        if len(candidates) > 1:
            # if columns have different dtypes, only include numeric columns
            from pandas.api.types import is_numeric_dtype as isnum
            numeric = [col for col in candidates if isnum(data[col])]
            if len(numeric) >= 1:
                candidates = numeric

        if len(candidates) == 1 or single_y:
            return candidates[0]
        return candidates
开发者ID:holoviz,项目名称:hvplot,代码行数:15,代码来源:converter.py

示例14: get_var_type

# 需要导入模块: from pandas.api import types [as 别名]
# 或者: from pandas.api.types import is_numeric_dtype [as 别名]
def get_var_type(col):
    """
    Return var_type (for KDEMultivariate) of the column

    Parameters
    ----------
    col : pandas.Series
        A dataframe column.

    Returns
    -------
    out : str
        One of ['c', 'o', 'u']: 'c' for numeric columns, 'o' for ordered
        categoricals and 'u' for everything else.

    See Also
    --------
    The origin of the character codes is
    :class:`statsmodels.nonparametric.kernel_density.KDEMultivariate`.
    """
    if pdtypes.is_numeric_dtype(col):
        # continuous
        return 'c'
    # isinstance on the dtype replaces is_categorical_dtype, which has been
    # deprecated since pandas 2.1
    if isinstance(col.dtype, pdtypes.CategoricalDtype):
        # ordered or unordered
        return 'o' if col.cat.ordered else 'u'
    # unordered if unsure, e.g string columns that
    # are not categorical
    return 'u'
开发者ID:has2k1,项目名称:plotnine,代码行数:31,代码来源:density.py

示例15: assert_equal_ndarray

# 需要导入模块: from pandas.api import types [as 别名]
# 或者: from pandas.api.types import is_numeric_dtype [as 别名]
def assert_equal_ndarray(a, b, exact=False, elem_name=None):
    """Assert that array ``a`` equals ``b`` (after ``b = asarray(b)``).

    With ``exact`` false:

    * two numeric arrays must have the same shape and be close according to
      ``np.allclose`` with NaNs treated as equal;
    * otherwise, when both objects have a ``dtype``, ``a.dtype`` has more
      than one field and ``b.dtype`` has at least one, both are wrapped in
      DataFrames and compared with ``assert_equal``.

    Every other case, and every ``exact`` comparison, requires all elements
    to compare equal. Failure messages come from ``format_msg(elem_name)``.
    """
    b = asarray(b)
    if not exact:
        if is_numeric_dtype(a) and is_numeric_dtype(b):
            assert a.shape == b.shape, format_msg(elem_name)
            assert np.allclose(a, b, equal_nan=True), format_msg(elem_name)
            return
        both_have_dtype = hasattr(a, "dtype") and hasattr(b, "dtype")
        # Structured dtype
        if both_have_dtype and len(a.dtype) > 1 and len(b.dtype) > 0:
            assert_equal(pd.DataFrame(a), pd.DataFrame(b), exact, elem_name)
            return
    assert np.all(a == b), format_msg(elem_name)
开发者ID:theislab,项目名称:anndata,代码行数:17,代码来源:helpers.py


注:本文中的pandas.api.types.is_numeric_dtype方法示例由纯净天空整理自Github/MSDocs等开源代码及文档管理平台,相关代码片段筛选自各路编程大神贡献的开源项目,源码版权归原作者所有,传播和使用请参考对应项目的License;未经允许,请勿转载。