当前位置: 首页>>代码示例>>Python>>正文


Python pandas.qcut方法代码示例

本文整理汇总了Python中pandas.qcut方法的典型用法代码示例。如果您正苦于以下问题:Python pandas.qcut方法的具体用法?Python pandas.qcut怎么用?Python pandas.qcut使用的例子?那么恭喜您,这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在模块pandas的用法示例。


在下文中一共展示了pandas.qcut方法的15个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Python代码示例。

示例1: _compute_stats

# 需要导入模块: import pandas [as 别名]
# 或者: from pandas import qcut [as 别名]
def _compute_stats(self, pred, expo, loss, prem):
    """Bucket predictions into quantile groups and compute relative loss ratios.

    Parameters
    ----------
    pred : array-like
        Predicted values; used only to assign each row to a quantile group.
    expo : array-like
        Exposure values, carried into the returned table unchanged.
    loss : array-like
        Observed losses.
    prem : array-like
        Premiums.

    Returns
    -------
    tab : pandas.DataFrame
        One row per sample with columns ``rank``, ``pred``, ``prem``,
        ``loss`` and ``expo``.
    agg_rlr : pandas.Series
        For each rank, ``sum(loss) / sum(prem)`` within the group divided by
        the same ratio over all samples.
    n_groups : int
        Highest rank produced by ``pd.qcut`` plus one.
    """
    # Overall loss / premium ratio; the per-group ratios are expressed
    # relative to this value.
    loss_to_returns = np.sum(loss) / np.sum(prem)

    rank = pd.qcut(pd.Series(pred), self.n_groups, labels=False)
    # Report the number of groups derived from the ranks rather than the
    # requested ``self.n_groups``.
    n_groups = rank.max() + 1

    tab = pd.DataFrame({
        'rank': rank,
        'pred': pred,
        'prem': prem,
        'loss': loss,
        'expo': expo,
    })

    grouped = tab.groupby('rank')
    # ``.sum()`` instead of ``.agg(np.sum)``: passing the NumPy callable is
    # deprecated in recent pandas and emits a FutureWarning.
    agg_rlr = (grouped['loss'].sum() / grouped['prem'].sum()) / loss_to_returns

    return tab, agg_rlr, n_groups
开发者ID:tgsmith61591,项目名称:skutil,代码行数:23,代码来源:_act.py

示例2: create_features

# 需要导入模块: import pandas [as 别名]
# 或者: from pandas import qcut [as 别名]
def create_features(self):
    """Store a 5-quantile bin index of ``Age`` on ``self.train`` and ``self.test``.

    The mean and standard deviation of ``Age`` are taken over the train and
    test frames stacked together.  Missing ages in each frame are replaced
    by a single random integer drawn by ``np.random.randint`` between
    ``mean - std`` and ``mean + std`` (one draw per frame, shared by every
    missing row of that frame) before ``pd.qcut`` assigns integer bin labels.

    NOTE(review): ``train`` and ``test`` are free names here, presumably
    module-level DataFrames; the binned result is written to ``self.train``
    / ``self.test`` -- confirm these are meant to be the same data.
    """
    # DataFrame.append was removed in pandas 2.0; pd.concat is the
    # supported way to stack the two frames.
    data = pd.concat([train, test])
    age_mean = data['Age'].mean()
    age_std = data['Age'].std()

    train_fill = np.random.randint(age_mean - age_std, age_mean + age_std)
    self.train['Age'] = pd.qcut(train['Age'].fillna(train_fill), 5, labels=False)

    test_fill = np.random.randint(age_mean - age_std, age_mean + age_std)
    self.test['Age'] = pd.qcut(test['Age'].fillna(test_fill), 5, labels=False)
开发者ID:upura,项目名称:ml-competition-template-titanic,代码行数:20,代码来源:create.py

示例3: upload

# 需要导入模块: import pandas [as 别名]
# 或者: from pandas import qcut [as 别名]
def upload():
    """Read candle data, add change / quantile columns, and hand it to ``put_s3_public``.

    Reads ``bitmex_candles1.csv`` from the working directory, adds

    * ``change``: first difference of ``close``,
    * ``roc``: ``change`` divided by the same row's ``close``,
    * ``Quantile_rank``: integer quartile label of ``roc``,

    prints the frame, plots ``roc``, serialises the frame as JSON lines and
    passes it to ``put_s3_public`` under the key ``bitmex_history_0404``.

    NOTE(review): ``bucket_name`` and ``put_s3_public`` are defined outside
    this function; their behaviour is not visible here.
    """
    df = pd.read_csv("bitmex_candles1.csv")
    df['change'] = df['close'].diff()
    df['roc'] = df['change'] / df['close']
    df['Quantile_rank'] = pd.qcut(df['roc'], 4, labels=False)

    print(df)

    df['roc'].plot()

    candle_json = df.to_json(orient='records', lines=True)
    key = "bitmex_history_0404"
    put_s3_public(bucket_name, key, candle_json)
开发者ID:economicnetwork,项目名称:archon,代码行数:19,代码来源:bitmex_history.py

示例4: var_bins

# 需要导入模块: import pandas [as 别名]
# 或者: from pandas import qcut [as 别名]
def var_bins(quality):
    """Group the index labels of ``quality`` by descending ``iv``.

    ``quality`` is sorted in place on its ``iv`` column, largest first.
    With fewer than 10 rows every index label becomes its own one-element
    group.  Otherwise the row positions are cut into 10 quantile bins and
    the labels falling into each bin form one group, in bin order.

    Returns a list of lists of index labels.
    """
    quality.sort_values(by='iv', ascending=False, inplace=True)
    names = quality.index

    if len(quality) < 10:
        return [[name] for name in names.tolist()]

    # Bin by row position (not by iv value), so bins hold equal row counts.
    bin_ids = pd.qcut(range(len(quality)), 10, labels=False)
    per_bin = pd.Series(names).groupby(bin_ids)
    return [members.tolist() for _, members in per_bin]


# 用woe替换离散变量 
开发者ID:amphibian-dev,项目名称:toad,代码行数:19,代码来源:evaluate.py

示例5: cohort_plot

# 需要导入模块: import pandas [as 别名]
# 或者: from pandas import qcut [as 别名]
def cohort_plot(data_set_path, metric_to_plot='',ncohort=10):
    """Plot the mean churn rate against the mean metric value per quantile cohort.

    The CSV at ``data_set_path`` is loaded with its first two columns as the
    index.  Rows are split into up to ``ncohort`` quantile cohorts of
    ``metric_to_plot`` (duplicate bin edges are dropped), the cohort means of
    the metric and of ``is_churn`` are plotted against each other, and the
    figure is saved as an SVG next to the input file.

    Raises ``AssertionError`` when ``data_set_path`` is not an existing file.
    """
    assert os.path.isfile(data_set_path),'"{}" is not a valid dataset path'.format(data_set_path)

    observations = pd.read_csv(data_set_path, index_col=[0, 1])
    cohort_of_row = pd.qcut(observations[metric_to_plot], ncohort, duplicates='drop')
    by_cohort = observations.groupby(cohort_of_row)

    metric_means = by_cohort[metric_to_plot].mean()
    churn_rates = by_cohort['is_churn'].mean()
    plot_frame = pd.DataFrame({
        metric_to_plot: metric_means.values,
        'churn_rate': churn_rates,
    })

    plt.figure(figsize=(6, 4))
    plt.plot(
        metric_to_plot,
        'churn_rate',
        data=plot_frame,
        marker='o',
        color='black',
        linewidth=2,
        label=metric_to_plot,
    )
    plt.xlabel('Cohort Average of  "%s"' % metric_to_plot)
    plt.ylabel('Cohort Churn Rate')
    plt.grid()
    plt.gca().set_ylim(bottom=0)

    # Output file sits beside the input: <name>_<metric>_churn_corhort.svg
    save_path = data_set_path.replace('.csv', '_' + metric_to_plot + '_churn_corhort.svg')
    plt.savefig(save_path)
    print('Saving plot to %s' % save_path)
开发者ID:carl24k,项目名称:fight-churn,代码行数:18,代码来源:listing_5_1_cohort_plot.py

示例6: sample_431

# 需要导入模块: import pandas [as 别名]
# 或者: from pandas import qcut [as 别名]
def sample_431():
    """
    4.3.1 Discretising data
    :return:
    """
    tsla_df.p_change.hist(bins=80)
    plt.show()

    # Ten equal-frequency buckets over the absolute value of p_change
    abs_change_deciles = pd.qcut(np.abs(tsla_df.p_change), 10)
    print('cats.value_counts():\n', abs_change_deciles.value_counts())

    # Hand-picked edges for the signed change:
    # -inf, -7, -5, -3, 0, 3, 5, 7, +inf
    edges = [-np.inf, -7.0, -5, -3, 0, 3, 5, 7, np.inf]
    manual_buckets = pd.cut(tsla_df.p_change, edges)
    print('bins cats.value_counts():\n', manual_buckets.value_counts())

    # 'cr_dummies' is the prefix given to the generated column names
    dummies = pd.get_dummies(manual_buckets, prefix='cr_dummies')
    print('change_ration_dummies.head():\n', dummies.head())
开发者ID:bbfamily,项目名称:abu,代码行数:21,代码来源:c4.py

示例7: IV_calc

# 需要导入模块: import pandas [as 别名]
# 或者: from pandas import qcut [as 别名]
def IV_calc(data,var):
    """Build the per-group information-value table of ``var`` against ``class``.

    When ``data[var]`` has ``object`` dtype the rows are grouped by the raw
    values of ``var``.  Otherwise the values are ranked (ties broken by order
    of appearance), cut into 10 quantile bins, and grouped by bin; the bins
    are stored on ``data`` as a new ``bin_var`` column, so the input frame
    is modified in that case.

    Returns a DataFrame indexed by group with columns ``Total`` (row count),
    ``bad`` (sum of ``class``), ``good`` (``Total - bad``), ``bad_per`` and
    ``good_per`` (each group's share of all bad / good rows) and ``I_V``
    (``(good_per - bad_per) * log(good_per / bad_per)``).
    """
    if data[var].dtypes == "object":
        group_key = var
    else:
        # Ranking first guarantees unique values, so qcut can always form
        # 10 bins even when the raw column has many ties.
        data['bin_var'] = pd.qcut(data[var].rank(method='first'),10)
        group_key = 'bin_var'

    # Both cases share the same table computation; only the key differs.
    dataf = data.groupby([group_key])['class'].agg(['count','sum'])
    dataf.columns = ["Total","bad"]
    dataf["good"] = dataf["Total"] - dataf["bad"]
    dataf["bad_per"] = dataf["bad"]/dataf["bad"].sum()
    dataf["good_per"] = dataf["good"]/dataf["good"].sum()
    dataf["I_V"] = (dataf["good_per"] - dataf["bad_per"]) * np.log(dataf["good_per"]/dataf["bad_per"])
    return dataf
开发者ID:PacktPublishing,项目名称:Statistics-for-Machine-Learning,代码行数:20,代码来源:Chapter 03_Logistic Regression vs Random Forest.py

示例8: generateInputs

# 需要导入模块: import pandas [as 别名]
# 或者: from pandas import qcut [as 别名]
def generateInputs(RunnerObj):
    '''
    Function to generate desired inputs for SINCERITIES.

    Creates the ``SINCERITIES`` folder under ``RunnerObj.inputDir`` when it
    is missing, then writes one ``ExpressionData<idx>.csv`` file per column
    of the cell-data table.  The files are (re)written on every call.

    :param RunnerObj: An instance of the :class:`BLRun`
    '''
    sincerities_dir = RunnerObj.inputDir.joinpath("SINCERITIES")
    if not sincerities_dir.exists():
        print("Input folder for SINCERITIES does not exist, creating input folder...")
        sincerities_dir.mkdir(exist_ok = False)

    ExpressionData = pd.read_csv(
        RunnerObj.inputDir.joinpath(RunnerObj.exprData), header = 0, index_col = 0
    )
    PTData = pd.read_csv(
        RunnerObj.inputDir.joinpath(RunnerObj.cellData), header = 0, index_col = 0
    )

    for idx, colName in enumerate(PTData.columns):
        # Keep only the rows that have a value in this column, then pull the
        # matching columns out of the expression table and transpose them.
        column = PTData[colName]
        present = column.index[column.notnull()]
        subset = ExpressionData.loc[:, present].T

        # Perform quantile binning as recommended in the paper
        # http://pandas.pydata.org/pandas-docs/stable/reference/api/pandas.qcut.html#pandas.qcut
        nBins = int(RunnerObj.params['nBins'])
        tQuantiles = pd.qcut(PTData.loc[present, colName], q = nBins, duplicates = 'drop')

        # Each row's time is the midpoint of the quantile interval it fell in.
        subset['Time'] = [(interval.left + interval.right)/2 for interval in tQuantiles]

        outName = "SINCERITIES/ExpressionData" + str(idx) + ".csv"
        subset.to_csv(RunnerObj.inputDir.joinpath(outName),
                      sep = ',', header = True, index = False)
开发者ID:Murali-group,项目名称:Beeline,代码行数:36,代码来源:sinceritiesRunner.py

示例9: qcut_safe

# 需要导入模块: import pandas [as 别名]
# 或者: from pandas import qcut [as 别名]
def qcut_safe(prices, q):
    """Quantile-cut ``prices`` into at most ``q`` bins labelled ``0..n-1``.

    The bin count is capped at ``len(prices)`` so that no more bins are
    requested than there are values.
    """
    bin_count = min(q, len(prices))
    bin_labels = np.arange(bin_count)
    return pd.qcut(prices, bin_count, labels=bin_labels)
开发者ID:rosetta-ai,项目名称:rosetta_recsys2019,代码行数:7,代码来源:utils.py

示例10: _add_bins

# 需要导入模块: import pandas [as 别名]
# 或者: from pandas import qcut [as 别名]
def _add_bins(df, feats, n_bins=10):
    """Return a copy of ``df`` with one ``<feature>_bin`` column per feature.

    Parameters
    ----------
    df : pandas.DataFrame
        dataframe with features
    feats : list
        features to bin (the ones to evaluate NWOE, NIV etc. for)
    n_bins : int
        number of quantile bins (roughly equal numbers of data points per
        bin) to request for each feature

    Returns
    ----------
    df_new : pandas.DataFrame
        copy of the input with a new column per feature, named as the
        original column name + '_bin'.  Features with more than
        ``2 * n_bins`` distinct values get their quantile interval;
        all other features are copied through unchanged.
    """
    binned = df.copy()

    for feat in feats:
        bin_col = str(feat) + '_bin'
        distinct_count = len(df[feat].unique())

        # Too few distinct values to form meaningful intervals: keep the
        # raw values as their own "bins".
        if distinct_count <= n_bins * 2:
            binned[bin_col] = binned[feat]
            continue

        # duplicates='drop' merges bins whose edges coincide, so fewer
        # than n_bins intervals may come back.
        binned[bin_col] = pd.qcut(df[feat], n_bins, duplicates='drop')

    return binned
开发者ID:wayfair,项目名称:pylift,代码行数:35,代码来源:base.py

示例11: y_transform

# 需要导入模块: import pandas [as 别名]
# 或者: from pandas import qcut [as 别名]
def y_transform(Y, data, flatten):
    """Convert the ``Y`` column of ``data`` according to the ``flatten`` mode.

    Threshold modes return a one-column boolean DataFrame
    (``value >= threshold``):

    * ``'mean'`` / ``'median'`` / ``'mode'``: threshold is that statistic
      of the column (first mode if there are several),
    * an ``int``: threshold is the number itself,
    * a ``float``: threshold is the column quantile at that fraction.

    Categorical modes return a one-column DataFrame of integer codes:

    * ``'cat_string'``: codes of the column treated as a categorical,
    * ``'cat_numeric'``: codes of up to 5 quantile bins.

    ``'none'`` wraps the column in a DataFrame unchanged.  Any other value
    returns the column itself.
    """
    target = data[Y]

    # Prediction is true/false but the y-feature is in a different format
    # (e.g. continuous): compare against a threshold.
    if flatten == 'mean':
        return pd.DataFrame(target >= target.mean())
    if flatten == 'median':
        return pd.DataFrame(target >= target.median())
    if flatten == 'mode':
        return pd.DataFrame(target >= target.mode()[0])
    # Exact type checks: bool and numpy scalars are deliberately not matched.
    if type(flatten) is int:
        return pd.DataFrame(target >= flatten)
    if type(flatten) is float:
        return pd.DataFrame(target >= target.quantile(flatten))

    # The y-feature is converted to a categorical, whether it is a number
    # or a string.
    if flatten == 'cat_string':
        as_categorical = pd.Categorical(target)
        return pd.DataFrame(pd.Series(as_categorical).cat.codes)
    if flatten == 'cat_numeric':
        quantile_bins = pd.qcut(target, 5, duplicates='drop')
        return pd.DataFrame(pd.Series(quantile_bins).cat.codes)

    # The y-feature is already in the format of the prediction output.
    if flatten == 'none':
        return pd.DataFrame(target)

    return target
开发者ID:autonomio,项目名称:autonomio,代码行数:41,代码来源:y_transform.py

示例12: build_column

# 需要导入模块: import pandas [as 别名]
# 或者: from pandas import qcut [as 别名]
def build_column(self, data):
    """Bin ``data[col]`` and return the bin names as a string Series.

    Reads ``col``, ``operation``, ``bins`` and ``labels`` from ``self.cfg``.
    ``operation == "cut"`` uses ``pd.cut`` with ``bins`` equal-width bins;
    anything else uses ``pd.qcut`` with ``bins`` quantiles.  Each row is
    mapped to the matching entry of the comma-separated ``labels`` when
    given, otherwise to the string form of its interval.

    The result is indexed like ``data`` and named ``self.name``.
    """
    settings = self.cfg
    col = settings.get("col")
    operation = settings.get("operation")
    bin_count = int(settings.get("bins"))
    labels = settings.get("labels")

    if operation == "cut":
        bin_data = pd.cut(data[col], bins=bin_count)
    else:
        bin_data = pd.qcut(data[col], q=bin_count)

    # Bin code -> display name, from user labels or the intervals themselves.
    display_names = labels.split(",") if labels else bin_data.cat.categories
    cats = {code: str(display) for code, display in enumerate(display_names)}

    named = bin_data.cat.codes.map(cats)
    return pd.Series(named, index=data.index, name=self.name)
开发者ID:man-group,项目名称:dtale,代码行数:16,代码来源:column_builders.py

示例13: build_code

# 需要导入模块: import pandas [as 别名]
# 或者: from pandas import qcut [as 别名]
def build_code(self):
    """Return Python source that rebuilds the binned column on a frame named ``df``.

    Reads ``col``, ``operation``, ``bins`` and ``labels`` from ``self.cfg``
    and emits three statements, joined by newlines:

    1. ``<name>_data``: ``pd.cut`` (when ``operation == "cut"``) or
       ``pd.qcut`` of ``df[col]``,
    2. ``<name>_cats``: a dict from bin code to display string, built from
       the comma-separated ``labels`` when given, otherwise from the
       categories of ``<name>_data``,
    3. the assignment of the mapped codes to ``df[<name>]``.

    ``<name>`` is ``self.name``.
    """
    col, operation, bins, labels = (
        self.cfg.get(p) for p in ["col", "operation", "bins", "labels"]
    )
    name = self.name

    if operation == "cut":
        data_template = "{name}_data = pd.cut(df['{col}'], bins={bins})"
    else:
        # pd.qcut takes the number of quantiles as ``q``; it has no ``bins``
        # keyword, so emitting ``bins=`` produced code that raised TypeError.
        data_template = "{name}_data = pd.qcut(df['{col}'], q={bins})"
    bins_code = [data_template.format(name=name, col=col, bins=bins)]

    if labels:
        # Labels are emitted as quoted string literals ({!r}); unquoted they
        # would be parsed as undefined identifiers in the generated code.
        labels_str = ", ".join(
            "{}: {!r}".format(idx, cat) for idx, cat in enumerate(labels.split(","))
        )
        bins_code.append(
            "{name}_cats = {{{labels}}}".format(name=name, labels=labels_str)
        )
    else:
        # Doubled braces are literal braces of the emitted dict
        # comprehension; only {name} is substituted.
        bins_code.append(
            "{name}_cats = {{idx: str(cat) for idx, cat in enumerate({name}_data.cat.categories)}}".format(
                name=name
            )
        )

    s_str = "df.loc[:, '{name}'] = pd.Series({name}_data.cat.codes.map({name}_cats), index=df.index, name='{name}')"
    bins_code.append(s_str.format(name=name))
    return "\n".join(bins_code)
开发者ID:man-group,项目名称:dtale,代码行数:34,代码来源:column_builders.py

示例14: add_returns_in_place

# 需要导入模块: import pandas [as 别名]
# 或者: from pandas import qcut [as 别名]
def add_returns_in_place(df):  # modifies df
    """Attach returns and their decile bins to ``df`` and return it with the bin categories.

    ``compute_returns(df)`` supplies the returns, which are cut into 10
    quantile bins twice: once as intervals and once as integer labels.
    Three columns are added to ``df`` in place:
    ``close_price_returns``, ``close_price_returns_bins`` (intervals) and
    ``close_price_returns_labels`` (integer bin index).

    Returns ``(df, categories)`` where ``categories`` are the interval
    categories of the bins.
    """
    num_bins = 10
    returns = compute_returns(df)

    interval_bins = pd.qcut(returns, num_bins)
    bin_indices = pd.qcut(returns, num_bins, labels=False)

    df['close_price_returns'] = returns
    df['close_price_returns_bins'] = interval_bins
    df['close_price_returns_labels'] = bin_indices

    return df, interval_bins.values.categories
开发者ID:philipperemy,项目名称:deep-learning-bitcoin,代码行数:14,代码来源:returns_quantization.py

示例15: fit

# 需要导入模块: import pandas [as 别名]
# 或者: from pandas import qcut [as 别名]
def fit(self, X, y=None):
    """
    Learn the equal-frequency interval limits (quantiles) of each variable.

    Parameters
    ----------

    X : pandas dataframe of shape = [n_samples, n_features]
        The training input samples.
        Can be the entire dataframe, not just the variables to be transformed.
    y : None
        y is not needed in this encoder. You can pass y or None.

    Attributes
    ----------

    binner_dict_: dictionary
        The dictionary containing the {variable: interval limits} pairs used
        to sort the values into discrete intervals.
    input_shape_: tuple
        Shape of X as returned by the parent class's ``fit``.
    """
    # The parent fit checks the input dataframe and returns the frame to use.
    X = super().fit(X, y)

    self.binner_dict_ = {}

    for var in self.variables:
        _, edges = pd.qcut(x=X[var], q=self.q, retbins=True, duplicates='drop')

        # Open both ends so values outside the fitted range still land in
        # the first or last interval.
        limits = list(edges)
        limits[0] = float("-inf")
        limits[-1] = float("inf")
        self.binner_dict_[var] = limits

    self.input_shape_ = X.shape

    return self
开发者ID:solegalli,项目名称:feature_engine,代码行数:40,代码来源:discretisers.py


注:本文中的pandas.qcut方法示例由纯净天空整理自Github/MSDocs等开源代码及文档管理平台,相关代码片段筛选自各路编程大神贡献的开源项目,源码版权归原作者所有,传播和使用请参考对应项目的License;未经允许,请勿转载。