当前位置: 首页>>代码示例>>Python>>正文


Python pandas.value_counts方法代码示例

本文整理汇总了Python中pandas.value_counts方法的典型用法代码示例。如果您正苦于以下问题:Python pandas.value_counts方法的具体用法?Python pandas.value_counts怎么用?Python pandas.value_counts使用的例子?那么恭喜您, 这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在pandas的用法示例。


在下文中一共展示了pandas.value_counts方法的15个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Python代码示例。

示例1: spatial_heatmap

# 需要导入模块: import pandas [as 别名]
# 或者: from pandas import value_counts [as 别名]
def spatial_heatmap(array, path, title=None, color="Greens", figformat="png"):
    """Taking channel information and creating post run channel activity plots."""
    logging.info("Nanoplotter: Creating heatmap of reads per channel using {} reads."
                 .format(array.size))
    activity_map = Plot(
        path=path + "." + figformat,
        title="Number of reads generated per channel")
    layout = make_layout(maxval=np.amax(array))
    valueCounts = pd.value_counts(pd.Series(array))
    for entry in valueCounts.keys():
        layout.template[np.where(layout.structure == entry)] = valueCounts[entry]
    plt.figure()
    ax = sns.heatmap(
        data=pd.DataFrame(layout.template, index=layout.yticks, columns=layout.xticks),
        xticklabels="auto",
        yticklabels="auto",
        square=True,
        cbar_kws={"orientation": "horizontal"},
        cmap=color,
        linewidths=0.20)
    ax.set_title(title or activity_map.title)
    activity_map.fig = ax.get_figure()
    activity_map.save(format=figformat)
    plt.close("all")
    return [activity_map] 
开发者ID:wdecoster,项目名称:NanoPlot,代码行数:27,代码来源:spatial_heatmap.py

示例2: _analyze_field

# 需要导入模块: import pandas [as 别名]
# 或者: from pandas import value_counts [as 别名]
def _analyze_field(self, df, id, output_folder=".", n_head=10):
        id = str(id)
        m = df.shape[1]
        df.columns = [str(i) for i in range(m)]

        agg_dict = dict()
        agg_dict[id] = "size"
        for i in range(int(id)):
            agg_dict[str(i)] = lambda x: ", ".join(
                pd.value_counts(x).index[:n_head])
        name_dict = dict()
        name_dict[id] = "count"
        df_analyze = df.groupby(id).agg(agg_dict).rename(
            columns=name_dict).reset_index()
        filename = join(output_folder, "column-%s-analyze.xlsx" % id)

        log = u""
        log += u"Tags         : {}\n".format(df_analyze.shape[0])
        tags = df_analyze[id].to_dict().values()
        tags = sorted(tags)
        log += u"List tags    : {}\n".format(u", ".join(tags))
        df_analyze.to_excel(filename, index=False)
        return log 
开发者ID:undertheseanlp,项目名称:underthesea,代码行数:25,代码来源:tc_.py

示例3: test_blockwise_shufflesplit

# 需要导入模块: import pandas [as 别名]
# 或者: from pandas import value_counts [as 别名]
def test_blockwise_shufflesplit():
    splitter = dask_ml.model_selection.ShuffleSplit(random_state=0)
    assert splitter.get_n_splits() == 10
    gen = splitter.split(dX)

    train_idx, test_idx = next(gen)
    assert isinstance(train_idx, da.Array)
    assert isinstance(test_idx, da.Array)

    assert train_idx.shape == (99,)  # 90% of 110
    assert test_idx.shape == (11,)

    assert train_idx.chunks == ((45, 45, 9),)
    assert test_idx.chunks == ((5, 5, 1),)

    counts = pd.value_counts(train_idx.compute())
    assert counts.max() == 1

    N = len(X)

    np.testing.assert_array_equal(
        np.unique(da.concatenate([train_idx, test_idx])), np.arange(N)
    ) 
开发者ID:dask,项目名称:dask-ml,代码行数:25,代码来源:test_split.py

示例4: _visualize_helper

# 需要导入模块: import pandas [as 别名]
# 或者: from pandas import value_counts [as 别名]
def _visualize_helper(
    model_dir, output_dir, subsample_rate, save_type, ordering, external_legend_params
):
    logger.info("Generating figures")

    # Data
    metadata_df = pd.read_csv(os.path.join(model_dir, "metadata.csv"))
    cluster_ids = np.load(os.path.join(model_dir, "cluster_ids.npy"))
    metadata_df["ax_1"] = cluster_ids[:, 0]
    metadata_df["ax_2"] = cluster_ids[:, 1]
    metadata_df["opponent_id"] = metadata_df["opponent_id"].apply(ABBREVIATIONS.get)

    def save_path(prefix):
        return osp.join(output_dir, f"{prefix}.{save_type}")

    counts = pd.value_counts(metadata_df["opponent_id"])
    min_counts = counts.min()
    opponent_groups = metadata_df.groupby("opponent_id")
    opponent_dfs = {name: group.sample(n=min_counts) for name, group in opponent_groups}
    opponent_dfs = [opponent_dfs[label] for label in ordering]
    metadata_df = pd.concat(opponent_dfs)

    _plot_and_save_chart(save_path("combined"), [metadata_df])
    _plot_and_save_chart(save_path("subsampled"), [metadata_df.sample(frac=subsample_rate)])
    _plot_and_save_chart(save_path("sidebyside"), opponent_dfs)

    if external_legend_params is not None:
        _external_legend(osp.join(output_dir, "external_legend.pdf"))

    logger.info("Visualization complete") 
开发者ID:HumanCompatibleAI,项目名称:adversarial-policies,代码行数:32,代码来源:visualize.py

示例5: calc_tvd

# 需要导入模块: import pandas [as 别名]
# 或者: from pandas import value_counts [as 别名]
def calc_tvd(label_dict,attr):
    '''
    attr should be a 0,1 pandas dataframe with
    columns corresponding to label names

    for example:
    names=zip(*self.graph)[0]
    calc_tvd(label_dict,attr[names])

    label_dict should be a dictionary key:1d-array of samples
    '''
    ####Calculate Total Variation####
    if np.min(attr.values)<0:
        raise ValueError('calc_tvd received \
                 attr that may not have been in {0,1}')

    label_names=label_dict.keys()
    attr=attr[label_names]

    df2=attr.drop_duplicates()
    df2 = df2.reset_index(drop = True).reset_index()
    df2=df2.rename(columns = {'index':'ID'})
    real_data_id=pd.merge(attr,df2)
    real_counts = pd.value_counts(real_data_id['ID'])
    real_pdf=real_counts/len(attr)

    label_list_dict={k:np.round(v.ravel()) for k,v in label_dict.items()}
    df_dat=pd.DataFrame.from_dict(label_list_dict)
    dat_id=pd.merge(df_dat,df2,on=label_names,how='left')
    dat_counts=pd.value_counts(dat_id['ID'])
    dat_pdf = dat_counts / dat_counts.sum()
    diff=real_pdf.subtract(dat_pdf, fill_value=0)
    tvd=0.5*diff.abs().sum()
    return tvd 
开发者ID:mkocaoglu,项目名称:CausalGAN,代码行数:36,代码来源:pairwise.py

示例6: class_weights

# 需要导入模块: import pandas [as 别名]
# 或者: from pandas import value_counts [as 别名]
def class_weights(self):
        import pandas as pd
        label_freq = pd.value_counts(self.labels)
        class_weights = label_freq.median() / label_freq
        class_weights = class_weights.sort_index().values
        class_weights = torch.from_numpy(class_weights.astype(np.float32))
        return class_weights 
开发者ID:Erotemic,项目名称:ibeis,代码行数:9,代码来源:train_main.py

示例7: test_hash_uniformity

# 需要导入模块: import pandas [as 别名]
# 或者: from pandas import value_counts [as 别名]
def test_hash_uniformity(map_size_and_hashed_values):
    n, h = map_size_and_hashed_values

    k = len(h)
    num_bins = k//5  # Want about 5 items per bin for chi-squared
    bins = np.linspace(0, n + 1, num_bins)

    binned_data = pd.cut(h, bins)
    distribution = pd.value_counts(binned_data).sort_index()
    c, p = chisquare(distribution)

    assert p > 0.05, "Data not uniform" 
开发者ID:ihmeuw,项目名称:vivarium,代码行数:14,代码来源:test_randomness_index_map.py

示例8: columns_types

# 需要导入模块: import pandas [as 别名]
# 或者: from pandas import value_counts [as 别名]
def columns_types(self):
        return pd.value_counts(self.columns_stats.loc['types']) 
开发者ID:mouradmourafiq,项目名称:pandas-summary,代码行数:4,代码来源:__init__.py

示例9: _get_deviation_of_mean

# 需要导入模块: import pandas [as 别名]
# 或者: from pandas import value_counts [as 别名]
def _get_deviation_of_mean(self, series, multiplier=3):
        """
        Returns count of values deviating of the mean, i.e. larger than `multiplier` * `std`.
        :type series:
        :param multiplier:
        :return:
        """
        capped_series = np.minimum(
            series, series.mean() + multiplier * series.std())
        count = pd.value_counts(series != capped_series)
        count = count[True] if True in count else 0
        perc = self._percent(count / self.length)
        return count, perc 
开发者ID:mouradmourafiq,项目名称:pandas-summary,代码行数:15,代码来源:__init__.py

示例10: _get_median_absolute_deviation

# 需要导入模块: import pandas [as 别名]
# 或者: from pandas import value_counts [as 别名]
def _get_median_absolute_deviation(self, series, multiplier=3):
        """
        Returns count of values larger than `multiplier` * `mad`
        :type series:
        :param multiplier:
        :return (array):
        """
        capped_series = np.minimum(
            series, series.median() + multiplier * series.mad())
        count = pd.value_counts(series != capped_series)
        count = count[True] if True in count else 0
        perc = self._percent(count / self.length)
        return count, perc 
开发者ID:mouradmourafiq,项目名称:pandas-summary,代码行数:15,代码来源:__init__.py

示例11: _get_categorical_summary

# 需要导入模块: import pandas [as 别名]
# 或者: from pandas import value_counts [as 别名]
def _get_categorical_summary(self, column):
        series = self.df[column]
        # Only run if at least 1 non-missing value
        value_counts = series.value_counts()
        stats = {
            'top': '{}: {}'.format(value_counts.index[0], value_counts.iloc[0]),
        }
        return pd.concat([pd.Series(stats, name=column),
                          self.columns_stats[column]],
                         sort=True) 
开发者ID:mouradmourafiq,项目名称:pandas-summary,代码行数:12,代码来源:__init__.py

示例12: _get_bool_summary

# 需要导入模块: import pandas [as 别名]
# 或者: from pandas import value_counts [as 别名]
def _get_bool_summary(self, column):
        series = self.df[column]

        stats = {}
        for class_name, class_value in dict(series.value_counts()).items():
            stats['"{}" count'.format(class_name)] = '{}'.format(class_value)
            stats['"{}" perc'.format(class_name)] = '{}'.format(
                self._percent(class_value / self.length))

        return pd.concat([pd.Series(stats, name=column),
                          self.columns_stats[column]],
                         sort=True) 
开发者ID:mouradmourafiq,项目名称:pandas-summary,代码行数:14,代码来源:__init__.py

示例13: _analyze_first_token

# 需要导入模块: import pandas [as 别名]
# 或者: from pandas import value_counts [as 别名]
def _analyze_first_token(self, df, id, output_folder="."):
        filename = join(output_folder, "column-%s-analyze.xlsx" % id)
        df_analyze = df[id].value_counts().reset_index(name="count")
        df_analyze = df_analyze.rename(columns={"index": "0"})
        df_analyze.to_excel(filename, index=False)
        log = u""
        log += u"Unique words : {}\n".format(df_analyze.shape[0])
        log += u"Top words    : {}\n".format(
            u", ".join(list(df_analyze["0"].to_dict().values())[:20]))
        return log 
开发者ID:undertheseanlp,项目名称:underthesea,代码行数:12,代码来源:tc_.py

示例14: garbage_symbols

# 需要导入模块: import pandas [as 别名]
# 或者: from pandas import value_counts [as 别名]
def garbage_symbols(df: pd.DataFrame) -> Result:
    """Find unwanted symbols in `np.object` columns.

    Returns:
        A result containing item keys per field which contained any trash symbol
    """
    garbage = (
        r"(?P<spaces>^\s|\s$)"
        r"|(?P<html_entities>&[a-zA-Z]{2,}?;|&#\d*?;)"
        r"|(?P<css>[.#@][^\d{}#.\s][^{}#.]+?{(?:[^:;{}]+?:[^:;{}]+?;)+?\s*?})"
        r"|(?P<html_tags></??(?:h\d|b|u|i|div|ul|ol|li|table|tbody|th|tr|td|p|a|br|img|sup|SUP|"
        r"blockquote)\s*?/??>|<!--|-->)"
    )

    errors = {}
    row_keys: Set = set()
    rule_result = Result("Garbage Symbols", items_count=len(df))

    for column in tqdm(df.select_dtypes([np.object]).columns, desc="Garbage Symbols"):
        matches = df[column].apply(str).str.extractall(garbage, flags=re.IGNORECASE)
        if not matches.empty:
            error_keys = df.loc[matches.unstack().index.values].index
            bad_texts = matches.stack().value_counts().index.sort_values().tolist()
            # escape backslashes for markdown repr, `\n > \\n`
            bad_texts = [
                f"'{codecs.encode(bx, 'unicode_escape').decode()[:20]}'"
                for bx in bad_texts
            ]
            error = (
                f"{len(error_keys)/len(df)*100:.1f}% of '{column}' "
                f"values contain `{', '.join(bad_texts)}`"
            )

            errors[error] = list(error_keys)
            row_keys = row_keys.union(error_keys)
    if errors:
        rule_result.add_error(
            f"{len(row_keys)/len(df) * 100:.1f}% ({len(row_keys)}) items affected",
            errors=errors,
        )
    return rule_result 
开发者ID:scrapinghub,项目名称:arche,代码行数:43,代码来源:others.py

示例15: split_dataset

# 需要导入模块: import pandas [as 别名]
# 或者: from pandas import value_counts [as 别名]
def split_dataset(dataset, seed):
    logger.info('Splitting the dataset')
    scaffolds = pd.value_counts(dataset['scaffold'])
    scaffolds = sorted(scaffolds.items(), key=lambda x: (-x[1], x[0]))
    test_scaffolds = set([x[0] for x in scaffolds[9::10]])
    dataset['SPLIT'] = 'train'
    test_scaf_idx = [x in test_scaffolds for x in dataset['scaffold']]
    dataset.loc[test_scaf_idx, 'SPLIT'] = 'test_scaffolds'
    test_idx = dataset.loc[dataset['SPLIT'] == 'train'].sample(
        frac=0.1, random_state=seed
    ).index
    dataset.loc[test_idx, 'SPLIT'] = 'test'
    dataset.drop('scaffold', axis=1, inplace=True)
    return dataset 
开发者ID:molecularsets,项目名称:moses,代码行数:16,代码来源:prepare_dataset.py


注:本文中的pandas.value_counts方法示例由纯净天空整理自Github/MSDocs等开源代码及文档管理平台,相关代码片段筛选自各路编程大神贡献的开源项目,源码版权归原作者所有,传播和使用请参考对应项目的License;未经允许,请勿转载。