This article collects typical usage examples of collections.Counter in Python. If you have been wondering how to use collections.Counter, what it is good for, or what working examples look like, the hand-picked code samples below may help. You can also explore further examples from the containing collections module.
The following shows 15 code examples of collections.Counter, sorted by popularity by default. You can upvote the examples you like or find useful; your feedback helps the system recommend better Python code examples.
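For readers who just need the basics first, here is a minimal, self-contained sketch of the most common Counter operations (the sample sentence is made up for illustration): counting items from an iterable, looking up missing keys, listing the most frequent elements, and updating counts incrementally.

from collections import Counter

# Count hashable items from any iterable; missing keys return 0 instead of raising KeyError.
word_counts = Counter("the quick brown fox jumps over the lazy dog".split())
print(word_counts["the"])          # 2
print(word_counts["cat"])          # 0

# The most frequent elements, as (element, count) pairs.
print(word_counts.most_common(2))  # [('the', 2), ('quick', 1)]

# Counters can be updated incrementally from further iterables.
word_counts.update(["fox", "fox"])
print(word_counts["fox"])          # 3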
Example 1: value_counts
# Required import: import collections [as alias]
# Or: from collections import Counter [as alias]
def value_counts(self, subset, attr, value, base=False):
    """
    Get the number of occurrences per value of the dependent variable when
    the given attribute is equal to the given value.

    FIXME: Can attr/value be eliminated??

    Args:
        subset: the subset of rows to act upon.
        attr: the attribute of the value.
        value: the value with which to track counts.
        base: whether or not to calculate values based on the dependent
            value (default False).

    Returns:
        A Counter instance detailing the number of occurrences per
        dependent variable value.
    """
    counts = Counter()
    for row in subset:
        if row[attr] == value or base:
            counts[row[self.dependent]] += 1
    return counts
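The same counting pattern can be tried outside the class as a minimal, self-contained sketch; the row dictionaries and the 'play' label below are invented stand-ins for the subset and the dependent variable:

from collections import Counter

rows = [
    {"outlook": "sunny", "play": "no"},
    {"outlook": "sunny", "play": "yes"},
    {"outlook": "rain", "play": "yes"},
]

# Count the dependent label only for rows matching attr == value.
counts = Counter(row["play"] for row in rows if row["outlook"] == "sunny")
print(counts)  # Counter({'no': 1, 'yes': 1})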
Example 2: attr_counts
# Required import: import collections [as alias]
# Or: from collections import Counter [as alias]
def attr_counts(self, subset, attr):
    """
    Get the number of occurrences per value of the given attribute.

    Args:
        subset: the subset of rows to act upon.
        attr: the selected attribute.

    Returns:
        A Counter instance detailing the number of occurrences per
        attribute value.
    """
    counts = Counter()
    for row in subset:
        counts[row[attr]] += 1
    return counts
Example 3: get_buckets
# Required import: import collections [as alias]
# Or: from collections import Counter [as alias]
def get_buckets(stories, max_ignore_unbatched=100, max_pad_amount=25):
    sentencecounts = [len(sents_graphs) for (sents_graphs, query, answer) in stories]
    countpairs = sorted(collections.Counter(sentencecounts).items())
    buckets = []
    smallest_left_val = 0
    num_unbatched = max_ignore_unbatched
    for val, ct in countpairs:
        num_unbatched += ct
        if val - smallest_left_val > max_pad_amount or num_unbatched > max_ignore_unbatched:
            buckets.append(val)
            smallest_left_val = val
            num_unbatched = 0
    if buckets[-1] != countpairs[-1][0]:
        buckets.append(countpairs[-1][0])
    return buckets
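A hedged usage sketch, assuming the get_buckets function above (and its collections import) is in scope; each story is a (sentences, query, answer) tuple and the data is invented to show how bucket boundaries come out:

stories = [
    (["s1", "s2"], "q", "a"),
    (["s1", "s2"], "q", "a"),
    (["s1", "s2", "s3"], "q", "a"),
    (["s"] * 5, "q", "a"),
    (["s"] * 40, "q", "a"),
]

# Nearby lengths share a bucket; the outlier length 40 gets its own boundary.
print(get_buckets(stories))  # [2, 40]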
Example 4: _get_genome_amounts
# Required import: import collections [as alias]
# Or: from collections import Counter [as alias]
# Note: Python 2 code (uses the built-in 'long' type).
def _get_genome_amounts(self, probability, max_genome_amount):
    """
    Get amounts of genomes by original genome

    @param probability: Proportion of simulated original genomes
    @type probability: int | long | float
    @param max_genome_amount: Total number of genomes
    @type max_genome_amount: int | long

    @return: List of integers representing the amount of strains
    @rtype: list[int]
    """
    assert isinstance(probability, (int, long, float))
    assert 0 <= probability <= 1
    assert isinstance(max_genome_amount, (int, long))
    genome_amounts = self._get_genome_amounts_geometric(probability, max_genome_amount)
    divergence = Counter(genome_amounts)[1] / float(len(genome_amounts))
    if max_genome_amount >= 10:
        while abs(divergence - probability) > 0.05:
            # print "need: {}, gotten: {}".format(probability, divergence)
            genome_amounts = self._get_genome_amounts_geometric(probability, max_genome_amount)
            divergence = Counter(genome_amounts)[1] / float(len(genome_amounts))
    return genome_amounts
Example 5: build_vocab
# Required import: import collections [as alias]
# Or: from collections import Counter [as alias]
def build_vocab(words, vocab_size, visual_fld):
    """ Build vocabulary of VOCAB_SIZE most frequent words and write it to
    visualization/vocab.tsv
    """
    utils.safe_mkdir(visual_fld)
    file = open(os.path.join(visual_fld, 'vocab.tsv'), 'w')

    dictionary = dict()
    count = [('UNK', -1)]
    index = 0
    count.extend(Counter(words).most_common(vocab_size - 1))

    for word, _ in count:
        dictionary[word] = index
        index += 1
        file.write(word + '\n')

    index_dictionary = dict(zip(dictionary.values(), dictionary.keys()))
    file.close()
    return dictionary, index_dictionary
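The Counter part of this recipe can be sketched on its own, without the file output or the project's utils helper; the token list below is invented, and index 0 is reserved for unknown words:

from collections import Counter

words = ["the", "cat", "sat", "on", "the", "mat", "the", "cat"]
vocab_size = 4

count = [("UNK", -1)]
count.extend(Counter(words).most_common(vocab_size - 1))

dictionary = {word: index for index, (word, _) in enumerate(count)}
index_dictionary = {index: word for word, index in dictionary.items()}
print(dictionary)        # {'UNK': 0, 'the': 1, 'cat': 2, 'sat': 3}
print(index_dictionary)  # {0: 'UNK', 1: 'the', 2: 'cat', 3: 'sat'}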
Example 6: generate_bi_graphemes_dictionary
# Required import: import collections [as alias]
# Or: from collections import Counter [as alias]
def generate_bi_graphemes_dictionary(label_list):
    freqs = Counter()
    for label in label_list:
        label = label.split(' ')
        for i in label:
            for pair in split_every(2, i):
                if len(pair) == 2:
                    freqs[pair] += 1

    with open('resources/unicodemap_en_baidu_bi_graphemes.csv', 'w') as bigram_label:
        bigramwriter = csv.writer(bigram_label, delimiter=',')
        baidu_labels = list('\' abcdefghijklmnopqrstuvwxyz')
        for index, key in enumerate(baidu_labels):
            bigramwriter.writerow((key, index + 1))
        for index, key in enumerate(freqs.keys()):
            bigramwriter.writerow((key, index + len(baidu_labels) + 1))
Example 7: test_tokens_to_indices
# Required import: import collections [as alias]
# Or: from collections import Counter [as alias]
def test_tokens_to_indices():
    counter = Counter(['a', 'b', 'b', 'c', 'c', 'c', 'some_word$'])

    vocab = text.vocab.Vocabulary(counter, most_freq_count=None, min_freq=1,
                                  unknown_token='<unk>', reserved_tokens=None)

    i1 = vocab.to_indices('c')
    assert i1 == 1

    i2 = vocab.to_indices(['c'])
    assert i2 == [1]

    i3 = vocab.to_indices(['<unk>', 'non-exist'])
    assert i3 == [0, 0]

    i4 = vocab.to_indices(['a', 'non-exist', 'a', 'b'])
    assert i4 == [3, 0, 3, 2]
Example 8: test_indices_to_tokens
# Required import: import collections [as alias]
# Or: from collections import Counter [as alias]
def test_indices_to_tokens():
    counter = Counter(['a', 'b', 'b', 'c', 'c', 'c', 'some_word$'])

    vocab = text.vocab.Vocabulary(counter, most_freq_count=None, min_freq=1,
                                  unknown_token='<unknown>', reserved_tokens=None)

    i1 = vocab.to_tokens(1)
    assert i1 == 'c'

    i2 = vocab.to_tokens([1])
    assert i2 == ['c']

    i3 = vocab.to_tokens([0, 0])
    assert i3 == ['<unknown>', '<unknown>']

    i4 = vocab.to_tokens([3, 0, 3, 2])
    assert i4 == ['a', '<unknown>', 'a', 'b']

    assertRaises(ValueError, vocab.to_tokens, 100)
Example 9: CountErrors
# Required import: import collections [as alias]
# Or: from collections import Counter [as alias]
def CountErrors(ocr_text, truth_text):
    """Counts the drops and adds between 2 bags of iterables.

    Simple bag-of-objects counting returns the number of dropped and added
    elements, regardless of order, from anything that is iterable, e.g.
    a pair of strings gives character errors, and a pair of word lists gives
    word errors.

    Args:
        ocr_text: OCR text iterable (e.g. string for chars, word list for words).
        truth_text: Truth text iterable.

    Returns:
        ErrorCounts named tuple.
    """
    counts = collections.Counter(truth_text)
    counts.subtract(ocr_text)
    drops = sum(c for c in counts.values() if c > 0)
    adds = sum(-c for c in counts.values() if c < 0)
    return ErrorCounts(drops, adds, len(truth_text), len(ocr_text))
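The subtract-then-split idea is easy to see on plain strings, independent of the ErrorCounts tuple used above (the two words below are made up for illustration):

import collections

truth = "kitten"
ocr = "sitting"

counts = collections.Counter(truth)
counts.subtract(ocr)

drops = sum(c for c in counts.values() if c > 0)   # chars in truth missing from OCR
adds = sum(-c for c in counts.values() if c < 0)   # chars in OCR not in truth
print(drops, adds)  # 2 3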
Example 10: _get_ngrams
# Required import: import collections [as alias]
# Or: from collections import Counter [as alias]
def _get_ngrams(segment, max_order):
    """Extracts all n-grams up to a given maximum order from an input segment.

    Args:
        segment: text segment from which n-grams will be extracted.
        max_order: maximum length in tokens of the n-grams returned by this
            method.

    Returns:
        The Counter containing all n-grams up to max_order in segment
        with a count of how many times each n-gram occurred.
    """
    ngram_counts = collections.Counter()
    for order in range(1, max_order + 1):
        for i in range(0, len(segment) - order + 1):
            ngram = tuple(segment[i:i + order])
            ngram_counts[ngram] += 1
    return ngram_counts
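The same counting idea works standalone on a small token list; this compact sketch (with an invented three-token segment) mirrors the loops above:

import collections

segment = ["the", "cat", "sat"]
max_order = 2

ngram_counts = collections.Counter(
    tuple(segment[i:i + order])
    for order in range(1, max_order + 1)
    for i in range(len(segment) - order + 1)
)
print(ngram_counts)
# Counter({('the',): 1, ('cat',): 1, ('sat',): 1, ('the', 'cat'): 1, ('cat', 'sat'): 1})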
Example 11: _build_vocab
# Required import: import collections [as alias]
# Or: from collections import Counter [as alias]
def _build_vocab(generator, vocab_dir, vocab_name):
    """Build a vocabulary from examples.

    Args:
        generator: text generator for creating vocab.
        vocab_dir: directory where to save the vocabulary.
        vocab_name: vocab file name.

    Returns:
        text encoder.
    """
    vocab_path = os.path.join(vocab_dir, vocab_name)
    if not tf.gfile.Exists(vocab_path):
        data = []
        for line in generator:
            data.extend(line.split())
        counter = collections.Counter(data)
        count_pairs = sorted(counter.items(), key=lambda x: (-x[1], x[0]))
        words, _ = list(zip(*count_pairs))
        encoder = text_encoder.TokenTextEncoder(None, vocab_list=words)
        encoder.store_to_file(vocab_path)
    else:
        encoder = text_encoder.TokenTextEncoder(vocab_path)
    return encoder
Example 12: _build_vocab
# Required import: import collections [as alias]
# Or: from collections import Counter [as alias]
def _build_vocab(filename, vocab_dir, vocab_name):
    """Reads a file to build a vocabulary.

    Args:
        filename: file to read list of words from.
        vocab_dir: directory where to save the vocabulary.
        vocab_name: vocab file name.

    Returns:
        text encoder.
    """
    vocab_path = os.path.join(vocab_dir, vocab_name)
    if not tf.gfile.Exists(vocab_path):
        with tf.gfile.GFile(filename, "r") as f:
            data = f.read().split()
        counter = collections.Counter(data)
        count_pairs = sorted(counter.items(), key=lambda x: (-x[1], x[0]))
        words, _ = list(zip(*count_pairs))
        encoder = text_encoder.TokenTextEncoder(None, vocab_list=words)
        encoder.store_to_file(vocab_path)
    else:
        encoder = text_encoder.TokenTextEncoder(vocab_path)
    return encoder
Example 13: test_custom_reserved_tokens
# Required import: import collections [as alias]
# Or: from collections import Counter [as alias]
def test_custom_reserved_tokens(self):
    """Test that we can pass custom reserved tokens to SubwordTextEncoder."""
    corpus = "The quick brown fox jumps over the lazy dog"
    token_counts = collections.Counter(corpus.split(" "))

    start_symbol = "<S>"
    end_symbol = "<E>"
    reserved_tokens = text_encoder.RESERVED_TOKENS + [start_symbol, end_symbol]
    encoder = text_encoder.SubwordTextEncoder.build_to_target_size(
        10, token_counts, 2, 10, reserved_tokens=reserved_tokens)

    # Make sure that reserved tokens appear in the right places.
    self.assertEqual(encoder.decode([2]), start_symbol)
    self.assertEqual(encoder.decode([3]), end_symbol)

    # Make sure that we haven't messed up the ability to reconstruct.
    reconstructed_corpus = encoder.decode(encoder.encode(corpus))
    self.assertEqual(corpus, reconstructed_corpus)
Example 14: test_encodable_when_not_in_alphabet
# Required import: import collections [as alias]
# Or: from collections import Counter [as alias]
def test_encodable_when_not_in_alphabet(self):
    corpus = "the quick brown fox jumps over the lazy dog"
    token_counts = collections.Counter(corpus.split(" "))
    encoder = text_encoder.SubwordTextEncoder.build_to_target_size(
        100, token_counts, 2, 10)
    original = "This has UPPER CASE letters that are out of alphabet"

    # Early versions could have an infinite loop when breaking into subtokens
    # if there were any out-of-alphabet characters in the encoded string.
    encoded = encoder.encode(original)

    decoded = encoder.decode(encoded)
    self.assertEqual(original, decoded)
    encoded_str = "".join(encoder.all_subtoken_strings[i] for i in encoded)
    self.assertIn("\\84;", encoded_str)
Example 15: test_reserved_token_chars_not_in_alphabet
# Required import: import collections [as alias]
# Or: from collections import Counter [as alias]
def test_reserved_token_chars_not_in_alphabet(self):
    corpus = "dog"
    token_counts = collections.Counter(corpus.split(" "))
    encoder1 = text_encoder.SubwordTextEncoder.build_to_target_size(
        100, token_counts, 2, 100)
    filename = os.path.join(self.test_temp_dir, "out.voc")
    encoder1.store_to_file(filename)
    encoder2 = text_encoder.SubwordTextEncoder(filename=filename)

    self.assertEqual(encoder1._alphabet, encoder2._alphabet)

    for t in text_encoder.RESERVED_TOKENS:
        for c in t:
            # Verify that encoders can encode all reserved token chars.
            encoder1.encode(c)
            encoder2.encode(c)