This article collects typical usage examples of the Python function nltk.corpus.brown.tagged_sents. If you have been wondering what tagged_sents does, how to call it, and what real usage looks like, the curated examples below may help.
The following 14 code examples of tagged_sents are drawn from open-source projects, ordered by popularity by default.
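As a quick orientation before the project examples: tagged_sents returns the corpus as a sequence of sentences, each a list of (word, tag) pairs. Here is a minimal sketch (it assumes the Brown corpus has been downloaded, e.g. via nltk.download('brown')):

from nltk.corpus import brown

# Each sentence is a list of (word, tag) pairs; the categories and fileids
# arguments restrict which part of the corpus is returned.
sents = brown.tagged_sents(categories='news')
print(len(sents))    # number of tagged sentences in the 'news' category
print(sents[0][:5])  # first five (word, tag) pairs of the first sentence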
Example 1: main

import nltk
from nltk.corpus import brown

def main():
    # Simple unigram tagger: hold out the first 100 sentences for testing.
    brown_news_tagged = brown.tagged_sents(categories='news')
    brown_train = brown_news_tagged[100:]
    brown_test = brown_news_tagged[:100]
    nn_tagger = nltk.DefaultTagger('NN')
    ut2 = nltk.UnigramTagger(brown_train, backoff=nn_tagger)
    # SimpleUnigramTagger is defined elsewhere in this project.
    simpleUnigramTagger = SimpleUnigramTagger(brown_train, backoff=nn_tagger)
    print('Simple Unigram tagger accuracy: %4.1f%%' % (100.0 * simpleUnigramTagger.evaluate(brown_test)))
    print('Unigram tagger accuracy: %4.1f%%' % (100.0 * ut2.evaluate(brown_test)))

    # Affix tagger with an entropy-based cutoff: 80% train, 10% development, 10% test.
    brown_news_tagged = brown.tagged_sents(categories='news')
    brown_train = brown_news_tagged[:int(0.8 * len(brown_news_tagged))]
    rest = brown_news_tagged[int(0.8 * len(brown_news_tagged)):]
    brown_development = rest[:int(0.5 * len(rest))]
    brown_test = rest[int(0.5 * len(rest)):]
    affix_tagger = nltk.AffixTagger(brown_train, backoff=nltk.DefaultTagger('NN'), cutoff=2)
    # _train, _H and optimize_parameter are project-specific helpers that
    # patch AffixTagger with an entropy-based training criterion.
    nltk.AffixTagger._train = _train
    nltk.AffixTagger.H = _H
    optcutoff = optimize_parameter()
    print("the optimal cutoff param is: %d" % optcutoff)
    affix_tagger2 = nltk.AffixTagger(brown_train, backoff=nltk.DefaultTagger('NN'), cutoff=optcutoff)
    print('Affix tagger accuracy: %4.1f%%' % (100.0 * affix_tagger.evaluate(brown_test)))
    print('Affix tagger accuracy with entropy cutoff: %4.1f%%' % (100.0 * affix_tagger2.evaluate(brown_test)))
Example 2: __init__

from os.path import exists
from pickle import load, dump
from nltk.corpus import brown

def __init__(self):
    '''initialize and train brill and naive bayes classifiers'''
    # TODO: Fix bug where it loads tagger from calling module dir
    # `file`, `templates`, NaiveBayesTagger and FastBrillTaggerTrainer are
    # defined elsewhere in this project (FastBrillTaggerTrainer is NLTK 2.x API).
    if exists(file):
        input = open(file, 'rb')
        self.classifier = load(input)
        input.close()
        print('Successfully loaded saved classifier')
        return
    self.bayes = NaiveBayesTagger()
    boundary = int(len(brown.tagged_sents()) * 0.8)
    # simplify_tags=True is the pre-NLTK-3 API; newer NLTK uses tagset='universal'.
    train = brown.tagged_sents(simplify_tags=True)[:boundary]
    brill_trainer = FastBrillTaggerTrainer(initial_tagger=self.bayes,
                                           templates=templates,
                                           trace=3,
                                           deterministic=True)
    self.classifier = brill_trainer.train(train, max_rules=10)
    print('Saving Taggers to file: "pos_tagger.pickle"')
    output = open(file, 'wb')
    dump(self.classifier, output, 1)
    output.close()
Example 3: demo

import os
import nltk

def demo(train_size=100, test_size=100, java_home=None, mallet_home=None):
    from nltk.corpus import brown
    import textwrap

    # Define a very simple feature detector
    def fd(sentence, index):
        word = sentence[index]
        return dict(word=word, suffix=word[-2:], len=len(word))

    # Let nltk know where java & mallet are.
    nltk.internals.config_java(java_home)
    nltk.classify.mallet.config_mallet(mallet_home)

    # Get the training & test corpus. We simplify the tagset a little:
    # just the first 2 chars.
    def strip(corpus):
        return [[(w, t[:2]) for (w, t) in sent] for sent in corpus]
    brown_train = strip(brown.tagged_sents(categories='news')[:train_size])
    brown_test = strip(brown.tagged_sents(categories='editorial')[:test_size])

    # MalletCRF lives in nltk.tag.crf in NLTK 2.x (removed in NLTK 3).
    crf = MalletCRF.train(fd, brown_train,  # '/tmp/crf-model',
                          transduction_type='VITERBI')
    sample_output = crf.tag([w for (w, t) in brown_test[5]])
    acc = nltk.tag.accuracy(crf, brown_test)
    print('\nAccuracy: %.1f%%' % (acc * 100))
    print('Sample output:')
    print(textwrap.fill(' '.join('%s/%s' % w for w in sample_output),
                        initial_indent=' ', subsequent_indent=' ') + '\n')

    # Clean up
    print('Clean-up: deleting', crf.filename)
    os.remove(crf.filename)
    return crf
Example 4: training_sentences

from nltk.corpus import brown

def training_sentences(use=1.0, categories=[]):
    """returns a training sentence set: [[(word, tag), ..], [(word, tag), ..], ..]"""
    # TEST_PROPORTION is a module-level constant defined elsewhere in this project.
    if len(categories) == 0:
        categories = brown.categories()  # use all of the brown categories
    sents = []
    for category in categories:
        total = len(brown.tagged_sents(categories=category))
        n_train = int((1 - TEST_PROPORTION) * use * total) - 1  # use the first n sentences for training
        sents += brown.tagged_sents(categories=category, simplify_tags=True)[0:n_train]
    return sents
Example 5: test_sentences

def test_sentences(categories=[]):
    """returns a test sentence set: [[(word, tag), ..], [(word, tag), ..], ..]"""
    if len(categories) == 0:
        categories = brown.categories()  # use all of the brown categories
    sents = []
    for category in categories:
        total = len(brown.tagged_sents(categories=category))
        start = int(TEST_PROPORTION * total)  # use the last k sentences for test
        sents += brown.tagged_sents(categories=category, simplify_tags=True)[-start:]
    return sents
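The two helpers above share a module-level TEST_PROPORTION constant whose value is not shown in the excerpt. A minimal usage sketch, assuming TEST_PROPORTION = 0.1 (the actual value is project-specific):

# Hypothetical usage sketch; TEST_PROPORTION's real value is not in the excerpt.
TEST_PROPORTION = 0.1  # hold out the last 10% of each category for testing

train = training_sentences(use=0.5, categories=['news', 'fiction'])
test = test_sentences(categories=['news', 'fiction'])
print(len(train), len(test))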
Example 6: exercise2

import nltk
from nltk.corpus import brown as bn

def exercise2():
    print()
    print("Exercise 2:")
    brown_news_tagged_sents = bn.tagged_sents(categories='news')
    brown_lore_tagged_sents = bn.tagged_sents(categories='lore')
    trigram_tagger = nltk.TrigramTagger(brown_news_tagged_sents)
    # Evaluating on the training category ('news') overstates accuracy;
    # 'lore' shows performance on unseen text.
    brown_news_eval = trigram_tagger.evaluate(brown_news_tagged_sents)
    brown_lore_eval = trigram_tagger.evaluate(brown_lore_tagged_sents)
    print("Evaluation of the trigram tagger on 'News': %f" % brown_news_eval)
    print("Evaluation of the trigram tagger on 'Lore': %f" % brown_lore_eval)
    print()
Example 7: precisionRecall

def precisionRecall():
    def tag_list(tagged_sents):
        return [tag for sent in tagged_sents for (word, tag) in sent]

    def apply_tagger(tagger, corpus):
        return [tagger.tag(nltk.tag.untag(sent)) for sent in corpus]

    # t2 is a previously trained tagger defined elsewhere in the project.
    gold = tag_list(brown.tagged_sents(categories='editorial'))
    test = tag_list(apply_tagger(t2, brown.tagged_sents(categories='editorial')))
    cm = nltk.ConfusionMatrix(gold, test)
    # pp() was renamed pretty_format() in NLTK 3.
    print(cm.pp(sort_by_count=True, show_percents=True, truncate=9))
Example 8: evaluate

def evaluate(self):
    '''run tests on conll2000 and treebank data'''
    # assumes: from nltk.corpus import treebank, conll2000, brown
    test = treebank.tagged_sents()[:100]
    treebank_result = 100 * self.classifier.evaluate(test)
    test = conll2000.tagged_sents()[:100]
    conll2000_result = 100 * self.classifier.evaluate(test)
    # evaluate on the held-out last 20% of the Brown corpus
    test = brown.tagged_sents()[int(len(brown.tagged_sents()) * 0.8):]
    brown_result = 100 * self.classifier.evaluate(test)
    return (treebank_result, conll2000_result, brown_result)
Example 9: testSet

import random
from nltk.corpus import brown

def testSet():
    # Three alternative train/test splits; each reassignment below
    # overwrites the previous one.
    # 1. Random shuffle, 90/10 split.
    tagged_sents = list(brown.tagged_sents(categories='news'))
    random.shuffle(tagged_sents)
    size = int(len(tagged_sents) * 0.1)
    train_set, test_set = tagged_sents[size:], tagged_sents[:size]
    # 2. Split by file, so test sentences come from unseen documents.
    file_ids = brown.fileids(categories='news')
    size = int(len(file_ids) * 0.1)
    train_set = brown.tagged_sents(file_ids[size:])
    test_set = brown.tagged_sents(file_ids[:size])
    # 3. Split by genre, the hardest generalization test.
    train_set = brown.tagged_sents(categories='news')
    test_set = brown.tagged_sents(categories='fiction')
Example 10: get_tagged_tokens

def get_tagged_tokens(self, corpus=TAGGED, testing=False):
    """This tokenizes, segments, and tags all the files in a directory."""
    # build_trainer, tag_token_spans and TAGGED are defined elsewhere
    # in this project.
    if testing:
        # train against a smaller version of the corpus so that it
        # doesn't take years during testing.
        tagger = build_trainer(brown.tagged_sents(categories='news'))
    else:
        tagger = build_trainer(brown.tagged_sents())
    tokens_and_spans = self.tokenize_corpus(corpus)
    tagged_spanned_tokens = tag_token_spans(
        tokens_and_spans,
        tagger,
    )
    return tagged_spanned_tokens
Example 11: exercise1

def exercise1():
    print()
    print("Exercise 1:")
    brown_news_tagged_sents = bn.tagged_sents(categories='news')
    brown_lore_tagged_sents = bn.tagged_sents(categories='lore')
    unigram_tagger = nltk.UnigramTagger(brown_news_tagged_sents)
    brown_news_eval = unigram_tagger.evaluate(brown_news_tagged_sents)
    brown_lore_eval = unigram_tagger.evaluate(brown_lore_tagged_sents)
    print("Evaluation of the unigram tagger on 'News': %f" % brown_news_eval)
    print("Evaluation of the unigram tagger on 'Lore': %f" % brown_lore_eval)
    brown_lore = bn.sents(categories='lore')
    b_lore = unigram_tagger.tag(brown_lore[200])
    print("Tagged words for sentence 200 of the Brown corpus category 'Lore':")
    print(b_lore)
    print()
Example 12: create_tagger

def create_tagger():
    """Train a tagger from the Brown Corpus. This should not be called very
    often; only in the event that the tagger pickle wasn't found."""
    print("Building tagger...")
    train_sents = brown.tagged_sents()
    # These regexes were lifted from the NLTK book tagger chapter.
    t0 = nltk.RegexpTagger(
        [(r'^-?[0-9]+(\.[0-9]+)?$', 'CD'),  # cardinal numbers
         (r'(The|the|A|a|An|an)$', 'AT'),   # articles
         (r'.*able$', 'JJ'),                # adjectives
         (r'.*ness$', 'NN'),                # nouns formed from adjectives
         (r'.*ly$', 'RB'),                  # adverbs
         (r'.*s$', 'NNS'),                  # plural nouns
         (r'.*ing$', 'VBG'),                # gerunds
         (r'.*ed$', 'VBD'),                 # past tense verbs
         (r'.*', 'NN')                      # nouns (default)
        ])
    print("got t0")
    # Chain unigram -> bigram -> trigram taggers, each backing off to the last.
    t1 = nltk.UnigramTagger(train_sents, backoff=t0)
    print("got t1")
    t2 = nltk.BigramTagger(train_sents, backoff=t1)
    print("got t2")
    t3 = nltk.TrigramTagger(train_sents, backoff=t2)
    print("Built tagger!")
    return t3
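The docstring above mentions a tagger pickle. A minimal sketch of that caching pattern, assuming a path of 'tagger.pickle' (the file name is illustrative, not taken from the original project):

# Hypothetical caching wrapper around create_tagger(); the path name is
# illustrative, not from the original project.
import os
import pickle

def load_or_create_tagger(path='tagger.pickle'):
    if os.path.exists(path):
        with open(path, 'rb') as f:
            return pickle.load(f)    # reuse the cached tagger
    tagger = create_tagger()         # slow: trains on the full Brown corpus
    with open(path, 'wb') as f:
        pickle.dump(tagger, f)
    return tagger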
Example 13: read_datas

def read_datas(self):
    # 90/10 train/test split over the full tagged Brown corpus.
    brown_tagged_sentence = brown.tagged_sents()
    brown_sent = brown.sents()  # untagged sentences (unused here)
    size = int(len(brown_tagged_sentence) * 0.9)
    train_set = brown_tagged_sentence[:size]
    test_set = brown_tagged_sentence[size:]
    return (train_set, test_set)
Example 14: ch05_11_train_test_affix_tagger

def ch05_11_train_test_affix_tagger():
    from nltk.corpus import brown
    # map each word in the 'news' category to its most frequent tag
    fd = nltk.FreqDist(brown.words(categories="news"))
    cfd = nltk.ConditionalFreqDist(brown.tagged_words(categories="news"))
    most_freq_pos = dict((word, cfd[word].max()) for word in fd.keys())
    # note: the model keys are whole words, while AffixTagger looks up affixes
    affix_tagger = nltk.AffixTagger(model=most_freq_pos)
    print(affix_tagger.evaluate(brown.tagged_sents(categories="editorial")))