當前位置: 首頁>>代碼示例>>Python>>正文


Python UnigramTagger.tag方法代碼示例

本文整理匯總了Python中nltk.tag.UnigramTagger.tag方法的典型用法代碼示例。如果您正苦於以下問題:Python UnigramTagger.tag方法的具體用法?Python UnigramTagger.tag怎麽用?Python UnigramTagger.tag使用的例子?那麽, 這裏精選的方法代碼示例或許可以為您提供幫助。您也可以進一步了解該方法所在nltk.tag.UnigramTagger的用法示例。


在下文中一共展示了UnigramTagger.tag方法的15個代碼示例,這些例子默認根據受歡迎程度排序。您可以為喜歡或者感覺有用的代碼點讚,您的評價將有助於係統推薦出更棒的Python代碼示例。

示例1: pos_tag

# 需要導入模塊: from nltk.tag import UnigramTagger [as 別名]
# 或者: from nltk.tag.UnigramTagger import tag [as 別名]
def pos_tag(pos_type, tokenized_sent):
	"""Part-of-speech tag a tokenized sentence.

	Args:
		pos_type: 'unigram' to use a UnigramTagger trained on a pickled
			Brown-corpus training set, or 'max_pos' to use nltk.pos_tag.
		tokenized_sent: list of token strings to tag.

	Returns:
		List of (token, tag) tuples, or None for an unrecognized pos_type
		(original behavior, preserved).
	"""
	if pos_type == 'unigram':
		# FIX: use a context manager so the pickle file handle is closed
		# promptly (the original leaked the handle from open()).
		with open('res/brown_train.pkl', 'rb') as train_file:
			brown_train = pickle.load(train_file)
		unigram_tagger = UnigramTagger(brown_train)
		return unigram_tagger.tag(tokenized_sent)
	elif pos_type == 'max_pos':
		return nltk.pos_tag(tokenized_sent)
開發者ID:merkhofer,項目名稱:parsel,代碼行數:9,代碼來源:nltk_magic.py

示例2: tag_words

# 需要導入模塊: from nltk.tag import UnigramTagger [as 別名]
# 或者: from nltk.tag.UnigramTagger import tag [as 別名]
	def tag_words(self, words, sents):
		"""Train a unigram tagger on Treebank, report held-out accuracy,
		and return the tagging of the first supplied sentence.

		Args:
			words: unused (kept for interface compatibility).
			sents: list of tokenized sentences; sents[0] is tagged.

		Returns:
			List of (token, tag) tuples for sents[0].
		"""
		# FIX: the original evaluated the tagger on its own output
		# (tagger.tag(sents[0]) fed to evaluate()), which both misuses
		# evaluate() — it expects a list of tagged sentences — and would
		# trivially score 1.0. Train on the first 3000 treebank sentences
		# and evaluate on the held-out remainder instead (as the
		# commented-out line in the original intended).
		train_sents = treebank.tagged_sents()[:3000]
		tagger = UnigramTagger(train_sents)
		test_sents = treebank.tagged_sents()[3000:]
		print(tagger.evaluate(test_sents))
		return tagger.tag(sents[0])
開發者ID:jayvachon,項目名稱:managerisk-reflection-search,代碼行數:12,代碼來源:sentiment-analysis.py

示例3: tag_penn

# 需要導入模塊: from nltk.tag import UnigramTagger [as 別名]
# 或者: from nltk.tag.UnigramTagger import tag [as 別名]
def tag_penn(words):
    """
    Tokenizes text by using a Penn Treebank tagged sentence and word tokenizer.

    Parameters
    ----------
    words: A list of strings.

    Returns
    -------
    A list of tuples of (str, str)
    """

    # Train a unigram tagger on the full tagged Treebank corpus and apply
    # it directly to the supplied token list.
    treebank_tagger = UnigramTagger(treebank.tagged_sents())
    return treebank_tagger.tag(words)
開發者ID:nwngeek212,項目名稱:NaturalLanguageProcessing,代碼行數:19,代碼來源:helper.py

示例4: tag_linked

# 需要導入模塊: from nltk.tag import UnigramTagger [as 別名]
# 或者: from nltk.tag.UnigramTagger import tag [as 別名]
def tag_linked(words, default_tag='INFO'):
    """
    Tokenizes text by using a Penn Treebank tagged sentence and word tokenizers.
    Uses DefaultTagger to assign "default_tag" to any element missed by the
    Penn Treebank tagger.

    Parameters
    ----------
    words: A list of strings.
    default_tag: Tag assigned to tokens the treebank-trained tagger misses.

    Returns
    -------
    A list of tuples of (str, str)
    """

    default_tagger = DefaultTagger(default_tag)
    # FIX: wire the fallback through the public backoff= constructor
    # argument. The original assigned pt_tagger._taggers =
    # [pt_tagger, default_tagger], which depends on NLTK's private
    # internals instead of the supported backoff-chain mechanism.
    pt_tagger = UnigramTagger(treebank.tagged_sents(), backoff=default_tagger)

    tags = pt_tagger.tag(words)

    return tags
開發者ID:nwngeek212,項目名稱:NaturalLanguageProcessing,代碼行數:25,代碼來源:helper.py

示例5: PyTenseShift

# 需要導入模塊: from nltk.tag import UnigramTagger [as 別名]
# 或者: from nltk.tag.UnigramTagger import tag [as 別名]
class PyTenseShift(object):
    """Facade for translating sentences from present to past tense.

    When ``isPl`` is true, Polish input is handled by a custom
    ``FirstTagger``; otherwise a ``UnigramTagger`` trained on the given
    corpus (with an "NN" default-tag backoff) is used. To optimize your
    results, you may implement the tagger interface yourself and swap
    your tagger in inside ``__init__``.
    """

    def __init__(self, corpus, isPl):
        if isPl:
            self.tagger = FirstTagger(corpus)
        else:
            fallback = DefaultTagger("NN")
            self.__utag = UnigramTagger(corpus.tagged_sents(), backoff=fallback)

    def _tokenize(self, tense, isPl):
        """Tokenize the input sentence into tagged words.

        This representation is easier to evaluate than raw text.
        """
        if isPl:
            return self.tagger.tag(tense)
        return self.__utag.tag(tokenize(tense))

    def getPastTense(self, tense):
        """Translates sentence given in present tense into past tense

        Args:
            sentence (str): Sentence to translate
        Returns:
            str. Sentence in past tense
        """
        raise NotImplementedError("abstract method")
開發者ID:perfidia,項目名稱:pytenseshift,代碼行數:38,代碼來源:__init__.py

示例6: UnigramTagger

# 需要導入模塊: from nltk.tag import UnigramTagger [as 別名]
# 或者: from nltk.tag.UnigramTagger import tag [as 別名]
import nltk
import json

from nltk.corpus import brown
from nltk.tag import UnigramTagger

# Train a unigram tagger on the Brown corpus with the universal tagset,
# then demo it: print only the tokens tagged as verbs.
tagger = UnigramTagger(brown.tagged_sents(tagset='universal'))
sent = ['Mitchell', 'decried', 'the', 'high', 'rate', 'of', 'unemployment']
for word, tag in tagger.tag(sent):
	if tag == "VERB":
		print(word, '->', tag)


# FIX: the original opened the output file with open(..., 'w+') and never
# closed it, so buffered output could be lost. Use context managers so
# both files are closed (and the output flushed) even on error.
with open("../assets/inputText/verbs_tagged_questions.txt", 'w+') as verbs_tagged:
	with open("../assets/inputText/all_questions.txt", 'r') as all_lines:
		for line in all_lines:
			splitLine = line.split(' ')
			for word, tag in tagger.tag(splitLine):
				if tag == "VERB":
					verbs_tagged.write(word + "\n")
					#verbs_tagged.write(word + " \"" + line[:-1] + "\"\n")



開發者ID:diana-wang,項目名稱:NLP_Research,代碼行數:22,代碼來源:unigramTagging.py

示例7: UnigramTagger

# 需要導入模塊: from nltk.tag import UnigramTagger [as 別名]
# 或者: from nltk.tag.UnigramTagger import tag [as 別名]
import nltk
from nltk.corpus import brown
from nltk.tag import UnigramTagger

# Train on the first 700 tagged 'news' sentences of the Brown corpus,
# then tag a small sample sentence and show each token with its tag.
tagger = UnigramTagger(brown.tagged_sents(categories='news')[:700])
sentence = ['John','and','Smith','went','to','NY','and','Germany']
for token, label in tagger.tag(sentence):
    print(token, '->', label)
開發者ID:xenron,項目名稱:sandbox-da-python,代碼行數:9,代碼來源:ch6_12.py

示例8: UnigramTagger

# 需要導入模塊: from nltk.tag import UnigramTagger [as 別名]
# 或者: from nltk.tag.UnigramTagger import tag [as 別名]
#!/usr/bin/python
# -*- coding: utf-8 -*-
from __future__ import unicode_literals, division
from nltk.tag import UnigramTagger


def main():
    """Demonstrate priming a UnigramTagger with an explicit word->tag model
    instead of training it on a tagged corpus."""
    model = {u'Péter': 'N', 'Enikő': 'N', 'szeret': 'V', 'Marit': 'Nacc'}
    tagger = UnigramTagger(model=model)
    print(tagger.tag(['Péter', 'Enikő', 'szeret', 'Marit']))


if __name__ == '__main__':
    main()


開發者ID:davidpgero,項目名稱:hungarian-nltk,代碼行數:13,代碼來源:unigramm_tagger.py

示例9: FirstTagger

# 需要導入模塊: from nltk.tag import UnigramTagger [as 別名]
# 或者: from nltk.tag.UnigramTagger import tag [as 別名]
class FirstTagger(TaggerInterface):
    """Unigram tagger (with an "NN" default backoff) whose raw tag strings
    are translated, letter by positional letter, into a unified Polish
    morphological-feature dictionary."""

    # Lookup tables: tag letter at a fixed position -> feature value.
    # Positions: 0 word class, 1 number, 2 case, 3 gender, 5 person,
    # 6 tense, 7 mood, 8 aspect.
    _KLASA = {'V': 'czasownik', 'S': 'rzeczownik', 'A': 'przymiotnik',
              'N': 'liczebnik', 'Z': 'zaimek', 'D': 'przysłówek',
              'P': 'przyimek', 'C': 'spójnik', 'I': 'wykrzyknik',
              'T': 'partykuła'}
    _LICZBA = {'S': 'pojedyńcza', 'P': 'mnoga'}
    _PRZYPADEK = {'N': 'mianownik', 'G': 'dopełniacz', 'D': 'celownik',
                  'A': 'biernik', 'I': 'narzędnik', 'L': 'miejscownik',
                  'V': 'wołacz'}
    _RODZAJ = {'M': 'm', 'P': 'm', 'A': 'm', 'I': 'm', 'O': 'm',
               'F': 'ż', 'R': 'ż', 'T': 'ż', 'N': 'n'}
    _OSOBA = {'1': 'pierwsza', '2': 'druga', '3': 'trzecia',
              'I': 'bezokolicznik', 'B': 'bezosobnik',
              'U': 'imiesłów', 'W': 'imiesłów'}
    _CZAS = {'T': 'teraźniejszy', 'P': 'przeszły', 'F': 'przyszły'}
    _TRYB = {'O': 'oznajmujący', 'P': 'przypuszczający', 'R': 'rozkazujący'}
    _ASPEKT = {'D': 'dokonane', 'N': 'niedokonane'}

    def __init__(self, corpus):
        fallback = DefaultTagger("NN")
        self.__utag = UnigramTagger(corpus.tagged_sents(), backoff=fallback)

    def tag(self, tense):
        """Does translation from tag generated by tagger into unified format

            Args:
                sentence: list of tuples (word and its form) which are after verb
            Returns:
                list of tuples (word and its form in unified format)
        """
        tagged = self.__utag.tag(tokenize(tense))
        result = []
        for word, form in tagged:
            # Word class always set; unknown letters map to 'nieznany'.
            info = {'klasa': self._KLASA.get(form[0], 'nieznany')}
            # form[1] is always consulted (as in the original); the
            # remaining positions only when the tag string is long enough,
            # and a key is only added when the letter is recognized.
            if form[1] in self._LICZBA:
                info['liczba'] = self._LICZBA[form[1]]
            if len(form) >= 3 and form[2] in self._PRZYPADEK:
                info['przypadek'] = self._PRZYPADEK[form[2]]
            if len(form) >= 4 and form[3] in self._RODZAJ:
                info['rodzaj'] = self._RODZAJ[form[3]]
            if len(form) >= 6 and form[5] in self._OSOBA:
                info['osoba'] = self._OSOBA[form[5]]
            if len(form) >= 7 and form[6] in self._CZAS:
                info['czas'] = self._CZAS[form[6]]
            if len(form) >= 8 and form[7] in self._TRYB:
                info['tryb'] = self._TRYB[form[7]]
            if len(form) >= 9 and form[8] in self._ASPEKT:
                info['aspekt'] = self._ASPEKT[form[8]]
            result.append((word, info))
        return result
開發者ID:perfidia,項目名稱:pytenseshift,代碼行數:78,代碼來源:firsttagger.py

示例10:

# 需要導入模塊: from nltk.tag import UnigramTagger [as 別名]
# 或者: from nltk.tag.UnigramTagger import tag [as 別名]
######## UNIGRAM TAGGER ##########

from nltk.tag import UnigramTagger
from nltk.corpus import treebank

#We use the first 3000 sentences of the treebank corpus as the training set to initialize
#the UnigramTagger class
#Unigram tagger can be trained by giving it a list of tagged sentences at initialization.
train_sents = treebank.tagged_sents()[:3000]
tagger = UnigramTagger(train_sents)
print(treebank.sents()[0])
print(tagger.tag(treebank.sents()[0]))

# Evaluate on the held-out remainder of the corpus.
test_sents = treebank.tagged_sents()[3000:]
print(tagger.evaluate(test_sents))


# A UnigramTagger can also be primed with an explicit word->tag model.
tagger = UnigramTagger(model={'Pierre': 'NN'})
# FIX: the original called tagger.tag(treebank.sents())[0], which passes
# the whole corpus (a list of sentences) to tag(); tag() expects a single
# sentence (a list of tokens). Index the sentence first, then tag it.
tagger.tag(treebank.sents()[0])
開發者ID:bindaasamit,項目名稱:pycode,代碼行數:22,代碼來源:tutPosTagging02.py

示例11: TaggerOnline

# 需要導入模塊: from nltk.tag import UnigramTagger [as 別名]
# 或者: from nltk.tag.UnigramTagger import tag [as 別名]
def TaggerOnline(tokens):
	"""Tag a token list with a unigram tagger trained on the full
	mac_morpho corpus, falling back to the default tag 'N' for any
	token the unigram model has never seen.

	Args:
		tokens: list of token strings.
	Returns:
		List of (token, tag) tuples.
	"""
	fallback = DefaultTagger('N')
	training_sents = mac_morpho.tagged_sents()[:]
	unigram = UnigramTagger(training_sents, backoff=fallback)
	return unigram.tag(tokens)
開發者ID:roneysco,項目名稱:TopX,代碼行數:8,代碼來源:taggerManager.py

示例12: UnigramTagger

# 需要導入模塊: from nltk.tag import UnigramTagger [as 別名]
# 或者: from nltk.tag.UnigramTagger import tag [as 別名]
# Baseline: accuracy and sample tagging for the regex tagger `rt`
# (defined earlier in the source file).
print(rt.evaluate(test_data))
print(rt.tag(tokens))


## N gram taggers
from nltk.tag import UnigramTagger
from nltk.tag import BigramTagger
from nltk.tag import TrigramTagger

ut = UnigramTagger(train_data)
bt = BigramTagger(train_data)
tt = TrigramTagger(train_data)

# Report accuracy and a sample tagging for each n-gram order, in the
# same order as before: unigram, bigram, trigram.
for ngram_tagger in (ut, bt, tt):
    print(ngram_tagger.evaluate(test_data))
    print(ngram_tagger.tag(tokens))

def combined_tagger(train_data, taggers, backoff=None):
    """Chain tagger classes into a backoff hierarchy.

    Each class in `taggers` is instantiated with `train_data`, using the
    previously built tagger as its backoff; the last one built (the head
    of the chain) is returned.
    """
    chained = backoff
    for factory in taggers:
        chained = factory(train_data, backoff=chained)
    return chained

# Build the combined chain Unigram -> Bigram -> Trigram, with the regex
# tagger `rt` (defined earlier in the source file) as the final backoff.
ct = combined_tagger(train_data=train_data, 
                     taggers=[UnigramTagger, BigramTagger, TrigramTagger],
                     backoff=rt)
開發者ID:000Nelson000,項目名稱:text-analytics-with-python,代碼行數:32,代碼來源:pos_tagging.py

示例13: print

# 需要導入模塊: from nltk.tag import UnigramTagger [as 別名]
# 或者: from nltk.tag.UnigramTagger import tag [as 別名]
import nltk
from nltk.tag import UnigramTagger
from nltk.corpus import treebank

# Train a unigram tagger on the first 7000 tagged treebank sentences,
# then show the first raw sentence next to its tagged version.
training = treebank.tagged_sents()[:7000]
unitagger = UnigramTagger(training)
first_sentence = treebank.sents()[0]
print(first_sentence)
print(unitagger.tag(first_sentence))
開發者ID:PacktPublishing,項目名稱:Mastering-Natural-Language-Processing-with-Python,代碼行數:9,代碼來源:ch4_16.py

示例14: UnigramTagger

# 需要導入模塊: from nltk.tag import UnigramTagger [as 別名]
# 或者: from nltk.tag.UnigramTagger import tag [as 別名]
#tagger.batch_tag([['Hello', 'world', '.'], ['How', 'are', 'you', '?']])

#--------------------------------------------------------------------------------
# Training taggers
#--------------------------------------------------------------------------------

# so far so good.  Next have to train taggers.

# Unigram, training on Treebank corpus
from nltk.tag import UnigramTagger
from nltk.corpus import treebank
train_sents = treebank.tagged_sents()[:3000]
unigram_tagger = UnigramTagger(train_sents)

# try it on our word_list.
# NOTE(review): word_list is defined elsewhere in this file — presumably a
# flat list of token strings; confirm before running this chunk standalone.
unigram_tagger.tag( word_list )

# Backoff taggers - hierarchy of taggers, first tags all it can, then next takes
#    a stab at all with tag of None, then next, etc.

# Unigram with Default as backoff:
# NOTE(review): default_tagger must be created earlier in the file (e.g. a
# DefaultTagger instance) — verify. This rebinds train_sents/unigram_tagger,
# retraining on the FULL treebank corpus this time.
train_sents = treebank.tagged_sents()
unigram_tagger = UnigramTagger( train_sents, backoff = default_tagger )

# Add in contextual taggers:
# - bigram - current word plus previous token.
# - trigram - current word plus previous two tokens.
from nltk.tag import BigramTagger, TrigramTagger
bitagger = BigramTagger( train_sents )
tritagger = TrigramTagger( train_sents )
開發者ID:byouloh,項目名稱:sourcenet,代碼行數:32,代碼來源:nltk.py

示例15: sleep

# 需要導入模塊: from nltk.tag import UnigramTagger [as 別名]
# 或者: from nltk.tag.UnigramTagger import tag [as 別名]
    # Block until a page path appears on the pending queue, polling once
    # per second. (redisInterface is defined outside this chunk.)
    while not redisInterface.hasPending():
        sleep(1)

    page = redisInterface.popPending()
    print 'Reading ' + page + ' STARTED'

    # Read the html page
    # Newlines are stripped so the parser sees one continuous string.
    with open(page, 'r') as htmlPage:
        data = htmlPage.read().replace('\n', '');

    # Parse html
    # NOTE(review): 'story-body-text' looks like a site-specific CSS class —
    # confirm it matches the pages the fetcher saves.
    soup = BeautifulSoup(data)
    articleTitle = titleFromArticleSoup(soup)
    articleBodyWithTags = soup.find_all('p', class_ = 'story-body-text')
    articleBody = [stringFromHTMLParagraph(p)
            for p in articleBodyWithTags]
    # Title is processed together with the body paragraphs.
    parasToProcess = [articleTitle] + articleBody

    print 'Title: ' + articleTitle

    # Tokenize and tag
    # tokenizer and tagger are module-level objects created outside this chunk.
    tokens = [tokenizer.tokenize(s) for s in parasToProcess]
    taggedArticleBody = [tagger.tag(t) for t in tokens]

    # Save to redis
    redisInterface.saveArticleData(
            TaggedArticle(articleTitle, taggedArticleBody,'article_data'))

    print 'Reading ' + page + ' FINISHED'
開發者ID:HarshitBangar,項目名稱:Feed-Fetcher,代碼行數:31,代碼來源:reader.py


注:本文中的nltk.tag.UnigramTagger.tag方法示例由純淨天空整理自Github/MSDocs等開源代碼及文檔管理平台,相關代碼片段篩選自各路編程大神貢獻的開源項目,源碼版權歸原作者所有,傳播和使用請參考對應項目的License;未經允許,請勿轉載。