This article collects typical usage examples of the Python function nltk.corpus.brown.tagged_words. If you have been wondering what tagged_words does, how to call it, and what it looks like in real code, the curated examples below should help.
15 code examples of the tagged_words function are shown below, sorted by popularity by default. You can upvote the examples you like or find useful; your ratings help the system recommend better Python code samples.
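For orientation, brown.tagged_words() returns the corpus as a sequence of (word, tag) pairs, optionally filtered by category. A minimal sketch, assuming the Brown corpus data has been downloaded via nltk.download('brown'):

from nltk.corpus import brown

print(brown.tagged_words()[:3])
# [('The', 'AT'), ('Fulton', 'NP-TL'), ('County', 'NN-TL')]
print(brown.tagged_words(categories='news')[:3])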
Example 1: exercise3

import nltk
from nltk.corpus import brown as bn

def exercise3():
    print
    print "Exercise 3"
    print "Part 1"
    count = 0
    total_brown_tagged_words = bn.tagged_words()
    cfd1 = nltk.ConditionalFreqDist(total_brown_tagged_words)
    set1 = set([a for (a, b) in total_brown_tagged_words])
    for s in set1:
        if len(cfd1[s].keys()) == 5:
            count = count + 1
    print "Number of words which have exactly 5 different tags: %d" % count
    print
    print "Part 2"
    print "Words which have the most distinct tags are: "
    tags = [b for (a, b) in bn.tagged_words()]
    fd = nltk.FreqDist(tags)
    ft = fd.keys()
    cfd2 = nltk.ConditionalFreqDist((tag, word) for (word, tag) in bn.tagged_words())
    # note: despite the message above, this loop lists the tags that occur
    # exactly once in the corpus, together with the words they label
    for a in ft:
        if fd[a] == 1:
            print "For POS: " + a
            print cfd2[a].keys()
    print
    print
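As a usage note on the ConditionalFreqDist idiom in Part 1 above, the same count can be written more directly by mapping each word to its set of tags. A minimal sketch under Python 3 / NLTK 3 (an adaptation, not part of the original example):

from collections import defaultdict
from nltk.corpus import brown

tags_per_word = defaultdict(set)
for word, tag in brown.tagged_words():
    tags_per_word[word].add(tag)
print(sum(1 for tags in tags_per_word.values() if len(tags) == 5))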
Example 2: __init__

import collections
from nltk.model import NgramModel  # available in older NLTK releases; removed in NLTK 3

def __init__(self):
    """Initialize your data structures in the constructor."""
    tag_corpus = []
    # from nltk.corpus import treebank
    # corpus = treebank.tagged_words()
    # for (word, tag) in treebank.tagged_words():
    #     tag_corpus.append(tag)
    from nltk.corpus import brown
    corpus = brown.tagged_words()
    for (word, tag) in brown.tagged_words():
        tag_corpus.append(tag)
    self.wordCounts = collections.defaultdict(int)
    self.tagCounts = collections.defaultdict(int)
    self.wordTagCounts = collections.defaultdict(int)
    self.wordTagList = {}
    self.totalTag = 0
    self.train(corpus)
    # estimator = lambda fdist, bins: LidstoneProbDist(fdist, 0.2)
    # estimator = lambda fdist, bins: WittenBellProbDist(fdist, 0.2)
    estimator = _estimator  # _estimator is defined elsewhere in the original module
    self.tagLM = NgramModel(2, tag_corpus, estimator)
Example 3: verb_stem

import re
from nltk.corpus import brown

def verb_stem(s):
    """extracts the stem from the 3sg form of a verb, or returns empty string"""
    # goes through rules outlined in handout
    if re.match("has", s):
        toReturn = 'have'
    elif re.match(".*(ays|eys|iys|oys|uys)", s):
        toReturn = s[:-1]
    elif re.match(".*(ies)", s):
        if len(s) == 4:
            toReturn = s[:-1]
        else:
            s1 = s[:-3]
            s2 = s1 + "y"
            toReturn = s2
    elif re.match(".*(oes|xes|ches|shes|sses|zzes)", s):
        toReturn = s[:-2]
    # note: '!' below is a literal character, not regex negation, so the
    # '!...' alternatives can never match an ordinary verb form
    elif re.match(".*(!sses|!zzes|ses|zes)", s):
        toReturn = s[:-1]
    elif re.match(".*(!ies|!oes|!ses|!xes|!ches|!shes|es)", s):
        toReturn = s[:-1]
    elif re.match(".*(!ss|!xs|!ys|!zs|!chs|!shs|s)", s):
        toReturn = s[:-1]
    else:
        toReturn = ''
    # check whether the original 3sg form or the created stem occurs in the Brown corpus
    if (s, 'VBZ') not in brown.tagged_words():
        if (toReturn, 'VB') not in brown.tagged_words():
            return ''
        else:
            return toReturn
    else:
        return toReturn
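A quick usage sketch for the function above. The expected stems follow from the spelling rules, but the final answer also depends on the Brown membership test, and each such test scans the whole tagged corpus, so this is slow (stems shown assume the forms occur in Brown with tags VBZ/VB):

for form in ["plays", "flies", "watches", "runs"]:
    print form, "->", verb_stem(form)
# expected: plays -> play, flies -> fly, watches -> watch, runs -> run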
Example 4: exercise2

import re
import nltk
from nltk.corpus import brown as bn

def exercise2(category):
    print
    print "For Category: " + category
    print "Part 1"
    print "Words with the tag 'JJ':"
    words = bn.tagged_words(categories=category)
    wordlist = bn.words(categories=category)
    words_JJ = set(sorted([(word, tag) for (word, tag) in words if tag == 'JJ']))
    print len(words_JJ)
    print
    print "Part 2"
    print "Words with tags 'VBZ' -> 3rd person singular verbs or ('NNPS' or 'NNS') -> plural nouns:"
    words_VBZ_NNPS_NNS = [(word, tag) for (word, tag) in words if tag == 'VBZ' or tag == 'NNPS' or tag == 'NNS']
    print words_VBZ_NNPS_NNS[:10]
    print
    sent = ""
    print "Part 3"
    print "The 3 most frequent 3-word prepositional phrases are:"
    words = bn.tagged_words(categories=category)
    for (w1, t1), (w2, t2), (w3, t3) in nltk.trigrams(words):
        if t1.startswith('IN') and t2.startswith('AT') and t3.startswith('NN'):
            sent = sent + w1.lower() + " " + w2.lower() + " " + w3.lower() + "."
    sent_part = sent.split(".")
    fd = nltk.FreqDist(sent_part)
    v = fd.most_common(3)
    print v
    print
    print "Part 4"
    print "Ratio of masculine to feminine pronouns is:"
    male_pattern = r'\bhe\b|\bhis\b|\bhim\b|\bhimself\b'
    female_pattern = r'\bshe\b|\bher\b|\bhers\b|\bherself\b'
    male_pronouns = len([w for w in wordlist if re.search(male_pattern, w.lower())])
    female_pronouns = len([w for w in wordlist if re.search(female_pattern, w.lower())])
    print "Male : Female is -> %d : %d" % (male_pronouns, female_pronouns)
    print
Example 5: exploreTaggedCorpora

import nltk
from nltk.corpus import brown

def exploreTaggedCorpora():
    brown_learned_text = brown.words(categories="learned")
    # words that follow "often" (nltk.ibigrams is the old NLTK 2 iterator form of nltk.bigrams)
    sorted(set(b for (a, b) in nltk.ibigrams(brown_learned_text) if a == "often"))
    brown_lrnd_tagged = brown.tagged_words(categories="learned", simplify_tags=True)
    tags = [b[1] for (a, b) in nltk.ibigrams(brown_lrnd_tagged) if a[0] == "often"]
    fd = nltk.FreqDist(tags)
    fd.tabulate()

    def process(sentence):
        for (w1, t1), (w2, t2), (w3, t3) in nltk.trigrams(sentence):
            if t1.startswith("V") and t2 == "TO" and t3.startswith("V"):
                print w1, w2, w3

    for tagged_sent in brown.tagged_sents():
        process(tagged_sent)
    brown_news_tagged = brown.tagged_words(categories="news", simplify_tags=True)
    data = nltk.ConditionalFreqDist((word.lower(), tag) for (word, tag) in brown_news_tagged)
    for word in data.conditions():
        if len(data[word]) > 3:
            tags = data[word].keys()
            print word, " ".join(tags)
Example 6: tagged_token_representation

import nltk

def tagged_token_representation():
    print nltk.tag.str2tuple("fly/NN")
    from nltk.corpus import brown
    print brown.tagged_words()
    # distribution of tags
    brown_news_tagged = brown.tagged_words(categories="news", simplify_tags=True)
    tag_fd = nltk.FreqDist(tag for (word, tag) in brown_news_tagged)
    print tag_fd
    tag_fd.plot(cumulative=True)
    # distribution of tags that immediately precede a noun (simplified tag "N")
    word_tag_pairs = nltk.bigrams(brown_news_tagged)
    print nltk.FreqDist(a[1] for (a, b) in word_tag_pairs if b[1] == "N")
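The simplify_tags=True keyword used in Examples 5 and 6 exists only in NLTK 2.x; NLTK 3 replaced it with the tagset parameter, and the universal noun tag is "NOUN". A sketch of the equivalent under Python 3 / NLTK 3 (an adaptation, not the original author's code):

import nltk
from nltk.corpus import brown

brown_news_tagged = brown.tagged_words(categories="news", tagset="universal")
word_tag_pairs = nltk.bigrams(brown_news_tagged)
print(nltk.FreqDist(a[1] for (a, b) in word_tag_pairs if b[1] == "NOUN"))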
Example 7: automaticTagging

import nltk

def automaticTagging():
    from nltk.corpus import brown
    print "=============== The Default Tagger ==============="
    brown_tagged_sents = brown.tagged_sents(categories='news')
    print brown_tagged_sents[0:3]
    brown_sents = brown.sents(categories='news')
    print brown_sents[0:3]
    tags = [tag for (word, tag) in brown.tagged_words(categories='news')]
    print nltk.FreqDist(tags).max()
    raw = 'I do not like green eggs and ham, I do not like them Sam I am!'
    tokens = nltk.word_tokenize(raw)
    default_tagger = nltk.DefaultTagger('NN')
    print default_tagger.tag(tokens)
    print default_tagger.evaluate(brown_tagged_sents)
    print "=============== The Regular Expression Tagger ==============="
    patterns = [(r'.*ing$', 'VBG'), (r'.*ed$', 'VBD'), (r'.*es$', 'VBZ'),
                (r'.*ould$', 'MD'), (r'.*\'s$', 'NN$'), (r'.*s$', 'NNS'),
                (r'^-?[0-9]+(.[0-9]+)?$', 'CD'), (r'.*', 'NN')]
    regexp_tagger = nltk.RegexpTagger(patterns)
    print regexp_tagger.tag(brown_sents[3])
    print regexp_tagger.evaluate(brown_tagged_sents)
    print "=============== The Lookup Tagger ==============="
    fd = nltk.FreqDist(brown.words(categories='news'))
    cfd = nltk.ConditionalFreqDist(brown.tagged_words(categories='news'))
    # NLTK 2 returned FreqDist keys sorted by decreasing frequency
    most_freq_words = fd.keys()[:100]
    print most_freq_words
    likely_tags = dict((word, cfd[word].max()) for word in most_freq_words)
    baseline_tagger = nltk.UnigramTagger(model=likely_tags)
    print baseline_tagger
    print baseline_tagger.evaluate(brown_tagged_sents)

    def performance(cfd, wordlist):
        lt = dict((word, cfd[word].max()) for word in wordlist)
        baseline_tagger = nltk.UnigramTagger(model=lt, backoff=nltk.DefaultTagger('NN'))
        return baseline_tagger.evaluate(brown.tagged_sents(categories='news'))

    def display():
        import pylab
        words_by_freq = list(nltk.FreqDist(brown.words(categories='news')))
        cfd = nltk.ConditionalFreqDist(brown.tagged_words(categories='news'))
        sizes = 2 ** pylab.arange(15)
        perfs = [performance(cfd, words_by_freq[:size]) for size in sizes]
        pylab.plot(sizes, perfs, '-bo')
        pylab.title('Lookup Tagger Performance with Varying Model Size')
        pylab.xlabel('Model Size')
        pylab.ylabel('Performance')
        pylab.show()

    display()
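In NLTK 3, FreqDist.keys() is no longer sorted by frequency, so the lookup-tagger idea above is usually written with most_common instead. A minimal sketch of the same technique under Python 3 (an adaptation, not part of the original example):

import nltk
from nltk.corpus import brown

fd = nltk.FreqDist(brown.words(categories='news'))
cfd = nltk.ConditionalFreqDist(brown.tagged_words(categories='news'))
# take the 100 most frequent words and their most likely tags
most_freq_words = [word for word, _ in fd.most_common(100)]
likely_tags = dict((word, cfd[word].max()) for word in most_freq_words)
baseline_tagger = nltk.UnigramTagger(model=likely_tags,
                                     backoff=nltk.DefaultTagger('NN'))
print(baseline_tagger.evaluate(brown.tagged_sents(categories='news')))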
Example 8: ch05_11_train_test_affix_tagger

import nltk

def ch05_11_train_test_affix_tagger():
    from nltk.corpus import brown
    fd = nltk.FreqDist(brown.words(categories="news"))
    cfd = nltk.ConditionalFreqDist(brown.tagged_words(categories="news"))
    most_freq_pos = dict((word, cfd[word].max()) for word in fd.keys())
    affix_tagger = nltk.AffixTagger(model=most_freq_pos)
    print affix_tagger.evaluate(brown.tagged_sents(categories="editorial"))
Example 9: ch05_34_num_words_with_1to10_distinct_tags

import nltk

def ch05_34_num_words_with_1to10_distinct_tags():
    from nltk.corpus import brown
    tagged_words = brown.tagged_words(categories="news")
    # map each word to the set of distinct tags it occurs with, then count
    # how many words have exactly 1..9 distinct tags
    dd = nltk.defaultdict(set)  # NLTK 2 re-exported collections.defaultdict
    for w, t in tagged_words:
        dd[w].add(t)
    for i in range(1, 10):
        print i, len(filter(lambda x: len(dd[x]) == i, dd.keys()))
    # for a word with a large number of tags, print one concordance line per tag
    maxtags = 6
    word = None
    tags = None
    for w in dd.keys():
        if len(dd[w]) >= maxtags:
            word = w
            tags = dd[w]
            break
    poss = []
    pos = 0
    for w, t in tagged_words:
        if w == word and t in tags:
            poss.append((t, pos))
            tags.remove(t)
        pos += 1
    for t, pos in poss:
        print t, " ".join(w for w, t in tagged_words[pos-10:pos+10])
Example 10: ch05_21_qualifiers_before_adore_love_like_prefer

import nltk

def ch05_21_qualifiers_before_adore_love_like_prefer():
    from nltk.corpus import brown
    tagged_words = brown.tagged_words(categories="news")
    tagged_word_bigrams = nltk.bigrams(tagged_words)
    allp = set(["adore", "love", "like", "prefer"])
    # qualifiers (tag QL) that immediately precede one of the target verbs;
    # the original printed an undefined name "w" instead of "w1"
    print set([w1 for (w1, t1), (w2, t2) in tagged_word_bigrams
               if t1 == "QL" and w2.lower() in allp])
Example 11: ch05_20_brown_corpus_words_phrases_by_tag

import nltk

def ch05_20_brown_corpus_words_phrases_by_tag():
    from nltk.corpus import brown
    tagged_words = brown.tagged_words(categories="news")
    # produce alphabetically sorted list of distinct words tagged MD
    print sorted(set([w.lower()
                      for (w, t) in filter(lambda (w, t): t == "MD", tagged_words)]))
    # identify words that can be plural (NRS, NPS*, NNS*) or
    # third person singular verbs (BEDZ*, BEZ*, DOZ*, *BEZ)
    # AND that end with "s"
    print set([w for (w, t) in tagged_words
               if w.lower().endswith("s") and
               (t == "NRS" or t.startswith("NPS")
                or t.startswith("NNS")
                or t.startswith("BEDZ") or t.startswith("BEZ")
                or t.startswith("DOZ") or t.endswith("BEZ"))])
    # identify 3-word prepositional phrases IN+DET+NN
    tagged_word_trigrams = nltk.trigrams(tagged_words)
    print tagged_word_trigrams[:10]
    print set([" ".join([w1, w2, w3])
               for (w1, t1), (w2, t2), (w3, t3) in tagged_word_trigrams
               if t1 == "IN" and t2 == "DET" and t3 == "NN"])
    # ratio of masculine to feminine pronouns (float division avoids
    # Python 2 integer truncation)
    num_masc_pn = len([w for (w, t) in tagged_words if w.lower() == "he"])
    num_fem_pn = len([w for (w, t) in tagged_words if w.lower() == "she"])
    print "masc/fem = ", (float(num_masc_pn) / num_fem_pn)
Example 12: category_by_pos

def category_by_pos():
    from nltk.corpus import brown
    from nltk import FreqDist
    from nltk import DecisionTreeClassifier
    from nltk import NaiveBayesClassifier
    from nltk import classify
    # collect suffix frequencies (FreqDist.inc and frequency-sorted keys()
    # are NLTK 2 APIs)
    suffix_fdist = FreqDist()
    for word in brown.words():
        word = word.lower()
        suffix_fdist.inc(word[-1:])
        suffix_fdist.inc(word[-2:])
        suffix_fdist.inc(word[-3:])
    common_suffixes = suffix_fdist.keys()[:100]
    # print common_suffixes

    def pos_features(word):
        features = {}
        for suffix in common_suffixes:
            features['endswith(%s)' % suffix] = word.lower().endswith(suffix)
        return features

    tagged_words = brown.tagged_words(categories='news')
    featuresets = [(pos_features(n), g) for (n, g) in tagged_words]
    size = int(len(featuresets) * 0.1)
    train_set, test_set = featuresets[size:], featuresets[:size]
    # classifier = DecisionTreeClassifier.train(train_set)
    # print 'Decision Tree %f' % classify.accuracy(classifier, test_set)
    classifier = NaiveBayesClassifier.train(train_set)
    print 'NaiveBayes %f' % classify.accuracy(classifier, test_set)
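Under NLTK 3, FreqDist is a collections.Counter subclass, so the suffix counting above would be written roughly like this (a sketch of the same feature-extraction idea, not the original code):

import nltk
from nltk.corpus import brown

suffix_fdist = nltk.FreqDist()
for word in brown.words():
    word = word.lower()
    suffix_fdist[word[-1:]] += 1
    suffix_fdist[word[-2:]] += 1
    suffix_fdist[word[-3:]] += 1
common_suffixes = [s for s, _ in suffix_fdist.most_common(100)]
print(common_suffixes[:10])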
Example 13: lookupTagger

import nltk
from nltk.corpus import brown

def lookupTagger():
    fd = nltk.FreqDist(brown.words(categories='news'))
    cfd = nltk.ConditionalFreqDist(brown.tagged_words(categories='news'))
    most_freq_words = fd.keys()[:100]
    likely_tags = dict((word, cfd[word].max()) for word in most_freq_words)
    baseline_tagger = nltk.UnigramTagger(model=likely_tags)
    brown_tagged_sents = brown.tagged_sents(categories='news')  # was undefined in the original
    baseline_tagger.evaluate(brown_tagged_sents)
    sent = brown.sents(categories='news')[3]
    baseline_tagger.tag(sent)
    baseline_tagger = nltk.UnigramTagger(model=likely_tags,
                                         backoff=nltk.DefaultTagger('NN'))

    def performance(cfd, wordlist):
        lt = dict((word, cfd[word].max()) for word in wordlist)
        baseline_tagger = nltk.UnigramTagger(model=lt, backoff=nltk.DefaultTagger('NN'))
        return baseline_tagger.evaluate(brown.tagged_sents(categories='news'))

    def display():
        import pylab
        words_by_freq = list(nltk.FreqDist(brown.words(categories='news')))
        cfd = nltk.ConditionalFreqDist(brown.tagged_words(categories='news'))
        sizes = 2 ** pylab.arange(15)
        perfs = [performance(cfd, words_by_freq[:size]) for size in sizes]
        pylab.plot(sizes, perfs, '-bo')
        pylab.title('Lookup Tagger Performance with Varying Model Size')
        pylab.xlabel('Model Size')
        pylab.ylabel('Performance')
        pylab.show()
Example 14: demo

from Tkinter import Tk  # Python 2; the module is "tkinter" in Python 3
from nltk.draw.table import Table

def demo():
    root = Tk()
    root.bind('<Control-q>', lambda e: root.destroy())
    table = Table(root, 'Word Synset Hypernym Hyponym'.split(),
                  column_weights=[0, 1, 1, 1],
                  reprfunc=(lambda i, j, s: ' %s' % s))
    table.pack(expand=True, fill='both')
    from nltk.corpus import wordnet
    from nltk.corpus import brown
    for word, pos in sorted(set(brown.tagged_words()[:500])):
        if pos[0] != 'N': continue
        word = word.lower()
        for synset in wordnet.synsets(word):
            hyper = (synset.hypernyms() + [''])[0]
            hypo = (synset.hyponyms() + [''])[0]
            table.append([word,
                          getattr(synset, 'definition', '*none*'),
                          getattr(hyper, 'definition', '*none*'),
                          getattr(hypo, 'definition', '*none*')])
    table.columnconfig('Word', background='#afa')
    table.columnconfig('Synset', background='#efe')
    table.columnconfig('Hypernym', background='#fee')
    table.columnconfig('Hyponym', background='#ffe')
    for row in range(len(table)):
        for column in ('Hypernym', 'Hyponym'):
            if table[row, column] == '*none*':
                table.itemconfig(row, column, foreground='#666',
                                 selectforeground='#666')
    root.mainloop()
Example 15: question2

import re
from nltk.corpus import brown as bn

def question2(category):
    # print
    # print "For Category: " + category
    # print "Words with the tag 'JJ':"
    # print
    words = bn.tagged_words(categories=category)
    wordlist = bn.words(categories=category)
    words_JJ = set(sorted([(word, tag) for (word, tag) in words if tag == 'JJ']))
    print len(words_JJ)
    print
    print
    print "Words with tags 'VBZ' -> 3rd person singular verbs or ('NNPS' or 'NNS') -> plural nouns:"
    print
    words_VBZ_NNPS_NNS = [(word, tag) for (word, tag) in words if tag == 'VBZ' or tag == 'NNPS' or tag == 'NNS']
    print words_VBZ_NNPS_NNS[:10]
    print
    print
    print "Ratio"
    print
    male_pattern = r'\bhe\b|\bhis\b|\bhim\b|\bhimself\b'
    female_pattern = r'\bshe\b|\bher\b|\bhers\b|\bherself\b'
    male_pronouns = len([w for w in wordlist if re.search(male_pattern, w.lower())])
    female_pronouns = len([w for w in wordlist if re.search(female_pattern, w.lower())])
    print "Male : Female is -> %d : %d" % (male_pronouns, female_pronouns)
    print
    print
    sent = ""
    print "3 word prepositional phrases are:"