

Python nltk.ne_chunk Function Code Examples

This article collects typical usage examples of the Python nltk.ne_chunk function. If you are asking yourself what exactly ne_chunk does, how to call it, or what real-world ne_chunk code looks like, the hand-picked examples below should help.


Fifteen code examples of the ne_chunk function are shown below, sorted by popularity by default. You can upvote the examples you like or find useful; your ratings help the system recommend better Python code examples.
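Before the examples, here is a minimal, self-contained sketch of the usual ne_chunk pipeline (tokenize, POS-tag, chunk). The sample sentence is illustrative, and the nltk.download resource names reflect recent NLTK releases; older or newer versions may use different names.

import nltk

# One-time model downloads needed by the calls below
# (resource names as of recent NLTK releases).
nltk.download('punkt')
nltk.download('averaged_perceptron_tagger')
nltk.download('maxent_ne_chunker')
nltk.download('words')

sentence = "Mark works at the United Nations in New York."
tokens = nltk.word_tokenize(sentence)   # ['Mark', 'works', 'at', ...]
tagged = nltk.pos_tag(tokens)           # [('Mark', 'NNP'), ('works', 'VBZ'), ...]
tree = nltk.ne_chunk(tagged)            # an nltk.Tree with NE subtrees

# Entity subtrees carry a label such as PERSON, ORGANIZATION or GPE;
# ordinary tokens stay as plain (word, tag) tuples.
for node in tree:
    if isinstance(node, nltk.Tree):
        print(node.label(), ' '.join(word for word, tag in node.leaves()))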

Example 1: extract_entities2

def extract_entities2(text):
    entities = []
    for sentence in sent_tokenize(text):
        print sentence
        tags = pos_tag(nltk.word_tokenize(sentence))
        tags = tagear(tags)  # project-specific tag-translation helper
        chunks = ne_chunk(tags)
        for chunk in chunks:
            # NE subtrees expose a 'node' attribute in NLTK 2.x
            # (renamed to 'label' in NLTK 3.x); plain tokens are tuples.
            if hasattr(chunk, 'node'):
                print chunk
                entities.append(chunk)
    return entities
Developer: jholoc, Project: proyectoScrapy, Lines: 26, File: Tokenizacion.py
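Note that hasattr(chunk, 'node') targets the NLTK 2.x API; NLTK 3 renamed the subtree attribute to label. A sketch of the same extraction for NLTK 3, using the standard tokenizers in place of the project-specific tagear helper:

import nltk
from nltk import ne_chunk, pos_tag
from nltk.tokenize import sent_tokenize, word_tokenize

def extract_entities_nltk3(text):
    """Collect the NE subtrees of every sentence (NLTK 3.x API)."""
    entities = []
    for sentence in sent_tokenize(text):
        chunks = ne_chunk(pos_tag(word_tokenize(sentence)))
        # In NLTK 3.x entity subtrees are nltk.Tree instances with .label()
        entities.extend(c for c in chunks if isinstance(c, nltk.Tree))
    return entities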

Example 2: test_nltkNERParsing

    def test_nltkNERParsing(self):
        testString = 'Natural Sciences and Engineering Research Council of Canada'
        unigrams = TokenizeOnWhitespacePunctuation(testString, keepCaps=True).getUnigrams()
        posTagged = nltk.pos_tag(unigrams)
        chunked = nltk.ne_chunk(posTagged)
        getGPEs = []

        for treeBranch in chunked:
            if hasattr(treeBranch, 'label') and treeBranch.label() == 'GPE':
                getGPEs.append(str(treeBranch))

        self.assertEqual(1, len(getGPEs))

        testString = 'Milwaukee Foundation'
        unigrams = TokenizeOnWhitespacePunctuation(testString, keepCaps=True).getUnigrams()
        posTagged = nltk.pos_tag(unigrams)
        chunked = nltk.ne_chunk(posTagged)
        # returns (S (PERSON Milwaukee/NNP) (ORGANIZATION Foundation/NNP))

        testString = 'New England Board of Higher Education'
        unigrams = TokenizeOnWhitespacePunctuation(testString, keepCaps=True).getUnigrams()
        posTagged = nltk.pos_tag(unigrams)
        chunked = nltk.ne_chunk(posTagged)
        # returns (S (GPE New/NNP)(ORGANIZATION England/NNP Board/NNP) of/IN (PERSON Higher/NNP Education/NNP))

        testString = 'New England Board of Higher Education'
        unigrams = TokenizeOnWhitespacePunctuation(testString).getUnigrams()
        posTagged = nltk.pos_tag(unigrams)
        chunked = nltk.ne_chunk(posTagged)
Developer: kyajmiller, Project: Cerebro, Lines: 29, File: TestClassifyBadScholarships.py

Example 3: extractNE

def extractNE(sentence, withClass):
    words = nltk.word_tokenize(sentence)  # split the sentence into word tokens
    if withClass:
        tree = nltk.ne_chunk(nltk.pos_tag(words), binary=False)  # typed labels: PERSON, GPE, ...
        return extractNEwithClass(tree)
    else:
        tree = nltk.ne_chunk(nltk.pos_tag(words), binary=True)   # single label: NE
        return extractNEwithoutClass(tree)
Developer: nytlabs, Project: linguo, Lines: 8, File: lookup.py
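The helpers extractNEwithClass and extractNEwithoutClass are not part of the excerpt. A hypothetical reconstruction, assuming they flatten the chunk tree into entity strings (with and without the category label):

import nltk

def extractNEwithClass(tree):
    # Hypothetical: pair each entity's category label with its text.
    return [(subtree.label(), ' '.join(word for word, tag in subtree.leaves()))
            for subtree in tree if isinstance(subtree, nltk.Tree)]

def extractNEwithoutClass(tree):
    # Hypothetical: binary trees label every entity 'NE', so keep only the text.
    return [' '.join(word for word, tag in subtree.leaves())
            for subtree in tree if isinstance(subtree, nltk.Tree)]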

Example 4: nameEntityExtract

def nameEntityExtract(document):
	sentences = nltk.sent_tokenize(document)
	sentences = [nltk.word_tokenize(sent) for sent in sentences]
	sentences = [nltk.pos_tag(sent) for sent in sentences]
	print sentences[0]
	print "the length of sentences is: " + str(len(sentences))
	sent = sentences[0]
	print nltk.ne_chunk(sent, binary=True)
Developer: yuqiaoyan, Project: Python, Lines: 8, File: nltkExtract.py

Example 5: English_NER

def English_NER(sentence):
    # With binary=True, every named entity is tagged simply as NE
    print 'Named entities tagged only as NE:'
    print nltk.ne_chunk(sentence, binary=True)

    # By default, entities get type labels such as PERSON, ORGANIZATION, GPE
    print 'Named entities with type labels such as PERSON, ORGANIZATION, GPE:'
    print nltk.ne_chunk(sentence)
Developer: littlewilliam, Project: Natural-Language-process, Lines: 8, File: 1_Named_Entity_Recognition.py
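Note that ne_chunk expects a POS-tagged token list rather than a raw string, so English_NER has to be called with tagged input; a small usage sketch (sample sentence is illustrative):

import nltk

tagged = nltk.pos_tag(nltk.word_tokenize("Steve Jobs founded Apple in California."))
English_NER(tagged)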

Example 6: main

def main():
    sent = nltk.corpus.treebank.tagged_sents()[22]
    print "sent (nltk):", sent
    #print nltk.ne_chunk(sent, binary=True)
    #print nltk.ne_chunk(sent)

    # ie_preprocess (defined elsewhere in the project) sentence-tokenizes,
    # word-tokenizes and POS-tags the raw text.
    sent = ie_preprocess("""Injured personnel consisting of six Schlum employees were immediately transported
                        to nearby hospitals and most of them (were)
                        discharged after having received treatment""")
    print sent
    print nltk.ne_chunk(sent[0])
Developer: attibalazs, Project: nltk-examples, Lines: 11, File: 7.5_Named_Entity_Recognition.py

Example 7: process_contents

def process_contents():
    try:
        for i in tokenized[5:]:
            words = nltk.word_tokenize(i)
            tagged = nltk.pos_tag(words)
            namedEntTyped = nltk.ne_chunk(tagged)          # typed: "White" and "House" may chunk separately
            namedEnt = nltk.ne_chunk(tagged, binary=True)  # binary: "White House" as a single NE
            namedEnt.draw()

    except Exception as e:
        print(str(e))
Developer: matt-ice, Project: python_nltk_tutorial, Lines: 11, File: Unit+07+-+Named+Entry+Recognition.py
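To see the typed/binary difference from this example without opening the draw() window, a short sketch that prints both trees for one sentence (sample text is illustrative; the exact chunking depends on the classifier):

import nltk

tagged = nltk.pos_tag(nltk.word_tokenize("The White House is in Washington."))

# Typed chunking (default): each entity carries a category label, and
# "White" and "House" may end up in separate chunks.
print(nltk.ne_chunk(tagged))

# Binary chunking: every entity collapses to the single label NE,
# typically keeping "White House" together.
print(nltk.ne_chunk(tagged, binary=True))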

Example 8: process_content

def process_content():
    try:
        for i in tokenized:
            words = nltk.word_tokenize(i)
            tagged = nltk.pos_tag(words)

            namedEnt1 = nltk.ne_chunk(tagged)               # gives all named entities with a category
            namedEnt2 = nltk.ne_chunk(tagged, binary=True)  # gives named entities without a category

            namedEnt2.draw()

    except Exception as e:
        print(str(e))
Developer: MaryamZi, Project: WSO2_PYTHON_NLTK, Lines: 13, File: NamedEntityEcognition.py

Example 9: process_content

def process_content():
    for i in custom_tokenized[5:]:
        words = word_tokenize(i)
        tagged = nltk.pos_tag(words)
        namedEnt = nltk.ne_chunk(tagged)

        print(namedEnt)
Developer: jmarthernandez, Project: py-nltk, Lines: 7, File: mlk.py

Example 10: get_entities

	def get_entities(self, sentences):
		""" Returns a dictionary with the results of the
		Named Entity Recognition analysis.

		Args:
		   sentences: the list of sentences.

		Returns:
			dictionary: maps each entity label (e.g. PERSON, GPE)
			to a sorted list of entity strings.
		"""
		entities = dict([])

		# Tokenization
		tokens = [nltk.tokenize.word_tokenize(s) for s in sentences]

		# Part-Of-Speech tagging
		pos_tagged_tokens = [nltk.pos_tag(t) for t in tokens]

		# Chunking
		chunked_nes = [nltk.ne_chunk(c) for c in pos_tagged_tokens]

		for tree in chunked_nes:
			for s in tree.subtrees(lambda t: (t.height()==2)):
				if s.label()!='S':
					entity = ' '.join(i[0] for i in s.leaves())
					if s.label() in entities.keys():
						if entity not in entities[s.label()]:
							entities[s.label()].append(entity)
							entities[s.label()].sort()
					else:	
						entities[s.label()] = [entity]

		return entities
Developer: gdamdam, Project: sumo, Lines: 33, File: analyzer.py
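A usage sketch for the method above, showing the shape of the returned dictionary. analyzer stands for an instance of the surrounding class (not shown in the excerpt), and the exact labels depend on the classifier:

sentences = ["Barack Obama visited Paris.",
             "The United Nations is headquartered in New York."]
entities = analyzer.get_entities(sentences)
# Possible result shape:
# {'GPE': ['New York', 'Paris'],
#  'ORGANIZATION': ['United Nations'],
#  'PERSON': ['Barack Obama']}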

Example 11: parse_questions

def parse_questions():
  print "Parsing Questions..."
  parsed_questions = {}
  with open(DIR+'/questions.txt', 'r') as f:
    data = f.read()
    questions = re.split('[\s]*</top>[\s]*', data)
    if len(questions[-1].strip()) == 0: questions.pop()
    qc = QuestionClassifier.QuestionClassifier()
    for question in questions:
      question_number = int(re.search(r"<num>[\s]*Number:[\s]*([0-9]+)", question).group(1))
      question = re.search(r"<desc>[\s]*Description:[\s]*([a-zA-Z0-9\-\?\'\. ]+)", question).group(1)
      question_words = nltk.word_tokenize(question)
      question_pos = nltk.pos_tag(question_words)
      question_nes = nltk.ne_chunk(question_pos)
      question_tree = Chunker.chunker.parse(question_pos)
      question_classification = qc.classify(question)
      qwords, nouns, nes = [], [], []
      for part in question_nes:
        try:
          # NE subtrees expose .node (NLTK 2.x); keep the label and first word
          nes.append((part.node, part.leaves()[0][0]))
        except AttributeError:
          # plain (word, tag) tuples have no .node attribute
          if part[1] == 'WP' or part[1] == 'WRB':
            qwords.append(part[0])
          elif part[1] == 'NN' or part[1] == 'NNP':
            nouns.append(part[0])
      # print qwords, nouns, nes
      # print question_pos
      parsed_questions[question_number] = { "question": question, "pos": question_pos, "ne": question_nes, "parse_tree": question_tree, "question_classification": question_classification, "question_words": qwords, "nouns": nouns, "ne_words": nes }
  with open(DIR+'/parsed_questions.txt', 'wb') as f:
    pickle.dump(parsed_questions, f)
Developer: jcccf, Project: cs4740, Lines: 30, File: Parser.py

Example 12: extract_normal_ne

 def extract_normal_ne(self, text):
     result = []
     for sent in sent_tokenize(text) if text else []:
         for chunk in ne_chunk(pos_tag(word_tokenize(sent))):
             if hasattr(chunk, "node"):
                 result.append(" ".join([c[0] for c in chunk.leaves()]))
     return result
Developer: rchiba, Project: HipTrip, Lines: 7, File: linkage.py

Example 13: extract_concepts

def extract_concepts(text):
    """
    Uses the NLTK natural language processing library to 
    extract from a text the essential terms that appeared in it.
    """
    try:
        ignored_words = corpus.stopwords.words('english')
        ignored_words.append("n't")
        appeared = {}
        concepts = []
        tokenized = nltk.word_tokenize(text)
        tagged = nltk.pos_tag(tokenized)
        named_entities = nltk.ne_chunk(tagged)
        
        # .leaves() yields every (word, tag) pair in the tree, so this loop
        # filters all tokens, not only the chunked named entities.
        for ne in named_entities.leaves():
            #if ne[1] in ('NNS', 'NNP', 'NN'):
            if len(ne[0]) > 2 and ne[0].lower() not in ignored_words and not (ne[0].startswith("http") or ne[0].startswith("//")):
                name = ne[0]
                if name in appeared:
                    continue
                concepts.append(name)
                appeared[name] = True
    except:
        print "extract concepts failed:", sys.exc_info()
    return concepts
Developer: dibaunaumh, Project: ikana1010, Lines: 25, File: views.py

Example 14: ne_tag

def ne_tag(sentences):
    tagged = raw_trigram_tag(sentences, tagger_file="tagger.pkl")[1]
    fin = []
    for tagged_sent in tagged:
        # print tagged_sent
        fin.append(nltk.ne_chunk(tagged_sent))
    return fin
Developer: atokop, Project: compling, Lines: 7, File: named_entity_exec.py

Example 15: processor

def processor(data):
    try:
        tokenized = nltk.word_tokenize(data)
        tagged = nltk.pos_tag(tokenized)
        namedEnt = nltk.ne_chunk(tagged, binary=True)

        # pull the first word of each NE chunk out of the tree's string form
        entities = re.findall(r'NE\s(.*?)/', str(namedEnt))
        # adjectives: match ('word', 'JJ') pairs in the tagged output
        descriptives = re.findall(r'\(\'(\w*)\'.\s\'JJ\w?\'', str(tagged))
        if len(entities) > 1:
            pass
        elif len(entities) == 0:
            pass
        elif entities[0] == '_blank':
            pass
        else:
            print 'Named: ', entities[0]
            print 'Description: '
            for eachDesc in descriptives:
                print eachDesc
                currentTime = time.time()
                dateStamp = datetime.datetime.fromtimestamp(currentTime).strftime('%Y-%m-%d %H:%M:%S')
                namedEntity = entities[0]
                relatedWord = eachDesc
                c.execute("INSERT INTO knowledgeBase (unix, dateStamp, namedEntity, relatedWord) VALUES (?,?,?,?)",
                          (currentTime, dateStamp, namedEntity, relatedWord))

                conn.commit()

    except Exception, e:
        print 'failed in the first try of processor'
        print str(e)
Developer: gavve, Project: twitter-sentiment-analysis, Lines: 35, File: KnowledgeBase.py
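Example 15 recovers entities by running a regular expression over the tree's string form, which is fragile: r'NE\s(.*?)/' keeps only the first word of a multi-word entity. A sketch of the more conventional traversal under the same binary chunking:

import nltk

def binary_entities(data):
    """Return the full text of each NE chunk from a binary ne_chunk tree."""
    tagged = nltk.pos_tag(nltk.word_tokenize(data))
    tree = nltk.ne_chunk(tagged, binary=True)
    return [' '.join(word for word, tag in subtree.leaves())
            for subtree in tree if isinstance(subtree, nltk.Tree)]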


Note: The nltk.ne_chunk examples in this article were compiled by 纯净天空 from open-source code and documentation platforms such as GitHub and MSDocs. The snippets are drawn from open-source projects contributed by their respective developers; copyright in the source code remains with the original authors, and any distribution or use must follow the corresponding project's license. Do not reproduce without permission.