Python nltk.ne_chunk_sents Function Code Examples

This article collects typical usage examples of the nltk.ne_chunk_sents function in Python. If you are wondering what exactly ne_chunk_sents does, how to call it, or where to find usage examples, the curated code samples below may help.


The following shows 15 code examples of the ne_chunk_sents function, sorted by popularity by default. You can upvote the examples you like or find useful; your ratings help the system recommend better Python code samples.
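
All of the examples below follow the same pipeline: split the text into sentences with sent_tokenize, tokenize and POS-tag each sentence, feed the tagged sentences to nltk.ne_chunk_sents, and then walk the resulting parse trees. Several snippets call an extract_entity_names helper without defining it; the following minimal, self-contained sketch of that shared pattern (assuming the standard NLTK models punkt, averaged_perceptron_tagger, maxent_ne_chunker and words have been downloaded; the sample sentence is illustrative) shows one way to put the pieces together:

import nltk

def extract_entity_names(t):
    """Recursively collect entity strings from a chunked sentence tree."""
    entity_names = []
    if hasattr(t, 'label'):
        if t.label() == 'NE':  # with binary=True every entity is tagged 'NE'
            entity_names.append(' '.join(child[0] for child in t))
        else:
            for child in t:
                entity_names.extend(extract_entity_names(child))
    return entity_names

text = "Rami Eid is studying at Stony Brook University in New York."
sentences = nltk.sent_tokenize(text)
tokenized = [nltk.word_tokenize(s) for s in sentences]
tagged = [nltk.pos_tag(s) for s in tokenized]
chunked = nltk.ne_chunk_sents(tagged, binary=True)

entity_names = []
for tree in chunked:
    entity_names.extend(extract_entity_names(tree))
print(set(entity_names))  # e.g. {'Rami Eid', 'Stony Brook University', 'New York'}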

Example 1: parts_of_speech

def parts_of_speech(self, corpus):
    "returns named entity chunks in a given text"
    sentences = nltk.sent_tokenize(corpus)  # using the sentence tokenizer for Spanish
    tokenized = [nltk.word_tokenize(sentence) for sentence in sentences]
    pos_tags = [nltk.pos_tag(sentence) for sentence in tokenized]
    chunked_sents = nltk.ne_chunk_sents(pos_tags, binary=True)
    return chunked_sents
Developer: IIC2113-Grupo3-2015, Project: Procesador-de-Textos, Lines: 7, Source: GeneradorRelaciones.py

Example 2: chunkIntoEntities

def chunkIntoEntities(text):
    entities = []
    # sentenceTokenization and splitContentbyDelimiter are helpers defined
    # elsewhere in this project
    sentences = sentenceTokenization(text)
    tokenized_sentences = [nltk.word_tokenize(sentence) for sentence in sentences]
    tagged_sentences = [nltk.pos_tag(sentence) for sentence in tokenized_sentences]
    chunked_sentences = nltk.ne_chunk_sents(tagged_sentences, binary=True)

    def extract_entity_names(t):
        entity_names = []
        if hasattr(t, 'label'):
            if t.label() == 'NE':
                entity_names.append(' '.join([child[0] for child in t]))
            else:
                for child in t:
                    entity_names.extend(extract_entity_names(child))
        return entity_names

    for tree in chunked_sentences:
        entities.extend(extract_entity_names(tree))

    chunked_content = splitContentbyDelimiter(text, entities)
    return [chunked_content, entities]
Developer: dxr1988, Project: NLTK-Research, Lines: 25, Source: nltk_helper.py

Example 3: getEntities

def getEntities(filename):
    with open(filename, 'r', encoding='ascii', errors='ignore') as f:
        sample = f.read()
    print("sentence tokenize...")
    sentences = nltk.sent_tokenize(sample)
    print(len(sentences))
    # keep only the first 1/30th of the sentences to speed things up
    sentences = sentences[:len(sentences) // 30]
    print(len(sentences))
    print("word tokenize...")
    tokenized_sentences = [nltk.word_tokenize(sentence) for sentence in sentences]
    print("POS tagging...")
    tagged_sentences = [nltk.pos_tag(sentence) for sentence in tokenized_sentences]
    print("Chunking...")
    chunked_sentences = nltk.ne_chunk_sents(tagged_sentences, binary=True)
    entity_names = []
    print("getting entities...")
    # ne_chunk_sents returns a lazy iterator, so count the tagged sentences instead
    print("total sentences =", len(tagged_sentences))
    for i, tree in enumerate(chunked_sentences):
        if i % 100 == 0:
            print("on sentence", i)
        # extract_entity_names is the module-level helper (see the sketch above)
        entity_names.extend(extract_entity_names(tree))
    uniques = list(set(entity_names))
    # only return named entities that are 2 words or more
    return [u for u in uniques if len(u.split(" ")) >= 2]
Developer: wellesleynlp, Project: meganokeefe-finalproject, Lines: 25, Source: entities.py

Example 4: extract_entity_names

def extract_entity_names(text):
    sentences = nltk.sent_tokenize(text)
    tokenized_sentences = [nltk.word_tokenize(sentence) for sentence in sentences]
    tagged_sentences = [nltk.pos_tag(sentence) for sentence in tokenized_sentences]
    chunked_sentences = nltk.ne_chunk_sents(tagged_sentences, binary=True)

    def entity_names(t):
        names = []
        if hasattr(t, 'label'):
            if t.label() == 'NE':
                names.append(' '.join([child[0] for child in t]))
            else:
                for child in t:
                    names.extend(entity_names(child))
        return names

    names = []
    for tree in chunked_sentences:
        names.extend(entity_names(tree))

    return set(names)
Developer: michal3141, Project: geomedia, Lines: 26, Source: ner_extract.py

Example 5: nltk_extract_ner

from collections import defaultdict

def nltk_extract_ner(text):
    """
    Use NLTK's named-entity chunker with typed labels (binary=False).
    :param text: raw input text
    :return: dict mapping entity type to a list of extracted entities
    """
    sentences = nltk.sent_tokenize(text)
    tokenized_sentences = [nltk.word_tokenize(sentence) for sentence in sentences]
    tagged_sentences = [nltk.pos_tag(sentence) for sentence in tokenized_sentences]
    chunked_sentences = nltk.ne_chunk_sents(tagged_sentences, binary=False)

    d = defaultdict(list)

    def extract_entity_names(t):
        entity_names = []
        if hasattr(t, 'label'):
            # if it is recognized as a NE, store it under its type
            # (ne_types is a set of accepted labels defined elsewhere)
            if t.label() in ne_types:
                d[t.label()].append(' '.join([child[0] for child in t]))
            else:
                for child in t:
                    entity_names.extend(extract_entity_names(child))
        return entity_names

    for tree in chunked_sentences:
        # collect results per sentence (via side effect on d)
        extract_entity_names(tree)

    # return all entity names grouped by type
    return d
Developer: bfurlan, Project: IE4MAS, Lines: 33, Source: nltk_ner_extractor.py
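
With binary=False the chunker emits typed, ACE-style labels instead of the single 'NE' tag. The ne_types collection checked above is defined elsewhere in that project; a plausible stand-in (an assumption, not the original project's code) plus a usage sketch:

# hypothetical label set; NLTK's multiclass chunker uses ACE-style types
ne_types = {'PERSON', 'ORGANIZATION', 'GPE', 'LOCATION', 'FACILITY'}

# e.g. nltk_extract_ner("Barack Obama visited Paris.") might return
# defaultdict(list, {'PERSON': ['Barack Obama'], 'GPE': ['Paris']})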

Example 6: get_entities

def get_entities(story):
    entities = {}

    # Note: calling nltk.pos_tag() directly on a flat list of tokens is wrong.
    # The story must first be split into sentences with nltk.sent_tokenize(),
    # and each sentence tokenized with nltk.word_tokenize(), i.e. not:
    #   storytokens = tokenizer(story)
    #   pos_words = nltk.pos_tag(storytokens)

    sentences = nltk.sent_tokenize(story)
    tokenized_sentences = [nltk.word_tokenize(sentence) for sentence in sentences]
    # tags 'Boy' and 'Scout' as 'NNP' individually
    tagged_sentences = [nltk.pos_tag(sentence) for sentence in tokenized_sentences]
    # chunks 'Boy Scout' as a single 'NE' (named entity)
    chunked_sentences = nltk.ne_chunk_sents(tagged_sentences, binary=True)

    entity_in_sentences = []
    for tree in chunked_sentences:
        # extract_entity_names(tree) finds the entities in each chunked sentence
        entity_in_sentences.extend(extract_entity_names(tree))

    # drop duplicate entities across sentences
    entities_unique = set(entity_in_sentences)
    # map each entity to a unique integer id
    for i, entity in enumerate(entities_unique):
        entities[entity] = i

    return entities
Developer: YuzhouWang, Project: 657-Project, Lines: 32, Source: preprocess_data.py

Example 7: extractKeywords

def extractKeywords(data):
    array = []
    logging.warning('NLTK processing starts:')
    logging.warning(data)
    for sample in data:
        sentences = nltk.sent_tokenize(sample)
        tokenized_sentences = [nltk.word_tokenize(sentence) for sentence in sentences]
        tagged_sentences = [nltk.pos_tag(sentence) for sentence in tokenized_sentences]
        chunked_sentences = nltk.ne_chunk_sents(tagged_sentences, binary=True)

        def extract_entity_names(t):
            entity_names = []
            if hasattr(t, 'label'):
                if t.label() == 'NE':
                    entity_names.append(' '.join([child[0].lower() for child in t]))
                else:
                    for child in t:
                        entity_names.extend(extract_entity_names(child))
            return entity_names

        entity_names = []
        for tree in chunked_sentences:
            entity_names.extend(extract_entity_names(tree))
        for item in entity_names:
            # stops is a stopword collection defined elsewhere in the app
            if item not in stops:
                array.append(item)
    logging.warning('NLTK processing finished:')
    logging.warning(array)
    return array
Developer: KseniiaBelorustceva, Project: text-analyser, Lines: 30, Source: app.py

Example 8: extract_named_entities

def extract_named_entities(text_blocks):
    """
    Return a list of named entities extracted from provided text blocks (list of text strings).
    """
    sentences = []
    for text in text_blocks:
        sentences.extend(nltk.sent_tokenize(text))

    tokenized_sentences = [nltk.word_tokenize(sentence) for sentence in sentences]
    tagged_sentences = [nltk.pos_tag(sentence) for sentence in tokenized_sentences]
    chunked_sentences = nltk.ne_chunk_sents(tagged_sentences, binary=True)

    def extract_entity_names(t):
        entity_names = []

        if hasattr(t, 'label'):
            if t.label() == 'NE':
                entity_names.append(' '.join([child[0] for child in t]))
            else:
                for child in t:
                    entity_names.extend(extract_entity_names(child))

        return entity_names

    entity_names = []
    for tree in chunked_sentences:
        entity_names.extend(extract_entity_names(tree))

    return set(entity_names)
Developer: hasgeek, Project: coaster, Lines: 29, Source: nlp.py

Example 9: get_top_NEs

def get_top_NEs(tagged_sentences, n=TOP_NERs):
    """ Return the n longest named entities of a text """
    chunked_sentences = nltk.ne_chunk_sents(tagged_sentences, binary=True)
    entity_names = []
    for tree in chunked_sentences:
        entity_names.extend(extract_entity_names(tree))

    return sorted(entity_names, key=len, reverse=True)[:n]
Developer: pan-webis-de, Project: maluleka16, Lines: 8, Source: source-retrieval.py

Example 10: chunk_sentences

def chunk_sentences(sentences):

    sentences = [nltk.word_tokenize(sent) for sent in sentences]
    sentences = [nltk.pos_tag(sent) for sent in sentences]

    chunked_sentences = nltk.ne_chunk_sents(sentences, binary=True)

    return chunked_sentences
Developer: Jwpe, Project: entity-extractor, Lines: 8, Source: extract_named_entities.py
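
Note that ne_chunk_sents returns the result of the chunker's parse_sents call, which in recent NLTK releases is a lazy iterator rather than a list, so the value returned by chunk_sentences can only be traversed once. A short usage sketch (the sample sentence is illustrative):

trees = list(chunk_sentences(["Mary saw Bob in London."]))  # materialize the iterator
print(len(trees))   # len() works on the list; it would fail on the raw iterator
for tree in trees:  # safe to iterate as many times as needed
    print(tree)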

Example 11: ie_process

import string

def ie_process(document):
    "returns named entity chunks in a given text"
    sentences = nltk.sent_tokenize(document)
    # strip punctuation before tokenizing; str.translate needs a mapping table,
    # so build one with str.maketrans (passing string.punctuation directly is a no-op)
    table = str.maketrans('', '', string.punctuation)
    tokenized = [nltk.word_tokenize(sentence.translate(table)) for sentence in sentences]
    pos_tags = [nltk.pos_tag(sentence) for sentence in tokenized]
    chunked_sents = nltk.ne_chunk_sents(pos_tags, binary=True)
    return chunked_sents
Developer: vipmunot, Project: Sentiment-Analysis, Lines: 8, Source: NLP+processing+and+Named+Entity+_+Relationship+Extraction.py

Example 12: extract_person_names

def extract_person_names(text):
    sentences = nltk.sent_tokenize(text)
    tokenized_sentences = [nltk.word_tokenize(sentence) for sentence in sentences]
    tagged_sentences = [pos_tagger.tag(sentence) for sentence in tokenized_sentences]
    chunked_sentences = nltk.ne_chunk_sents(tagged_sentences)

    return set(_flat_map(extract_person_names_from_tree(tree)
                         for tree in chunked_sentences))
Developer: csojinb, Project: name-extractor-api, Lines: 8, Source: name_extractor.py

Example 13: extract_named_entities

def extract_named_entities(text):
    sentences = nltk.sent_tokenize(text)
    tokenized_sentences = [nltk.word_tokenize(sentence) for sentence in sentences]
    tagged_sentences = [nltk.pos_tag(sentence) for sentence in tokenized_sentences]
    chunked_sentences = nltk.ne_chunk_sents(tagged_sentences, binary=True)
    entity_names = []
    for tree in chunked_sentences:
        entity_names.extend(extract_entity_names(tree))
    return list(set(entity_names))
Developer: dibaunaumh, Project: fcs-skateboard, Lines: 9, Source: extract_article_concepts.py

Example 14: chunked_sentences

def chunked_sentences(text):
    """Splits a large string into chunked sentences [http://www.nltk.org/book/ch07.html#chunking]
    """
    import nltk
    sentences = split_sentences(text)
    tokenized_sentences = [nltk.word_tokenize(sentence) for sentence in sentences]
    tagged_sentences = [nltk.pos_tag(sentence) for sentence in tokenized_sentences]
    chunked = nltk.ne_chunk_sents(tagged_sentences, binary=True)  # avoid shadowing the function name
    return chunked
Developer: makalaaneesh, Project: newspaper, Lines: 9, Source: nlp.py

Example 15: name_rec1

def name_rec1(sample):
    sentences = nltk.sent_tokenize(sample)
    tokenized_sentences = [nltk.word_tokenize(sentence) for sentence in sentences]
    tagged_sentences = [nltk.pos_tag(sentence) for sentence in tokenized_sentences]
    chunked_sentences = nltk.ne_chunk_sents(tagged_sentences, binary=True)
    entity_names = []
    for tree in chunked_sentences:
        entity_names.extend(extract_entity_names(tree))
    return entity_names
Developer: Sapphirine, Project: Data-Analytics-of-Video-Popularity, Lines: 9, Source: NE.py


Note: the nltk.ne_chunk_sents examples in this article were compiled by 纯净天空 from open-source code and documentation platforms such as GitHub and MSDocs. The snippets come from open-source projects contributed by their respective developers; copyright in the source code remains with the original authors. Consult each project's License before distributing or reusing the code. Do not reproduce this article without permission.