This article collects typical usage examples of the nltk.ne_chunk_sents function in Python. If you have been wondering what exactly ne_chunk_sents does, how to use it, or are looking for worked examples, the curated code samples here should help.
Below are 15 code examples of the ne_chunk_sents function, ordered by popularity by default.
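As a quick orientation before the examples: ne_chunk_sents takes a list of POS-tagged sentences and returns a lazy iterator of nltk.Tree objects, one tree per sentence. A minimal sketch of the usual pipeline follows; the sample text is made up for illustration, and all snippets on this page assume `import nltk` plus the listed NLTK data resources.

import nltk

# One-off downloads needed by the pipeline below:
# nltk.download('punkt'); nltk.download('averaged_perceptron_tagger')
# nltk.download('maxent_ne_chunker'); nltk.download('words')

text = "Barack Obama visited Berlin."  # hypothetical sample input
sentences = nltk.sent_tokenize(text)                    # split into sentences
tokenized = [nltk.word_tokenize(s) for s in sentences]  # split into words
tagged = [nltk.pos_tag(s) for s in tokenized]           # POS-tag each sentence
# ne_chunk_sents yields one nltk.Tree per sentence; with binary=True every
# named entity is labeled 'NE' instead of PERSON/ORGANIZATION/GPE/...
for tree in nltk.ne_chunk_sents(tagged, binary=True):
    print(tree)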
Example 1: parts_of_speech
def parts_of_speech(self, corpus):
    "returns named entity chunks in a given text"
    sentences = nltk.sent_tokenize(corpus)  # using the sentence tokenizer for Spanish input
    tokenized = [nltk.word_tokenize(sentence) for sentence in sentences]
    pos_tags = [nltk.pos_tag(sentence) for sentence in tokenized]
    chunked_sents = nltk.ne_chunk_sents(pos_tags, binary=True)
    return chunked_sents
Example 2: chunkIntoEntities
def chunkIntoEntities(text):
    entities = []
    sentences = sentenceTokenization(text)
    tokenized_sentences = [nltk.word_tokenize(sentence) for sentence in sentences]
    tagged_sentences = [nltk.pos_tag(sentence) for sentence in tokenized_sentences]
    chunked_sentences = nltk.ne_chunk_sents(tagged_sentences, binary=True)

    def extract_entity_names(t):
        entity_names = []
        if hasattr(t, 'label') and t.label:
            if t.label() == 'NE':
                entity_names.append(' '.join([child[0] for child in t]))
            else:
                for child in t:
                    entity_names.extend(extract_entity_names(child))
        return entity_names

    for tree in chunked_sentences:
        entity_names = extract_entity_names(tree)
        entities.extend(entity_names)
    chunked_content = splitContentbyDelimiter(text, entities)
    return [chunked_content, entities]
Example 3: getEntities
def getEntities(filename):
    with open(filename, 'r') as f:
        sample = f.read()
    # normalize to plain ASCII, dropping characters that cannot be encoded
    sample = sample.encode('ascii', 'ignore').decode('ascii')
    print("sentence tokenize...")
    sentences = nltk.sent_tokenize(sample)
    print(len(sentences))
    # keep only the first ~3% of the sentences to speed things up
    sentences = sentences[:len(sentences) // 30]
    print(len(sentences))
    print("word tokenize...")
    tokenized_sentences = [nltk.word_tokenize(sentence) for sentence in sentences]
    print("POS tagging...")
    tagged_sentences = [nltk.pos_tag(sentence) for sentence in tokenized_sentences]
    print("Chunking...")
    chunked_sentences = nltk.ne_chunk_sents(tagged_sentences, binary=True)
    entity_names = []
    print("getting entities...")
    # chunked_sentences is a lazy iterator, so count the tagged sentences instead
    print("total sentences =", len(tagged_sentences))
    for i, tree in enumerate(chunked_sentences):
        if i % 100 == 0:
            print("on sentence", i)
        entity_names.extend(extract_entity_names(tree))
    uniques = list(set(entity_names))
    # only return named entities that are 2 words or more
    output = [u for u in uniques if len(u.split(" ")) >= 2]
    return output
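Note: unlike Examples 2 and 4, this snippet calls a module-level extract_entity_names helper that is not shown; it is presumably equivalent to the nested helper defined in Example 2. The same applies to Examples 6, 9, 13 and 15 below.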
Example 4: extract_entity_names
def extract_entity_names(text):
    sentences = nltk.sent_tokenize(text)
    tokenized_sentences = [nltk.word_tokenize(sentence) for sentence in sentences]
    tagged_sentences = [nltk.pos_tag(sentence) for sentence in tokenized_sentences]
    chunked_sentences = nltk.ne_chunk_sents(tagged_sentences, binary=True)

    def entity_names(t):
        names = []
        if hasattr(t, 'label') and t.label:
            if t.label() == 'NE':
                names.append(' '.join([child[0] for child in t]))
            else:
                for child in t:
                    names.extend(entity_names(child))
        return names

    names = []
    for tree in chunked_sentences:
        # print(entity_names(tree))  # to see results per sentence
        names.extend(entity_names(tree))
    return set(names)
Example 5: nltk_extract_ner
from collections import defaultdict

def nltk_extract_ner(text):
    """
    Use NLTK's named-entity chunker.
    :param text: raw input text
    :return: dict mapping each entity type to the list of extracted entities
    """
    sentences = nltk.sent_tokenize(text)
    tokenized_sentences = [nltk.word_tokenize(sentence) for sentence in sentences]
    tagged_sentences = [nltk.pos_tag(sentence) for sentence in tokenized_sentences]
    chunked_sentences = nltk.ne_chunk_sents(tagged_sentences, binary=False)
    d = defaultdict(list)

    def extract_entity_names(t):
        entity_names = []
        if hasattr(t, 'label') and t.label:
            # if it is recognized as an NE, store it under the key of its type
            if t.label() in ne_types:
                d[t.label()].append(' '.join([child[0] for child in t]))
            else:
                for child in t:
                    entity_names.extend(extract_entity_names(child))
        return entity_names

    for tree in chunked_sentences:
        # collect results per sentence
        extract_entity_names(tree)
    # return all entity names, grouped by entity type
    return d
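This example references a module-level ne_types collection that is not shown. A plausible definition (an assumption for illustration, not the project's original code) would list the labels NLTK's maxent NE chunker emits in non-binary mode:

# Assumed definition of ne_types: the entity labels nltk.ne_chunk_sents
# can produce when binary=False.
ne_types = {'PERSON', 'ORGANIZATION', 'GPE', 'LOCATION', 'FACILITY', 'GSP'}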
Example 6: get_entities
def get_entities(story):
    entities = {}
    '''Wrong earlier approach: before nltk.pos_tag() can be applied, the story
    first has to be split into sentences using nltk.sent_tokenize(), and each
    sentence then tokenized into words using nltk.word_tokenize():
        storytokens = tokenizer(story)  # removed "'", ',' and '.'
        pos_words = nltk.pos_tag(storytokens)
    '''
    sentences = nltk.sent_tokenize(story)
    tokenized_sentences = [nltk.word_tokenize(sentence) for sentence in sentences]
    # tags 'Boy' and 'Scout' as 'NNP', respectively
    tagged_sentences = [nltk.pos_tag(sentence) for sentence in tokenized_sentences]
    # chunks 'Boy Scout' as a single 'NE' (named entity)
    chunked_sentences = nltk.ne_chunk_sents(tagged_sentences, binary=True)
    entity_in_sentences = []
    for tree in chunked_sentences:
        # extract_entity_names(tree) finds the entities in each chunked sentence
        entity_in_sentences.extend(extract_entity_names(tree))
    # remove duplicate entities across all chunked sentences
    entities_unique = set(entity_in_sentences)
    # build the entities dict, mapping each entity name to an index
    for i, entity in enumerate(entities_unique):
        entities[entity] = i
    return entities
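For a story mentioning 'Boy Scout' and 'Harry Potter', for instance, this would return something like {'Boy Scout': 0, 'Harry Potter': 1} (a hypothetical result; the actual index order depends on set iteration).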
Example 7: extractKeywords
def extractKeywords(data):
    array = []
    logging.warning('NLTK processing starts:')
    logging.warning(data)
    for sample in data:
        sentences = nltk.sent_tokenize(sample)
        tokenized_sentences = [nltk.word_tokenize(sentence) for sentence in sentences]
        tagged_sentences = [nltk.pos_tag(sentence) for sentence in tokenized_sentences]
        chunked_sentences = nltk.ne_chunk_sents(tagged_sentences, binary=True)

        def extract_entity_names(t):
            entity_names = []
            if hasattr(t, 'label') and t.label:
                if t.label() == 'NE':
                    entity_names.append(' '.join([child[0].lower() for child in t]))
                else:
                    for child in t:
                        entity_names.extend(extract_entity_names(child))
            return entity_names

        entity_names = []
        for tree in chunked_sentences:
            entity_names.extend(extract_entity_names(tree))
        # keep only entities that are not stopwords
        for name in entity_names:
            if name not in stops:
                array.append(name)
    logging.warning('NLTK processing finished:')
    logging.warning(array)
    return array
Example 8: extract_named_entities
def extract_named_entities(text_blocks):
    """
    Return the set of named entities extracted from the provided text blocks (a list of text strings).
    """
    sentences = []
    for text in text_blocks:
        sentences.extend(nltk.sent_tokenize(text))
    tokenized_sentences = [nltk.word_tokenize(sentence) for sentence in sentences]
    tagged_sentences = [nltk.pos_tag(sentence) for sentence in tokenized_sentences]
    chunked_sentences = nltk.ne_chunk_sents(tagged_sentences, binary=True)

    def extract_entity_names(t):
        entity_names = []
        if hasattr(t, 'label'):
            if t.label() == 'NE':
                entity_names.append(' '.join([child[0] for child in t]))
            else:
                for child in t:
                    entity_names.extend(extract_entity_names(child))
        return entity_names

    entity_names = []
    for tree in chunked_sentences:
        entity_names.extend(extract_entity_names(tree))
    return set(entity_names)
Example 9: get_top_NEs
def get_top_NEs(tagged_sentences, n=TOP_NERs):
    """Return the n longest named entities of a text."""
    chunked_sentences = nltk.ne_chunk_sents(tagged_sentences, binary=True)
    entity_names = []
    for tree in chunked_sentences:
        entity_names.extend(extract_entity_names(tree))
    return sorted(entity_names, key=len, reverse=True)[:n]
Example 10: chunk_sentences
def chunk_sentences(sentences):
    sentences = [nltk.word_tokenize(sent) for sent in sentences]
    sentences = [nltk.pos_tag(sent) for sent in sentences]
    chunked_sentences = nltk.ne_chunk_sents(sentences, binary=True)
    return chunked_sentences
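A hypothetical way to consume the result; note that ne_chunk_sents returns a lazy iterator, so materialize it with list() if you need to traverse it more than once:

trees = list(chunk_sentences(["Alice met Bob in Paris."]))  # made-up input
for tree in trees:
    # with binary=True, every named-entity subtree carries the label 'NE'
    for subtree in tree.subtrees(lambda t: t.label() == 'NE'):
        print(' '.join(token for token, tag in subtree.leaves()))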
Example 11: ie_process
import string

def ie_process(document):
    "returns named entity chunks in a given text"
    sentences = nltk.sent_tokenize(document)
    # strip punctuation from each sentence before tokenizing
    table = str.maketrans('', '', string.punctuation)
    tokenized = [nltk.word_tokenize(sentence.translate(table)) for sentence in sentences]
    pos_tags = [nltk.pos_tag(sentence) for sentence in tokenized]
    # print(pos_tags)
    chunked_sents = nltk.ne_chunk_sents(pos_tags, binary=True)
    return chunked_sents
Developer: vipmunot; Project: Sentiment-Analysis; Lines of code: 8; Source file: NLP+processing+and+Named+Entity+_+Relationship+Extraction.py
Example 12: extract_person_names
def extract_person_names(text):
    sentences = nltk.sent_tokenize(text)
    tokenized_sentences = [nltk.word_tokenize(sentence) for sentence in sentences]
    tagged_sentences = [pos_tagger.tag(sentence) for sentence in tokenized_sentences]
    chunked_sentences = nltk.ne_chunk_sents(tagged_sentences)
    return set(_flat_map(extract_person_names_from_tree(tree)
                         for tree in chunked_sentences))
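This snippet relies on three names defined elsewhere in its project: a pos_tagger object and the _flat_map and extract_person_names_from_tree helpers. A minimal sketch of what the latter two might look like (assumptions for illustration, not the original code):

from itertools import chain

def _flat_map(iterables):
    # assumed helper: flatten an iterable of iterables into one sequence
    return chain.from_iterable(iterables)

def extract_person_names_from_tree(tree):
    # assumed helper: yield the text of every PERSON subtree; with the
    # default binary=False, ne_chunk_sents labels person entities 'PERSON'
    for subtree in tree.subtrees(lambda t: t.label() == 'PERSON'):
        yield ' '.join(token for token, tag in subtree.leaves())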
Example 13: extract_named_entities
def extract_named_entities(text):
    sentences = nltk.sent_tokenize(text)
    tokenized_sentences = [nltk.word_tokenize(sentence) for sentence in sentences]
    tagged_sentences = [nltk.pos_tag(sentence) for sentence in tokenized_sentences]
    chunked_sentences = nltk.ne_chunk_sents(tagged_sentences, binary=True)
    entity_names = []
    for tree in chunked_sentences:
        entity_names.extend(extract_entity_names(tree))
    return list(set(entity_names))
Example 14: chunked_sentences
def chunked_sentences(text):
    """Splits a large string into chunked sentences [http://www.nltk.org/book/ch07.html#chunking]
    """
    import nltk
    sentences = split_sentences(text)
    tokenized_sentences = [nltk.word_tokenize(sentence) for sentence in sentences]
    tagged_sentences = [nltk.pos_tag(sentence) for sentence in tokenized_sentences]
    chunked_sentences = nltk.ne_chunk_sents(tagged_sentences, binary=True)
    return chunked_sentences
Example 15: name_rec1
def name_rec1(sample):
    sentences = nltk.sent_tokenize(sample)
    tokenized_sentences = [nltk.word_tokenize(sentence) for sentence in sentences]
    tagged_sentences = [nltk.pos_tag(sentence) for sentence in tokenized_sentences]
    chunked_sentences = nltk.ne_chunk_sents(tagged_sentences, binary=True)
    entity_names = []
    for tree in chunked_sentences:
        entity_names.extend(extract_entity_names(tree))
    return entity_names