本文整理汇总了Python中nltk.ne_chunk方法的典型用法代码示例。如果您正苦于以下问题：Python nltk.ne_chunk方法的具体用法？Python nltk.ne_chunk怎么用？Python nltk.ne_chunk使用的例子？那么恭喜您，这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在模块nltk的用法示例。
在下文中一共展示了nltk.ne_chunk方法的15个代码示例，这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞，您的评价将有助于系统推荐出更棒的Python代码示例。
示例1: learnAnaphora
# 需要导入模块: import nltk [as 别名]
# 或者: from nltk import ne_chunk [as 别名]
def learnAnaphora(self):
    """Print anaphora-resolution candidates for a few hard-coded sentences.

    Each sample sentence is tokenized, POS-tagged and NE-chunked, then
    flattened into ``(word, pos, iob)`` triples with ``tree2conlltags``.
    A per-sentence list is filled, in token order, with:

    * ``(word, self.gender(word))`` for proper nouns (``NNP``) whose IOB tag
      is ``B-PERSON`` or ``O``;
    * the bare word for coordinating conjunctions (``CC``) and personal
      pronouns (``PRP``).

    The sentence and its candidate list are printed; nothing is returned.
    """
    samples = [
        "John is a man. He walks",
        "John and Mary are married. They have two kids",
        "In order for Ravi to be successful, he should follow John",
        "John met Mary in Barista. She asked him to order a Pizza",
    ]
    person_iob_tags = ('B-PERSON', 'O')
    plain_word_tags = ('CC', 'PRP')

    for text in samples:
        tagged = nltk.pos_tag(nltk.word_tokenize(text))
        tree = nltk.ne_chunk(tagged, binary=False)
        candidates = []
        print(text)
        for word, pos, iob in tree2conlltags(tree):
            if pos == 'NNP' and iob in person_iob_tags:
                # Proper nouns are stored together with their guessed gender.
                candidates.append((word, self.gender(word)))
            elif pos in plain_word_tags:
                # Conjunctions and pronouns are kept as plain words.
                candidates.append(word)
        print("\t {}".format(candidates))
开发者ID:PacktPublishing,项目名称:Natural-Language-Processing-with-Python-Cookbook,代码行数:23,代码来源:Anaphora.py
示例2: extractNE
# 需要导入模块: import nltk [as 别名]
# 或者: from nltk import ne_chunk [as 别名]
def extractNE():
    """Drain ``queues[1]`` and print the named entities found in each item.

    Runs until ``queues[1].empty()`` reports true. Each queued item is
    indexed with ``'input'`` (the POS-tagged tokens handed to
    ``nltk.ne_chunk``) and ``'uuid'`` (printed as the line prefix). For every
    item one line is written to stdout: the prefix followed by each
    named-entity subtree, separated by ``', '``.

    ``task_done()`` is called as soon as the item has been read, before the
    chunking work is performed.
    """
    pending = queues[1]
    while not pending.empty():
        data = pending.get()
        postags = data['input']
        pending.task_done()
        chunks = nltk.ne_chunk(postags, binary=False)
        print(" << {} : ".format(data['uuid']), end='')
        for node in chunks:
            # Named-entity subtrees expose ``label``; plain (word, tag) tuples
            # do not. Testing for the attribute replaces the previous bare
            # ``except: pass``, which also hid unrelated errors and
            # KeyboardInterrupt.
            if hasattr(node, 'label'):
                print(node, end=', ')
        print()
开发者ID:PacktPublishing,项目名称:Natural-Language-Processing-with-Python-Cookbook,代码行数:19,代码来源:PipelineQ.py
示例3: get_entities
# 需要导入模块: import nltk [as 别名]
# 或者: from nltk import ne_chunk [as 别名]
def get_entities(self, document):
    """Collect the wanted named entities from one document.

    Every sentence of every paragraph is passed to ``ne_chunk``; each
    resulting subtree whose label is in ``self.labels`` contributes one
    string made of its lower-cased words joined by single spaces.

    :param document: an iterable of paragraphs, each an iterable of
        sentences in the form ``ne_chunk`` accepts (presumably lists of
        ``(word, tag)`` tuples -- confirm against the caller)
    :return entities: a list of lower-cased, space-joined entity strings
    """
    found = []
    all_sentences = (
        sentence for paragraph in document for sentence in paragraph
    )
    for sentence in all_sentences:
        for node in ne_chunk(sentence):
            # Plain (word, tag) tuples have no label; only subtrees do.
            if not hasattr(node, 'label'):
                continue
            # Keep only the entity categories this instance is configured for.
            if node.label() not in self.labels:
                continue
            words = [leaf[0].lower() for leaf in node]
            found.append(' '.join(words))
    return found
示例4: _process
# 需要导入模块: import nltk [as 别名]
# 或者: from nltk import ne_chunk [as 别名]
def _process(self, input_pack: DataPack):
    """Create an ``EntityMention`` for every named entity in ``input_pack``.

    For each ``Sentence`` in the pack, the tokens produced by
    ``self.token_component`` are turned into ``(text, pos)`` pairs and
    chunked with ``ne_chunk``. A running token index keeps the chunker
    output aligned with the token entries so that each entity's character
    span can be read from its first and last token.

    :param input_pack: the pack whose sentences and tokens are read and
        which is passed to each new ``EntityMention``
    """
    for sentence in input_pack.get(Sentence):
        token_entries = list(
            input_pack.get(
                entry_type=Token,
                range_annotation=sentence,
                components=self.token_component,
            )
        )
        tagged = [(entry.text, entry.pos) for entry in token_entries]

        cursor = 0  # position in token_entries of the current chunk's first token
        for chunk in ne_chunk(tagged):
            if not hasattr(chunk, 'label'):
                # A bare token, e.g. ('This', 'DT'): advance by one.
                cursor += 1
                continue

            # A named-entity subtree, e.g.
            # Tree('GPE', [('New', 'NNP'), ('York', 'NNP')]): it covers
            # len(chunk) consecutive tokens starting at ``cursor``.
            width = len(chunk)
            begin_pos = token_entries[cursor].span.begin
            end_pos = token_entries[cursor + width - 1].span.end
            # NOTE(review): the mention is not stored anywhere here;
            # presumably constructing it with the pack is enough -- confirm.
            mention = EntityMention(input_pack, begin_pos, end_pos)
            mention.ner_type = chunk.label()
            cursor += width
示例5: ne_chunked
# 需要导入模块: import nltk [as 别名]
# 或者: from nltk import ne_chunk [as 别名]
def ne_chunked():
    """Print PER-ORG relations found in the first 1500 treebank sentences.

    Each tagged sentence from ``nltk.corpus.treebank`` is NE-chunked and
    passed to ``extract_rels`` with a pattern of role words (chairman,
    president, ...) and ``window=7``. Every relation found is printed as the
    sentence index, left-aligned in five columns, followed by
    ``rtuple(rel)``.
    """
    print()
    print("1500 Sentences from Penn Treebank, as processed by NLTK NE Chunker")
    print("=" * 45)
    role_pattern = re.compile(r'.*(chairman|president|trader|scientist|economist|analyst|partner).*')
    tagged_sents = nltk.corpus.treebank.tagged_sents()[:1500]
    for sent_no, tagged in enumerate(tagged_sents):
        tree = nltk.ne_chunk(tagged)
        relations = extract_rels(
            'PER', 'ORG', tree, corpus='ace', pattern=role_pattern, window=7
        )
        for relation in relations:
            print('{0:<5}{1}'.format(sent_no, rtuple(relation)))
示例6: ne_chunking
# 需要导入模块: import nltk [as 别名]
# 或者: from nltk import ne_chunk [as 别名]
def ne_chunking(targets):
    """Generator-based pipeline stage: NE-chunk each value sent in.

    Loops forever. Each value received through ``send()`` is passed to
    ``nltk.ne_chunk`` and the resulting tree is forwarded with ``.send()``
    to every object in ``targets``.

    :param targets: iterable of objects exposing ``send``; it is iterated
        once per received value
    """
    while True:
        received = yield
        tree = nltk.ne_chunk(received)
        for sink in targets:
            sink.send(tree)
开发者ID:PacktPublishing,项目名称:Hands-on-NLP-with-NLTK-and-scikit-learn-,代码行数:8,代码来源:nlp-6.1-nlp-pipeline.py
示例7: named_entities
# 需要导入模块: import nltk [as 别名]
# 或者: from nltk import ne_chunk [as 别名]
def named_entities(self):
    """Return the named-entity chunk tree for ``self.text``.

    The text is tokenized with ``nltk.word_tokenize``, tagged with
    ``nltk.pos_tag`` and chunked with ``nltk.ne_chunk``; the chunker's
    result is returned unchanged.
    """
    # word_tokenize should work well for most non-CJK languages
    tokens = nltk.word_tokenize(self.text)
    # TODO: this works only for english. Stanford's pos tagger supports
    # more languages
    # http://www.nltk.org/api/nltk.tag.html#module-nltk.tag.stanford
    # http://stackoverflow.com/questions/1639855/pos-tagging-in-german
    # PT corpus http://aelius.sourceforge.net/manual.html
    return nltk.ne_chunk(nltk.pos_tag(tokens))
示例8: sampleNE
# 需要导入模块: import nltk [as 别名]
# 或者: from nltk import ne_chunk [as 别名]
def sampleNE():
    """Print the NE chunk tree of the first tagged treebank sentence."""
    first_sentence = nltk.corpus.treebank.tagged_sents()[0]
    tree = nltk.ne_chunk(first_sentence)
    print(tree)
示例9: sampleNE2
# 需要导入模块: import nltk [as 别名]
# 或者: from nltk import ne_chunk [as 别名]
def sampleNE2():
    """Print the first tagged treebank sentence chunked with ``binary=True``."""
    first_sentence = nltk.corpus.treebank.tagged_sents()[0]
    tree = nltk.ne_chunk(first_sentence, binary=True)
    print(tree)
示例10: demo
# 需要导入模块: import nltk [as 别名]
# 或者: from nltk import ne_chunk [as 别名]
def demo(samplestrings):
    """Chunk each sample string and print the result in two formats.

    Every string is tokenized and POS-tagged with NLTK, then parsed with the
    externally defined chunk parser ``cp`` (``nltk.ne_chunk`` is not used
    here). The parse is printed first as a CoNLL-style string via
    ``nltk.tree2conllstr`` and then as the tree itself.

    :param samplestrings: iterable of raw text strings
    """
    for sample in samplestrings:
        tagged = nltk.pos_tag(nltk.word_tokenize(sample))
        tree = cp.parse(tagged)
        print(nltk.tree2conllstr(tree))
        print(tree)
示例11: fetch_name
# 需要导入模块: import nltk [as 别名]
# 或者: from nltk import ne_chunk [as 别名]
def fetch_name(resume_text):
    """Return the first word of the first NOUN-led entity chunk in the text.

    The text is split into sentences; each sentence is tokenized, tagged
    with the ``universal`` tagset and NE-chunked. For every labelled subtree
    the first ``(word, tag)`` leaf is inspected, and the word is returned as
    soon as its tag is ``'NOUN'``. The entity label itself is not checked.

    :param resume_text: raw resume text
    :return: the matched word, or the fallback message
        ``"Applicant name couldn't be processed"`` when nothing matches
    """
    # NOTE(review): the original indentation was lost; this assumes the
    # NOUN test applies only to labelled subtrees -- confirm upstream.
    for sentence in nltk.sent_tokenize(resume_text):
        tagged = nltk.pos_tag(nltk.word_tokenize(sentence), tagset='universal')
        for node in nltk.ne_chunk(tagged):
            if not hasattr(node, 'label'):
                continue
            name, tag = node[0]
            if tag == 'NOUN':
                return name
    return "Applicant name couldn't be processed"
示例12: get_entities
# 需要导入模块: import nltk [as 别名]
# 或者: from nltk import ne_chunk [as 别名]
def get_entities(self, document):
    """Return lower-cased entity strings for the labels in ``self.labels``.

    Walks every sentence of every paragraph in ``document``, chunks it with
    ``ne_chunk`` and keeps each labelled subtree whose label is in
    ``self.labels``. Each kept subtree becomes one string: its words,
    lower-cased and joined by single spaces.

    :param document: iterable of paragraphs, each an iterable of sentences
        accepted by ``ne_chunk``
    :return: list of entity strings in order of appearance
    """
    return [
        ' '.join(token[0].lower() for token in subtree)
        for paragraph in document
        for sentence in paragraph
        for subtree in ne_chunk(sentence)
        if hasattr(subtree, 'label') and subtree.label() in self.labels
    ]
示例13: ne_chunked
# 需要导入模块: import nltk [as 别名]
# 或者: from nltk import ne_chunk [as 别名]
def ne_chunked():
    """Print ORG-LOC relations for the first 100 treebank sentences.

    Each tagged sentence from ``nltk.corpus.treebank`` is NE-chunked and
    passed to ``extract_rels`` with a pattern that matches the word "in"
    unless it is followed by text containing "ing". The value returned by
    ``extract_rels`` is printed once per sentence.
    """
    in_pattern = re.compile(r'.*\bin\b(?!\b.+ing)')
    for tagged in nltk.corpus.treebank.tagged_sents()[:100]:
        tree = nltk.ne_chunk(tagged)
        # print() as a call: the former print statement is a syntax error on
        # Python 3, and with one argument the output is the same on Python 2.
        print(extract_rels('ORG', 'LOC', tree, corpus='ace', pattern=in_pattern))
示例14: nltk_tagger
# 需要导入模块: import nltk [as 别名]
# 或者: from nltk import ne_chunk [as 别名]
def nltk_tagger(self, token_text):
    """POS-tag ``token_text`` and return its NE chunk tree.

    :param token_text: the tokens handed to ``nltk.pos_tag``
    :return: the result of ``nltk.ne_chunk`` on the tagged tokens
    """
    return nltk.ne_chunk(nltk.pos_tag(token_text))
# Tag tokens with standard NLP BIO tags
示例15: extract_entities
# 需要导入模块: import nltk [as 别名]
# 或者: from nltk import ne_chunk [as 别名]
def extract_entities(self, doc):
    """Return the NE chunks of every sentence in ``doc``.

    The text is split with ``sent_tokenize``; each sentence is tokenized,
    POS-tagged and NE-chunked, and the top-level children of the resulting
    tree are copied into a plain list.

    :param doc: raw text
    :return: one list of chunks per sentence, in sentence order
    """
    return [
        list(ne_chunk(pos_tag(word_tokenize(sentence))))
        for sentence in sent_tokenize(doc)
    ]
# TODO spacy