当前位置: 首页>>代码示例>>Python>>正文


Python nltk.ne_chunk方法代码示例

本文整理汇总了 Python 中 nltk.ne_chunk 方法的典型用法代码示例。如果您正苦于以下问题:Python nltk.ne_chunk 方法的具体用法是什么?Python nltk.ne_chunk 怎么用?有哪些 Python nltk.ne_chunk 的使用例子?那么恭喜您,这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在模块 nltk 的用法示例。


在下文中一共展示了nltk.ne_chunk方法的15个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Python代码示例。

示例1: learnAnaphora

# 需要导入模块: import nltk [as 别名]
# 或者: from nltk import ne_chunk [as 别名]
def learnAnaphora(self):
        """Print anaphora-resolution candidates for a few built-in sentences.

        Each sample sentence is tokenized, POS-tagged and NE-chunked with
        NLTK, then flattened with ``tree2conlltags``.  The sentence is
        printed, followed by a tab-indented list holding, in sentence order:

        * ``(word, self.gender(word))`` for every ``NNP`` token whose IOB tag
          is ``B-PERSON`` or ``O``;
        * the bare word for every ``CC`` or ``PRP`` token.
        """
        sentences = [
            "John is a man. He walks",
            "John and Mary are married. They have two kids",
            "In order for Ravi to be successful, he should follow John",
            "John met Mary in Barista. She asked him to order a Pizza"
        ]

        for sent in sentences:
            tagged = nltk.pos_tag(nltk.word_tokenize(sent))
            tree = nltk.ne_chunk(tagged, binary=False)
            print(sent)
            stack = []
            for word, pos, iob in tree2conlltags(tree):
                if pos == 'NNP' and iob in ('B-PERSON', 'O'):
                    # Proper noun: keep it together with its looked-up gender.
                    stack.append((word, self.gender(word)))
                elif pos in ('CC', 'PRP'):
                    # Conjunctions and pronouns are kept as plain words.
                    stack.append(word)
            print("\t {}".format(stack))
开发者ID:PacktPublishing,项目名称:Natural-Language-Processing-with-Python-Cookbook,代码行数:23,代码来源:Anaphora.py

示例2: extractNE

# 需要导入模块: import nltk [as 别名]
# 或者: from nltk import ne_chunk [as 别名]
def extractNE():
    """Drain ``queues[1]`` and print the named-entity chunks of every job.

    ``queues`` is a name defined outside this block.  Each item taken from
    ``queues[1]`` is indexed with ``'input'`` (handed unchanged to
    ``nltk.ne_chunk``) and ``'uuid'`` (used to label the output line).
    The function returns once the queue reports that it is empty.

    Only labelled subtrees of the chunk result are printed; items without a
    ``label`` attribute are skipped.  Errors raised while chunking or
    printing are no longer silently swallowed.
    """
    jobs = queues[1]
    while not jobs.empty():
        data = jobs.get()
        postags = data['input']
        # The job is acknowledged before the chunker runs, as in the original.
        jobs.task_done()
        chunks = nltk.ne_chunk(postags, binary=False)
        print("  << {} : ".format(data['uuid']), end = '')
        for node in chunks:
            # Explicit check instead of a bare ``except: pass`` around
            # ``node.label()``, which also hid unrelated failures.
            if hasattr(node, 'label'):
                print(node, end=', ')
        print()
开发者ID:PacktPublishing,项目名称:Natural-Language-Processing-with-Python-Cookbook,代码行数:19,代码来源:PipelineQ.py

示例3: get_entities

# 需要导入模块: import nltk [as 别名]
# 或者: from nltk import ne_chunk [as 别名]
def get_entities(self, document):
        """
        Collect the named entities of one document with ``ne_chunk``.

        :param document: an iterable of paragraphs, each an iterable of
            sentences; every sentence is passed unchanged to ``ne_chunk``
            (presumably a list of ``(word, tag)`` tuples -- confirm against
            the caller)
        :return entities: a flat list of strings, one per matching entity,
            made of the entity's lower-cased words joined by single spaces
        """
        found = []
        sentences = (sent for para in document for sent in para)
        for sent in sentences:
            for node in ne_chunk(sent):
                # Items without a label() are not entity subtrees.
                if not hasattr(node, 'label'):
                    continue
                # Keep only the entity categories listed in self.labels.
                if node.label() not in self.labels:
                    continue
                words = [leaf[0].lower() for leaf in node]
                found.append(' '.join(words))
        return found
开发者ID:foxbook,项目名称:atap,代码行数:27,代码来源:ner.py

示例4: _process

# 需要导入模块: import nltk [as 别名]
# 或者: from nltk import ne_chunk [as 别名]
def _process(self, input_pack: DataPack):
        """Create an ``EntityMention`` for every entity ``ne_chunk`` finds.

        For each ``Sentence`` in ``input_pack``, the sentence's ``Token``
        entries (restricted to ``self.token_component``) are turned into
        ``(text, pos)`` pairs and chunked.  A running cursor maps each chunk
        back onto the token entries, so the mention spans from the first
        token's ``span.begin`` to the last token's ``span.end``.
        """
        for sentence in input_pack.get(Sentence):
            token_entries = list(input_pack.get(
                entry_type=Token, range_annotation=sentence,
                components=self.token_component))
            tagged = [(entry.text, entry.pos) for entry in token_entries]

            cursor = 0
            for node in ne_chunk(tagged):
                if not hasattr(node, 'label'):
                    # Plain token, e.g. ('This', 'DT'): advance by one.
                    cursor += 1
                    continue

                # Entity subtree, e.g.
                # Tree('GPE', [('New', 'NNP'), ('York', 'NNP')])
                width = len(node)
                first = token_entries[cursor]
                last = token_entries[cursor + width - 1]
                mention = EntityMention(
                    input_pack, first.span.begin, last.span.end)
                mention.ner_type = node.label()
                cursor += width
开发者ID:asyml,项目名称:forte,代码行数:24,代码来源:nltk_processors.py

示例5: ne_chunked

# 需要导入模块: import nltk [as 别名]
# 或者: from nltk import ne_chunk [as 别名]
def ne_chunked():
    """Print PER/ORG relations found in 1500 tagged Treebank sentences.

    After a blank line and a two-line banner, each of the first 1500
    sentences of ``nltk.corpus.treebank.tagged_sents()`` is NE-chunked and
    passed to ``extract_rels`` with the role-word pattern below and a window
    of 7.  Every relation is printed as the sentence index, left-aligned in
    five columns, followed by ``rtuple(rel)``.
    """
    print()
    print("1500 Sentences from Penn Treebank, as processed by NLTK NE Chunker")
    print("=" * 45)
    role_pattern = re.compile(r'.*(chairman|president|trader|scientist|economist|analyst|partner).*')
    tagged_sents = nltk.corpus.treebank.tagged_sents()[:1500]
    for sent_no, tagged in enumerate(tagged_sents):
        tree = nltk.ne_chunk(tagged)
        relations = extract_rels(
            'PER', 'ORG', tree, corpus='ace', pattern=role_pattern, window=7)
        for rel in relations:
            print('{0:<5}{1}'.format(sent_no, rtuple(rel)))
开发者ID:rafasashi,项目名称:razzy-spinner,代码行数:13,代码来源:relextract.py

示例6: ne_chunking

# 需要导入模块: import nltk [as 别名]
# 或者: from nltk import ne_chunk [as 别名]
def ne_chunking(targets):
    """Coroutine stage that NE-chunks whatever is sent to it.

    The generator must be primed with ``next()`` first.  Every value sent
    afterwards is passed to ``nltk.ne_chunk`` and the result is forwarded,
    via ``send``, to each object in ``targets`` in order.  It never finishes
    on its own.
    """
    while True:
        tagged_words = yield
        chunk_tree = nltk.ne_chunk(tagged_words)
        for sink in targets:
            sink.send(chunk_tree)
开发者ID:PacktPublishing,项目名称:Hands-on-NLP-with-NLTK-and-scikit-learn-,代码行数:8,代码来源:nlp-6.1-nlp-pipeline.py

示例7: named_entities

# 需要导入模块: import nltk [as 别名]
# 或者: from nltk import ne_chunk [as 别名]
def named_entities(self):
        """Return the result of NE-chunking ``self.text``.

        The text is tokenized with ``nltk.word_tokenize``, tagged with
        ``nltk.pos_tag`` and chunked with ``nltk.ne_chunk``.
        """
        # word_tokenize should work well for most non-CJK languages.
        tokens = nltk.word_tokenize(self.text)

        # TODO: the tagging step only works for English. Stanford's POS
        # tagger supports more languages:
        # http://www.nltk.org/api/nltk.tag.html#module-nltk.tag.stanford
        # http://stackoverflow.com/questions/1639855/pos-tagging-in-german
        # PT corpus http://aelius.sourceforge.net/manual.html
        return nltk.ne_chunk(nltk.pos_tag(tokens))
开发者ID:Corollarium,项目名称:geograpy2,代码行数:16,代码来源:extraction.py

示例8: sampleNE

# 需要导入模块: import nltk [as 别名]
# 或者: from nltk import ne_chunk [as 别名]
def sampleNE():
    """Print the ``nltk.ne_chunk`` result for the first tagged Treebank sentence."""
    first_sentence = nltk.corpus.treebank.tagged_sents()[0]
    chunk_tree = nltk.ne_chunk(first_sentence)
    print(chunk_tree)
开发者ID:PacktPublishing,项目名称:Natural-Language-Processing-with-Python-Cookbook,代码行数:5,代码来源:NER.py

示例9: sampleNE2

# 需要导入模块: import nltk [as 别名]
# 或者: from nltk import ne_chunk [as 别名]
def sampleNE2():
    """Print the first tagged Treebank sentence chunked with ``binary=True``."""
    first_sentence = nltk.corpus.treebank.tagged_sents()[0]
    chunk_tree = nltk.ne_chunk(first_sentence, binary=True)
    print(chunk_tree)
开发者ID:PacktPublishing,项目名称:Natural-Language-Processing-with-Python-Cookbook,代码行数:5,代码来源:NER.py

示例10: demo

# 需要导入模块: import nltk [as 别名]
# 或者: from nltk import ne_chunk [as 别名]
def demo(samplestrings):
    """Chunk each sample string with ``cp`` and print both views of the result.

    Every string is tokenized and POS-tagged with NLTK, then parsed by
    ``cp`` (a chunk parser defined outside this block, used here in place of
    ``nltk.ne_chunk``).  The CoNLL string form is printed first, then the
    tree itself.
    """
    for text in samplestrings:
        tagged = nltk.pos_tag(nltk.word_tokenize(text))
        tree = cp.parse(tagged)
        print(nltk.tree2conllstr(tree))
        print(tree)
开发者ID:PacktPublishing,项目名称:Natural-Language-Processing-with-Python-Cookbook,代码行数:10,代码来源:OwnNE.py

示例11: fetch_name

# 需要导入模块: import nltk [as 别名]
# 或者: from nltk import ne_chunk [as 别名]
def fetch_name(resume_text):
  """Return the first word tagged ``NOUN`` in ``resume_text``.

  The text is split into sentences; each sentence is tokenized, tagged with
  the universal tagset and NE-chunked.  For a labelled chunk only its first
  ``(word, tag)`` leaf is examined.  If no ``NOUN`` is found anywhere, a
  fixed fallback message is returned instead.
  """
  for sentence in nltk.sent_tokenize(resume_text):
    tagged = nltk.pos_tag(nltk.word_tokenize(sentence), tagset='universal')
    for node in nltk.ne_chunk(tagged):
      # Any labelled chunk counts (no PERSON filter): use its first leaf.
      leaf = node[0] if hasattr(node, 'label') else node
      word, tag = leaf
      if tag == 'NOUN':
        return word

  return "Applicant name couldn't be processed"
开发者ID:skcript,项目名称:cvscan,代码行数:13,代码来源:language_parser.py

示例12: get_entities

# 需要导入模块: import nltk [as 别名]
# 或者: from nltk import ne_chunk [as 别名]
def get_entities(self, document):
        """Return the lower-cased entity strings found in ``document``.

        ``document`` is iterated as paragraphs of sentences.  Each sentence
        is passed to ``ne_chunk``, and every labelled chunk whose label is in
        ``self.labels`` contributes one string: its words, lower-cased and
        joined by single spaces.
        """
        collected = []
        for paragraph in document:
            for sentence in paragraph:
                for node in ne_chunk(sentence):
                    is_wanted = (
                        hasattr(node, 'label')
                        and node.label() in self.labels
                    )
                    if is_wanted:
                        words = (leaf[0].lower() for leaf in node)
                        collected.append(' '.join(words))
        return collected
开发者ID:foxbook,项目名称:atap,代码行数:14,代码来源:transformers.py

示例13: ne_chunked

# 需要导入模块: import nltk [as 别名]
# 或者: from nltk import ne_chunk [as 别名]
def ne_chunked():
    """Print ORG/LOC relations for the first 100 tagged Treebank sentences.

    Each sentence from ``nltk.corpus.treebank.tagged_sents()`` is NE-chunked
    and handed to ``extract_rels`` with the ``IN`` pattern below; the value
    returned for every sentence is printed on its own line.
    """
    IN = re.compile(r'.*\bin\b(?!\b.+ing)')
    for sent in nltk.corpus.treebank.tagged_sents()[:100]:
        sent = nltk.ne_chunk(sent)
        # Call form of print: required on Python 3, and with a single
        # argument it prints the same value on Python 2.
        print(extract_rels('ORG', 'LOC', sent, corpus='ace', pattern = IN))
开发者ID:blackye,项目名称:luscan-devel,代码行数:8,代码来源:relextract.py

示例14: nltk_tagger

# 需要导入模块: import nltk [as 别名]
# 或者: from nltk import ne_chunk [as 别名]
def nltk_tagger(self, token_text):
        """POS-tag ``token_text`` and return the NE-chunked result."""
        return nltk.ne_chunk(nltk.pos_tag(token_text))

    # Tag tokens with standard NLP BIO tags 
开发者ID:singnet,项目名称:nlp-services,代码行数:8,代码来源:entity_recognizer_mod.py

示例15: extract_entities

# 需要导入模块: import nltk [as 别名]
# 或者: from nltk import ne_chunk [as 别名]
def extract_entities(self, doc):
        """Return, per sentence of ``doc``, the list of its NE chunk items.

        ``doc`` is split with ``sent_tokenize``; each sentence is tokenized,
        POS-tagged and NE-chunked, and the top-level items of the chunk
        result are stored as a plain list.
        """
        return [
            list(ne_chunk(pos_tag(word_tokenize(sent))))
            for sent in sent_tokenize(doc)
        ]

    # TODO spacy 
开发者ID:laugustyniak,项目名称:textlytics,代码行数:10,代码来源:document_preprocessing.py


注:本文中的nltk.ne_chunk方法示例由纯净天空整理自Github/MSDocs等开源代码及文档管理平台,相关代码片段筛选自各路编程大神贡献的开源项目,源码版权归原作者所有,传播和使用请参考对应项目的License;未经允许,请勿转载。