當前位置: 首頁>>代碼示例>>Python>>正文


Python nltk.ne_chunk方法代碼示例

本文整理匯總了Python中nltk.ne_chunk方法的典型用法代碼示例。如果您正苦於以下問題:Python nltk.ne_chunk方法的具體用法?Python nltk.ne_chunk怎麽用?Python nltk.ne_chunk使用的例子?那麽, 這裏精選的方法代碼示例或許可以為您提供幫助。您也可以進一步了解該方法所在nltk的用法示例。


在下文中一共展示了nltk.ne_chunk方法的15個代碼示例,這些例子默認根據受歡迎程度排序。您可以為喜歡或者感覺有用的代碼點讚,您的評價將有助於係統推薦出更棒的Python代碼示例。

示例1: learnAnaphora

# 需要導入模塊: import nltk [as 別名]
# 或者: from nltk import ne_chunk [as 別名]
def learnAnaphora(self):
    """Print a naive anaphora-candidate stack for each hard-coded demo sentence.

    For every sentence, NE-chunks the POS-tagged tokens and collects:
    proper nouns that are PERSON entities (or unlabeled) paired with their
    guessed gender via self.gender, plus conjunctions and pronouns.
    """
    samples = (
        "John is a man. He walks",
        "John and Mary are married. They have two kids",
        "In order for Ravi to be successful, he should follow John",
        "John met Mary in Barista. She asked him to order a Pizza",
    )

    for sentence in samples:
        tree = nltk.ne_chunk(
            nltk.pos_tag(nltk.word_tokenize(sentence)), binary=False)
        print(sentence)
        candidates = []
        for word, pos, ner in tree2conlltags(tree):
            if pos == 'NNP' and ner in ('B-PERSON', 'O'):
                candidates.append((word, self.gender(word)))
            elif pos in ('CC', 'PRP'):
                candidates.append(word)
        print("\t {}".format(candidates))
開發者ID:PacktPublishing,項目名稱:Natural-Language-Processing-with-Python-Cookbook,代碼行數:23,代碼來源:Anaphora.py

示例2: extractNE

# 需要導入模塊: import nltk [as 別名]
# 或者: from nltk import ne_chunk [as 別名]
def extractNE():
    """Drain queues[1], NE-chunking each POS-tagged payload and printing entities.

    Each queued item is a dict with 'input' (POS-tagged token list) and 'uuid'.
    Only named-entity subtrees (nltk.Tree nodes, which have a .label()) are
    printed; plain (word, tag) tuples are skipped.
    """
    while True:
        if queues[1].empty():
            break
        data = queues[1].get()
        postags = data['input']
        queues[1].task_done()
        chunks = nltk.ne_chunk(postags, binary=False)
        print("  << {} : ".format(data['uuid']), end='')
        for path in chunks:
            # Plain (word, tag) tuples have no .label(); catch only that
            # specific failure instead of a bare `except:`, which would
            # also swallow real errors (and KeyboardInterrupt/SystemExit).
            try:
                path.label()
            except AttributeError:
                continue
            print(path, end=', ')
        print()
開發者ID:PacktPublishing,項目名稱:Natural-Language-Processing-with-Python-Cookbook,代碼行數:19,代碼來源:PipelineQ.py

示例3: get_entities

# 需要導入模塊: import nltk [as 別名]
# 或者: from nltk import ne_chunk [as 別名]
def get_entities(self, document):
    """
    Extract named-entity strings from a single document using nltk's
    ne_chunk classifier-based chunker.

    Called repeatedly by the transform method.

    :param document: a list of paragraphs, each a list of POS-tagged sentences
    :return: a list of space-joined, lower-cased entity strings
    """
    found = []
    for paragraph in document:
        for tagged_sentence in paragraph:
            for subtree in ne_chunk(tagged_sentence):
                # Only Tree nodes carry a .label(); plain tuples do not.
                if not hasattr(subtree, 'label'):
                    continue
                # Keep only the entity categories we were configured with.
                if subtree.label() not in self.labels:
                    continue
                found.append(' '.join(leaf[0].lower() for leaf in subtree))
    return found
開發者ID:foxbook,項目名稱:atap,代碼行數:27,代碼來源:ner.py

示例4: _process

# 需要導入模塊: import nltk [as 別名]
# 或者: from nltk import ne_chunk [as 別名]
def _process(self, input_pack: DataPack):
    """NE-chunk every sentence in the pack and record EntityMention spans."""
    for sentence in input_pack.get(Sentence):
        token_entries = list(input_pack.get(
            entry_type=Token, range_annotation=sentence,
            components=self.token_component))
        tagged = [(tok.text, tok.pos) for tok in token_entries]

        # Walk the chunk tree while tracking our position in token_entries,
        # so each entity subtree can be mapped back to character offsets.
        cursor = 0
        for node in ne_chunk(tagged):
            if not hasattr(node, 'label'):
                # A plain (word, tag) tuple: not an entity, advance one token.
                cursor += 1
                continue
            # node is e.g. Tree('GPE', [('New', 'NNP'), ('York', 'NNP')]);
            # the mention spans len(node) consecutive tokens.
            span_begin = token_entries[cursor].span.begin
            span_end = token_entries[cursor + len(node) - 1].span.end
            mention = EntityMention(input_pack, span_begin, span_end)
            mention.ner_type = node.label()
            cursor += len(node)
開發者ID:asyml,項目名稱:forte,代碼行數:24,代碼來源:nltk_processors.py

示例5: ne_chunked

# 需要導入模塊: import nltk [as 別名]
# 或者: from nltk import ne_chunk [as 別名]
def ne_chunked():
    """Demo: PER-ROLE-ORG relation extraction over NE-chunked treebank sentences."""
    print()
    print("1500 Sentences from Penn Treebank, as processed by NLTK NE Chunker")
    print("=" * 45)
    ROLE = re.compile(r'.*(chairman|president|trader|scientist|economist|analyst|partner).*')
    for idx, tagged_sent in enumerate(nltk.corpus.treebank.tagged_sents()[:1500]):
        chunked = nltk.ne_chunk(tagged_sent)
        for relation in extract_rels('PER', 'ORG', chunked, corpus='ace',
                                     pattern=ROLE, window=7):
            print('{0:<5}{1}'.format(idx, rtuple(relation)))
開發者ID:rafasashi,項目名稱:razzy-spinner,代碼行數:13,代碼來源:relextract.py

示例6: ne_chunking

# 需要導入模塊: import nltk [as 別名]
# 或者: from nltk import ne_chunk [as 別名]
def ne_chunking(targets):
    """Pipeline coroutine: NE-chunk received POS-tagged words, fan out to targets."""
    while True:
        tagged = (yield)  # suspend until the upstream stage sends tagged words
        chunked = nltk.ne_chunk(tagged)
        for sink in targets:
            sink.send(chunked)
開發者ID:PacktPublishing,項目名稱:Hands-on-NLP-with-NLTK-and-scikit-learn-,代碼行數:8,代碼來源:nlp-6.1-nlp-pipeline.py

示例7: named_entities

# 需要導入模塊: import nltk [as 別名]
# 或者: from nltk import ne_chunk [as 別名]
def named_entities(self):
    """Return the NLTK named-entity chunk tree for self.text (English only)."""
    # word_tokenize should work well for most non-CJK languages
    tokens = nltk.word_tokenize(self.text)

    # TODO: this works only for english. Stanford's pos tagger supports
    # more languages
    # http://www.nltk.org/api/nltk.tag.html#module-nltk.tag.stanford
    # http://stackoverflow.com/questions/1639855/pos-tagging-in-german
    # PT corpus http://aelius.sourceforge.net/manual.html
    tagged = nltk.pos_tag(tokens)

    return nltk.ne_chunk(tagged)
開發者ID:Corollarium,項目名稱:geograpy2,代碼行數:16,代碼來源:extraction.py

示例8: sampleNE

# 需要導入模塊: import nltk [as 別名]
# 或者: from nltk import ne_chunk [as 別名]
def sampleNE():
    """Print the default (multi-class) NE chunking of treebank's first sentence."""
    first_sentence = nltk.corpus.treebank.tagged_sents()[0]
    print(nltk.ne_chunk(first_sentence))
開發者ID:PacktPublishing,項目名稱:Natural-Language-Processing-with-Python-Cookbook,代碼行數:5,代碼來源:NER.py

示例9: sampleNE2

# 需要導入模塊: import nltk [as 別名]
# 或者: from nltk import ne_chunk [as 別名]
def sampleNE2():
    """Print the binary (NE vs. non-NE) chunking of treebank's first sentence."""
    first_sentence = nltk.corpus.treebank.tagged_sents()[0]
    print(nltk.ne_chunk(first_sentence, binary=True))
開發者ID:PacktPublishing,項目名稱:Natural-Language-Processing-with-Python-Cookbook,代碼行數:5,代碼來源:NER.py

示例10: demo

# 需要導入模塊: import nltk [as 別名]
# 或者: from nltk import ne_chunk [as 別名]
def demo(samplestrings):
    """Chunk each sample string with the module-level grammar parser `cp`
    and print the result in both CoNLL and tree form."""
    for sample in samplestrings:
        tagged = nltk.pos_tag(nltk.word_tokenize(sample))
        parsed = cp.parse(tagged)
        print(nltk.tree2conllstr(parsed))
        print(parsed)
開發者ID:PacktPublishing,項目名稱:Natural-Language-Processing-with-Python-Cookbook,代碼行數:10,代碼來源:OwnNE.py

示例11: fetch_name

# 需要導入模塊: import nltk [as 別名]
# 或者: from nltk import ne_chunk [as 別名]
def fetch_name(resume_text):
    """Best-effort guess at an applicant's name from resume text.

    Scans sentence by sentence; the first token tagged NOUN (universal
    tagset), unwrapped from its NE chunk if necessary, is returned.
    """
    for sentence in nltk.sent_tokenize(resume_text):
        tagged = nltk.pos_tag(nltk.word_tokenize(sentence), tagset='universal')
        for chunk in nltk.ne_chunk(tagged):
            # NE subtrees carry a label; take their first (word, tag) leaf.
            # NOTE: the stricter check `chunk.label() == 'PERSON'` was
            # deliberately disabled in the original.
            if hasattr(chunk, 'label'):
                chunk = chunk[0]
            (name, tag) = chunk
            if tag == 'NOUN':
                return name

    return "Applicant name couldn't be processed"
開發者ID:skcript,項目名稱:cvscan,代碼行數:13,代碼來源:language_parser.py

示例12: get_entities

# 需要導入模塊: import nltk [as 別名]
# 或者: from nltk import ne_chunk [as 別名]
def get_entities(self, document):
    """Collect lower-cased entity phrases whose NE label is in self.labels.

    :param document: a list of paragraphs, each a list of POS-tagged sentences
    :return: a list of space-joined entity strings
    """
    results = []
    for paragraph in document:
        for tagged_sent in paragraph:
            results.extend(
                ' '.join(word.lower() for word, _tag in subtree)
                for subtree in ne_chunk(tagged_sent)
                if hasattr(subtree, 'label') and subtree.label() in self.labels
            )
    return results
開發者ID:foxbook,項目名稱:atap,代碼行數:14,代碼來源:transformers.py

示例13: ne_chunked

# 需要導入模塊: import nltk [as 別名]
# 或者: from nltk import ne_chunk [as 別名]
# Python 2 demo: print ORG-in-LOC relations from NLTK's NE-chunked treebank.
# NOTE(review): uses a Python 2 `print` statement; not valid under Python 3.
def ne_chunked():
    # "in" that is not the tail of a gerund (e.g. skips "investing").
    IN = re.compile(r'.*\bin\b(?!\b.+ing)')
    rels = []  # NOTE(review): initialized but never used — dead as written
    for sent in nltk.corpus.treebank.tagged_sents()[:100]:
        # Overlay named-entity chunks onto the tagged sentence before matching.
        sent = nltk.ne_chunk(sent)
        print extract_rels('ORG', 'LOC', sent, corpus='ace', pattern = IN)
開發者ID:blackye,項目名稱:luscan-devel,代碼行數:8,代碼來源:relextract.py

示例14: nltk_tagger

# 需要導入模塊: import nltk [as 別名]
# 或者: from nltk import ne_chunk [as 別名]
def nltk_tagger(self, token_text):
    """POS-tag the given token list and return NLTK's NE chunk tree for it."""
    tagged = nltk.pos_tag(token_text)
    return nltk.ne_chunk(tagged)

    # Tag tokens with standard NLP BIO tags 
開發者ID:singnet,項目名稱:nlp-services,代碼行數:8,代碼來源:entity_recognizer_mod.py

示例15: extract_entities

# 需要導入模塊: import nltk [as 別名]
# 或者: from nltk import ne_chunk [as 別名]
def extract_entities(self, doc):
    """Return, per sentence of `doc`, the list of NE-chunk nodes
    (Tree subtrees for entities, (word, tag) tuples otherwise)."""
    return [
        list(ne_chunk(pos_tag(word_tokenize(sentence))))
        for sentence in sent_tokenize(doc)
    ]

    # TODO spacy 
開發者ID:laugustyniak,項目名稱:textlytics,代碼行數:10,代碼來源:document_preprocessing.py


注:本文中的nltk.ne_chunk方法示例由純淨天空整理自Github/MSDocs等開源代碼及文檔管理平台,相關代碼片段篩選自各路編程大神貢獻的開源項目,源碼版權歸原作者所有,傳播和使用請參考對應項目的License;未經允許,請勿轉載。