

Python tree.ParentedTree Class Code Examples

This article collects typical usage examples of Python's nltk.tree.ParentedTree class. If you are wondering what the ParentedTree class is for, how to call it, or what real-world code that uses it looks like, the curated examples below should help.


The following presents 15 code examples of the ParentedTree class, drawn from open-source projects.
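
All of the examples lean on the same property of ParentedTree: it is built exactly like nltk.tree.Tree, but every subtree keeps a pointer back to its parent, so parent, sibling, and position lookups work. A minimal warm-up sketch using the standard NLTK API (expected output shown in comments):

from nltk.tree import Tree, ParentedTree

ptree = ParentedTree.fromstring('(S (NP (DT the) (NN cat)) (VP (VBD sat)))')
np = ptree[0]                           # the NP subtree
print(np.parent().label())              # S  -- a plain Tree has no parent()
print(np.treeposition())                # (0,)
print(np.right_sibling().label())       # VP

plain = Tree.fromstring('(S (NP (DT a) (NN dog)) (VP (VBD ran)))')
parented = ParentedTree.convert(plain)  # upgrade an existing Tree in one call
print(parented[1].parent().label())     # S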

Example 1: test_labeled_nodes

    def test_labeled_nodes(self):
        '''
        Test labeled nodes.

        Test case from Emily M. Bender.
        '''
        search = '''
            # macros
            @ SBJ /SBJ/;
            @ VP /VP/;
            @ VB /VB/;
            @ VPoB /V[PB]/;
            @ OBJ /OBJ/;

            # 1 svo
            S < @SBJ=s < (@VP=v < (@VB $.. @OBJ)) : =s .. =v'''
        sent1 = ParentedTree.fromstring(
            '(S (NP-SBJ I) (VP (VB eat) (NP-OBJ (NNS apples))))')
        sent2 = ParentedTree.fromstring(
            '(S (VP (VB eat) (NP-OBJ (NNS apples))) (NP-SBJ I))')
        search_firsthalf = (search.split('\n\n')[0] +
                            'S < @SBJ < (@VP < (@VB $.. @OBJ))')
        search_rewrite = 'S < (/.*SBJ/ $.. (/VP/ < (/VB/ $.. /.*OBJ/)))'

        self.assertTrue(list(tgrep.tgrep_positions(search_firsthalf, [sent1]))[0])
        self.assertTrue(list(tgrep.tgrep_positions(search, [sent1]))[0])
        self.assertTrue(list(tgrep.tgrep_positions(search_rewrite, [sent1]))[0])
        self.assertEqual(list(tgrep.tgrep_positions(search, [sent1])),
                         list(tgrep.tgrep_positions(search_rewrite, [sent1])))
        self.assertTrue(list(tgrep.tgrep_positions(search_firsthalf, [sent2]))[0])
        self.assertFalse(list(tgrep.tgrep_positions(search, [sent2]))[0])
        self.assertFalse(list(tgrep.tgrep_positions(search_rewrite, [sent2]))[0])
        self.assertEqual(list(tgrep.tgrep_positions(search, [sent2])),
                         list(tgrep.tgrep_positions(search_rewrite, [sent2])))
Author: DrDub, Project: nltk, Lines: 34, Source: test_tgrep.py
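
For readers who want to run the tgrep patterns above outside the unittest harness, a minimal sketch follows (note that nltk.tgrep expects ParentedTree input, since several relations follow parent links; the printed positions are illustrative):

from nltk import tgrep
from nltk.tree import ParentedTree

sent = ParentedTree.fromstring('(S (NP-SBJ I) (VP (VB eat) (NP-OBJ (NNS apples))))')
pattern = 'S < (/.*SBJ/ $.. (/VP/ < (/VB/ $.. /.*OBJ/)))'
matches = list(tgrep.tgrep_positions(pattern, [sent]))[0]
print(matches)   # e.g. [()] -- the root S node satisfies the pattern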

Example 2: assign_slots

def assign_slots(tokens, tag_tree, word_tree):
    stopword_list = stopwords.words('english')
    tokens_with_slot_tags = []
    word_tree = ParentedTree.convert(word_tree)
    tag_tree = ParentedTree.convert(tag_tree)
    word_tree_with_cats = tag_words_with_categories(word_tree)
    tag_tree_with_cats = tag_words_with_categories(tag_tree)
    for i, word in enumerate(tokens):
        tag = finalize_tags(i, word, tag_tree_with_cats, word_tree_with_cats) 
        tokens_with_slot_tags.append((word, tag))
    found_query_focus = False
    for i, item in enumerate(tokens_with_slot_tags):
        word, tag = item
        if tag in ['USER','MEDIA','NETWORK'] and not found_query_focus:
            tokens_with_slot_tags[i] = (word, 'SEARCH')
            found_query_focus = True
        elif tag == UNK:
            tokens_with_slot_tags[i] = (word, 'KEYWORD')
    slots = {}
    for word, tag in tokens_with_slot_tags:
        if tag == 'SKIP':
            continue
        elif tag == 'KEYWORD':
            if 'KEYWORDS' not in slots:
                slots['KEYWORDS'] = []
            if word not in stopword_list and word not in PUNCTUATION:
                slots['KEYWORDS'].append(word)
        else:
            if tag not in slots:
                slots[tag] = word
            else:
                previous_words = slots[tag]
                slots[tag] = ' '.join([previous_words, word])
    return slots
Author: sophiavanvalkenburg, Project: queryparser, Lines: 34, Source: queryparser.py
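
The tagging helpers used here (tag_words_with_categories, finalize_tags, UNK, PUNCTUATION) are defined elsewhere in the queryparser project, but the final slot-merging loop is self-contained; a small sketch with hypothetical pre-tagged input:

# Hypothetical (word, tag) pairs standing in for the project's tagger output.
tokens_with_slot_tags = [('play', 'SEARCH'), ('the', 'SKIP'),
                         ('beatles', 'MEDIA'), ('white', 'KEYWORD'),
                         ('album', 'KEYWORD')]
slots = {}
for word, tag in tokens_with_slot_tags:
    if tag == 'SKIP':
        continue
    elif tag == 'KEYWORD':
        slots.setdefault('KEYWORDS', []).append(word)
    else:
        slots[tag] = (slots[tag] + ' ' + word) if tag in slots else word
print(slots)   # {'SEARCH': 'play', 'MEDIA': 'beatles', 'KEYWORDS': ['white', 'album']}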

Example 3: merge_tree_nnps

def merge_tree_nnps(tree):
    """
    Takes a parse tree and merges any consecutive leaf nodes that come from NNPs
    For example if there is a segment of:
        (NP
            (JJ old)
            (NNP Pierre)
            (NNP Vinken)
        )
    Returns:
        (NP
            (JJ old)
            (NNP PierreVinken)
        )
    """

    # require a parented tree to get a subtrees tree position
    p = ParentedTree.convert(tree)

    # iterates subtrees of height 3. This is where NP's leading to NNP's leading to lexicalizations will be
    for s in p.subtrees(filter=lambda s: s.height() == 3):
        # merge NNPs in the list representation of this tree's children: [(POS, word), ...]
        new_noun_phrase = merge_tagged_nnps([(c.label(), c[0]) for c in s])
        child_str = " ".join("(%s %s)" % (pos, word) for pos, word in new_noun_phrase)
        # create new subtree with merged NNP's
        new_s = ParentedTree.fromstring("(%s %s)" % (s.label(), child_str))

        # replace old subtree with new subtree
        p[s.treeposition()] = new_s
    return Tree.convert(p)
Author: jonpiffle, Project: ltag_parser, Lines: 30, Source: preprocess.py
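
merge_tagged_nnps is another helper from the same project; a self-contained sketch of the whole technique, with a simple stand-in for that helper inlined, might look like this:

from nltk.tree import Tree, ParentedTree

def merge_nnps(tree):
    # ParentedTree is needed so each matching subtree can report its treeposition()
    p = ParentedTree.convert(tree)
    for s in p.subtrees(filter=lambda s: s.height() == 3):
        merged, run = [], []
        for pos, word in [(c.label(), c[0]) for c in s]:
            if pos == 'NNP':
                run.append(word)
            else:
                if run:
                    merged.append(('NNP', ''.join(run)))
                    run = []
                merged.append((pos, word))
        if run:
            merged.append(('NNP', ''.join(run)))
        children = ' '.join('(%s %s)' % pair for pair in merged)
        p[s.treeposition()] = ParentedTree.fromstring('(%s %s)' % (s.label(), children))
    return Tree.convert(p)

tree = Tree.fromstring('(S (NP (JJ old) (NNP Pierre) (NNP Vinken)) (VP (VBD resigned)))')
print(merge_nnps(tree))   # (S (NP (JJ old) (NNP PierreVinken)) (VP (VBD resigned)))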

Example 4: lappinleasse

def lappinleasse(parsetree, i):
    global entitySet
    for np in parsetree.subtrees(lambda x: x.label() == 'NP'):
        if 'PRP' in np[0].label():
            if np[0,0].lower() == 'it' and ispleonastic(np, parsetree): continue
            maxsalience = -1
            referent = None
            e = Entity(np, parsetree, i)
            for entity in entitySet:
                if entity.sentencenum >= i - 4 and e.agreeswith(entity) and maxsalience < entity.salience:
                    maxsalience = entity.salience
                    referent = entity
            try:
                referent.salience += e.salience
                referent.gender = e.gender
                referent.phrases.add(np[0,0] + str(i))
                orig = np[0,0]
                if np[0].label() == 'PRP$':
                    np[0] = ParentedTree.fromstring('(SUB <'+ referent.name + "'s>)")
                    print('PRP$ substitution', orig, '-->', referent.name)
                else:
                    np[0] = ParentedTree.fromstring('(SUB <' + referent.name + '>)')
                    print('PRP substitution', orig, '-->', referent.name)
            except:
                print('No substitution found for ', orig)
                continue

        elif np[0].label() == 'EX': continue
        else: entitySet.add(Entity(np, parsetree, i))
#    print('Discourse model after sentence', i + 1, ':')
#    for entity in entitySet: print(entity)
    halve()
Author: 5aurabhpathak, Project: all-I-ve-done, Lines: 32, Source: lappinleasse.py
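
Stripped of the project-specific Entity bookkeeping and salience updates, the traversal this resolver is built on is just a filtered subtree walk plus parent lookups; a minimal sketch:

from nltk.tree import ParentedTree

tree = ParentedTree.fromstring(
    '(S (NP (NNP John)) (VP (VBD said) (SBAR (S (NP (PRP he)) (VP (VBD left))))))')

for np in tree.subtrees(lambda x: x.label() == 'NP'):
    head = np[0]
    if 'PRP' in head.label():
        # parent pointers give the surrounding context of the pronoun
        print('pronoun:', head[0], 'inside', np.parent().parent().label())
    else:
        print('candidate referent:', ' '.join(np.leaves()))
# candidate referent: John
# pronoun: he inside SBAR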

Example 5: syntax_similarity_two_documents

 def syntax_similarity_two_documents(self, doc1, doc2, average=False): #syntax similarity of two single documents
     global numnodes
     doc1sents = self.sent_detector.tokenize(doc1.strip())
     doc2sents = self.sent_detector.tokenize(doc2.strip())
     for s in doc1sents: # to handle unusually long sentences
         if len(s.split())>100:
             return "NA"
     for s in doc2sents:
         if len(s.split())>100:
             return "NA"
     try: # to handle parse errors, which might happen when there is an unusually long word in the sentence
         doc1parsed = self.parser.raw_parse_sents((doc1sents))
         doc2parsed = self.parser.raw_parse_sents((doc2sents))
     except Exception as e:
         sys.stderr.write(str(e))
         return "NA"
     costMatrix = []
     doc1parsed = list(doc1parsed)
     for i in range(len(doc1parsed)):
         doc1parsed[i] = list(doc1parsed[i])[0]
     doc2parsed = list(doc2parsed)
     for i in range(len(doc2parsed)):
         doc2parsed[i] = list(doc2parsed[i])[0]
     for i in range(len(doc1parsed)):
         numnodes = 0
         sentencedoc1 = ParentedTree.convert(doc1parsed[i])
         tempnode = Node(sentencedoc1.root().label())
         new_sentencedoc1 = self.convert_mytree(sentencedoc1,tempnode)
         temp_costMatrix = []
         sen1nodes = numnodes
         for j in range(len(doc2parsed)):
             numnodes=0.0
             sentencedoc2 = ParentedTree.convert(doc2parsed[j])
             tempnode = Node(sentencedoc2.root().label())
             new_sentencedoc2 = self.convert_mytree(sentencedoc2,tempnode)
             ED = simple_distance(new_sentencedoc1, new_sentencedoc2)
             ED = ED / (numnodes + sen1nodes)
             temp_costMatrix.append(ED)
         costMatrix.append(temp_costMatrix)
     costMatrix = np.array(costMatrix)
     if average==True:
         return 1-np.mean(costMatrix)
     else:
         indexes = su.linear_assignment(costMatrix)
         total = 0
         rowMarked = [0] * len(doc1parsed)
         colMarked = [0] * len(doc2parsed)
         for row, column in indexes:
             total += costMatrix[row][column]
             rowMarked[row] = 1
             colMarked [column] = 1
         for k in range(len(rowMarked)):
             if rowMarked[k]==0:
                 total+= np.min(costMatrix[k])
         for c in range(len(colMarked)):
             if colMarked[c]==0:
                 total+= np.min(costMatrix[:,c])
         maxlengraph = max(len(doc1parsed),len(doc2parsed))
         return 1-(total/maxlengraph)
Author: USC-CSSL, Project: CASSIM, Lines: 59, Source: Cassim.py
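
Node and simple_distance come from the zss tree-edit-distance package, and convert_mytree is specific to CASSIM; assuming zss is installed (pip install zss), the ParentedTree-to-zss conversion can be sketched as:

from nltk.tree import ParentedTree
from zss import Node, simple_distance   # assumption: zss package available

def to_zss(t):
    node = Node(t.label())
    for child in t:
        # internal nodes recurse; string leaves become terminal zss nodes
        node.addkid(to_zss(child) if isinstance(child, ParentedTree) else Node(child))
    return node

t1 = ParentedTree.fromstring('(S (NP (PRP I)) (VP (VBP eat) (NP (NNS apples))))')
t2 = ParentedTree.fromstring('(S (NP (PRP I)) (VP (VBP eat) (NP (NNS pears))))')
print(simple_distance(to_zss(t1), to_zss(t2)))   # a small distance, e.g. 1.0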

Example 6: test_exact_match

def test_exact_match():
    tree = ParentedTree.fromstring('(S (NP (DT the) (JJ big) (NN cat)) (VP bit) (NP (DT a) (NN cat)))')
    node = search_by_exact_string_matching(tree, 'cat')
    assert_equal(len(node), 2)
    assert_equal(node[0], ParentedTree.fromstring('(NN cat)'))

    node = search_by_exact_string_matching(tree, 'a cat')
    assert_equal(len(node), 1)
    assert_equal(node[0], ParentedTree.fromstring('(NP (DT a) (NN cat))'))
Author: xiaohan2012, Project: mynlp, Lines: 9, Source: test_tree_search.py
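
search_by_exact_string_matching comes from the mynlp project under test; one hypothetical implementation consistent with these assertions is to return every subtree whose leaves spell out the query:

from nltk.tree import ParentedTree

def search_by_exact_string_matching(tree, query):
    # hypothetical stand-in: match subtrees whose joined leaves equal the query
    return [st for st in tree.subtrees() if ' '.join(st.leaves()) == query]

tree = ParentedTree.fromstring(
    '(S (NP (DT the) (JJ big) (NN cat)) (VP bit) (NP (DT a) (NN cat)))')
print(len(search_by_exact_string_matching(tree, 'cat')))    # 2
print(search_by_exact_string_matching(tree, 'a cat')[0])    # (NP (DT a) (NN cat))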

Example 7: get_sentence_posteriors

def get_sentence_posteriors(sentence, iterations=1, extra_meaning=None):
    meaning_probs = {}
    # parse sentence with charniak and apply surgeries
    print 'parsing ...'
    modparse = get_modparse(sentence)
    t = ParentedTree.parse(modparse)
    print '\n%s\n' % t.pprint()
    num_ancestors = count_lmk_phrases(t) - 1

    for _ in xrange(iterations):
        (lmk, _, _), (rel, _, _) = get_meaning(num_ancestors=num_ancestors)
        meaning = m2s(lmk,rel)
        if meaning not in meaning_probs:
            ps = get_tree_probs(t, lmk, rel)[0]
            # print "Tree probs: ", zip(ps,rls)
            meaning_probs[meaning] = np.prod(ps)
        print '.'

    if extra_meaning:
        meaning = m2s(*extra_meaning)
        if meaning not in meaning_probs:
            ps = get_tree_probs(t, lmk, rel)[0]
            # print "Tree prob: ", zip(ps,rls)
            meaning_probs[meaning] = np.prod(ps)
        print '.'

    summ = sum(meaning_probs.values())
    for key in meaning_probs:
        meaning_probs[key] /= summ
    return meaning_probs.items()
Author: marcovzla, Project: bolt, Lines: 30, Source: location_from_sentence.py
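
One caveat when reusing this snippet: it is Python 2 code, and ParentedTree.parse is the NLTK 2.x spelling. On NLTK 3 the equivalent call is:

# NLTK 3 equivalent of the ParentedTree.parse(...) call above
t = ParentedTree.fromstring(modparse)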

Example 8: disfile2tree

 def disfile2tree(dis_filepath):
     """converts a *.dis file into a ParentedTree (NLTK) instance"""
     with open(dis_filepath) as f:
         rst_tree_str = f.read().strip()
         rst_tree_str = fix_rst_treebank_tree_str(rst_tree_str)
         rst_tree_str = convert_parens_in_rst_tree_str(rst_tree_str)
         return ParentedTree.fromstring(rst_tree_str)
Author: hernan-erasmo, Project: discoursegraphs, Lines: 7, Source: dis.py

Example 9: findSentencePTreeToken

def findSentencePTreeToken(sentence, keyword):
	import nltk
	from nltk.tree import ParentedTree
	stemmed = _lemma_(keyword)

	tmp = proc.parse_doc(sentence)
	i = 0
	numSentences = len(tmp['sentences'])
	rs = []
	for i in range(0, numSentences):
		p = tmp['sentences'][i]['parse']
		ptree = ParentedTree.fromstring(p)

		# rs = []
		for i in range(0, len(ptree.leaves())):
			tree_position = ptree.leaf_treeposition(i)

			node = ptree[tree_position]

			if _stem_(node)==stemmed:
				tree_position = tree_position[0:len(tree_position)-1]
				rs.append(ptree[tree_position])
		# if len(rs)>0:
		# 	return rs
	return rs
Author: gkotsis, Project: negation-detection, Lines: 25, Source: negation_detection.py
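
The core tree operations here are leaf_treeposition(i), which maps the i-th leaf to its position tuple, and truncating that tuple by one element to climb to the leaf's preterminal; a minimal sketch:

from nltk.tree import ParentedTree

ptree = ParentedTree.fromstring('(S (NP (PRP She)) (VP (VBZ denies) (NP (DT the) (NN claim))))')
for i, leaf in enumerate(ptree.leaves()):
    pos = ptree.leaf_treeposition(i)      # e.g. (0, 0, 0) for 'She'
    preterminal = ptree[pos[:-1]]         # drop the last index to reach the POS node
    print(leaf, '->', preterminal.label())
# She -> PRP, denies -> VBZ, the -> DT, claim -> NN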

Example 10: parse

def parse(sentence, use_cache=True, parser='stanford'):

    cache_key = "parse_trees_{0}".format(parser)
    valid_lines = None

    if use_cache:
        cache_attempt = cache_get(cache_key, sentence)
        if cache_attempt:
            valid_lines = cache_attempt

    if valid_lines is None:
        if parser == "stanford":
            response = parse_stanford(sentence, use_cache=use_cache)
        elif parser == "malt":
            response = parse_malt(sentence, use_cache=use_cache)
        else:
            return []

        valid_lines = [line for line in response.split("\n") if len(line) > 2 and line[0] == "(" and line[-1] == ")"]

        if use_cache:
            cache_set(cache_key, sentence, valid_lines)

    # throw away the garbage we don't want from the parser's response.
    # this could probably get us in trouble since it'll hide errors etc,
    # but we got deadlines....
    trees = [ParentedTree.parse(line) for line in valid_lines]

    return trees
Author: nader92011, Project: zdotfiles, Lines: 29, Source: parsers.py

Example 11: test_node_nocase

 def test_node_nocase(self):
     '''
     Test selecting nodes using case insensitive node names.
     '''
     tree = ParentedTree.fromstring('(S (n x) (N x))')
     self.assertEqual(list(tgrep.tgrep_positions('"N"', [tree])), [[(1,)]])
     self.assertEqual(list(tgrep.tgrep_positions('i@"N"', [tree])), [[(0,), (1,)]])
Author: DrDub, Project: nltk, Lines: 7, Source: test_tgrep.py

Example 12: parse_sentences

 def parse_sentences(self, filename, num_sentences):
     """Parses each one-line sentence into a syntax tree"""
     # Open the file and parse a given number of sentences
     f = open(filename, 'r')
     if num_sentences == 'all':
         num_sentences = -1
     count = 0
     for sentence in f.readlines()[:num_sentences]:
         if count%10==0:
             print("Number of sentences trained: ",count)
         # Get possible parse trees
         trees = self.parser.raw_parse(sentence.lower())
         for tree in trees:
             self.nonterminal_counts['ROOT'] += 1
             tokenized_sentence = self.tokenize_sentence(sentence)
             # Only extract rules from sentences with greater than 8 tokens,
             # to avoid adding rules that generate short, ungrammatical sentences
             if len(tokenized_sentence) > 8:
                 self.extract_rules(tree)
             # Convert the tree into a ParentedTree, 
             # which is an NLTK tree that keeps pointers to each node's parent
             ptree = ParentedTree.convert(tree)
             # Calculate the bigram counts for this sentence
             self.get_bigram(ptree, tokenized_sentence)
         count+=1
Author: phuongkhdinh, Project: NaturalLanguageProcessing_F2015, Lines: 25, Source: training.py

Example 13: test_use_macros

 def test_use_macros(self):
     '''
     Test defining and using tgrep2 macros.
     '''
     tree = ParentedTree.fromstring(
         '(VP (VB sold) (NP (DET the) '
         '(NN heiress)) (NP (NN deed) (PREP to) '
         '(NP (DET the) (NN school) (NN house))))'
     )
     self.assertEqual(
         list(
             tgrep.tgrep_positions(
                 '@ NP /^NP/;\n@ NN /^NN/;\n@NP !< @NP !$.. @NN', [tree]
             )
         ),
         [[(1,), (2, 2)]],
     )
     # use undefined macro @CNP
     self.assertRaises(
         tgrep.TgrepException,
         list,
         tgrep.tgrep_positions(
             '@ NP /^NP/;\n@ NN /^NN/;\n@CNP !< @NP !$.. @NN', [tree]
         ),
     )
Author: rmalouf, Project: nltk, Lines: 25, Source: test_tgrep.py

Example 14: getConsituentTreeDistribution

def getConsituentTreeDistribution(core_nlp_files):
    diff_productions = dict()
    production_dict_for_files = dict()
    for genre_file_path, genre_file_name in core_nlp_files:
        production_dict = dict()
        dictionary = dict()
        with open(genre_file_path) as f:
            lines = f.readlines()
            assert len(lines) == 1
            line = lines[0]
            line = 'dictionary=' + line
            exec(line)
            # print genre_file_path, dictionary
            sentences = dictionary[SENTENCES]
            for sent in sentences:
                parsetree = sent[PARSE_TREE]
                t = ParentedTree.fromstring(parsetree)
                prods = t.productions()
                for prod in prods:
                    if prod not in diff_productions:
                        diff_productions[prod] = 0.0
                    if prod not in production_dict:
                        production_dict[prod] = 0.0
                    diff_productions[prod] += 1.0
                    production_dict[prod] += 1.0
            production_dict_for_files[genre_file_name.replace('_corenlp1000.txt', '.txt')] = production_dict
    return production_dict_for_files, diff_productions
Author: SriganeshNk, Project: Literary-Success, Lines: 27, Source: Sentence.py
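
The counting above rests on Tree.productions(), which lists every CFG rule (including the lexical rules) used in a parse; a minimal sketch of what it yields for a single tree:

from collections import Counter
from nltk.tree import ParentedTree

t = ParentedTree.fromstring('(S (NP (DT the) (NN dog)) (VP (VBD barked)))')
for prod, n in Counter(t.productions()).items():
    print(n, prod)
# 1 S -> NP VP
# 1 NP -> DT NN
# 1 DT -> 'the'
# 1 NN -> 'dog'
# 1 VP -> VBD
# 1 VBD -> 'barked'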

Example 15: get_modparse

def get_modparse(sentence):
    """returns the modified parse tree for a sentence"""
    sp_db = SentenceParse.get_sentence_parse(sentence)
    try:
        res = sp_db.all()[0]
        parsetree = res.original_parse
        modparsetree = res.modified_parse
    except:
        print "parse.py: 103: " + sentence
        parses = parse_sentences([sentence])
        if len(parses) == 0:
            raise ParseError(printcolors.WARNING + ('ParseError: a sentence was empty'))

        modparses = modify_parses(parses)
        for i,chunk in enumerate(modparses[:]):
            for j,modparse in enumerate(chunk):
                if 'LANDMARK-PHRASE' in modparse:
                    modparses[i] = modparse
                    parses[i] = parses[i][j]
                    break
            if isinstance(modparses[i],list):
                modparses[i] = modparses[i][0]
                parses[i] = parses[i][0]

        parsetree = parses[0]
        modparsetree = modparses[0]
        try:
            SentenceParse.add_sentence_parse(sentence, parsetree, modparsetree)
        except Exception as e:
            print e

    if count_lmk_phrases(ParentedTree.parse(modparsetree)) < 1:
        raise ParseError(printcolors.WARNING + ('ParseError: Parse contained no Landmark phrase.\nSentence: %s\nParse: %s\nModparse: %s' % (sentence,parsetree,modparsetree)))

    return parsetree, modparsetree
Author: arebgun, Project: bolt, Lines: 35, Source: parse.py


Note: The nltk.tree.ParentedTree class examples in this article were compiled by 纯净天空 from open-source code hosted on platforms such as GitHub and MSDocs. The snippets were selected from projects contributed by their original authors; copyright remains with those authors, and any use or redistribution should follow the license of the corresponding project. Do not reproduce without permission.