当前位置: 首页>>代码示例>>Python>>正文


Python Tree.fromstring方法代码示例

本文整理汇总了Python中nltk.tree.Tree.fromstring方法的典型用法代码示例。如果您正苦于以下问题:Python Tree.fromstring方法的具体用法?Python Tree.fromstring怎么用?Python Tree.fromstring使用的例子?那么恭喜您, 这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在nltk.tree.Tree的用法示例。


在下文中一共展示了Tree.fromstring方法的15个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Python代码示例。

示例1: parse_tree

# 需要导入模块: from nltk.tree import Tree [as 别名]
# 或者: from nltk.tree.Tree import fromstring [as 别名]
    def parse_tree(self, text, binary=False, preprocessed=False):
        """Parse ``text`` through ``self.nlp`` and return a single tree.

        The annotate call requests the ``tokenize,ssplit,pos,parse``
        annotators with JSON output and ``parse.binaryTrees`` set to
        ``'true'``. When the response holds more than one sentence, the first
        child of every sentence parse is re-attached under a new ``Top`` node.
        For a single sentence the outer node of the parse is dropped and its
        first child is used directly.

        :param text: text handed to ``self.nlp.annotate``.
        :param binary: if true, the tree is converted in place with
            ``nltk.treetransforms.chomsky_normal_form``.
        :param preprocessed: if true, the tree is passed through
            ``preprocess_parse_tree`` and that function's result is returned.
        :return: the merged (and optionally transformed) tree.
        :raises IndexError: if the response contains no sentences.
        """
        nlp_output = self.nlp.annotate(text, properties={
            'annotators': 'tokenize,ssplit,pos,parse',
            'outputFormat': 'json',
            'parse.binaryTrees': 'true'
        })
        # Decode the response when it arrives as a JSON string rather than as
        # an already-decoded object.
        if isinstance(nlp_output, str):
            nlp_output = json.loads(nlp_output, strict=False)

        sentences = nlp_output['sentences']
        if len(sentences) > 1:
            # Merge the per-sentence trees: strip each outer node, render the
            # remainder as bracketed text and re-parse under a shared "Top".
            sentence_parts = [
                str(Tree.fromstring(sentence['parse'])[0])
                for sentence in sentences
            ]
            merged_tree = Tree.fromstring(
                "(Top " + "".join(sentence_parts) + ")")
        else:
            # No merging required; just remove the outer root node.
            merged_tree = Tree.fromstring(sentences[0]['parse'])[0]

        if binary:
            nltk.treetransforms.chomsky_normal_form(merged_tree)

        if preprocessed:
            merged_tree = preprocess_parse_tree(merged_tree)

        return merged_tree
开发者ID:jonasrothfuss,项目名称:equity_news_thesis,代码行数:32,代码来源:parser.py

示例2: removeNounMods

# 需要导入模块: from nltk.tree import Tree [as 别名]
# 或者: from nltk.tree.Tree import fromstring [as 别名]
def removeNounMods(tree):
    """Apply the two tsurgeon noun-modifier removals to ``tree`` in sequence.

    ``tsurgeon.remove_internal_mods`` runs first, followed by
    ``tsurgeon.remove_participle_mods``. Each operation returns a string; a
    non-empty result is parsed with ``Tree.fromstring`` and replaces the
    current tree, while an empty string leaves the tree as it was.

    Returns the tree after both steps.
    """
    removal_steps = (
        tsurgeon.remove_internal_mods,
        tsurgeon.remove_participle_mods,
    )
    for remove_mods in removal_steps:
        rewritten = remove_mods(tree)
        if rewritten != '':
            tree = Tree.fromstring(rewritten)
    return tree
开发者ID:DerrickZhu1,项目名称:11611teamproject-YenYuan-,代码行数:10,代码来源:simplify.py

示例3: parser_output_to_parse_deriv_trees

# 需要导入模块: from nltk.tree import Tree [as 别名]
# 或者: from nltk.tree.Tree import fromstring [as 别名]
def parser_output_to_parse_deriv_trees(output):
    """Split raw parser output into parse trees and derivation trees.

    ``output`` is stripped and split on newlines. Lines at even positions
    (0, 2, ...) are read as derivation trees and lines at odd positions as
    parse trees; empty lines are skipped. In parse-tree lines every ASCII
    0x06 control character is replaced by the text ``epsilon_`` before the
    line is handed to ``Tree.fromstring``.

    Returns a ``(parse_trees, deriv_trees)`` tuple of lists.
    """
    rows = output.strip().split("\n")

    parse_trees = []
    for row in rows[1::2]:
        if row != '':
            parse_trees.append(
                Tree.fromstring(row.replace('\x06', 'epsilon_')))

    deriv_trees = []
    for row in rows[::2]:
        if row != '':
            deriv_trees.append(Tree.fromstring(row))

    return parse_trees, deriv_trees
开发者ID:jonpiffle,项目名称:ltag_parser,代码行数:10,代码来源:old_parser_scorer.py

示例4: test_lbranch_parse

# 需要导入模块: from nltk.tree import Tree [as 别名]
# 或者: from nltk.tree.Tree import fromstring [as 别名]
    def test_lbranch_parse(self):
        # LBranch is built from an empty training set; 'S' is presumably the
        # start symbol (confirm against the LBranch constructor).
        baseline = LBranch([], 'S')

        parsed = []
        for tagged_sent in self.tagged_sents:
            parsed.append(baseline.parse(tagged_sent))

        # Expected result for each tagged sentence, in order.
        expected = [
            Tree.fromstring("""(S (S|<> (S|<> (S|<> (D El) (N gato)) (V come)) (N pescado)) (P .))"""),
            Tree.fromstring("""(S (S|<> (S|<> (S|<> (D La) (N gata)) (V come)) (N salmón)) (P .))"""),
        ]
        self.assertEqual(parsed, expected)
开发者ID:acapello,项目名称:PLN-2015,代码行数:12,代码来源:test_baselines.py

示例5: test_flat_parse

# 需要导入模块: from nltk.tree import Tree [as 别名]
# 或者: from nltk.tree.Tree import fromstring [as 别名]
    def test_flat_parse(self):
        # Flat is built from an empty training set; 'S' is presumably the
        # start symbol (confirm against the Flat constructor).
        baseline = Flat([], 'S')

        parsed = list(map(baseline.parse, self.tagged_sents))

        # Expected result: every tagged word hangs directly under the root.
        expected = [
            Tree.fromstring("(S (D El) (N gato) (V come) (N pescado) (P .))"),
            Tree.fromstring("(S (D La) (N gata) (V come) (N salmón) (P .))"),
        ]
        self.assertEqual(parsed, expected)
开发者ID:acapello,项目名称:PLN-2015,代码行数:12,代码来源:test_baselines.py

示例6: extractParticiple

# 需要导入模块: from nltk.tree import Tree [as 别名]
# 或者: from nltk.tree.Tree import fromstring [as 别名]
def extractParticiple(tree):
    """Build a "<subject> is <participle phrase>." sentence from ``tree``.

    ``tsurgeon.hasParticipleMod`` is asked for a participle modifier. If it
    returns an empty string the function returns ``None``. Otherwise the
    leaves of the subject (from ``tsurgeon.findSubject``) are joined with the
    word ``is`` and the leaves of the modifier minus its first word, and a
    trailing period is appended.
    """
    part_mod = tsurgeon.hasParticipleMod(tree)
    if part_mod == '':
        return None

    subject_leaves = Tree.fromstring(tsurgeon.findSubject(tree)).leaves()
    participle_leaves = Tree.fromstring(part_mod).leaves()

    # Inflection is ignored: the copula is always the literal 'is', and the
    # first word of the modifier is dropped.
    words = subject_leaves + ['is'] + participle_leaves[1:]
    return ' '.join(words).strip() + '.'
开发者ID:DerrickZhu1,项目名称:11611teamproject-YenYuan-,代码行数:14,代码来源:simplify.py

示例7: test_productions

# 需要导入模块: from nltk.tree import Tree [as 别名]
# 或者: from nltk.tree.Tree import fromstring [as 别名]
    def test_productions(self):
        # Train a UPCFG on a single hand-written tree and compare the
        # productions it reports against the expected set.
        training_tree = Tree.fromstring(
            """
                (S
                    (NP (Det el) (Noun gato))
                    (VP (Verb come) (NP (Noun pescado) (Adj crudo)))
                )
            """)

        # The start symbol is passed explicitly; the original snippet marks
        # this as a bugfix relative to the official test.
        grammar_model = UPCFG([training_tree], start='S')

        actual = grammar_model.productions()

        expected = {
            ProbabilisticProduction(N('S'), [N('NP'), N('VP')], prob=1.0),
            ProbabilisticProduction(N('NP'), [N('Det'), N('Noun')], prob=0.5),
            ProbabilisticProduction(N('Det'), ['Det'], prob=1.0),
            ProbabilisticProduction(N('Noun'), ['Noun'], prob=1.0),
            ProbabilisticProduction(N('VP'), [N('Verb'), N('NP')], prob=1.0),
            ProbabilisticProduction(N('Verb'), ['Verb'], prob=1.0),
            ProbabilisticProduction(N('NP'), [N('Noun'), N('Adj')], prob=0.5),
            ProbabilisticProduction(N('Adj'), ['Adj'], prob=1.0),
        }

        # Compared as sets, so the order of productions does not matter.
        self.assertEqual(set(actual), expected)
开发者ID:acapello,项目名称:PLN-2015,代码行数:28,代码来源:test_upcfg.py

示例8: removeLeadingMods

# 需要导入模块: from nltk.tree import Tree [as 别名]
# 或者: from nltk.tree.Tree import fromstring [as 别名]
def removeLeadingMods(tree):
    """Repeatedly apply ``tsurgeon.remove_leading_mods`` until nothing changes.

    Each round asks tsurgeon for a rewritten tree string. The loop stops, and
    the current tree is returned, as soon as tsurgeon returns an empty string
    or the re-parsed result compares equal to the current tree.
    """
    while True:
        rewritten = tsurgeon.remove_leading_mods(tree)
        if rewritten == '':
            break
        candidate = Tree.fromstring(rewritten)
        if candidate != tree:
            # Something was removed; try again on the smaller tree.
            tree = candidate
            continue
        break
    return tree
开发者ID:DerrickZhu1,项目名称:11611teamproject-YenYuan-,代码行数:9,代码来源:simplify.py

示例9: removeVerbMods

# 需要导入模块: from nltk.tree import Tree [as 别名]
# 或者: from nltk.tree.Tree import fromstring [as 别名]
def removeVerbMods(tree):
    """Repeatedly apply ``tsurgeon.remove_verb_modifiers`` until a fixed point.

    Each round asks tsurgeon for a rewritten tree string. The loop stops, and
    the current tree is returned, as soon as tsurgeon returns an empty string
    or the re-parsed result compares equal to the current tree.
    """
    while True:
        rewritten = tsurgeon.remove_verb_modifiers(tree)
        if rewritten == '':
            break
        candidate = Tree.fromstring(rewritten)
        if candidate != tree:
            # A modifier was removed; run another round on the new tree.
            tree = candidate
            continue
        break
    return tree
开发者ID:DerrickZhu1,项目名称:11611teamproject-YenYuan-,代码行数:9,代码来源:simplify.py

示例10: parse

# 需要导入模块: from nltk.tree import Tree [as 别名]
# 或者: from nltk.tree.Tree import fromstring [as 别名]
    def parse(self, text):
        """
        Parse a single-sentence ``text`` through the CoreNLP JSON-RPC server.

        NOTE: the original author states that the Stanford tagger and parser
        libraries are case-sensitive, so the casing of the output is
        preserved. Callers must normalize casing themselves before comparing.

        :param text: text to be parsed
        :return: a SentenceParseResult object
        :raises Exception: if the server reports more than one sentence
        """
        transport = jsonrpc.TransportTcpIp(
            addr=(CORENLP_SERVER_HOST, CORENLP_SERVER_PORT))
        server = jsonrpc.ServerProxy(jsonrpc.JsonRpc20(), transport)

        response = loads(server.parse(text))
        sentences = response['sentences']
        if len(sentences) > 1:
            raise Exception('Multi-sentence query is not supported')
        sentence = sentences[0]

        # Contraction recovery (self._recover_contractions) is disabled in
        # the original snippet, so the tokens are used as returned.
        word_tokens = [ParsedWordToken(wire_word)
                       for wire_word in sentence['words']]

        normalized_sentence = ' '.join(token.text for token in word_tokens)
        parsed_tree = Tree.fromstring(sentence['parsetree'])
        word_dependency = SentenceWordDependency(sentence['dependencies'])

        return SentenceParseResult(word_tokens=word_tokens,
                                   normalized_sentence=normalized_sentence,
                                   parsed_tree=parsed_tree,
                                   word_dependency=word_dependency)
开发者ID:shuoh,项目名称:question-processor,代码行数:31,代码来源:sentence_parser.py

示例11: tag_var_nodes

# 需要导入模块: from nltk.tree import Tree [as 别名]
# 或者: from nltk.tree.Tree import fromstring [as 别名]
def tag_var_nodes(vars_dir, trees_dir, tagged_dir):
    """
    Tag variable nodes in trees.

    For every ``*.json`` file in ``vars_dir`` the records are grouped by
    their ``treeNumber``. Each tree that has more than one record is parsed
    from the matching line of the tree file, and the label of every
    referenced node is rewritten to ``<label>_VAR_<key>``, where ``key`` is
    the record's ``key`` field. The tree file is named by the ``filename``
    field of the last record and is looked up in ``trees_dir``.

    Only trees in which more than one node was actually relabelled are
    output, one tree per line, to the path returned by
    ``derive_path(filename, new_dir=tagged_dir)``. Records that point at a
    leaf cannot be relabelled; they are logged and skipped. A JSON file that
    holds no records is skipped entirely.

    :param vars_dir: directory containing the ``*.json`` variable records.
    :param trees_dir: directory containing the tree files named in the records.
    :param tagged_dir: output directory; created if it does not exist.
    """
    tagged_dir = Path(tagged_dir)
    tagged_dir.makedirs_p()

    for vars_fname in Path(vars_dir).glob('*.json'):
        # Close the file deterministically instead of leaking the handle.
        with vars_fname.open() as vars_file:
            records = json.load(vars_file)

        # Without records there is no tree filename to look up. Previously
        # this case hit an undefined (or stale, from the prior file) `record`.
        if not records:
            continue

        # create a dict mapping each tree number to a list of
        # (nodeNumber, extractName) tuples for its variables
        d = defaultdict(list)
        for record in records:
            pair = record['nodeNumber'], record['key']
            d[record['treeNumber']].append(pair)

        # As before, the filename is taken from the last record of the file.
        lemtree_fname = records[-1]['filename']
        parses = (Path(trees_dir) / lemtree_fname).lines()
        tagged_parses = []

        for tree_number, pairs in d.items():
            if len(pairs) > 1:
                # tree numbers in records count from one
                tree = Tree.fromstring(parses[tree_number - 1])
                # get NLTK-style indices for all nodes in a preorder
                # traversal of the tree
                positions = tree.treepositions()
                vars_count = 0

                for node_number, key in pairs:
                    # node numbers in records count from one
                    position = positions[node_number - 1]
                    subtree = tree[position]
                    try:
                        subtree.set_label(
                            '{}_VAR_{}'.format(subtree.label(), key))
                    except AttributeError:
                        # Leaves have no label()/set_label().
                        log.error('skipping variable "{}" because it is a leaf '
                                  'node ({})'.format(subtree, key))
                    else:
                        vars_count += 1

                if vars_count > 1:
                    # A very large margin keeps each tree on a single line.
                    tagged_parses.append(tree.pformat(margin=99999))

        if tagged_parses:
            tagged_fname = derive_path(lemtree_fname, new_dir=tagged_dir)
            log.info('writing tagged trees to ' + tagged_fname)
            tagged_fname.write_lines(tagged_parses)
开发者ID:OC-NTNU,项目名称:baleen-python,代码行数:62,代码来源:rels.py

示例12: question

# 需要导入模块: from nltk.tree import Tree [as 别名]
# 或者: from nltk.tree.Tree import fromstring [as 别名]
def question(inputstr):
    """Generate a list of questions from the sentence ``inputstr``.

    Steps, in order:

    1. Collect entities from ``supersense_tag`` and merge in the result of
       ``named_entities``.
    2. Parse the sentence with ``parser.raw_parse`` and take the first tree.
    3. Clean the tree with ``clean_sentence`` (which yields a tree string)
       and build the verb-inverted tree with ``inverse_verb``.
    4. Let ``gen_question_recur`` fill the question list, then run every
       entry through ``cleanup_question``.
    5. Append ``fix_output`` of the inverted tree as the final entry.

    :param inputstr: the source sentence.
    :return: list of generated questions.
    :raises StopIteration: if the parser yields no tree.
    """
    entities = supersense_tag(inputstr)
    entities.update(named_entities(inputstr))

    # Use the next() builtin rather than the iterator's .next() method, which
    # only exists on Python 2 iterators; the builtin works on both versions.
    main_tree = next(parser.raw_parse(inputstr))

    main_tree_str = clean_sentence(main_tree)
    # TODO: mark_unmovable_tags

    main_tree = inverse_verb(main_tree_str)
    sentence = str(' '.join(Tree.fromstring(main_tree_str).leaves()))
    sentence_inversed = str(' '.join(main_tree.leaves()))
    questions = []
    prep = []  # used to store prepositions while traversing the tree
    gen_question_recur(main_tree, sentence_inversed, sentence, questions, entities, prep)
    questions = [cleanup_question(q) for q in questions]
    questions.append(fix_output(main_tree))
    return questions
开发者ID:DerrickZhu1,项目名称:11611teamproject-YenYuan-,代码行数:27,代码来源:gen_question.py

示例13: add_tree

# 需要导入模块: from nltk.tree import Tree [as 别名]
# 或者: from nltk.tree.Tree import fromstring [as 别名]
    def add_tree(self, datum):
        """Binarize ``datum["raw_tree"]``, write it out, and register ``datum``.

        The raw tree string is parsed, converted to Chomsky normal form,
        unary chains are collapsed (including POS nodes) and the result is
        converted to a ``ParentedTree``. Two lines are then written:

        * to ``self.parents_file``: for every subtree except the root, the
          traversal index of its parent, space-separated;
        * to ``self.sents_file``: the first child of every subtree whose
          first child is a plain string, space-separated.

        :param datum: mapping with a ``"raw_tree"`` bracketed tree string.
        :return: the position of ``datum`` in ``self.trees`` (its ID).
        """
        # parse tree and binarize
        tree = Tree.fromstring(datum["raw_tree"])
        tree.chomsky_normal_form()
        tree.collapse_unary(collapsePOS=True)
        tree = ParentedTree.convert(tree)

        # assign indices to subtrees, in the order subtrees() yields them
        subtrees = list(tree.subtrees())
        indices = {t.treeposition(): i for i, t in enumerate(subtrees)}

        # `unicode` only exists on Python 2 (or where the module defines it).
        # Resolve it once so the leaf check below cannot raise NameError on
        # Python 3 for subtrees whose first child is another tree.
        try:
            string_types = (str, unicode)
        except NameError:
            string_types = (str,)

        # generate parent pointers and labels
        # (labels = one instance of sent in sents by treelstm terminology)
        parents = [0] * (len(subtrees) - 1)  # the root has no parent
        labels = []
        slot = 0
        for t in subtrees:
            parent = t.parent()
            if parent is not None:
                parents[slot] = indices[parent.treeposition()]
                slot += 1
            first_child = t[0]
            # Exact type match, as in the original check (no subclasses).
            if type(first_child) in string_types:
                labels.append(first_child)

        self.parents_file.write(" ".join(map(str, parents)) + "\n")
        self.sents_file.write(" ".join(labels) + "\n")
        self.trees.append(datum)
        return len(self.trees) - 1  # ID
开发者ID:Jasmeet107,项目名称:serapis,代码行数:32,代码来源:predict.py

示例14: read_segtree_file

# 需要导入模块: from nltk.tree import Tree [as 别名]
# 或者: from nltk.tree.Tree import fromstring [as 别名]
def read_segtree_file(fn):
    """Read a discourse tree from the UTF-8 file ``fn``.

    The whole file content is parsed as one bracketed tree, with
    ``prefix_number_seg_token`` applied to every leaf token.

    Returns a list of the parsed tree's direct children.
    """
    with codecs.open(fn, 'r', 'utf-8') as tree_file:
        content = tree_file.read()
        discourse_tree = Tree.fromstring(
            content, read_leaf=prefix_number_seg_token)
        return list(discourse_tree)
开发者ID:WladimirSidorenko,项目名称:DiscourseSegmenter,代码行数:9,代码来源:segmentation_tree.py

示例15: find_subtrees

# 需要导入模块: from nltk.tree import Tree [as 别名]
# 或者: from nltk.tree.Tree import fromstring [as 别名]
def find_subtrees(tree, depth):
    """
    Return all subtrees at a given depth.

    Arguments
    ---------
    tree: either an nltk.tree.Tree or a PTB-formatted string
    depth: the target depth

    Returns
    -------
    list of nltk.tree.Tree objects representing the selected subtrees

    >>> ptb_str = "(ROOT (S (NP (DT The) (VBG following)) (VP (VBP are) (NP (NP (JJ major) (NN news) (NNS items)) (PP (IN in) (NP (NP (VBG leading) (JJ Turkish) (NNS newspapers)) (PP (IN on) (NP (NNP Monday))))))) (. .)))"
    >>> ptb_tree = Tree.fromstring(ptb_str)
    >>> subtrees = find_subtrees(ptb_str, 2)  # find_subtrees accepts strings
    >>> [t.label() for t in subtrees]         # and it returns a list of subtrees (objects of the kind nltk.tree.Tree)
    ['NP', 'VP', '.']
    >>> subtrees = find_subtrees(ptb_tree, 3) # and trees
    >>> [t.label() for t in subtrees]
    ['DT', 'VBG', 'VBP', 'NP']
    >>> subtrees = find_subtrees(ptb_tree, 4)
    >>> [t.label() for t in subtrees]
    ['NP', 'PP']
    """
    # Strings are parsed first; anything else is used as the tree directly.
    root = Tree.fromstring(tree) if isinstance(tree, str) else tree
    # The helper appends its matches to the list it is given, starting the
    # walk at depth 0.
    found = []
    _find_subtrees(root, 0, depth, found)
    return found
开发者ID:karins,项目名称:CoherenceFramework,代码行数:32,代码来源:dseq.py


注:本文中的nltk.tree.Tree.fromstring方法示例由纯净天空整理自Github/MSDocs等开源代码及文档管理平台,相关代码片段筛选自各路编程大神贡献的开源项目,源码版权归原作者所有,传播和使用请参考对应项目的License;未经允许,请勿转载。