示例1: get_edges_in_tree

# 需要导入模块: import nltk [as 别名]
# 或者: from nltk import Tree [as 别名]
def get_edges_in_tree(parent, leaves=[], path='', edges=[], lrb_rrb_fix=False):
    # Walks the children of `parent`, appending (from_node, to_node) string
    # pairs to `edges`, recurses into children that are `nltk.Tree` instances,
    # and returns the `(edges, leaves)` pair.
    #
    # NOTE(review): this snippet appears to have lost lines during extraction
    # and does not parse as written -- TODO restore from the upstream source:
    #   * both `if node.label() == ...` tests below have no body;
    #   * the second `from_node` / `to_node` group looks like it belonged to
    #     an `else:` branch handling non-Tree (leaf) children;
    #   * nothing visible ever adds to `leaves`, although `len(leaves)` is
    #     used to number leaf nodes.
    # NOTE(review): `leaves` and `edges` default to mutable lists; `edges` is
    # appended to in place, so calls relying on the defaults share state.
    for i, node in enumerate(parent):
        if type(node) is nltk.Tree:
            # Edge from the current path to a child id built from the path,
            # the child's label and its index among its siblings.
            from_node = path
            to_node = '{}-{}-{}'.format(path, node.label(), i)
            edges.append((from_node, to_node))

            if lrb_rrb_fix:
                if node.label() == '-LRB-':
                # NOTE(review): body missing in this snippet.
                if node.label() == '-RRB-':
                # NOTE(review): body missing in this snippet.

            # NOTE(review): `lrb_rrb_fix` is not forwarded here, so recursive
            # calls run with its default (False) -- confirm this is intended.
            edges, leaves = get_edges_in_tree(node, leaves, to_node, edges)
            # NOTE(review): presumably an `else:` preceded the lines below.
            from_node = path
            to_node = '{}-{}'.format(node, len(leaves))
            edges.append((from_node, to_node))
    return edges, leaves 

示例2: get_object

# 需要导入模块: import nltk [as 别名]
# 或者: from nltk import Tree [as 别名]
def get_object(tree):
    """Get the object in the tree object as a plain string.

    Subtrees labelled ``DT`` or ``POS`` (presumably the Penn Treebank
    determiner and possessive-ending tags) contribute nothing, so
    unnecessary words such as articles are removed from the result.

    Args:
        tree (Tree): Parsed tree structure. Anything that is not a ``Tree``
            is treated as a leaf and returned unchanged.

    Returns:
        Resulting string of tree ``(Ex: "red car")``
    """
    if not isinstance(tree, Tree):
        # Leaves are handed back as-is.
        return tree
    if tree.label() in ('DT', 'POS'):
        return ''
    # NOTE(review): the loop body was missing from the extracted snippet;
    # recursing into every child is the reconstruction implied by `words`
    # being joined below -- confirm against the upstream source.
    words = [get_object(child) for child in tree]
    # Drop empty fragments so skipped subtrees leave no double spaces.
    return ' '.join(word for word in words if word)

示例3: parse

# 需要导入模块: import nltk [as 别名]
# 或者: from nltk import Tree [as 别名]
def parse(self, chunk_struct, trace=None):
    """
    Apply the chunk parser to this input.

    Every parser in ``self._stages`` is applied in order, and the whole
    sequence is repeated ``self._loop`` times.

    :type chunk_struct: Tree
    :param chunk_struct: the chunk structure to be (further) chunked
        (this tree is modified, and is also returned)
    :type trace: int
    :param trace: The level of tracing that should be used when
        parsing a text.  ``0`` will generate no tracing output;
        ``1`` will generate normal tracing output; and ``2`` or
        higher will generate verbose tracing output.  This value
        overrides the default trace level stored in ``self._trace``;
        when it is None, ``self._trace`` is used.
    :return: the chunked output.
    :rtype: Tree
    """
    if trace is None:
        trace = self._trace
    for _ in range(self._loop):
        for parser in self._stages:
            # Each stage receives the output of the previous one.
            chunk_struct = parser.parse(chunk_struct, trace=trace)
    return chunk_struct

示例4: parse_sents

# 需要导入模块: import nltk [as 别名]
# 或者: from nltk import Tree [as 别名]
def parse_sents(self, sents):
    """
    Parse multiple sentences

    If "sents" is a string, it will be segmented into sentences using NLTK.
    Otherwise, each element of "sents" will be treated as a sentence.

    This is a generator: nothing runs (including the tokenizer check)
    until the result is first iterated.

    Args:
        sents (str or Iterable[str] or Iterable[List[str]]): sentences to parse

    Returns: Iter[nltk.Tree]

    Raises:
        ValueError: if "sents" is a string and ``self._tokenizer_lang``
            is None, so no sentence tokenizer can be selected.
    """
    if isinstance(sents, STRING_TYPES):
        if self._tokenizer_lang is None:
            raise ValueError(
                "No tokenizer available for this language. "
                "Please split into individual sentences and tokens "
                "before calling the parser."
            )
        sents = nltk.sent_tokenize(sents, self._tokenizer_lang)

    processed = self._nltk_process_sents(sents)
    for parse_raw, tags_raw, sentence in self._batched_parsed_raw(processed):
        yield self._make_nltk_tree(sentence, tags_raw, *parse_raw)

示例5: decode

# 需要导入模块: import nltk [as 别名]
# 或者: from nltk import Tree [as 别名]
def decode(self, output_dict):
    """
    Constructs an NLTK ``Tree`` given the scored spans. We also switch to exclusive
    span ends when constructing the tree representation, because it makes indexing
    into lists cleaner for ranges of text, rather than individual indices.

    Finally, for batch prediction, we will have padded spans and class probabilities.
    In order to make this less confusing, we remove all the padded spans and
    distributions from ``spans`` and ``class_probabilities`` respectively.

    Reads the ``class_probabilities``, ``spans``, ``tokens``, ``pos_tags`` and
    ``num_spans`` entries of ``output_dict``. The same dict is updated in place
    (``spans`` and ``class_probabilities`` become per-instance lists, and a
    ``trees`` entry is added) and then returned.
    """
    all_predictions = output_dict[u'class_probabilities'].cpu().data
    all_spans = output_dict[u"spans"].cpu().data

    all_sentences = output_dict[u"tokens"]
    # POS tags are only forwarded when every batch entry has a truthy value.
    all_pos_tags = output_dict[u"pos_tags"] if all(output_dict[u"pos_tags"]) else None
    num_spans = output_dict[u"num_spans"].data
    trees = self.construct_trees(all_predictions, all_spans, num_spans, all_sentences, all_pos_tags)

    # Trim each instance down to its own number of spans, dropping padding.
    batch_size = all_predictions.size(0)
    output_dict[u"spans"] = [all_spans[i, :num_spans[i]] for i in range(batch_size)]
    output_dict[u"class_probabilities"] = [all_predictions[i, :num_spans[i], :] for i in range(batch_size)]

    output_dict[u"trees"] = trees
    return output_dict

示例6: to_nltk_tree_general

# 需要导入模块: import nltk [as 别名]
# 或者: from nltk import Tree [as 别名]
def to_nltk_tree_general(node, attr_list=("dep_", "pos_"), level=99999):
    """Transforms a Spacy dependency tree into an NLTK tree, with certain spacy tree node attributes serving
    as parts of the NLTK tree node label content for uniqueness.

    Args:
        node: The starting node from the tree in which the transformation will occur.
        attr_list: Which attributes from the Spacy nodes will be included in the NLTK node label.
        level: The maximum depth of the tree.

    Returns:
        A NLTK Tree (nltk.tree), or just the label string when ``level`` is 0
        or the node has no children.
    """
    # The label is the requested attribute values joined with "/".
    node_representation = "/".join(getattr(node, attr) for attr in attr_list)

    # Depth budget exhausted: stop before looking at the children at all.
    if level == 0:
        return node_representation

    if node.n_lefts + node.n_rights > 0:
        subtrees = [to_nltk_tree_general(child, attr_list, level - 1) for child in node.children]
        return Tree(node_representation, subtrees)

    # Childless nodes are represented by their bare label.
    return node_representation

示例7: get_node_representation

# 需要导入模块: import nltk [as 别名]
# 或者: from nltk import Tree [as 别名]
def get_node_representation(tetre_format, token):
    """Given a format and a SpaCy node (spacy.token), returns this node representation using the NLTK tree (nltk.tree).
    The root is labelled with the token's ``pos_``; each direct child is rendered by
    ``to_nltk_tree_general`` with ``level=0``, i.e. as a label only.

    Args:
        tetre_format: The attributes of this node that will be part of its string representation,
            as a comma-separated string.
        token: The SpaCy node itself (spacy.token).

    Returns:
        A NLTK Tree (nltk.tree)
    """
    params = tetre_format.split(",")
    node_representation = token.pos_

    if token.n_lefts + token.n_rights > 0:
        children = [to_nltk_tree_general(child, attr_list=params, level=0) for child in token.children]
    else:
        # A childless token still yields a Tree, just an empty one.
        children = []

    return Tree(node_representation, children)

示例8: nltk_tree_to_qtree

# 需要导入模块: import nltk [as 别名]
# 或者: from nltk import Tree [as 别名]
def nltk_tree_to_qtree(tree):
    """Transforms a NLTK Tree in a QTREE. A QTREE is a string representation of a tree.

    For details, please see: http://www.ling.upenn.edu/advice/latex/qtree/qtreenotes.pdf

    Args:
        tree: The NLTK Tree (nltk.tree), or a leaf value of any other type.

    Returns:
        A string with the QTREE representation of the NLTK Tree (nltk.tree).
    """
    self_result = " [ "

    if isinstance(tree, Tree):
        self_result += " " + tree.label() + " "

        if len(tree) > 0:
            # Children are emitted in sorted order, not in sentence order.
            self_result += " ".join([nltk_tree_to_qtree(node) for node in sorted(tree)])
    else:
        # Leaves are rendered through str().
        self_result += " " + str(tree) + " "

    self_result += " ] "

    return self_result

示例9: nltk_tree_to_logical_form

# 需要导入模块: import nltk [as 别名]
# 或者: from nltk import Tree [as 别名]
def nltk_tree_to_logical_form(tree: Tree) -> str:
    """
    Given an ``nltk.Tree`` representing the syntax tree that generates a logical form, this method
    produces the actual (lisp-like) logical form, with all of the non-terminal symbols converted
    into the correct number of parentheses.

    This is used in the logic that converts action sequences back into logical forms.  It's very
    unlikely that you will need this anywhere else.

    A node with no children yields its own label, a node with exactly one child yields that
    child's label, and any other node yields its children's forms space-joined in parentheses.
    """
    # nltk.Tree actually inherits from `list`, so you use `len()` to get the number of children.
    # We're going to be explicit about checking length, instead of using `if tree:`, just to avoid
    # any funny business nltk might have done (e.g., it's really odd if `if tree:` evaluates to
    # `False` if there's a single leaf node with no children).
    if len(tree) == 0:
        return tree.label()
    if len(tree) == 1:
        return tree[0].label()
    return "(" + " ".join(nltk_tree_to_logical_form(child) for child in tree) + ")"

示例10: add_words

# 需要导入模块: import nltk [as 别名]
# 或者: from nltk import Tree [as 别名]
def add_words(self, file_name):
        # Add words to the dictionary
        #
        # Reads `file_name` line by line; each line is evaluated into `data`,
        # whose 'sentence1_parse' and 'sentence2_parse' entries are parsed
        # with `Tree.fromstring`, reduced by `self.filter_words`, and wrapped
        # in '<s>' / '</s>' markers.
        #
        # NOTE(review): this snippet appears to have lost lines during
        # extraction and does not parse as written -- the blank-line test and
        # both `for word in words:` loops have no body. TODO restore from the
        # upstream source.
        # NOTE(review): the file handle is never closed in the visible code;
        # a `with open(...)` block would avoid the leak.
        f_in = open(file_name, 'r')
        for line in f_in:
            if line.strip() == '':
            # NOTE(review): body missing -- presumably blank lines are skipped.
            # NOTE(review): `eval` runs arbitrary code from the file; if each
            # line is JSON, `json.loads` would be the safe choice -- confirm.
            data = eval(line)
            sen_tree = Tree.fromstring(data['sentence1_parse'])
            words = self.filter_words(sen_tree)
            words = ['<s>'] + words + ['</s>']
            for word in words:
            # NOTE(review): loop body missing in this snippet.
            sen_tree = Tree.fromstring(data['sentence2_parse'])
            words = self.filter_words(sen_tree)
            words = ['<s>'] + words + ['</s>']
            for word in words:
            # NOTE(review): loop body missing in this snippet.

示例11: bft

# 需要导入模块: import nltk [as 别名]
# 或者: from nltk import Tree [as 别名]
def bft(tree):
    """ Perform a breadth-first traversal of a tree.
    Return the nodes in a list in level-order.

    Only children that are ``nltk.Tree`` instances are visited; leaves
    (e.g. token strings) are not included in the result.

    Args:
        tree: a tree node
    Returns:
        lst: a list of tree nodes in left-to-right level-order
    """
    # Local import: a plain FIFO is all that is needed here, and deque avoids
    # depending on the Python 2-only `Queue` module.
    from collections import deque

    # NOTE(review): the enqueue/collect statements were missing from the
    # extracted snippet; this is the standard traversal the docstring
    # describes -- confirm against the upstream source.
    lst = []
    queue = deque([tree])
    while queue:
        node = queue.popleft()
        lst.append(node)
        queue.extend(child for child in node if isinstance(child, nltk.Tree))
    return lst

示例12: traverse_tree

# 需要导入模块: import nltk [as 别名]
# 或者: from nltk import Tree [as 别名]
def traverse_tree(tree, pro):
    """ Traverse a tree in a left-to-right, breadth-first manner,
    proposing any NP encountered as an antecedent. Returns the
    tree and the position of the first possible antecedent.

    Args:
        tree: the tree being searched
        pro: the pronoun being resolved (string)
    Returns:
        A ``(tree, position)`` pair for the first node whose label contains
        "NP" and that ``match`` accepts, or ``(None, None)`` if there is none.
    """
    # Local import: a plain FIFO is all that is needed here, and deque avoids
    # depending on the Python 2-only `Queue` module.
    from collections import deque

    # Initialize a queue and enqueue the root of the tree
    # NOTE(review): the enqueue statements were missing from the extracted
    # snippet; they are reconstructed here -- confirm against upstream.
    queue = deque([tree])
    while queue:
        node = queue.popleft()
        # if the node is an NP, return it as a potential antecedent
        if "NP" in node.label():
            # Look the position up once and reuse it for the check and the result.
            position = get_pos(tree, node)
            if match(tree, position, pro):
                return tree, position
        queue.extend(child for child in node if isinstance(child, nltk.Tree))
    # if no antecedent is found, return None
    return None, None

示例13: match_rules_context

# 需要导入模块: import nltk [as 别名]
# 或者: from nltk import Tree [as 别名]
def match_rules_context(tree, rules, parent_context=None):
    """Recursively matches a Tree structure with rules and returns context

    Templates are tried in the iteration order of ``rules``. The first
    template that matches decides the outcome: its child rules are matched
    against the captured subtrees, and a failing child fails the whole call.

    Args:
        tree (Tree): Parsed tree structure
        rules (dict): See match_rules
        parent_context (dict): Context of parent call. It is copied, never
            modified. Defaults to an empty context.

    Returns:
        dict: Context matched dictionary of matched rules or
        None if no match
    """
    for template, match_rules in rules.items():
        # Work on a copy so a failed attempt leaves the caller's context intact.
        context = {} if parent_context is None else parent_context.copy()
        if not match_template(tree, template, context):
            continue
        for key, child_rules in match_rules.items():
            child_context = match_rules_context(context[key], child_rules, context)
            if not child_context:
                return None
            context.update(child_context)
        return context
    return None

示例14: match_rules_context_multi

# 需要导入模块: import nltk [as 别名]
# 或者: from nltk import Tree [as 别名]
def match_rules_context_multi(tree, rules, parent_context=None):
    """Recursively matches a Tree structure with rules and returns all contexts

    Unlike a first-match search, every template in ``rules`` is tried and
    the contexts produced by all matching templates are collected.

    Args:
        tree (Tree): Parsed tree structure
        rules (dict): See match_rules
        parent_context (dict): Context of parent call. It is copied, never
            modified. Defaults to an empty context.

    Returns:
        list: Context dictionaries for every match; empty if nothing matched
    """
    all_contexts = []
    for template, match_rules in rules.items():
        # Work on a copy so each template starts from the caller's context.
        context = {} if parent_context is None else parent_context.copy()
        if not match_template(tree, template, context):
            continue
        if not match_rules:
            # No child rules: the template match itself is the result.
            all_contexts.append(context)
            continue
        # One list of candidate contexts per child rule, combined by cross_context.
        child_contextss = [
            match_rules_context_multi(context[key], child_rules, context)
            for key, child_rules in match_rules.items()
        ]
        all_contexts += cross_context(child_contextss)
    return all_contexts

示例15: match_template

# 需要导入模块: import nltk [as 别名]
# 或者: from nltk import Tree [as 别名]
def match_template(tree, template, args=None):
    """Check if match string matches Tree structure

    Args:
        tree (Tree): Parsed Tree structure of a sentence
        template (str): String template to match. Example: "( S ( NP ) )"
        args (dict): Optional dictionary that receives the values captured
            during matching. It is only updated when the match succeeds.

    Returns:
        bool: If they match or not
    """
    tokens = get_tokens(template.split())
    # Captures go into a scratch dict so a failed match leaves `args` untouched.
    cur_args = {}
    if not match_tokens(tree, tokens, cur_args):
        return False
    if args is not None:
        args.update(cur_args)
    logger.debug('MATCHED: {0}'.format(template))
    return True
