本文整理汇总了Python中nltk.parse.dependencygraph.DependencyGraph类的典型用法代码示例。如果您正苦于以下问题:Python DependencyGraph类的具体用法?Python DependencyGraph怎么用?Python DependencyGraph使用的例子?那么恭喜您, 这里精选的类代码示例或许可以为您提供帮助。
在下文中一共展示了DependencyGraph类的14个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Python代码示例。
示例1: parse
def parse(self, tokens):
    """
    Return the most probable projective parse of ``tokens``.

    The chart is filled with a span-concatenation method similar to the
    one defined in Eisner (1996). Every complete span is rendered as
    CoNLL-style text, turned into a ``DependencyGraph`` and scored with
    ``self.compute_prob``.

    :param tokens: The input tokens. Any iterable is accepted because the
        tokens are copied into a list before they are indexed.
    :return: ``[tree, score]`` for the best-scoring parse, ``[None, 0]``
        when no complete span scores above zero, or ``[]`` when a token
        has no tag in the grammar.
    """
    self._tokens = list(tokens)
    # Index the materialised copy: the argument may be a one-shot iterable
    # that ``list()`` has just exhausted.
    tokens = self._tokens
    n = len(tokens)

    # chart[i][j] is the cell for (i, j); cells with i == j + 1 are seeded
    # with one single-token span per tag the grammar lists for that token.
    chart = []
    for i in range(n + 1):
        chart.append([])
        for j in range(n + 1):
            chart[i].append(ChartCell(i, j))
            if i != j + 1:
                continue
            word = tokens[i - 1]
            if word not in self._grammar._tags:
                print("No tag found for input token '%s', parse is impossible." % word)
                return []
            for tag in self._grammar._tags[word]:
                chart[i][j].add(DependencySpan(i - 1, i, i - 1, [-1], [tag]))

    # Combine adjacent spans chart[k][j] and chart[i][k] into chart[i][j].
    for i in range(1, n + 1):
        for j in range(i - 2, -1, -1):
            for k in range(i - 1, j, -1):
                for span1 in chart[k][j]._entries:
                    for span2 in chart[i][k]._entries:
                        for newspan in self.concatenate(span1, span2):
                            chart[i][j].add(newspan)

    # Score every span stored in the cell for the whole sentence and keep
    # the best one.
    max_parse = None
    max_score = 0
    for parse in chart[n][0]._entries:
        conll_format = "".join(
            "\t%d\t%s\t%s\t%s\t%s\t%s\t%d\t%s\t%s\t%s\n" % (
                i + 1,
                word,
                word,
                parse._tags[i],
                parse._tags[i],
                "null",
                parse._arcs[i] + 1,
                "null",
                "-",
                "-",
            )
            for i, word in enumerate(tokens)
        )
        dg = DependencyGraph(conll_format)
        score = self.compute_prob(dg)
        if score > max_score:
            max_parse = dg.tree()
            max_score = score
    return [max_parse, max_score]
示例2: parse
def parse(self, tokens):
    """
    Performs a projective dependency parse on the list of tokens using
    a chart-based, span-concatenation algorithm similar to Eisner (1996).

    :param tokens: The input tokens. Any iterable is accepted because the
        tokens are copied into a list before they are indexed.
    :type tokens: list(str)
    :return: An iterator over parse trees, one per span stored in the
        chart cell that covers the whole input.
    :rtype: iter(Tree)
    """
    self._tokens = list(tokens)
    # Index the materialised copy: the argument may be a one-shot iterable
    # that ``list()`` has just exhausted.
    tokens = self._tokens
    n = len(tokens)

    # chart[i][j] is the cell for (i, j); cells with i == j + 1 are seeded
    # with a single-token span tagged 'null'.
    chart = []
    for i in range(n + 1):
        chart.append([])
        for j in range(n + 1):
            chart[i].append(ChartCell(i, j))
            if i == j + 1:
                chart[i][j].add(DependencySpan(i - 1, i, i - 1, [-1], ['null']))

    # Combine adjacent spans chart[k][j] and chart[i][k] into chart[i][j].
    for i in range(1, n + 1):
        for j in range(i - 2, -1, -1):
            for k in range(i - 1, j, -1):
                for span1 in chart[k][j]._entries:
                    for span2 in chart[i][k]._entries:
                        for newspan in self.concatenate(span1, span2):
                            chart[i][j].add(newspan)

    for parse in chart[n][0]._entries:
        # Every row carries the 'ROOT' relation: per the original author's
        # note, DependencyGraph requires at least one root element.
        conll_format = "".join(
            '\t%d\t%s\t%s\t%s\t%s\t%s\t%d\t%s\t%s\t%s\n' % (
                i + 1,
                word,
                word,
                'null',
                'null',
                'null',
                parse._arcs[i] + 1,
                'ROOT',
                '-',
                '-',
            )
            for i, word in enumerate(tokens)
        )
        dg = DependencyGraph(conll_format)
        yield dg.tree()
示例3: to_depgraph
def to_depgraph(self, rel=None):
    """
    Build a ``DependencyGraph`` from this object.

    ``self._to_depgraph`` fills the graph's node list; afterwards each
    node's 'deps' list receives the addresses of the nodes that name it
    as their head.

    :param rel: Not used by this implementation.
    :return: The populated graph, with ``root`` set to ``nodelist[1]``.
    """
    graph = DependencyGraph()
    nodelist = graph.nodelist
    self._to_depgraph(nodelist, 0, 'ROOT')

    # Link every head to its dependents; the entry at index 0 is never
    # treated as a dependent.
    for head_addr, head_node in enumerate(nodelist):
        head_node['deps'].extend(
            dep['address'] for dep in nodelist[1:] if dep['head'] == head_addr
        )

    graph.root = nodelist[1]
    return graph
示例4: parse
def parse(self, tokens):
    """
    Return the projective parses of ``tokens``, best-scoring first.

    The chart is filled with a span-concatenation method similar to the
    one defined in Eisner (1996). Every complete span is rendered as
    CoNLL-style text, turned into a ``DependencyGraph`` and scored with
    ``self.compute_prob``.

    :param tokens: The input tokens. Any iterable is accepted because the
        tokens are copied into a list before they are indexed.
    :return: An iterator of ``(score, tree)`` pairs in descending score
        order. When no parse is found, a single pair with score ``0.0``
        and a flat tree (first token as the node, the rest as children)
        is produced. The iterator is empty for empty input.
    """
    self._tokens = list(tokens)
    # Index the materialised copy: the argument may be a one-shot iterable
    # that ``list()`` has just exhausted.
    tokens = self._tokens
    n = len(tokens)
    if n == 0:
        # Nothing to parse, and the flat fallback tree below needs at
        # least one token.
        return iter([])

    # chart[i][j] is the cell for (i, j); cells with i == j + 1 are seeded
    # with one span per grammar tag, or a single u'NULL' span when the
    # grammar has no tag for the token.
    chart = []
    for i in range(n + 1):
        chart.append([])
        for j in range(n + 1):
            chart[i].append(ChartCell(i, j))
            if i != j + 1:
                continue
            word = tokens[i - 1]
            if word in self._grammar._tags:
                for tag in self._grammar._tags[word]:
                    chart[i][j].add(DependencySpan(i - 1, i, i - 1, [-1], [tag]))
            else:
                chart[i][j].add(DependencySpan(i - 1, i, i - 1, [-1], [u'NULL']))

    # Combine adjacent spans chart[k][j] and chart[i][k] into chart[i][j].
    for i in range(1, n + 1):
        for j in range(i - 2, -1, -1):
            for k in range(i - 1, j, -1):
                for span1 in chart[k][j]._entries:
                    for span2 in chart[i][k]._entries:
                        for newspan in self.concatenate(span1, span2):
                            chart[i][j].add(newspan)

    trees = []
    for parse in chart[n][0]._entries:
        # Every row carries the 'ROOT' relation: per the original author's
        # note, the dependency graph must contain a ROOT element.
        conll_format = "".join(
            '\t%d\t%s\t%s\t%s\t%s\t%s\t%d\t%s\t%s\t%s\n' % (
                i + 1, word, word, parse._tags[i], parse._tags[i],
                'null', parse._arcs[i] + 1, 'ROOT', '-', '-',
            )
            for i, word in enumerate(tokens)
        )
        dg = DependencyGraph(conll_format)
        score = self.compute_prob(dg)
        trees.append((score, dg.tree()))

    # Stable sort, highest score first; ties keep chart order.
    trees.sort(key=lambda entry: entry[0], reverse=True)
    if not trees:
        trees = [(0.0, Tree(tokens[0], tokens[1:]))]
    return iter(trees)
示例5: to_depgraph
def to_depgraph(self, rel=None):
    """
    Convert this object into an NLTK ``DependencyGraph``.

    The node list is populated by ``self._to_depgraph``; each node then
    gets the addresses of its dependents appended to its "deps" list.

    :param rel: Not used by this implementation.
    :return: The resulting graph, whose ``root`` is ``nodelist[1]``.
    """
    from nltk.parse.dependencygraph import DependencyGraph

    result = DependencyGraph()
    all_nodes = result.nodelist
    self._to_depgraph(all_nodes, 0, "ROOT")

    for position, parent in enumerate(all_nodes):
        # Dependents are the nodes (excluding index 0) whose head is this
        # position in the node list.
        children = [
            candidate["address"]
            for candidate in all_nodes[1:]
            if candidate["head"] == position
        ]
        parent["deps"].extend(children)

    result.root = all_nodes[1]
    return result
示例6: to_depgraph
def to_depgraph(self, rel=None):
    """
    Convert this object into an NLTK ``DependencyGraph``.

    ``self._to_depgraph`` populates the graph's ``nodes`` mapping; each
    node's 'deps' mapping is then filled with the addresses of its
    dependents, grouped by the dependent's relation label.

    :param rel: Not used by this implementation.
    :return: The resulting graph, whose ``root`` is ``nodes[1]``.
    """
    from nltk.parse.dependencygraph import DependencyGraph

    graph = DependencyGraph()
    nodes = graph.nodes
    self._to_depgraph(nodes, 0, 'ROOT')

    for address, node in nodes.items():
        for candidate in nodes.values():
            # Nodes labelled 'TOP' are never dependents; everything else
            # is attached to the node it names as its head.
            if candidate['rel'] == 'TOP' or candidate['head'] != address:
                continue
            node['deps'].setdefault(candidate['rel'], []).append(candidate['address'])

    graph.root = nodes[1]
    return graph
示例7: tagged_parse_sents
def tagged_parse_sents(self, sentences, verbose=False):
    """
    Use MaltParser to parse multiple sentences. Takes multiple sentences
    where each sentence is a list of (word, tag) tuples.
    The sentences must have already been tokenized and tagged.

    :param sentences: Input sentences to parse
    :type sentences: list(list(tuple(str, str)))
    :param verbose: Passed through to ``self._execute``.
    :return: iter(iter(``DependencyGraph``)) the dependency graph representation
        of each sentence
    :raise Exception: If MaltParser is not configured, the parser has not
        been trained, or the MaltParser process exits with a non-zero code.
    """
    if not self._malt_bin:
        raise Exception("MaltParser location is not configured.  Call config_malt() first.")
    if not self._trained:
        raise Exception("Parser has not been trained.  Call train() first.")

    input_file = tempfile.NamedTemporaryFile(prefix='malt_input.conll',
                                             dir=self.working_dir,
                                             delete=False)
    output_file = tempfile.NamedTemporaryFile(prefix='malt_output.conll',
                                              dir=self.working_dir,
                                              delete=False)
    try:
        # Only the reserved file name is needed here. Keeping the handle
        # open while another process writes to the file is not portable
        # (it fails on Windows).
        output_file.close()

        # One CoNLL row per token, sentences separated by blank lines.
        for sentence in sentences:
            for (i, (word, tag)) in enumerate(sentence, start=1):
                input_str = '%s\t%s\t%s\t%s\t%s\t%s\t%s\t%s\t%s\t%s\n' %\
                    (i, word, '_', tag, tag, '_', '0', 'a', '_', '_')
                input_file.write(input_str.encode("utf8"))
            input_file.write(b'\n\n')
        input_file.close()

        cmd = ['java'] + self.additional_java_args + ['-jar', self._malt_bin,
               '-w', self.working_dir,
               '-c', self.mco, '-i', input_file.name,
               '-o', output_file.name, '-m', 'parse']
        ret = self._execute(cmd, verbose)
        if ret != 0:
            raise Exception("MaltParser parsing (%s) failed with exit "
                            "code %d" % (' '.join(cmd), ret))

        # Read all graphs before the temp files are deleted in ``finally``
        # (guards against ``load`` returning a lazy iterable).
        dep_graphs = list(DependencyGraph.load(output_file.name))
        # Must return iter(iter(DependencyGraph)): one single-item
        # iterator per sentence.
        return (iter([dep_graph]) for dep_graph in dep_graphs)
    finally:
        input_file.close()
        output_file.close()
        # Attempt both removals even if the first one fails.
        try:
            os.remove(input_file.name)
        finally:
            os.remove(output_file.name)
示例8: make_dep_tree
def make_dep_tree(sent, deps):
    """
    Build a ``DependencyGraph`` from a sentence and its dependency triples.

    :param sent: Mapping with an "x" sequence (words) and a "pos" sequence
        (tags). Positions 1..len(x)-1 become graph nodes; position 0 is
        presumably a root placeholder -- TODO confirm with the caller.
    :param deps: Iterable of (head, modifier, relation) triples.
    :return: The graph, with ``root`` set to the node of the first
        dependent listed for address 0.
    :raise KeyError: If a position in 1..len(x)-1 has no head or relation
        in ``deps``, or if address 0 has no dependents.
    """
    # head address -> list of modifier addresses
    adj = merge_with(cons, [], *[{head: [mod]} for head, mod, _ in deps])
    heads = dict((mod, head) for head, mod, _ in deps)
    rels = dict((mod, label) for _, mod, label in deps)

    n = len(sent["x"])
    pos = sent["pos"]
    x = sent["x"]

    nodelist = defaultdict(lambda: {"address": -1, "head": -1, "deps": [], "rel": "", "tag": "", "word": None})
    for i in range(1, n):
        node = nodelist[i]
        node["address"] = i
        node["head"] = heads[i]
        # ``in`` replaces the Python 2-only ``dict.has_key``.
        node["deps"] = adj[i] if i in adj else []
        node["tag"] = pos[i]
        node["word"] = x[i]
        node["rel"] = rels[i]

    g = DependencyGraph()
    g.get_by_address(0)["deps"] = adj[0] if 0 in adj else []
    for node in nodelist.values():
        g.add_node(node)
    # NOTE(review): unlike the guarded lookup above, this raises KeyError
    # when address 0 has no dependents; kept as in the original.
    g.root = nodelist[adj[0][0]]
    return g
示例9: tree_to_graph
def tree_to_graph(tree):
    '''Build a graph from a tree, for use by the accuracy() function.

    Args: tree: the tree to convert
    Returns: a graph representing the tree. The graph is only really
        usable in accuracy(), because 'head' is the only node attribute
        that gets set here.
    Raises: None
    '''
    # Node labels are mutable dictionaries, so work on copies and leave
    # the caller's nodes untouched.
    copied = tree_map(copy.copy, tree)

    def assign_heads(subtree, parent=0):
        # Each node's head is the address of its parent in the tree; the
        # top node gets head 0.
        node = label(subtree)
        node['head'] = parent
        if isinstance(subtree, Tree):
            for child in subtree:
                assign_heads(child, node['address'])

    assign_heads(copied)

    def iter_labels(subtree):
        # Pre-order walk yielding the label of every (sub)tree and leaf.
        yield label(subtree)
        if isinstance(subtree, Tree):
            for child in subtree:
                for found in iter_labels(child):
                    yield found

    dg = DependencyGraph()
    dg.root = dg.nodelist[0]
    nodes = list(iter_labels(copied))
    # The node list should be ordered by address.
    nodes.sort(key=lambda t: label(t)['address'])
    dg.nodelist += nodes
    return dg
示例10: tagged_parse
def tagged_parse(self, sentence, verbose=False):
    """
    Use MaltParser to parse a sentence. Takes a sentence as a list of
    (word, tag) tuples; the sentence must have already been tokenized and
    tagged.

    :param sentence: Input sentence to parse
    :type sentence: list(tuple(str, str))
    :param verbose: Passed through to ``self._execute``.
    :return: ``DependencyGraph`` the dependency graph representation of the sentence
    :raise Exception: If MaltParser is not configured, the parser has not
        been trained, or the MaltParser process exits with a non-zero code.
    """
    if not self._malt_bin:
        raise Exception("MaltParser location is not configured.  Call config_malt() first.")
    if not self._trained:
        raise Exception("Parser has not been trained.  Call train() first.")

    input_file = tempfile.NamedTemporaryFile(prefix='malt_input.conll',
                                             dir=self.working_dir,
                                             delete=False)
    output_file = tempfile.NamedTemporaryFile(prefix='malt_output.conll',
                                              dir=self.working_dir,
                                              delete=False)
    try:
        # Only the reserved file name is needed here. Keeping the handle
        # open while another process writes to the file is not portable
        # (it fails on Windows).
        output_file.close()

        for (i, (word, tag)) in enumerate(sentence, start=1):
            line = ('%s\t%s\t%s\t%s\t%s\t%s\t%s\t%s\t%s\t%s\n' %
                    (i, word, '_', tag, tag, '_', '0', 'a', '_', '_'))
            # The temp file is opened in binary mode. Text is encoded as
            # UTF-8; byte strings (Python 2 ``str``) are written
            # unchanged, as before.
            if not isinstance(line, bytes):
                line = line.encode('utf8')
            input_file.write(line)
        input_file.write(b'\n')
        input_file.close()

        cmd = ['java', '-jar', self._malt_bin, '-w', self.working_dir,
               '-c', self.mco, '-i', input_file.name,
               '-o', output_file.name, '-m', 'parse']
        ret = self._execute(cmd, verbose)
        if ret != 0:
            raise Exception("MaltParser parsing (%s) failed with exit "
                            "code %d" % (' '.join(cmd), ret))
        return DependencyGraph.load(output_file.name)
    finally:
        input_file.close()
        output_file.close()
        # Attempt both removals even if the first one fails.
        try:
            os.remove(input_file.name)
        finally:
            os.remove(output_file.name)
示例11: tagged_parse
def tagged_parse(self, sentence, verbose=False):
    """
    Use MaltParser to parse a sentence. Takes a sentence as a list of
    (word, tag) tuples; the sentence must have already been tokenized and
    tagged.

    :param sentence: Input sentence to parse
    :type sentence: L{list} of (word, tag) L{tuple}s.
    :param verbose: Passed through to C{self._execute}.
    :return: C{DependencyGraph} the dependency graph representation of the sentence
    :raise Exception: If MaltParser is not configured or the parser has
        not been trained.
    """
    if not self._malt_bin:
        raise Exception("MaltParser location is not configured.  Call config_malt() first.")
    if not self._trained:
        raise Exception("Parser has not been trained.  Call train() first.")

    # NOTE(review): the file names are fixed inside the shared temp
    # directory, so concurrent calls would overwrite each other's files.
    temp_dir = tempfile.gettempdir()
    input_file = os.path.join(temp_dir, 'malt_input.conll')
    output_file = os.path.join(temp_dir, 'malt_output.conll')

    # One CoNLL row per token, terminated by a blank line; the file is
    # closed before the parser is started.
    with open(input_file, 'w') as f:
        for (i, (word, tag)) in enumerate(sentence):
            f.write('%s\t%s\t%s\t%s\t%s\t%s\t%s\t%s\t%s\t%s\n' %
                    (i + 1, word, '_', tag, tag, '_', '0', 'a', '_', '_'))
        f.write('\n')

    # Each element pairs an option with its value in a single string;
    # presumably self._execute joins them into one command line -- TODO
    # confirm against _execute.
    cmd = ['java', '-jar %s' % self._malt_bin, '-w %s' % temp_dir,
           '-c %s' % self.mco, '-i %s' % input_file, '-o %s' % output_file, '-m parse']
    self._execute(cmd, 'parse', verbose)
    return DependencyGraph.load(output_file)
示例12: parse
def parse(self, tokens):
"""
Parses the input tokens with respect to the parser's grammar. Parsing
is accomplished by representing the search-space of possible parses as
a fully-connected directed graph. Arcs that would lead to ungrammatical
parses are removed and a lattice is constructed of length n, where n is
the number of input tokens, to represent all possible grammatical
traversals. All possible paths through the lattice are then enumerated
to produce the set of non-projective parses.

:param tokens: A list of tokens to parse.
:type tokens: list(str)
:return: An iterator of non-projective parses.
:rtype: iter(DependencyGraph)
"""
# NOTE(review): this listing has lost its indentation and its tail is
# omitted, so the nesting of the loops below cannot be confirmed from this
# text alone. The comments describe what the individual statements do.
# Create graph representation of tokens
self._graph = DependencyGraph()
# One node per token, addressed by its position in ``tokens``.
for index, token in enumerate(tokens):
self._graph.nodes[index] = {
'word': token,
'deps': [],
'rel': 'NTOP',
'address': index,
}
# For every node, collect the addresses of the nodes for which
# self._grammar.contains(head word, dependent word) holds, skipping pairs
# whose words are equal, and store them as that node's 'deps'.
for head_node in self._graph.nodes.values():
deps = []
for dep_node in self._graph.nodes.values() :
if (
self._grammar.contains(head_node['word'], dep_node['word'])
and head_node['word'] != dep_node['word']
):
deps.append(dep_node['address'])
head_node['deps'] = deps
# Create lattice of possible heads
# possible_heads[i] lists the token indices j != i accepted by
# self._grammar.contains(tokens[j], tokens[i]); tokens with no candidate
# head are collected in ``roots``.
roots = []
possible_heads = []
for i, word in enumerate(tokens):
heads = []
for j, head in enumerate(tokens):
if (i != j) and self._grammar.contains(head, word):
heads.append(j)
if len(heads) == 0:
roots.append(i)
possible_heads.append(heads)
# Set roots to attempt
# When no token lacks a head candidate, every token index is added to
# ``roots``.
if len(roots) < 2:
if len(roots) == 0:
for i in range(len(tokens)):
roots.append(i)
# Traverse lattice
# ``analysis[i]`` holds the head index chosen for token i, ``stack`` records
# the [i, head] choices popped from possible_heads, and ``forward`` selects
# the direction in which ``i`` moves.
analyses = []
for root in roots:
stack = []
analysis = [[] for i in range(len(possible_heads))]
i = 0
forward = True
while i >= 0:
if forward:
# A single candidate is taken as is; -1 is stored when there is no
# candidate; otherwise one candidate is popped and pushed on the stack.
if len(possible_heads[i]) == 1:
analysis[i] = possible_heads[i][0]
elif len(possible_heads[i]) == 0:
analysis[i] = -1
else:
head = possible_heads[i].pop()
analysis[i] = head
stack.append([i, head])
if not forward:
# Check whether position i has a recorded choice on the stack.
index_on_stack = False
for stack_item in stack:
if stack_item[0] == i:
index_on_stack = True
orig_length = len(possible_heads[i])
# No candidates left at i: move every stack entry recorded for i back into
# possible_heads[i].
if index_on_stack and orig_length == 0:
for j in range(len(stack) - 1, -1, -1):
stack_item = stack[j]
if stack_item[0] == i:
possible_heads[i].append(stack.pop(j)[1])
# Candidates remain at i: pop the next one, record it, and set ``forward``
# back to True.
elif index_on_stack and orig_length > 0:
head = possible_heads[i].pop()
analysis[i] = head
stack.append([i, head])
forward = True
# At the last position a copy of the current assignment is stored and the
# direction is reversed.
if i + 1 == len(possible_heads):
analyses.append(analysis[:])
forward = False
# Step ``i`` in the current direction.
if forward:
i += 1
else:
i -= 1
# Filter parses
# ensure 1 root, every thing has 1 head
# ......... part of the code is omitted here .........
示例13: parse
def parse(self, tokens):
"""
Parses the input tokens with respect to the parser's grammar. Parsing
is accomplished by representing the search-space of possible parses as
a fully-connected directed graph. Arcs that would lead to ungrammatical
parses are removed and a lattice is constructed of length n, where n is
the number of input tokens, to represent all possible grammatical
traversals. All possible paths through the lattice are then enumerated
to produce the set of non-projective parses.

:param tokens: A list of tokens to parse.
:type tokens: list(str)
:return: A set of non-projective parses.
:rtype: list(DependencyGraph)
"""
# NOTE(review): this listing has lost its indentation and its tail is
# omitted, so the nesting of the loops below cannot be confirmed from this
# text alone. The comments describe what the individual statements do.
# Create graph representation of tokens
self._graph = DependencyGraph()
self._graph.nodelist = [] # Remove the default root
# One node per token, addressed by its position in ``tokens``.
for index, token in enumerate(tokens):
self._graph.nodelist.append({'word':token, 'deps':[], 'rel':'NTOP', 'address':index})
# For every node, collect the addresses of the nodes for which
# self._grammar.contains(head word, dependent word) holds, skipping pairs
# whose words are equal, and store them as that node's 'deps'.
for head_node in self._graph.nodelist:
deps = []
for dep_node in self._graph.nodelist:
if self._grammar.contains(head_node['word'], dep_node['word']) and not head_node['word'] == dep_node['word']:
deps.append(dep_node['address'])
head_node['deps'] = deps
# Create lattice of possible heads
# possible_heads[i] lists the token indices j != i accepted by
# self._grammar.contains(tokens[j], tokens[i]); tokens with no candidate
# head are collected in ``roots``.
roots = []
possible_heads = []
for i, word in enumerate(tokens):
heads = []
for j, head in enumerate(tokens):
if (i != j) and self._grammar.contains(head, word):
heads.append(j)
if len(heads) == 0:
roots.append(i)
possible_heads.append(heads)
# Set roots to attempt
# More than one token without a head candidate aborts the parse; when no
# token lacks a head candidate, every token index is added to ``roots``.
# NOTE(review): returning False here does not match the documented
# list(DependencyGraph) return type.
if len(roots) > 1:
print("No parses found.")
return False
elif len(roots) == 0:
for i in range(len(tokens)):
roots.append(i)
# Traverse lattice
# ``analysis[i]`` holds the head index chosen for token i, ``stack`` records
# the [i, head] choices popped from possible_heads, and ``forward`` selects
# the direction in which ``i`` moves.
analyses = []
for root in roots:
stack = []
analysis = [[] for i in range(len(possible_heads))]
i = 0
forward = True
while(i >= 0):
if forward:
# A single candidate is taken as is; -1 is stored when there is no
# candidate; otherwise one candidate is popped and pushed on the stack.
if len(possible_heads[i]) == 1:
analysis[i] = possible_heads[i][0]
elif len(possible_heads[i]) == 0:
analysis[i] = -1
else:
head = possible_heads[i].pop()
analysis[i] = head
stack.append([i, head])
if not forward:
# Check whether position i has a recorded choice on the stack.
index_on_stack = False
for stack_item in stack:
# print stack_item
if stack_item[0] == i:
index_on_stack = True
orig_length = len(possible_heads[i])
# print len(possible_heads[i])
# No candidates left at i: move every stack entry recorded for i back into
# possible_heads[i].
# NOTE(review): ``xrange`` exists only in Python 2, while the rest of this
# function also runs on Python 3.
if index_on_stack and orig_length == 0:
for j in xrange(len(stack) -1, -1, -1):
stack_item = stack[j]
if stack_item[0] == i:
possible_heads[i].append(stack.pop(j)[1])
# print stack
# Candidates remain at i: pop the next one, record it, and set ``forward``
# back to True.
elif index_on_stack and orig_length > 0:
head = possible_heads[i].pop()
analysis[i] = head
stack.append([i, head])
forward = True
# print 'Index on stack:', i, index_on_stack
# At the last position a copy of the current assignment is stored and the
# direction is reversed.
if i + 1 == len(possible_heads):
analyses.append(analysis[:])
forward = False
# Step ``i`` in the current direction.
if forward:
i += 1
else:
i -= 1
# Filter parses
graphs = []
#ensure 1 root, every thing has 1 head
# Count the cells equal to -1 in each analysis and remember the index of
# the last one seen as ``root``.
for analysis in analyses:
root_count = 0
root = []
for i, cell in enumerate(analysis):
if cell == -1:
root_count += 1
root = i
if root_count == 1:
# ......... part of the code is omitted here .........
示例14: as_dependencygraph
def as_dependencygraph(self, keep_dummy_root=False, add_morph=True):
    ''' Returns this tree as NLTK's DependencyGraph object.

        Note that this method constructs a 'zero_based' graph,
        where counting of the words starts from 0 and the
        root index is -1 (not 0, as in Malt-TAB format);

        Parameters
        -----------
        add_morph : bool
            Specifies whether the morphological information
            (word lemmas, part-of-speech tags and features)
            should be added to graph nodes. It is only added
            for tree nodes whose ``morph`` attribute is non-empty.
            Alternative values are sorted and joined with '|',
            and spaces are replaced by '_';
            Default: True
        keep_dummy_root : bool
            Specifies whether the graph should include a dummy
            TOP / ROOT node, which does not refer to any word,
            and yet is the topmost node of the tree.
            If the dummy root node is not used, then the root
            node is the word node headed by -1;
            Default: False

        For more information about NLTK's DependencyGraph, see:
          http://www.nltk.org/_modules/nltk/parse/dependencygraph.html
    '''
    from nltk.parse.dependencygraph import DependencyGraph

    graph = DependencyGraph(zero_based=True)
    all_tree_nodes = [self] + self.get_children()

    #
    #  0) Fix the root
    #
    if keep_dummy_root:
        # The root node is re-constructed manually at address -1, as
        # DependencyGraph's interface seems to provide no convenient
        # means for moving the root node.
        graph.nodes[-1] = graph.nodes[0]
        graph.nodes[-1].update({'address': -1})
        graph.root = graph.nodes[-1]
    # Address 0 belongs to the first word in a zero-based graph, so the
    # default node stored there is dropped.
    # NOTE(review): the original listing lost its indentation; this
    # statement is assumed to be unconditional -- confirm upstream.
    del graph.nodes[0]

    #
    #  1) Update / Add nodes of the graph
    #
    for child in all_tree_nodes:
        rel = 'xxx' if not child.labels else '|'.join(child.labels)
        address = child.word_id
        graph.nodes[address].update({
            'address': address,
            'word': child.text,
            'rel': rel,
        })
        if not keep_dummy_root and child == self:
            # Without the dummy root, this tree itself is the root node.
            graph.root = graph.nodes[address]
        if add_morph and child.morph:
            # Collect the distinct values over all analyses; sorting makes
            # the joined strings reproducible across runs.
            lemmas = sorted(set(analysis[LEMMA] for analysis in child.morph))
            postags = sorted(set(analysis[POSTAG] for analysis in child.morph))
            forms = sorted(set(analysis[FORM] for analysis in child.morph))
            lemma = '|'.join(lemmas).replace(' ', '_')
            postag = '|'.join(postags).replace(' ', '_')
            feats = '|'.join(forms).replace(' ', '_')
            graph.nodes[address].update({
                # Fixed: the key used to be 'tag ' (trailing space), so
                # the tag was never stored under 'tag'.
                'tag': postag,
                'ctag': postag,
                'feats': feats,
                'lemma': lemma,
            })

    #
    #  2) Update / Add arcs of the graph
    #
    for child in all_tree_nodes:
        # Connect the children of the given word
        head_address = child.word_id
        deps = [] if not child.children else [c.word_id for c in child.children]
        for dep in deps:
            graph.add_arc(head_address, dep)
        if child.parent is None and keep_dummy_root:
            graph.add_arc(-1, head_address)
        # Connect the parent of the given node; -1 marks "no parent".
        head = -1 if not child.parent else child.parent.word_id
        graph.nodes[head_address].update({'head': head})
    return graph