本文整理汇总了Python中nltk.Tree.parse方法的典型用法代码示例。如果您正苦于以下问题：Python Tree.parse方法的具体用法？Python Tree.parse怎么用？Python Tree.parse使用的例子？那么恭喜您，这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在类nltk.Tree的用法示例。
在下文中一共展示了Tree.parse方法的15个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Python代码示例。
示例1: tag
# 需要导入模块: from nltk import Tree [as 别名]
# 或者: from nltk.Tree import parse [as 别名]
def tag(self, input_tree):
    """
    Tag an input tree using the rules in parsed grammars.

    The tree is cleaned and flattened to text. Every rule in ``self.rules``
    is then searched for in that text (case-insensitively); each matched
    span is replaced by a ``<RULE_NAME>`` placeholder and pushed onto
    ``self.stack``. Finally the rewritten text is turned back into a tree
    string, wrapped in a top-level ``S`` node and parsed.
    """
    flat_text = self.from_tree_to_text(self.clean(input_tree))
    for rule in self.rules:
        # Each rule is a mapping; its first item gives the rule name and
        # the regular expression that implements it.
        rule_name, pattern = list(rule.items())[0]
        placeholder = "<" + rule_name + ">"
        # finditer keeps scanning the string it was handed, so rebinding
        # flat_text inside the loop does not disturb the iteration.
        for match in re.finditer(pattern, flat_text, re.I):
            # Strip leading and trailing whitespace from the matched text
            # so that each <NAME> subtree is cleanly delimited in the
            # resulting text (optional spaces are not swallowed).
            matched = match.group(rule_name).strip()
            flat_text = flat_text.replace(matched, placeholder)
            self.stack.append(matched)
    tree_str = "(S " + self.from_text_to_tree_str(flat_text) + " )"
    return Tree.parse(tree_str, parse_leaf=self.from_string_token_to_tuple)
示例2: test_simple_tags
# 需要导入模块: from nltk import Tree [as 别名]
# 或者: from nltk.Tree import parse [as 别名]
def test_simple_tags(self):
    """Tagging ``self.text`` with the grammar ``ANIMAL : {<ANIMAL>}`` yields the expected tree."""
    parser = ruleparser.RuleParser("ANIMAL : {<ANIMAL>}")
    expected_str = "(S el/DT (ANIMAL perro/NN/ANIMAL) ladra/VB al/DT (ANIMAL gato/NN/ANIMAL))"
    expected = Tree.parse(expected_str, parse_leaf=parser.from_string_token_to_tuple)
    self.assertEqual(parser.tag(self.text), expected)
示例3: negra_tree_iter
# 需要导入模块: from nltk import Tree [as 别名]
# 或者: from nltk.Tree import parse [as 别名]
def negra_tree_iter(corpus_root):
    """
    Yield the trees stored in the file at ``corpus_root``.

    Lines starting with ``%`` act as separators: the lines collected since
    the previous separator are joined, stripped and, when non-empty, parsed
    with ``Tree.parse``. Whatever remains after the last separator is
    handled the same way.

    :param corpus_root: path of the file to read.
    :return: a generator of parsed trees.
    """
    pieces = []
    # The context manager closes the file even if the consumer abandons
    # the generator early or parsing raises.
    with open(corpus_root) as corpus_file:
        for line in corpus_file:
            if line.startswith('%'):
                s = ''.join(pieces).strip()
                if s:
                    yield Tree.parse(s)
                pieces = []
            else:
                pieces.append(line)
    # Trailing blank lines would otherwise hand an empty string to the
    # parser; apply the same emptiness guard as inside the loop.
    s = ''.join(pieces).strip()
    if s:
        yield Tree.parse(s)
示例4: parse_ccgbank_tree
# 需要导入模块: from nltk import Tree [as 别名]
# 或者: from nltk.Tree import parse [as 别名]
def parse_ccgbank_tree(s):
    """
    Parse the tree string ``s`` using the CCGbank node/leaf parsers and
    patterns, and return the result of ``excise_empty_nodes`` on it.
    """
    ccgbank_options = dict(
        parse_node=parse_ccgbank_node,
        parse_leaf=parse_ccgbank_leaf,
        node_pattern=ccgbank_node_pattern,
        leaf_pattern=ccgbank_leaf_pattern,
    )
    parsed = Tree.parse(s, **ccgbank_options)
    return excise_empty_nodes(parsed)
示例5: treebank_bracket_parse
# 需要导入模块: from nltk import Tree [as 别名]
# 或者: from nltk.Tree import parse [as 别名]
def treebank_bracket_parse(t):
    """
    Parse the bracketed tree string ``t``.

    ``Tree.parse`` is tried first with ``remove_empty_top_bracketing=True``.
    If it raises ``IndexError``, the string is stripped, its first and last
    characters (the outer brackets of the real treebank format) are removed
    and the remainder is parsed with ``tree.bracket_parse``.
    """
    try:
        parsed = Tree.parse(t, remove_empty_top_bracketing=True)
    except IndexError:
        # In case it's the real treebank format: drop the outermost
        # bracket pair before parsing.
        without_outer = t.strip()[1:-1]
        parsed = tree.bracket_parse(without_outer)
    return parsed
示例6: test_cascaded_rules_2
# 需要导入模块: from nltk import Tree [as 别名]
# 或者: from nltk.Tree import parse [as 别名]
def test_cascaded_rules_2(self):
    """
    A PARTIDO rule defined in terms of the EQUIPOS chunk produces the
    expected nested tree for ``self.text``.
    """
    # The grammar literal stays flush-left so its content is unchanged.
    grammar = """
EQUIPOS : {<Equipo_Futbol> <CONJ> <Equipo_Futbol>}
PARTIDO : {<EQUIPOS> <VB>}
"""
    parser = ruleparser.RuleParser(grammar)
    expected_str = "(S (PARTIDO (EQUIPOS Real_Madrid/NN/NE/Equipo_Futbol y/CONJ F.C._Barcelona/NN/NE/Equipo_Futbol) disputan/VB) hoy/ADV la/DT final/NN de/PP la/DT Copa_del_Rey/NN/NE/Evento)"
    expected = Tree.parse(expected_str, parse_leaf=parser.from_string_token_to_tuple)
    self.assertEqual(parser.tag(self.text), expected)
示例7: test_simple_words
# 需要导入模块: from nltk import Tree [as 别名]
# 或者: from nltk.Tree import parse [as 别名]
def test_simple_words(self):
    """
    Rules written as sequences of quoted words (PERRO, GATO) produce the
    expected chunks for ``self.text``.
    """
    # The grammar literal stays flush-left so its content is unchanged.
    grammar = """
PERRO : {"el" "perro"}
GATO : {"al" "gato"}
"""
    parser = ruleparser.RuleParser(grammar)
    expected_str = "(S (PERRO el/DT perro/NN/ANIMAL) ladra/VB (GATO al/DT gato/NN/ANIMAL))"
    expected = Tree.parse(expected_str, parse_leaf=parser.from_string_token_to_tuple)
    self.assertEqual(parser.tag(self.text), expected)
示例8: test_cascaded_rules
# 需要导入模块: from nltk import Tree [as 别名]
# 或者: from nltk.Tree import parse [as 别名]
def test_cascaded_rules(self):
    """
    A grammar with an NP rule followed by a VP rule produces the expected
    chunked tree for ``self.text``.
    """
    # The grammar literal stays flush-left so its content is unchanged.
    grammar = """
NP : {<DT>? <NN>+}
VP : {<VB> <ADV>}
"""
    parser = ruleparser.RuleParser(grammar)
    expected_str = "(S (NP Real_Madrid/NN/NE/Equipo_Futbol) y/CONJ (NP F.C._Barcelona/NN/NE/Equipo_Futbol) (VP disputan/VB hoy/ADV) (NP la/DT final/NN) de/PP (NP la/DT Copa_del_Rey/NN/NE/Evento))"
    expected = Tree.parse(expected_str, parse_leaf=parser.from_string_token_to_tuple)
    self.assertEqual(parser.tag(self.text), expected)
示例9: test_repetitive_rules
# 需要导入模块: from nltk import Tree [as 别名]
# 或者: from nltk.Tree import parse [as 别名]
def test_repetitive_rules(self):
    """
    Several rules sharing the name UNIVERSIDAD, the last one combining two
    UNIVERSIDAD chunks, produce the expected nested tree.
    """
    self.text = [
        ('He',['VB']), ('estudiado',['VB']), ('en',['ADV']), ('la',['DT']),
        ('Universidad',['NN']), ('Complutense',['NN']), ('y',['CONJ']),
        ('he',['VB']), ('trabajado',['VB']), ('en',['ADV']),
        ('Yahoo!',['NN']), ('durante',['ADV']), ('2',['NN']), ('años',['NN']),
    ]
    # The grammar literal stays flush-left so its content is unchanged.
    grammar = """
UNIVERSIDAD : {"universidad"}
UNIVERSIDAD : {"complutense"}
UNIVERSIDAD : {<UNIVERSIDAD> <UNIVERSIDAD>}
"""
    parser = ruleparser.RuleParser(grammar)
    expected_str = "(S He/VB estudiado/VB en/ADV la/DT (UNIVERSIDAD (UNIVERSIDAD Universidad/NN) (UNIVERSIDAD Complutense/NN)) y/CONJ he/VB trabajado/VB en/ADV Yahoo!/NN durante/ADV 2/NN años/NN)"
    expected = Tree.parse(expected_str, parse_leaf=parser.from_string_token_to_tuple)
    self.assertEqual(parser.tag(self.text), expected)
示例10: parse
# 需要导入模块: from nltk import Tree [as 别名]
# 或者: from nltk.Tree import parse [as 别名]
def parse(self):
    """
    Accesses the parse tree based on the S-expression parse string in the XML

    The tree is built with ``Tree.parse`` the first time it is requested
    and kept in ``self._parse``; later calls return that stored value.
    When there is no parse string, the stored value is returned as-is.

    :getter: Returns the NLTK parse tree
    :type: nltk.Tree
    """
    # Nothing to build: either there is no parse string or a tree has
    # already been stored.
    if self.parse_string is None or self._parse is not None:
        return self._parse
    self._parse = Tree.parse(self._parse_string)
    return self._parse
示例11: test_context_rules
# 需要导入模块: from nltk import Tree [as 别名]
# 或者: from nltk.Tree import parse [as 别名]
def test_context_rules(self):
    """
    Rules with tokens outside the braces (context, per the test name)
    chunk only the braced part, as the expected tree shows.
    """
    self.text = [
        ('He',['VB']), ('estudiado',['VB']), ('en',['ADV']), ('la',['DT']),
        ('Universidad',['NN']), ('Complutense',['NN']), ('y',['CONJ']),
        ('he',['VB']), ('trabajado',['VB']), ('en',['ADV']),
        ('Yahoo!',['NN']), ('durante',['ADV']), ('2',['NN']), ('años',['NN']),
    ]
    # The grammar literal stays flush-left so its content is unchanged.
    grammar = """
EMPRESA : "trabajado" "en" {<NN>+}
UNIVERSIDAD : "estudiado" "en" <DT>? {<NN>+}
TECNOLOGIA : "trabajado" "con" {<.*>}
"""
    parser = ruleparser.RuleParser(grammar)
    expected_str = "(S He/VB estudiado/VB en/ADV la/DT (UNIVERSIDAD Universidad/NN Complutense/NN) y/CONJ he/VB trabajado/VB en/ADV (EMPRESA Yahoo!/NN) durante/ADV 2/NN años/NN)"
    expected = Tree.parse(expected_str, parse_leaf=parser.from_string_token_to_tuple)
    self.assertEqual(parser.tag(self.text), expected)
示例12: parse_rst_dt_tree
# 需要导入模块: from nltk import Tree [as 别名]
# 或者: from nltk.Tree import parse [as 别名]
def parse_rst_dt_tree(tstr, context=None):
    """
    Read a single RST tree from its RST DT string representation.

    The string is preprocessed, parsed with ``Tree.parse`` using
    ``_LEAF_PATTERN`` for leaves, and postprocessed. If ``context`` is set,
    the tree is additionally aligned with it.

    You should really try to pass in a context (see `RSTContext`) if you
    can; the None case is really intended for testing, or for cases where
    you don't have an original text.
    """
    raw_tree = Tree.parse(_preprocess(tstr), leaf_pattern=_LEAF_PATTERN)
    tree_ = _postprocess(raw_tree)
    if not context:
        return tree_
    return _align_with_context(tree_, context)
示例13: parse_lightweight_tree
# 需要导入模块: from nltk import Tree [as 别名]
# 或者: from nltk.Tree import parse [as 别名]
def parse_lightweight_tree(tstr):
    """
    Parse lightweight RST debug syntax into SimpleRSTTree, eg. ::

        (R:attribution
           (N:elaboration (N foo) (S bar)
           (S quux)))

    This is mostly useful for debugging or for knocking out quick
    examples
    """
    # Node labels look like "N" or "N:relation".
    label_re = re.compile(r'(?P<nuc>[RSN])(:(?P<rel>.*)|$)')
    # Maps the one-letter code to the full nuclearity name.
    nuclearity_names = {}
    for full_name in ["Root", "Nucleus", "Satellite"]:
        nuclearity_names[full_name[0]] = full_name

    # pylint: disable=C0103
    PosInfo = collections.namedtuple("PosInfo", "text edu")
    # pylint: enable=C0103

    def walk(subtree, posinfo=PosInfo(text=0, edu=0)):
        """
        walk down first-cut tree, counting span info and returning a
        fancier tree along the way

        Returns the converted node together with the position reached
        after it.
        """
        if not isinstance(subtree, Tree):
            # Leaf: the leaf itself is the EDU text; advance the text
            # offset by its length and the EDU counter by one.
            leaf_start = posinfo.text
            leaf_end = leaf_start + len(subtree)
            after_leaf = PosInfo(text=leaf_end, edu=posinfo.edu+1)
            return EDU(posinfo.edu, Span(leaf_start, leaf_end), subtree), after_leaf

        start = copy.copy(posinfo)
        converted_kids = []
        for kid in subtree:
            converted, posinfo = walk(kid, posinfo)
            converted_kids.append(converted)

        label_match = label_re.match(treenode(subtree))
        if not label_match:
            raise RSTTreeException("Missing nuclearity annotation in ",
                                   subtree)
        nuclearity = nuclearity_names[label_match.group("nuc")]
        rel = label_match.group("rel") or "leaf"
        # posinfo.edu is the index of the next EDU, hence the - 1.
        edu_span = (start.edu, posinfo.edu - 1)
        span = Span(start.text, posinfo.text)
        node = Node(nuclearity, edu_span, span, rel)
        return SimpleRSTTree(node, converted_kids), posinfo

    converted_root, _ = walk(Tree.parse(tstr))
    return converted_root
示例14: read_trees
# 需要导入模块: from nltk import Tree [as 别名]
# 或者: from nltk.Tree import parse [as 别名]
def read_trees(iterable):
    """
    Reads an iterable in order to mount a syntactic tree.

    Each item is one line with whitespace-separated columns, indexed
    through ``ConllPos``. A line with at most one token closes the current
    sentence: the fragments collected so far are joined and parsed into a
    tree. A sentence still pending when the input ends is parsed as well,
    and separator lines with nothing pending are ignored.

    :param iterable: lines as UTF-8 encoded bytes or already-decoded text.
    :return: list of the parsed trees, in input order.
    """
    from nltk import Tree

    trees = []
    tree_strings = []
    for line in iterable:
        # Accept both raw UTF-8 bytes and already-decoded text.
        if isinstance(line, bytes):
            line = line.decode('utf-8')
        data = line.split()
        if len(data) <= 1:
            # Sentence boundary. Skip it when nothing was collected
            # (e.g. consecutive blank lines) so the parser never receives
            # an empty string.
            if tree_strings:
                trees.append(Tree.parse(' '.join(tree_strings), brackets='[]'))
                tree_strings = []
            continue

        word = data[ConllPos.word]
        pos = data[ConllPos.pos]
        parse_col = data[ConllPos.parse]

        # a little workaround.
        # to avoid messing nltk.Tree string parser, we use [] as tree brackets
        # instead of the default (). This is done because "(" and ")" appear as
        # separate tokens, while "["and "]" do not.
        tree_string = parse_col.replace('(', '[').replace(')', ']')
        # treat "broken" constituents like VP- and -VP as normal VPs
        tree_string = tree_string.replace('-', '')

        # treat multiwords and concatenate their POS with #
        words = [' %s#%s ' % (part, pos) for part in word.split('_')]
        words_string = ' '.join(words)
        tree_string = tree_string.replace('*', words_string)
        tree_strings.append(tree_string)

    # Input that does not end with a separator line would otherwise lose
    # its last sentence.
    if tree_strings:
        trees.append(Tree.parse(' '.join(tree_strings), brackets='[]'))
    return trees
示例15: ctb_tree_iter_f
# 需要导入模块: from nltk import Tree [as 别名]
# 或者: from nltk.Tree import parse [as 别名]
def ctb_tree_iter_f(corpus_root):
    """
    Yield the trees found in the file at ``corpus_root``.

    Lines between a line starting with ``<s `` and one starting with
    ``</s>`` (compared case-insensitively, after stripping) are collected;
    at the closing tag they are joined, stripped and, when non-empty, split
    with ``split_separate_setences`` and each part parsed with
    ``Tree.parse``. The path is written to standard error first.

    :param corpus_root: path of the file to read.
    :return: a generator of parsed trees.
    """
    in_s = False
    pieces = []
    # Same output as the print statement, but valid on Python 2 and 3.
    sys.stderr.write('%s\n' % (corpus_root,))
    # The context manager closes the file even if the consumer abandons
    # the generator early or parsing raises.
    with open(corpus_root) as corpus_file:
        for line in corpus_file:
            lowered = line.strip().lower()
            if lowered.startswith('<s '):
                in_s = True
            elif lowered.startswith('</s>'):
                s = ''.join(pieces).strip()
                if s:
                    # In a couple instances of the CTB, there are two sentences
                    # contained in a single <S> node. Deal with that here
                    for s1 in split_separate_setences(s):
                        yield Tree.parse(s1)
                in_s = False
                pieces = []
            elif in_s:
                pieces.append(line)