本文整理汇总了Python中CharniakParser.parse方法的典型用法代码示例。如果您正苦于以下问题:Python CharniakParser.parse方法的具体用法?Python CharniakParser.parse怎么用?Python CharniakParser.parse使用的例子?那么恭喜您,这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在类CharniakParser的用法示例。
在下文中一共展示了CharniakParser.parse方法的6个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Python代码示例。
示例1: parse
# 需要导入模块: import CharniakParser [as 别名]
# 或者: from CharniakParser import parse [as 别名]
def parse(self, sentence, rerank='auto', sentence_id=None):
    """Parse text or tokens and return the results as an NBestList.

    sentence may be a string or a sequence; it is wrapped in a
    Sentence object (a string will be tokenized). rerank may be True
    (rerank the n-best list), False (do not use the reranker) or
    'auto' (rerank only if a reranker model is loaded); the value is
    resolved through check_models_loaded_or_error(). sentence_id is
    passed through to the NBestList unchanged.

    If the underlying parser raises a RuntimeError, an NBestList with
    no parses is returned. A ValueError is raised when the sentence
    has parser.max_sentence_length - 1 tokens or more."""
    rerank = self.check_models_loaded_or_error(rerank)
    wrapped = Sentence(sentence)

    # parser.max_sentence_length is one more than the longest sentence
    # the parser will actually accept, hence the "- 1" below.
    token_count = len(wrapped)
    length_limit = parser.max_sentence_length - 1
    if token_count >= length_limit:
        raise ValueError("Sentence is too long (%s tokens, must be "
                         "under %s)" % (token_count, length_limit))

    try:
        candidates = parser.parse(wrapped.sentrep)
    except RuntimeError:
        # a parser failure is reported as "no parses", not as an error
        candidates = []

    results = NBestList(wrapped, candidates, sentence_id)
    if not rerank:
        return results
    results.rerank(self)
    return results
示例2: parse_tagged
# 需要导入模块: import CharniakParser [as 别名]
# 或者: from CharniakParser import parse [as 别名]
def parse_tagged(self, tokens, possible_tags, rerank='auto'):
    """Parse some pre-tagged, pre-tokenized text and return an NBestList.

    tokens must be a sequence of strings (passing a single string
    raises a ValueError). possible_tags is a map from token indices
    to possible POS tags; each value may be a single string or a
    sequence of strings. Tokens without an entry in possible_tags
    will be unconstrained by POS. POS tags must be in the terms.txt
    file in the parsing model; when the parser rejects a set of tags,
    _find_bad_tag_and_raise_error() is called to report the bad ones
    (expected to raise a ValueError).

    If rerank is True, we will rerank the n-best list, if False the
    reranker will not be used. rerank can also be set to 'auto' which
    will only rerank if a reranker model is loaded.

    Note that, unlike parse(), a RuntimeError raised by the
    underlying parser is not caught here."""
    rerank = self._check_loaded_models(rerank)
    if isinstance(tokens, basestring):
        raise ValueError("tokens must be a sequence, not a string.")

    ext_pos = parser.ExtPos()
    # one constraint entry is added per token position, in order, so
    # positions without tags still get an (empty) entry
    for index in range(len(tokens)):
        tags = possible_tags.get(index, [])
        if isinstance(tags, basestring):
            tags = [tags]
        # Build a real list rather than using map(): on Python 3 map()
        # returns a one-shot iterator, which would already be exhausted
        # by the time it is handed to the error-reporting helper below.
        tags = [str(tag) for tag in tags]
        valid_tags = ext_pos.addTagConstraints(parser.VectorString(tags))
        if not valid_tags:
            # at least one of the tags is bad -- find out which ones
            # and throw a ValueError
            self._find_bad_tag_and_raise_error(tags)

    sentence = Sentence(tokens)
    parses = parser.parse(sentence.sentrep, ext_pos,
                          self._parser_thread_slot)
    nbest_list = NBestList(sentence, parses)
    if rerank:
        nbest_list.rerank(self)
    return nbest_list
示例3: parse
# 需要导入模块: import CharniakParser [as 别名]
# 或者: from CharniakParser import parse [as 别名]
def parse(self, sentence, rerank=True, max_sentence_length=399):
    """Parse text or tokens and return the results as an NBestList.

    sentence may be a string or a sequence; it is wrapped in a
    Sentence object together with max_sentence_length (a string will
    be tokenized). check_loaded_models() is called with rerank before
    any parsing is attempted. When rerank is true, the n-best list is
    reranked before being returned.

    If the underlying parser raises a RuntimeError, an NBestList with
    no parses is returned."""
    self.check_loaded_models(rerank)
    wrapped = Sentence(sentence, max_sentence_length)

    try:
        candidates = parser.parse(wrapped.sentrep,
                                  self._parser_thread_slot)
    except RuntimeError:
        # a parser failure is reported as "no parses", not as an error
        candidates = []

    results = NBestList(wrapped, candidates)
    if not rerank:
        return results
    results.rerank(self)
    return results
示例4: parse
# 需要导入模块: import CharniakParser [as 别名]
# 或者: from CharniakParser import parse [as 别名]
def parse(self, sentence, rerank='auto', max_sentence_length=399):
    """Parse text or tokens and return the results as an NBestList.

    sentence may be a string or a sequence; it is wrapped in a
    Sentence object together with max_sentence_length (a string will
    be tokenized). rerank may be True (rerank the n-best list), False
    (do not use the reranker) or 'auto' (rerank only if a reranker
    model is loaded); the value is resolved through
    _check_loaded_models().

    If the underlying parser raises a RuntimeError, an NBestList with
    no parses is returned."""
    rerank = self._check_loaded_models(rerank)
    wrapped = Sentence(sentence, max_sentence_length)

    try:
        candidates = parser.parse(wrapped.sentrep,
                                  self._parser_thread_slot)
    except RuntimeError:
        # a parser failure is reported as "no parses", not as an error
        candidates = []

    results = NBestList(wrapped, candidates)
    if not rerank:
        return results
    results.rerank(self)
    return results
示例5: parse_tagged
# 需要导入模块: import CharniakParser [as 别名]
# 或者: from CharniakParser import parse [as 别名]
def parse_tagged(self, tokens, possible_tags, rerank=True):
    """Parse pre-tagged, pre-tokenized text and return an NBestList.

    tokens is a sequence of strings. possible_tags maps token indices
    to the POS tags allowed at that position; each value may be a
    single string or a sequence of tags. Positions with no entry in
    possible_tags are left unconstrained. check_loaded_models() is
    called with rerank first, and when rerank is true the n-best list
    is reranked before being returned.

    A RuntimeError raised by the underlying parser is not caught
    here."""
    self.check_loaded_models(rerank)

    ext_pos = parser.ExtPos()
    # one constraint entry is added per token position, in order, so
    # positions without tags still get an (empty) entry
    for position in range(len(tokens)):
        allowed = possible_tags.get(position, [])
        # a bare string counts as a single tag, not a sequence of
        # one-character tags
        allowed = [allowed] if isinstance(allowed, basestring) else allowed
        ext_pos.addTagConstraints(parser.VectorString(allowed))

    wrapped = Sentence(tokens)
    candidates = parser.parse(wrapped.sentrep, ext_pos,
                              self._parser_thread_slot)
    results = NBestList(wrapped, candidates)
    if not rerank:
        return results
    results.rerank(self)
    return results
示例6: parse_constrained
# 需要导入模块: import CharniakParser [as 别名]
# 或者: from CharniakParser import parse [as 别名]
def parse_constrained(self, tokens, constraints, possible_tags=None,
                      rerank='auto', sentence_id=None):
    """Parse pre-tokenized text under POS and/or phrasal constraints.

    constraints is a dictionary of

        {(start, end): [terms]}

    meaning that every span covering [start, end) must be labeled
    with one of the listed terms. start and end are integers; terms
    may be a single string or a list of strings. A ValueError is
    raised if end is not greater than start, or if tokens is a string
    rather than a sequence.

    possible_tags supplies external POS tags as in parse_tagged().
    The two mechanisms differ: possible_tags can force a word to take
    a tag the parser's tagger would not ordinarily use and changes
    the probabilities of words given tags (and so possibly the
    overall probability), whereas constraints only limit the set of
    allowable tags and do not change the probability of the final
    tree.

    If parsing fails with a RuntimeError, or returns no parses while
    constraints were given, the span constraints are relaxed (their
    minSizeForParsing is set to 2) and parsing is retried once. If
    that also fails, the returned NBestList has no parses.

    The rerank flag is the same as in parse(); sentence_id is passed
    through to the NBestList."""
    rerank = self.check_models_loaded_or_error(rerank)
    if isinstance(tokens, basestring):
        raise ValueError("tokens must be a sequence, not a string.")

    span_constraints = None
    if constraints:
        span_constraints = parser.LabeledSpans()
        for (start, end), labels in constraints.items():
            if end <= start:
                raise ValueError("End must be at least start + 1:"
                                 "(%r, %r) -> %r" % (start, end, labels))
            # Tree.label currently returns a DeprecatedGetter, so a
            # lone label of either kind is coerced to a plain string
            # to avoid type errors further down
            if isinstance(labels, (basestring, DeprecatedGetter)):
                labels = [str(labels)]
            for label in labels:
                span_constraints.addConstraint(int(start), int(end),
                                               str(label))

    if not possible_tags:
        possible_tags = {}
    ext_pos = self._possible_tags_to_ext_pos(tokens, possible_tags)
    wrapped = Sentence(tokens)

    try:
        candidates = parser.parse(wrapped.sentrep, ext_pos,
                                  span_constraints)
        if constraints and not candidates:
            # an empty result under constraints is routed through the
            # same retry path as a parser failure
            raise RuntimeError("Reparsing with relaxed constraints")
    except RuntimeError:
        # default outcome when no retry is possible or the retry fails
        candidates = []
        if span_constraints:
            # relax the constraints and retry once
            span_constraints.minSizeForParsing = 2
            try:
                candidates = parser.parse(wrapped.sentrep, ext_pos,
                                          span_constraints)
            except RuntimeError:
                candidates = []

    results = NBestList(wrapped, candidates, sentence_id)
    if not rerank:
        return results
    results.rerank(self)
    return results