本文整理汇总了Python中CharniakParser类的典型用法代码示例。如果您正苦于以下问题:Python CharniakParser类的具体用法?Python CharniakParser怎么用?Python CharniakParser使用的例子?那么恭喜您, 这里精选的类代码示例或许可以为您提供帮助。
在下文中一共展示了CharniakParser类的15个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Python代码示例。
示例1: __init__
def __init__(self, text_or_tokens):
    """Build the native sentence representation (sentrep) from a
    Sentence, a raw string, or a sequence of tokens."""
    if isinstance(text_or_tokens, Sentence):
        # Already wrapped -- share the existing native representation.
        self.sentrep = text_or_tokens.sentrep
        return
    if isinstance(text_or_tokens, basestring):
        # Raw text: add SGML sentence markers and let the native
        # tokenizer split it.
        self.sentrep = parser.tokenize('<s> ' + text_or_tokens + ' </s>')
        return
    # A sequence of tokens: stringify and PTB-escape each element so the
    # native layer doesn't crash on non-string input.
    escaped_tokens = [parser.ptbEscape(str(token))
                      for token in text_or_tokens]
    self.sentrep = parser.SentRep(escaped_tokens)
示例2: load_parser_model
def load_parser_model(self, model_dir, **parser_options):
    """Load the first-stage parsing model from model_dir and apply any
    parsing options (forwarded to set_parser_options(); the defaults
    should generally suffice). The native parser supports only one
    model per process, so a second call raises RuntimeError; a missing
    model directory raises ValueError."""
    if self._parser_model_loaded:
        raise RuntimeError('Parser is already loaded and can only be loaded once.')
    if not exists(model_dir):
        raise ValueError('Parser model directory %r does not exist.' % model_dir)
    # Record our state, then hand the model to the native layer.
    self.parser_model_dir = model_dir
    self._parser_model_loaded = True
    parser.loadModel(model_dir)
    self.set_parser_options(**parser_options)
示例3: parse_tagged
def parse_tagged(self, tokens, possible_tags, rerank='auto'):
    """Parse pre-tagged, pre-tokenized text. tokens must be a sequence
    of strings. possible_tags maps token indices to possible POS tags
    (a string or sequence of strings); tokens without an entry are
    unconstrained by POS. Tags must appear in the parsing model's
    terms.txt file, otherwise ValueError is raised. rerank may be True
    (always rerank the n-best list), False (never), or 'auto' (rerank
    only when a reranker model is loaded)."""
    rerank = self._check_loaded_models(rerank)
    if isinstance(tokens, basestring):
        raise ValueError("tokens must be a sequence, not a string.")
    ext_pos = parser.ExtPos()
    for position, _token in enumerate(tokens):
        constraint = possible_tags.get(position, [])
        # Allow a bare string as shorthand for a single-tag constraint.
        if isinstance(constraint, basestring):
            constraint = [constraint]
        constraint = map(str, constraint)
        if not ext_pos.addTagConstraints(parser.VectorString(constraint)):
            # At least one tag is unknown to the model -- pinpoint the
            # offender(s) and raise a ValueError.
            self._find_bad_tag_and_raise_error(constraint)
    sentence = Sentence(tokens)
    parses = parser.parse(sentence.sentrep, ext_pos,
                          self._parser_thread_slot)
    nbest_list = NBestList(sentence, parses)
    if rerank:
        nbest_list.rerank(self)
    return nbest_list
示例4: load_parsing_model
def load_parsing_model(self, model_dir, language='En',
                       case_insensitive=False, nbest=50, small_corpus=True,
                       overparsing=21, debug=0, smoothPos=0):
    """Load the parsing model from model_dir and configure parsing
    options (the defaults should generally suffice). The native parser
    allows only one model per process, so a second call raises
    ValueError, as does a nonexistent model directory."""
    if self._parser_model_loaded:
        raise ValueError('Parser is already loaded and can only be loaded once.')
    if not os.path.exists(model_dir):
        raise ValueError('Parser model directory %r does not exist.' % model_dir)
    self._parser_model_loaded = True
    parser.loadModel(model_dir)
    self.parser_model_dir = model_dir
    # Push the option set down to the native layer in one call.
    parser.setOptions(language, case_insensitive, nbest, small_corpus,
                      overparsing, debug, smoothPos)
示例5: parse
def parse(self, sentence, rerank='auto', sentence_id=None):
    """Parse text or tokens and return an NBestList with the results.
    sentence may be a string (which will be tokenized) or a sequence of
    tokens. rerank may be True (always rerank), False (never), or
    'auto' (rerank only when a reranker model is loaded). If parsing
    fails or produces no parses, the returned NBestList is empty."""
    rerank = self.check_models_loaded_or_error(rerank)
    sentence = Sentence(sentence)
    # parser.max_sentence_length is one greater than the true limit.
    longest_allowed = parser.max_sentence_length - 1
    if len(sentence) >= longest_allowed:
        raise ValueError("Sentence is too long (%s tokens, must be "
                         "under %s)" % (len(sentence), longest_allowed))
    try:
        parses = parser.parse(sentence.sentrep)
    except RuntimeError:
        # Native-layer failure: fall back to an empty n-best list.
        parses = []
    nbest_list = NBestList(sentence, parses, sentence_id)
    if rerank:
        nbest_list.rerank(self)
    return nbest_list
示例6: __init__
def __init__(self, text_or_tokens, max_sentence_length=399):
    """Build the native sentence representation (sentrep) from a
    Sentence, a raw string, or a sequence of tokens. Strings are
    tokenized natively, subject to max_sentence_length."""
    if isinstance(text_or_tokens, Sentence):
        # Share the existing native representation.
        self.sentrep = text_or_tokens.sentrep
    elif isinstance(text_or_tokens, basestring):
        marked_up = '<s> ' + text_or_tokens + ' </s>'
        self.sentrep = parser.tokenize(marked_up, max_sentence_length)
    else:
        self.sentrep = parser.SentRep(text_or_tokens)
示例7: sentences_from_file
def sentences_from_file(this_class, filename):
    """Read a file of SGML(-ish) lines (typical command-line parser
    input) and return a list of Sentence objects, one per tree in the
    file."""
    # The native call below leaks memory; __init__ compensates by
    # acquiring the pointer of each SentRep it wraps.
    native_reps = parser.sentRepsFromFile(filename)
    return [this_class(rep) for rep in native_reps]
示例8: trees_from_file
def trees_from_file(this_class, filename):
    """Read a file of Penn Treebank trees and return a list of Tree
    objects, one per tree in the file."""
    # See trees_from_string for the memory-ownership rationale behind
    # acquiring each outermost tree's pointer.
    native_trees = list(parser.inputTreesFromFile(filename))
    for native_tree in native_trees:
        native_tree.this.acquire()
    return [this_class(tree) for tree in native_trees]
示例9: __init__
def __init__(self, text_or_tokens, max_sentence_length=399):
    """Build the native sentence representation (sentrep) from a
    Sentence, a raw string, or a sequence of tokens. Strings are
    tokenized natively, subject to max_sentence_length."""
    if isinstance(text_or_tokens, Sentence):
        # Share the existing native representation.
        self.sentrep = text_or_tokens.sentrep
    elif isinstance(text_or_tokens, basestring):
        marked_up = '<s> ' + text_or_tokens + ' </s>'
        self.sentrep = parser.tokenize(marked_up, max_sentence_length)
    else:
        # A sequence: coerce every element to a string so the native
        # layer doesn't crash on non-string input.
        self.sentrep = parser.SentRep(map(str, text_or_tokens))
示例10: log_prob
def log_prob(self):
    """Score this tree under the currently loaded first-stage parsing
    model and return the model's log probability (the Python equivalent
    of the evalTree command-line tool). Raises ValueError if no parser
    model has been loaded."""
    if not RerankingParser._parser_model_loaded:
        raise ValueError("You need to have loaded a parser model in "
                         "order to get the log probability.")
    return parser.treeLogProb(self._tree)
示例11: sentences_from_string
def sentences_from_string(this_class, text):
    """Split text containing SGML(-ish) lines (typical command-line
    parser input) into a list of Sentence objects, one per tree.
    Example usage:
    >>> Sentence.sentences_from_string('<s> Test </s>')
    [bllipparser.RerankingParser.Sentence(['Test'])]
    """
    # The native call below leaks memory; __init__ compensates by
    # acquiring the pointer of each SentRep it wraps.
    native_reps = parser.sentRepsFromString(text)
    return [this_class(rep) for rep in native_reps]
示例12: set_parser_options
def set_parser_options(self, language='En', case_insensitive=False,
                       nbest=50, small_corpus=True, overparsing=21,
                       debug=0, smooth_pos=0):
    """Configure the loaded parser and return the new options as a
    dictionary. load_parser_model() calls this automatically, so you
    only need it to change options afterwards.

    language: 'En' (English), 'Ch' (Chinese), or 'Ar' (Arabic).
    case_insensitive: make the parser ignore capitalization.
    nbest: maximum size of the n-best list.
    small_corpus: enable extra smoothing (designed for small training
        corpora but helpful in many situations).
    overparsing: how much additional time to spend on a sentence,
        relative to the time needed for the first complete parse;
        controls the speed/accuracy tradeoff.
    debug: non-negative integer; values above 0 print debug messages.
    smooth_pos: values above 0 set the probability of seeing a known
        word in a part-of-speech never observed for it in training.

    Raises RuntimeError if no parser model has been loaded."""
    if not RerankingParser._parser_model_loaded:
        raise RuntimeError('Parser must already be loaded (call '
                           'load_parser_model() first)')
    parser.setOptions(language, case_insensitive, nbest, small_corpus,
                      overparsing, debug, smooth_pos)
    # Remember what we told the native layer so callers can inspect it.
    self.parser_options = dict(language=language,
                               case_insensitive=case_insensitive,
                               nbest=nbest,
                               small_corpus=small_corpus,
                               overparsing=overparsing,
                               debug=debug,
                               smooth_pos=smooth_pos)
    return self.parser_options
示例13: __str__
def __str__(self):
    """Render the n-best list in the same output format as the
    command-line parser and reranker."""
    if not self._reranked:
        # Not reranked yet: the native layer knows how to format it.
        return parser.asNBestList(self._parses)
    from cStringIO import StringIO
    buf = StringIO()
    buf.write('%d dummy\n' % len(self.parses))
    for parse in self.parses:
        buf.write('%s %s\n%s\n' %
                  (parse.reranker_score, parse.parser_score,
                   parse.ptb_parse))
    return buf.getvalue()
示例14: trees_from_string
def trees_from_string(this_class, text):
    """Split text containing multiple Penn Treebank trees into a list
    of Tree objects, one per tree."""
    # Memory ownership: the native call hands us ownership of the
    # InputTree objects inside the vector, so we acquire their pointers
    # and keep them in a Python list (the vector itself won't persist).
    # An InputTree usually contains nested InputTrees, and deleting the
    # outer tree frees the inner ones -- so we acquire only each
    # outermost tree's pointer.
    native_trees = list(parser.inputTreesFromString(text))
    for native_tree in native_trees:
        native_tree.this.acquire()
    return [this_class(tree) for tree in native_trees]
示例15: parse
def parse(self, sentence, rerank=True, max_sentence_length=399):
    """Parse text or tokens and return an NBestList with the results.
    sentence may be a string (which will be tokenized) or a sequence of
    tokens. When rerank is True the n-best list is reranked."""
    self.check_loaded_models(rerank)
    sentence = Sentence(sentence, max_sentence_length)
    try:
        parses = parser.parse(sentence.sentrep, self._parser_thread_slot)
    except RuntimeError:
        # Native-layer failure: fall back to an empty n-best list.
        parses = []
    results = NBestList(sentence, parses)
    if rerank:
        results.rerank(self)
    return results