Python CharniakParser类代码示例

本文整理汇总了Python中CharniakParser类的典型用法代码示例。如果您正苦于以下问题：Python CharniakParser类的具体用法？Python CharniakParser怎么用？Python CharniakParser使用的例子？那么，这里精选的类代码示例或许可以为您提供帮助。

在下文中一共展示了CharniakParser类的15个代码示例，这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞，您的评价将有助于系统推荐出更棒的Python代码示例。

示例1: __init__

 def __init__(self, text_or_tokens):
     """Store the native sentence representation in self.sentrep.

     text_or_tokens may be another Sentence (its sentrep is reused),
     a string (wrapped in '<s> ' / ' </s>' markers and handed to
     parser.tokenize), or a sequence of tokens."""
     if isinstance(text_or_tokens, Sentence):
         sentrep = text_or_tokens.sentrep
     elif isinstance(text_or_tokens, basestring):
         sentrep = parser.tokenize('<s> ' + text_or_tokens + ' </s>')
     else:
         # Sequence input: every element is converted with str() first
         # (non-string elements would otherwise crash) and then passed
         # through parser.ptbEscape.
         escaped_tokens = []
         for token in text_or_tokens:
             escaped_tokens.append(parser.ptbEscape(str(token)))
         sentrep = parser.SentRep(escaped_tokens)
     self.sentrep = sentrep

开发者ID:cdg720，项目名称:bllip-parser，代码行数:11，代码来源:RerankingParser.py

示例2: load_parser_model

 def load_parser_model(self, model_dir, **parser_options):
     """Load the parsing model found in model_dir, then apply options.

     Keyword arguments are forwarded unchanged to set_parser_options()
     (see that method for their meaning; the defaults are normally
     fine). Only one model can be loaded per process.

     Raises RuntimeError if a parser model was already loaded and
     ValueError if model_dir does not exist."""
     already_loaded = self._parser_model_loaded
     if already_loaded:
         raise RuntimeError('Parser is already loaded and can only be loaded once.')

     directory_found = exists(model_dir)
     if not directory_found:
         raise ValueError('Parser model directory %r does not exist.' % model_dir)

     # The flag and directory are recorded before the native load runs,
     # so a load that raises still leaves the parser marked as loaded.
     self._parser_model_loaded = True
     self.parser_model_dir = model_dir
     parser.loadModel(model_dir)
     self.set_parser_options(**parser_options)

开发者ID:lixiangnlp，项目名称:bllip-parser，代码行数:14，代码来源:RerankingParser.py

示例3: parse_tagged

    def parse_tagged(self, tokens, possible_tags, rerank='auto'):
        """Parse pre-tokenized text whose POS tags are (partly) known.

        tokens is a sequence of strings (a plain string is rejected
        with ValueError). possible_tags maps token indices to either a
        single POS tag or a collection of POS tags; indices with no
        entry are left unconstrained. Tags must appear in the parsing
        model's terms.txt file, otherwise a ValueError is raised.

        rerank=True reranks the n-best list, rerank=False skips the
        reranker, and rerank='auto' reranks only when a reranker model
        is loaded. Returns the resulting NBestList."""
        rerank = self._check_loaded_models(rerank)
        if isinstance(tokens, basestring):
            raise ValueError("tokens must be a sequence, not a string.")

        constraints = parser.ExtPos()
        for position in range(len(tokens)):
            candidates = possible_tags.get(position, [])
            if isinstance(candidates, basestring):
                # a lone tag is treated as a one-element list
                candidates = [candidates]
            candidates = [str(tag) for tag in candidates]
            accepted = constraints.addTagConstraints(
                parser.VectorString(candidates))
            if accepted:
                continue
            # at least one tag was rejected -- the helper works out
            # which one and reports it via ValueError
            self._find_bad_tag_and_raise_error(candidates)

        wrapped = Sentence(tokens)
        raw_parses = parser.parse(wrapped.sentrep, constraints,
                                  self._parser_thread_slot)
        results = NBestList(wrapped, raw_parses)
        if rerank:
            results.rerank(self)
        return results

开发者ID:lixiangnlp，项目名称:bllip-parser，代码行数:32，代码来源:RerankingParser.py

示例4: load_parsing_model

 def load_parsing_model(self, model_dir, language='En',
                        case_insensitive=False, nbest=50, small_corpus=True,
                        overparsing=21, debug=0, smoothPos=0):
     """Load the parsing model in model_dir and configure the parser.

     The remaining arguments are passed, in order, to
     parser.setOptions(); the defaults are normally fine. Only one
     model can be loaded per process.

     Raises ValueError if a parser model was already loaded or if
     model_dir does not exist."""
     if self._parser_model_loaded:
         message = 'Parser is already loaded and can only be loaded once.'
         raise ValueError(message)
     if not os.path.exists(model_dir):
         message = 'Parser model directory %r does not exist.' % model_dir
         raise ValueError(message)

     option_values = (language, case_insensitive, nbest, small_corpus,
                      overparsing, debug, smoothPos)
     # The loaded flag is set before the native load is attempted.
     self._parser_model_loaded = True
     parser.loadModel(model_dir)
     self.parser_model_dir = model_dir
     parser.setOptions(*option_values)

开发者ID:niip42，项目名称:bllip-parser，代码行数:16，代码来源:RerankingParser.py

示例5: parse

    def parse(self, sentence, rerank='auto', sentence_id=None):
        """Parse text or tokens and return the results as an NBestList.

        sentence is either a string (which gets tokenized) or a
        sequence of tokens. rerank=True reranks the n-best list,
        rerank=False skips the reranker, and rerank='auto' reranks only
        when a reranker model is loaded. sentence_id is handed to the
        NBestList unchanged.

        If the native parser raises RuntimeError, the NBestList is
        built from an empty list of parses. Raises ValueError when the
        sentence is too long for the parser."""
        rerank = self.check_models_loaded_or_error(rerank)

        wrapped = Sentence(sentence)
        token_count = len(wrapped)
        # parser.max_sentence_length is one more than the longest
        # sentence the parser actually accepts
        length_limit = parser.max_sentence_length - 1
        if token_count >= length_limit:
            raise ValueError(
                "Sentence is too long (%s tokens, must be under %s)"
                % (token_count, length_limit))

        try:
            raw_parses = parser.parse(wrapped.sentrep)
        except RuntimeError:
            raw_parses = []
        results = NBestList(wrapped, raw_parses, sentence_id)
        if rerank:
            results.rerank(self)
        return results

开发者ID:pkclaus，项目名称:bllip-parser，代码行数:26，代码来源:RerankingParser.py

示例6: __init__

 def __init__(self, text_or_tokens, max_sentence_length=399):
     """Store the native sentence representation in self.sentrep.

     text_or_tokens may be another Sentence (its sentrep is reused),
     a string (wrapped in '<s> ' / ' </s>' markers and tokenized with
     the given max_sentence_length), or a sequence of tokens, which is
     passed to parser.SentRep as-is."""
     if isinstance(text_or_tokens, Sentence):
         sentrep = text_or_tokens.sentrep
     elif isinstance(text_or_tokens, basestring):
         marked_text = '<s> ' + text_or_tokens + ' </s>'
         sentrep = parser.tokenize(marked_text, max_sentence_length)
     else:
         sentrep = parser.SentRep(text_or_tokens)
     self.sentrep = sentrep

开发者ID:niip42，项目名称:bllip-parser，代码行数:8，代码来源:RerankingParser.py

示例7: sentences_from_file

 def sentences_from_file(this_class, filename):
     """Read SGML(-ish) sentences from a file into Sentence objects.

     filename is the path to a file containing multiple SGML(-ish)
     lines (typical input to the command line parser). Returns a list
     with one this_class instance per sentence representation that
     parser.sentRepsFromFile() produced for the file."""
     # Note that the native method below leaks. We work around this
     # by acquiring its pointer in __init__
     sentReps = parser.sentRepsFromFile(filename)
     # Build the list explicitly: a bare map() is a lazy iterator on
     # Python 3, which would contradict the documented list result.
     return [this_class(sentRep) for sentRep in sentReps]

开发者ID:pkclaus，项目名称:bllip-parser，代码行数:8，代码来源:RerankingParser.py

示例8: trees_from_file

 def trees_from_file(this_class, filename):
     """Read Penn Treebank trees from a file into Tree objects.

     filename is the path to a file containing multiple Penn Treebank
     trees. Returns a list with one this_class instance per tree that
     parser.inputTreesFromFile() produced for the file."""
     # see trees_from_string for an explanation of why each tree's
     # pointer is acquired here
     trees = list(parser.inputTreesFromFile(filename))
     for tree in trees:
         tree.this.acquire()
     # Build the list explicitly: a bare map() is a lazy iterator on
     # Python 3, which would contradict the documented list result.
     return [this_class(tree) for tree in trees]

开发者ID:pkclaus，项目名称:bllip-parser，代码行数:9，代码来源:RerankingParser.py

示例9: __init__

 def __init__(self, text_or_tokens, max_sentence_length=399):
     """Store the native sentence representation in self.sentrep.

     text_or_tokens may be another Sentence (its sentrep is reused),
     a string (wrapped in '<s> ' / ' </s>' markers and tokenized with
     the given max_sentence_length), or a sequence of tokens."""
     if isinstance(text_or_tokens, Sentence):
         sentrep = text_or_tokens.sentrep
     elif isinstance(text_or_tokens, basestring):
         marked_text = '<s> ' + text_or_tokens + ' </s>'
         sentrep = parser.tokenize(marked_text, max_sentence_length)
     else:
         # Sequence input: convert every element with str() so that
         # non-string elements cannot crash the native constructor.
         string_tokens = [str(token) for token in text_or_tokens]
         sentrep = parser.SentRep(string_tokens)
     self.sentrep = sentrep

开发者ID:lixiangnlp，项目名称:bllip-parser，代码行数:11，代码来源:RerankingParser.py

示例10: log_prob

    def log_prob(self):
        """Return the parser model's log probability for this tree.

        The currently loaded parsing model scores the existing tree;
        this is the Python equivalent of the evalTree command line
        tool.

        A parser model must be loaded before calling this method,
        otherwise ValueError is raised."""
        if RerankingParser._parser_model_loaded:
            return parser.treeLogProb(self._tree)
        raise ValueError("You need to have loaded a parser model in order "
                         "to get the log probability.")

开发者ID:pkclaus，项目名称:bllip-parser，代码行数:11，代码来源:RerankingParser.py

示例11: sentences_from_string

    def sentences_from_string(this_class, text):
        """Given text containing SGML(-ish) lines (typical input to
        the command line parser), returns a list of Sentence objects
        (one for each sentence in the text). Example usage:

        >>> Sentence.sentences_from_string('<s> Test </s>')
        [bllipparser.RerankingParser.Sentence(['Test'])]
        """
        # Note that the native method below leaks. We work around this
        # by acquiring its pointer in __init__
        sentReps = parser.sentRepsFromString(text)
        # Build the list explicitly: a bare map() is a lazy iterator on
        # Python 3, which would contradict the list shown in the
        # example above.
        return [this_class(sentRep) for sentRep in sentReps]

开发者ID:pkclaus，项目名称:bllip-parser，代码行数:12，代码来源:RerankingParser.py

示例12: set_parser_options

    def set_parser_options(self, language='En', case_insensitive=False,
                           nbest=50, small_corpus=True, overparsing=21,
                           debug=0, smooth_pos=0):
        """Update the parser's options and return them as a dictionary.

        load_parser_model() calls this automatically, so it only needs
        to be called directly to change options afterwards. The new
        options are also stored in self.parser_options. Raises
        RuntimeError if no parser model has been loaded yet.

        Options:
        language -- string naming the language: En (English), Ch
            (Chinese), or Ar (Arabic).
        case_insensitive -- make the parser ignore capitalization.
        nbest -- maximum size of the n-best list.
        small_corpus -- True enables additional smoothing (originally
            intended for training from small corpora, but helpful in
            many situations).
        overparsing -- how much more time the parser spends on a
            sentence relative to the time it took to find the first
            complete parse; this affects the speed/accuracy tradeoff.
        debug -- non-negative integer; values above 0 make the parser
            print debug messages.
        smooth_pos -- a value above 0 is used as the probability of
            seeing a known word in a new part-of-speech (one never
            seen in training)."""
        if not RerankingParser._parser_model_loaded:
            raise RuntimeError('Parser must already be loaded (call '
                               'load_parser_model() first)')

        new_options = dict(
            language=language,
            case_insensitive=case_insensitive,
            nbest=nbest,
            small_corpus=small_corpus,
            overparsing=overparsing,
            debug=debug,
            smooth_pos=smooth_pos,
        )
        # The options are recorded on the instance only after the
        # native call has been made.
        parser.setOptions(language, case_insensitive, nbest, small_corpus,
                          overparsing, debug, smooth_pos)
        self.parser_options = new_options
        return self.parser_options

开发者ID:pkclaus，项目名称:bllip-parser，代码行数:38，代码来源:RerankingParser.py

示例13: __str__

 def __str__(self):
     """Represent the n-best list in a similar output format to the
     command-line parser and reranker.

     When the list has not been reranked, the text comes from
     parser.asNBestList(). Otherwise it is a header line
     '<number of parses> dummy' followed, for each parse, by a line
     with its reranker score and parser score and a line with its
     Penn Treebank parse."""
     if not self._reranked:
         return parser.asNBestList(self._parses)

     # Assemble the text with str.join rather than writing to a
     # cStringIO buffer and reading it back; the output is the same
     # and no Python 2-only import is needed.
     header = '%d dummy\n' % len(self.parses)
     entries = ['%s %s\n%s\n' % (parse.reranker_score, parse.parser_score,
                                 parse.ptb_parse)
                for parse in self.parses]
     return header + ''.join(entries)

开发者ID:niip42，项目名称:bllip-parser，代码行数:14，代码来源:RerankingParser.py

示例14: trees_from_string

 def trees_from_string(this_class, text):
     """Given text containing multiple Penn Treebank trees, returns
     a list of Tree objects (one for each tree in the text)."""
     # Note: the native method below gives us memory ownership of
     # the InputTree objects in the vector. We acquire their pointers
     # and store them in a Python list (the vector won't stick
     # around). InputTree objects typically contain other InputTree
     # objects and the outer tree will free the inner trees when it is
     # deleted. So, we only need (and want) to acquire the pointer of
     # the outermost InputTree tree.
     trees = list(parser.inputTreesFromString(text))
     for tree in trees:
         tree.this.acquire()
     # Build the list explicitly: a bare map() is a lazy iterator on
     # Python 3, which would contradict the documented list result.
     return [this_class(tree) for tree in trees]

开发者ID:pkclaus，项目名称:bllip-parser，代码行数:14，代码来源:RerankingParser.py

示例15: parse

    def parse(self, sentence, rerank=True, max_sentence_length=399):
        """Parse text or tokens and return the results as an NBestList.

        sentence is either a string (which gets tokenized) or a
        sequence of tokens; max_sentence_length is passed on to the
        Sentence constructor. When rerank is true the n-best list is
        reranked before it is returned. If the native parser raises
        RuntimeError, the NBestList is built from an empty list of
        parses."""
        self.check_loaded_models(rerank)

        wrapped = Sentence(sentence, max_sentence_length)
        try:
            raw_parses = parser.parse(wrapped.sentrep,
                                      self._parser_thread_slot)
        except RuntimeError:
            raw_parses = []

        results = NBestList(wrapped, raw_parses)
        if rerank:
            results.rerank(self)
        return results