当前位置: 首页>>代码示例>>Python>>正文


Python compat.text_type方法代码示例

本文整理汇总了Python中nltk.compat.text_type方法的典型用法代码示例。如果您正苦于以下问题:Python compat.text_type方法的具体用法?Python compat.text_type怎么用?Python compat.text_type使用的例子?那么恭喜您, 这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在nltk.compat的用法示例。


在下文中一共展示了compat.text_type方法的14个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Python代码示例。

示例1: __init__

# 需要导入模块: from nltk import compat [as 别名]
# 或者: from nltk.compat import text_type [as 别名]
def __init__(self, tokens, name=None):
        """
        Create a Text object.

        :param tokens: The source text.
        :type tokens: sequence of str
        :param name: an optional display name; if omitted, one is derived
            from the leading tokens.
        """
        if self._COPY_TOKENS:
            tokens = list(tokens)
        self.tokens = tokens

        # Derive a display name when none was given: prefer a bracketed
        # title at the head of the token stream, else the first 8 tokens.
        if name:
            self.name = name
            return
        head = tokens[:20]
        if ']' in head:
            close = head.index(']')
            self.name = " ".join(text_type(t) for t in tokens[1:close])
        else:
            self.name = " ".join(text_type(t) for t in tokens[:8]) + "..."

    #////////////////////////////////////////////////////////////
    # Support item & slice access
    #//////////////////////////////////////////////////////////// 
开发者ID:Thejas-1,项目名称:Price-Comparator,代码行数:24,代码来源:text.py

示例2: tgrep_positions

# 需要导入模块: from nltk import compat [as 别名]
# 或者: from nltk.compat import text_type [as 别名]
def tgrep_positions(pattern, trees, search_leaves=True):
    """
    Return the tree positions in the trees which match the given pattern.

    :param pattern: a tgrep search pattern
    :type pattern: str or output of tgrep_compile()
    :param trees: a sequence of NLTK trees (usually ParentedTrees)
    :type trees: iter(ParentedTree) or iter(Tree)
    :param search_leaves: whether to return matching leaf nodes
    :type search_leaves: bool
    :rtype: iter(tree positions)
    """
    # A string pattern must first be compiled into a predicate.
    if isinstance(pattern, (binary_type, text_type)):
        pattern = tgrep_compile(pattern)

    for tree in trees:
        try:
            candidates = (tree.treepositions() if search_leaves
                          else treepositions_no_leaves(tree))
            yield [pos for pos in candidates if pattern(tree[pos])]
        except AttributeError:
            # Objects that are not trees yield no matches.
            yield []
开发者ID:Thejas-1,项目名称:Price-Comparator,代码行数:28,代码来源:tgrep.py

示例3: tag

# 需要导入模块: from nltk import compat [as 别名]
# 或者: from nltk.compat import text_type [as 别名]
def tag(self, tokens):
        """Tags a single sentence: a list of words.
        The tokens should not contain any newline characters.
        """
        # Feed each token to hunpos on its own line; unicode tokens are
        # encoded with the tagger's configured encoding first.
        for token in tokens:
            assert "\n" not in token, "Tokens should not contain newlines"
            data = token
            if isinstance(data, compat.text_type):
                data = data.encode(self._encoding)
            self._hunpos.stdin.write(data + b"\n")
        # A trailing empty line tells hunpos the sentence is complete.
        self._hunpos.stdin.write(b"\n")
        self._hunpos.stdin.flush()

        # Read one "token\ttag" line back for every token we sent.
        result = []
        for token in tokens:
            fields = self._hunpos.stdout.readline().strip().split(b"\t")
            result.append((token, fields[1] if len(fields) > 1 else None))
        # Consume (and discard) the empty line that terminates the sentence.
        self._hunpos.stdout.readline()

        return result

# skip doctests if Hunpos tagger is not installed 
开发者ID:Thejas-1,项目名称:Price-Comparator,代码行数:26,代码来源:hunpos.py

示例4: tag_sents

# 需要导入模块: from nltk import compat [as 别名]
# 或者: from nltk.compat import text_type [as 别名]
def tag_sents(self, sentences):
    """Tag multiple sentences in a single run of the Stanford tagger.

    :param sentences: a sequence of sentences, each a sequence of tokens
    :return: the result of ``self.parse_output`` on the tagger's stdout

    The sentences are written one-per-line to a temporary file, the java
    tagger is run over that file, and the decoded output is parsed.
    """
    encoding = self._encoding
    default_options = ' '.join(_java_options)
    config_java(options=self.java_options, verbose=False)

    # Create a temporary input file
    _input_fh, self._input_file_path = tempfile.mkstemp(text=True)

    cmd = list(self._cmd)
    cmd.extend(['-encoding', encoding])

    try:
        # Write the actual sentences to the temporary input file
        _input_fh = os.fdopen(_input_fh, 'wb')
        _input = '\n'.join(' '.join(x) for x in sentences)
        if isinstance(_input, compat.text_type) and encoding:
            _input = _input.encode(encoding)
        _input_fh.write(_input)
        _input_fh.close()

        # Run the tagger and get the output
        stanpos_output, _stderr = java(cmd, classpath=self._stanford_jar,
                                       stdout=PIPE, stderr=PIPE)
        stanpos_output = stanpos_output.decode(encoding)
    finally:
        # Fix: previously the temporary file was leaked and the java
        # options were never restored if the java call raised.
        os.unlink(self._input_file_path)
        config_java(options=default_options, verbose=False)

    return self.parse_output(stanpos_output, sentences)
开发者ID:memray,项目名称:seq2seq-keyphrase,代码行数:33,代码来源:stanford-pos-tagger.py

示例5: __repr__

# 需要导入模块: from nltk import compat [as 别名]
# 或者: from nltk.compat import text_type [as 别名]
def __repr__(self):
        """
        Return a string representation for this corpus view that is
        similar to a list's representation; but if it would be more
        than 60 characters long, it is truncated.
        """
        sep = ',\n ' if self._BREAK_LINES else ', '
        reprs = []
        total = 5  # budget for the surrounding brackets / ellipsis
        for item in self:
            # Key difference from the inherited version: call _short_repr().
            reprs.append(item._short_repr())
            total += len(reprs[-1]) + 2
            truncate = (self._MAX_REPR_SIZE and total > self._MAX_REPR_SIZE
                        and len(reprs) > 2)
            if truncate:
                return "[%s, ...]" % text_type(sep).join(reprs[:-1])
        return "[%s]" % text_type(sep).join(reprs)
开发者ID:Thejas-1,项目名称:Price-Comparator,代码行数:17,代码来源:framenet.py

示例6: _tgrep_node_literal_value

# 需要导入模块: from nltk import compat [as 别名]
# 或者: from nltk.compat import text_type [as 别名]
def _tgrep_node_literal_value(node):
    '''
    Gets the string value of a given parse tree node, for comparison
    using the tgrep node literal predicates.
    '''
    # Trees compare by their label; anything else by its string form.
    if _istree(node):
        return node.label()
    return text_type(node)
开发者ID:Thejas-1,项目名称:Price-Comparator,代码行数:8,代码来源:tgrep.py

示例7: _execute

# 需要导入模块: from nltk import compat [as 别名]
# 或者: from nltk.compat import text_type [as 别名]
def _execute(self, cmd, input_, verbose=False):
        """Run the CoreNLP java command over ``input_`` and return its stdout.

        :param cmd: java argument list; extended in place with the encoding
            flag, any ``corenlp_options``, and (when not reading from stdin)
            the temp-file name
        :param input_: the text to process; unicode is encoded with
            ``self._encoding`` before being written to the temp file
        :param verbose: forwarded to ``config_java``
        :return: the tool's stdout decoded with ``self._encoding``
        """
        encoding = self._encoding
        cmd.extend(['-encoding', encoding])
        if self.corenlp_options:
            cmd.append(self.corenlp_options)

        default_options = ' '.join(_java_options)

        # Configure java.
        config_java(options=self.java_options, verbose=verbose)

        input_file = None
        try:
            # Windows is incompatible with NamedTemporaryFile() without
            # passing in delete=False.
            with tempfile.NamedTemporaryFile(mode='wb', delete=False) as input_file:
                # Write the actual sentences to the temporary input file
                if isinstance(input_, compat.text_type) and encoding:
                    input_ = input_.encode(encoding)
                input_file.write(input_)
                input_file.flush()

                # Run the tagger and get the output.
                if self._USE_STDIN:
                    input_file.seek(0)
                    stdout, stderr = java(cmd, classpath=self._classpath,
                                          stdin=input_file, stdout=PIPE, stderr=PIPE)
                else:
                    cmd.append(input_file.name)
                    stdout, stderr = java(cmd, classpath=self._classpath,
                                          stdout=PIPE, stderr=PIPE)

                # Normalize non-breaking spaces (UTF-8 and Latin-1 forms)
                # to plain spaces before decoding.
                stdout = stdout.replace(b'\xc2\xa0', b' ')
                stdout = stdout.replace(b'\xa0', b' ')
                stdout = stdout.decode(encoding)
        finally:
            # Fix: previously the temp file was leaked and the java options
            # never restored when the java invocation raised.
            if input_file is not None:
                os.unlink(input_file.name)
            config_java(options=default_options, verbose=False)

        return stdout
开发者ID:Thejas-1,项目名称:Price-Comparator,代码行数:41,代码来源:stanford.py

示例8: _execute

# 需要导入模块: from nltk import compat [as 别名]
# 或者: from nltk.compat import text_type [as 别名]
def _execute(self, cmd, input_, verbose=False):
        """Run the Stanford java tool over ``input_`` and return its stdout.

        :param cmd: java argument list; extended in place with the charset
            flag, any ``_options_cmd`` options, and the temp-file name
        :param input_: the text to process; unicode is encoded with
            ``self._encoding`` before being written to the temp file
        :param verbose: forwarded to ``config_java``
        :return: the tool's stdout decoded with ``self._encoding``
        """
        encoding = self._encoding
        cmd.extend(['-charset', encoding])
        _options_cmd = self._options_cmd
        if _options_cmd:
            cmd.extend(['-options', self._options_cmd])

        default_options = ' '.join(_java_options)

        # Configure java.
        config_java(options=self.java_options, verbose=verbose)

        input_file = None
        try:
            # Windows is incompatible with NamedTemporaryFile() without
            # passing in delete=False.
            with tempfile.NamedTemporaryFile(mode='wb', delete=False) as input_file:
                # Write the actual sentences to the temporary input file
                if isinstance(input_, compat.text_type) and encoding:
                    input_ = input_.encode(encoding)
                input_file.write(input_)
                input_file.flush()

                cmd.append(input_file.name)

                # Run the tagger and get the output.
                stdout, stderr = java(cmd, classpath=self._stanford_jar,
                                      stdout=PIPE, stderr=PIPE)
                stdout = stdout.decode(encoding)
        finally:
            # Fix: previously the temp file was leaked and the java options
            # never restored when the java invocation raised.
            if input_file is not None:
                os.unlink(input_file.name)
            config_java(options=default_options, verbose=False)

        return stdout
开发者ID:Thejas-1,项目名称:Price-Comparator,代码行数:35,代码来源:stanford.py

示例9: fromxml

# 需要导入模块: from nltk import compat [as 别名]
# 或者: from nltk.compat import text_type [as 别名]
def fromxml(xml):
        """
        Build a Package from an XML source.

        ``xml`` may be an element with attributes, or a path/file which is
        first parsed; attribute values are coerced to unicode before being
        passed as keyword arguments to the Package constructor.
        """
        if isinstance(xml, compat.string_types):
            xml = ElementTree.parse(xml)
        attrib = xml.attrib
        for key in list(attrib):
            attrib[key] = compat.text_type(attrib[key])
        return Package(**attrib)
开发者ID:Thejas-1,项目名称:Price-Comparator,代码行数:8,代码来源:downloader.py

示例10: tag_sents

# 需要导入模块: from nltk import compat [as 别名]
# 或者: from nltk.compat import text_type [as 别名]
def tag_sents(self, sentences):
        """Tag multiple sentences in a single run of the Stanford tagger.

        :param sentences: a sequence of sentences, each a sequence of tokens
        :return: the result of ``self.parse_output`` on the tagger's stdout

        The sentences are written one-per-line to a temporary file, the
        java tagger is run over that file, and the decoded output is parsed.
        """
        encoding = self._encoding
        default_options = ' '.join(_java_options)
        config_java(options=self.java_options, verbose=False)

        # Create a temporary input file
        _input_fh, self._input_file_path = tempfile.mkstemp(text=True)

        cmd = list(self._cmd)
        cmd.extend(['-encoding', encoding])

        try:
            # Write the actual sentences to the temporary input file
            _input_fh = os.fdopen(_input_fh, 'wb')
            _input = '\n'.join(' '.join(x) for x in sentences)
            if isinstance(_input, compat.text_type) and encoding:
                _input = _input.encode(encoding)
            _input_fh.write(_input)
            _input_fh.close()

            # Run the tagger and get the output
            stanpos_output, _stderr = java(cmd, classpath=self._stanford_jar,
                                           stdout=PIPE, stderr=PIPE)
            stanpos_output = stanpos_output.decode(encoding)
        finally:
            # Fix: previously the temporary file was leaked and the java
            # options were never restored if the java call raised.
            os.unlink(self._input_file_path)
            config_java(options=default_options, verbose=False)

        return self.parse_output(stanpos_output, sentences)
开发者ID:Thejas-1,项目名称:Price-Comparator,代码行数:33,代码来源:stanford.py

示例11: segment_sents

# 需要导入模块: from nltk import compat [as 别名]
# 或者: from nltk.compat import text_type [as 别名]
def segment_sents(self, sentences):
        """Segment a batch of sentences with the Stanford CRF segmenter.

        :param sentences: a sequence of sentences, each a sequence of tokens
        :return: the raw stdout produced by ``self._execute``

        The sentences are written one-per-line to a temporary file which
        the segmenter reads via its ``-textFile`` option.
        """
        encoding = self._encoding
        # Create a temporary input file
        _input_fh, self._input_file_path = tempfile.mkstemp(text=True)

        try:
            # Write the actual sentences to the temporary input file
            _input_fh = os.fdopen(_input_fh, 'wb')
            _input = '\n'.join(' '.join(x) for x in sentences)
            if isinstance(_input, compat.text_type) and encoding:
                _input = _input.encode(encoding)
            _input_fh.write(_input)
            _input_fh.close()

            cmd = [
                'edu.stanford.nlp.ie.crf.CRFClassifier',
                '-sighanCorporaDict', self._sihan_corpora_dict,
                '-textFile', self._input_file_path,
                '-sighanPostProcessing', 'true',
                '-keepAllWhitespaces', 'false',
                '-loadClassifier', self._model,
                '-serDictionary', self._dict
            ]

            stdout = self._execute(cmd)
        finally:
            # Fix: previously the temporary file was leaked if the
            # segmenter invocation raised.
            os.unlink(self._input_file_path)

        return stdout
开发者ID:SignalMedia,项目名称:PyDataLondon29-EmbarrassinglyParallelDAWithAWSLambda,代码行数:33,代码来源:stanford_segmenter.py

示例12: fromxml

# 需要导入模块: from nltk import compat [as 别名]
# 或者: from nltk.compat import text_type [as 别名]
def fromxml(xml):
        """
        Build a Collection from an XML source.

        ``xml`` may be an element with attributes, or a path/file which is
        first parsed. Child ``<item>`` elements supply the collection's
        children via their ``ref`` attributes; attribute values are coerced
        to unicode before being passed to the Collection constructor.
        """
        if isinstance(xml, compat.string_types):
            xml = ElementTree.parse(xml)
        attrib = xml.attrib
        for key in list(attrib):
            attrib[key] = compat.text_type(attrib[key])
        refs = [item.get('ref') for item in xml.findall('item')]
        return Collection(children=refs, **attrib)
开发者ID:SignalMedia,项目名称:PyDataLondon29-EmbarrassinglyParallelDAWithAWSLambda,代码行数:9,代码来源:downloader.py

示例13: _execute

# 需要导入模块: from nltk import compat [as 别名]
# 或者: from nltk.compat import text_type [as 别名]
def _execute(self, cmd, input_, verbose=False):
        """Run the CoreNLP java command over ``input_`` and return its stdout.

        :param cmd: java argument list; extended in place with the encoding
            flag, any ``corenlp_options``, and (when not reading from stdin)
            the temp-file name
        :param input_: the text to process; unicode is encoded with
            ``self._encoding`` before being written to the temp file
        :param verbose: forwarded to ``config_java``
        :return: the tool's stdout decoded with ``self._encoding``
        """
        encoding = self._encoding
        cmd.extend(['-encoding', encoding])
        if self.corenlp_options:
            cmd.append(self.corenlp_options)

        default_options = ' '.join(_java_options)

        # Configure java.
        config_java(options=self.java_options, verbose=verbose)

        input_file = None
        try:
            # Windows is incompatible with NamedTemporaryFile() without
            # passing in delete=False.
            with tempfile.NamedTemporaryFile(mode='wb', delete=False) as input_file:
                # Write the actual sentences to the temporary input file
                if isinstance(input_, compat.text_type) and encoding:
                    input_ = input_.encode(encoding)
                input_file.write(input_)
                input_file.flush()

                # Run the tagger and get the output.
                if self._USE_STDIN:
                    input_file.seek(0)
                    stdout, stderr = java(cmd, classpath=self._classpath,
                                          stdin=input_file, stdout=PIPE, stderr=PIPE)
                else:
                    cmd.append(input_file.name)
                    stdout, stderr = java(cmd, classpath=self._classpath,
                                          stdout=PIPE, stderr=PIPE)

                stdout = stdout.decode(encoding)
        finally:
            # Fix: previously the temp file was leaked and the java options
            # never restored when the java invocation raised.
            if input_file is not None:
                os.unlink(input_file.name)
            config_java(options=default_options, verbose=False)

        return stdout
开发者ID:jarrellmark,项目名称:neighborhood_mood_aws,代码行数:39,代码来源:stanford.py

示例14: tgrep_nodes

# 需要导入模块: from nltk import compat [as 别名]
# 或者: from nltk.compat import text_type [as 别名]
def tgrep_nodes(pattern, trees, search_leaves=True):
    """
    Return the tree nodes in the trees which match the given pattern.

    :param pattern: a tgrep search pattern
    :type pattern: str or output of tgrep_compile()
    :param trees: a sequence of NLTK trees (usually ParentedTrees)
    :type trees: iter(ParentedTree) or iter(Tree)
    :param search_leaves: whether to return matching leaf nodes
    :type search_leaves: bool
    :rtype: iter(tree nodes)
    """
    # A string pattern must first be compiled into a predicate.
    if isinstance(pattern, (binary_type, text_type)):
        pattern = tgrep_compile(pattern)

    for tree in trees:
        try:
            candidates = (tree.treepositions() if search_leaves
                          else treepositions_no_leaves(tree))
            yield [tree[pos] for pos in candidates if pattern(tree[pos])]
        except AttributeError:
            # Objects that are not trees yield no matches.
            yield []
开发者ID:sdoran35,项目名称:hate-to-hugs,代码行数:28,代码来源:tgrep.py


注:本文中的nltk.compat.text_type方法示例由纯净天空整理自Github/MSDocs等开源代码及文档管理平台,相关代码片段筛选自各路编程大神贡献的开源项目,源码版权归原作者所有,传播和使用请参考对应项目的License;未经允许,请勿转载。