當前位置: 首頁>>代碼示例>>Python>>正文


Python compat.text_type方法代碼示例

本文整理匯總了Python中nltk.compat.text_type方法的典型用法代碼示例。如果您正苦於以下問題:Python compat.text_type方法的具體用法?Python compat.text_type怎麽用?Python compat.text_type使用的例子?那麽, 這裏精選的方法代碼示例或許可以為您提供幫助。您也可以進一步了解該方法所在nltk.compat的用法示例。


在下文中一共展示了compat.text_type方法的14個代碼示例,這些例子默認根據受歡迎程度排序。您可以為喜歡或者感覺有用的代碼點讚,您的評價將有助於係統推薦出更棒的Python代碼示例。

示例1: __init__

# 需要導入模塊: from nltk import compat [as 別名]
# 或者: from nltk.compat import text_type [as 別名]
def __init__(self, tokens, name=None):
        """
        Build a Text around *tokens*, deriving a display name when none
        is supplied.

        :param tokens: The source text.
        :type tokens: sequence of str
        :param name: Optional explicit name for this text.
        """
        # Optionally snapshot the sequence so later mutation of the
        # caller's list cannot affect this Text.
        self.tokens = list(tokens) if self._COPY_TOKENS else tokens

        head = self.tokens[:20]
        if name:
            self.name = name
        elif ']' in head:
            # The text opens with a bracketed title; use the words
            # between the brackets as the name.
            close = head.index(']')
            self.name = " ".join(text_type(w) for w in self.tokens[1:close])
        else:
            # Fall back to the first eight tokens plus an ellipsis.
            self.name = " ".join(text_type(w) for w in self.tokens[:8]) + "..."

    #////////////////////////////////////////////////////////////
    # Support item & slice access
    #//////////////////////////////////////////////////////////// 
開發者ID:Thejas-1,項目名稱:Price-Comparator,代碼行數:24,代碼來源:text.py

示例2: tgrep_positions

# 需要導入模塊: from nltk import compat [as 別名]
# 或者: from nltk.compat import text_type [as 別名]
def tgrep_positions(pattern, trees, search_leaves=True):
    """
    Return the tree positions in the trees which match the given pattern.

    :param pattern: a tgrep search pattern
    :type pattern: str or output of tgrep_compile()
    :param trees: a sequence of NLTK trees (usually ParentedTrees)
    :type trees: iter(ParentedTree) or iter(Tree)
    :param search_leaves: whether to return matching leaf nodes
    :type search_leaves: bool
    :rtype: iter(tree positions)
    """

    # Accept either a pre-compiled pattern (a callable) or a raw
    # str/bytes pattern, compiling the latter on the fly.
    if isinstance(pattern, (binary_type, text_type)):
        pattern = tgrep_compile(pattern)

    for tree in trees:
        try:
            if search_leaves:
                positions = tree.treepositions()
            else:
                positions = treepositions_no_leaves(tree)
            yield [position for position in positions
                      if pattern(tree[position])]
        except AttributeError:
            # `tree` lacks treepositions() (not an NLTK tree): no matches.
            yield [] 
開發者ID:Thejas-1,項目名稱:Price-Comparator,代碼行數:28,代碼來源:tgrep.py

示例3: tag

# 需要導入模塊: from nltk import compat [as 別名]
# 或者: from nltk.compat import text_type [as 別名]
def tag(self, tokens):
        """Tags a single sentence: a list of words.
        The tokens should not contain any newline characters.

        :param tokens: the words of one sentence
        :return: list of (token, tag) pairs; each tag is the bytes read
            back from hunpos, or None if no tag was produced
        """
        for token in tokens:
            assert "\n" not in token, "Tokens should not contain newlines"
            if isinstance(token, compat.text_type):
                # hunpos reads bytes on stdin; encode text tokens first.
                token = token.encode(self._encoding)
            self._hunpos.stdin.write(token + b"\n")
        # We write a final empty line to tell hunpos that the sentence is finished:
        self._hunpos.stdin.write(b"\n")
        self._hunpos.stdin.flush()

        tagged_tokens = []
        for token in tokens:
            # One output line per input token, expected as b"token\ttag".
            tagged = self._hunpos.stdout.readline().strip().split(b"\t")
            tag = (tagged[1] if len(tagged) > 1 else None)
            tagged_tokens.append((token, tag))
        # We have to read (and dismiss) the final empty line:
        self._hunpos.stdout.readline()

        return tagged_tokens

# skip doctests if Hunpos tagger is not installed 
開發者ID:Thejas-1,項目名稱:Price-Comparator,代碼行數:26,代碼來源:hunpos.py

示例4: tag_sents

# 需要導入模塊: from nltk import compat [as 別名]
# 或者: from nltk.compat import text_type [as 別名]
def tag_sents(self, sentences):
    """
    Tag each sentence (a list of tokens) with the Stanford POS tagger.

    The sentences are written to a temporary file, the Java tagger is
    run over that file, and its raw output is handed to
    self.parse_output().

    :param sentences: iterable of token lists, one per sentence
    :return: the result of self.parse_output()
    """
    encoding = self._encoding
    default_options = ' '.join(_java_options)
    config_java(options=self.java_options, verbose=False)

    # Create a temporary input file
    _input_fh, self._input_file_path = tempfile.mkstemp(text=True)

    try:
        cmd = list(self._cmd)
        cmd.extend(['-encoding', encoding])

        # Write the actual sentences to the temporary input file
        _input_fh = os.fdopen(_input_fh, 'wb')
        _input = '\n'.join(' '.join(x) for x in sentences)
        if isinstance(_input, compat.text_type) and encoding:
            _input = _input.encode(encoding)
        _input_fh.write(_input)
        _input_fh.close()

        # Run the tagger and get the output
        stanpos_output, _stderr = java(cmd, classpath=self._stanford_jar,
                                       stdout=PIPE, stderr=PIPE)
        stanpos_output = stanpos_output.decode(encoding)
    finally:
        # Delete the temporary file and restore the default java
        # configuration even when the tagger fails, so an error does
        # not leak the temp file or a modified java setup.
        os.unlink(self._input_file_path)
        config_java(options=default_options, verbose=False)

    return self.parse_output(stanpos_output, sentences)
開發者ID:memray,項目名稱:seq2seq-keyphrase,代碼行數:33,代碼來源:stanford-pos-tagger.py

示例5: __repr__

# 需要導入模塊: from nltk import compat [as 別名]
# 或者: from nltk.compat import text_type [as 別名]
def __repr__(self):
        """
        Return a list-like string representation of this corpus view,
        truncated with an ellipsis once it grows past _MAX_REPR_SIZE
        characters.  Key difference from the inherited version: each
        element is rendered with its _short_repr() method.
        """
        def _join(items):
            return text_type(',\n ' if self._BREAK_LINES else ', ').join(items)

        parts = []
        total = 5  # initial overhead (brackets etc.)

        for item in self:
            parts.append(item._short_repr())
            total += len(parts[-1]) + 2
            if (self._MAX_REPR_SIZE and total > self._MAX_REPR_SIZE
                    and len(parts) > 2):
                return "[%s, ...]" % _join(parts[:-1])
        return "[%s]" % _join(parts)
開發者ID:Thejas-1,項目名稱:Price-Comparator,代碼行數:17,代碼來源:framenet.py

示例6: _tgrep_node_literal_value

# 需要導入模塊: from nltk import compat [as 別名]
# 或者: from nltk.compat import text_type [as 別名]
def _tgrep_node_literal_value(node):
    """
    Return the string used when comparing *node* against tgrep node
    literal predicates: the label for tree nodes, the text otherwise.
    """
    if _istree(node):
        return node.label()
    return text_type(node)
開發者ID:Thejas-1,項目名稱:Price-Comparator,代碼行數:8,代碼來源:tgrep.py

示例7: _execute

# 需要導入模塊: from nltk import compat [as 別名]
# 或者: from nltk.compat import text_type [as 別名]
def _execute(self, cmd, input_, verbose=False):
        """
        Run the Java command *cmd* over *input_* and return its decoded
        standard output.

        :param cmd: list of command-line arguments for the Java tool
        :param input_: the text to process (str or bytes)
        :param verbose: passed through to config_java()
        :return: the tool's stdout, decoded with self._encoding
        """
        encoding = self._encoding
        cmd.extend(['-encoding', encoding])
        if self.corenlp_options:
            cmd.append(self.corenlp_options)

        default_options = ' '.join(_java_options)

        # Configure java.
        config_java(options=self.java_options, verbose=verbose)

        # Windows is incompatible with NamedTemporaryFile() without passing in delete=False.
        with tempfile.NamedTemporaryFile(mode='wb', delete=False) as input_file:
            # Write the actual sentences to the temporary input file
            if isinstance(input_, compat.text_type) and encoding:
                input_ = input_.encode(encoding)
            input_file.write(input_)
            input_file.flush()

            # Run the tagger and get the output.
            if self._USE_STDIN:
                input_file.seek(0)
                stdout, stderr = java(cmd, classpath=self._classpath,
                                      stdin=input_file, stdout=PIPE, stderr=PIPE)
            else:
                cmd.append(input_file.name)
                stdout, stderr = java(cmd, classpath=self._classpath,
                                      stdout=PIPE, stderr=PIPE)

            # Normalize non-breaking spaces to plain spaces before decoding:
            # b'\xc2\xa0' is U+00A0 in UTF-8, b'\xa0' is its single-byte form.
            stdout = stdout.replace(b'\xc2\xa0',b' ')
            stdout = stdout.replace(b'\xa0',b' ')
            stdout = stdout.decode(encoding)

        os.unlink(input_file.name)

        # Return java configurations to their default values.
        config_java(options=default_options, verbose=False)

        return stdout 
開發者ID:Thejas-1,項目名稱:Price-Comparator,代碼行數:41,代碼來源:stanford.py

示例8: _execute

# 需要導入模塊: from nltk import compat [as 別名]
# 或者: from nltk.compat import text_type [as 別名]
def _execute(self, cmd, input_, verbose=False):
        """
        Run the Stanford jar with *cmd* over *input_* and return its
        decoded standard output.

        :param cmd: list of command-line arguments for the Java tool
        :param input_: the text to process (str or bytes)
        :param verbose: passed through to config_java()
        """
        encoding = self._encoding
        cmd.extend(['-charset', encoding])
        if self._options_cmd:
            cmd.extend(['-options', self._options_cmd])

        default_options = ' '.join(_java_options)

        # Configure java.
        config_java(options=self.java_options, verbose=verbose)

        # NamedTemporaryFile must be created with delete=False so it can
        # be reopened by name on Windows.
        with tempfile.NamedTemporaryFile(mode='wb', delete=False) as tmp:
            # Write the input to the temporary file, encoding text first.
            payload = input_
            if isinstance(payload, compat.text_type) and encoding:
                payload = payload.encode(encoding)
            tmp.write(payload)
            tmp.flush()

            cmd.append(tmp.name)

            # Run the tool and capture its output.
            stdout, _stderr = java(cmd, classpath=self._stanford_jar,
                                   stdout=PIPE, stderr=PIPE)
            stdout = stdout.decode(encoding)

        os.unlink(tmp.name)

        # Return java configurations to their default values.
        config_java(options=default_options, verbose=False)

        return stdout
開發者ID:Thejas-1,項目名稱:Price-Comparator,代碼行數:35,代碼來源:stanford.py

示例9: fromxml

# 需要導入模塊: from nltk import compat [as 別名]
# 或者: from nltk.compat import text_type [as 別名]
def fromxml(xml):
        """
        Build a Package from an XML <package> element, or from the path
        of an XML file whose root is such an element.

        :param xml: an ElementTree element, or a filename/path (str)
        :return: a Package constructed from the element's attributes
        """
        if isinstance(xml, compat.string_types):
            # A path was given: parse the file and work on its root
            # element.  ElementTree.parse() returns an ElementTree,
            # which has no .attrib, so .getroot() is required here.
            xml = ElementTree.parse(xml).getroot()
        for key in xml.attrib:
            xml.attrib[key] = compat.text_type(xml.attrib[key])
        return Package(**xml.attrib) 
開發者ID:Thejas-1,項目名稱:Price-Comparator,代碼行數:8,代碼來源:downloader.py

示例10: tag_sents

# 需要導入模塊: from nltk import compat [as 別名]
# 或者: from nltk.compat import text_type [as 別名]
def tag_sents(self, sentences):
        """
        Tag each sentence (a list of tokens) with the Stanford POS tagger.

        The sentences are written to a temporary file, the Java tagger is
        run over that file, and its raw output is handed to
        self.parse_output().

        :param sentences: iterable of token lists, one per sentence
        :return: the result of self.parse_output()
        """
        encoding = self._encoding
        default_options = ' '.join(_java_options)
        config_java(options=self.java_options, verbose=False)

        # Create a temporary input file
        _input_fh, self._input_file_path = tempfile.mkstemp(text=True)

        try:
            cmd = list(self._cmd)
            cmd.extend(['-encoding', encoding])

            # Write the actual sentences to the temporary input file
            _input_fh = os.fdopen(_input_fh, 'wb')
            _input = '\n'.join(' '.join(x) for x in sentences)
            if isinstance(_input, compat.text_type) and encoding:
                _input = _input.encode(encoding)
            _input_fh.write(_input)
            _input_fh.close()

            # Run the tagger and get the output
            stanpos_output, _stderr = java(cmd, classpath=self._stanford_jar,
                                           stdout=PIPE, stderr=PIPE)
            stanpos_output = stanpos_output.decode(encoding)
        finally:
            # Delete the temporary file and restore the default java
            # configuration even when the tagger fails, so an error does
            # not leak the temp file or a modified java setup.
            os.unlink(self._input_file_path)
            config_java(options=default_options, verbose=False)

        return self.parse_output(stanpos_output, sentences)
開發者ID:Thejas-1,項目名稱:Price-Comparator,代碼行數:33,代碼來源:stanford.py

示例11: segment_sents

# 需要導入模塊: from nltk import compat [as 別名]
# 或者: from nltk.compat import text_type [as 別名]
def segment_sents(self, sentences):
        """
        Segment tokenized sentences with the Stanford Chinese segmenter.

        The sentences are joined with spaces and newlines, written to a
        temporary file, and passed to the CRFClassifier via _execute().

        :param sentences: iterable of token lists, one per sentence
        :return: the raw stdout produced by _execute()
        """
        encoding = self._encoding
        # Create a temporary input file
        _input_fh, self._input_file_path = tempfile.mkstemp(text=True)

        # Write the actual sentences to the temporary input file
        _input_fh = os.fdopen(_input_fh, 'wb')
        _input = '\n'.join((' '.join(x) for x in sentences))
        if isinstance(_input, compat.text_type) and encoding:
            _input = _input.encode(encoding)
        _input_fh.write(_input)
        _input_fh.close()

        # Build the CRFClassifier invocation around the temp file and the
        # segmenter's model/dictionary resources.
        cmd = [
            'edu.stanford.nlp.ie.crf.CRFClassifier',
            '-sighanCorporaDict', self._sihan_corpora_dict,
            '-textFile', self._input_file_path,
            '-sighanPostProcessing', 'true',
            '-keepAllWhitespaces', 'false',
            '-loadClassifier', self._model,
            '-serDictionary', self._dict
        ]

        stdout = self._execute(cmd)

        # Delete the temporary file
        os.unlink(self._input_file_path)

        return stdout 
開發者ID:SignalMedia,項目名稱:PyDataLondon29-EmbarrassinglyParallelDAWithAWSLambda,代碼行數:33,代碼來源:stanford_segmenter.py

示例12: fromxml

# 需要導入模塊: from nltk import compat [as 別名]
# 或者: from nltk.compat import text_type [as 別名]
def fromxml(xml):
        """
        Build a Collection from an XML <collection> element, or from the
        path of an XML file whose root is such an element.

        :param xml: an ElementTree element, or a filename/path (str)
        :return: a Collection whose children are the 'ref' attributes of
            the element's <item> children
        """
        if isinstance(xml, compat.string_types):
            # A path was given: parse the file and work on its root
            # element.  ElementTree.parse() returns an ElementTree,
            # which has no .attrib, so .getroot() is required here.
            xml = ElementTree.parse(xml).getroot()
        for key in xml.attrib:
            xml.attrib[key] = compat.text_type(xml.attrib[key])
        children = [child.get('ref') for child in xml.findall('item')]
        return Collection(children=children, **xml.attrib) 
開發者ID:SignalMedia,項目名稱:PyDataLondon29-EmbarrassinglyParallelDAWithAWSLambda,代碼行數:9,代碼來源:downloader.py

示例13: _execute

# 需要導入模塊: from nltk import compat [as 別名]
# 或者: from nltk.compat import text_type [as 別名]
def _execute(self, cmd, input_, verbose=False):
        """
        Run the Java command *cmd* over *input_* and return its decoded
        standard output.

        :param cmd: list of command-line arguments for the Java tool
        :param input_: the text to process (str or bytes)
        :param verbose: passed through to config_java()
        :return: the tool's stdout, decoded with self._encoding
        """
        encoding = self._encoding
        cmd.extend(['-encoding', encoding])
        if self.corenlp_options:
            cmd.append(self.corenlp_options)

        default_options = ' '.join(_java_options)

        # Configure java.
        config_java(options=self.java_options, verbose=verbose)

        # Windows is incompatible with NamedTemporaryFile() without passing in delete=False.
        with tempfile.NamedTemporaryFile(mode='wb', delete=False) as input_file:
            # Write the actual sentences to the temporary input file
            if isinstance(input_, compat.text_type) and encoding:
                input_ = input_.encode(encoding)
            input_file.write(input_)
            input_file.flush()

            # Run the tagger and get the output.
            if self._USE_STDIN:
                input_file.seek(0)
                stdout, stderr = java(cmd, classpath=self._classpath,
                                      stdin=input_file, stdout=PIPE, stderr=PIPE)
            else:
                cmd.append(input_file.name)
                stdout, stderr = java(cmd, classpath=self._classpath,
                                      stdout=PIPE, stderr=PIPE)

            stdout = stdout.decode(encoding)

        os.unlink(input_file.name)

        # Return java configurations to their default values.
        config_java(options=default_options, verbose=False)

        return stdout 
開發者ID:jarrellmark,項目名稱:neighborhood_mood_aws,代碼行數:39,代碼來源:stanford.py

示例14: tgrep_nodes

# 需要導入模塊: from nltk import compat [as 別名]
# 或者: from nltk.compat import text_type [as 別名]
def tgrep_nodes(pattern, trees, search_leaves=True):
    """
    Return the tree nodes in the trees which match the given pattern.

    :param pattern: a tgrep search pattern
    :type pattern: str or output of tgrep_compile()
    :param trees: a sequence of NLTK trees (usually ParentedTrees)
    :type trees: iter(ParentedTree) or iter(Tree)
    :param search_leaves: whether to return matching leaf nodes
    :type search_leaves: bool
    :rtype: iter(tree nodes)
    """
    # Compile raw str/bytes patterns into a predicate callable.
    if isinstance(pattern, (binary_type, text_type)):
        pattern = tgrep_compile(pattern)

    for tree in trees:
        try:
            candidates = (tree.treepositions() if search_leaves
                          else treepositions_no_leaves(tree))
            yield [tree[pos] for pos in candidates if pattern(tree[pos])]
        except AttributeError:
            # Not an NLTK tree: nothing can match.
            yield []
開發者ID:sdoran35,項目名稱:hate-to-hugs,代碼行數:28,代碼來源:tgrep.py


注:本文中的nltk.compat.text_type方法示例由純淨天空整理自Github/MSDocs等開源代碼及文檔管理平台,相關代碼片段篩選自各路編程大神貢獻的開源項目,源碼版權歸原作者所有,傳播和使用請參考對應項目的License;未經允許,請勿轉載。