当前位置: 首页>>代码示例>>Python>>正文


Python utils.text_type方法代码示例

本文整理汇总了Python中future.utils.text_type方法的典型用法代码示例。如果您正苦于以下问题:Python utils.text_type方法的具体用法?Python utils.text_type怎么用?Python utils.text_type使用的例子?那么恭喜您, 这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在future.utils的用法示例。


在下文中一共展示了utils.text_type方法的11个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Python代码示例。

示例1: __is_valid_pos

# 需要导入模块: from future import utils [as 别名]
# 或者: from future.utils import text_type [as 别名]
def __is_valid_pos(pos_tuple, valid_pos):
    # type: (Tuple[text_type,...],List[Tuple[text_type,...]])->bool
    """Check whether a token's POS tuple matches any user-specified POS condition.

    A condition matches when it is a prefix of ``pos_tuple``; a shorter
    condition such as ``('名詞',)`` therefore accepts any more specific POS
    like ``('名詞', '固有名詞')``.

    :param pos_tuple: POS hierarchy of one token.
    :param valid_pos: list of POS prefix tuples the user wants to keep.
    :return: True if any condition is a prefix of pos_tuple, else False.
    """
    # any() short-circuits on the first match; the original built a full
    # list of flags and tested `True in set(...)`, which is equivalent
    # but does unnecessary work.
    return any(
        pos_tuple[:len(condition)] == condition
        for condition in valid_pos
    )
开发者ID:Kensuke-Mitsuzawa,项目名称:JapaneseTokenizers,代码行数:21,代码来源:datamodels.py

示例2: __convert_string_type

# 需要导入模块: from future import utils [as 别名]
# 或者: from future.utils import text_type [as 别名]
def __convert_string_type(self, p_c_tuple):
        # type: (Tuple[text_type,...])->Tuple[text_type]
        """Normalize every element of a POS condition tuple into text_type.

        Under Python 2, byte strings are decoded with ``self.string_encoding``;
        unicode elements (and everything under Python 3) pass through unchanged.

        :raises Exception: when the input is not a tuple, or an element has
            an unexpected type under Python 2.
        """
        if not isinstance(p_c_tuple, tuple):
            raise Exception('Pos condition expects tuple of string. However = {}'.format(p_c_tuple))

        normalized = []
        for element in p_c_tuple:
            if six.PY2 and isinstance(element, str):
                # Python 2 byte string -> unicode
                normalized.append(element.decode(self.string_encoding))
            elif six.PY2 and isinstance(element, text_type):
                normalized.append(element)
            elif six.PY3:
                normalized.append(element)
            else:
                raise Exception()

        return tuple(normalized)
开发者ID:Kensuke-Mitsuzawa,项目名称:JapaneseTokenizers,代码行数:23,代码来源:datamodels.py

示例3: __init__

# 需要导入模块: from future import utils [as 别名]
# 或者: from future.utils import text_type [as 别名]
def __init__(self,
             node_obj,
             tuple_pos,
             word_stem,
             word_surface,
             is_feature=True,
             is_surface=False,
             misc_info=None,
             analyzed_line=None):
        # type: (Optional[Node], Tuple[text_type, ...], str, str, bool, bool, Optional[Dict[str, Any]], str)->None
        """Hold one tokenized result (surface form, stem, POS, extra info).

        :param node_obj: backend node object, or None.
        :param tuple_pos: POS hierarchy tuple; a bare string is replaced by
            the wildcard ('*', ).
        :param word_stem: stem (dictionary) form of the token.
        :param word_surface: surface form as it appeared in the sentence.
        :param misc_info: optional dict of tokenizer-specific extras.
        :param analyzed_line: raw analyzer output line, if available.
        """
        assert isinstance(node_obj, (Node, type(None)))
        assert isinstance(tuple_pos, (string_types, tuple))
        assert isinstance(word_stem, (string_types))
        assert isinstance(word_surface, text_type)
        assert isinstance(misc_info, (type(None), dict))

        self.node_obj = node_obj
        self.word_stem = word_stem
        self.word_surface = word_surface
        self.is_surface = is_surface
        self.is_feature = is_feature
        self.misc_info = misc_info
        self.analyzed_line = analyzed_line

        if isinstance(tuple_pos, tuple):
            self.tuple_pos = tuple_pos
        else:
            if isinstance(tuple_pos, string_types):
                # a plain string carries no POS hierarchy; store the wildcard
                self.tuple_pos = ('*', )
            else:
                raise Exception('Error while parsing feature object. {}'.format(tuple_pos))
开发者ID:Kensuke-Mitsuzawa,项目名称:JapaneseTokenizers,代码行数:32,代码来源:datamodels.py

示例4: unicode_string

# 需要导入模块: from future import utils [as 别名]
# 或者: from future.utils import text_type [as 别名]
def unicode_string(string):
    """Coerce *string* into a native unicode string.

    Accepts native unicode, native bytes, and python-future's backported
    ``newstr`` / ``newbytes`` types.

    :raises TypeError: for any other input type.
    """
    if isinstance(string, text_type):
        return string
    if isinstance(string, bytes):
        return string.decode("utf8")
    if isinstance(string, newstr):
        return text_type(string)
    if isinstance(string, newbytes):
        # BUG FIX: the decoded value was assigned but never returned, so
        # newbytes input always fell through to the TypeError below.
        return bytes(string).decode("utf8")

    raise TypeError("Cannot convert %s into unicode string" % type(string))
开发者ID:snipsco,项目名称:snips-nlu,代码行数:13,代码来源:utils.py

示例5: sql_quote

# 需要导入模块: from future import utils [as 别名]
# 或者: from future.utils import text_type [as 别名]
def sql_quote(value):
    """Render a Python value as a SQL literal.

    Strings are single-quoted with embedded quotes doubled; ``Null`` maps to
    NULL; booleans map to SQLite's integer booleans; anything else is passed
    through text_type().
    """
    if value == Null:
        return "NULL"
    elif value is True:
        # BUG FIX: True/False were inverted (True rendered as "0").
        # SQLite represents true as 1 and false as 0.
        return "1"
    elif value is False:
        return "0"
    elif isinstance(value, unicode):
        # double embedded single quotes per SQL escaping rules
        return "'" + value.replace("'", "''") + "'"
    else:
        return text_type(value)
开发者ID:mozilla,项目名称:jx-sqlite,代码行数:13,代码来源:expressions.py

示例6: s3_bucket

# 需要导入模块: from future import utils [as 别名]
# 或者: from future.utils import text_type [as 别名]
def s3_bucket(s3_server):  # pylint: disable=redefined-outer-name
    """
    Creates a function-scoped s3 bucket,
    returning a BucketInfo namedtuple with `s3_bucket.client` and `s3_bucket.name` fields
    """
    # a fresh UUID keeps bucket names unique across test functions
    name = text_type(uuid.uuid4())
    s3_client = s3_server.get_s3_client()
    s3_client.create_bucket(Bucket=name)
    return BucketInfo(s3_client, name)
开发者ID:man-group,项目名称:pytest-plugins,代码行数:11,代码来源:s3.py

示例7: run_cmd

# 需要导入模块: from future import utils [as 别名]
# 或者: from future.utils import text_type [as 别名]
def run_cmd(self):
        """Build the argv list used to launch the minio server process."""
        listen_address = "{}:{}".format(self.hostname, self.port)
        return [
            CONFIG.minio_executable,
            "server",
            "--address",
            listen_address,
            text_type(self.datadir),
        ]
开发者ID:man-group,项目名称:pytest-plugins,代码行数:11,代码来源:s3.py

示例8: grab_commit_info

# 需要导入模块: from future import utils [as 别名]
# 或者: from future.utils import text_type [as 别名]
def grab_commit_info(c, rev):
    """Fill change dict *c* with author, files and comments for commit *rev*.

    Runs ``git show --raw --pretty=full`` and scans its output line by line:
    4-space-indented lines are commit-message text, ``:``-prefixed raw lines
    name modified/added/deleted files, and an Author header sets ``c['who']``.
    Merge commits get a synthetic 'merge' entry in the file list.
    """
    proc = subprocess.Popen(shlex.split("git show --raw --pretty=full %s" % rev),
                            stdout=subprocess.PIPE)

    file_list = []
    comment_lines = []

    while True:
        line = proc.stdout.readline().decode(encoding)
        if not line:
            break

        # commit message body is indented by exactly four spaces
        if line.startswith('    '):
            comment_lines.append(line[4:])

        file_match = re.match(r"^:.*[MAD]\s+(.+)$", line)
        if file_match:
            logging.debug("Got file: %s", file_match.group(1))
            file_list.append(text_type(file_match.group(1)))
            continue

        author_match = re.match(r"^Author:\s+(.+)$", line)
        if author_match:
            logging.debug("Got author: %s", author_match.group(1))
            c['who'] = text_type(author_match.group(1))

        if re.match(r"^Merge: .*$", line):
            file_list.append('merge')

    c['comments'] = ''.join(comment_lines)
    c['files'] = file_list
    exit_status = proc.wait()
    if exit_status:
        logging.warning("git show exited with status %d", exit_status)
开发者ID:buildbot,项目名称:buildbot-contrib,代码行数:37,代码来源:git_buildbot.py

示例9: gen_changes

# 需要导入模块: from future import utils [as 别名]
# 或者: from future.utils import text_type [as 别名]
def gen_changes(input, branch):
    """Read revision lines from *input* and append change dicts to ``changes``.

    Each line is expected to start with a hex revision id; optional
    module-level settings (category, repository, project, codebase) are
    copied into every change. Commit details are filled in by
    grab_commit_info().
    """
    while True:
        line = input.stdout.readline().decode(encoding)
        if not line:
            break

        logging.debug("Change: %s", line)

        m = re.match(r"^([0-9a-f]+) (.*)$", line.strip())
        change = {'revision': m.group(1),
                  'branch': text_type(branch),
                  }

        # copy each configured optional attribute, preserving key order
        for key, setting in (('category', category),
                             ('repository', repository),
                             ('project', project),
                             ('codebase', codebase)):
            if setting:
                change[key] = text_type(setting)

        grab_commit_info(change, m.group(1))
        changes.append(change)
开发者ID:buildbot,项目名称:buildbot-contrib,代码行数:29,代码来源:git_buildbot.py

示例10: filter_words

# 需要导入模块: from future import utils [as 别名]
# 或者: from future.utils import text_type [as 别名]
def filter_words(tokenized_obj, valid_pos, stopwords, check_field_name='stem'):
    # type: (TokenizedSenetence, List[Tuple[text_type,...]], List[text_type],text_type) -> FilteredObject
    """Drop tokens that are stopwords or fail the POS condition.

    :param tokenized_obj: sentence object holding the tokenized results.
    :param valid_pos: POS prefix tuples to keep, e.g. [('名詞', '固有名詞'), ('動詞', )].
        Keep in mind that each tokenizer has its own POS structure.
    :param stopwords: words to remove, e.g. ['残念', '今日'].
    :param check_field_name: 'stem' matches stopwords against word_stem,
        any other value matches against word_surface.
    :return: FilteredObject holding only the surviving tokens.

    Note: when both valid_pos and stopwords are empty, no tokens are kept
    (same behavior as the original three-case logic).
    """
    assert isinstance(tokenized_obj, TokenizedSenetence)
    assert isinstance(valid_pos, list)
    assert isinstance(stopwords, list)

    has_pos_filter = valid_pos != []
    has_stopword_filter = stopwords != []

    kept_tokens = []
    for token_obj in tokenized_obj.tokenized_objects:
        assert isinstance(token_obj, TokenizedResult)
        if check_field_name == 'stem':
            target_word = token_obj.word_stem
        else:
            target_word = token_obj.word_surface

        res_stopwords = __is_sotpwords(target_word, stopwords)
        res_pos_condition = __is_valid_pos(token_obj.tuple_pos, valid_pos)

        # case1: only pos filtering is ON
        if has_pos_filter and not has_stopword_filter and res_pos_condition:
            kept_tokens.append(token_obj)
        # case2: only stopwords filtering is ON
        if has_stopword_filter and not has_pos_filter and res_stopwords is False:
            kept_tokens.append(token_obj)
        # case3: both conditions are ON
        if has_pos_filter and has_stopword_filter \
                and res_stopwords is False and res_pos_condition:
            kept_tokens.append(token_obj)

    return FilteredObject(
        sentence=tokenized_obj.sentence,
        tokenized_objects=kept_tokens,
        pos_condition=valid_pos,
        stopwords=stopwords
    )
开发者ID:Kensuke-Mitsuzawa,项目名称:JapaneseTokenizers,代码行数:48,代码来源:datamodels.py

示例11: filter

# 需要导入模块: from future import utils [as 别名]
# 或者: from future.utils import text_type [as 别名]
def filter(self,
               pos_condition=None,
               stopwords=None,
               is_normalize=True,
               func_normalizer=normalize_text,
               check_field_name='stem'):
        # type: (List[Tuple[text_type,...]], List[text_type], bool, Callable[[text_type], text_type],text_type)->FilteredObject
        """Filter out tokens that do not satisfy the stopword / POS conditions.

        Under Python 2.x, str-type stopwords are decoded into unicode first.

        :param pos_condition: list of POS prefix tuples; a tuple may be any
            length, following the hierarchy of the tokenizer's dictionary.
            For example, in mecab ('名詞',) keeps all 名詞 and
            ('名詞', '固有名詞') keeps only 名詞-固有名詞.
        :param stopwords: list of words to remove.
        :param is_normalize: normalize stopwords before matching.
        :param func_normalizer: normalization function; must be the same one
            used at tokenize time.
        :param check_field_name: field checked against stopwords; Kytea has
            no stem form, so pass 'surface' for it.

        Example
        >>> pos_condition = [('名詞', '一般'), ('形容詞', '自立'), ('助詞', '格助詞', '一般')]
        >>> stopwords = ['これ', 'それ']
        """
        assert isinstance(pos_condition, (type(None), list))
        assert isinstance(stopwords, (type(None), list))

        if stopwords is None:
            prepared_stopwords = []
        elif six.PY2 and all((isinstance(s, str) for s in stopwords)):
            # Python 2: decode byte strings into unicode before (optionally) normalizing
            decoded = [s.decode(self.string_encoding) for s in stopwords]
            if is_normalize:
                prepared_stopwords = [func_normalizer(s) for s in decoded]
            else:
                prepared_stopwords = decoded
        else:
            if is_normalize:
                prepared_stopwords = [func_normalizer(s) for s in stopwords]
            else:
                prepared_stopwords = stopwords

        if pos_condition is None:
            prepared_pos = []
        else:
            prepared_pos = self.__check_pos_condition(pos_condition)

        filtered_object = filter_words(
            tokenized_obj=self,
            valid_pos=prepared_pos,
            stopwords=prepared_stopwords,
            check_field_name=check_field_name
        )
        assert isinstance(filtered_object, FilteredObject)

        return filtered_object
开发者ID:Kensuke-Mitsuzawa,项目名称:JapaneseTokenizers,代码行数:60,代码来源:datamodels.py


注:本文中的future.utils.text_type方法示例由纯净天空整理自Github/MSDocs等开源代码及文档管理平台,相关代码片段筛选自各路编程大神贡献的开源项目,源码版权归原作者所有,传播和使用请参考对应项目的License;未经允许,请勿转载。