本文整理汇总了Python中future.utils.text_type方法的典型用法代码示例。如果您正苦于以下问题:Python utils.text_type方法的具体用法?Python utils.text_type怎么用?Python utils.text_type使用的例子?那么恭喜您,这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在模块future.utils的用法示例。
在下文中一共展示了utils.text_type方法的11个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Python代码示例。
示例1: __is_valid_pos
# 需要导入模块: from future import utils [as 别名]
# 或者: from future.utils import text_type [as 别名]
def __is_valid_pos(pos_tuple, valid_pos):
    # type: (Tuple[text_type,...],List[Tuple[text_type,...]])->bool
    """Tell whether a token's POS tuple matches any of the accepted POS conditions.

    A condition matches when it is equal to the leading elements of
    ``pos_tuple`` (a prefix comparison), so ``('a',)`` matches ``('a', 'b')``.

    :param pos_tuple: POS tuple of the token being checked.
    :param valid_pos: list of POS-condition tuples to accept.
    :return: True if at least one condition is a prefix of ``pos_tuple``;
        False otherwise, including when ``valid_pos`` is empty.
    """
    # any() stops at the first matching condition instead of materialising
    # an intermediate list and set of boolean flags.
    return any(
        condition == pos_tuple[:len(condition)]
        for condition in valid_pos
    )
示例2: __convert_string_type
# 需要导入模块: from future import utils [as 别名]
# 或者: from future.utils import text_type [as 别名]
def __convert_string_type(self, p_c_tuple):
    # type: (Tuple[text_type,...])->Tuple[text_type,...]
    """Normalize every element of a POS-condition tuple to the text type.

    Under Python 2, ``str`` elements are decoded with ``self.string_encoding``
    and unicode elements are kept as they are. Under Python 3 the elements
    are returned unchanged.

    :param p_c_tuple: POS condition, a tuple of strings.
    :return: a new tuple with the normalized elements, in the same order.
    :raises Exception: if ``p_c_tuple`` is not a tuple, or (Python 2 only)
        if an element is neither ``str`` nor unicode.
    """
    if not isinstance(p_c_tuple, tuple):
        raise Exception('Pos condition expects tuple of string. However = {}'.format(p_c_tuple))

    converted = []
    for pos_element in p_c_tuple:
        if six.PY2 and isinstance(pos_element, str):
            # str into unicode if python2.x
            converted.append(pos_element.decode(self.string_encoding))
        elif six.PY2 and isinstance(pos_element, text_type):
            converted.append(pos_element)
        elif six.PY3:
            converted.append(pos_element)
        else:
            # Name the offending element so the failure can be diagnosed.
            raise Exception(
                'Pos condition element must be a string. However = {!r}'.format(pos_element))

    return tuple(converted)
示例3: __init__
# 需要导入模块: from future import utils [as 别名]
# 或者: from future.utils import text_type [as 别名]
def __init__(self,
             node_obj,
             tuple_pos,
             word_stem,
             word_surface,
             is_feature=True,
             is_surface=False,
             misc_info=None,
             analyzed_line=None):
    # type: (Optional[Node], Tuple[text_type, ...], str, str, bool, bool, Optional[Dict[str, Any]], Optional[str])->None
    """Store the analysis result of a single token.

    :param node_obj: a ``Node`` instance or None.
    :param tuple_pos: POS information. A tuple is stored as given; a plain
        string is replaced by the placeholder tuple ``('*', )``.
    :param word_stem: stem form of the word (string).
    :param word_surface: surface form of the word (text type).
    :param is_feature: stored on the instance unchanged.
    :param is_surface: stored on the instance unchanged.
    :param misc_info: optional dict with additional information.
    :param analyzed_line: stored on the instance unchanged.
    :raises AssertionError: if an argument has an unexpected type.
    :raises Exception: if ``tuple_pos`` is neither a tuple nor a string
        (only reachable when assertions are disabled).
    """
    # Argument type checks.
    assert isinstance(node_obj, (Node, type(None)))
    assert isinstance(tuple_pos, (string_types, tuple))
    assert isinstance(word_stem, string_types)
    assert isinstance(word_surface, text_type)
    assert isinstance(misc_info, (type(None), dict))

    # Values kept exactly as passed in.
    self.node_obj = node_obj
    self.word_stem = word_stem
    self.word_surface = word_surface
    self.is_surface = is_surface
    self.is_feature = is_feature
    self.misc_info = misc_info
    self.analyzed_line = analyzed_line

    # POS information: keep a tuple, replace a bare string by the placeholder.
    if not isinstance(tuple_pos, (tuple, string_types)):
        raise Exception('Error while parsing feature object. {}'.format(tuple_pos))
    self.tuple_pos = tuple_pos if isinstance(tuple_pos, tuple) else ('*', )
示例4: unicode_string
# 需要导入模块: from future import utils [as 别名]
# 或者: from future.utils import text_type [as 别名]
def unicode_string(string):
    """Return *string* as unicode text.

    Text values are returned as text; byte values are decoded as UTF-8.
    The ``newstr`` and ``newbytes`` types are handled in addition to the
    native text and bytes types.

    :param string: value to convert.
    :return: the unicode text form of ``string``.
    :raises TypeError: if ``string`` is none of the supported types.
    """
    if isinstance(string, text_type):
        return string
    if isinstance(string, bytes):
        return string.decode("utf8")
    if isinstance(string, newstr):
        return text_type(string)
    if isinstance(string, newbytes):
        # Return the decoded text. It used to be only assigned to a local,
        # so control fell through to the TypeError below.
        return bytes(string).decode("utf8")
    raise TypeError("Cannot convert %s into unicode string" % type(string))
示例5: sql_quote
# 需要导入模块: from future import utils [as 别名]
# 或者: from future.utils import text_type [as 别名]
def sql_quote(value):
    """Render *value* as a SQL literal.

    :param value: value to render.
    :return: ``"NULL"`` when the value compares equal to ``Null``;
        ``"1"`` for ``True`` and ``"0"`` for ``False``; a single-quoted
        string with embedded single quotes doubled for text values;
        otherwise the text form of the value.
    """
    if value == Null:
        return "NULL"
    elif value is True:
        # SQL convention: true is 1, false is 0 (the two were swapped before).
        return "1"
    elif value is False:
        return "0"
    elif isinstance(value, text_type):
        # text_type is the unicode type on both Python 2 and 3; the bare
        # name `unicode` only exists on Python 2.
        return "'" + value.replace("'", "''") + "'"
    else:
        return text_type(value)
示例6: s3_bucket
# 需要导入模块: from future import utils [as 别名]
# 或者: from future.utils import text_type [as 别名]
def s3_bucket(s3_server):  # pylint: disable=redefined-outer-name
    """
    Create a bucket named after a freshly generated UUID on the given S3 server.

    Returns ``BucketInfo(client, name)``, built from the server's S3 client and
    the name of the bucket that was created.
    """
    s3_client = s3_server.get_s3_client()
    name = text_type(uuid.uuid4())
    s3_client.create_bucket(Bucket=name)
    return BucketInfo(s3_client, name)
示例7: run_cmd
# 需要导入模块: from future import utils [as 别名]
# 或者: from future.utils import text_type [as 别名]
def run_cmd(self):
    """Build the argument list that launches the minio server.

    The list holds the configured minio executable, the ``server``
    sub-command, ``--address`` with ``<hostname>:<port>``, and the data
    directory converted to text.
    """
    executable = CONFIG.minio_executable
    listen_address = "{}:{}".format(self.hostname, self.port)
    data_dir = text_type(self.datadir)
    return [executable, "server", "--address", listen_address, data_dir]
示例8: grab_commit_info
# 需要导入模块: from future import utils [as 别名]
# 或者: from future.utils import text_type [as 别名]
def grab_commit_info(c, rev):
    """Fill the change dict *c* with details of revision *rev* taken from ``git show``.

    Runs ``git show --raw --pretty=full <rev>`` and scans the output line by
    line, decoding each line with the module-level ``encoding``:

    - lines starting with four spaces are collected as the commit comments;
    - raw diff lines (``:... M|A|D <path>``) add ``<path>`` to the file list;
    - the ``Author:`` line sets ``c['who']``;
    - a ``Merge:`` line adds the marker ``'merge'`` to the file list.

    On return ``c['comments']`` and ``c['files']`` are set. A non-zero exit
    status of ``git show`` is logged as a warning.

    :param c: change dict, updated in place.
    :param rev: revision passed to ``git show``.
    """
    git_show = subprocess.Popen(
        shlex.split("git show --raw --pretty=full %s" % rev),
        stdout=subprocess.PIPE)

    changed_files = []
    comment_lines = []
    comment_prefix = 4 * ' '

    while True:
        line = git_show.stdout.readline().decode(encoding)
        if not line:
            # An empty read means end of output.
            break

        if line.startswith(comment_prefix):
            comment_lines.append(line[len(comment_prefix):])

        file_match = re.match(r"^:.*[MAD]\s+(.+)$", line)
        if file_match:
            # A raw diff line cannot also be an Author/Merge line.
            logging.debug("Got file: %s", file_match.group(1))
            changed_files.append(text_type(file_match.group(1)))
        else:
            author_match = re.match(r"^Author:\s+(.+)$", line)
            if author_match:
                logging.debug("Got author: %s", author_match.group(1))
                c['who'] = text_type(author_match.group(1))

            if re.match(r"^Merge: .*$", line):
                changed_files.append('merge')

    c['comments'] = ''.join(comment_lines)
    c['files'] = changed_files

    exit_status = git_show.wait()
    if exit_status:
        logging.warning("git show exited with status %d", exit_status)
示例9: gen_changes
# 需要导入模块: from future import utils [as 别名]
# 或者: from future.utils import text_type [as 别名]
def gen_changes(input, branch):
    """Read revision lines from *input* and append one change dict per revision to ``changes``.

    Each line read from ``input.stdout`` is decoded with the module-level
    ``encoding`` and is expected to look like ``<hex revision> <text>``.
    For every such line a dict with ``revision`` and ``branch`` is built,
    extended with ``category``, ``repository``, ``project`` and ``codebase``
    when the module-level values of those names are truthy, completed by
    ``grab_commit_info`` and appended to the module-level ``changes`` list.

    Lines that do not match the expected pattern are logged and skipped.

    :param input: object whose ``stdout`` attribute provides ``readline()``
        returning bytes.
    :param branch: branch name recorded in every change.
    """
    while True:
        line = input.stdout.readline().decode(encoding)
        if not line:
            # An empty read means end of input.
            break

        logging.debug("Change: %s", line)

        m = re.match(r"^([0-9a-f]+) (.*)$", line.strip())
        if m is None:
            # Without this guard m.group(1) raises AttributeError and a
            # single unexpected line aborts processing of all changes.
            logging.warning("Skipping unparsable change line: %r", line)
            continue

        revision = m.group(1)
        c = {'revision': revision,
             'branch': text_type(branch),
             }

        if category:
            c['category'] = text_type(category)

        if repository:
            c['repository'] = text_type(repository)

        if project:
            c['project'] = text_type(project)

        if codebase:
            c['codebase'] = text_type(codebase)

        grab_commit_info(c, revision)
        changes.append(c)
示例10: filter_words
# 需要导入模块: from future import utils [as 别名]
# 或者: from future.utils import text_type [as 别名]
def filter_words(tokenized_obj, valid_pos, stopwords, check_field_name='stem'):
    # type: (TokenizedSenetence, List[Tuple[text_type,...]], List[text_type],text_type) -> FilteredObject
    """Keep only the tokens that pass the stopword and part-of-speech filters.

    * Input
    - tokenized_obj
        - TokenizedSenetence whose ``tokenized_objects`` are filtered.
    - valid_pos
        - List of POS tuples to keep; an empty list switches POS filtering off.
        - Each tokenizer has its own POS structure.
        - Example: [('名詞', '固有名詞'), ('動詞', )]
    - stopwords
        - List of words to remove; an empty list switches stopword filtering off.
        - Example: ['残念', '今日']
    - check_field_name
        - 'stem' compares the stopwords against ``word_stem``; any other
          value compares them against ``word_surface``.

    * Output
    - FilteredObject holding the sentence, the kept tokens and both conditions.
    - When both ``valid_pos`` and ``stopwords`` are empty, no token is kept.
    """
    assert isinstance(tokenized_obj, TokenizedSenetence)
    assert isinstance(valid_pos, list)
    assert isinstance(stopwords, list)

    # Which filters are switched on does not change from token to token.
    pos_filter_on = valid_pos != []
    stopword_filter_on = stopwords != []

    filtered_tokens = []
    for token_obj in tokenized_obj.tokenized_objects:
        assert isinstance(token_obj, TokenizedResult)

        checked_word = token_obj.word_stem if check_field_name == 'stem' else token_obj.word_surface
        is_stopword = __is_sotpwords(checked_word, stopwords)
        pos_matches = __is_valid_pos(token_obj.tuple_pos, valid_pos)

        if pos_filter_on and stopword_filter_on:
            # both conditions are ON
            keep = is_stopword is False and pos_matches
        elif pos_filter_on:
            # only pos filtering is ON
            keep = pos_matches
        elif stopword_filter_on:
            # only stopwords filtering is ON
            keep = is_stopword is False
        else:
            # no filter is ON: nothing is kept
            keep = False

        if keep:
            filtered_tokens.append(token_obj)

    return FilteredObject(
        sentence=tokenized_obj.sentence,
        tokenized_objects=filtered_tokens,
        pos_condition=valid_pos,
        stopwords=stopwords
    )
示例11: filter
# 需要导入模块: from future import utils [as 别名]
# 或者: from future.utils import text_type [as 别名]
def filter(self,
           pos_condition=None,
           stopwords=None,
           is_normalize=True,
           func_normalizer=normalize_text,
           check_field_name='stem'):
    # type: (List[Tuple[text_type,...]], List[text_type], bool, Callable[[text_type], text_type],text_type)->FilteredObject
    """* What you can do
    - It removes the tokens which do NOT meet the conditions (stopwords & part-of-speech tag).
    - Under python2.x, stopwords given as str are decoded into unicode with ``self.string_encoding``.

    * Parameters
    - pos_condition: list of part-of-speech (pos) conditions, each one a tuple of variable length.
        A longer tuple describes a deeper level of the pos hierarchy defined by the dictionary.
        - For example, in mecab ('名詞',) takes words tagged 名詞
        - For example, in mecab ('名詞', '固有名詞') takes words tagged 名詞-固有名詞
        None means no pos condition.
    - stopwords: list of words to remove. None means no stopwords.
    - is_normalize: if True, every stopword is passed through ``func_normalizer``.
    - func_normalizer: normalization function; it should be the same one that was used for tokenizing.
    - check_field_name: field compared against the stopwords. Kytea has no stem form, put 'surface' instead.

    * Example
    >>> pos_condition = [('名詞', '一般'), ('形容詞', '自立'), ('助詞', '格助詞', '一般')]
    >>> stopwords = ['これ', 'それ']
    """
    assert isinstance(pos_condition, (type(None), list))
    assert isinstance(stopwords, (type(None), list))

    # Step 1: bring the stopwords into their final (text, optionally normalized) form.
    if stopwords is None:
        s_words = []
    else:
        if six.PY2 and all(isinstance(s, str) for s in stopwords):
            # under python2.x, from str into unicode
            text_words = [s.decode(self.string_encoding) for s in stopwords]
        else:
            text_words = stopwords
        s_words = [func_normalizer(s) for s in text_words] if is_normalize else text_words

    # Step 2: validate the pos conditions, if any were given.
    p_condition = [] if pos_condition is None else self.__check_pos_condition(pos_condition)

    # Step 3: run the actual filtering.
    filtered_object = filter_words(
        tokenized_obj=self,
        valid_pos=p_condition,
        stopwords=s_words,
        check_field_name=check_field_name
    )
    assert isinstance(filtered_object, FilteredObject)
    return filtered_object