本文整理汇总了Python中unidecode.unidecode方法的典型用法代码示例。如果您正苦于以下问题:Python unidecode.unidecode方法的具体用法?Python unidecode.unidecode怎么用?Python unidecode.unidecode使用的例子?那么,这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在模块unidecode的用法示例。
在下文中一共展示了unidecode.unidecode方法的15个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Python代码示例。
示例1: normalizestr
# 需要导入模块: import unidecode [as 别名]
# 或者: from unidecode import unidecode [as 别名]
def normalizestr(string):
    """ Return an ASCII transliteration of ``string``.

    Every ``(mark, replacement)`` pair produced by ``unicode_marks(string)``
    is substituted first; the text is then NFKC-normalized, stripped of
    surrounding whitespace and passed through ``unidecode``.  When the
    result differs from the input, a "Fixed string" notice is printed. """
    original = string
    for mark, replacement in unicode_marks(string):
        string = string.replace(mark, replacement)
    normalized = unicodedata.normalize('NFKC', string).strip()
    result = unidecode(normalized)
    if result != original:
        print("Fixed string: '{}'".format(result))
    return result
示例2: findVideoLength
# 需要导入模块: import unidecode [as 别名]
# 或者: from unidecode import unidecode [as 别名]
def findVideoLength(dataset, youtube_id, api_key=None):
    '''
    Look up the length of a YouTube video.

    dataset    -- only echoed in the failure message
    youtube_id -- video id; transliterated with unidecode before the lookup
    api_key    -- forwarded to get_youtube_api_stats

    Returns whatever getTotalTimeSecs computes from the parsed
    content['duration'] (reported as seconds in the log line), or 0 when the
    id cannot be transliterated or any step of the lookup raises.
    '''
    try:
        youtube_id = unidecode(youtube_id)
    except Exception:
        # A single pre-formatted argument prints the same text whether print
        # is a statement (Python 2) or a function (Python 3).
        print("youtube_id is not ascii? ytid= %s" % (youtube_id,))
        return 0
    try:
        assert youtube_id is not None, "[analyze videos] youtube id does not exist"
        # The second element of the returned pair is not needed here.
        content, _stats = get_youtube_api_stats(youtube_id=youtube_id, api_key=api_key, part=YOUTUBE_PARTS)
        durationDict = parseISOduration(content['duration'].encode("ascii","ignore"))
        length = getTotalTimeSecs(durationDict)
        print("[analyze videos] totalTime for youtube video %s is %s sec" % (youtube_id, length))
    except Exception as err:
        # AssertionError is a subclass of Exception, so one clause covers both.
        print("Failed to lookup video length for %s! Error=%s, data=%s" % (youtube_id, err, dataset))
        length = 0
    return length
#-----------------------------------------------------------------------------
示例3: fuzzy_match
# 需要导入模块: import unidecode [as 别名]
# 或者: from unidecode import unidecode [as 别名]
def fuzzy_match(cls, query, response):
    """ Tell whether the response matches the query reasonably well.

    Both strings are transliterated with ``unidecode`` and lower-cased
    (the query is also stripped) before they are compared.

    >>> NLU_Helper.fuzzy_match("bastille", "Beuzeville-la-Bastille")
    False
    >>> NLU_Helper.fuzzy_match("paris 20", "Paris 20e Arrondissement")
    True
    >>> NLU_Helper.fuzzy_match("av victor hugo paris", "Avenue Victor Hugo")
    True
    """
    needle = unidecode(query.strip()).lower()
    candidate = unidecode(response).lower()
    # Accept outright when the response begins with the query.
    if candidate.startswith(needle):
        return True
    # Otherwise accept when the characters of the response not accounted for
    # by the query (multiset difference) are fewer than the query's length.
    surplus = Counter(candidate) - Counter(needle)
    return sum(surplus.values()) < len(needle)
示例4: save
# 需要导入模块: import unidecode [as 别名]
# 或者: from unidecode import unidecode [as 别名]
def save(self, *args, **kwargs):
    """
    Fill in a slug when none is set, refresh the modification date, save.

    - The slug is built from the title; if another Contention already has
      that slug, a random hex suffix is appended.
    - ``skip_date_update=True`` leaves ``date_modification`` untouched; the
      keyword is consumed here and not forwarded to the parent ``save``.
    """
    if not self.slug:
        candidate = slugify(unidecode(self.title))
        taken = Contention.objects.filter(slug=candidate).exists()
        self.slug = ("%s-%s" % (candidate, uuid4().hex)) if taken else candidate
    skip_date_update = kwargs.pop('skip_date_update', False)
    if not skip_date_update:
        self.date_modification = datetime.now()
    return super(Contention, self).save(*args, **kwargs)
示例5: prepare_input
# 需要导入模块: import unidecode [as 别名]
# 或者: from unidecode import unidecode [as 别名]
def prepare_input(self, extracted_str):
    """
    Apply the transformations switched on in ``self.options`` to a raw string.

    Steps, in order: strip runs of spaces, transliterate accents with
    ``unidecode``, lower-case, then apply every ``[old, new]`` pair listed
    under ``self.options["replace"]``.
    """
    options = self.options
    text = extracted_str
    # Drop runs of spaces
    if options["remove_whitespace"]:
        text = re.sub(" +", "", text)
    # Strip accents
    if options["remove_accents"]:
        text = unidecode(text)
    # Fold to lower case
    if options["lowercase"]:
        text = text.lower()
    # Template-specific substitutions
    for pair in options["replace"]:
        assert len(pair) == 2, "A replace should be a list of 2 items"
        text = text.replace(pair[0], pair[1])
    return text
示例6: remove_diacritics
# 需要导入模块: import unidecode [as 别名]
# 或者: from unidecode import unidecode [as 别名]
def remove_diacritics(s):
    """
    Strip diacritics from text with the `unidecode` package.

    :param s: a ``str`` value, or anything else (e.g. ``bytes``)
    :returns: the unidecoded string for ``str`` input; any other value is
              handed back unchanged.

    >>> remove_diacritics('aéèï')
    'aeei'
    >>> remove_diacritics('aéè'.encode('utf-8'))
    b'a\\xc3\\xa9\\xc3\\xa8'
    """
    if not isinstance(s, str):
        return s
    # Issue #305: swap the typographic apostrophe for a plain one before
    # transliterating (special-cased; no general solution was known).
    return unidecode(s.replace("’", "'"))
示例7: get_url_markdown
# 需要导入模块: import unidecode [as 别名]
# 或者: from unidecode import unidecode [as 别名]
def get_url_markdown(baseurl,start,increment):
    '''
    Download ``baseurl`` and return its content converted by html2text,
    transliterated to ASCII with unidecode.

    baseurl   -- URL to fetch with urllib2.urlopen
    start     -- unused (only referenced by earlier, disabled download code)
    increment -- unused (same as ``start``)

    Errors from the download or the conversion propagate to the caller.
    '''
    urlHandler = urllib2.urlopen(baseurl)
    try:
        data = urlHandler.read()
    finally:
        # Release the connection even when read() fails.
        urlHandler.close()
    h = html2text.HTML2Text()
    h.ignore_links = True
    h.ignore_images = True
    h.body_width = 10000
    # Undecodable bytes are dropped before the text is handed to html2text.
    data = h.handle(unidecode(unicode(data,errors='ignore')))
    # Transliterate again: the converted text is not assumed to be ASCII.
    return unidecode(data)
示例8: get_url_markdown
# 需要导入模块: import unidecode [as 别名]
# 或者: from unidecode import unidecode [as 别名]
def get_url_markdown(baseurl,start,increment):
    '''
    Download ``baseurl`` and return its content converted by html2text,
    transliterated to ASCII with unidecode; return None on any failure.

    baseurl   -- URL to fetch with urllib2.urlopen
    start     -- unused (only referenced by earlier, disabled download code)
    increment -- unused (same as ``start``)
    '''
    try:
        urlHandler = urllib2.urlopen(baseurl)
        try:
            data = urlHandler.read()
        finally:
            # Release the connection even when read() fails.
            urlHandler.close()
        h = html2text.HTML2Text()
        h.ignore_links = True
        h.ignore_images = True
        h.body_width = 10000
        # Undecodable bytes are dropped before the text is handed to html2text.
        data = h.handle(unidecode(unicode(data,errors='ignore')))
        return unidecode(data)
    except Exception:
        # Best-effort by design: any download/conversion error yields None,
        # but KeyboardInterrupt and SystemExit are no longer swallowed.
        return None
示例9: save
# 需要导入模块: import unidecode [as 别名]
# 或者: from unidecode import unidecode [as 别名]
def save(self, *args, **kwargs):
    """Derive the slug from the title when no slug is set, then save the Group."""
    slug_missing = not self.slug
    if slug_missing:
        ascii_title = unidecode(self.title)
        self.slug = slugify(ascii_title)
    super(Group, self).save(*args, **kwargs)
示例10: transliterate
# 需要导入模块: import unidecode [as 别名]
# 或者: from unidecode import unidecode [as 别名]
def transliterate(alt_name):
    """Build an ASCII-transliterated variant of an alternate-name record.

    ``alt_name`` is a mapping with ``"name"`` and ``"lang"`` entries and a
    ``copy()`` method.  The name is decoded as UTF-8, falling back to
    Latin-1, and transliterated with ``unidecode``.

    Returns a copy of ``alt_name`` whose ``"name"`` is the transliteration
    and whose ``"lang"`` has ``":ascii"`` appended.  Returns None when the
    name does not match ``has_unicode``, cannot be decoded, or is unchanged
    by transliteration.
    """
    name = alt_name["name"]
    if not has_unicode.search(name):
        return None
    try:
        xlit = unidecode(name.decode("utf8"))
    except (UnicodeDecodeError, UnicodeEncodeError):
        try:
            xlit = unidecode(name.decode("latin1"))
        except (UnicodeDecodeError, UnicodeEncodeError):
            # The original listed UnicodeEncodeError twice here, leaving
            # UnicodeDecodeError uncaught on the fallback path.
            return None
    if xlit == name:
        return None
    addl_name = alt_name.copy()
    addl_name["lang"] = alt_name["lang"] + ":ascii"
    addl_name["name"] = xlit
    return addl_name
示例11: remove_non_ascii
# 需要导入模块: import unidecode [as 别名]
# 或者: from unidecode import unidecode [as 别名]
def remove_non_ascii(text):
    """Return ``text`` transliterated to ASCII with unidecode.

    ``text`` is first decoded as UTF-8 via ``unicode(...)``; if that attempt
    raises any Exception, ``str(text)`` is transliterated instead.
    """
    try:
        return unidecode(unicode(text, encoding="utf-8"))
    except Exception:
        # Deliberate best-effort fallback; unlike a bare ``except:`` this
        # lets KeyboardInterrupt and SystemExit propagate.
        return unidecode(str(text))
示例12: remove_non_ascii
# 需要导入模块: import unidecode [as 别名]
# 或者: from unidecode import unidecode [as 别名]
def remove_non_ascii(text):
    """Return ``text`` transliterated to ASCII with unidecode.

    ``text`` is first decoded as UTF-8 and the transliteration is returned
    as ``unicode``; if that attempt raises any Exception, ``str(text)`` is
    transliterated and returned as ``str`` instead.
    """
    try:
        return unicode(unidecode(unicode(text, encoding="utf-8")))
    except Exception:
        # Deliberate best-effort fallback; unlike a bare ``except:`` this
        # lets KeyboardInterrupt and SystemExit propagate.
        return str(unidecode(str(text)))
示例13: remove_non_ascii
# 需要导入模块: import unidecode [as 别名]
# 或者: from unidecode import unidecode [as 别名]
def remove_non_ascii(self, text):
    """Return ``text`` transliterated to ASCII with unidecode.

    ``text`` is first decoded as UTF-8 and the transliteration is returned
    as ``unicode``; if that attempt raises any Exception, ``text`` is passed
    to unidecode as-is and the result is returned as ``str``.
    """
    try:
        return unicode(unidecode(unicode(text, encoding="utf-8")))
    except Exception:
        # Deliberate best-effort fallback; unlike a bare ``except:`` this
        # lets KeyboardInterrupt and SystemExit propagate.
        return str(unidecode(text))
示例14: remove_diacritics
# 需要导入模块: import unidecode [as 别名]
# 或者: from unidecode import unidecode [as 别名]
def remove_diacritics(s):
    """Return ``s`` transliterated by unidecode when it is a ``unicode``
    string (subclasses included); any other value is returned unchanged."""
    # isinstance rather than an exact type comparison, so unicode
    # subclasses are transliterated as well.
    return unidecode(s) if isinstance(s, unicode) else s
示例15: _transliterated
# 需要导入模块: import unidecode [as 别名]
# 或者: from unidecode import unidecode [as 别名]
def _transliterated(method):
def wrapper(self):
return transliterate(method(self))
functools.update_wrapper(wrapper, method, ["__name__", "__doc__"])
if hasattr(method, "_nltk_compat_7bit"):
wrapper._nltk_compat_7bit = method._nltk_compat_7bit
wrapper._nltk_compat_transliterated = True
return wrapper