Python unidecode.unidecode方法代码示例

本文整理汇总了Python中unidecode.unidecode方法的典型用法代码示例。如果您正苦于以下问题：Python unidecode.unidecode方法的具体用法？Python unidecode.unidecode怎么用？Python unidecode.unidecode使用的例子？那么，这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在模块unidecode的用法示例。

在下文中一共展示了unidecode.unidecode方法的15个代码示例，这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞，您的评价将有助于系统推荐出更棒的Python代码示例。

示例1: normalizestr

# 需要导入模块: import unidecode [as 别名]
# 或者: from unidecode import unidecode [as 别名]
def normalizestr(string):
    """Return an ASCII rendering of ``string``.

    Every ``(mark, replacement)`` pair yielded by ``unicode_marks`` is
    substituted first; the text is then NFKC-normalized, stripped of
    surrounding whitespace and passed through ``unidecode``.  A message is
    printed whenever the result differs from the original input.
    """
    original = string
    for mark, ascii_repl in unicode_marks(string):
        string = string.replace(mark, ascii_repl)

    # NFKC folds compatibility characters into their canonical forms
    # before the final transliteration step.
    normalized = unicodedata.normalize('NFKC', string).strip()
    result = unidecode(normalized)
    if result != original:
        print("Fixed string: '{}'".format(result))
    return result

开发者ID:googlefonts，项目名称:gftools，代码行数:22，代码来源:gftools-fix-ascii-fontmetadata.py

示例2: findVideoLength

# 需要导入模块: import unidecode [as 别名]
# 或者: from unidecode import unidecode [as 别名]
def findVideoLength(dataset, youtube_id, api_key=None):
    '''
    Handle video length lookup
    '''
    try:
        youtube_id = unidecode(youtube_id)
    except Exception as err:
        print "youtube_id is not ascii?  ytid=", youtube_id
        return 0
    try:
        assert youtube_id is not None, "[analyze videos] youtube id does not exist"
        content, stats = get_youtube_api_stats(youtube_id=youtube_id, api_key=api_key, part=YOUTUBE_PARTS)
        durationDict = parseISOduration(content['duration'].encode("ascii","ignore"))
        length = getTotalTimeSecs(durationDict)
        print "[analyze videos] totalTime for youtube video %s is %s sec" % (youtube_id, length)
    except (AssertionError, Exception) as err:
        print "Failed to lookup video length for %s!  Error=%s, data=%s" % (youtube_id, err, dataset)
        length = 0
    return length

#-----------------------------------------------------------------------------

开发者ID:mitodl，项目名称:edx2bigquery，代码行数:23，代码来源:make_video_analysis.py

示例3: fuzzy_match

# 需要导入模块: import unidecode [as 别名]
# 或者: from unidecode import unidecode [as 别名]
def fuzzy_match(cls, query, response):
        """ Tell whether the response is a plausible match for the query.

        Both strings are passed through ``unidecode`` and lowercased (the
        query is also stripped) before being compared.

        >>> NLU_Helper.fuzzy_match("bastille", "Beuzeville-la-Bastille")
        False
        >>> NLU_Helper.fuzzy_match("paris 20", "Paris 20e Arrondissement")
        True
        >>> NLU_Helper.fuzzy_match("av victor hugo paris", "Avenue Victor Hugo")
        True
        """
        needle = unidecode(query.strip()).lower()
        candidate = unidecode(response).lower()

        # The response begins with the query.
        if candidate.startswith(needle):
            return True

        # Characters of the response that the query does not account for
        # (multiset difference); accept when there are fewer of them than
        # characters in the query.
        unmatched = Counter(candidate) - Counter(needle)
        return sum(unmatched.values()) < len(needle)

开发者ID:QwantResearch，项目名称:idunn，代码行数:21，代码来源:nlu_client.py

示例4: save

# 需要导入模块: import unidecode [as 别名]
# 或者: from unidecode import unidecode [as 别名]
def save(self, *args, **kwargs):
        """
        Save the instance, filling in ``slug`` and ``date_modification``.

        - When ``slug`` is empty, one is built from the title; if another
          Contention already uses it, a random hex suffix is appended.
        - ``date_modification`` is set to the current time unless the
          caller passes ``skip_date_update=True``.  That keyword is
          consumed here and is not forwarded to the parent ``save``.
        """
        if not self.slug:
            base_slug = slugify(unidecode(self.title))
            taken = Contention.objects.filter(slug=base_slug).exists()
            self.slug = ("%s-%s" % (base_slug, uuid4().hex)) if taken else base_slug

        skip_date_update = kwargs.pop('skip_date_update', False)
        if not skip_date_update:
            self.date_modification = datetime.now()

        return super(Contention, self).save(*args, **kwargs)

开发者ID:arguman，项目名称:arguman.org，代码行数:18，代码来源:models.py

示例5: prepare_input

# 需要导入模块: import unidecode [as 别名]
# 或者: from unidecode import unidecode [as 别名]
def prepare_input(self, extracted_str):
        """
        Apply the transformations enabled in ``self.options`` to a raw string.

        The steps run in a fixed order: space removal, accent removal,
        lowercasing, then each ``(old, new)`` pair listed under the
        ``"replace"`` option.  Returns the transformed string.
        """
        opts = self.options
        text = extracted_str

        # Drop every run of spaces.
        if opts["remove_whitespace"]:
            text = re.sub(" +", "", text)

        # Transliterate accented characters.
        if opts["remove_accents"]:
            text = unidecode(text)

        # Normalize case.
        if opts["lowercase"]:
            text = text.lower()

        # Template-specific substitutions, applied in the listed order.
        for pair in opts["replace"]:
            assert len(pair) == 2, "A replace should be a list of 2 items"
            text = text.replace(pair[0], pair[1])

        return text

开发者ID:invoice-x，项目名称:invoice2data，代码行数:27，代码来源:invoice_template.py

示例6: remove_diacritics

# 需要导入模块: import unidecode [as 别名]
# 或者: from unidecode import unidecode [as 别名]
def remove_diacritics(s):
    """
    Strip diacritics from text using the `unidecode` package.

    :param s: the value to process
    :returns: the unidecoded string when `s` is a `str`; any other value
              (e.g. `bytes`) is returned unchanged.

    >>> remove_diacritics('aéèï')
    'aeei'
    >>> remove_diacritics('aéè'.encode('utf-8'))
    b'a\\xc3\\xa9\\xc3\\xa8'
    """
    if not isinstance(s, str):
        return s

    # For issue #305: swap the right single quotation mark for a plain
    # apostrophe before transliterating (a targeted workaround, not a
    # general solution).
    return unidecode(s.replace("’", "'"))

开发者ID:dissemin，项目名称:dissemin，代码行数:22，代码来源:utils.py

示例7: get_url_markdown

# 需要导入模块: import unidecode [as 别名]
# 或者: from unidecode import unidecode [as 别名]
def get_url_markdown(baseurl,start,increment):
  """Download ``baseurl`` and return its content as ASCII text.

  The page is fetched with ``urllib2.urlopen``, decoded with undecodable
  bytes ignored, transliterated with ``unidecode`` and converted by
  ``html2text`` with links and images ignored.  The converted text is
  transliterated once more before being returned.

  ``start`` and ``increment`` are not used by this implementation; they
  are kept so existing callers continue to work.
  """
  urlHandler = urllib2.urlopen(baseurl)
  try:
    data = urlHandler.read()
  finally:
    # Release the connection even if the read fails.
    urlHandler.close()

  h = html2text.HTML2Text()
  h.ignore_links = True
  h.ignore_images = True
  h.body_width = 10000
  data = h.handle(unidecode(unicode(data,errors='ignore')))
  return unidecode(data)

开发者ID:schollz，项目名称:extract_recipe，代码行数:24，代码来源:downloadRecipes.py

示例8: get_url_markdown

# 需要导入模块: import unidecode [as 别名]
# 或者: from unidecode import unidecode [as 别名]
def get_url_markdown(baseurl,start,increment):
  """Download ``baseurl`` and return its content as ASCII text, or ``None``.

  The page is fetched with ``urllib2.urlopen``, decoded with undecodable
  bytes ignored, transliterated with ``unidecode`` and converted by
  ``html2text`` with links and images ignored.  The converted text is
  transliterated once more before being returned.

  Any error during download or conversion yields ``None``.  ``start`` and
  ``increment`` are not used by this implementation; they are kept so
  existing callers continue to work.
  """
  try:
    urlHandler = urllib2.urlopen(baseurl)
    try:
      data = urlHandler.read()
    finally:
      # Release the connection even if the read fails.
      urlHandler.close()

    h = html2text.HTML2Text()
    h.ignore_links = True
    h.ignore_images = True
    h.body_width = 10000
    data = h.handle(unidecode(unicode(data,errors='ignore')))
    return unidecode(data)
  except Exception:
    # Best-effort download: report failure as None, but let
    # KeyboardInterrupt and SystemExit propagate.
    return None

开发者ID:schollz，项目名称:extract_recipe，代码行数:27，代码来源:MdownloadRecipes.py

示例9: save

# 需要导入模块: import unidecode [as 别名]
# 或者: from unidecode import unidecode [as 别名]
def save(self, *args, **kwargs):
        """Save the group, deriving ``slug`` from ``title`` when it is empty."""
        slug_missing = not self.slug
        if slug_missing:
            # Transliterate first, then slugify the result.
            ascii_title = unidecode(self.title)
            self.slug = slugify(ascii_title)
        super(Group, self).save(*args, **kwargs)

开发者ID:slyapustin，项目名称:django-classified，代码行数:6，代码来源:models.py

示例10: transliterate

# 需要导入模块: import unidecode [as 别名]
# 或者: from unidecode import unidecode [as 别名]
def transliterate(alt_name):
    """Return an ASCII-transliterated variant of ``alt_name``, or ``None``.

    ``alt_name`` must support item access for ``"name"`` and ``"lang"``
    and provide ``.copy()``.  When the name matches ``has_unicode``, it is
    decoded as UTF-8 (falling back to Latin-1) and passed through
    ``unidecode``.  If the result differs from the original name, a copy
    of ``alt_name`` is returned with ``"name"`` replaced by the
    transliteration and ``":ascii"`` appended to ``"lang"``.

    Returns ``None`` when the name does not match ``has_unicode``, when it
    cannot be decoded, or when transliteration leaves it unchanged.
    """
    name = alt_name["name"]
    if not has_unicode.search(name):
        return None

    try:
        xlit = unidecode(name.decode("utf8"))
    except (UnicodeDecodeError, UnicodeEncodeError):
        try:
            xlit = unidecode(name.decode("latin1"))
        except (UnicodeDecodeError, UnicodeEncodeError):
            # Same pair as the outer handler; the original listed
            # UnicodeEncodeError twice and omitted UnicodeDecodeError.
            return None

    if xlit == name:
        return None

    addl_name = alt_name.copy()
    addl_name["lang"] = alt_name["lang"] + ":ascii"
    addl_name["name"] = xlit
    return addl_name

开发者ID:LibraryOfCongress，项目名称:gazetteer，代码行数:16，代码来源:core.py

示例11: remove_non_ascii

# 需要导入模块: import unidecode [as 别名]
# 或者: from unidecode import unidecode [as 别名]
def remove_non_ascii(text):
    """Return ``text`` transliterated with ``unidecode``.

    The input is first decoded as UTF-8.  If that attempt fails for any
    reason, ``str(text)`` is transliterated instead.
    """
    try:
        return unidecode(unicode(text, encoding = "utf-8"))
    except Exception:
        # Narrower than a bare ``except`` so that KeyboardInterrupt and
        # SystemExit are not swallowed by the fallback.
        return unidecode(str(text))

开发者ID:yaserkl，项目名称:TransferRL，代码行数:7，代码来源:cnn_dm_downloader.py

示例12: remove_non_ascii

# 需要导入模块: import unidecode [as 别名]
# 或者: from unidecode import unidecode [as 别名]
def remove_non_ascii(text):
    """Return ``text`` transliterated with ``unidecode``.

    The input is first decoded as UTF-8 and the transliteration is
    returned as ``unicode``.  If that attempt fails for any reason,
    ``str(text)`` is transliterated and returned as ``str`` instead.
    """
    try:
        return unicode(unidecode(unicode(text, encoding = "utf-8")))
    except Exception:
        # Narrower than a bare ``except`` so that KeyboardInterrupt and
        # SystemExit are not swallowed by the fallback.
        return str(unidecode(str(text)))

开发者ID:yaserkl，项目名称:TransferRL，代码行数:7，代码来源:newsroom_data_maker.py

示例13: remove_non_ascii

# 需要导入模块: import unidecode [as 别名]
# 或者: from unidecode import unidecode [as 别名]
def remove_non_ascii(self, text):
    """Return ``text`` transliterated with ``unidecode``.

    The input is first decoded as UTF-8 and the transliteration is
    returned as ``unicode``.  If that attempt fails for any reason,
    ``text`` is passed to ``unidecode`` as-is and the result is returned
    as ``str``.
    """
    try:
      return unicode(unidecode(unicode(text, encoding="utf-8")))
    except Exception:
      # Narrower than a bare ``except`` so that KeyboardInterrupt and
      # SystemExit are not swallowed by the fallback.
      return str(unidecode(text))

开发者ID:yaserkl，项目名称:TransferRL，代码行数:7，代码来源:decode.py

示例14: remove_diacritics

# 需要导入模块: import unidecode [as 别名]
# 或者: from unidecode import unidecode [as 别名]
def remove_diacritics(s):
    """Return ``unidecode(s)`` for ``unicode`` input; return anything else unchanged.

    ``isinstance`` is used so that subclasses of ``unicode`` are
    transliterated too, which an exact ``type(s) == unicode`` comparison
    would skip.
    """
    return unidecode(s) if isinstance(s, unicode) else s

开发者ID:dissemin，项目名称:oabot，代码行数:4，代码来源:main.py

示例15: _transliterated

# 需要导入模块: import unidecode [as 别名]
# 或者: from unidecode import unidecode [as 别名]
def _transliterated(method):
    def wrapper(self):
        return transliterate(method(self))

    functools.update_wrapper(wrapper, method, ["__name__", "__doc__"])
    if hasattr(method, "_nltk_compat_7bit"):
        wrapper._nltk_compat_7bit = method._nltk_compat_7bit

    wrapper._nltk_compat_transliterated = True
    return wrapper

开发者ID:rafasashi，项目名称:razzy-spinner，代码行数:12，代码来源:compat.py

注：本文中的unidecode.unidecode方法示例由纯净天空整理自Github/MSDocs等开源代码及文档管理平台，相关代码片段筛选自各路编程大神贡献的开源项目，源码版权归原作者所有，传播和使用请参考对应项目的License；未经允许，请勿转载。