当前位置: 首页>>代码示例>>Python>>正文


Python unidecode.unidecode方法代码示例

本文整理汇总了 Python 中 unidecode.unidecode 方法的典型用法代码示例。如果您正苦于以下问题:Python unidecode.unidecode 方法的具体用法?Python unidecode.unidecode 怎么用?Python unidecode.unidecode 使用的例子?那么恭喜您,这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在模块 unidecode 的用法示例。


在下文中一共展示了 unidecode.unidecode 方法的 15 个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的 Python 代码示例。

示例1: normalizestr

# 需要导入模块: import unidecode [as 别名]
# 或者: from unidecode import unidecode [as 别名]
def normalizestr(string):
    """Return an ASCII rendering of ``string``.

    Every ``(mark, replacement)`` pair yielded by ``unicode_marks(string)``
    is substituted first.  The text is then NFKC-normalized, stripped of
    surrounding whitespace and passed through ``unidecode``.  A notice is
    printed whenever the result differs from the original argument.
    """
    original = string
    for mark, ascii_repl in unicode_marks(string):
        string = string.replace(mark, ascii_repl)

    # Normalize before transliterating; no characters are filtered out.
    normalized = unicodedata.normalize('NFKC', string).strip()
    result = unidecode(normalized)

    if result != original:
        print("Fixed string: '{}'".format(result))
    return result
开发者ID:googlefonts,项目名称:gftools,代码行数:22,代码来源:gftools-fix-ascii-fontmetadata.py

示例2: findVideoLength

# 需要导入模块: import unidecode [as 别名]
# 或者: from unidecode import unidecode [as 别名]
def findVideoLength(dataset, youtube_id, api_key=None):
    '''
    Handle video length lookup
    '''
    try:
        youtube_id = unidecode(youtube_id)
    except Exception as err:
        print "youtube_id is not ascii?  ytid=", youtube_id
        return 0
    try:
        assert youtube_id is not None, "[analyze videos] youtube id does not exist"
        content, stats = get_youtube_api_stats(youtube_id=youtube_id, api_key=api_key, part=YOUTUBE_PARTS)
        durationDict = parseISOduration(content['duration'].encode("ascii","ignore"))
        length = getTotalTimeSecs(durationDict)
        print "[analyze videos] totalTime for youtube video %s is %s sec" % (youtube_id, length)
    except (AssertionError, Exception) as err:
        print "Failed to lookup video length for %s!  Error=%s, data=%s" % (youtube_id, err, dataset)
        length = 0
    return length

#----------------------------------------------------------------------------- 
开发者ID:mitodl,项目名称:edx2bigquery,代码行数:23,代码来源:make_video_analysis.py

示例3: fuzzy_match

# 需要导入模块: import unidecode [as 别名]
# 或者: from unidecode import unidecode [as 别名]
def fuzzy_match(cls, query, response):
        """ Tell whether the response is a plausible match for the query.

        Both strings are transliterated with ``unidecode`` and lower-cased
        before comparison; the query is additionally stripped.

        >>> NLU_Helper.fuzzy_match("bastille", "Beuzeville-la-Bastille")
        False
        >>> NLU_Helper.fuzzy_match("paris 20", "Paris 20e Arrondissement")
        True
        >>> NLU_Helper.fuzzy_match("av victor hugo paris", "Avenue Victor Hugo")
        True
        """
        needle = unidecode(query.strip()).lower()
        haystack = unidecode(response).lower()

        # Accept outright when the response begins with the query.
        if haystack.startswith(needle):
            return True

        # Otherwise accept when the number of response characters not
        # accounted for by the query is smaller than the query length.
        leftover = Counter(haystack) - Counter(needle)
        return sum(leftover.values()) < len(needle)
开发者ID:QwantResearch,项目名称:idunn,代码行数:21,代码来源:nlu_client.py

示例4: save

# 需要导入模块: import unidecode [as 别名]
# 或者: from unidecode import unidecode [as 别名]
def save(self, *args, **kwargs):
        """
        Persist the instance, filling in ``slug`` and ``date_modification``.

        - When no slug is set, one is derived from the title; if another
          ``Contention`` already uses it, a uuid4 hex suffix is appended.
        - ``date_modification`` is refreshed unless the caller passes
          ``skip_date_update=True``.  That keyword is consumed here and is
          not forwarded to the parent ``save``.
        """
        if not self.slug:
            candidate = slugify(unidecode(self.title))
            taken = Contention.objects.filter(slug=candidate).exists()
            self.slug = ("%s-%s" % (candidate, uuid4().hex)) if taken else candidate

        skip_date_update = kwargs.pop('skip_date_update', False)
        if not skip_date_update:
            self.date_modification = datetime.now()

        return super(Contention, self).save(*args, **kwargs)
开发者ID:arguman,项目名称:arguman.org,代码行数:18,代码来源:models.py

示例5: prepare_input

# 需要导入模块: import unidecode [as 别名]
# 或者: from unidecode import unidecode [as 别名]
def prepare_input(self, extracted_str):
        """
        Apply the transformations configured in ``self.options`` to a raw string.

        Steps, in order: remove runs of spaces (``remove_whitespace``),
        transliterate with ``unidecode`` (``remove_accents``), lower-case
        (``lowercase``), then apply every ``[old, new]`` pair listed under
        ``replace``.  Returns the transformed string.
        """
        options = self.options
        text = extracted_str

        # Remove whitespace (space characters only)
        if options["remove_whitespace"]:
            text = re.sub(" +", "", text)

        # Remove accents
        if options["remove_accents"]:
            text = unidecode(text)

        # Convert to lower case
        if options["lowercase"]:
            text = text.lower()

        # Template-specific replacements, applied in the order given
        for pair in options["replace"]:
            assert len(pair) == 2, "A replace should be a list of 2 items"
            text = text.replace(pair[0], pair[1])

        return text
开发者ID:invoice-x,项目名称:invoice2data,代码行数:27,代码来源:invoice_template.py

示例6: remove_diacritics

# 需要导入模块: import unidecode [as 别名]
# 或者: from unidecode import unidecode [as 别名]
def remove_diacritics(s):
    """
    Strip diacritics from text by means of the `unidecode` package.

    :param s: a ``str``, or any other object (for instance ``bytes``)
    :returns: the unidecoded string for ``str`` input; any other value is
              handed back untouched.

    >>> remove_diacritics('aéèï')
    'aeei'
    >>> remove_diacritics('aéè'.encode('utf-8'))
    b'a\\xc3\\xa9\\xc3\\xa8'
    """
    if not isinstance(s, str):
        return s

    # Special case for issue #305: the typographic apostrophe is mapped to a
    # plain one up front; no general solution is known.
    return unidecode(s.replace("’", "'"))
开发者ID:dissemin,项目名称:dissemin,代码行数:22,代码来源:utils.py

示例7: get_url_markdown

# 需要导入模块: import unidecode [as 别名]
# 或者: from unidecode import unidecode [as 别名]
def get_url_markdown(baseurl,start,increment):
  """Download ``baseurl`` and return its content as ASCII Markdown text.

  The page is fetched with ``urllib2``, converted to unicode with
  undecodable bytes ignored, transliterated with ``unidecode``, rendered
  by ``html2text`` (links and images ignored, body width 10000) and
  transliterated once more before being returned.

  ``start`` and ``increment`` are not used; they are kept so existing
  callers keep working.  Network errors propagate to the caller.
  """
  from contextlib import closing

  # Close the HTTP response deterministically instead of leaking the handle.
  with closing(urllib2.urlopen(baseurl)) as url_handler:
    data = url_handler.read()

  h = html2text.HTML2Text()
  h.ignore_links = True
  h.ignore_images = True
  h.body_width = 10000
  data = h.handle(unidecode(unicode(data,errors='ignore')))
  return unidecode(data)
开发者ID:schollz,项目名称:extract_recipe,代码行数:24,代码来源:downloadRecipes.py

示例8: get_url_markdown

# 需要导入模块: import unidecode [as 别名]
# 或者: from unidecode import unidecode [as 别名]
def get_url_markdown(baseurl,start,increment):
  """Download ``baseurl`` and return its content as ASCII Markdown text.

  The page is fetched with ``urllib2``, converted to unicode with
  undecodable bytes ignored, transliterated with ``unidecode``, rendered
  by ``html2text`` (links and images ignored, body width 10000) and
  transliterated once more before being returned.

  ``start`` and ``increment`` are not used; they are kept so existing
  callers keep working.  Returns ``None`` when any step fails.
  """
  from contextlib import closing

  try:
    # Close the HTTP response deterministically instead of leaking the handle.
    with closing(urllib2.urlopen(baseurl)) as url_handler:
      data = url_handler.read()

    h = html2text.HTML2Text()
    h.ignore_links = True
    h.ignore_images = True
    h.body_width = 10000
    data = h.handle(unidecode(unicode(data,errors='ignore')))
    return unidecode(data)
  except Exception:
    # Best-effort download: any ordinary failure yields None, while
    # KeyboardInterrupt / SystemExit are allowed to propagate.
    return None
开发者ID:schollz,项目名称:extract_recipe,代码行数:27,代码来源:MdownloadRecipes.py

示例9: save

# 需要导入模块: import unidecode [as 别名]
# 或者: from unidecode import unidecode [as 别名]
def save(self, *args, **kwargs):
        """Save the group, deriving ``slug`` from the title when it is empty."""
        slug_missing = not self.slug
        if slug_missing:
            ascii_title = unidecode(self.title)
            self.slug = slugify(ascii_title)
        super(Group, self).save(*args, **kwargs)
开发者ID:slyapustin,项目名称:django-classified,代码行数:6,代码来源:models.py

示例10: transliterate

# 需要导入模块: import unidecode [as 别名]
# 或者: from unidecode import unidecode [as 别名]
def transliterate(alt_name):
    """Return an ASCII-transliterated copy of ``alt_name``, or ``None``.

    ``alt_name`` is a dict read through its ``"name"`` and ``"lang"`` keys.
    When ``has_unicode`` matches the name, the name is decoded as UTF-8
    (falling back to Latin-1) and passed through ``unidecode``.  If the
    result differs from the original name, a copy of the dict is returned
    with ``"name"`` replaced by the transliteration and ``":ascii"``
    appended to ``"lang"``.

    Returns ``None`` when ``has_unicode`` does not match, when neither
    decoding succeeds, or when transliteration leaves the name unchanged.
    """
    name = alt_name["name"]
    if not has_unicode.search(name):
        return None

    try:
        xlit = unidecode(name.decode("utf8"))
    except (UnicodeDecodeError, UnicodeEncodeError):
        try:
            xlit = unidecode(name.decode("latin1"))
        except (UnicodeDecodeError, UnicodeEncodeError):
            # The original listed UnicodeEncodeError twice; both error kinds
            # are meant to abort the transliteration.
            return None

    if xlit == name:
        return None

    addl_name = alt_name.copy()
    addl_name["lang"] = alt_name["lang"] + ":ascii"
    addl_name["name"] = xlit
    return addl_name
开发者ID:LibraryOfCongress,项目名称:gazetteer,代码行数:16,代码来源:core.py

示例11: remove_non_ascii

# 需要导入模块: import unidecode [as 别名]
# 或者: from unidecode import unidecode [as 别名]
def remove_non_ascii(text):
    """Return ``text`` transliterated to ASCII with ``unidecode``.

    ``text`` is first decoded as UTF-8; if that attempt fails for any
    reason, the value is converted with ``str`` and transliterated instead.
    """
    try:
        return unidecode(unicode(text, encoding="utf-8"))
    except Exception:
        # Deliberate best-effort fallback.  ``Exception`` rather than a bare
        # ``except`` so KeyboardInterrupt / SystemExit are not swallowed.
        return unidecode(str(text))
开发者ID:yaserkl,项目名称:TransferRL,代码行数:7,代码来源:cnn_dm_downloader.py

示例12: remove_non_ascii

# 需要导入模块: import unidecode [as 别名]
# 或者: from unidecode import unidecode [as 别名]
def remove_non_ascii(text):
    """Return ``text`` transliterated to ASCII with ``unidecode``.

    ``text`` is first decoded as UTF-8 and the result is returned as
    ``unicode``; if that attempt fails for any reason, the value is
    converted with ``str``, transliterated and returned as ``str``.
    """
    try:
        return unicode(unidecode(unicode(text, encoding="utf-8")))
    except Exception:
        # Deliberate best-effort fallback.  ``Exception`` rather than a bare
        # ``except`` so KeyboardInterrupt / SystemExit are not swallowed.
        return str(unidecode(str(text)))
开发者ID:yaserkl,项目名称:TransferRL,代码行数:7,代码来源:newsroom_data_maker.py

示例13: remove_non_ascii

# 需要导入模块: import unidecode [as 别名]
# 或者: from unidecode import unidecode [as 别名]
def remove_non_ascii(self, text):
    """Return ``text`` transliterated to ASCII with ``unidecode``.

    ``text`` is first decoded as UTF-8 and the result is returned as
    ``unicode``; if that attempt fails for any reason, ``text`` is passed
    to ``unidecode`` as-is and the result is returned as ``str``.
    """
    try:
      return unicode(unidecode(unicode(text, encoding="utf-8")))
    except Exception:
      # Deliberate best-effort fallback.  ``Exception`` rather than a bare
      # ``except`` so KeyboardInterrupt / SystemExit are not swallowed.
      return str(unidecode(text))
开发者ID:yaserkl,项目名称:TransferRL,代码行数:7,代码来源:decode.py

示例14: remove_diacritics

# 需要导入模块: import unidecode [as 别名]
# 或者: from unidecode import unidecode [as 别名]
def remove_diacritics(s):
    """Return ``s`` unidecoded when it is a ``unicode`` string, else unchanged."""
    # isinstance rather than an exact type comparison, so that subclasses of
    # ``unicode`` are transliterated as well.
    return unidecode(s) if isinstance(s, unicode) else s
开发者ID:dissemin,项目名称:oabot,代码行数:4,代码来源:main.py

示例15: _transliterated

# 需要导入模块: import unidecode [as 别名]
# 或者: from unidecode import unidecode [as 别名]
def _transliterated(method):
    def wrapper(self):
        return transliterate(method(self))

    functools.update_wrapper(wrapper, method, ["__name__", "__doc__"])
    if hasattr(method, "_nltk_compat_7bit"):
        wrapper._nltk_compat_7bit = method._nltk_compat_7bit

    wrapper._nltk_compat_transliterated = True
    return wrapper 
开发者ID:rafasashi,项目名称:razzy-spinner,代码行数:12,代码来源:compat.py


注:本文中的unidecode.unidecode方法示例由纯净天空整理自Github/MSDocs等开源代码及文档管理平台,相关代码片段筛选自各路编程大神贡献的开源项目,源码版权归原作者所有,传播和使用请参考对应项目的License;未经允许,请勿转载。