當前位置: 首頁>>代碼示例>>Python>>正文


Python unidecode.unidecode方法代碼示例

本文整理匯總了Python中unidecode.unidecode方法的典型用法代碼示例。如果您正苦於以下問題:Python unidecode.unidecode方法的具體用法?Python unidecode.unidecode怎麽用?Python unidecode.unidecode使用的例子?那麽, 這裏精選的方法代碼示例或許可以為您提供幫助。您也可以進一步了解該方法所在unidecode的用法示例。


在下文中一共展示了unidecode.unidecode方法的15個代碼示例,這些例子默認根據受歡迎程度排序。您可以為喜歡或者感覺有用的代碼點讚,您的評價將有助於系統推薦出更棒的Python代碼示例。

示例1: normalizestr

# 需要導入模塊: import unidecode [as 別名]
# 或者: from unidecode import unidecode [as 別名]
def normalizestr(string):
    """Return an ASCII rendering of *string*.

    Special marks (e.g. copyright or trademark signs) reported by
    ``unicode_marks`` are first swapped for their ASCII names, the text is
    then NFKC-normalized and stripped, and finally passed through
    ``unidecode``.  A message is printed whenever the result differs from
    the input.
    """
    original = string
    # unicode_marks yields (mark, replacement) pairs for the given text.
    for mark, ascii_repl in unicode_marks(original):
        string = string.replace(mark, ascii_repl)

    normalized = unicodedata.normalize('NFKC', string).strip()
    result = unidecode(normalized)

    if result != original:
        print("Fixed string: '{}'".format(result))
    return result
開發者ID:googlefonts,項目名稱:gftools,代碼行數:22,代碼來源:gftools-fix-ascii-fontmetadata.py

示例2: findVideoLength

# 需要導入模塊: import unidecode [as 別名]
# 或者: from unidecode import unidecode [as 別名]
def findVideoLength(dataset, youtube_id, api_key=None):
    '''
    Look up the length of a YouTube video, in seconds.

    dataset    -- only used in the failure message, to identify the caller's data
    youtube_id -- id of the video; it is passed through unidecode first
    api_key    -- optional key forwarded to get_youtube_api_stats

    Returns the value computed by getTotalTimeSecs, or 0 when the id cannot
    be transliterated or the lookup fails for any reason (a message is
    printed in both cases).
    '''
    try:
        youtube_id = unidecode(youtube_id)
    except Exception:
        # Single-argument print(...) behaves the same on Python 2 and 3.
        print("youtube_id is not ascii?  ytid= %s" % (youtube_id,))
        return 0
    try:
        # Explicit check instead of assert, so it survives "python -O".
        if youtube_id is None:
            raise ValueError("[analyze videos] youtube id does not exist")
        content, _stats = get_youtube_api_stats(youtube_id=youtube_id, api_key=api_key, part=YOUTUBE_PARTS)
        durationDict = parseISOduration(content['duration'].encode("ascii","ignore"))
        length = getTotalTimeSecs(durationDict)
        print("[analyze videos] totalTime for youtube video %s is %s sec" % (youtube_id, length))
    except Exception as err:
        # Best effort: any lookup/parsing failure is reported and mapped to 0.
        print("Failed to lookup video length for %s!  Error=%s, data=%s" % (youtube_id, err, dataset))
        length = 0
    return length

#----------------------------------------------------------------------------- 
開發者ID:mitodl,項目名稱:edx2bigquery,代碼行數:23,代碼來源:make_video_analysis.py

示例3: fuzzy_match

# 需要導入模塊: import unidecode [as 別名]
# 或者: from unidecode import unidecode [as 別名]
def fuzzy_match(cls, query, response):
        """ Decide whether ``response`` is a reasonable match for ``query``.

        Both strings are transliterated with ``unidecode`` and lowercased
        (the query is also stripped) before being compared.

        >>> NLU_Helper.fuzzy_match("bastille", "Beuzeville-la-Bastille")
        False
        >>> NLU_Helper.fuzzy_match("paris 20", "Paris 20e Arrondissement")
        True
        >>> NLU_Helper.fuzzy_match("av victor hugo paris", "Avenue Victor Hugo")
        True
        """
        needle = unidecode(query.strip()).lower()
        candidate = unidecode(response).lower()

        # Accept immediately when the response begins with the query.
        if candidate.startswith(needle):
            return True

        # Otherwise accept when the number of response characters that the
        # query does not account for is small compared to the query length.
        unmatched = Counter(candidate) - Counter(needle)
        return sum(unmatched.values()) < len(needle)
開發者ID:QwantResearch,項目名稱:idunn,代碼行數:21,代碼來源:nlu_client.py

示例4: save

# 需要導入模塊: import unidecode [as 別名]
# 或者: from unidecode import unidecode [as 別名]
def save(self, *args, **kwargs):
        """
        Save the instance, generating a slug from the title when none is set.

        If another Contention already uses the generated slug, a uuid4 hex
        suffix is appended.  The modification date is refreshed unless the
        caller passes ``skip_date_update=True``; that keyword is consumed
        here and not forwarded to the parent ``save``.
        """
        if not self.slug:
            candidate = slugify(unidecode(self.title))
            already_used = Contention.objects.filter(slug=candidate).exists()
            self.slug = "%s-%s" % (candidate, uuid4().hex) if already_used else candidate

        skip_date_update = kwargs.pop('skip_date_update', False)
        if not skip_date_update:
            self.date_modification = datetime.now()

        return super(Contention, self).save(*args, **kwargs)
開發者ID:arguman,項目名稱:arguman.org,代碼行數:18,代碼來源:models.py

示例5: prepare_input

# 需要導入模塊: import unidecode [as 別名]
# 或者: from unidecode import unidecode [as 別名]
def prepare_input(self, extracted_str):
        """
        Apply the transformations enabled in ``self.options`` to a raw string.

        The steps run in this order: drop space characters, transliterate
        with ``unidecode``, lowercase, then apply each ``[old, new]`` pair
        from ``options["replace"]``.  Returns the transformed string.
        """
        opts = self.options
        text = extracted_str

        # Strip runs of space characters
        if opts["remove_whitespace"]:
            text = re.sub(" +", "", text)

        # Transliterate accented characters
        if opts["remove_accents"]:
            text = unidecode(text)

        # Fold to lower case
        if opts["lowercase"]:
            text = text.lower()

        # Template-specific substitutions, applied in the listed order
        for pair in opts["replace"]:
            assert len(pair) == 2, "A replace should be a list of 2 items"
            old, new = pair[0], pair[1]
            text = text.replace(old, new)

        return text
開發者ID:invoice-x,項目名稱:invoice2data,代碼行數:27,代碼來源:invoice_template.py

示例6: remove_diacritics

# 需要導入模塊: import unidecode [as 別名]
# 或者: from unidecode import unidecode [as 別名]
def remove_diacritics(s):
    """
    Strip diacritics from text with the `unidecode` package.

    :param s: a str, or any other object (e.g. bytes)
    :returns: the unidecoded string when given a str; any other value is
              returned unchanged.

    >>> remove_diacritics('aéèï')
    'aeei'
    >>> remove_diacritics('aéè'.encode('utf-8'))
    b'a\\xc3\\xa9\\xc3\\xa8'
    """
    if not isinstance(s, str):
        return s

    # For issue #305: turn the typographic apostrophe into a plain one
    # before transliterating (no general solution is known for this).
    return unidecode(s.replace("’", "'"))
開發者ID:dissemin,項目名稱:dissemin,代碼行數:22,代碼來源:utils.py

示例7: get_url_markdown

# 需要導入模塊: import unidecode [as 別名]
# 或者: from unidecode import unidecode [as 別名]
def get_url_markdown(baseurl, start, increment):
  '''
  Download ``baseurl`` and return its html2text rendering as ASCII text.

  baseurl   -- URL to fetch with urllib2
  start     -- unused by this implementation; kept for caller compatibility
  increment -- unused by this implementation; kept for caller compatibility

  Links and images are dropped from the rendering and lines are not
  wrapped (body width 10000).  Errors from the download propagate to the
  caller.
  '''
  urlHandler = urllib2.urlopen(baseurl)
  try:
    data = urlHandler.read()
  finally:
    # Always release the connection, even when the read fails.
    urlHandler.close()
  h = html2text.HTML2Text()
  h.ignore_links = True
  h.ignore_images = True
  h.body_width = 10000
  # Undecodable bytes are dropped before transliteration.
  data = h.handle(unidecode(unicode(data,errors='ignore')))
  # Transliterate again in case the rendering reintroduced non-ASCII text.
  return unidecode(data)
開發者ID:schollz,項目名稱:extract_recipe,代碼行數:24,代碼來源:downloadRecipes.py

示例8: get_url_markdown

# 需要導入模塊: import unidecode [as 別名]
# 或者: from unidecode import unidecode [as 別名]
def get_url_markdown(baseurl, start, increment):
  '''
  Download ``baseurl`` and return its html2text rendering as ASCII text.

  baseurl   -- URL to fetch with urllib2
  start     -- unused by this implementation; kept for caller compatibility
  increment -- unused by this implementation; kept for caller compatibility

  Links and images are dropped from the rendering and lines are not
  wrapped (body width 10000).  Returns None when the download or the
  conversion fails.
  '''
  try:
    urlHandler = urllib2.urlopen(baseurl)
    try:
      data = urlHandler.read()
    finally:
      # Always release the connection, even when the read fails.
      urlHandler.close()
    h = html2text.HTML2Text()
    h.ignore_links = True
    h.ignore_images = True
    h.body_width = 10000
    # Undecodable bytes are dropped before transliteration.
    data = h.handle(unidecode(unicode(data,errors='ignore')))
    # Transliterate again in case the rendering reintroduced non-ASCII text.
    return unidecode(data)
  except Exception:
    # Best effort: failures yield None, but KeyboardInterrupt and
    # SystemExit are no longer swallowed as they were by a bare except.
    return None
開發者ID:schollz,項目名稱:extract_recipe,代碼行數:27,代碼來源:MdownloadRecipes.py

示例9: save

# 需要導入模塊: import unidecode [as 別名]
# 或者: from unidecode import unidecode [as 別名]
def save(self, *args, **kwargs):
        """Save the group, deriving the slug from the title when it is empty."""
        slug_missing = not self.slug
        if slug_missing:
            ascii_title = unidecode(self.title)
            self.slug = slugify(ascii_title)
        super(Group, self).save(*args, **kwargs)
開發者ID:slyapustin,項目名稱:django-classified,代碼行數:6,代碼來源:models.py

示例10: transliterate

# 需要導入模塊: import unidecode [as 別名]
# 或者: from unidecode import unidecode [as 別名]
def transliterate(alt_name):
    """Build an ASCII-transliterated copy of an alternate-name record.

    ``alt_name`` is a dict-like record with at least "name" and "lang"
    entries.  The name is decoded as UTF-8, falling back to Latin-1, and
    transliterated with ``unidecode``.

    Returns a copy of the record whose "name" is the transliteration and
    whose "lang" has ":ascii" appended.  Returns None when ``has_unicode``
    does not match the name, when the name cannot be decoded, or when the
    transliteration equals the original name.
    """
    # has_unicode is defined elsewhere; presumably a compiled regex that
    # detects non-ASCII content -- verify against its definition.
    if not has_unicode.search(alt_name["name"]):
        return None

    try:
        xlit = unidecode(alt_name["name"].decode("utf8"))
    except (UnicodeDecodeError, UnicodeEncodeError):
        try:
            xlit = unidecode(alt_name["name"].decode("latin1"))
        # The original listed UnicodeEncodeError twice here and never
        # caught UnicodeDecodeError.
        except (UnicodeDecodeError, UnicodeEncodeError):
            return None

    if xlit == alt_name["name"]:
        return None

    addl_name = alt_name.copy()
    addl_name["lang"] = alt_name["lang"] + ":ascii"
    addl_name["name"] = xlit
    return addl_name
開發者ID:LibraryOfCongress,項目名稱:gazetteer,代碼行數:16,代碼來源:core.py

示例11: remove_non_ascii

# 需要導入模塊: import unidecode [as 別名]
# 或者: from unidecode import unidecode [as 別名]
def remove_non_ascii(text):
    """Return ``text`` transliterated with ``unidecode``.

    The input is first decoded as UTF-8 via ``unicode(text, ...)``; if that
    step fails for any reason, ``str(text)`` is transliterated instead.
    """
    try:
        return unidecode(unicode(text, encoding="utf-8"))
    except Exception:
        # Was a bare except, which also trapped KeyboardInterrupt/SystemExit.
        return unidecode(str(text))
開發者ID:yaserkl,項目名稱:TransferRL,代碼行數:7,代碼來源:cnn_dm_downloader.py

示例12: remove_non_ascii

# 需要導入模塊: import unidecode [as 別名]
# 或者: from unidecode import unidecode [as 別名]
def remove_non_ascii(text):
    """Return ``text`` transliterated with ``unidecode``.

    The input is first decoded as UTF-8 via ``unicode(text, ...)`` and the
    transliteration is returned as ``unicode``; if that step fails for any
    reason, ``str(text)`` is transliterated and returned as ``str``.
    """
    try:
        return unicode(unidecode(unicode(text, encoding="utf-8")))
    except Exception:
        # Was a bare except, which also trapped KeyboardInterrupt/SystemExit.
        return str(unidecode(str(text)))
開發者ID:yaserkl,項目名稱:TransferRL,代碼行數:7,代碼來源:newsroom_data_maker.py

示例13: remove_non_ascii

# 需要導入模塊: import unidecode [as 別名]
# 或者: from unidecode import unidecode [as 別名]
def remove_non_ascii(self, text):
    """Return ``text`` transliterated with ``unidecode``.

    The input is first decoded as UTF-8 via ``unicode(text, ...)`` and the
    transliteration is returned as ``unicode``; if that step fails for any
    reason, ``text`` itself is transliterated and returned as ``str``.
    """
    try:
      return unicode(unidecode(unicode(text, encoding="utf-8")))
    except Exception:
      # Was a bare except, which also trapped KeyboardInterrupt/SystemExit.
      return str(unidecode(text))
開發者ID:yaserkl,項目名稱:TransferRL,代碼行數:7,代碼來源:decode.py

示例14: remove_diacritics

# 需要導入模塊: import unidecode [as 別名]
# 或者: from unidecode import unidecode [as 別名]
def remove_diacritics(s):
    """Return ``s`` transliterated with ``unidecode`` when it is a unicode
    string; any other value is returned unchanged."""
    # isinstance (rather than an exact type comparison) also accepts
    # subclasses of unicode.
    return unidecode(s) if isinstance(s, unicode) else s
開發者ID:dissemin,項目名稱:oabot,代碼行數:4,代碼來源:main.py

示例15: _transliterated

# 需要導入模塊: import unidecode [as 別名]
# 或者: from unidecode import unidecode [as 別名]
def _transliterated(method):
    """Decorate ``method`` so that its result is passed through ``transliterate``.

    The wrapper takes over the method's ``__name__`` and ``__doc__``,
    carries over its ``_nltk_compat_7bit`` marker when present, and is
    tagged with ``_nltk_compat_transliterated = True``.
    """
    @functools.wraps(method, assigned=["__name__", "__doc__"])
    def wrapper(self):
        return transliterate(method(self))

    # Propagate the 7-bit marker only if the wrapped method carries one.
    if hasattr(method, "_nltk_compat_7bit"):
        wrapper._nltk_compat_7bit = method._nltk_compat_7bit

    wrapper._nltk_compat_transliterated = True
    return wrapper
開發者ID:rafasashi,項目名稱:razzy-spinner,代碼行數:12,代碼來源:compat.py


注:本文中的unidecode.unidecode方法示例由純淨天空整理自Github/MSDocs等開源代碼及文檔管理平台,相關代碼片段篩選自各路編程大神貢獻的開源項目,源碼版權歸原作者所有,傳播和使用請參考對應項目的License;未經允許,請勿轉載。