当前位置: 首页>>代码示例>>Python>>正文


Python unicodedata.normalize函数代码示例

本文整理汇总了Python中unicodedata.normalize函数的典型用法代码示例。如果您正苦于以下问题:Python normalize函数的具体用法?Python normalize怎么用?Python normalize使用的例子?那么恭喜您, 这里精选的函数代码示例或许可以为您提供帮助。


在下文中一共展示了normalize函数的15个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Python代码示例。

示例1: get_cast_crew

    def get_cast_crew(self, url):
        """Scrape the cast and crew tables from a the-numbers.com movie page.

        Returns a dict keyed by section title (initially u'Cast' and
        u'Production and Technical Credits', replaced by the page's own
        heading text when found).  Each value is a JSON-encoded
        {position: [names]} mapping, or np.nan when the section is
        missing or fails to parse.
        """
        request = get_file(url)
        soup = BeautifulSoup(request.text)
        main_dic = {}

        lst = [u'Cast', u'Production and Technical Credits']
        for i in xrange(len(lst)):
            main_dic[lst[i]] = np.nan
            dic = {}
            try:
                # Replace the placeholder key with the section's real heading.
                lst[i] = soup.findAll('div', {'id': 'cast'})[i].find('h1').text
                for row in soup.findAll('div', {'id': 'cast'})[i].findAll('tr'):
                    position, filler, name = row.findAll('td')
                    # ASCII-fold accented characters so keys/values are plain bytes.
                    position = unicodedata.normalize('NFKD', position.text).encode('ascii', 'ignore')
                    name = unicodedata.normalize('NFKD', name.text).encode('ascii', 'ignore')
                    if position in dic:
                        dic[position] += [name]
                    else:
                        dic[position] = [name]
                dic = json.dumps(dic)
            except Exception:
                # Section absent or row layout unexpected: record NaN for this
                # section instead of aborting the whole scrape.  (Was a bare
                # `except:`, which also swallowed KeyboardInterrupt/SystemExit.)
                dic = np.nan

            main_dic[lst[i]] = dic
        return main_dic
开发者ID:fx2323,项目名称:project_luther,代码行数:25,代码来源:the_numbers_web_crawler.py

示例2: crawler

def crawler():
    """Crawl IMDb "recommended title" links starting from The Shawshank
    Redemption, writing up to 200 movies rated in (6.5, 8.5] to data.csv.

    Each CSV row is [count, name, year, director, rating].  Visited pages
    are tracked to avoid refetching; newly discovered links are merged
    into the frontier as a de-duplicated set.
    """
    arr = ["http://www.imdb.com/title/tt0111161/?pf_rd_m=A2FGELUUNOQJNL&pf_rd_p=2398042102&pf_rd_r=07XG6QFJZEE6BBVY6J2Z&pf_rd_s=center-1&pf_rd_t=15506&pf_rd_i=top&ref_=chttp_tt_1"]
    visited = []
    c = 0
    # `with` guarantees data.csv is closed even if a request or parse
    # raises mid-crawl (the original only closed on clean completion).
    with open('data.csv', "w") as fp:
        a = csv.writer(fp, delimiter=',', quotechar="$")
        while c < 200:
            page = arr.pop()
            if page not in visited:
                r = requests.get(page)
                soup = bs4.BeautifulSoup(r.text)
                # ASCII-fold the rating text before parsing it as a float.
                rate = unicodedata.normalize('NFKD', soup.find("span", attrs={"itemprop": "ratingValue"}).string).encode('ascii', 'ignore')
                n = float(rate)
                if n > 6.5 and n <= 8.5:
                    c = c + 1
                    name = unicodedata.normalize('NFKD', soup.find("h1", attrs={"itemprop": "name"}).text).encode('ascii', 'ignore')
                    year = soup.find(attrs={"id": "titleYear"}).text
                    # NOTE(review): this takes the first itemprop="name" span
                    # on the page as the director -- confirm the page markup
                    # actually orders it that way.
                    director = unicodedata.normalize('NFKD', soup.find("span", attrs={"itemprop": "name"}).string).encode('ascii', 'ignore')
                    print([c, name, year, director, n])
                    a.writerow([c, name, year, director, n])
                divs = soup.find_all('div', attrs={"class": "rec-title"})
                links = [div.find('a')['href'] for div in divs]
                links = [urljoin(page, link) for link in links]
                arr = list(set(arr) | set(links))
                visited.append(page)
开发者ID:meenal5gupta,项目名称:imdb-crawler,代码行数:26,代码来源:imdb-crawler.py

示例3: find_all_translations

def find_all_translations(soup):
    """Scrape Tagalog/English word pairs from one dictionary listing page.

    For every word link on *soup*, fetches the word's page, extracts
    bold Tagalog headwords and the English gloss that follows the first
    <br>, appends single-word pairs (ASCII-folded) to translations.txt,
    then recurses into the "next page" pagination link.

    NOTE(review): the recursion has no explicit base case -- on the last
    page `soup.find('li', class_='pagination-next')` returns None and
    the chain ends with an AttributeError.  Confirm whether that is the
    intended stop condition.
    """
    file_string = ''

    for word_data in soup.find_all("td", class_="list-title"):
        part_link = word_data.find("a")['href']
        full_link = domain + part_link

        soup2 = getSoup(full_link)

        translations = soup2.find("article", class_="item-page").find_all(style="text-align: center;")

        for translation in translations:
            tagalog = translation.find(['b', 'strong'])
            new_line = translation.find('br')

            # The English gloss, when present, is the text node after <br>.
            if new_line:
                english = new_line.next_sibling
            else:
                english = None

            if tagalog and english and tagalog.string and english.string is not None:
                # Keep only single-word Tagalog entries whose gloss is a
                # different string object (skips self-referential entries).
                if ' ' not in tagalog.string.strip() and tagalog.string is not english.string:
                    file_string += unicodedata.normalize('NFD', tagalog.string.strip()).encode('ascii', 'ignore').decode("utf-8") + "\n"
                    file_string += unicodedata.normalize('NFD', str([word.strip() for word in english.string.strip().split(',')])).encode('ascii', 'ignore').decode("utf-8") + "\n"
                    file_string += "\n"

    # `with` ensures the handle is closed even if write() raises (the
    # original used open/write/close with no try/finally).
    with open('translations.txt', 'a') as f:
        f.write(file_string)

    next_page_link = soup.find('li', class_='pagination-next').find('a')['href']

    print('Parsing %s...' % (domain + next_page_link))
    find_all_translations(getSoup(domain + next_page_link))
开发者ID:RoseySoft,项目名称:auto-downloaders,代码行数:34,代码来源:filipino-dictionary-downloader.py

示例4: ok_to_send

def ok_to_send(day_start, day_end):
    """Return True when the current time falls inside the daytime window.

    *day_start* / *day_end* are "HH:MM" strings (possibly unicode with
    non-ASCII code points); each is NFKD-normalized and ASCII-folded
    before being split into hour/minute.  Window classification is
    delegated to check_time(); only a match classified as DAY permits
    sending -- NIGHT, unknown, or no match all suppress it.
    """
    now = datetime.datetime.now().time()

    def _parse_hhmm(value):
        # Fold fancy unicode digits/colons down to plain ASCII, then
        # split "HH:MM" into a datetime.time.  (Factored out: the
        # original duplicated this block for start and end.)
        parts = str.split(
            unicodedata.normalize('NFKD', value).encode('ascii', 'ignore'),
            ":")
        return datetime.time(int(parts[0]), int(parts[1]))

    on_time = _parse_hhmm(day_start)
    off_time = _parse_hhmm(day_end)
    when, matching = check_time(now, on_time, off_time)
    # The original if/elif ladder (and the unused `should_I_send` local)
    # reduces to this single expression: send only on a matched DAY.
    return bool(matching and when == DAY)
开发者ID:kcjuntunen,项目名称:arduino_log,代码行数:25,代码来源:utility.py

示例5: test_greek_print_ipa

 def test_greek_print_ipa(self):
     """Check Greek IPA output of Word._print_ipa, syllabified and plain."""
     word = grc.Word("élipe", grc.GREEK["Attic"]["Probert"])
     expected = [unicodedata.normalize('NFC', "é.li.pe"),
                 unicodedata.normalize('NFC', "élipe")]
     actual = [word._print_ipa(True), word._print_ipa(False)]
     self.assertEqual(actual, expected)
开发者ID:TylerKirby,项目名称:cltk,代码行数:7,代码来源:test_phonology.py

示例6: freeze

    def freeze(self):
        """Clean the destination and build all URLs from generators."""
        # Whether leftovers from a previous build should be deleted afterwards.
        remove_extra = self.app.config['FREEZER_REMOVE_EXTRA_FILES']
        if not os.path.isdir(self.root):
            os.makedirs(self.root)
        if remove_extra:
            ignore = self.app.config['FREEZER_DESTINATION_IGNORE']
            previous_files = set(
                # See https://github.com/SimonSapin/Frozen-Flask/issues/5
                # NFC-normalize so paths compare equal across filesystems with
                # different unicode normalization (e.g. Mac OS X vs Linux).
                normalize('NFC', os.path.join(self.root, *name.split('/')))
                for name in walk_directory(self.root, ignore=ignore))
        seen_urls = set()
        seen_endpoints = set()
        built_files = set()

        for url, endpoint in self._generate_all_urls():
            seen_endpoints.add(endpoint)
            if url in seen_urls:
                # Don't build the same URL more than once
                continue
            seen_urls.add(url)
            new_filename = self._build_one(url)
            # Same NFC normalization as previous_files so the set
            # difference below is meaningful.
            built_files.add(normalize('NFC', new_filename))

        # Flag endpoints that generated no URL (semantics defined in
        # _check_endpoints, elsewhere in this class).
        self._check_endpoints(seen_endpoints)
        if remove_extra:
            # Remove files from the previous build that are not here anymore.
            for extra_file in previous_files - built_files:
                os.remove(extra_file)
                parent = os.path.dirname(extra_file)
                if not os.listdir(parent):
                    # The directory is now empty, remove it.
                    os.removedirs(parent)
        return seen_urls
开发者ID:meantheory,项目名称:Frozen-Flask,代码行数:34,代码来源:__init__.py

示例7: test_listdir2_returns_name_stat_pairs

    def test_listdir2_returns_name_stat_pairs(self):
        """listdir2 yields (utf-8 name, stat) pairs with nsec timestamps."""
        filename_unicode = u'M\u00E4kel\u00E4'
        filename_utf8 = filename_unicode.encode('utf-8')

        self.fs.write_file(filename_utf8, 'data')
        listing = self.fs.listdir2('.')
        self.assertEqual(len(listing), 1)
        self.assertEqual(len(listing[0]), 2)
        returned_utf8, stat_result = listing[0]

        self.assertEqual(type(returned_utf8), str)
        returned_unicode = returned_utf8.decode('utf-8')

        # See https://en.wikipedia.org/wiki/Unicode_equivalence for
        # background.  Compare both names in NFKD form: Linux and Mac OS X
        # normalise filenames differently, and NFKD makes the comparison
        # hold on both.
        self.assertEqual(
            unicodedata.normalize('NFKD', returned_unicode),
            unicodedata.normalize('NFKD', filename_unicode))

        # The stat object must expose sec/nsec mtime fields instead of the
        # plain float st_mtime.
        self.assertTrue(hasattr(stat_result, 'st_mode'))
        self.assertFalse(hasattr(stat_result, 'st_mtime'))
        self.assertTrue(hasattr(stat_result, 'st_mtime_sec'))
        self.assertTrue(hasattr(stat_result, 'st_mtime_nsec'))
开发者ID:obnam-mirror,项目名称:obnam,代码行数:25,代码来源:vfs.py

示例8: CrearPedidoCertificado

    def CrearPedidoCertificado(self, cuit="", empresa="", nombre="pyafipws",
                                     filename="empresa.csr"):
        "Create a certificate signing request (X509 CSR) and write it to *filename* as PEM."
        from M2Crypto import RSA, EVP, X509

        # create the certificate signing request (CSR):
        self.x509_req = X509.Request ()

        # normalize encoding (fold accents, enie, etc. down to ASCII bytes,
        # since the X509 name entries below are built from byte strings)
        if isinstance(empresa, unicode):
            empresa = unicodedata.normalize('NFKD', empresa).encode('ASCII', 'ignore')
        if isinstance(nombre, unicode):
            nombre = unicodedata.normalize('NFKD', nombre).encode('ASCII', 'ignore')

        # subject: C=AR/O=[empresa]/CN=[nombre]/serialNumber=CUIT [nro_cuit]
        x509name = X509.X509_Name ()
        # default OpenSSL parameters:
        kwargs = {"type": 0x1000 | 1, "len": -1, "loc": -1, "set": 0}
        x509name.add_entry_by_txt(field='C', entry='AR', **kwargs)
        x509name.add_entry_by_txt(field='O', entry=empresa, **kwargs)
        x509name.add_entry_by_txt(field='CN', entry=nombre, **kwargs)
        x509name.add_entry_by_txt(field='serialNumber', entry="CUIT %s" % str(cuit), **kwargs)     
        self.x509_req.set_subject_name(x509name)

        # sign the request with the previously created key (CrearClavePrivada)
        self.x509_req.set_pubkey (pkey=self.pkey)
        self.x509_req.sign(pkey=self.pkey, md='sha256')
        # save the CSR result to a file:
        f = open(filename, "w")
        f.write(self.x509_req.as_pem())
        f.close()
        return True
开发者ID:psgreco,项目名称:pyafipws,代码行数:32,代码来源:wsaa.py

示例9: add_other_bank_account

def add_other_bank_account(request):
  """
  function to add a receiver of another bank to which user wants to transfer the money.
  It fills in all the details of the receiver and also validates them.
  """
  try:
    cust_id=request.session.get('user_id')
    name=request.POST["name"]
    connected_acc_no1=request.POST["account_no"]
    confirm_acc_no=request.POST["account_no_2"]
    addressline1=request.POST["line1"]
    addressline2=request.POST["line2"]
    addressline3=request.POST["line3"]
    IFSC_code1=request.POST["IFSC"]
    limit1=request.POST["limit"]

    error1="Account Confirmation Failed"
    error2="Please Enter Valid numbers in fields"
    error3="Please Enter numeral entries in fields"
    error4="Sorry The account you wish to connect does not exist"
    error6="Account Already Added"
    error7="IFSC code does no exists"
    if(connected_acc_no1!=confirm_acc_no):
	return render_to_response("add_other_bank_account.html",{'error':error1,'STATIC_URL':"/static/"})
    limit=unicodedata.normalize('NFKD', limit1).encode('ascii','ignore')
    connected_acc_no=unicodedata.normalize('NFKD', connected_acc_no1).encode('ascii','ignore')
    IFSC_code=unicodedata.normalize('NFKD', IFSC_code1).encode('ascii','ignore')
    try:
    	i = float(limit)
    except ValueError, TypeError:
    	return render_to_response("add_other_bank_account.html",{'error':error3,'STATIC_URL':"/static/"})
    else:
开发者ID:pgiitu,项目名称:Online_transactions_iteration2,代码行数:32,代码来源:views.py

示例10: toRSSItem

	def toRSSItem(self):
		"""Build an RSSItem for this commit: title from the repo tag plus a
		message snippet and keywords; description is the pretty-printed
		commit wrapped in <pre>."""
		title = self.repo.tagname
		# Truncate long commit messages to 50 chars in the feed title.
		if self.message and len(self.message) > 50: title += " - " + self.message[:50] + "..."
		elif self.message: title += " - " + self.message
		if self.dbkeywords: title += " - " + ",".join(self.dbkeywords)
		
		description  = "<pre>"
		description += self.getpprint()
		description += "</pre>"
		
		# ASCII-fold both fields (Python 2: utf-8 bytes -> unicode -> ascii,
		# dropping characters that do not decompose to ASCII)
		title = unicodedata.normalize('NFKD', unicode(title, 'utf-8')).encode('ascii', 'ignore')
		description = unicodedata.normalize('NFKD', unicode(description, 'utf-8')).encode('ascii', 'ignore')

		link = ''
		if self.repo.viewlink:
			# The repo's view-URL template uses %ID as the commit-id placeholder.
			link = self.repo.viewlink.replace('%ID', self.uniqueid)

		item = RSSItem(
			title = title,
			link = link,
			description = description,
			guid = Config.rooturl + "/commit/" + self.repo.tagname + "/" + self.uniqueid,
			pubDate = unixToDatetime(self.date)
			)
		return item
开发者ID:sirvaliance,项目名称:code-audit-feed,代码行数:25,代码来源:commit.py

示例11: normalize_token

def normalize_token(data):
    """Lowercase *data* and strip accents and apostrophes.

    Decomposes to NFD, drops all Mn (nonspacing mark) code points,
    lowercases, recomposes to NFC, then removes both ASCII (') and
    typographic (U+2019) apostrophes.

    credit: http://stackoverflow.com/questions/517923/what-is-the-best-way-to-remove-accents-in-a-python-unicode-string
    """
    stripped = "".join(
        c for c in unicodedata.normalize("NFD", data)
        if unicodedata.category(c) != "Mn"
    )
    data = unicodedata.normalize("NFC", stripped.lower())
    # Was re.sub(ur"['\u2019]", ...): the ur"" prefix is a SyntaxError on
    # Python 3; a plain u"" literal behaves identically on 2 and 3.
    data = re.sub(u"['\u2019]", "", data)
    return data
开发者ID:bydesign,项目名称:openscriptures,代码行数:7,代码来源:import_helpers.py

示例12: __init__

 def __init__(self):
     # Resolve the path to XBMC's RssFeeds.xml (userdata dir), or fall back
     # to a hardcoded Windows path when not running inside xbmc.
     if xbmc:
         self.RssFeedsPath = xbmc.translatePath('special://userdata/RssFeeds.xml').decode("utf-8")
     else:
         self.RssFeedsPath = r'C:\Documents and Settings\Xerox\Application Data\XBMC\userdata\RssFeeds.xml'
     sane = self.checkRssFeedPathSanity()
     if sane:
         try:
             self.feedsTree = parse(self.RssFeedsPath)
         except:
             # Parse failed: log (path ASCII-folded for the log string) and
             # ask the user whether to regenerate a default RssFeeds.xml.
             log('[script] RSS Editor --> Failed to parse ' + unicodedata.normalize( 'NFKD', self.RssFeedsPath ).encode( 'ascii', 'ignore' ))
             regen = xbmcgui.Dialog().yesno(getLS(40), getLS(51), getLS(52), getLS(53))
             if regen:
                 log('[script] RSS Editor --> Attempting to Regenerate RssFeeds.xml')
                 xml = '<?xml version="1.0" encoding="UTF-8" standalone="yes"?>\n<rssfeeds>\n\
                 <!-- RSS feeds. To have multiple feeds, just add a feed to the set. You can also have multiple sets. 	!-->\n\
                 <!-- To use different sets in your skin, each must be called from skin with a unique id.             	!-->\n\
                 <set id="1">\n    <feed updateinterval="30">http://feeds.feedburner.com/xbmc</feed>\n  </set>\n</rssfeeds>'
                 f = open(self.RssFeedsPath, 'w')
                 f.write(xml)
                 f.close()
                 # Re-enter __init__ to parse the freshly written default file.
                 self.__init__()
             else:
                 log('[script] RSS Editor --> User opted to not regenerate RssFeeds.xml.  Script Exiting')
                 self.feedsTree = False
         if self.feedsTree:
             self.feedsList = self.getCurrentRssFeeds()
     else:
         # Path sanity check failed: leave both attributes falsy so callers
         # can detect the unusable state.
         self.feedsTree = False
         self.feedsList = False
         log('[SCRIPT] RSS Editor --> Could not open ' + unicodedata.normalize( 'NFKD', self.RssFeedsPath ).encode( 'ascii', 'ignore' ) +'. Either the file does not exist, or its size is zero.')
开发者ID:noba3,项目名称:KoTos,代码行数:31,代码来源:xmlParser.py

示例13: redirect_if_needed

 def redirect_if_needed(self, i):
     """Canonicalise query parameters and redirect when anything changed.

     Plural keys are replaced by their singular form, values are
     stripped and NFC-normalised, and empty values become None; if any
     of that altered the input, a redirect to the cleaned query is
     raised via web.seeother.
     """
     cleaned = {}
     changed = False
     for key, value in i.items():
         if key in plurals:
             # Blank out the plural key and carry the value under the
             # singular name instead.
             cleaned[key] = None
             key = plurals[key]
             changed = True
         if isinstance(value, list):
             if value == []:
                 continue
             stripped = [normalize('NFC', entry.strip()) for entry in value]
             if stripped != value:
                 changed = True
             # A single empty string collapses to no value at all.
             if len(stripped) == 1 and stripped[0] == u'':
                 stripped = None
         else:
             stripped = normalize('NFC', value.strip())
             if stripped == '':
                 changed = True
                 stripped = None
             if stripped != value:
                 changed = True
         cleaned[key] = stripped
     if changed:
         raise web.seeother(web.changequery(**cleaned))
开发者ID:bfalling,项目名称:openlibrary,代码行数:26,代码来源:code.py

示例14: noDiacritics

def noDiacritics(s):
    """Remove diacritics from *s*, returning a plain ASCII byte string.

    Accepts either a utf-8 byte string or an already-decoded unicode
    object (Python 2).  Ordinal-indicator characters (U+00AA, U+00BA)
    are dropped outright.  Returns None for None input, and the input
    unchanged when it cannot be decoded at all.
    """

    # sanity check
    if s is None:
        return None

    # try the right way first: assume utf-8 encoded bytes
    try:
        strAux = unicode(s, 'utf-8')
        # remove some chars
        strAux = strAux.replace(unichr(0xba), "")     # masculine ordinal (º)
        strAux = strAux.replace(unichr(0xaa), "")     # feminine ordinal (ª)
        # decompose accents, then drop the non-ASCII combining marks
        ret = unicodedata.normalize('NFKD', strAux)
        ret = ret.encode('ascii', 'ignore')
    except Exception:
        # e.g. TypeError when s is already unicode; fall through below
        ret = None

    # fallback: s may already be a unicode object
    if ret is None:
        try:
            # BUG FIX: was `s.decode(s, 'utf-8')`, which passed the string
            # itself as the codec name -- this branch always raised and
            # could never succeed.
            strAux = s.decode('utf-8')
            # remove some chars
            strAux = strAux.replace(unichr(0xba), "")     # masculine ordinal (º)
            strAux = strAux.replace(unichr(0xaa), "")     # feminine ordinal (ª)
            # normalization
            ret = unicodedata.normalize('NFKD', strAux)
            ret = ret.encode('ascii', 'ignore')
        except Exception:
            ret = s     # give up: return as received

    return ret
开发者ID:MGDevelopment,项目名称:library,代码行数:33,代码来源:__init__.py

示例15: tokenizeComparison

 def tokenizeComparison(self, given, correct):
     """Diff *given* against *correct* into per-string token lists.

     Returns (givenElems, correctElems): each is a list of (ok, text)
     tuples where ok=True marks a run present in both strings and
     ok=False a mismatched run.  Characters present in *correct* but
     missing from *given* are padded into the given list as '-'
     placeholders so the two token streams line up visually.
     """
     # compare in NFC form so accents appear correct
     given = ucd.normalize("NFC", given)
     correct = ucd.normalize("NFC", correct)
     s = difflib.SequenceMatcher(None, given, correct, autojunk=False)
     givenElems = []
     correctElems = []
     givenPoint = 0
     correctPoint = 0
     offby = 0
     # (helpers' `text` parameter was named `str`, shadowing the builtin)
     def logBad(old, new, text, array):
         # record the non-matching slice text[old:new], if non-empty
         if old != new:
             array.append((False, text[old:new]))
     def logGood(start, cnt, text, array):
         # record a matching run of cnt chars starting at `start`
         if cnt:
             array.append((True, text[start:start+cnt]))
     for x, y, cnt in s.get_matching_blocks():
         # if anything was missed in correct, pad given
         if cnt and y-offby > x:
             givenElems.append((False, "-"*(y-x-offby)))
             offby = y-x
         # log any preceding bad elems
         logBad(givenPoint, x, given, givenElems)
         logBad(correctPoint, y, correct, correctElems)
         givenPoint = x+cnt
         correctPoint = y+cnt
         # log the match
         logGood(x, cnt, given, givenElems)
         logGood(y, cnt, correct, correctElems)
     return givenElems, correctElems
开发者ID:ACEfanatic02,项目名称:anki,代码行数:30,代码来源:reviewer.py


注:本文中的unicodedata.normalize函数示例由纯净天空整理自Github/MSDocs等开源代码及文档管理平台,相关代码片段筛选自各路编程大神贡献的开源项目,源码版权归原作者所有,传播和使用请参考对应项目的License;未经允许,请勿转载。