This article collects typical usage examples of Python's unicodedata.normalize function: what normalize does, how to call it, and how real projects use it. If you are looking for working code to start from, the examples selected here may help.
The following shows 15 code examples of the normalize function, ordered by default from most to least popular.
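Before the examples, here is a minimal sketch (not taken from any of the projects below) of what the four normalization forms do; the sample strings are purely illustrative:

import unicodedata

s = u'C\u0327a\u0308'  # 'C' + combining cedilla, 'a' + combining diaeresis
print(unicodedata.normalize('NFC', s))   # composed form: u'\xc7\xe4' ('Çä')
print(unicodedata.normalize('NFD', s))   # fully decomposed form (base letters + combining marks)
print(unicodedata.normalize('NFKC', s))  # NFC plus compatibility mappings (e.g. u'\u2460' -> u'1')
print(unicodedata.normalize('NFKD', s))  # NFD plus compatibility mappings

# The pattern most examples below rely on: NFKD, then an ASCII encode with
# errors='ignore', which drops the separated combining marks.
print(unicodedata.normalize('NFKD', u'Mäkelä').encode('ascii', 'ignore'))  # 'Makela' (b'Makela' on Python 3)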
Example 1: get_cast_crew
# Python 2; assumed imports: json, unicodedata, numpy as np,
# BeautifulSoup (bs4); get_file() is a helper from the surrounding project.
def get_cast_crew(self, url):
    request = get_file(url)
    soup = BeautifulSoup(request.text)
    main_dic = {}
    lst = [u'Cast', u'Production and Technical Credits']
    for i in xrange(len(lst)):
        main_dic[lst[i]] = np.nan
        dic = {}
        try:
            lst[i] = soup.findAll('div', {'id': 'cast'})[i].find('h1').text
            for row in soup.findAll('div', {'id': 'cast'})[i].findAll('tr'):
                position, filler, name = row.findAll('td')
                # NFKD + ASCII/'ignore' folds accented characters to plain ASCII
                position = unicodedata.normalize('NFKD', position.text).encode('ascii', 'ignore')
                name = unicodedata.normalize('NFKD', name.text).encode('ascii', 'ignore')
                if position in dic:
                    dic[position] += [name]
                else:
                    dic[position] = [name]
            dic = json.dumps(dic)
        except:
            dic = np.nan
        main_dic[lst[i]] = dic
    return main_dic
Example 2: crawler
# Python 2; assumed imports: csv, unicodedata, bs4, requests,
# and urljoin (from urlparse).
def crawler():
    arr = ["http://www.imdb.com/title/tt0111161/?pf_rd_m=A2FGELUUNOQJNL&pf_rd_p=2398042102&pf_rd_r=07XG6QFJZEE6BBVY6J2Z&pf_rd_s=center-1&pf_rd_t=15506&pf_rd_i=top&ref_=chttp_tt_1"]
    fp = open('data.csv', "w")
    a = csv.writer(fp, delimiter=',', quotechar="$")
    visited = []
    c = 0
    while c < 200:
        page = arr.pop()
        if page not in visited:
            r = requests.get(page)
            soup = bs4.BeautifulSoup(r.text)
            # normalize the scraped rating and drop any non-ASCII characters
            rate = unicodedata.normalize('NFKD', soup.find("span", attrs={"itemprop": "ratingValue"}).string).encode('ascii', 'ignore')
            n = float(rate)
            if n > 6.5 and n <= 8.5:
                c = c + 1
                name = unicodedata.normalize('NFKD', soup.find("h1", attrs={"itemprop": "name"}).text).encode('ascii', 'ignore')
                year = soup.find(attrs={"id": "titleYear"}).text
                director = unicodedata.normalize('NFKD', soup.find("span", attrs={"itemprop": "name"}).string).encode('ascii', 'ignore')
                print([c, name, year, director, n])
                a.writerow([c, name, year, director, n])
            # queue the recommended titles for crawling
            divs = soup.find_all('div', attrs={"class": "rec-title"})
            links = [div.find('a')['href'] for div in divs]
            links = [urljoin(page, link) for link in links]
            arr = list(set(arr) | set(links))
            visited.append(page)
    fp.close()
Example 3: find_all_translations
# Assumed context: import unicodedata; domain and getSoup() are defined at
# module level in the source project.
def find_all_translations(soup):
    file_string = ''
    for word_data in soup.find_all("td", class_="list-title"):
        part_link = word_data.find("a")['href']
        full_link = domain + part_link
        soup2 = getSoup(full_link)
        translations = soup2.find("article", class_="item-page").find_all(style="text-align: center;")
        for translation in translations:
            tagalog = translation.find(['b', 'strong'])
            new_line = translation.find('br')
            if new_line:
                english = new_line.next_sibling
            else:
                english = None
            if tagalog and english and tagalog.string and english.string is not None:
                if ' ' not in tagalog.string.strip() and tagalog.string is not english.string:
                    # NFD decomposition + ASCII/'ignore' strips the diacritics
                    file_string += unicodedata.normalize('NFD', tagalog.string.strip()).encode('ascii', 'ignore').decode("utf-8") + "\n"
                    file_string += unicodedata.normalize('NFD', str([word.strip() for word in english.string.strip().split(',')])).encode('ascii', 'ignore').decode("utf-8") + "\n"
                    file_string += "\n"
    f = open('translations.txt', 'a')
    f.write(file_string)
    f.close()
    next_page_link = soup.find('li', class_='pagination-next').find('a')['href']
    print('Parsing %s...' % (domain + next_page_link))
    find_all_translations(getSoup(domain + next_page_link))
Example 4: ok_to_send
# Assumed context: import datetime, unicodedata; DAY, NIGHT and check_time()
# come from the surrounding module. Python 2: encode() returns a str.
def ok_to_send(day_start, day_end):
    now = datetime.datetime.now().time()
    # normalize the configured "HH:MM" strings and strip any non-ASCII characters
    dstart = unicodedata.normalize('NFKD', day_start).encode('ascii', 'ignore').split(":")
    dend = unicodedata.normalize('NFKD', day_end).encode('ascii', 'ignore').split(":")
    on_time = datetime.time(int(dstart[0]), int(dstart[1]))
    off_time = datetime.time(int(dend[0]), int(dend[1]))
    when, matching = check_time(now, on_time, off_time)
    if matching:
        if when == DAY:
            return True
        elif when == NIGHT:
            return False
        else:
            return False
    else:
        return False
Example 5: test_greek_print_ipa
def test_greek_print_ipa(self):
    """Test the Word class's `_print_ipa` in Greek."""
    w = grc.Word("élipe", grc.GREEK["Attic"]["Probert"])
    output = [w._print_ipa(True), w._print_ipa(False)]
    target = [unicodedata.normalize('NFC', "é.li.pe"),
              unicodedata.normalize('NFC', "élipe")]
    self.assertEqual(output, target)
Example 6: freeze
def freeze(self):
    """Clean the destination and build all URLs from generators."""
    remove_extra = self.app.config['FREEZER_REMOVE_EXTRA_FILES']
    if not os.path.isdir(self.root):
        os.makedirs(self.root)
    if remove_extra:
        ignore = self.app.config['FREEZER_DESTINATION_IGNORE']
        previous_files = set(
            # See https://github.com/SimonSapin/Frozen-Flask/issues/5
            normalize('NFC', os.path.join(self.root, *name.split('/')))
            for name in walk_directory(self.root, ignore=ignore))
    seen_urls = set()
    seen_endpoints = set()
    built_files = set()
    for url, endpoint in self._generate_all_urls():
        seen_endpoints.add(endpoint)
        if url in seen_urls:
            # Don't build the same URL more than once
            continue
        seen_urls.add(url)
        new_filename = self._build_one(url)
        built_files.add(normalize('NFC', new_filename))
    self._check_endpoints(seen_endpoints)
    if remove_extra:
        # Remove files from the previous build that are not here anymore.
        for extra_file in previous_files - built_files:
            os.remove(extra_file)
            parent = os.path.dirname(extra_file)
            if not os.listdir(parent):
                # The directory is now empty, remove it.
                os.removedirs(parent)
    return seen_urls
Example 7: test_listdir2_returns_name_stat_pairs
def test_listdir2_returns_name_stat_pairs(self):
    funny_unicode = u'M\u00E4kel\u00E4'
    funny_utf8 = funny_unicode.encode('utf-8')
    self.fs.write_file(funny_utf8, 'data')
    pairs = self.fs.listdir2('.')
    self.assertEqual(len(pairs), 1)
    self.assertEqual(len(pairs[0]), 2)
    name_utf8, st = pairs[0]
    self.assertEqual(type(name_utf8), str)
    name_unicode = name_utf8.decode('utf-8')
    # See https://en.wikipedia.org/wiki/Unicode_equivalence for
    # background. The NFKD normalisation seems to be the best way
    # to ensure things work across Linux and Mac OS X both (their
    # default normalisation for filenames is different).
    self.assertEqual(
        unicodedata.normalize('NFKD', name_unicode),
        unicodedata.normalize('NFKD', funny_unicode))
    self.assertTrue(hasattr(st, 'st_mode'))
    self.assertFalse(hasattr(st, 'st_mtime'))
    self.assertTrue(hasattr(st, 'st_mtime_sec'))
    self.assertTrue(hasattr(st, 'st_mtime_nsec'))
Example 8: CrearPedidoCertificado
def CrearPedidoCertificado(self, cuit="", empresa="", nombre="pyafipws",
filename="empresa.csr"):
"Crear un certificate signing request (X509 CSR)"
from M2Crypto import RSA, EVP, X509
# create the certificate signing request (CSR):
self.x509_req = X509.Request ()
# normalizar encoding (reemplazar acentos, eñe, etc.)
if isinstance(empresa, unicode):
empresa = unicodedata.normalize('NFKD', empresa).encode('ASCII', 'ignore')
if isinstance(nombre, unicode):
nombre = unicodedata.normalize('NFKD', nombre).encode('ASCII', 'ignore')
# subjet: C=AR/O=[empresa]/CN=[nombre]/serialNumber=CUIT [nro_cuit]
x509name = X509.X509_Name ()
# default OpenSSL parameters:
kwargs = {"type": 0x1000 | 1, "len": -1, "loc": -1, "set": 0}
x509name.add_entry_by_txt(field='C', entry='AR', **kwargs)
x509name.add_entry_by_txt(field='O', entry=empresa, **kwargs)
x509name.add_entry_by_txt(field='CN', entry=nombre, **kwargs)
x509name.add_entry_by_txt(field='serialNumber', entry="CUIT %s" % str(cuit), **kwargs)
self.x509_req.set_subject_name(x509name)
# sign the request with the previously created key (CrearClavePrivada)
self.x509_req.set_pubkey (pkey=self.pkey)
self.x509_req.sign(pkey=self.pkey, md='sha256')
# save the CSR result to a file:
f = open(filename, "w")
f.write(self.x509_req.as_pem())
f.close()
return True
Example 9: add_other_bank_account
# Assumed context: Django view (Python 2); import unicodedata and
# from django.shortcuts import render_to_response.
def add_other_bank_account(request):
    """
    Add a receiver at another bank to whom the user wants to transfer money.
    Fills in all the receiver's details and validates them.
    """
    try:
        cust_id = request.session.get('user_id')
        name = request.POST["name"]
        connected_acc_no1 = request.POST["account_no"]
        confirm_acc_no = request.POST["account_no_2"]
        addressline1 = request.POST["line1"]
        addressline2 = request.POST["line2"]
        addressline3 = request.POST["line3"]
        IFSC_code1 = request.POST["IFSC"]
        limit1 = request.POST["limit"]
        error1 = "Account Confirmation Failed"
        error2 = "Please Enter Valid numbers in fields"
        error3 = "Please Enter numeral entries in fields"
        error4 = "Sorry The account you wish to connect does not exist"
        error6 = "Account Already Added"
        error7 = "IFSC code does not exist"
        if connected_acc_no1 != confirm_acc_no:
            return render_to_response("add_other_bank_account.html", {'error': error1, 'STATIC_URL': "/static/"})
        # normalize the submitted form values down to plain ASCII
        limit = unicodedata.normalize('NFKD', limit1).encode('ascii', 'ignore')
        connected_acc_no = unicodedata.normalize('NFKD', connected_acc_no1).encode('ascii', 'ignore')
        IFSC_code = unicodedata.normalize('NFKD', IFSC_code1).encode('ascii', 'ignore')
        try:
            i = float(limit)
        except (ValueError, TypeError):
            return render_to_response("add_other_bank_account.html", {'error': error3, 'STATIC_URL': "/static/"})
        else:
Example 10: toRSSItem
# Assumed context: import unicodedata; RSSItem, Config and unixToDatetime()
# come from the surrounding project. Python 2 (uses unicode()).
def toRSSItem(self):
    title = self.repo.tagname
    if self.message and len(self.message) > 50: title += " - " + self.message[:50] + "..."
    elif self.message: title += " - " + self.message
    if self.dbkeywords: title += " - " + ",".join(self.dbkeywords)
    description = "<pre>"
    description += self.getpprint()
    description += "</pre>"
    # decode the UTF-8 str, then fold the result down to plain ASCII
    title = unicodedata.normalize('NFKD', unicode(title, 'utf-8')).encode('ascii', 'ignore')
    description = unicodedata.normalize('NFKD', unicode(description, 'utf-8')).encode('ascii', 'ignore')
    link = ''
    if self.repo.viewlink:
        link = self.repo.viewlink.replace('%ID', self.uniqueid)
    item = RSSItem(
        title=title,
        link=link,
        description=description,
        guid=Config.rooturl + "/commit/" + self.repo.tagname + "/" + self.uniqueid,
        pubDate=unixToDatetime(self.date)
    )
    return item
Example 11: normalize_token
# Assumed imports: re, unicodedata (Python 2: note the ur'' literal below).
def normalize_token(data):
    # credit: http://stackoverflow.com/questions/517923/what-is-the-best-way-to-remove-accents-in-a-python-unicode-string
    # NFD splits letters from their combining marks (category 'Mn'); the marks
    # are filtered out and NFC recomposes what remains.
    data = unicodedata.normalize(
        "NFC", "".join((c for c in unicodedata.normalize("NFD", data) if unicodedata.category(c) != "Mn")).lower()
    )
    data = re.sub(ur"['’]", "", data)  # strip straight and curly apostrophes
    return data
Example 12: __init__
# XBMC/Kodi script (Python 2); xbmc, xbmcgui, parse(), log() and getLS()
# come from the addon environment.
def __init__(self):
    if xbmc:
        self.RssFeedsPath = xbmc.translatePath('special://userdata/RssFeeds.xml').decode("utf-8")
    else:
        self.RssFeedsPath = r'C:\Documents and Settings\Xerox\Application Data\XBMC\userdata\RssFeeds.xml'
    sane = self.checkRssFeedPathSanity()
    if sane:
        try:
            self.feedsTree = parse(self.RssFeedsPath)
        except:
            log('[script] RSS Editor --> Failed to parse ' + unicodedata.normalize('NFKD', self.RssFeedsPath).encode('ascii', 'ignore'))
            regen = xbmcgui.Dialog().yesno(getLS(40), getLS(51), getLS(52), getLS(53))
            if regen:
                log('[script] RSS Editor --> Attempting to Regenerate RssFeeds.xml')
                xml = '<?xml version="1.0" encoding="UTF-8" standalone="yes"?>\n<rssfeeds>\n\
<!-- RSS feeds. To have multiple feeds, just add a feed to the set. You can also have multiple sets. !-->\n\
<!-- To use different sets in your skin, each must be called from skin with a unique id. !-->\n\
<set id="1">\n <feed updateinterval="30">http://feeds.feedburner.com/xbmc</feed>\n </set>\n</rssfeeds>'
                f = open(self.RssFeedsPath, 'w')
                f.write(xml)
                f.close()
                self.__init__()
            else:
                log('[script] RSS Editor --> User opted to not regenerate RssFeeds.xml. Script Exiting')
                self.feedsTree = False
        if self.feedsTree:
            self.feedsList = self.getCurrentRssFeeds()
    else:
        self.feedsTree = False
        self.feedsList = False
        log('[SCRIPT] RSS Editor --> Could not open ' + unicodedata.normalize('NFKD', self.RssFeedsPath).encode('ascii', 'ignore') + '. Either the file does not exist, or its size is zero.')
Example 13: redirect_if_needed
def redirect_if_needed(self, i):
    params = {}
    need_redirect = False
    for k, v in i.items():
        if k in plurals:
            params[k] = None
            k = plurals[k]
            need_redirect = True
        if isinstance(v, list):
            if v == []:
                continue
            clean = [normalize('NFC', b.strip()) for b in v]
            if clean != v:
                need_redirect = True
            if len(clean) == 1 and clean[0] == u'':
                clean = None
        else:
            clean = normalize('NFC', v.strip())
            if clean == '':
                need_redirect = True
                clean = None
            if clean != v:
                need_redirect = True
        params[k] = clean
    if need_redirect:
        raise web.seeother(web.changequery(**params))
Example 14: noDiacritics
# Python 2 (uses unicode() and unichr()); assumes: import unicodedata.
def noDiacritics(s):
    """Removes any diacritics"""
    # sanity check
    if s is None:
        return None
    # try the right way first
    try:
        strAux = unicode(s, 'utf-8')
        # remove some chars
        strAux = strAux.replace(unichr(0xba), "")  # º (masculine ordinal indicator)
        strAux = strAux.replace(unichr(0xaa), "")  # ª (feminine ordinal indicator)
        # normalization
        ret = unicodedata.normalize('NFKD', strAux)
        ret = ret.encode('ascii', 'ignore')
    except:
        ret = None
    # try as a unicode encoded string
    if ret is None:
        try:
            strAux = s.decode('utf-8')
            # remove some chars
            strAux = strAux.replace(unichr(0xba), "")  # º (masculine ordinal indicator)
            strAux = strAux.replace(unichr(0xaa), "")  # ª (feminine ordinal indicator)
            # normalization
            ret = unicodedata.normalize('NFKD', strAux)
            ret = ret.encode('ascii', 'ignore')
        except:
            ret = s  # return as received
    return ret
Example 15: tokenizeComparison
# Assumed context: import difflib; import unicodedata as ucd.
def tokenizeComparison(self, given, correct):
    # compare in NFC form so accents appear correct
    given = ucd.normalize("NFC", given)
    correct = ucd.normalize("NFC", correct)
    s = difflib.SequenceMatcher(None, given, correct, autojunk=False)
    givenElems = []
    correctElems = []
    givenPoint = 0
    correctPoint = 0
    offby = 0

    def logBad(old, new, str, array):
        if old != new:
            array.append((False, str[old:new]))

    def logGood(start, cnt, str, array):
        if cnt:
            array.append((True, str[start:start + cnt]))

    for x, y, cnt in s.get_matching_blocks():
        # if anything was missed in correct, pad given
        if cnt and y - offby > x:
            givenElems.append((False, "-" * (y - x - offby)))
            offby = y - x
        # log any preceding bad elems
        logBad(givenPoint, x, given, givenElems)
        logBad(correctPoint, y, correct, correctElems)
        givenPoint = x + cnt
        correctPoint = y + cnt
        # log the match
        logGood(x, cnt, given, givenElems)
        logGood(y, cnt, correct, correctElems)
    return givenElems, correctElems