本文整理匯總了Python中urlparse.html方法的典型用法代碼示例。如果您正苦於以下問題:Python urlparse.html方法的具體用法?Python urlparse.html怎麽用?Python urlparse.html使用的例子?那麽,這裏精選的方法代碼示例或許可以為您提供幫助。您也可以進一步了解該方法所在類urlparse的用法示例。
在下文中一共展示了urlparse.html方法的8個代碼示例,這些例子默認根據受歡迎程度排序。您可以為喜歡或者感覺有用的代碼點讚,您的評價將有助於係統推薦出更棒的Python代碼示例。
示例1: _start_link
# 需要導入模塊: import urlparse [as 別名]
# 或者: from urlparse import html [as 別名]
def _start_link(self, attrsD):
    """Handle the start of a ``link`` element.

    ``attrsD`` is the element's attribute dictionary.  Missing ``rel`` and
    ``type`` attributes are filled in (``rel`` defaults to ``alternate``;
    ``type`` defaults to ``application/atom+xml`` for ``rel="self"`` and to
    ``text/html`` otherwise), and ``href`` is passed through
    ``self.resolveURI``.

    Effects on the current context (``self._getContext()``):

    * ``context['links']`` is created if absent, and the attributes are
      appended to it unless both ``self.inentry`` and ``self.inimage`` are
      truthy;
    * ``context['link']`` is set to the href when the link is an
      ``alternate`` whose mapped content type is in ``self.html_types``.

    When there is no ``href`` at all, the element is pushed with
    ``self.push('link', ...)`` instead.
    """
    attrsD.setdefault('rel', u'alternate')
    if attrsD['rel'] == u'self':
        default_type = u'application/atom+xml'
    else:
        default_type = u'text/html'
    attrsD.setdefault('type', default_type)

    context = self._getContext()
    # NOTE(review): presumably normalises alternative attribute spellings
    # to 'href' -- the helper is not visible here, confirm.
    attrsD = self._itsAnHrefDamnIt(attrsD)
    if 'href' in attrsD:
        attrsD['href'] = self.resolveURI(attrsD['href'])
    expectingText = self.infeed or self.inentry or self.insource

    context.setdefault('links', [])
    if not (self.inentry and self.inimage):
        context['links'].append(FeedParserDict(attrsD))

    if 'href' in attrsD:
        # Kept from the original; the value is not read again on this path.
        expectingText = 0
        is_alternate = attrsD.get('rel') == u'alternate'
        if is_alternate and (self.mapContentType(attrsD.get('type')) in self.html_types):
            context['link'] = attrsD['href']
    else:
        self.push('link', expectingText)
示例2: _l2bytes
# 需要導入模塊: import urlparse [as 別名]
# 或者: from urlparse import html [as 別名]
def _l2bytes(l):
    """Return ``bytes(l)``.

    On Python 3, passing an iterable of integers in ``range(256)`` yields
    the byte string made of those values.
    NOTE(review): where ``bytes`` is an alias of ``str`` (Python 2.6/2.7)
    this call would instead return the textual repr of ``l`` -- confirm
    which interpreters reach this definition.
    """
    return bytes(l)
# If you want feedparser to allow all URL schemes, set this to ()
# List culled from Python's urlparse documentation at:
# http://docs.python.org/library/urlparse.html
# as well as from "URI scheme" at Wikipedia:
# https://secure.wikimedia.org/wikipedia/en/wiki/URI_scheme
# Many more will likely need to be added!
示例3: mapContentType
# 需要導入模塊: import urlparse [as 別名]
# 或者: from urlparse import html [as 別名]
def mapContentType(self, contentType):
    """Expand a shorthand content type to a full MIME type.

    ``contentType`` is lowercased first.  The shorthands ``text`` and
    ``plain`` map to ``text/plain``, ``html`` maps to ``text/html`` and
    ``xhtml`` maps to ``application/xhtml+xml``.  Any other value is
    returned lowercased and otherwise unchanged.
    """
    contentType = contentType.lower()
    shorthand = {
        'text': u'text/plain',
        'plain': u'text/plain',
        'html': u'text/html',
        'xhtml': u'application/xhtml+xml',
    }
    return shorthand.get(contentType, contentType)
示例4: _start_description
# 需要導入模塊: import urlparse [as 別名]
# 或者: from urlparse import html [as 別名]
def _start_description(self, attrsD):
    """Handle the start of a ``description`` element.

    If the current context already holds a ``summary``, the element is
    treated as content: ``self._summaryKey`` is set to ``'content'`` and
    handling is delegated to ``self._start_content``.  Otherwise the
    element is pushed as ``description`` with a default content type of
    ``text/html``; the last argument passed to ``pushContent`` is
    ``self.infeed or self.inentry or self.insource``.
    """
    context = self._getContext()
    if 'summary' in context:
        self._summaryKey = 'content'
        self._start_content(attrsD)
    else:
        expecting_text = self.infeed or self.inentry or self.insource
        self.pushContent('description', attrsD, u'text/html', expecting_text)
示例5: _start_content_encoded
# 需要導入模塊: import urlparse [as 別名]
# 或者: from urlparse import html [as 別名]
def _start_content_encoded(self, attrsD):
    """Handle the start of a ``content:encoded`` element.

    Pushes the element as ``content`` with a default content type of
    ``text/html``; the final ``pushContent`` argument is always ``1``.
    """
    self.pushContent('content', attrsD, u'text/html', 1)
示例6: _parse_date_rfc822
# 需要導入模塊: import urlparse [as 別名]
# 或者: from urlparse import html [as 別名]
def _parse_date_rfc822(dt):
    """Parse RFC 822 dates and times, with one minor
    difference: years may be 4DIGIT or 2DIGIT.
    http://tools.ietf.org/html/rfc822#section-5

    ``dt`` is lowercased and handed to ``_rfc822_match``.  Returns None
    when that step raises ``AttributeError``; otherwise returns whatever
    ``_parse_date_group_rfc822`` produces for the matched groups.
    """
    try:
        # groupdict(0) substitutes 0 for every group that did not take part
        # in the match.
        m = _rfc822_match(dt.lower()).groupdict(0)
    except AttributeError:
        # Raised when dt has no .lower() or when the matcher's result has
        # no .groupdict() (presumably None on a failed match -- confirm).
        return None
    return _parse_date_group_rfc822(m)
示例7: _parse_date_rfc822
# 需要導入模塊: import urlparse [as 別名]
# 或者: from urlparse import html [as 別名]
def _parse_date_rfc822(dt):
    """Parse RFC 822 dates and times, with one minor
    difference: years may be 4DIGIT or 2DIGIT.
    http://tools.ietf.org/html/rfc822#section-5

    Returns the parsed moment shifted to UTC, as the result of
    ``datetime.utctimetuple()``, or None when ``dt`` cannot be matched.
    """
    try:
        # groupdict(0) substitutes 0 for every group that did not take part
        # in the match, so an absent timezone shows up as a falsy value.
        m = _rfc822_match(dt.lower()).groupdict(0)
    except AttributeError:
        # Raised when dt has no .lower() or when the matcher's result has
        # no .groupdict() (presumably None on a failed match -- confirm).
        return None

    # Calculate a date and timestamp
    for k in ('year', 'day', 'hour', 'minute', 'second'):
        m[k] = int(m[k])
    m['month'] = _rfc822_months.index(m['month']) + 1
    # If the year is 2 digits, assume everything in the 90's is the 1990's
    # and everything below 90 belongs to the 2000's.
    if m['year'] < 90:
        m['year'] += 2000
    elif m['year'] < 100:
        m['year'] += 1900
    stamp = datetime.datetime(
        m['year'], m['month'], m['day'],
        m['hour'], m['minute'], m['second'],
    )

    # Use the timezone information to calculate the difference between
    # the given date and timestamp and Universal Coordinated Time
    tzhour = 0
    tzmin = 0
    tz = m['tz']
    if tz and tz.startswith('gmt'):
        # Handle GMT and GMT+hh:mm timezone syntax: strip the 'gmt' prefix
        # and the colon so the offset is handled by the branches below; a
        # bare 'gmt' is kept and resolved through the name table.
        tz = ''.join(tz[3:].split(':')) or 'gmt'
    if tz:
        if tz.startswith('+'):
            tzhour = int(tz[1:3])
            tzmin = int(tz[3:])
        elif tz.startswith('-'):
            tzhour = int(tz[1:3]) * -1
            tzmin = int(tz[3:]) * -1
        else:
            # Named zone: look up its hour offset.
            tzhour = _rfc822_tznames[tz]
    delta = datetime.timedelta(hours=tzhour, minutes=tzmin)

    # Return the date and timestamp in UTC
    return (stamp - delta).utctimetuple()
示例8: unknown_starttag
# 需要導入模塊: import urlparse [as 別名]
# 或者: from urlparse import html [as 別名]
def unknown_starttag(self, tag, attrs):
"""Screen a start tag against the acceptable-element lists.

``tag`` is the element name and ``attrs`` a list of ``(name, value)``
tuples, which this method may extend in place with namespace
declarations.

Visible behaviour: counts unacceptable elements that have an end tag
(``self.unacceptablestack``), counts entry into MathML / SVG content
(``self.mathmlOK`` / ``self.svgOK``), selects the attribute whitelist
that matches the tag's vocabulary, returns early for a tag that is not
acceptable, and appends an ``xmlns:xlink`` declaration when an
``xlink:``-prefixed attribute is present without one.

NOTE(review): indentation has been lost in this excerpt, so statement
nesting is not shown.  ``acceptable_attributes`` and ``keymap`` are
computed but never read in the visible lines -- the rest of the method
appears to be missing here.
"""
# Default whitelist; replaced below for MathML / SVG tags.
acceptable_attributes = self.acceptable_attributes
# Lowercase -> original-case attribute names; only filled for SVG.
keymap = {}
if not tag in self.acceptable_elements or self.svgOK:
if tag in self.unacceptable_elements_with_end_tag:
self.unacceptablestack += 1
# add implicit namespaces to html5 inline svg/mathml
if self._type.endswith('html'):
if not dict(attrs).get('xmlns'):
if tag=='svg':
attrs.append( ('xmlns','http://www.w3.org/2000/svg') )
if tag=='math':
attrs.append( ('xmlns','http://www.w3.org/1998/Math/MathML') )
# not otherwise acceptable, perhaps it is MathML or SVG?
if tag=='math' and ('xmlns','http://www.w3.org/1998/Math/MathML') in attrs:
self.mathmlOK += 1
if tag=='svg' and ('xmlns','http://www.w3.org/2000/svg') in attrs:
self.svgOK += 1
# chose acceptable attributes based on tag class, else bail
if self.mathmlOK and tag in self.mathml_elements:
acceptable_attributes = self.mathml_attributes
elif self.svgOK and tag in self.svg_elements:
# for most vocabularies, lowercasing is a good idea. Many
# svg elements, however, are camel case
# The lookup maps are built only while svg_attr_map is still empty.
if not self.svg_attr_map:
# 'lower' holds every name lowercased; 'mix' holds the names that are
# absent from that list, i.e. those containing uppercase characters.
lower=[attr.lower() for attr in self.svg_attributes]
mix=[a for a in self.svg_attributes if a not in lower]
self.svg_attributes = lower
self.svg_attr_map = dict([(a.lower(),a) for a in mix])
lower=[attr.lower() for attr in self.svg_elements]
mix=[a for a in self.svg_elements if a not in lower]
self.svg_elements = lower
self.svg_elem_map = dict([(a.lower(),a) for a in mix])
acceptable_attributes = self.svg_attributes
# Restore the original mixed-case spelling of the tag, if it has one.
tag = self.svg_elem_map.get(tag,tag)
keymap = self.svg_attr_map
elif not tag in self.acceptable_elements:
return
# declare xlink namespace, if needed
if self.mathmlOK or self.svgOK:
# NOTE(review): `lambda (n,v): ...` is tuple-parameter unpacking, which is
# Python 2-only syntax; on Python 3 filter() also returns an iterator that
# is always truthy, so this test would need rewriting for a port.
if filter(lambda (n,v): n.startswith('xlink:'),attrs):
if not ('xmlns:xlink','http://www.w3.org/1999/xlink') in attrs:
attrs.append(('xmlns:xlink','http://www.w3.org/1999/xlink'))