This page collects typical usage examples of the sickbeard.helpers.parse_xml function in Python. If you are unsure what parse_xml does, how to call it, or what real-world uses look like, the code samples selected below should help.
A total of 14 code examples of the parse_xml function are shown, sorted by popularity by default.
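Before working through the examples, it helps to know what parse_xml is expected to return. The project's own implementation is not reproduced on this page, but every call site below treats the result as an ElementTree element on success and None on failure, so a minimal sketch could look like the following (the exact error handling and logging are assumptions):

import xml.etree.ElementTree as etree

def parse_xml(data):
    """Parse an XML string and return the root Element, or None on failure.

    Minimal sketch inferred from the call sites below, which all compare the
    result against None before calling .findall()/.iter()/.tag on it.
    """
    try:
        return etree.fromstring(data)
    except Exception:
        # The real helper presumably logs the parse error; returning None lets
        # callers treat malformed XML the same way as an empty HTTP response.
        return None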
Example 1: get_newznab_categories
def get_newznab_categories(self):
    """
    Uses the newznab provider url and apikey to get the capabilities.
    Makes use of the default newznab caps param, e.g. http://yournewznab/api?t=caps&apikey=skdfiw7823sdkdsfjsfk
    Returns a tuple with (success or not, array with dicts [{"id": "5070", "name": "Anime"},
    {"id": "5080", "name": "Documentary"}, {"id": "5020", "name": "Foreign"}, ...], error message)
    """
    return_categories = []

    self._checkAuth()

    params = {"t": "caps"}
    if self.needs_auth and self.key:
        params['apikey'] = self.key

    categories = self.getURL("%s/api" % (self.url), params=params)

    xml_categories = helpers.parse_xml(categories)
    if not xml_categories:
        return (False, return_categories, "Error parsing xml for [%s]" % (self.name))

    try:
        for category in xml_categories.iter('category'):
            if category.get('name') == 'TV':
                for subcat in category.findall('subcat'):
                    return_categories.append(subcat.attrib)
    except Exception:
        return (False, return_categories, "Error parsing result for [%s]" % (self.name))

    return (True, return_categories, "")
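A hypothetical call site for the method above might unpack the returned tuple like this (the provider variable is assumed to be a configured newznab provider instance):

success, categories, error = provider.get_newznab_categories()
if success:
    for cat in categories:
        print("%s %s" % (cat["id"], cat["name"]))   # e.g. "5070 Anime"
else:
    print("Could not fetch capabilities: " + error)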
Example 2: _parseKatRSS
def _parseKatRSS(self, data):
    parsedXML = helpers.parse_xml(data)
    if parsedXML is None:
        logger.log(u"Error trying to load " + self.name + " RSS feed", logger.ERROR)
        return []

    items = parsedXML.findall('.//item')

    results = []
    for curItem in items:
        (title, url) = self._get_title_and_url(curItem)
        if not title or not url:
            logger.log(u"The XML returned from the KAT RSS feed is incomplete, this result is unusable: " + data, logger.ERROR)
            continue
        if self._get_seeders(curItem) <= 0:
            logger.log(u"Discarded result with no seeders: " + title, logger.DEBUG)
            continue
        if self.urlIsBlacklisted(url):
            logger.log(u'URL "%s" for "%s" is blacklisted, ignoring.' % (url, title), logger.DEBUG)
            continue
        results.append(curItem)

    return results
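Most of the examples on this page call a _get_title_and_url(item) helper that is not shown here. The sketch below approximates what a generic version of that helper does for one RSS <item> element; the real providers override it with site-specific logic, so treat it only as an illustration:

def _get_title_and_url(self, item):
    """Extract a (title, url) pair from one <item> element of an RSS feed.

    Rough approximation of the helper used throughout these examples; actual
    providers may read an <enclosure> URL or apply extra clean-up.
    """
    title = item.findtext('title')
    if title:
        title = title.replace(' ', '.')

    url = item.findtext('link')
    if url:
        url = url.replace('&amp;', '&')

    return title, url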
Example 3: _doSearch
def _doSearch(self, search_string, show=None, max_age=0):
    params = {"q": search_string,
              "max": 100,
              "hidespam": 1,
              "minsize": 350,
              "nzblink": 1,
              "complete": 1,
              "sort": "agedesc",
              "age": sickbeard.USENET_RETENTION}

    # if max_age is set, use it, don't allow it to be missing
    if max_age or not params['age']:
        params['age'] = max_age

    searchURL = self.url + self.rss + urllib.urlencode(params)
    logger.log(u"Search url: " + searchURL)

    retry = 0
    while True:
        logger.log(u"Sleeping 3 seconds to respect NZBIndex's rules")
        time.sleep(3)

        data = self.getURL(searchURL)
        if not data:
            logger.log(u"No data returned from " + searchURL, logger.ERROR)
            return []

        if type(data) == type({}) and data['errorCode']:
            if retry < self.max_retries:
                logger.log(u"Retry " + str(retry + 1) + " of " + str(self.max_retries) + "...", logger.WARNING)
                retry += 1
            else:
                logger.log(u"Max retries reached!", logger.ERROR)
                return []
        else:
            break

    parsedXML = helpers.parse_xml(data)
    if parsedXML is None:
        logger.log(u"Error trying to load " + self.name + " XML data", logger.ERROR)
        return []

    if parsedXML.tag == 'rss':
        items = parsedXML.findall('.//item')

    results = []
    for curItem in items:
        (title, url) = self._get_title_and_url(curItem)
        if title and url:
            logger.log(u"Adding item from RSS to results: " + title, logger.DEBUG)
            results.append(curItem)
        else:
            logger.log(u"The XML returned from the " + self.name + " RSS feed is incomplete, this result is unusable", logger.DEBUG)

    return results
Example 4: _doSearch
def _doSearch(self, curString, quotes=False, show=None):
    term = re.sub('[\.\-\:]', ' ', curString).encode('utf-8')
    self.searchString = term
    if quotes:
        term = "\"" + term + "\""

    params = {"q": term,
              "rpp": 50,   # max 50
              "ns": 1,     # nospam
              "szs": 16,   # min 100MB
              "sp": 1      # nopass
              }

    searchURL = "http://nzbclub.com/nzbfeed.aspx?" + urllib.urlencode(params)

    logger.log(u"Search string: " + searchURL)
    logger.log(u"Sleeping 4 seconds to respect NZBClub's rules")
    time.sleep(4)

    searchResult = self.getURL(searchURL,
                               [("User-Agent", "Mozilla/5.0 (Macintosh; Intel Mac OS X 10.7; rv:5.0) Gecko/20100101 Firefox/5.0"),
                                ("Accept", "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8"),
                                ("Accept-Language", "de-de,de;q=0.8,en-us;q=0.5,en;q=0.3"),
                                ("Accept-Charset", "ISO-8859-1,utf-8;q=0.7,*;q=0.7"),
                                ("Connection", "keep-alive"),
                                ("Cache-Control", "max-age=0")])
    if not searchResult:
        return []

    parsedXML = helpers.parse_xml(searchResult)
    if parsedXML is None:
        logger.log(u"Error trying to load " + self.name + " XML data", logger.ERROR)
        return []

    if parsedXML.tag == 'rss':
        items = parsedXML.findall('.//item')
    else:
        logger.log(u"Resulting XML from " + self.name + " isn't RSS, not parsing it", logger.ERROR)
        return []

    results = []
    for curItem in items:
        (title, url) = self._get_title_and_url(curItem)
        if not title or not url:
            logger.log(u"The XML returned from the NZBClub RSS feed is incomplete, this result is unusable", logger.ERROR)
            continue
        if not title == 'Not_Valid':
            results.append(curItem)

    return results
Example 5: verifyRss
def verifyRss(self):
    """Runs some basic validation on the rss url.
    @return: (bool, string) Returns a tuple. The bool indicates success, the string will
    give a reason for failure if the boolean is false.
    """
    try:
        data = self.getURL(self.url)
        if not data:
            return (False, "No data returned from url: " + self.url)

        parsedXML = helpers.parse_xml(data)
        if parsedXML is None:
            return (False, "Unable to parse RSS - is it valid xml? " + self.url)

        items = parsedXML.findall(".//item")
        if len(items) == 0:
            # Maybe this isn't really a failure? Not sure what's best here
            return (False, "There were no items in the RSS feed from %s" % self.url)

        checkItem = items[0]
        (title, url) = self._get_title_and_url(checkItem)
        if not title:
            return (False, "Failed to get title from first item in feed.")
        if not url:
            return (False, "Failed to get torrent url from first item in feed.")

        if url.startswith("magnet:"):
            # we just assume that magnet links are ok
            return (True, "First torrent appears to be ok")
        else:
            torrentFile = self.getURL(url)
            if torrentFile is None:
                return (False, "Empty torrent file when downloading first torrent in feed.")
            if not self.is_valid_torrent_data(torrentFile):
                return (
                    False,
                    "First torrent in feed does not appear to be valid torrent file (wrong magic number)",
                )
            return (True, "First torrent in feed verified successfully")
    except Exception, e:
        return (False, "Error when trying to load RSS: " + ex(e))
Example 6: updateCache
def updateCache(self):
    if not self.shouldUpdate():
        return

    if self._checkAuth(None):
        data = self._getRSSData()

        # as long as the http request worked we count this as an update
        if data:
            self.setLastUpdate()
        else:
            return []

        # now that we've loaded the current RSS feed lets delete the old cache
        logger.log(u"Clearing " + self.provider.name + " cache and updating with new information")
        self._clearCache()

        parsedXML = helpers.parse_xml(data)
        if parsedXML is None:
            logger.log(u"Error trying to load " + self.provider.name + " RSS feed", logger.ERROR)
            return []

        if self._checkAuth(parsedXML):
            if parsedXML.tag == 'rss':
                items = parsedXML.findall('.//item')
            else:
                logger.log(u"Resulting XML from " + self.provider.name + " isn't RSS, not parsing it", logger.ERROR)
                return []

            cl = []
            for item in items:
                ci = self._parseItem(item)
                if ci is not None:
                    cl.append(ci)

            if len(cl) > 0:
                myDB = self._getDB()
                myDB.mass_action(cl)

        else:
            raise AuthException(u"Your authentication credentials for " + self.provider.name + " are incorrect, check your config")

    return []
Example 7: get_newznab_categories
def get_newznab_categories(self):
    """
    Uses the newznab provider url and apikey to get the capabilities.
    Makes use of the default newznab caps param, e.g. http://yournewznab/api?t=caps&apikey=skdfiw7823sdkdsfjsfk
    Returns a tuple with (success or not, array with dicts [{"id": "5070", "name": "Anime"},
    {"id": "5080", "name": "Documentary"}, {"id": "5020", "name": "Foreign"}, ...], error message)
    """
    return_categories = []

    api_key = self._check_auth()

    params = {"t": "caps"}
    if isinstance(api_key, basestring):
        params["apikey"] = api_key

    categories = self.get_url("%s/api" % self.url, params=params, timeout=10)
    if not categories:
        logger.log(
            u"Error getting html for [%s/api?%s]"
            % (self.url, "&".join("%s=%s" % (x, y) for x, y in params.items())),
            logger.DEBUG,
        )
        return (
            False,
            return_categories,
            "Error getting html for [%s]"
            % ("%s/api?%s" % (self.url, "&".join("%s=%s" % (x, y) for x, y in params.items()))),
        )

    xml_categories = helpers.parse_xml(categories)
    if not xml_categories:
        logger.log(u"Error parsing xml for [%s]" % self.name, logger.DEBUG)
        return False, return_categories, "Error parsing xml for [%s]" % self.name

    try:
        for category in xml_categories.iter("category"):
            if "TV" == category.get("name"):
                for subcat in category.findall("subcat"):
                    return_categories.append(subcat.attrib)
    except Exception:
        logger.log(u"Error parsing result for [%s]" % self.name, logger.DEBUG)
        return False, return_categories, "Error parsing result for [%s]" % self.name

    return True, return_categories, ""
Example 8: _doSearch
def _doSearch(self, search_params, show=None, age=None):
    params = {"mode": "rss"}
    if search_params:
        params.update(search_params)

    search_url = self.url + "search/index.php?" + urllib.urlencode(params)
    logger.log(u"Search string: " + search_url, logger.DEBUG)

    data = self.getURL(search_url)
    if not data:
        logger.log(u"No data returned from " + search_url, logger.ERROR)
        return []

    parsedXML = helpers.parse_xml(data)
    if parsedXML is None:
        logger.log(u"Error trying to load " + self.name + " RSS feed", logger.ERROR)
        return []

    items = parsedXML.findall(".//item")

    results = []
    for curItem in items:
        (title, url) = self._get_title_and_url(curItem)
        if title and url:
            logger.log(u"Adding item from RSS to results: " + title, logger.DEBUG)
            results.append(curItem)
        else:
            logger.log(
                u"The XML returned from the " + self.name + " RSS feed is incomplete, this result is unusable",
                logger.ERROR,
            )
    return results
Example 9: _doSearch
def _doSearch(self, search_params, show=None):
    params = {"mode": "rss"}
    if search_params:
        params.update(search_params)

    search_url = self.url + 'search/index.php?' + urllib.urlencode(params)
    logger.log(u"Search string: " + search_url, logger.DEBUG)

    data = self.getURL(search_url)
    if not data:
        logger.log(u"No data returned from " + search_url, logger.ERROR)
        return []

    parsedXML = helpers.parse_xml(data)
    if parsedXML is None:
        logger.log(u"Error trying to load " + self.name + " RSS feed", logger.ERROR)
        return []

    items = parsedXML.findall('.//item')

    results = []
    for curItem in items:
        (title, url) = self._get_title_and_url(curItem)
        if not title or not url:
            logger.log(u"The XML returned from the EZRSS RSS feed is incomplete, this result is unusable: " + data, logger.ERROR)
            continue
        if self.urlIsBlacklisted(url):
            logger.log(u'URL "%s" for "%s" is blacklisted. Ignoring.' % (url, title), logger.DEBUG)
            continue
        results.append(curItem)

    return results
Example 10: validateRSS
def validateRSS(self):
    try:
        data = self.cache._getRSSData()
        if not data:
            return (False, 'No data returned from url: ' + self.url)

        parsedXML = helpers.parse_xml(data)
        if not parsedXML:
            return (False, 'Unable to parse RSS, is it a real RSS? ')

        items = parsedXML.findall('.//item')
        if not items:
            return (False, 'No items found in the RSS feed ' + self.url)

        (title, url) = self._get_title_and_url(items[0])
        if not title:
            return (False, 'Unable to get title from first item')
        if not url:
            return (False, 'Unable to get torrent url from first item')

        if url.startswith('magnet:') and re.search('urn:btih:([\w]{32,40})', url):
            return (True, 'RSS feed Parsed correctly')
        else:
            torrent_file = self.getURL(url)
            try:
                bdecode(torrent_file)
            except Exception, e:
                self.dumpHTML(torrent_file)
                return (False, 'Torrent link is not a valid torrent file: ' + ex(e))

        return (True, 'RSS feed Parsed correctly')

    except Exception, e:
        return (False, 'Error when trying to load RSS: ' + ex(e))
Example 11: _getRSSData
def _getRSSData(self):
    RSS_data = None
    xml_element_tree = None

    for url in [self.provider.url + 'rss/?sec=tv-x264&fr=false',
                self.provider.url + 'rss/?sec=tv-dvd&fr=false']:
        logger.log(u"Womble's Index cache update URL: " + url, logger.DEBUG)
        data = self.provider.getURL(url)

        if data:
            parsedXML = helpers.parse_xml(data)
            if parsedXML:
                if xml_element_tree is None:
                    xml_element_tree = parsedXML
                else:
                    items = parsedXML.findall('.//item')
                    if items:
                        for item in items:
                            xml_element_tree.append(item)

    if xml_element_tree is not None:
        RSS_data = etree.tostring(xml_element_tree)

    return RSS_data
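Example 11 merges two feeds by appending the second parse result's <item> elements onto the first tree and serializing the combined tree back to a string, so the rest of the cache code can re-parse it as one feed. A condensed, self-contained illustration of that merge step, using placeholder XML rather than the real Womble's feeds:

import xml.etree.ElementTree as etree

feed_a = '<rss><channel><item><title>a</title></item></channel></rss>'
feed_b = '<rss><channel><item><title>b</title></item></channel></rss>'

merged = etree.fromstring(feed_a)
for item in etree.fromstring(feed_b).findall('.//item'):
    merged.append(item)              # extra items land directly under the merged root

print(etree.tostring(merged))        # one XML string containing both items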
Example 12: _feed_is_valid
def _feed_is_valid(feed):
    #logger.log(u"Checking feed: " + repr(feed), logger.DEBUG)
    try:
        if feed is None:
            logger.log(u"Feed result is empty!", logger.ERROR)
            return False

        parsedXML = helpers.parse_xml(feed)
        if parsedXML is None:
            logger.log(u"Resulting XML isn't XML, not parsing it", logger.ERROR)
            return False
        else:
            items = parsedXML.findall('.//item')
            if len(items) > 0:
                item = items[0]
                pubDate = helpers.get_xml_text(item.find('pubDate'))
                # pubDate has a timezone, but it makes things much easier if
                # we ignore it (and we don't need that level of accuracy anyway)
                p_datetime = parser.parse(pubDate, ignoretz=True)
                p_delta = datetime.now() - p_datetime
                if p_delta.days > 3:
                    logger.log(u"Last entry in feed (after early parse) is %d days old - assuming feed is broken" % (p_delta.days), logger.MESSAGE)
                    return False
                else:
                    return True
            else:
                logger.log(u"Feed contents are rss (during early parse) but are empty, assuming failure.", logger.MESSAGE)
                return False
    except Exception, e:
        logger.log(u"Error during early parse of feed: " + ex(e), logger.ERROR)
        logger.log(u"Feed contents: " + repr(feed), logger.DEBUG)
        return False
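The freshness check in example 12 relies on dateutil's parser to turn the RSS pubDate string into a naive datetime before comparing it with the current time. A stand-alone version of that age computation, using a made-up pubDate value:

from datetime import datetime
from dateutil import parser

pub_date = "Sat, 01 Mar 2014 10:15:00 +0000"          # typical RSS pubDate format
age = datetime.now() - parser.parse(pub_date, ignoretz=True)
print(age.days > 3)                                   # True means the feed looks stale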
Example 13: _doSearch
def _doSearch(self, search_params, show=None, max_age=0):
    self._checkAuth()

    params = {"t": "tvsearch",
              "maxage": sickbeard.USENET_RETENTION,
              "limit": 100,
              "cat": self.catIDs}

    # if max_age is set, use it, don't allow it to be missing
    if max_age or not params['maxage']:
        params['maxage'] = max_age

    if search_params:
        params.update(search_params)

    if self.needs_auth and self.key:
        params['apikey'] = self.key

    search_url = self.url + 'api?' + urllib.urlencode(params)
    logger.log(u"Search url: " + search_url, logger.DEBUG)

    data = self.getURL(search_url)
    if not data:
        logger.log(u"No data returned from " + search_url, logger.ERROR)
        return []

    # hack this in until it's fixed server side
    if not data.startswith('<?xml'):
        data = '<?xml version="1.0" encoding="ISO-8859-1" ?>' + data

    parsedXML = helpers.parse_xml(data)
    if parsedXML is None:
        logger.log(u"Error trying to load " + self.name + " XML data", logger.ERROR)
        return []

    if self._checkAuthFromData(parsedXML):
        if parsedXML.tag == 'rss':
            items = parsedXML.findall('.//item')
        else:
            logger.log(u"Resulting XML from " + self.name + " isn't RSS, not parsing it", logger.ERROR)
            return []

        results = []
        for curItem in items:
            (title, url) = self._get_title_and_url(curItem)
            if title and url:
                logger.log(u"Adding item from RSS to results: " + title, logger.DEBUG)
                results.append(curItem)
            else:
                logger.log(
                    u"The XML returned from the " + self.name + " RSS feed is incomplete, this result is unusable",
                    logger.DEBUG)
        return results

    return []
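Examples 13 and 14 both prepend an XML declaration when the response body lacks one, as a workaround for a server-side issue, before handing the string to parse_xml. The same guard extracted into a small helper, purely for illustration:

def ensure_xml_declaration(data):
    # Mirrors the "hack this in until it's fixed server side" guard above:
    # only prepend a declaration when the payload does not already start with one.
    if data and not data.startswith('<?xml'):
        data = '<?xml version="1.0" encoding="ISO-8859-1" ?>' + data
    return data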
Example 14: _doSearch
def _doSearch(self, search_params, show=None, max_age=0):
    self._checkAuth()

    params = {"t": "tvsearch",
              "maxage": sickbeard.USENET_RETENTION,
              "limit": 100,
              "cat": self.catIDs}

    # if max_age is set, use it, don't allow it to be missing
    if max_age or not params['maxage']:
        params['maxage'] = max_age

    if search_params:
        params.update(search_params)

    if self.needs_auth and self.key:
        params['apikey'] = self.key

    results = []
    offset = total = hits = 0

    # hardcoded to stop after a max of 4 hits (400 items) per query
    while (hits < 4) and (offset == 0 or offset < total):
        if hits > 0:
            # sleep for a few seconds to not hammer the site and let cpu rest
            time.sleep(2)

        params['offset'] = offset
        search_url = self.url + 'api?' + urllib.urlencode(params)
        logger.log(u"Search url: " + search_url, logger.DEBUG)

        data = self.getURL(search_url)
        if not data:
            logger.log(u"No data returned from " + search_url, logger.ERROR)
            return results

        # hack this in until it's fixed server side
        if not data.startswith('<?xml'):
            data = '<?xml version="1.0" encoding="ISO-8859-1" ?>' + data

        parsedXML = helpers.parse_xml(data)
        if parsedXML is None:
            logger.log(u"Error trying to load " + self.name + " XML data", logger.ERROR)
            return results

        if self._checkAuthFromData(parsedXML):
            if parsedXML.tag == 'rss':
                items = []
                response_nodes = []
                for node in parsedXML.getiterator():
                    # Collect all items for result parsing
                    if node.tag == "item":
                        items.append(node)
                    # Find response nodes but ignore XML namespacing to
                    # accommodate providers with alternative definitions
                    elif node.tag.split("}", 1)[-1] == "response":
                        response_nodes.append(node)
                # Verify that one and only one node matches and use it,
                # return otherwise
                if len(response_nodes) != 1:
                    logger.log(u"No valid, unique response node was found in the API response",
                               logger.ERROR)
                    return results
                response = response_nodes[0]
            else:
                logger.log(u"Resulting XML from " + self.name + " isn't RSS, not parsing it", logger.ERROR)
                return results

            # process the items that we have
            for curItem in items:
                (title, url) = self._get_title_and_url(curItem)
                if title and url:
                    # commenting this out for performance reasons, we see the results when they are added to cache anyways
                    # logger.log(u"Adding item from RSS to results: " + title, logger.DEBUG)
                    results.append(curItem)
                else:
                    logger.log(u"The XML returned from the " + self.name + " RSS feed is incomplete, this result is unusable", logger.DEBUG)

            # check to see if our offset matches what was returned, otherwise don't trust their values and just use what we have
            if offset != int(response.get('offset') or 0):
                logger.log(u"Newznab provider returned invalid api data, report this to your provider! Aborting fetching further results.", logger.WARNING)
                return results

            try:
                total = int(response.get('total') or 0)
            except AttributeError:
                logger.log(u"Newznab provider provided invalid total.", logger.WARNING)
                break

            # if we have 0 results, just break out otherwise increment and continue
            if total == 0:
                break
#......... the rest of the code is omitted here .........