本文整理汇总了Python中sickrage.core.helpers.bs4_parser函数的典型用法代码示例。如果您正苦于以下问题：Python bs4_parser函数的具体用法？Python bs4_parser怎么用？Python bs4_parser使用的例子？那么恭喜您，这里精选的函数代码示例或许可以为您提供帮助。
在下文中一共展示了bs4_parser函数的15个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Python代码示例。
示例1: parse
def parse(self, data, mode):
    """
    Parse search results from data

    :param data: response data
    :param mode: search mode
    :return: search results, a list of dicts with the keys ``title``, ``link``,
             ``size``, ``seeders``, ``leechers`` and ``hash``
    """
    results = []

    with bs4_parser(data) as html:
        torrent_table = html.find('table', attrs={'id': 'torrents-table'})
        torrent_rows = torrent_table.find_all('tr') if torrent_table else []

        # Continue only if at least one Release is found
        if len(torrent_rows) < 2:
            sickrage.app.log.debug("Data returned from provider does not contain any torrents")
            return results

        # Skip the first row and reuse the rows collected above instead of
        # querying the table a second time.
        for result in torrent_rows[1:]:
            try:
                link = result.find('td', attrs={'class': 'ttr_name'}).find('a')
                url = result.find('td', attrs={'class': 'td_dl'}).find('a')

                title = link.string
                if re.search(r'\.\.\.', title):
                    # The listed title contains an ellipsis, so re-read it from the
                    # <title> of the torrent's own page. A dedicated name is used
                    # so the ``data`` argument is not overwritten mid-loop.
                    details_data = sickrage.app.wsession.get(self.urls['base_url'] + "/" + link['href']).text
                    with bs4_parser(details_data) as details_html:
                        title = re.search('(?<=").+(?<!")', details_html.title.string).group(0)

                download_url = self.urls['download'] % url['href']
                seeders = int(result.find('td', attrs={'class': 'ttr_seeders'}).string)
                leechers = int(result.find('td', attrs={'class': 'ttr_leechers'}).string)
                size = convert_size(result.find('td', attrs={'class': 'ttr_size'}).contents[0], -1)

                if not all([title, download_url]):
                    continue

                item = {'title': title, 'link': download_url, 'size': size, 'seeders': seeders,
                        'leechers': leechers, 'hash': ''}

                if mode != 'RSS':
                    sickrage.app.log.debug("Found result: {}".format(title))

                results.append(item)
            except Exception:
                # A malformed row must not abort the remaining rows
                sickrage.app.log.error("Failed parsing provider")

    return results
示例2: parse
def parse(self, data, mode, **kwargs):
    """
    Extract torrent entries from a provider listing page

    :param data: response data
    :param mode: search mode
    :return: search results, a list of dicts with the keys ``title``, ``link``,
             ``size``, ``seeders`` and ``leechers``
    """
    # Quality literal found in the title -> release tags that replace it
    quality_tags = (
        ('(calidad baja)', 'HDTV x264'),
        ('(Buena calidad)', '720p HDTV x264'),
        ('(Alta calidad)', '720p HDTV x264'),
        ('(calidad regular)', 'DVDrip x264'),
        ('(calidad media)', 'DVDrip x264'),
    )

    def _process_title(raw_title):
        # Quality: when no literal is present the release is treated as HDTV
        if 'calidad' in raw_title:
            for literal, tags in quality_tags:
                raw_title = raw_title.replace(literal, tags)
        else:
            raw_title += ' HDTV x264'

        # Language: every result from this provider has spanish audio, so it is
        # appended to the title (avoids downloading undesired torrents)
        return raw_title + ' SPANISH AUDIO-ELITETORRENT'

    results = []

    with bs4_parser(data) as html:
        listing = html.find('table', class_='fichas-listado')
        rows = listing('tr') if listing else []

        if len(rows) < 2:
            sickrage.app.log.debug("Data returned from provider does not contain any torrents")
            return results

        # First row is skipped
        for row in rows[1:]:
            try:
                title = _process_title(row.find('a', class_='nombre')['title'])
                download_url = self.urls['base_url'] + row.find('a')['href']
                if not (title and download_url):
                    continue

                reported_seeders = try_int(row.find('td', class_='semillas').get_text(strip=True))
                leechers = try_int(row.find('td', class_='clientes').get_text(strip=True))

                results.append({
                    'title': title,
                    'link': download_url,
                    # Provider does not provide size
                    'size': -1,
                    # seeders are not well reported. Set 1 in case of 0
                    'seeders': max(1, reported_seeders),
                    'leechers': leechers,
                })

                if mode != 'RSS':
                    sickrage.app.log.debug("Found result: {}".format(title))
            except Exception:
                sickrage.app.log.error("Failed parsing provider")

    return results
示例3: parse
def parse(self, data, mode, **kwargs):
    """
    Build the result list from the rows of the first ``<tbody>`` in the response

    :param data: response data
    :param mode: search mode
    :return: search results, a list of dicts with the keys ``title``, ``link``,
             ``size``, ``seeders`` and ``leechers``
    """
    results = []

    with bs4_parser(data) as html:
        body = html.find('tbody')

        # Continue only if at least one release is found
        if not body:
            sickrage.app.log.debug('Data returned from provider does not contain any torrents')
            return results

        for row in body('tr'):
            columns = row('td')
            if len(columns) < 4:
                continue

            try:
                title = None
                download_url = None

                anchor = columns[0].a
                if anchor:
                    title = anchor.get_text()
                    download_url = self._get_download_link(urljoin(self.urls['base_url'], anchor.get('href')))

                if not (title and download_url):
                    continue

                title = '{name} {codec}'.format(name=title, codec='x264')

                if self.custom_url:
                    if not validate_url(self.custom_url):
                        sickrage.app.log.warning("Invalid custom url: {}".format(self.custom_url))
                        return results
                    # Re-root the link: keep what follows the base url and join it
                    # onto the custom url
                    relative_part = download_url.split(self.urls['base_url'])[1]
                    download_url = urljoin(self.custom_url, relative_part)

                seeders = try_int(columns[2].get_text(strip=True))
                leechers = try_int(columns[3].get_text(strip=True))
                size = convert_size(columns[1].get_text(), -1)

                results.append({
                    'title': title,
                    'link': download_url,
                    'size': size,
                    'seeders': seeders,
                    'leechers': leechers
                })

                if mode != 'RSS':
                    sickrage.app.log.debug("Found result: {}".format(title))
            except Exception:
                sickrage.app.log.error("Failed parsing provider")

    return results
示例4: test_search
def test_search(self):
    """
    Smoke-test scraping of a kickass.unblocked.li search results page.

    Fetches a fixed search URL, walks the result rows and prints the title of
    every row whose fields could all be extracted. Returns early when no data
    or fewer than two table rows come back.
    """
    self.url = 'kickass.unblocked.li'
    searchURL = '{}/usearch/American%20Dad%20S08/'.format(self.url)

    data = WebSession().get(searchURL)
    if not data:
        return

    with bs4_parser(data) as html:
        torrent_table = html.find('table', attrs={'class': 'data'})

        # Continue only if one Release is found
        torrent_rows = torrent_table.find_all('tr') if torrent_table else []
        if len(torrent_rows) < 2:
            print("The data returned does not contain any torrents")
            return

        for tr in torrent_rows[1:]:
            try:
                # The extractions below are the actual checks: a row that lacks
                # one of these elements raises and is skipped. The values
                # themselves are not used further.
                name_links = tr.find('div', {'class': 'torrentname'}).find_all('a')
                link = urlparse.urljoin(self.url, name_links[1]['href'])
                torrent_id = tr.get('id')[-7:]  # renamed from ``id`` to avoid shadowing the builtin
                title = name_links[1].text or name_links[2].text
                url = tr.find('a', 'imagnet')['href']
                verified = bool(tr.find('a', 'iverify'))
                trusted = bool(tr.find('img', {'alt': 'verified'}))
                seeders = int(tr.find_all('td')[-2].text)
                leechers = int(tr.find_all('td')[-1].text)
            except (AttributeError, TypeError):
                continue

            # Function-call form behaves the same for a single argument on
            # Python 2 and is valid syntax on Python 3.
            print(title)
示例5: get_download_url
def get_download_url(self, url):
    """
    Read the link inside the ``btn-magnet`` element of a torrent page

    :param url: page url, joined onto the provider's base url before fetching
    :return: the ``href`` of the first anchor inside ``div.btn-magnet``, or None
             when the page cannot be fetched or does not contain that element
    """
    try:
        data = sickrage.app.wsession.get(urljoin(self.urls['base_url'], url)).text
        with bs4_parser(data) as html:
            return html.find('div', class_="btn-magnet").find('a').get('href')
    except Exception:
        # Best-effort lookup: callers get None on any failure, but the failure
        # is recorded instead of being swallowed silently.
        sickrage.app.log.debug("Unable to get download url from {}".format(url))
        return None
示例6: parse
def parse(self, data, mode):
    """
    Parse search results from data

    :param data: response data
    :param mode: search mode
    :return: search results, a list of dicts with the keys ``title``, ``link``,
             ``size``, ``seeders``, ``leechers`` and ``hash``
    """
    results = []

    with bs4_parser(data) as html:
        # Continue only if one Release is found
        empty = html.find('h2', text="No .torrents fit this filter criteria")
        if empty:
            sickrage.app.log.debug("Data returned from provider does not contain any torrents")
            return results

        torrent_table = html.find('table', attrs={'style': 'border: none; width: 100%;'})
        if not torrent_table:
            sickrage.app.log.error("Could not find table of torrents")
            return results

        torrent_rows = torrent_table.find_all('tr', attrs={'class': 'browse'})

        for result in torrent_rows:
            try:
                # Cell and link lookups happen inside the try so that one
                # malformed row is logged and skipped instead of aborting the parse.
                cells = result.find_all('td')
                link = cells[1].find('a', attrs={'style': 'font-size: 1.25em; font-weight: bold;'})
                torrent_id = link['href'].replace('details.php?id=', '')

                # ``has_attr`` replaces the deprecated ``Tag.has_key``
                if link.has_attr('title'):
                    title = link['title']
                else:
                    title = link.contents[0]

                download_url = self.urls['download'] % (torrent_id, link.contents[0])
                seeders = int(cells[9].contents[0])
                leechers = int(cells[10].contents[0])

                # Need size for failed downloads handling; -1 (the same fallback
                # passed to convert_size) marks a size cell in an unexpected format.
                size = -1
                if re.match(r'[0-9]+,?\.?[0-9]*[KkMmGg]+[Bb]+', cells[7].text):
                    size = convert_size(cells[7].text, -1)

                if not all([title, download_url]):
                    continue

                item = {'title': title, 'link': download_url, 'size': size, 'seeders': seeders,
                        'leechers': leechers, 'hash': ''}

                if mode != 'RSS':
                    sickrage.app.log.debug("Found result: {}".format(title))

                results.append(item)
            except Exception:
                sickrage.app.log.error("Failed parsing provider.")

    return results
示例7: search
def search(self, search_string, search_mode='eponly', epcount=0, age=0, epObj=None):
    """
    Search the provider for a single search string

    Returns an empty list immediately when a show is set and it is not anime.

    :param search_string: text to search for
    :param search_mode: not used by this implementation
    :param epcount: not used by this implementation
    :param age: not used by this implementation
    :param epObj: not used by this implementation
    :return: list of ``(title, download_url, size, seeders, leechers)`` tuples
    """
    # FIXME ADD MODE
    if self.show and not self.show.is_anime:
        return []

    sickrage.srCore.srLogger.debug("Search string: %s " % search_string)

    params = {
        "terms": search_string.encode('utf-8'),
        "type": 1,  # get anime types
    }

    searchURL = self.urls['base_url'] + '/search.php?' + urllib.urlencode(params)
    sickrage.srCore.srLogger.debug("Search URL: %s" % searchURL)

    try:
        data = sickrage.srCore.srWebSession.get(searchURL).text
    except Exception:
        sickrage.srCore.srLogger.debug("No data returned from provider")
        return []

    results = []
    try:
        with bs4_parser(data) as html:
            torrent_table = html.find('table', attrs={'class': 'listing'})
            torrent_rows = torrent_table.find_all('tr') if torrent_table else []
            if torrent_rows:
                # Start one row later when the first row holds a 'centertext' cell
                a = 1 if torrent_rows[0].find('td', attrs={'class': 'centertext'}) else 0

                # Only every second row is read (the one carrying the 'desc-top'
                # cell), so the rows are iterated directly rather than zipped
                # with an unused copy of the same slice.
                for top in torrent_rows[a::2]:
                    desc_top = top.find('td', attrs={'class': 'desc-top'})

                    # str.lstrip returns a new string; the result has to be kept
                    title = desc_top.text.lstrip()
                    download_url = desc_top.find('a')['href']

                    # FIXME: size, seeders and leechers are not scraped, so no
                    # minimum seeders/leechers filtering is applied here
                    size = -1
                    seeders = 1
                    leechers = 0

                    if not all([title, download_url]):
                        continue

                    item = title, download_url, size, seeders, leechers
                    results.append(item)
    except Exception:
        sickrage.srCore.srLogger.error("Failed parsing provider. Traceback: %s" % traceback.format_exc())

    # FIXME SORTING
    return results
示例8: parse
def parse(self, data, mode, **kwargs):
    """
    Read torrents out of the ``torrent_table`` table, locating columns by header label

    :param data: response data
    :param mode: search mode
    :return: search results, a list of dicts with the keys ``title``, ``link``,
             ``size``, ``seeders`` and ``leechers``
    """
    def process_column_header(td):
        # Prefer the title of a linked image, then the link text, then the cell text
        anchor = td.a
        label = anchor.img.get('title', anchor.get_text(strip=True)) if anchor and anchor.img else ''
        return label or td.get_text(strip=True)

    results = []

    with bs4_parser(data) as html:
        table = html.find('table', attrs={'id': 'torrent_table'})
        rows = table('tr') if table else []

        # Continue only if one Release is found
        if len(rows) < 2:
            sickrage.app.log.debug("Data returned from provider does not contain any torrents")
            return results

        # '', '', 'Name /Year', 'Files', 'Time', 'Size', 'Snatches', 'Seeders', 'Leechers'
        header_row, data_rows = rows[0], rows[1:]
        labels = [process_column_header(header) for header in header_row('td')]

        for row in data_rows:
            try:
                cells = row('td')
                if len(cells) < len(labels):
                    continue

                name_cell = cells[labels.index('Name /Year')]
                title = name_cell.find('a', dir='ltr').get_text(strip=True)
                download = name_cell.find('a', title='Download')['href']
                download_url = urljoin(self.urls['base_url'], download)
                if not (title and download_url):
                    continue

                seeders = try_int(cells[labels.index('Seeders')].get_text(strip=True))
                leechers = try_int(cells[labels.index('Leechers')].get_text(strip=True))
                size = convert_size(cells[labels.index('Size')].get_text(strip=True), -1)

                results.append({
                    'title': title,
                    'link': download_url,
                    'size': size,
                    'seeders': seeders,
                    'leechers': leechers,
                })

                if mode != 'RSS':
                    sickrage.app.log.debug('Found result: {}'.format(title))
            except Exception:
                sickrage.app.log.error('Failed parsing provider')

    return results
示例9: fetch_popular_shows
def fetch_popular_shows(self):
    """
    Get popular show information from IMDB

    :return: None when the request returned no data; otherwise a list with one
             dict per row that has a title cell. Keys are byte-string literals:
             ``name``, ``imdb_url``, ``imdb_tt``, ``year``, ``rating``, ``votes``
             and ``outline``, plus ``image_url_large`` and ``image_path`` when
             the row has an image cell.
    """
    popular_shows = []

    data = getURL(self.url, session=self.session, params=self.params, headers={"Referer": "http://akas.imdb.com/"})
    if not data:
        return None

    with bs4_parser(data) as soup:
        results = soup.find("table", {"class": "results"})
        # A page without the results table yields an empty list instead of
        # raising AttributeError on None.
        rows = results.find_all("tr") if results else []

        for row in rows:
            show = {}

            image_td = row.find("td", {"class": "image"})
            if image_td:
                image = image_td.find("img")
                show[b"image_url_large"] = self.change_size(image[b"src"], 3)
                show[b"image_path"] = os.path.join("images", "imdb_popular", os.path.basename(show[b"image_url_large"]))
                self.cache_image(show[b"image_url_large"])

            td = row.find("td", {"class": "title"})
            # NOTE(review): rows without a title cell are assumed to be skipped
            # (not appended); the indentation of the original was lost, confirm.
            if not td:
                continue

            show[b"name"] = td.find("a").contents[0]
            show[b"imdb_url"] = "http://www.imdb.com" + td.find("a")["href"]
            show[b"imdb_tt"] = show[b"imdb_url"][-10:][0:9]
            show[b"year"] = td.find("span", {"class": "year_type"}).contents[0].split(" ")[0][1:]

            # Default to "no rating" so both keys exist on every show, including
            # when the rating container is present but the inner rating element is not.
            show[b"rating"] = None
            show[b"votes"] = None

            rating_all = td.find("div", {"class": "user_rating"})
            rating_string = rating_all.find("div", {"class": "rating rating-list"}) if rating_all else None
            if rating_string:
                match = re.search(r".* (.*)\/10.*\((.*)\).*", rating_string[b"title"])
                if match:
                    # group 1: the text before "/10", group 2: the text in parentheses
                    show[b"rating"], show[b"votes"] = match.groups()

            outline = td.find("span", {"class": "outline"})
            show[b"outline"] = outline.contents[0] if outline else ""

            popular_shows.append(show)

    return popular_shows
示例10: _doSearch
def _doSearch(self, search_params, search_mode='eponly', epcount=0, age=0, epObj=None):
    """
    Run every search string of every mode against the provider

    :param search_params: mapping of search mode ('Season', 'Episode' or 'RSS')
                          to an iterable of search strings
    :param search_mode: not used by this implementation
    :param epcount: not used by this implementation
    :param age: not used by this implementation
    :param epObj: not used by this implementation
    :return: list of ``(title, download_url, size, seeders, leechers)`` tuples,
             sorted by seeders (descending) within each mode; empty when login fails
    """
    results = []
    items = {'Season': [], 'Episode': [], 'RSS': []}

    # check for auth
    if not self._doLogin():
        return results

    # Compiled once instead of once per table row
    details_link = re.compile("details.php")
    download_link = re.compile("download.php")

    for mode in search_params:
        sickrage.LOGGER.debug("Search Mode: %s" % mode)

        for search_string in search_params[mode]:
            # Strings are compared by value: ``is not`` tests identity, which
            # only matches by accident of interning.
            if mode != 'RSS':
                sickrage.LOGGER.debug("Search string: %s " % search_string)

            searchURL = self.urlsearch % (urllib.quote(search_string), self.categories)
            sickrage.LOGGER.debug("Search URL: %s" % searchURL)

            data = self.getURL(searchURL)
            if not data:
                continue

            with bs4_parser(data) as html:
                resultsTable = html.find("table", {"class": "table2 table-bordered2"})
                rows = resultsTable.find_all("tr") if resultsTable else []

                for row in rows:
                    link = row.find("a", href=details_link)
                    if not link:
                        continue

                    # A row with a details link but no download link is skipped
                    # rather than raising TypeError on None.
                    download = row.find("a", href=download_link)
                    if not download:
                        continue

                    title = link.text
                    download_url = self.url + '/' + download['href']

                    # FIXME: size, seeders and leechers are not scraped, so no
                    # minimum seeders/leechers filtering is applied here
                    size = -1
                    seeders = 1
                    leechers = 0

                    if not all([title, download_url]):
                        continue

                    item = title, download_url, size, seeders, leechers
                    if mode != 'RSS':
                        sickrage.LOGGER.debug("Found result: %s " % title)

                    items[mode].append(item)

        # For each search mode sort all the items by seeders if available
        items[mode].sort(key=lambda tup: tup[3], reverse=True)

        results += items[mode]

    return results
示例11: parse
def parse(self, data, mode, **kwargs):
    """
    Read torrents out of the ``sortabletable`` table, locating columns by header label

    :param data: response data
    :param mode: search mode
    :return: search results, a list of dicts with the keys ``title``, ``link``,
             ``size``, ``seeders`` and ``leechers``
    """
    def process_column_header(td):
        # Prefer the title of an image in the cell, falling back to the cell text
        image = td.img
        caption = image.get('title', td.get_text(strip=True)) if image else ''
        return caption or td.get_text(strip=True)

    results = []

    with bs4_parser(data) as html:
        table = html.find('table', id='sortabletable')
        rows = table('tr') if table else []

        # Continue only if at least one Release is found
        if len(rows) < 2:
            sickrage.app.log.debug("Data returned from provider does not contain any torrents")
            return results

        header_row, data_rows = rows[0], rows[1:]
        labels = [process_column_header(header) for header in header_row('td')]

        for row in data_rows:
            try:
                title = row.find('div', class_='tooltip-target').get_text(strip=True)

                # skip if torrent has been nuked due to poor quality
                if title.startswith('Nuked.'):
                    continue

                download_image = row.find('img', title='Click to Download this Torrent in SSL!')
                download_url = download_image.parent['href']
                if not (title and download_url):
                    continue

                cells = row('td')
                seeders = try_int(cells[labels.index('Seeders')].get_text(strip=True))
                leechers = try_int(cells[labels.index('Leechers')].get_text(strip=True))
                size = convert_size(cells[labels.index('Size')].get_text(strip=True), -1)

                results.append({
                    'title': title,
                    'link': download_url,
                    'size': size,
                    'seeders': seeders,
                    'leechers': leechers,
                })

                if mode != 'RSS':
                    sickrage.app.log.debug("Found result: {}".format(title))
            except Exception:
                sickrage.app.log.error("Failed parsing provider")

    return results
示例12: search
def search(self, search_strings, search_mode='eponly', epcount=0, age=0, epObj=None):
    """
    Query the provider's feed for every search string of every mode

    :param search_strings: mapping of search mode to an iterable of search strings
    :param search_mode: not used by this implementation
    :param epcount: not used by this implementation
    :param age: not used by this implementation
    :param epObj: not used by this implementation
    :return: list of dicts with the keys ``title``, ``link``, ``size``,
             ``seeders``, ``leechers`` and ``hash``; sorted by seeders
             (descending) within each mode
    """
    log = sickrage.srCore.srLogger
    results = []

    for mode in search_strings:
        mode_items = []
        log.debug('Search Mode: {}'.format(mode))

        for search_string in search_strings[mode]:
            search_url = self.urls['feed']
            if mode != 'RSS':
                log.debug('Search string: {}'.format(search_string))

            try:
                data = sickrage.srCore.srWebSession.get(search_url, params={'f': search_string}).text
            except Exception:
                log.debug('No data returned from provider')
                continue

            if not data.startswith('<?xml'):
                log.info('Expected xml but got something else, is your mirror failing?')
                continue

            with bs4_parser(data) as parser:
                for entry in parser('item'):
                    # Entries with a category that does not mention 'tv' are ignored
                    if entry.category and 'tv' not in entry.category.get_text(strip=True):
                        continue

                    title = entry.title.get_text(strip=True)
                    # The hash is the last path segment of the guid
                    t_hash = entry.guid.get_text(strip=True).rsplit('/', 1)[-1]
                    if not (title and t_hash):
                        continue

                    download_url = "magnet:?xt=urn:btih:" + t_hash + "&dn=" + title
                    torrent_size, seeders, leechers = self._split_description(entry.find('description').text)
                    size = convert_size(torrent_size) or -1

                    # Filter unseeded torrent
                    if seeders < self.minseed or leechers < self.minleech:
                        if mode != 'RSS':
                            log.debug("Discarding torrent because it doesn't meet the minimum seeders or leechers: {} (S:{} L:{})".format(title, seeders, leechers))
                        continue

                    mode_items.append({
                        'title': title,
                        'link': download_url,
                        'size': size,
                        'seeders': seeders,
                        'leechers': leechers,
                        'hash': t_hash
                    })

        # For each search mode sort all the items by seeders if available
        results.extend(sorted(mode_items, key=lambda d: int(d.get('seeders', 0)), reverse=True))

    return results
示例13: parse
def parse(self, data, mode):
    """
    Parse search results from data

    :param data: response data
    :param mode: search mode
    :return: search results, a list of dicts with the keys ``title``, ``link``,
             ``size``, ``seeders``, ``leechers`` and ``hash``
    """
    results = []

    with bs4_parser(data) as html:
        torrent_rows = html.find_all('tr')

        # Continue only if one Release is found
        if len(torrent_rows) < 2:
            sickrage.app.log.debug("Data returned from provider does not contain any torrents")
            return results

        # Scenetime apparently uses different number of cells in #torrenttable based
        # on who you are. This works around that by extracting labels from the first
        # <tr> and using their index to find the correct download/seeders/leechers td.
        labels = [label.get_text() for label in torrent_rows[0].find_all('td')]

        for result in torrent_rows[1:]:
            try:
                # Cell and link lookups happen inside the try so that a row
                # without a 'Name' column or link is logged and skipped instead
                # of aborting the whole parse.
                cells = result.find_all('td')
                link = cells[labels.index('Name')].find('a')

                # The torrent id is the part of the href before the first '&'
                full_id = link['href'].replace('details.php?id=', '')
                torrent_id = full_id.split("&")[0]

                title = link.contents[0].get_text()
                filename = "%s.torrent" % title.replace(" ", ".")
                download_url = self.urls['download'] % (torrent_id, filename)

                seeders = int(cells[labels.index('Seeders')].get_text())
                leechers = int(cells[labels.index('Leechers')].get_text())

                # FIXME: size is not scraped
                size = -1

                if not all([title, download_url]):
                    continue

                item = {'title': title, 'link': download_url, 'size': size, 'seeders': seeders,
                        'leechers': leechers, 'hash': ''}

                if mode != 'RSS':
                    sickrage.app.log.debug("Found result: {}".format(title))

                results.append(item)
            except Exception:
                sickrage.app.log.error("Failed parsing provider")

    return results
示例14: parse
def parse(self, data, mode):
    """
    Read torrents out of the ``border="1"`` table, locating columns by header label

    :param data: response data
    :param mode: search mode
    :return: search results, a list of dicts with the keys ``title``, ``link``,
             ``size``, ``seeders``, ``leechers`` and ``hash``
    """
    results = []

    with bs4_parser(data) as html:
        table = html.find("table", border="1")
        rows = table("tr") if table else []

        # Continue only if at least one Release is found
        if len(rows) < 2:
            sickrage.app.log.debug("Data returned from provider does not contain any torrents")
            return results

        # "Type", "Name", Files", "Comm.", "Added", "TTL", "Size", "Snatched", "Seeders", "Leechers"
        header_row, data_rows = rows[0], rows[1:]
        labels = [header.get_text(strip=True) for header in header_row("td")]

        for row in data_rows:
            try:
                cells = row("td")
                name_cell = cells[labels.index("Name")]

                link = name_cell.find("a", href=re.compile(r"download.php\?id="))["href"]
                download_url = urljoin(self.urls['base_url'], link)

                # Prefer the anchor's title attribute, falling back to its text
                title_element = name_cell.find("a", href=re.compile(r"details.php\?id="))
                title = title_element.get("title", "") or title_element.get_text(strip=True)
                if not (title and download_url):
                    continue

                if self.freeleech:
                    # Free leech torrents are marked with green [F L] in the title (i.e. <font color=green>[F L]</font>)
                    marker = name_cell.find("font", color="green")
                    if not marker or marker.get_text(strip=True) != "[F\xa0L]":
                        continue

                seeders = try_int(cells[labels.index("Seeders")].get_text(strip=True))
                leechers = try_int(cells[labels.index("Leechers")].get_text(strip=True))
                size = convert_size(cells[labels.index("Size")].get_text(strip=True), -1)

                item = {
                    'title': title,
                    'link': download_url,
                    'size': size,
                    'seeders': seeders,
                    'leechers': leechers,
                    'hash': '',
                }

                if mode != "RSS":
                    sickrage.app.log.debug("Found result: {}".format(title))

                results.append(item)
            except Exception:
                sickrage.app.log.error("Failed parsing provider.")

    return results
示例15: parse
def parse(self, data, mode, **kwargs):
    """
    Collect items from the rows of the ``table-responsive results`` element.

    :param data: The raw response from a search
    :param mode: The current mode used to search, e.g. RSS
    :return: A list of items found, each a dict with the keys ``title``,
             ``link``, ``size``, ``seeders`` and ``leechers``
    """
    # Unit names handed to convert_size for this provider
    size_units = ['O', 'KO', 'MO', 'GO', 'TO', 'PO']

    results = []

    with bs4_parser(data) as html:
        container = html.find(class_='table-responsive results')
        rows = container('tr') if container else []

        # Continue only if at least one Release is found
        if len(rows) < 2:
            sickrage.app.log.debug('Data returned from provider does not contain any torrents')
            return results

        # First row is skipped
        for row in rows[1:]:
            cells = row('td')
            if len(cells) < 9:
                continue

            try:
                info = cells[1].find('a')
                title = info.get_text(strip=True)
                details_url = info.get('href')
                if not title or not details_url:
                    continue

                # The numeric id sits between a '/' and a '-' in the link
                id_match = re.search(r'/(\d+)-', details_url)
                download_url = self.urls['download'] % id_match.group(1)

                seeders = try_int(cells[7].get_text(strip=True), 0)
                leechers = try_int(cells[8].get_text(strip=True), 0)
                size = convert_size(cells[5].get_text(), -1, size_units)

                results.append({
                    'title': title,
                    'link': download_url,
                    'size': size,
                    'seeders': seeders,
                    'leechers': leechers
                })

                if mode != 'RSS':
                    sickrage.app.log.debug("Found result: {}".format(title))
            except Exception:
                sickrage.app.log.error('Failed parsing provider.')

    return results