This article collects typical usage examples of the Python class dudehere.routines.scrapers.ScraperResult. If you are wondering what the ScraperResult class is for, how to use it, or what real-world usage looks like, the selected examples here should help.
The sections below show 15 code examples of the ScraperResult class, sorted by popularity by default.
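All of the examples follow the same basic pattern: the scraper builds a service-prefixed URL, wraps it in a ScraperResult together with a host name (and sometimes a title), sets attributes such as quality, and appends it to a results list. The minimal sketch below illustrates that pattern; MyScraper, its items input, the filter_host stub, and the import path used for the QUALITY constants are assumptions for illustration, and only the ScraperResult usage itself mirrors the examples that follow.
# Minimal sketch of the shared pattern, inferred from the examples below.
# MyScraper, `items`, the filter_host stub, and the QUALITY import path are
# assumptions, not part of the original examples.
from dudehere.routines.scrapers import ScraperResult
from dudehere.routines import QUALITY  # assumed module for the quality constants

class MyScraper(object):
    service = 'example'  # hypothetical service identifier

    def filter_host(self, host_name):
        # Real scrapers check the host against the user's enabled hosts.
        return True

    def process_results(self, items):
        results = []
        for host_name, link in items:  # items: iterable of (host, link) pairs
            if self.filter_host(host_name):
                url = "%s://%s" % (self.service, link)
                result = ScraperResult(self.service, host_name, url)
                result.quality = QUALITY.UNKNOWN  # refined per source in the real scrapers
                results.append(result)
        return results
Example 6 additionally sets size, text, and extension on the result, and examples 11 and 14 pass a fourth positional title argument to the constructor.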
Example 1: process_results
def process_results(self, soup):
    results = []
    rows = soup.findAll('tr')
    for i in range(0, len(rows)):
        try:
            row = rows[i]
            link = row.find('td').find('a')['href']
            import HTMLParser
            h = HTMLParser.HTMLParser()
            dm = row.find('td').getText()
            domain = h.unescape(dm).strip()
            try:
                quality = row.findAll('td')[3].find('img')['src']
            except:
                quality = ''
            host_name = domain
            if self.filter_host(host_name):
                url = "%s://%s" % (self.service, link)
                result = ScraperResult(self.service, host_name, url)
                if 'qualityDVD' in quality:
                    result.quality = QUALITY.SD480
                elif 'qualityhd' in quality:
                    result.quality = QUALITY.SD720
                else:
                    result.quality = QUALITY.UNKNOWN
                results.append(result)
        except:
            pass
    return results
Example 2: get_resolved_url
def get_resolved_url(self, raw_url):
    resolved_url = ''
    uri = '/player/pk/pk/plugins/player_p2.php?url=' + raw_url
    json = self.request(uri, return_json=True)
    videos = []
    for link in json:
        if re.match('^video', link['type']):
            videos.append(link)
    if len(videos) == 1:
        resolved_url = videos[0]['url']
        return resolved_url
    else:
        self.search_results = []
        for v in videos:
            url = v['url']
            obj = urlparse(url)
            host_name = re.sub('^www(.+?)\.', '', obj.hostname)
            result = ScraperResult(self.service, host_name, url)
            if v['width'] > 1280:
                result.quality = QUALITY.HD1080
            elif v['width'] == 1280:
                result.quality = QUALITY.HD720
            elif v['width'] == 640:
                result.quality = QUALITY.SD480
            else:
                result.quality = QUALITY.UNKNOWN
            self.search_results.append(result)
        resolved_url = self.select_stream()
        return resolved_url
Example 3: process_tv_results
def process_tv_results(self, js):
    results = []
    url = "%s://%s" % (self.service, js['url'])
    result = ScraperResult(self.service, 'movietv', url)
    result.quality = QUALITY.HD720
    results.append(result)
    return results
Example 4: process_results
def process_results(self, html):
    results = []
    links = re.finditer("pic=([^&]+)", html)
    for link in links:
        url = "%s://%s" % (self.service, link.group(1))
        result = ScraperResult(self.service, self.service, url)
        result.quality = QUALITY.HD720
        results.append(result)
    return results
Example 5: process_results
def process_results(self, html):
    results = []
    for match in re.finditer('{\s*file\s*:\s*"([^"]+)', html):
        stream_url = match.group(1)
        if 'dizlab' not in stream_url.lower():
            url = "%s://%s" % (self.service, stream_url)
            result = ScraperResult(self.service, 'gvideo', url)
            result.quality = self.test_gv_quality(stream_url)
            results.append(result)
    return results
Example 6: process_results
def process_results(self, path):
    results = []
    url = "%s://%s" % (self.service, path)
    result = ScraperResult(self.service, 'VideoLibrary', url)
    result.quality = QUALITY.LOCAL
    result.size = vfs.get_size(path)
    result.text = path
    parts = vfs.path_parts(path)
    result.extension = parts['extension']
    results.append(result)
    return results
Example 7: process_results
def process_results(self, html):
    results = []
    for match in re.finditer('<a[^>]+href="([^"]+)[^>]+>(Version \d+)<', html):
        url, version = match.groups()
        host_name = urlsplit(url).hostname.replace('embed.', '')
        if self.filter_host(host_name) and host_name != 'putlocker.is':
            url = "%s://%s" % (self.service, url)
            result = ScraperResult(self.service, host_name, url)
            result.quality = QUALITY.HIGH
            results.append(result)
    return results
Example 8: process_results
def process_results(self, soup):
    results = []
    links = soup.findAll('a', {"rel": "nofollow", "target": "_blank"})
    for link in links:
        host_name = link.string.lower()
        if host_name in self.domains:
            url = "%s://%s" % (self.service, link['href'])
            result = ScraperResult(self.service, host_name, url)
            result.quality = QUALITY.UNKNOWN
            results.append(result)
    return results
Example 9: process_movie_results
def process_movie_results(self, html):
    results = []
    for match in re.finditer('var\s+(videolink[^\s]*)\s*=\s*"([^"]+)', html):
        var_name, url = match.groups()
        url = "%s://%s" % (self.service, url)
        result = ScraperResult(self.service, 'movietv', url)
        if 'hd' in var_name:
            result.quality = QUALITY.HD1080
        else:
            result.quality = QUALITY.HD720
        results.append(result)
    return results
Example 10: process_results
def process_results(self, soup):
    results = []
    tag = str(soup.find('div', {'id': 'linkname'}))
    reg = re.compile("go_to\(\d+,'(.+?)'\)")
    links = list(re.findall(reg, tag))
    for link in links:
        host_name = self.get_hostname(link)
        if self.filter_host(host_name):
            url = "%s://%s" % (self.service, link)
            result = ScraperResult(self.service, host_name, url)
            result.quality = QUALITY.UNKNOWN
            results.append(result)
    return results
Example 11: _get_sources
def _get_sources(self, vid):
    uri = '/membersonly/components/com_iceplayer/video.php?h=374&w=631&vid=%s&img=' % vid
    results = []
    html = self.request(uri)
    soup = BeautifulSoup(html)
    match = re.search('lastChild\.value="([^"]+)"(?:\s*\+\s*"([^"]+))?', html)
    secret = ''.join(match.groups(''))
    match = re.search('"&t=([^"]+)', html)
    t = match.group(1)
    match = re.search('(?:\s+|,)s\s*=(\d+)', html)
    s_start = int(match.group(1))
    match = re.search('(?:\s+|,)m\s*=(\d+)', html)
    m_start = int(match.group(1))
    match = re.search('<iframe[^>]*src="([^"]+)', html)
    ad_url = urllib.quote(match.group(1))
    for block in soup.findAll('div', {"class": "ripdiv"}):
        isHD = block.find('b').string == 'HD 720p'
        if isHD: quality = QUALITY.HD720
        else: quality = QUALITY.SD480
        mirrors = block.findAll("p")
        for mirror in mirrors:
            links = mirror.findAll("a")
            for link in links:
                mirror_id = link['onclick'][3:len(link['onclick']) - 1]
                host_name, title = self.get_provider(link)
                if host_name:
                    '''attribs = [
                        self.name,
                        self.set_color(QUALITY.r_map[quality], self.QUALITY_COLOR),
                        self.set_color(host_name, self.HOST_COLOR)
                    ]'''
                    s = s_start + random.randint(1, 100)
                    m = m_start + (s - s_start) + random.randint(1, 100)
                    url = '%s:///membersonly/components/com_iceplayer/video.phpAjaxResp.php?id=%s&s=%s&iqs=&url=&m=%s&cap= &sec=%s&t=%s' % (self.service, mirror_id, s, m, secret, t)
                    #display = "[%s]: %s" % (' | '.join(attribs), title)
                    #record = {"title": display, "url": url, "host": host_name, "service": self.service, "quality": quality}
                    result = ScraperResult(self.service, host_name, url, title)
                    result.quality = quality
                    results.append(result)
    return results
Example 12: process_results
def process_results(self, html):
    results = []
    pattern = r'id="tablemoviesindex2".*?href="([^"]+).*? ([^<]+)(.*)'
    for match in re.finditer(pattern, html):
        url, host_name, extra = match.groups()
        if not url.startswith('/'): url = '/' + url
        r = re.search('/smileys/(\d+)\.gif', extra)
        if r:
            smiley = r.group(1)
        else:
            smiley = None
        url = "%s://%s" % (self.service, url)
        result = ScraperResult(self.service, host_name.lower(), url)
        result.quality = QUALITY_MAP[smiley]
        results.append(result)
    return results
Example 13: process_results
def process_results(self, soup):
    results = []
    rows = soup.findAll('div', {'class': 'site'})
    for i in range(0, len(rows)):
        try:
            row = rows[i]
            domain = row.find('a')['data-hostname'].lower()
            link = row.find('a')['data-actuallink']
            host_name = domain
            if self.filter_host(host_name):
                url = "%s://%s" % (self.service, link)
                result = ScraperResult(self.service, host_name, url)
                result.quality = QUALITY.UNKNOWN
                results.append(result)
        except:
            pass
    return results
Example 14: _get_sources
def _get_sources(self, vid):
    uri = self.referer % vid
    results = []
    html = self.request(uri)
    soup = BeautifulSoup(html)
    match = re.search('lastChild\.value="([^"]+)"(?:\s*\+\s*"([^"]+))?', html)
    secret = ''.join(match.groups(''))
    match = re.search('"&t=([^"]+)', html)
    t = match.group(1)
    match = re.search('(?:\s+|,)s\s*=(\d+)', html)
    s_start = int(match.group(1))
    match = re.search('(?:\s+|,)m\s*=(\d+)', html)
    m_start = int(match.group(1))
    match = re.search('<iframe[^>]*src="([^"]+)', html)
    if match:
        ad_url = urllib.quote(match.group(1))
    else:
        ad_url = ''
    for block in soup.findAll('div', {"class": "ripdiv"}):
        isHD = 'HD 720p' in block.find('b').string
        if isHD: quality = QUALITY.HD720
        else: quality = QUALITY.SD480
        mirrors = block.findAll("p")
        for mirror in mirrors:
            links = mirror.findAll("a")
            for link in links:
                mirror_id = link['onclick'][3:len(link['onclick']) - 1]
                host_name, title = self.get_provider(link)
                if host_name:
                    if self.filter_host(host_name):
                        s = s_start + random.randint(3, 1000)
                        m = m_start + random.randint(21, 1000)
                        uri = self.ajax_url % (mirror_id, s, m, secret, t, ad_url)
                        url = "%s://%s" % (self.service, uri)
                        result = ScraperResult(self.service, host_name, url, title)
                        result.quality = quality
                        results.append(result)
    return results
Example 15: process_results
def process_results(self, html):
    results = []
    for match in re.finditer('"?file"?\s*:\s*"([^"]+)"\s*,\s*"?label"?\s*:\s*"(\d+)p?"', html):
        stream_url, height = match.groups()
        stream_url = stream_url.replace('\\&', '&').replace('\\/', '/')
        if 'v.asp' in stream_url and 'ok.ru' not in html:
            redirect = self.request(stream_url, get_redirect=True, append_base=False)
            url = "%s://%s" % (self.service, stream_url)
            if 'google' in redirect:
                host_name = 'gvideo'
                quality = self.test_gv_quality(redirect)
            else:
                host_name = self.service
                quality = self.test_height_quality(height)
            result = ScraperResult(self.service, host_name, url)
            result.quality = quality
            results.append(result)
    return results