当前位置: 首页>>代码示例>>Python>>正文


Python scrapers.ScraperResult类代码示例

本文整理汇总了Python中dudehere.routines.scrapers.ScraperResult的典型用法代码示例。如果您正苦于以下问题:Python ScraperResult类的具体用法?Python ScraperResult怎么用?Python ScraperResult使用的例子?那么恭喜您, 这里精选的类代码示例或许可以为您提供帮助。


在下文中一共展示了ScraperResult类的15个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Python代码示例。

示例1: process_results

	def process_results(self, soup):
		"""Build one result per usable row of a parsed link table.

		For every ``<tr>`` the first cell supplies both the link (``href`` of
		its anchor) and the host name (its unescaped, stripped text). The
		``src`` of the image in the fourth cell, when present, selects the
		quality.

		Args:
			soup: parsed document exposing ``findAll`` (BeautifulSoup style).

		Returns:
			list of ScraperResult for rows whose host passes
			``self.filter_host``; rows that cannot be parsed are skipped.
		"""
		# Resolve the unescape helper once, instead of importing the module
		# and building a parser for every row.
		try:
			from HTMLParser import HTMLParser  # Python 2
			unescape = HTMLParser().unescape
		except ImportError:
			from html import unescape  # Python 3

		results = []
		for row in soup.findAll('tr'):
			try:
				first_cell = row.find('td')
				link = first_cell.find('a')['href']
				host_name = unescape(first_cell.getText()).strip()
				try:
					quality = row.findAll('td')[3].find('img')['src']
				except (IndexError, KeyError, TypeError, AttributeError):
					# No quality icon in the fourth cell.
					quality = ''
				if not self.filter_host(host_name):
					continue
				url = "%s://%s" % (self.service, link)
				result = ScraperResult(self.service, host_name, url)
				if 'qualityDVD' in quality:
					result.quality = QUALITY.SD480
				elif 'qualityhd' in quality:
					# NOTE(review): this branch used to assign QUALITY.SD720,
					# presumably a typo for HD720 -- confirm against the
					# QUALITY enum.
					result.quality = QUALITY.HD720
				else:
					result.quality = QUALITY.UNKNOWN
				results.append(result)
			except Exception:
				# Best effort: a malformed row (header row, missing anchor,
				# ...) must not abort the remaining rows.
				continue
		return results
开发者ID:bialagary,项目名称:mw,代码行数:30,代码来源:projectfreetv.py

示例2: get_resolved_url

	def get_resolved_url(self, raw_url):
		"""Resolve a player URL to a stream URL.

		Requests the player plugin endpoint for ``raw_url`` and keeps the
		entries whose ``type`` starts with ``video``. When exactly one entry
		remains its URL is returned directly. Otherwise the entries are
		stored as ScraperResult objects in ``self.search_results`` and the
		choice is delegated to ``self.select_stream()``.

		Args:
			raw_url: value appended as the ``url`` query parameter.

		Returns:
			The URL of the single video entry, or the value returned by
			``self.select_stream()``.
		"""
		uri = '/player/pk/pk/plugins/player_p2.php?url=' + raw_url
		# Not called ``json`` so the stdlib module name is not shadowed.
		links = self.request(uri, return_json=True)
		videos = [link for link in links if link['type'].startswith('video')]
		if len(videos) == 1:
			return videos[0]['url']

		self.search_results = []
		for video in videos:
			url = video['url']
			# hostname is None when the URL carries no network location.
			hostname = urlparse(url).hostname or ''
			# Drop a leading "www." / "www2." label. The character class
			# allows zero extra characters, so "www.host.com" becomes
			# "host.com" rather than collapsing to "com".
			host_name = re.sub(r'^www[^.]*\.', '', hostname)
			result = ScraperResult(self.service, host_name, url)
			width = video['width']
			if width > 1280:
				result.quality = QUALITY.HD1080
			elif width == 1280:
				result.quality = QUALITY.HD720
			elif width == 640:
				result.quality = QUALITY.SD480
			else:
				result.quality = QUALITY.UNKNOWN
			self.search_results.append(result)
		return self.select_stream()
开发者ID:bialagary,项目名称:mw,代码行数:29,代码来源:yify.py

示例3: process_tv_results

	def process_tv_results(self, js):
		"""Wrap the stream referenced by ``js['url']`` in a one-element list.

		The URL is prefixed with the service name as scheme; the result is
		created with host 'movietv' and tagged as HD720.
		"""
		target = "%s://%s" % (self.service, js['url'])
		stream = ScraperResult(self.service, 'movietv', target)
		stream.quality = QUALITY.HD720
		return [stream]
开发者ID:c0ns0le,项目名称:YCBuilds,代码行数:7,代码来源:movietv.py

示例4: process_results

	def process_results(self, html):
		"""Return one HD720 result for every ``pic=`` value found in ``html``.

		Each captured value (everything up to the next ``&``) is prefixed
		with the service name as scheme; the service name also serves as
		the host name of the result.
		"""
		found = []
		for target in re.findall("pic=([^&]+)", html):
			entry = ScraperResult(self.service, self.service, "%s://%s" % (self.service, target))
			entry.quality = QUALITY.HD720
			found.append(entry)
		return found
开发者ID:bialagary,项目名称:mw,代码行数:9,代码来源:yify.py

示例5: process_results

	def process_results(self, html):
		"""Collect 'gvideo' results from ``{file: "..."}`` entries in ``html``.

		Stream URLs containing 'dizlab' (case-insensitive) are skipped. The
		quality of each kept stream comes from ``self.test_gv_quality``.
		"""
		sources = []
		for stream_url in re.findall('{\s*file\s*:\s*"([^"]+)', html):
			if 'dizlab' in stream_url.lower():
				continue
			entry = ScraperResult(self.service, 'gvideo', "%s://%s" % (self.service, stream_url))
			entry.quality = self.test_gv_quality(stream_url)
			sources.append(entry)
		return sources
开发者ID:c0ns0le,项目名称:YCBuilds,代码行数:10,代码来源:dizilab.py

示例6: process_results

	def process_results(self, path):
		"""Describe the file at ``path`` as a single 'VideoLibrary' result.

		The result is tagged QUALITY.LOCAL and carries the size reported by
		``vfs.get_size``, the path itself as text, and the 'extension' entry
		of ``vfs.path_parts(path)``.
		"""
		entry = ScraperResult(self.service, 'VideoLibrary', "%s://%s" % (self.service, path))
		entry.quality = QUALITY.LOCAL
		entry.size = vfs.get_size(path)
		entry.text = path
		entry.extension = vfs.path_parts(path)['extension']
		return [entry]
开发者ID:c0ns0le,项目名称:YCBuilds,代码行数:11,代码来源:local.py

示例7: process_results

	def process_results(self, html):
		"""Collect results from the "Version N" anchors found in ``html``.

		The host name is the link's hostname with 'embed.' removed. Links
		rejected by ``self.filter_host`` or hosted on 'putlocker.is' are
		skipped; every kept link is tagged QUALITY.HIGH.
		"""
		collected = []
		for found in re.finditer('<a[^>]+href="([^"]+)[^>]+>(Version \d+)<', html):
			link, _version = found.groups()
			host_name = urlsplit(link).hostname.replace('embed.', '')
			# filter_host is consulted first, exactly as in the combined test.
			if not self.filter_host(host_name) or host_name == 'putlocker.is':
				continue
			entry = ScraperResult(self.service, host_name, "%s://%s" % (self.service, link))
			entry.quality = QUALITY.HIGH
			collected.append(entry)
		return collected
开发者ID:bialagary,项目名称:mw,代码行数:11,代码来源:putlocker.py

示例8: process_results

	def process_results(self, soup):
		"""Collect results from external anchors whose text is a known domain.

		Only anchors with rel="nofollow" and target="_blank" are inspected.
		The lower-cased anchor text is the host name and must appear in
		``self.domains``; kept links are tagged QUALITY.UNKNOWN.
		"""
		collected = []
		for anchor in soup.findAll('a', {"rel": "nofollow", "target": "_blank"}):
			host_name = anchor.string.lower()
			if host_name not in self.domains:
				continue
			entry = ScraperResult(self.service, host_name, "%s://%s" % (self.service, anchor['href']))
			entry.quality = QUALITY.UNKNOWN
			collected.append(entry)
		return collected
开发者ID:bialagary,项目名称:mw,代码行数:11,代码来源:example.py

示例9: process_movie_results

	def process_movie_results(self, html):
		"""Collect 'movietv' results from ``var videolink... = "..."`` assignments.

		A variable whose name contains 'hd' yields an HD1080 result; any
		other matching variable yields HD720.
		"""
		collected = []
		for var_name, link in re.findall('var\s+(videolink[^\s]*)\s*=\s*"([^"]+)', html):
			entry = ScraperResult(self.service, 'movietv', "%s://%s" % (self.service, link))
			entry.quality = QUALITY.HD1080 if 'hd' in var_name else QUALITY.HD720
			collected.append(entry)
		return collected
开发者ID:c0ns0le,项目名称:YCBuilds,代码行数:12,代码来源:movietv.py

示例10: process_results

	def process_results(self, soup):
		"""Collect results from ``go_to(N,'...')`` calls inside div#linkname.

		The div is stringified and scanned for the quoted second argument of
		each call. Links whose host (per ``self.get_hostname``) passes
		``self.filter_host`` are kept and tagged QUALITY.UNKNOWN.
		"""
		container = str(soup.find('div', {'id': 'linkname'}))
		collected = []
		for link in re.findall("go_to\(\d+,'(.+?)'\)", container):
			host_name = self.get_hostname(link)
			if not self.filter_host(host_name):
				continue
			entry = ScraperResult(self.service, host_name, "%s://%s" % (self.service, link))
			entry.quality = QUALITY.UNKNOWN
			collected.append(entry)
		return collected
开发者ID:c0ns0le,项目名称:YCBuilds,代码行数:13,代码来源:tvonline.py

示例11: _get_sources

	def _get_sources(self, vid):
		"""Scrape the mirror list for video ``vid`` and build result objects.

		Fetches the iceplayer page, pulls the request tokens out of its
		inline script (the secret, ``t`` and the ``s``/``m`` start counters)
		and emits one ScraperResult per mirror link for which
		``self.get_provider`` returns a host name.

		Args:
			vid: video id substituted into the player URI.

		Returns:
			list of ScraperResult. Mirrors inside a block whose ``<b>`` label
			is exactly 'HD 720p' are tagged HD720, all others SD480.

		Raises:
			AttributeError: when one of the required tokens is not found in
				the page (the corresponding ``re.search`` returns None).
		"""
		uri = '/membersonly/components/com_iceplayer/video.php?h=374&w=631&vid=%s&img=' % vid
		results = []
		html = self.request(uri)
		soup = BeautifulSoup(html)

		match = re.search(r'lastChild\.value="([^"]+)"(?:\s*\+\s*"([^"]+))?', html)
		# groups('') substitutes '' for the optional second half of the secret.
		secret = ''.join(match.groups(''))

		match = re.search(r'"&t=([^"]+)', html)
		t = match.group(1)

		match = re.search(r'(?:\s+|,)s\s*=(\d+)', html)
		s_start = int(match.group(1))

		match = re.search(r'(?:\s+|,)m\s*=(\d+)', html)
		m_start = int(match.group(1))

		# The iframe "ad url" is deliberately not extracted: the request URL
		# built below has no slot for it, and requiring the iframe made the
		# whole scrape fail on pages that lack one.

		for block in soup.findAll('div', {"class": "ripdiv"}):
			if block.find('b').string == 'HD 720p':
				quality = QUALITY.HD720
			else:
				quality = QUALITY.SD480

			for mirror in block.findAll("p"):
				for link in mirror.findAll("a"):
					# Strip the 3-character prefix and the final character of
					# the onclick handler to get the mirror id.
					mirror_id = link['onclick'][3:-1]
					host_name, title = self.get_provider(link)
					if not host_name:
						continue
					# Both counters are advanced by a random amount; m also
					# absorbs the amount added to s.
					s = s_start + random.randint(1, 100)
					m = m_start + (s - s_start) + random.randint(1, 100)
					url = '%s:///membersonly/components/com_iceplayer/video.phpAjaxResp.php?id=%s&s=%s&iqs=&url=&m=%s&cap= &sec=%s&t=%s' % (self.service, mirror_id, s, m, secret, t)
					result = ScraperResult(self.service, host_name, url, title)
					result.quality = quality
					results.append(result)
		return results
开发者ID:natko1412,项目名称:script.module.dudehere.routines,代码行数:48,代码来源:icefilms.py

示例12: process_results

	def process_results(self, html):
		"""Collect results from the ``tablemoviesindex2`` entries in ``html``.

		Each match yields a link (made absolute with a leading '/'), a host
		name (lower-cased) and the remainder of the line. The number of the
		smiley gif found in that remainder, or None when there is none, is
		the key looked up in ``QUALITY_MAP``.
		"""
		pattern = r'id="tablemoviesindex2".*?href="([^"]+).*?&nbsp;([^<]+)(.*)'
		collected = []
		for found in re.finditer(pattern, html):
			link, host_name, trailer = found.groups()
			if not link.startswith('/'):
				link = '/' + link
			smiley_match = re.search('/smileys/(\d+)\.gif', trailer)
			smiley = smiley_match.group(1) if smiley_match else None
			entry = ScraperResult(self.service, host_name.lower(), "%s://%s" % (self.service, link))
			entry.quality = QUALITY_MAP[smiley]
			collected.append(entry)
		return collected
开发者ID:bialagary,项目名称:mw,代码行数:16,代码来源:movie4k.py

示例13: process_results

	def process_results(self, soup):
		"""Build one result per ``div.site`` entry of the parsed page.

		The first anchor of each entry supplies the host name (its
		``data-hostname`` attribute, lower-cased) and the link (its
		``data-actuallink`` attribute).

		Args:
			soup: parsed document exposing ``findAll`` (BeautifulSoup style).

		Returns:
			list of ScraperResult tagged QUALITY.UNKNOWN for entries whose
			host passes ``self.filter_host``; entries that cannot be parsed
			are skipped.
		"""
		results = []
		for row in soup.findAll('div', {'class': 'site'}):
			try:
				anchor = row.find('a')
				host_name = anchor['data-hostname'].lower()
				link = anchor['data-actuallink']
				if self.filter_host(host_name):
					url = "%s://%s" % (self.service, link)
					result = ScraperResult(self.service, host_name, url)
					result.quality = QUALITY.UNKNOWN
					results.append(result)
			except Exception:
				# Best effort: an entry without an anchor or without the data
				# attributes must not abort the remaining entries. Unlike a
				# bare except, this lets KeyboardInterrupt/SystemExit through.
				continue
		return results
开发者ID:c0ns0le,项目名称:YCBuilds,代码行数:17,代码来源:watchepisode.py

示例14: _get_sources

	def _get_sources(self, vid):
		"""Scrape the mirror list for video ``vid`` and build result objects.

		Loads ``self.referer % vid``, extracts the request tokens from the
		page script (secret, ``t``, the ``s``/``m`` start counters and the
		quoted iframe URL, '' when there is no iframe) and emits one
		ScraperResult per mirror link whose provider is recognised by
		``self.get_provider`` and accepted by ``self.filter_host``.

		Blocks whose ``<b>`` label contains 'HD 720p' are tagged HD720, all
		others SD480.
		"""
		page = self.request(self.referer % vid)
		soup = BeautifulSoup(page)

		# groups('') substitutes '' for the optional second half of the secret.
		secret = ''.join(re.search('lastChild\.value="([^"]+)"(?:\s*\+\s*"([^"]+))?', page).groups(''))
		t = re.search('"&t=([^"]+)', page).group(1)
		s_start = int(re.search('(?:\s+|,)s\s*=(\d+)', page).group(1))
		m_start = int(re.search('(?:\s+|,)m\s*=(\d+)', page).group(1))

		iframe = re.search('<iframe[^>]*src="([^"]+)', page)
		ad_url = urllib.quote(iframe.group(1)) if iframe else ''

		sources = []
		for block in soup.findAll('div', {"class": "ripdiv"}):
			quality = QUALITY.HD720 if 'HD 720p' in block.find('b').string else QUALITY.SD480
			for mirror in block.findAll("p"):
				for link in mirror.findAll("a"):
					onclick = link['onclick']
					mirror_id = onclick[3:len(onclick)-1]
					host_name, title = self.get_provider(link)
					if not host_name:
						continue
					if not self.filter_host(host_name):
						continue
					# Draw order matters for reproducibility: s first, then m.
					s = s_start + random.randint(3, 1000)
					m = m_start + random.randint(21, 1000)
					path = self.ajax_url % (mirror_id, s, m, secret, t, ad_url)
					entry = ScraperResult(self.service, host_name, "%s://%s" % (self.service, path), title)
					entry.quality = quality
					sources.append(entry)
		return sources
开发者ID:c0ns0le,项目名称:YCBuilds,代码行数:45,代码来源:icefilms.py

示例15: process_results

	def process_results(self, html):
		"""Collect results from ``file``/``label`` pairs embedded in ``html``.

		Only 'v.asp' stream URLs are considered, and none at all when the
		page mentions 'ok.ru'. Each candidate is requested with
		``get_redirect=True``; a redirect pointing at Google is reported as
		host 'gvideo' with the quality derived from the redirect URL,
		anything else as the service itself with the quality derived from
		the label height.

		Args:
			html: page source to scan.

		Returns:
			list of ScraperResult, possibly empty.
		"""
		results = []
		# The page-level check does not depend on the individual match.
		if 'ok.ru' in html:
			return results

		pattern = r'"?file"?\s*:\s*"([^"]+)"\s*,\s*"?label"?\s*:\s*"(\d+)p?"'
		for match in re.finditer(pattern, html):
			stream_url, height = match.groups()
			# Undo the JavaScript escaping of '&' and '/'.
			stream_url = stream_url.replace('\\&', '&').replace('\\/', '/')
			if 'v.asp' not in stream_url:
				continue
			redirect = self.request(stream_url, get_redirect=True, append_base=False)
			url = "%s://%s" % (self.service, stream_url)
			# NOTE(review): the condition used to be
			# "'google' in redirect or '' in redirect", which is always true;
			# confirm whether a second host marker was intended.
			if redirect and 'google' in redirect:
				host_name = 'gvideo'
				quality = self.test_gv_quality(redirect)
			else:
				host_name = self.service
				quality = self.test_height_quality(height)
			result = ScraperResult(self.service, host_name, url)
			# Use the quality chosen for this host rather than recomputing
			# the gvideo quality unconditionally.
			result.quality = quality
			results.append(result)
		return results
开发者ID:c0ns0le,项目名称:YCBuilds,代码行数:18,代码来源:sezonlukdizi.py


注:本文中的dudehere.routines.scrapers.ScraperResult类示例由纯净天空整理自Github/MSDocs等开源代码及文档管理平台,相关代码片段筛选自各路编程大神贡献的开源项目,源码版权归原作者所有,传播和使用请参考对应项目的License;未经允许,请勿转载。