Python scraper_utils.get_ua函数代码示例

本文整理汇总了 Python 中 salts_lib.scraper_utils.get_ua 函数的典型用法代码示例。如果您正苦于以下问题：Python get_ua 函数的具体用法？Python get_ua 怎么用？Python get_ua 使用的例子？那么，这里精选的函数代码示例或许可以为您提供帮助。


示例1: get_sources

 def get_sources(self, video):
     """Build the list of hosters for *video* from its page on this site.

     Every link inside the page's ``div.videos`` fragment whose lower-cased
     label is a key of ``ALLOWED_LABELS`` is passed to
     ``self.__get_sources``; each source that call returns becomes one
     hoster dict.

     Returns:
         list: hoster dicts; empty when ``self.get_url`` yields no usable
         URL or the page has no ``div.videos`` fragment.
     """
     hosters = []
     source_url = self.get_url(video)
     if not source_url or source_url == FORCE_NO_MATCH:
         return hosters

     page_url = urlparse.urljoin(self.base_url, source_url)
     html = self._http_get(page_url, cache_limit=.5)
     fragment = dom_parser.parse_dom(html, 'div', {'class': 'videos'})
     if not fragment:
         return hosters

     for match in re.finditer('href="([^"]+)[^>]*>([^<]+)', fragment[0]):
         link_url, link_label = match.groups()
         link_label = link_label.lower()
         if link_label not in ALLOWED_LABELS:
             continue

         sources = self.__get_sources(link_url, ALLOWED_LABELS[link_label])
         for source in sources:
             info = sources[source]
             host = self._get_direct_hostname(source)
             if host == 'gvideo':
                 quality = scraper_utils.gv_get_quality(source)
                 direct = True
                 stream_url = source + '|User-Agent=%s' % (scraper_utils.get_ua())
             elif info['direct']:
                 quality = info['quality']
                 direct = True
                 stream_url = source + '|User-Agent=%s' % (scraper_utils.get_ua())
             else:
                 # Non-direct link: report the URL's own hostname and pass
                 # the URL through without a User-Agent suffix.
                 quality = info['quality']
                 direct = False
                 host = urlparse.urlparse(source).hostname
                 stream_url = source

             hoster = {'multi-part': False, 'host': host, 'class': self, 'quality': quality, 'views': None, 'rating': None, 'url': stream_url, 'direct': direct}
             if info['subs']:
                 hoster['subs'] = 'Turkish Subtitles'
             hosters.append(hoster)
     return hosters

示例2: resolve_link

    def resolve_link(self, link):
        """Resolve a page link into a playable stream URL.

        *link* may carry request headers after a ``|`` separator, written as
        ``key=value`` pairs joined by ``&`` with URL-quoted values. Those
        headers are sent with the first page request. The player iframe is
        then fetched (posting a reCAPTCHA token when the iframe asks for
        one), an optional ``.replace('a', 'b')`` substitution and nested
        ``window.atob`` base64 layers are undone, and the ``<source>`` tags
        are read as ``(url, label)`` pairs.

        Returns:
            The chosen stream URL with a ``|User-Agent=...`` suffix, or
            ``None`` when no stream is found or the user cancels the
            selection dialog.
        """
        # Split off the optional header section; pairs without '=' are ignored.
        link, _sep, header_str = link.partition('|')
        headers = {}
        for item in header_str.split('&'):
            key, has_value, value = item.partition('=')
            if has_value:
                headers[key] = urllib.unquote(value)

        if not link.startswith('http'):
            link = urlparse.urljoin(self.base_url, link)
        html = self._http_get(link, headers=headers, cache_limit=0)
        fragment = dom_parser.parse_dom(html, 'div', {'class': 'player'})
        if not fragment:
            return None

        iframe_url = dom_parser.parse_dom(fragment[0], 'iframe', ret='src')
        if not iframe_url:
            return None

        iframe_url = iframe_url[0]
        headers = {'Referer': link}
        html = self._http_get(iframe_url, headers=headers, cache_limit=0)
        sitekey = dom_parser.parse_dom(html, 'div', {'class': 'g-recaptcha'}, ret='data-sitekey')
        if sitekey:
            token = recaptcha_v2.UnCaptchaReCaptcha().processCaptcha(sitekey[0], lang='en')
            if token:
                data = {'g-recaptcha-response': token}
                html = self._http_get(iframe_url, data=data, cache_limit=0)

        # Apply the first literal .replace('x', 'y') found in the page to the page itself.
        match = re.search(r"\.replace\(\s*'([^']+)'\s*,\s*'([^']*)'\s*\)", html, re.I)
        if match:
            html = html.replace(match.group(1), match.group(2))

        # The payload is base64-encoded once per window.atob occurrence.
        match = re.search(r"window\.atob[\([]+'([^']+)", html)
        if match:
            func_count = len(re.findall(r'window\.atob', html))
            html = match.group(1)
            for _i in xrange(func_count):
                html = base64.decodestring(html)

        streams = [m.groups() for m in re.finditer('''<source[^>]+src=["']([^'"]+)[^>]+label=['"]([^'"]+)''', html)]
        if len(streams) > 1:
            if not self.auto_pick:
                result = xbmcgui.Dialog().select(i18n('choose_stream'), [e[1] for e in streams])
                if result > -1:
                    return streams[result][0] + '|User-Agent=%s' % (scraper_utils.get_ua())
            else:
                # Auto-pick: keep the stream whose label ranks highest in Q_ORDER.
                best_stream = ''
                best_q = 0
                for stream_url, label in streams:
                    stream_q = Q_ORDER[scraper_utils.height_get_quality(label)]
                    if stream_q > best_q:
                        best_q = stream_q
                        best_stream = stream_url
                if best_stream:
                    return best_stream + '|User-Agent=%s' % (scraper_utils.get_ua())
        elif streams:
            return streams[0][0] + '|User-Agent=%s' % (scraper_utils.get_ua())
        return None

示例3: __get_king_links

 def __get_king_links(self, iframe_url):
     """Fetch the direct streams behind an iframe URL carrying a ``v=`` id.

     The id is posted as ``ID`` to the iframe's own path with
     ``?p=GetVideoSources`` appended; every entry under the response's
     ``VideoSources`` key becomes one direct hoster.

     Returns:
         list: hoster dicts; empty when the URL has no ``v=`` part or the
         response has no ``VideoSources`` key.
     """
     hosters = []
     match = re.search('v=(.*)', iframe_url)
     if not match:
         return hosters

     data = {'ID': match.group(1)}
     headers = {'Referer': iframe_url}
     xhr_url = iframe_url.split('?')[0] + '?p=GetVideoSources'
     html = self._http_get(xhr_url, data=data, headers=headers, cache_limit=.5)
     js_data = scraper_utils.parse_json(html, xhr_url)
     if 'VideoSources' not in js_data:
         return hosters

     for source in js_data['VideoSources']:
         stream_url = source['file'] + '|User-Agent=%s' % (scraper_utils.get_ua())
         host = self._get_direct_hostname(source['file'])
         label = source.get('label', '')
         if host == 'gvideo':
             quality = scraper_utils.gv_get_quality(source['file'])
         elif label.isdigit():
             quality = scraper_utils.height_get_quality(label)
         else:
             # Non-numeric labels are looked up in QUALITY_MAP.
             quality = QUALITY_MAP.get(label, QUALITIES.HIGH)
         hoster = {'multi-part': False, 'host': host, 'class': self, 'quality': quality, 'views': None, 'rating': None, 'url': stream_url, 'direct': True, 'subs': 'Turkish Subtitles'}
         hosters.append(hoster)
     return hosters

示例4: get_sources

 def get_sources(self, video):
     """Build the list of direct hosters for *video*.

     The ``video_id`` embedded in the page is posted to ``VIDEO_URL``; the
     response is read as a mapping whose values contain a ``url=`` part
     holding the URL-quoted stream address.

     Returns:
         list: hoster dicts; empty when no usable URL or no ``video_id``
         is found.
     """
     hosters = []
     source_url = self.get_url(video)
     if not source_url or source_url == FORCE_NO_MATCH:
         return hosters

     page_url = urlparse.urljoin(self.base_url, source_url)
     html = self._http_get(page_url, cache_limit=.5)
     match = re.search(r'var\s*video_id="([^"]+)', html)
     if not match:
         return hosters

     video_id = match.group(1)
     url = urlparse.urljoin(self.base_url, VIDEO_URL)
     data = {'v': video_id}
     # Copy XHR so the Referer added here does not leak into the shared constant.
     headers = dict(XHR)
     headers['Referer'] = page_url
     html = self._http_get(url, data=data, headers=headers, cache_limit=.5)
     sources = scraper_utils.parse_json(html, url)
     for source in sources:
         match = re.search('url=(.*)', sources[source])
         if not match:
             continue

         stream_url = urllib.unquote(match.group(1))
         host = self._get_direct_hostname(stream_url)
         if host == 'gvideo':
             quality = scraper_utils.gv_get_quality(stream_url)
         else:
             # The mapping key is passed to height_get_quality for non-gvideo hosts.
             quality = scraper_utils.height_get_quality(source)
         stream_url += '|User-Agent=%s' % (scraper_utils.get_ua())
         hoster = {'multi-part': False, 'host': host, 'class': self, 'quality': quality, 'views': None, 'rating': None, 'url': stream_url, 'direct': True}
         hosters.append(hoster)
     return hosters

示例5: get_sources

    def get_sources(self, video):
        """Build the list of direct sources for *video*.

        A page-wide quality is taken from the ``div.poster-qulabel`` label
        via ``Q_MAP`` (``QUALITIES.HIGH`` when the label is absent or
        unknown). Each ``div.tab_box`` fragment then yields a stream URL,
        either from an inline ``file: "..."`` entry or, failing that, from
        ``self.__get_ajax_sources``.

        Returns:
            list: source dicts; empty when ``self.get_url`` yields no
            usable URL or no fragment produces a stream URL.
        """
        sources = []
        source_url = self.get_url(video)
        if not source_url or source_url == FORCE_NO_MATCH:
            return sources

        page_url = urlparse.urljoin(self.base_url, source_url)
        html = self._http_get(page_url, cache_limit=8)
        q_str = dom_parser.parse_dom(html, 'div', {'class': 'poster-qulabel'})
        if q_str:
            q_str = q_str[0].replace(' ', '').upper()
            page_quality = Q_MAP.get(q_str, QUALITIES.HIGH)
        else:
            page_quality = QUALITIES.HIGH

        for fragment in dom_parser.parse_dom(html, 'div', {'class': 'tab_box'}):
            match = re.search(r'file\s*:\s*"([^"]+)', fragment)
            if match:
                stream_url = match.group(1)
            else:
                stream_url = self.__get_ajax_sources(fragment, page_url)
            if not stream_url:
                continue

            host = self._get_direct_hostname(stream_url)
            if host == 'gvideo':
                quality = scraper_utils.gv_get_quality(stream_url)
            else:
                quality = page_quality
            stream_url += '|User-Agent=%s&Referer=%s' % (scraper_utils.get_ua(), urllib.quote(page_url))
            source = {'multi-part': False, 'url': stream_url, 'host': host, 'class': self, 'quality': quality, 'views': None, 'rating': None, 'direct': True}
            sources.append(source)

        return sources

示例6: get_sources

 def get_sources(self, video):
     """Build the list of direct hosters for *video*.

     The view count is read from the page's ``Views:`` label when present.
     The page is then passed through ``self.__get_watch_now`` and each
     ``svname`` server section is scanned for ``ep_N`` links. For movies
     every link is kept, with the first number in its text passed to
     ``height_get_quality``; for other video types only the link whose
     text equals the episode number is kept.

     Returns:
         list: hoster dicts, each also carrying the server ``title``.
     """
     hosters = []
     source_url = self.get_url(video)
     if not source_url or source_url == FORCE_NO_MATCH:
         return hosters

     url = urlparse.urljoin(self.base_url, source_url)
     html = self._http_get(url, cache_limit=8)
     match = re.search(r'<b>Views:.*?([\d,]+)', html)
     if match:
         views = int(match.group(1).replace(',', ''))
     else:
         views = None

     html = self.__get_watch_now(html)
     for section in re.finditer(r'<span class="svname">\s*(.*?)\s*:?\s*</span>(.*?)(?=<span class="svname">|</div>)', html):
         title, fragment = section.groups()
         for link in re.finditer(r'<a[^>]+id="ep_\d+"[^>]+href="([^"]+)[^>]+>\s*([^<]+)', fragment):
             stream_url, name = link.groups()
             match = re.search(r'(\d+)', name)
             if video.video_type == VIDEO_TYPES.MOVIE:
                 if match:
                     quality = scraper_utils.height_get_quality(match.group(1))
                 else:
                     quality = QUALITIES.HIGH
             else:
                 # Episode links are compared by number; skip the ones for other episodes.
                 if not match or int(name) != int(video.episode):
                     continue
                 quality = QUALITIES.HIGH

             stream_url += '|User-Agent=%s&Referer=%s&Cookie=%s' % (scraper_utils.get_ua(), url, self._get_stream_cookies())
             hoster = {'multi-part': False, 'host': self._get_direct_hostname(stream_url), 'class': self, 'quality': quality, 'views': views, 'rating': None, 'url': stream_url, 'direct': True}
             hoster['title'] = title
             hosters.append(hoster)
     return hosters

示例7: get_sources

 def get_sources(self, video):
     """Build the list of hosters for *video* from the server list.

     Each link in the ``ul.css_server_new`` fragment names a host. Hosts
     listed in ``GVIDEO_NAMES`` are expanded through ``self.__get_links``
     and marked direct; any other link is kept as a single non-direct
     source.

     Returns:
         list: hoster dicts; empty when no usable URL or server list is
         found.
     """
     hosters = []
     source_url = self.get_url(video)
     if not source_url or source_url == FORCE_NO_MATCH:
         return hosters

     url = urlparse.urljoin(self.base_url, source_url)
     html = self._http_get(url, cache_limit=.5)
     fragment = dom_parser.parse_dom(html, 'ul', {'class': 'css_server_new'})
     if not fragment:
         return hosters

     for match in re.finditer(r'href="([^"]+)[^>]*>(.*?)(?:-\d+)?</a>', fragment[0]):
         url, host = match.groups()
         host = host.lower()
         host = re.sub('<img.*?/>', '', host)
         host = HOSTS.get(host, host)
         log_utils.log('%s - %s' % (url, host))
         if host in GVIDEO_NAMES:
             sources = self.__get_links(urlparse.urljoin(self.base_url, url))
             direct = True
         else:
             sources = {url: host}
             direct = False

         for source in sources:
             # Read the host before the URL gets its User-Agent suffix;
             # the suffixed string is not a key of ``sources``.
             source_host = sources[source]
             if self._get_direct_hostname(source) == 'gvideo':
                 quality = scraper_utils.gv_get_quality(source)
                 stream_url = source + '|User-Agent=%s' % (scraper_utils.get_ua())
             else:
                 quality = scraper_utils.get_quality(video, source, QUALITIES.HIGH)
                 stream_url = source
             hoster = {'multi-part': False, 'host': source_host, 'class': self, 'quality': quality, 'views': None, 'rating': None, 'url': stream_url, 'direct': direct}
             hosters.append(hoster)
     return hosters

示例8: get_sources

 def get_sources(self, video):
     """Build the list of direct hosters for *video* from a file listing.

     Non-directory entries returned by ``self._get_files`` are parsed with
     ``parse_movie_link`` (movies, matched by normalized title) or
     ``parse_episode_link`` (otherwise, matched by season and episode
     number). Entries whose parsed ``extra`` text contains ``dubbed`` are
     skipped.

     Returns:
         list: hoster dicts, with ``format`` and ``size`` added when known.
     """
     hosters = []
     source_url = self.get_url(video)
     if not source_url or source_url == FORCE_NO_MATCH:
         return hosters

     norm_title = scraper_utils.normalize_title(video.title)
     source_url = urlparse.urljoin(self.base_url2, source_url)
     for line in self._get_files(source_url, cache_limit=24):
         if line['directory']:
             continue

         match = {}
         if video.video_type == VIDEO_TYPES.MOVIE:
             match_title, _match_year, height, extra = scraper_utils.parse_movie_link(line['link'])
             if norm_title in scraper_utils.normalize_title(match_title):
                 match = line
         else:
             _show_title, season, episode, height, extra = scraper_utils.parse_episode_link(line['link'])
             if int(video.season) == int(season) and int(video.episode) == int(episode):
                 match = line

         if 'dubbed' in extra.lower():
             continue
         if not match:
             continue

         stream_url = match['url'] + '|User-Agent=%s' % (scraper_utils.get_ua())
         hoster = {'multi-part': False, 'host': self._get_direct_hostname(stream_url), 'class': self, 'quality': scraper_utils.height_get_quality(height), 'views': None, 'rating': None, 'url': stream_url, 'direct': True}
         if 'x265' in extra:
             hoster['format'] = 'x265'
         if 'size' in match:
             hoster['size'] = scraper_utils.format_size(int(match['size']))
         hosters.append(hoster)
     return hosters

示例9: get_sources

    def get_sources(self, video):
        """Build the list of hosters for *video* from the page's links.

        Links come from ``self.__get_gk_links`` or, when that returns
        nothing, from ``self.__get_gk_links2``. The value stored for each
        link is used as its quality, except for non-gvideo links that
        contain ``self.base_url``: there the stored value is used as the
        host name and the quality is derived from it with ``get_quality``.

        Returns:
            list: hoster dicts; empty when no usable URL or link is found.
        """
        hosters = []
        source_url = self.get_url(video)
        if not source_url or source_url == FORCE_NO_MATCH:
            return hosters

        url = urlparse.urljoin(self.base_url, source_url)
        html = self._http_get(url, cache_limit=.5)
        sources = self.__get_gk_links(html, url)
        if not sources:
            sources = self.__get_gk_links2(html)

        for source in sources:
            host = self._get_direct_hostname(source)
            if host == 'gvideo':
                direct = True
                quality = sources[source]
                stream_url = source + '|User-Agent=%s' % (scraper_utils.get_ua())
            else:
                direct = False
                stream_url = source
                if self.base_url in source:
                    host = sources[source]
                    quality = scraper_utils.get_quality(video, host, QUALITIES.HIGH)
                else:
                    host = urlparse.urlparse(source).hostname
                    quality = sources[source]
            hoster = {'multi-part': False, 'host': host, 'class': self, 'quality': quality, 'views': None, 'rating': None, 'url': stream_url, 'direct': direct}
            hosters.append(hoster)

        return hosters

示例10: __get_cloud_links

 def __get_cloud_links(self, html, page_url, sub):
     """Collect direct hosters from the page's cloud player.

     The episode id comes from the ``dizi_kapak_getir('...')`` call in
     *html*. Each ``data-cfasync="false"`` script is fetched and searched
     for the ``kapak_url`` and ``aCtkp`` variables; together with the
     episode id they form the URL of a JSON document whose ``variants``
     describe the streams.

     Args:
         html: page markup; escaped quotes and slashes are unescaped first.
         page_url: sent as Referer and embedded in each stream URL.
         sub: value stored under ``subs`` on every hoster.

     Returns:
         list: hoster dicts; empty when no episode id is found.
     """
     hosters = []
     html = html.replace('\\"', '"').replace('\\/', '/')
     match = re.search(r"dizi_kapak_getir\('([^']+)", html)
     if not match:
         return hosters

     ep_id = match.group(1)
     for script_url in dom_parser.parse_dom(html, 'script', {'data-cfasync': 'false'}, ret='src'):
         html = self._http_get(script_url, cache_limit=24)
         match1 = re.search(r"var\s+kapak_url\s*=\s*'([^']+)", html)
         match2 = re.search(r"var\s+aCtkp\s*=\s*'([^']+)", html)
         if not (match1 and match2):
             continue

         link_url = '%s?fileid=%s&access_token=%s' % (match1.group(1), ep_id, match2.group(1))
         headers = {'Referer': page_url}
         html = self._http_get(link_url, headers=headers, cache_limit=.5)
         js_data = scraper_utils.parse_json(html, link_url)
         for variant in js_data.get('variants', {}):
             # random.choice raises IndexError on an empty sequence, so
             # skip variants that list no hosts.
             stream_hosts = variant.get('hosts', [])
             if not stream_hosts:
                 continue
             stream_host = random.choice(stream_hosts)
             if not stream_host:
                 continue

             stream_url = STREAM_URL % (stream_host, variant['path'], scraper_utils.get_ua(), urllib.quote(page_url))
             if not stream_url.startswith('http'):
                 stream_url = 'http://' + stream_url
             host = self._get_direct_hostname(stream_url)
             if 'width' in variant:
                 quality = scraper_utils.width_get_quality(variant['width'])
             elif 'height' in variant:
                 quality = scraper_utils.height_get_quality(variant['height'])
             else:
                 quality = QUALITIES.HIGH
             hoster = {'multi-part': False, 'host': host, 'class': self, 'quality': quality, 'views': None, 'rating': None, 'url': stream_url, 'direct': True}
             hoster['subs'] = sub
             hosters.append(hoster)
     return hosters

示例11: get_sources

 def get_sources(self, video):
     """Build the list of direct hosters for *video* from a file listing.

     Non-directory entries returned by ``self._get_files`` are matched by
     normalized title (movies) or by ``self.__episode_match`` (otherwise).
     Matches whose parsed metadata is flagged ``dubbed`` are skipped.

     Returns:
         list: hoster dicts, with ``format`` and ``size`` added when known.
     """
     hosters = []
     source_url = self.get_url(video)
     if not source_url or source_url == FORCE_NO_MATCH:
         return hosters

     norm_title = scraper_utils.normalize_title(video.title)
     source_url = urlparse.urljoin(self.base_url, source_url)
     for line in self._get_files(source_url, cache_limit=24):
         if line['directory']:
             continue

         match = {}
         if video.video_type == VIDEO_TYPES.MOVIE:
             meta = scraper_utils.parse_movie_link(line['link'])
             if norm_title in scraper_utils.normalize_title(meta['title']):
                 match = line
         elif self.__episode_match(line, video):
             match = line
             meta = scraper_utils.parse_episode_link(line['link'])

         if not match:
             continue
         if meta['dubbed']:
             continue

         stream_url = match['url'] + '|User-Agent=%s' % (scraper_utils.get_ua())
         # The base URL is removed from the stream URL before it is stored.
         stream_url = stream_url.replace(self.base_url, '')
         quality = scraper_utils.height_get_quality(meta['height'])
         hoster = {'multi-part': False, 'host': self._get_direct_hostname(stream_url), 'class': self, 'quality': quality, 'views': None, 'rating': None, 'url': stream_url, 'direct': True}
         if 'format' in meta:
             hoster['format'] = meta['format']
         if 'size' in match:
             hoster['size'] = scraper_utils.format_size(int(match['size']))
         hosters.append(hoster)
     return hosters

示例12: __get_links

    def __get_links(self, url):
        """Collect direct hosters from the player referenced in *url*.

        *url* is searched for a ``src="..."`` attribute, so it is expected
        to be markup rather than a bare address. The referenced page is
        fetched; when it loads a script whose URL is assembled from string
        pieces, that script is fetched instead (with a random suffix
        appended). ``file``/``label`` pairs in the result become hosters.

        Returns:
            list: hoster dicts; empty when *url* has no ``src`` attribute
            or no ``file``/``label`` pair is found.
        """
        sources = []
        match = re.search('src="([^"]+)', url)
        if not match:
            return sources

        url = match.group(1).replace('\\/', '/')
        html = self._http_get(url, cache_limit=0)
        match = re.search(r'''<script\s+src="([^']+)'\+(\d+)\+'([^']+)''', html)
        if match:
            page_url = ''.join(match.groups())
            page_url += str(random.random())
            html = self._http_get(page_url, cache_limit=0)

        for match in re.finditer(r'"?file"?\s*:\s*"([^"]+)"\s*,\s*"?label"?\s*:\s*"(\d+)p?"', html):
            stream_url, height = match.groups()
            stream_url = stream_url.replace('\\&', '&').replace('\\/', '/')
            if 'v.asp' in stream_url and 'ok.ru' not in url:
                # With allow_redirect=False the result of the request,
                # when non-empty, replaces the stream URL.
                stream_redirect = self._http_get(stream_url, allow_redirect=False, cache_limit=0)
                if stream_redirect:
                    stream_url = stream_redirect

            host = self._get_direct_hostname(stream_url)
            if host == 'gvideo':
                quality = scraper_utils.gv_get_quality(stream_url)
            else:
                quality = scraper_utils.height_get_quality(height)
            stream_url += '|User-Agent=%s&Referer=%s' % (scraper_utils.get_ua(), urllib.quote(url))
            hoster = {'multi-part': False, 'host': host, 'class': self, 'quality': quality, 'views': None, 'rating': None, 'url': stream_url, 'direct': True}
            sources.append(hoster)
        return sources

示例13: get_sources

 def get_sources(self, video):
     """Build the list of direct hosters for *video*.

     The view count is read from the page's ``Views:`` label when present.
     The page behind the ``btn_watch_detail`` button is then fetched and
     each ``svname`` server section is scanned for ``ep_N`` links; the
     first number in a link's text is passed to ``height_get_quality``.

     Returns:
         list: hoster dicts, each also carrying the server ``title``;
         empty when no usable URL or watch button is found.
     """
     hosters = []
     source_url = self.get_url(video)
     if not source_url or source_url == FORCE_NO_MATCH:
         return hosters

     url = urlparse.urljoin(self.base_url, source_url)
     html = self._http_get(url, cache_limit=.5)
     match = re.search(r'<b>Views:.*?([\d,]+)', html)
     if match:
         views = int(match.group(1).replace(',', ''))
     else:
         views = None

     button = dom_parser.parse_dom(html, 'a', {'class': '[^"]*btn_watch_detail[^"]*'}, ret='href')
     if not button:
         return hosters

     html = self._http_get(button[0], cache_limit=.5)
     for section in re.finditer(r'<span class="svname">\s*(.*?)\s*:?\s*</span>(.*?)(?=<span class="svname">|</div>)', html):
         title, fragment = section.groups()
         for link in re.finditer(r'<a[^>]+id="ep_\d+"[^>]+href="([^"]+)[^>]+>\s*([^<]+)', fragment):
             stream_url, name = link.groups()
             match = re.search(r'(\d+)', name)
             if match:
                 quality = scraper_utils.height_get_quality(match.group(1))
             else:
                 quality = QUALITIES.HIGH
             stream_url += '|User-Agent=%s&Referer=%s&Cookie=%s' % (scraper_utils.get_ua(), url, self._get_stream_cookies())
             hoster = {'multi-part': False, 'host': self._get_direct_hostname(stream_url), 'class': self, 'quality': quality, 'views': views, 'rating': None, 'url': stream_url, 'direct': True}
             hoster['title'] = title
             hosters.append(hoster)
     return hosters

示例14: get_sources

 def get_sources(self, video):
     """Build the hoster list for *video* by following the player iframes.

     The dropdown entry matching ``Altyaz.{1,3}s.{1,3}z`` is followed to
     an option page. Its ``div.video-player`` iframe is fetched, then the
     ``iframe#ifr`` inside that one is requested with redirects disabled.
     When that response starts with ``http`` it is used as the stream URL.

     Returns:
         list: at most one non-direct hoster dict; empty when any step of
         the chain is missing.
     """
     hosters = []
     source_url = self.get_url(video)
     if not source_url or source_url == FORCE_NO_MATCH:
         return hosters

     page_url = urlparse.urljoin(self.base_url, source_url)
     html = self._http_get(page_url, cache_limit=.25)
     fragment = dom_parser.parse_dom(html, 'ul', {'class': 'dropdown-menu'})
     if not fragment:
         return hosters

     match = re.search('''href=['"]([^'"]+)[^>]*>(?:Altyaz.{1,3}s.{1,3}z)<''', fragment[0])
     if not match:
         return hosters

     option_url = urlparse.urljoin(self.base_url, match.group(1))
     html = self._http_get(option_url, cache_limit=2)
     fragment = dom_parser.parse_dom(html, 'div', {'class': 'video-player'})
     if not fragment:
         return hosters

     iframe_url = dom_parser.parse_dom(fragment[0], 'iframe', ret='src')
     if not iframe_url:
         return hosters

     html = self._http_get(iframe_url[0], cache_limit=.25)
     iframe_url = dom_parser.parse_dom(html, 'iframe', {'id': 'ifr'}, ret='src')
     if not iframe_url:
         return hosters

     html = self._http_get(iframe_url[0], allow_redirect=False, cache_limit=.25)
     if html.startswith('http'):
         stream_url = html
         host = urlparse.urlparse(stream_url).hostname
         stream_url += '|User-Agent=%s' % (scraper_utils.get_ua())
         quality = QUALITIES.HIGH
         hoster = {'multi-part': False, 'host': host, 'class': self, 'quality': quality, 'views': None, 'rating': None, 'url': stream_url, 'direct': False}
         hosters.append(hoster)
     return hosters

示例15: get_sources

    def get_sources(self, video):
        """Build the list of direct hosters for *video*.

        The view count is the first number found in the ``view_icon.png``
        image fragment, kept as a string. When the page links to a
        ``-full-movie-`` page, that page replaces the current one. Sources
        are gathered with ``self.__get_sources`` from the current page and,
        when a link wrapping a ``<button`` exists, merged with those from
        the linked page.

        Returns:
            list: hoster dicts; empty when ``self.get_url`` yields no
            usable URL or no sources are found.
        """
        hosters = []
        source_url = self.get_url(video)
        if not source_url or source_url == FORCE_NO_MATCH:
            return hosters

        url = urlparse.urljoin(self.base_url, source_url)
        html = self._http_get(url, cache_limit=.5)

        views = None
        fragment = dom_parser.parse_dom(html, 'img', {'src': '[^"]*view_icon.png'})
        if fragment:
            match = re.search(r'(\d+)', fragment[0])
            if match:
                views = match.group(1)

        match = re.search('href="([^"]+-full-movie-[^"]+)', html)
        if match:
            url = match.group(1)
            html = self._http_get(url, cache_limit=.5)

        sources = self.__get_sources(html, url)
        match = re.search(r'href="([^"]+)[^>]*>\s*<button', html)
        if match:
            html = self._http_get(match.group(1), cache_limit=.5)
            sources.update(self.__get_sources(html, url))

        for source in sources:
            host = self._get_direct_hostname(source)
            stream_url = source + '|User-Agent=%s' % (scraper_utils.get_ua())
            quality = sources[source]['quality']
            hoster = {'multi-part': False, 'host': host, 'class': self, 'quality': quality, 'views': views, 'rating': None, 'url': stream_url, 'direct': True}
            hosters.append(hoster)

        return hosters
