

Python dom_parser.parse_dom Function Code Examples

This article collects typical usage examples of the Python function resources.lib.modules.dom_parser.parse_dom. If you are unsure how to call parse_dom, what its concrete usage looks like, or simply want to see it in working code, the curated examples below should help.


A total of 15 code examples of the parse_dom function are shown below, ordered by popularity by default.
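For orientation, here is a minimal, self-contained sketch of the call pattern the examples rely on. The HTML fragment and the tag/attribute names are made up purely for illustration; the chained parse_dom calls and the .attrs/.content access mirror the usage shown in the examples below.

    from resources.lib.modules import dom_parser

    # Hypothetical HTML fragment, used only to illustrate the call pattern.
    html = '<div id="entries"><a href="/watch/1" class="title">Example Title</a></div>'

    # Narrow matches by tag name and attribute values; the result list can be
    # fed straight back into parse_dom to drill further into the tree.
    entries = dom_parser.parse_dom(html, 'div', attrs={'id': 'entries'})

    # req='href' keeps only <a> elements that actually carry an href attribute.
    links = dom_parser.parse_dom(entries, 'a', req='href')

    for link in links:
        print link.attrs['href'], link.content  # -> /watch/1 Example Title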

Example 1: sources

    def sources(self, url, hostDict, hostprDict):
        sources = []

        try:
            if not url:
                return sources

            r = client.request(urlparse.urljoin(self.base_link, url))
            r = dom_parser.parse_dom(r, 'div', attrs={'id': 'entries'})

            links = re.findall('''(?:link|file)["']?\s*:\s*["'](.+?)["']''', ''.join([i.content for i in r]))
            links += [l.attrs['src'] for i in r for l in dom_parser.parse_dom(i, 'iframe', req='src')]
            links += [l.attrs['src'] for i in r for l in dom_parser.parse_dom(i, 'source', req='src')]

            for i in links:
                try:
                    i = re.sub('\[.+?\]|\[/.+?\]', '', i)
                    i = client.replaceHTMLCodes(i)

                    valid, host = source_utils.is_host_valid(i, hostDict)
                    if not valid: continue

                    sources.append({'source': host, 'quality': 'SD', 'language': 'ko', 'url': i, 'direct': False, 'debridonly': False})
                except:
                    pass

            return sources
        except:
            return sources
Developer: varunrai, Project: repository.magicality, Lines of code: 29, Source file: dramafire.py

Example 2: __search

    def __search(self, titles, year, episode='0'):
        try:
            title = titles[0]
            if int(episode) > 0: title += ' episode %s' % episode

            t = [cleantitle.get(i) for i in set(titles) if i]
            y = ['%s' % str(year), '%s' % str(int(year) + 1), '%s' % str(int(year) - 1), '0']

            r = client.request(urlparse.urljoin(self.base_link, self.search_link) % urllib.quote_plus(cleantitle.query(title)))

            r = dom_parser.parse_dom(r, 'div', attrs={'id': 'entries'})
            r = dom_parser.parse_dom(r, 'div', attrs={'class': 'post'})
            r = dom_parser.parse_dom(r, 'h3', attrs={'class': 'title'})
            r = dom_parser.parse_dom(r, 'a', req='href')
            r = [(i.attrs['href'], i.content.lower()) for i in r if i]
            r = [(i[0], i[1], re.findall('(.+?) \(*(\d{4})', i[1])) for i in r]
            r = [(i[0], i[2][0][0] if len(i[2]) > 0 else i[1], i[2][0][1] if len(i[2]) > 0 else '0') for i in r]
            r = [(i[0], i[1], i[2], re.findall('(.+?)\s+(?:episode)\s+(\d+)', i[1])) for i in r]
            r = [(i[0], i[3][0][0] if len(i[3]) > 0 else i[1], i[2], i[3][0][1] if len(i[3]) > 0 else '0') for i in r]
            r = [(i[0], i[1].replace(' hd', ''), i[2], '1' if int(episode) > 0 and i[3] == '0' else i[3]) for i in r]
            r = sorted(r, key=lambda i: int(i[2]), reverse=True)  # with year > no year
            r = [i[0] for i in r if cleantitle.get(i[1]) in t and i[2] in y and int(i[3]) == int(episode)][0]

            return source_utils.strip_domain(r)
        except:
            return
Developer: varunrai, Project: repository.magicality, Lines of code: 26, Source file: dramafire.py

Example 3: sources

    def sources(self, url, hostDict, hostprDict):
        sources = []

        try:
            if not url:
                return sources

            r = client.request(urlparse.urljoin(self.base_link, url))
            r = dom_parser.parse_dom(r, 'article')
            r = dom_parser.parse_dom(r, 'div', attrs={'class': 'entry-content'})

            links = re.findall('''(?:link|file)["']?\s*:\s*["'](.+?)["']''', ''.join([i.content for i in r]))
            links += [l.attrs['src'] for i in r for l in dom_parser.parse_dom(i, 'iframe', req='src')]
            links += [l.attrs['src'] for i in r for l in dom_parser.parse_dom(i, 'source', req='src')]

            for i in links:
                try:
                    valid, hoster = source_utils.is_host_valid(i, hostDict)
                    if not valid: continue

                    urls = []
                    if 'google' in i: host = 'gvideo'; direct = True; urls = directstream.google(i);
                    if 'google' in i and not urls and directstream.googletag(i): host = 'gvideo'; direct = True; urls = [{'quality': directstream.googletag(i)[0]['quality'], 'url': i}]
                    elif 'ok.ru' in i: host = 'vk'; direct = True; urls = directstream.odnoklassniki(i)
                    elif 'vk.com' in i: host = 'vk'; direct = True; urls = directstream.vk(i)
                    else: host = hoster; direct = False; urls = [{'quality': 'SD', 'url': i}]

                    for x in urls: sources.append({'source': host, 'quality': x['quality'], 'language': 'ko', 'url': x['url'], 'direct': direct, 'debridonly': False})
                except:
                    pass

            return sources
        except:
            return sources
Developer: mpie, Project: repo, Lines of code: 34, Source file: iheartdrama.py

Example 4: __search

    def __search(self, titles):
        try:
            query = self.search_link % (urllib.quote_plus(cleantitle.query(titles[0])))
            query = urlparse.urljoin(self.base_link, query)

            t = [cleantitle.get(i) for i in set(titles) if i]

            r = client.request(query)

            r = dom_parser.parse_dom(r, 'div', attrs={'class': 'nag'})
            r = dom_parser.parse_dom(r, 'div', attrs={'class': 'item-video'})
            r = dom_parser.parse_dom(r, 'h2', attrs={'class': 'entry-title'})
            r = dom_parser.parse_dom(r, 'a', req='href')

            for i in r:
                title = i[1]
                if re.search('\*(?:.*?)\*', title) is not None:
                    title = re.sub('\*(?:.*?)\*', '', title)
                title = cleantitle.get(title)
                if title in t:
                    return source_utils.strip_domain(i[0]['href'])
                else:
                    return
        except:
            return
Developer: vphuc81, Project: MyRepository, Lines of code: 28, Source file: movie2k-ag.py

Example 5: __search

    def __search(self, titles, year, season='0'):
        try:
            url = urlparse.urljoin(self.base_link, self.search_link)

            t = [cleantitle.get(i) for i in set(titles) if i]
            y = ['%s' % str(year), '%s' % str(int(year) + 1), '%s' % str(int(year) - 1), '0']

            post = {'story': utils.uni2cp(titles[0]), 'titleonly': 3, 'do': 'search', 'subaction': 'search', 'search_start': 1, 'full_search': 0, 'result_from': 1}
            r = client.request(url, post=post)

            r = r.decode('cp1251').encode('utf-8')

            r = dom_parser.parse_dom(r, 'table', attrs={'class': 'eBlock'})
            r = [(dom_parser.parse_dom(i, 'div', attrs={'class': 'eTitle'}), dom_parser.parse_dom(i[1], 'a', attrs={'href': re.compile('.*\d+_goda/')})) for i in r]
            r = [(dom_parser.parse_dom(i[0][0], 'a', req='href'), [x.content for x in i[1] if re.match('\d{4}', x.content)][0] if i[1] else '0') for i in r if i[0]]
            r = [(i[0][0].attrs['href'], i[0][0].content, i[1]) for i in r if i[0]]
            r = [(i[0], i[1], i[2], re.findall('(.+?) \(*(\d{4})', i[1])) for i in r if i]
            r = [(i[0], i[3][0][0] if i[3] else i[1], i[2]) for i in r]
            r = [(i[0], i[1], i[2], re.findall(u'(.+?)\s+(\d+)\s+(?:сезон)', i[1])) for i in r]
            r = [(i[0], i[3][0][0] if len(i[3]) > 0 else i[1], i[2], i[3][0][1] if len(i[3]) > 0 else '0') for i in r]
            r = sorted(r, key=lambda i: int(i[2]), reverse=True)  # with year > no year
            r = [i[0] for i in r if cleantitle.get(i[1]) in t and i[2] in y and int(i[3]) == int(season)][0]

            return source_utils.strip_domain(r)
        except:
            return
Developer: varunrai, Project: repository.magicality, Lines of code: 26, Source file: kinoxax.py

Example 6: episode

    def episode(self, url, imdb, tvdb, title, premiered, season, episode):
        try:
            if not url:
                return

            data = urlparse.parse_qs(url)
            data = dict([(i, data[i][0]) if data[i] else (i, '') for i in data])
            tvshowtitle = data['tvshowtitle']
            localtvshowtitle = data['localtvshowtitle']
            aliases = source_utils.aliases_to_array(eval(data['aliases']))

            url = self.__search([localtvshowtitle] + aliases, data['year'], season)
            if not url and tvshowtitle != localtvshowtitle: url = self.__search([tvshowtitle] + aliases, data['year'], season)
            if not url: return

            r = client.request(urlparse.urljoin(self.base_link, url))

            r = dom_parser.parse_dom(r, 'ul', attrs={'class': ['list-inline', 'list-film']})
            r = dom_parser.parse_dom(r, 'li')
            r = dom_parser.parse_dom(r, 'a', req='href')
            r = [(i.attrs['href'], i.content) for i in r if i]
            r = [(i[0], i[1] if re.compile("^(\d+)$").match(i[1]) else '0') for i in r]
            r = [i[0] for i in r if int(i[1]) == int(episode)][0]

            return source_utils.strip_domain(r)
        except:
            return
Developer: amadu80, Project: repository.xvbmc, Lines of code: 27, Source file: hdfilme.py

Example 7: __search

    def __search(self, titles, year):
        try:
            query = self.search_link % (urllib.quote_plus(cleantitle.getsearch(titles[0])))

            query = urlparse.urljoin(self.base_link, query)

            t = cleantitle.get(titles[0])
            scraper = cfscrape.create_scraper()
            data = scraper.get(query).content
            #data = client.request(query, referer=self.base_link)
            data = client.parseDOM(data, 'div', attrs={'class': 'result-item'})
            r = dom_parser.parse_dom(data, 'div', attrs={'class': 'title'})
            r = zip(dom_parser.parse_dom(r, 'a'), dom_parser.parse_dom(data, 'span', attrs={'class': 'year'}))

            url = []
            for i in range(len(r)):
                title = cleantitle.get(r[i][0][1])
                title = re.sub('(\d+p|4k|3d|hd|season\d+)','',title)
                y = r[i][1][1]
                link = r[i][0][0]['href']
                if 'season' in title: continue
                if t == title and y == year:
                    if 'season' in link:
                        url.append(source_utils.strip_domain(link))
                        print url[0]
                        return url[0]
                    else: url.append(source_utils.strip_domain(link))

            return url
        except:
            return
Developer: CYBERxNUKE, Project: xbmc-addon, Lines of code: 31, Source file: mzmovies.py

Example 8: sources

    def sources(self, url, hostDict, hostprDict):
        sources = []

        try:
            if not url:
                return sources

            query = urlparse.urljoin(self.base_link, url)

            r = client.request(query)

            quality = dom_parser.parse_dom(r, 'span', attrs={'id': 'release_text'})[0].content.split(' ')[0]
            quality, info = source_utils.get_release_quality(quality)

            r = dom_parser.parse_dom(r, 'ul', attrs={'class': 'currentStreamLinks'})
            r = [(dom_parser.parse_dom(i, 'p', attrs={'class': 'hostName'}), dom_parser.parse_dom(i, 'a', attrs={'class': 'stream-src'}, req='data-id')) for i in r]
            r = [(re.sub(' hd$', '', i[0][0].content.lower()), [x.attrs['data-id'] for x in i[1]]) for i in r if i[0] and i[1]]

            for hoster, id in r:
                valid, hoster = source_utils.is_host_valid(hoster, hostDict)
                if not valid: continue

                sources.append({'source': hoster, 'quality': quality, 'language': 'de', 'info': ' | '.join(info + ['' if len(id) == 1 else 'multi-part']), 'url': id, 'direct': False, 'debridonly': False, 'checkquality': True})

            return sources
        except:
            return sources
Developer: varunrai, Project: repository.magicality, Lines of code: 27, Source file: filmpalast.py

Example 9: sources

    def sources(self, url, hostDict, hostprDict):
        sources = []

        try:
            if not url:
                return sources

            data = urlparse.parse_qs(url)
            data = dict([(i, data[i][0]) if data[i] else (i, '') for i in data])
            url = data['url']
            episode = data.get('episode')

            r = client.request(urlparse.urljoin(self.base_link, url))

            if episode:
                rel = dom_parser.parse_dom(r, 'a', attrs={'class': 'fstab', 'title': re.compile('Episode %s$' % episode)}, req='data-rel')
                rel = [dom_parser.parse_dom(r, 'div', attrs={'id': i.attrs['data-rel']}) for i in rel]
                rel = [i[0].content for i in rel if i]
                r = ' '.join(rel)

            r = dom_parser.parse_dom(r, 'div', attrs={'class': re.compile('s?elink')})
            r = dom_parser.parse_dom(r, 'a', req='href')
            r = [i.attrs['href'] for i in r]

            for h_url in r:
                valid, hoster = source_utils.is_host_valid(h_url, hostDict)
                if not valid: continue

                sources.append({'source': hoster, 'quality': 'SD', 'language': 'fr', 'url': h_url, 'direct': False, 'debridonly': False})

            return sources
        except:
            return sources
Developer: varunrai, Project: repository.magicality, Lines of code: 33, Source file: fullstream.py

Example 10: sources

    def sources(self, url, hostDict, hostprDict):
        sources = []

        try:
            if not url:
                return sources

            data = urlparse.parse_qs(url)
            data = dict([(i, data[i][0]) if data[i] else (i, '') for i in data])
            url = data.get('url')
            episode = int(data.get('episode', 1))

            r = client.request(urlparse.urljoin(self.base_link, url))
            r = {'': dom_parser.parse_dom(r, 'div', attrs={'id': 'gerdub'}), 'subbed': dom_parser.parse_dom(r, 'div', attrs={'id': 'gersub'})}

            for info, data in r.iteritems():
                data = dom_parser.parse_dom(data, 'tr')
                data = [dom_parser.parse_dom(i, 'a', req='href') for i in data if dom_parser.parse_dom(i, 'a', attrs={'id': str(episode)})]
                data = [(link.attrs['href'], dom_parser.parse_dom(link.content, 'img', req='src')) for i in data for link in i]
                data = [(i[0], i[1][0].attrs['src']) for i in data if i[1]]
                data = [(i[0], re.findall('/(\w+)\.\w+', i[1])) for i in data]
                data = [(i[0], i[1][0]) for i in data if i[1]]

                for link, hoster in data:
                    valid, hoster = source_utils.is_host_valid(hoster, hostDict)
                    if not valid: continue

                    sources.append({'source': hoster, 'quality': 'SD', 'language': 'de', 'url': link, 'info': info, 'direct': False, 'debridonly': False})

            return sources
        except:
            return sources
Developer: azumimuo, Project: family-xbmc-addon, Lines of code: 32, Source file: animebase.py

Example 11: sources

    def sources(self, url, hostDict, hostprDict):
        sources = []
        try:
            if not url:
                return sources

            url = urlparse.urljoin(self.base_link, url)

            r = client.request(url)
            r = r.replace('\\"', '"')

            links = dom_parser.parse_dom(r, 'tr', attrs={'id': 'tablemoviesindex2'})

            for i in links:
                try:
                    host = dom_parser.parse_dom(i, 'img', req='alt')[0].attrs['alt']
                    host = host.split()[0].rsplit('.', 1)[0].strip().lower()
                    host = host.encode('utf-8')

                    valid, host = source_utils.is_host_valid(host, hostDict)
                    if not valid: continue

                    url = dom_parser.parse_dom(i, 'a', req='href')[0].attrs['href']
                    url = client.replaceHTMLCodes(url)
                    url = urlparse.urljoin(self.base_link, url)
                    url = url.encode('utf-8')

                    sources.append({'source': host, 'quality': 'SD', 'language': 'de', 'url': url, 'direct': False, 'debridonly': False})
                except:
                    pass

            return sources
        except:
            return sources
Developer: vphuc81, Project: MyRepository, Lines of code: 34, Source file: movie4k.py

Example 12: __search

    def __search(self, titles, year, imdb):
        try:
            query = self.search_link % (urllib.quote_plus(titles[0]))
            query = urlparse.urljoin(self.base_link, query)

            t = [cleantitle.get(i) for i in set(titles) if i]
            y = ['%s' % str(year), '%s' % str(int(year) + 1), '%s' % str(int(year) - 1), '0']

            r = client.request(query)

            r = dom_parser.parse_dom(r, 'div', attrs={'class': 'movie_cell'})
            r = [(dom_parser.parse_dom(i, 'div', attrs={'class': 'bottom'}), dom_parser.parse_dom(i, 'div', attrs={'class': 'year'})) for i in r]
            r = [(dom_parser.parse_dom(i[0], 'a', req=['href', 'title']), re.findall('[(](\d{4})[)]', i[1][0].content)) for i in r if i[0] and i[1]]
            r = [(i[0][0].attrs['href'], i[0][0].content, i[1][0]) for i in r if i[0] and i[1]]
            r = [(i[0], i[1].lower(), i[2]) for i in r if i[2] in y]
            r = sorted(r, key=lambda i: int(i[2]), reverse=True)  # with year > no year
            r = [i[0] for i in r if cleantitle.get(i[1]) in t]

            if len(r) > 1:
                for i in r:
                    data = client.request(urlparse.urljoin(self.base_link, i))
                    data = dom_parser.parse_dom(data, 'a', attrs={'name': re.compile('.*/tt\d+.*')}, req='name')
                    data = [re.findall('.+?(tt\d+).*?', d.attrs['name']) for d in data]
                    data = [d[0] for d in data if len(d) > 0 and d[0] == imdb]

                    if len(data) >= 1:
                        url = i
            else:
                url = r[0]

            if url:
                return source_utils.strip_domain(url)
        except:
            return
Developer: azumimuo, Project: family-xbmc-addon, Lines of code: 34, Source file: kinodogs.py

Example 13: __search

    def __search(self, titles, type, year, season=0, episode=False):
        try:
            years = [str(year), str(int(year) + 1), str(int(year) - 1)]
            years = ['&veroeffentlichung[]=%s' % i for i in years]

            query = self.search_link % (type, urllib.quote_plus(cleantitle.query(titles[0])))
            query += ''.join(years)
            query = urlparse.urljoin(self.base_link, query)

            t = [cleantitle.get(i) for i in set(titles) if i]

            r = self.__proceed_search(query)
            r = [i[0] for i in r if cleantitle.get(i[1]) in t and int(i[2]) == int(season)][0]

            url = source_utils.strip_domain(r)
            if episode:
                r = client.request(urlparse.urljoin(self.base_link, url))
                r = dom_parser.parse_dom(r, 'div', attrs={'class': 'season-list'})
                r = dom_parser.parse_dom(r, 'li')
                r = dom_parser.parse_dom(r, 'a', req='href')
                r = [i.attrs['href'] for i in r if i and int(i.content) == int(episode)][0]

                url = source_utils.strip_domain(r)
            return url
        except:
            return
Developer: azumimuo, Project: family-xbmc-addon, Lines of code: 26, Source file: meinkino.py

Example 14: __search

    def __search(self, titles, year, content):
        try:

            query = self.search_link % (urllib.quote_plus(cleantitle.getsearch(titles[0])))

            query = urlparse.urljoin(self.base_link, query)

            t = [cleantitle.get(i) for i in set(titles) if i][0] #cleantitle.get(titles[0])

            r = client.request(query)

            r = client.parseDOM(r, 'div', attrs={'class': 'tab-content clearfix'})

            if content == 'movies':
                r = client.parseDOM(r, 'div', attrs={'id': 'movies'})
            else:
                r = client.parseDOM(r, 'div', attrs={'id': 'series'})

            data = dom_parser.parse_dom(r, 'figcaption')

            for i in data:
                title = i[0]['title']
                title = cleantitle.get(title)
                if title in t:
                    x = dom_parser.parse_dom(i, 'a', req='href')
                    return source_utils.strip_domain(x[0][0]['href'])
                else:
                    url = dom_parser.parse_dom(i, 'a', req='href')
                    data = client.request(url[0][0]['href'])
                    data = re.findall('<h1><a.+?">(.+?)\((\d{4})\).*?</a></h1>', data, re.DOTALL)[0]
                    if titles[0] in data[0] and year == data[1]: return source_utils.strip_domain(url[0][0]['href'])

            return
        except:
            return
Developer: mpie, Project: repo, Lines of code: 35, Source file: tainiesonline.py

Example 15: sources

    def sources(self, url, hostDict, hostprDict):
        sources = []

        try:
            if not url:
                return sources

            r = client.request(urlparse.urljoin(self.base_link, url))

            r = dom_parser.parse_dom(r, 'div', attrs={'class': 'hosterSiteVideo'})
            r = dom_parser.parse_dom(r, 'li', attrs={'data-lang-key': re.compile('[1|3]')})
            r = [(dom_parser.parse_dom(i, 'a', req='href'), dom_parser.parse_dom(i, 'h4'), 'subbed' if i.attrs['data-lang-key'] == '3' else '') for i in r]
            r = [(i[0][0].attrs['href'], i[1][0].content.lower(), i[2]) for i in r if len(i[0]) > 0 and len(i[1]) > 0]
            r = [(i[0], i[1], re.findall('(.+?)\s*<br\s*/?>(.+?)$', i[1], re.DOTALL), i[2]) for i in r]
            r = [(i[0], i[2][0][0] if len(i[2]) > 0 else i[1], i[2][0][1] if len(i[2]) > 0 else '', i[3]) for i in r]
            r = [(i[0], i[1], 'HD' if 'hosterhdvideo' in i[2] else 'SD', i[3]) for i in r]

            for link, host, quality, info in r:
                valid, host = source_utils.is_host_valid(host, hostDict)
                if not valid: continue

                sources.append({'source': host, 'quality': quality, 'language': 'de', 'url': link, 'info': info, 'direct': False, 'debridonly': False})

            return sources
        except:
            return sources
Developer: YourFriendCaspian, Project: dotfiles, Lines of code: 26, Source file: serienstream.py


Note: The resources.lib.modules.dom_parser.parse_dom examples in this article were compiled by 纯净天空 from open-source code and documentation platforms such as GitHub and MSDocs. The code snippets are selected from open-source projects contributed by various developers; copyright in the source code belongs to the original authors, and any distribution or use should follow the corresponding project's License. Do not reproduce without permission.