

Python _bsoup.BeautifulSoup Class Code Examples

This article collects and summarizes typical usage examples of the imdb.parser.http.bsouplxml._bsoup.BeautifulSoup class in Python. If you have been wondering what the BeautifulSoup class does, or how to use it, the curated class examples below may help.


The sections below present 14 code examples of the BeautifulSoup class, sorted by popularity by default.
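Before diving into the examples, here is a minimal, self-contained sketch of the two calling patterns they rely on: parsing a whole page and searching the resulting tree, and restricting parsing to certain tags with a SoupStrainer. The HTML string below is a made-up stand-in; the import path is the copy of BeautifulSoup bundled with IMDbPY, as referenced throughout this article.

    # Python 2, matching the examples below.
    from imdb.parser.http.bsouplxml._bsoup import BeautifulSoup, SoupStrainer

    data = '<table class="chart"><tr><td><a href="/title/1">Example</a></td></tr></table>'

    # Pattern 1: parse the whole document, then search the tree.
    soup = BeautifulSoup(data)
    table = soup.find('table', {'class': 'chart'})
    print table.a['href']  # prints: /title/1

    # Pattern 2: parse only the tags of interest; faster on large pages.
    tables = SoupStrainer('table')
    soup = BeautifulSoup(data, parseOnlyThese = tables)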

Example 1: getItems

    def getItems(self, data):

        results = []

        soup = BeautifulSoup(data)
        table = soup.find("table", { "class" : "chart" })

        try:
            for tr in table.findAll("tr"):
                item = {}

                for td in tr.findAll('td'):

                    # Get title and ID from <a>
                    if td.a and not td.a.img:
                        item['id'] = int(td.a['href'].split('/')[-1])
                        item['name'] = str(td.a.contents[0])

                    # Get year from <td>
                    if not td.h3 and not td.a:
                        if len(td.contents) == 1:
                            for y in td.contents:
                                try:
                                    item['year'] = int(y)
                                except ValueError:
                                    pass
                if item:
                    results.append(item)
        except AttributeError:
            log.error('No search results.')

        return results
Developer: FrozenCow, Project: CouchPotato, Lines: 32, Source: eta.py

Example 2: checkForUpdateWindows

    def checkForUpdateWindows(self):
        try:
            data = urllib2.urlopen(self.downloads, timeout = self.timeout).read()
        except (IOError, URLError):
            log.error('Failed to open %s.' % self.downloads)
            return False

        try:
            tables = SoupStrainer('table')
            html = BeautifulSoup(data, parseOnlyThese = tables)
            resultTable = html.find('table', attrs = {'id':'s3_downloads'})

            latestUrl = 'http://github.com' + resultTable.find('a')['href'].replace(' ', '%20')
            try:
                latest = urllib2.urlopen(latestUrl, timeout = self.timeout)
            except (IOError, URLError):
                log.error('Failed to open %s.' % latestUrl)
                return False

            downloadUrl = latest.geturl()

            if 'r' + str(version.windows) in downloadUrl:
                return False

            return downloadUrl

        except AttributeError:
            log.debug('Nothing found.')

        return False
Developer: andme, Project: CouchPotato, Lines: 30, Source: updater.py

Example 3: checkForUpdateWindows

    def checkForUpdateWindows(self):
        try:
            data = urllib2.urlopen(self.downloads, timeout = self.timeout).read()
        except (IOError, URLError):
            log.error('Failed to open %s.' % self.downloads)
            return False

        try:
            html = BeautifulSoup(data)
            results = html.findAll('a', attrs = {'href':re.compile('/downloads/')})

            # Guard against no matching download link being found
            downloadUrl = None
            for link in results:
                if 'windows' in str(link.parent).lower():
                    downloadUrl = 'http://github.com' + link.get('href').replace(' ', '%20')
                    break

            if not downloadUrl or 'r' + str(version.windows) in downloadUrl:
                return False

            return downloadUrl

        except AttributeError:
            log.debug('Nothing found.')

        return False
Developer: KiwiLostInMelb, Project: CouchPotato, Lines: 25, Source: updater.py

Example 4: findViaAlternative

    def findViaAlternative(self, movie):
        results = {'480p':[], '720p':[], '1080p':[]}

        arguments = urlencode({
            's':movie
        })
        url = "%s?%s" % (self.backupUrl, arguments)
        log.info('Searching %s' % url)

        try:
            data = urllib2.urlopen(url, timeout = self.timeout).read()
        except (IOError, URLError):
            log.error('Failed to open %s.' % url)
            return results

        try:
            tables = SoupStrainer('div')
            html = BeautifulSoup(data, parseOnlyThese = tables)
            resultTable = html.findAll('h2', text = re.compile(movie))

            for h2 in resultTable:
                if 'trailer' in h2.lower():
                    parent = h2.parent.parent.parent
                    trailerLinks = parent.findAll('a', text = re.compile('480p|720p|1080p'))
                    try:
                        for trailer in trailerLinks:
                            results[trailer].insert(0, trailer.parent['href'])
                    except:
                        pass


        except AttributeError:
            log.debug('No trailers found in via alternative.')

        return results
Developer: andme, Project: CouchPotato, Lines: 35, Source: hdtrailers.py

Example 5: findByProvider

    def findByProvider(self, data, provider):

        results = {'480p':[], '720p':[], '1080p':[]}
        try:
            tables = SoupStrainer('table')
            html = BeautifulSoup(data, parseOnlyThese = tables)
            resultTable = html.find('table', attrs = {'class':'bottomTable'})


            for tr in resultTable.findAll('tr'):
                trtext = str(tr).lower()
                if 'clips' in trtext:
                    break
                if 'trailer' in trtext and not 'clip' in trtext and provider in trtext:
                    nr = 0
                    resolutions = tr.findAll('td', attrs = {'class':'bottomTableResolution'})
                    #sizes = tr.findNext('tr').findAll('td', attrs = {'class':'bottomTableFileSize'})
                    for res in resolutions:
                        results[str(res.a.contents[0])].insert(0, res.a['href'])
                        #int(sizes[nr].contents[0].replace('MB', ''))
                        nr += 1

            return results

        except AttributeError:
            log.debug('No trailers found in provider %s.' % provider)
            results['404'] = True

        return results
Developer: andme, Project: CouchPotato, Lines: 29, Source: hdtrailers.py

Example 6: find

    def find(self, movie, quality, type):

        results = []
        if not self.enabled() or not self.isAvailable(self.searchUrl):
            return results

        url = self.searchUrl % quote_plus(self.toSearchString(movie.name + ' ' + quality))
        log.info('Searching: %s' % url)
        data = urllib.urlopen(url)

        try:
            tables = SoupStrainer('table')
            html = BeautifulSoup(data, parseOnlyThese = tables)
            resultTable = html.find('table', attrs = {'class':'requests'})
            for result in resultTable.findAll('tr', attrs = {'class':'req_filled'}):
                new = self.feedItem()

                id = result.find('td', attrs = {'class':'reqid'})
                new.id = id.contents[0]
                name = result.find('td', attrs = {'class':'release'})
                new.name = self.toSaveString(name.contents[0])
                new.size = 9999
                new.content = 'x264'
                new.type = 'nzb'
                new.url = self.downloadUrl % (new.id)
                new.date = time.time()
                new.score = self.calcScore(new, movie)

                if self.isCorrectMovie(new, movie, type):
                    results.append(new)
                    log.info('Found: %s' % new.name)
            return results

        except AttributeError:
            log.debug('No search results found.')

        return results
Developer: maaso, Project: CouchPotato, Lines: 35, Source: x264.py

Example 7: getItems

    def getItems(self, data):

        results = []

        soup = BeautifulSoup(data)
        table = soup.find("table", { "class" : "filmSubtitleList" })

        try:
            for tr in table.findAll("tr"):
                item = {}

                for td in tr.findAll('td'):
                    if td.a:
                        spans = td.a.findAll('span')
                        if len(spans) == 2:
                            item['id'] = int(spans[1].get('id').replace('r', ''))
                            item['name'] = str(spans[1].contents[0]).strip()
                            item['rating'] = int(spans[0].get('class', '0').replace('r', ''))

                            # Language
                            lang = str(spans[0].contents[0]).strip()
                            item['language'] = self.languages.get(lang, lang)
                    if td.div:
                        item['hi'] = td.div.get('id') == 'imgEar'

                if item.get('name'):
                    results.append(item)
        except AttributeError:
            log.error('No search results.')

        return results
Developer: censer, Project: CouchPotato, Lines: 31, Source: subscene.py

Example 8: getDetails

    def getDetails(self, id):
        url = self.detailUrl + str(id)

        log.info('Scanning %s.' % url)

        try:
            data = urllib2.urlopen(url, timeout = self.timeout).read()
        except (IOError, URLError):
            log.error('Failed to open %s.' % url)
            return False

        # Search for theater release
        theaterDate = 0
        try:
            theaterLink = SoupStrainer('a', href = re.compile('/month_theaters.html\?'))
            theater = BeautifulSoup(data, parseOnlyThese = theaterLink)
            theaterDate = int(time.mktime(parse(theater.a.contents[0]).timetuple()))
        except AttributeError:
            log.debug('No Theater release info found.')

        # Search for dvd release date
        dvdDate = 0
        try:
            try:
                dvdLink = SoupStrainer('a', href = re.compile('/month_video.html\?'))
                dvd = BeautifulSoup(data, parseOnlyThese = dvdLink)
                dvdDate = int(time.mktime(parse(dvd.a.contents[0]).timetuple()))
            except:
                pass

            # Try left column
            if not dvdDate:
                dvdReleases = SoupStrainer('p', text = re.compile('Released'))
                dvd = BeautifulSoup(data, parseOnlyThese = dvdReleases)
                for date in dvd:
                    foundDate = int(time.mktime(parse(date.replace('Released', '')).timetuple()))
                    dvdDate = foundDate if foundDate > dvdDate else dvdDate

        except AttributeError:
            log.debug('No DVD release info found.')

        # Does it have blu-ray release?
        bluray = []
        try:
            bees = SoupStrainer('b')
            soup = BeautifulSoup(data, parseOnlyThese = bees)
            bluray = soup.findAll('b', text = re.compile('Blu-ray'))
        except AttributeError:
            log.info('No Bluray release info found.')

        dates = {
            'id': id,
            'dvd': dvdDate,
            'theater': theaterDate,
            'bluray': len(bluray) > 0
        }
        log.debug('Found: %s' % dates)
        return dates
Developer: bjensen, Project: CouchPotato, Lines: 58, Source: eta.py

Example 9: getInfo

    def getInfo(self, url):
        log.debug('Getting info: %s' % url)
        try:
            data = urllib2.urlopen(url, timeout = self.timeout).read()
        except IOError:
            log.error('Failed to open %s.' % url)
            return ''

        div = SoupStrainer('div')
        html = BeautifulSoup(data, parseOnlyThese = div)
        html = html.find('div', attrs = {'class':'nfo'})
        return str(html).decode("utf-8", "replace")
Developer: Belgar, Project: CouchPotato, Lines: 13, Source: tpb.py

Example 10: getInfo

    def getInfo(self, url):
        log.debug('Getting info: %s' % url)
        try:
            data = urllib2.urlopen(url, timeout = self.timeout).read()
        except IOError:
            log.error('Failed to open %s.' % url)
            return ''

        html = BeautifulSoup(data)
        movieInformation = html.find('div', attrs = {'class':'i_info'})
        return str(movieInformation).decode("utf-8", "replace")
Developer: bwq, Project: CouchPotato, Lines: 13, Source: sceneaccess.py

Example 11: find

    def find(self, movie, quality, type):

        results = []
        if not self.enabled() or not self.isAvailable(self.searchUrl):
            return results

        url = self.searchUrl % quote_plus(self.toSearchString(movie.name + ' ' + quality))
        log.info('Searching: %s' % url)

        try:
            data = urllib2.urlopen(url, timeout = self.timeout).read()
        except (IOError, URLError):
            log.error('Failed to open %s.' % url)
            return results

        try:
            tables = SoupStrainer('table')
            html = BeautifulSoup(data, parseOnlyThese = tables)
            resultable = html.find('table', attrs = {'class':'t'})
            for result in resultable.findAll('span', attrs = {'class':'cname'}):
                new = self.feedItem()
                a = result.find('a')
                id = re.search('(?<=detail\?c\=)\w+', a['href'])
                new.id = id.group(0)
                words = ''
                for text in a.findAll(text = True):
                    words = words + unicode(text).encode('utf-8')
                new.name = words
                new.size = 9999
                new.content = 'mysterbin'
                new.type = 'nzb'
                new.url = self.downloadUrl % (new.id)
                new.date = time.time()
                new.score = self.calcScore(new, movie)

                if self.isCorrectMovie(new, movie, type):
                    results.append(new)
                    log.info('Found: %s' % new.name)
            return results

        except AttributeError:
            log.debug('No search results found.')

        return results
Developer: Belgar, Project: CouchPotato, Lines: 45, Source: mysterbin.py

Example 12: find

    def find(self, movie, quality, type):

        results = []
        if not self.enabled() or not self.isAvailable(self.apiUrl):
            return results

        url = self.apiUrl % (quote_plus(self.toSearchString(movie.name + ' ' + quality) + self.makeIgnoreString(type)), self.getCatId(type))

        log.info('Searching: %s' % url)

        try:
            data = urllib2.urlopen(url, timeout = self.timeout).read()
        except (IOError, URLError):
            log.error('Failed to open %s.' % url)
            return results

        try:
            tables = SoupStrainer('table')
            html = BeautifulSoup(data, parseOnlyThese = tables)
            resultTable = html.find('table', attrs = {'id':'searchResult'})
            for result in resultTable.findAll('tr'):
                details = result.find('a', attrs = {'class':'detLink'})
                if details:
                    href = re.search('/(?P<id>\d+)/', details['href'])
                    id = href.group('id')
                    name = self.toSaveString(details.contents[0])
                    desc = result.find('font', attrs = {'class':'detDesc'}).contents[0].split(',')
                    date = ''
                    size = 0
                    for item in desc:
                        # Weird date stuff
                        if 'uploaded' in item.lower():
                            date = item.replace('Uploaded', '')
                            date = date.replace('Today', '')

                            # Do something with yesterday
                            yesterdayMinus = 0
                            if 'Y-day' in date:
                                date = date.replace('Y-day', '')
                                yesterdayMinus = 86400

                            datestring = date.replace('&nbsp;', ' ').strip()
                            date = int(time.mktime(parse(datestring).timetuple())) - yesterdayMinus
                        # size
                        elif 'size' in item.lower():
                            size = item.replace('Size', '')

                    seedleech = []
                    for td in result.findAll('td'):
                        try:
                            seedleech.append(int(td.contents[0]))
                        except ValueError:
                            pass

                    seeders = 0
                    leechers = 0
                    if len(seedleech) == 2 and seedleech[0] > 0 and seedleech[1] > 0:
                        seeders = seedleech[0]
                        leechers = seedleech[1]

                    # to item
                    new = self.feedItem()
                    new.id = id
                    new.type = 'torrent'
                    new.name = name
                    new.date = date
                    new.size = self.parseSize(size)
                    new.seeders = seeders
                    new.leechers = leechers
                    new.url = self.downloadLink(id, name)
                    new.score = self.calcScore(new, movie) + self.uploader(result) + (seeders / 10)

                    if seeders > 0 and (new.date + (int(self.conf('wait')) * 60 * 60) < time.time()) and Qualities.types.get(type).get('minSize') <= new.size:
                        new.detailUrl = self.detailLink(id)
                        new.content = self.getInfo(new.detailUrl)
                        if self.isCorrectMovie(new, movie, type):
                            results.append(new)
                            log.info('Found: %s' % new.name)

            return results

        except AttributeError:
            log.debug('No search results found.')

        return []
Developer: Belgar, Project: CouchPotato, Lines: 85, Source: tpb.py

Example 13: download

    def download(self, subtitle):

        subtitle = subtitle['subtitles'].pop()
        url = self.downloadUrl % subtitle['id']

        try:
            data = self.urlopen(url, timeout = self.timeout).read()
        except (IOError, URLError):
            log.error('Failed to open %s.' % url)
            return False

        soup = BeautifulSoup(data)

        postUrl = self.siteUrl + soup.find("a", {'id' : 's_lc_bcr_downloadLink' }).get('href').split('"')[-2]
        typeId = soup.find("input", {"name" : "typeId" }).get('value')
        params = urllib.urlencode({
           '__EVENTTARGET': 's$lc$bcr$downloadLink',
           '__EVENTARGUMENT': '',
           '__VIEWSTATE': soup.find("input", {"id" : "__VIEWSTATE" }).get('value'),
           '__PREVIOUSPAGE': soup.find("input", { "id" : "__PREVIOUSPAGE" }).get('value'),
           'subtitleId': soup.find("input", {"id" : "subtitleId" }).get('value'),
           'typeId': typeId,
           'filmId': soup.find("input", {"name" : "filmId" }).get('value')
        })

        # No unrarring yet
        if 'rar' in typeId:
            log.error('Unrar not supported yet.')
            return False

        req = urllib2.Request(postUrl, headers = {
            'Referer' : url,
            'User-Agent' : 'Mozilla/5.0 (Macintosh; U; Intel Mac OS X 10.6; en-US; rv:1.9.2.8) Gecko/20100722 Firefox/3.6.8'
        })

        subtitleFiles = []
        try:
            self.wait()
            data = urllib2.urlopen(req, params)
            self.lastUse = time.time()
            hash = hashlib.md5(url).hexdigest()

            tempdir = cherrypy.config.get('cachePath')
            tempSubtitleFile = os.path.join(tempdir, hash + '.' + typeId)

            # Remove the old
            if os.path.isfile(tempSubtitleFile): os.remove(tempSubtitleFile)

            with open(tempSubtitleFile, 'wb') as f:
                f.write(data.read())

            if 'zip' in typeId:
                zip = ZipFile(tempSubtitleFile)

                extract = []
                for name in zip.namelist():
                    for ext in self.extensions:
                        if ext.replace('*', '') in name:
                            subtitleFiles.append(os.path.join(tempdir, name))
                            extract.append(name)

                zip.extractall(tempdir, extract)
                os.remove(tempSubtitleFile)
            else:
                subtitleFiles.append(tempSubtitleFile)

            log.info('Subtitle download "%s" finished. %dKB.' % (subtitle['name'], int(data.info().getheaders("Content-Length")[0]) / 1024))
            return subtitleFiles

        except:
            log.error('Subtitle download %s failed.' % subtitle['name'])
            return False
Developer: censer, Project: CouchPotato, Lines: 72, Source: subscene.py

Example 14: urlencode

        arguments = urlencode({
            's':movie
        })
        url = "%s?%s" % (self.backupUrl, arguments)
        log.debug('Searching %s' % url)

        try:
            data = urllib2.urlopen(url, timeout = self.timeout).read()
        except (IOError, URLError), e:
            log.debug('Failed to open %s. %s' % (url, e))
            return results

        try:
            tables = SoupStrainer('div')
            html = BeautifulSoup(data, parseOnlyThese = tables)
            resultTable = html.findAll('h2', text = re.compile(movie))

            for h2 in resultTable:
                if 'trailer' in h2.lower():
                    parent = h2.parent.parent.parent
                    trailerLinks = parent.findAll('a', text = re.compile('480p|720p|1080p'))
                    try:
                        for trailer in trailerLinks:
                            results[trailer].insert(0, trailer.parent['href'])
                    except:
                        pass


        except AttributeError:
            log.debug('No trailers found in via alternative.')
Developer: Amelandbor, Project: CouchPotato, Lines: 30, Source: hdtrailers.py


Note: the imdb.parser.http.bsouplxml._bsoup.BeautifulSoup class examples in this article were compiled by 纯净天空 from open-source code and documentation platforms such as GitHub and MSDocs. The snippets are drawn from open-source projects contributed by various developers; copyright of the source code remains with the original authors. For distribution and use, please refer to the corresponding project's license; do not republish without permission.