

Python Scraper.scrape Method Code Examples

This article collects typical usage examples of the Python method scraper.Scraper.scrape. If you have been wondering how exactly Scraper.scrape is used, how to call it, or what it looks like in real code, the hand-picked examples below may help. You can also explore further usage examples of its containing class, scraper.Scraper.


The following presents 9 code examples of the Scraper.scrape method, sorted by popularity by default. You can upvote the examples you like or find useful; your votes help the system recommend better Python code examples.
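All nine examples share the same basic pattern: construct a Scraper, then call its scrape method. As a quick orientation, here is a minimal sketch of that pattern. The constructor arguments, the scrape() arguments, and the return value are illustrative assumptions; as the examples show, each project defines its own signatures.

# Minimal usage sketch; the URL and selector dict are hypothetical,
# and each project below shapes these arguments differently.
from scraper import Scraper

scraper = Scraper("http://example.com", {"title": "h1"})
result = scraper.scrape()  # returns whatever this project's scrape() produces
print(result)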

Example 1: execute

# Required import: from scraper import Scraper [as alias]
# Or: from scraper.Scraper import scrape [as alias]
def execute(self):
    # Scrape self.url and either return the result or hand it to a callback.
    myScraper = Scraper(self.url, self.matchingDict)
    result = myScraper.scrape()
    if self.target is None:
        return result
    else:
        self.target(result, self.url)
Developer ID: PhilipFraDIKU, Project: Projects-in-Stock, Code lines: 9, Source: adapter.py

Example 2: __init__

# Required import: from scraper import Scraper [as alias]
# Or: from scraper.Scraper import scrape [as alias]
import os
import urllib2  # Python 2 only, as are the print statements below


class Downloader:
    """Download every zip file discovered by the Scraper into outdir."""

    def __init__(self, outdir):
        self.scrpr = Scraper()
        self.scrpr.scrape()
        self.size = 0
        self.outdir = outdir

    def getFileUrls(self):
        self.fileUrls = self.scrpr.getZipLinks()

    def getFileSize(self, url, urlobj):
        # Add the response's Content-Length to the running total.
        meta = urlobj.info()
        file_size = int(meta.getheaders("Content-Length")[0])
        self.size += file_size

    def openUrl(self, url):
        urlobj = urllib2.urlopen(url)
        return urlobj

    def iterateThroughFiles(self):
        self.getFileUrls()
        for url in self.fileUrls:
            urlobj = self.openUrl(url)
            [dir, fileName] = self.getFileYearAndName(url)
            dirPath = self.mkDirectory(dir)
            self.getFileSize(url, urlobj)
            self.saveFile(fileName, dirPath, urlobj)
        print "Total file size is: %d" % self.size

    def getFileYearAndName(self, url):
        # URLs end in .../<year>/<filename>; return [year, filename].
        return [url.split('/')[-2], url.split('/')[-1]]

    def mkDirectory(self, dir):
        if not os.path.exists(os.path.join(self.outdir, dir)):
            os.mkdir(os.path.join(self.outdir, dir))
        return os.path.join(self.outdir, dir)

    def saveFile(self, fileName, dirPath, urlobj):
        # Stream the response to disk in 8 KB blocks.
        fp = open(os.path.join(dirPath, fileName), 'wb')
        block_size = 8192
        while True:
            buffer = urlobj.read(block_size)
            if not buffer:
                break
            fp.write(buffer)
        fp.close()
        print "Downloaded: %s" % os.path.join(dirPath, fileName)
Developer ID: akulkarni-bk, Project: easypatent, Code lines: 49, Source: downloader.py
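The excerpt does not include a driver for the class; a minimal sketch of one is below, assuming the zero-argument Scraper seen in __init__. The output directory name is illustrative.

# Hypothetical driver (Python 2, matching the class above).
downloader = Downloader("patent_zips")  # outdir name is illustrative
downloader.iterateThroughFiles()        # collect zip links, then download each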

Example 3: performUpdate

# Required import: from scraper import Scraper [as alias]
# Or: from scraper.Scraper import scrape [as alias]
def performUpdate():
    scr = Scraper()
    try:
        # scrape() yields competitors in chunks; push each competitor's
        # completed count into the spreadsheet-backed Contestant records.
        for competitorListChunk in scr.scrape():
            for competitor in competitorListChunk:
                try:
                    Contestant.update_completed_count(competitor.username.lower(),
                                                      competitor.completedCount)
                except Exception as e:
                    print("ERR: Username most likely not found in spreadsheet {}. {}".format(
                        competitor.username, str(e)))
    except Exception:
        return
Developer ID: ucsd-wic-bpc, Project: balloon-dash, Code lines: 15, Source: dash.py

Example 4: main

# Required import: from scraper import Scraper [as alias]
# Or: from scraper.Scraper import scrape [as alias]
def main():
    scp = Scraper('sfbay')
    scp.scrape(2500)
    scp.save('create')
Developer ID: singhpawan, Project: craigdeals, Code lines: 6, Source: test.py

Example 5: callback

# Required import: from scraper import Scraper [as alias]
# Or: from scraper.Scraper import scrape [as alias]
import json

def callback(ch, method, properties, body):
    # RabbitMQ message handler: body is a JSON payload carrying the URL to scrape.
    url = json.loads(body)['url']
    scraper = Scraper()
    result = scraper.scrape(url)
    publish_result(result)  # defined elsewhere in worker.py
Developer ID: BernhardWenzel, Project: scraping-microservice-java-python-rabbitmq, Code lines: 7, Source: worker.py
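The excerpt shows only the message handler; its (ch, method, properties, body) signature is the one pika consumers receive. Below is a hedged sketch of how the callback might be attached to a queue, assuming the pika 1.x client. The queue name and host are illustrative, not taken from the project.

# Hypothetical consumer wiring with pika 1.x; 'scrape_queue' and
# localhost are assumptions for illustration.
import pika

connection = pika.BlockingConnection(pika.ConnectionParameters(host='localhost'))
channel = connection.channel()
channel.queue_declare(queue='scrape_queue')
channel.basic_consume(queue='scrape_queue',
                      on_message_callback=callback,
                      auto_ack=True)  # the callback above never acks explicitly
channel.start_consuming()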

Example 6: range

# Required import: from scraper import Scraper [as alias]
# Or: from scraper.Scraper import scrape [as alias]
# The excerpt opens mid-way through an earlier definition (apparently
# fetchNext, the callback handed to each Scraper below); its truncated
# opening mirrors the while-loop in the pool setup underneath.
            if os.path.exists(outfile):
                urlcount -= 1
                print "File already exists. Getting next..."
            else:
                break

        self.scrape(url, outfile)
    except KeyError:
        if urlcount == 0: sys.exit()

print "Initialising scrapers"
for _ in range(0, SCRAPER_POOL):
    s = Scraper(app, fetchNext)
    try:
        # Pop URLs from the shared urlset until one without an existing
        # output file turns up, then scrape it.
        while True:
            url = "http://www.boxoffice.com%s" % urlset.pop().strip()
            split = urlparse.urlsplit(url)
            outfile = "output/%s.csv" % split.path.strip().split("/")[-1]
            if os.path.exists(outfile):
                print "File already exists. Getting next..."
                urlcount -= 1
            else:
                break

        s.scrape(url, outfile)
    except KeyError:
        if urlcount == 0: sys.exit()

# Restore default Ctrl-C handling, then hand control to the Qt event loop.
signal.signal(signal.SIGINT, signal.SIG_DFL)
app.exec_()
Developer ID: jamesjohnson92, Project: mm-crawl, Code lines: 32, Source: main.py
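Design note: the script builds SCRAPER_POOL Scraper instances up front, all bound to the same fetchNext callback, and every scraper pulls its next target from the shared urlset. Popping from an exhausted set raises KeyError, which doubles as the pool's shutdown signal.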

Example 7: DentonBot

# Required import: from scraper import Scraper [as alias]
# Or: from scraper.Scraper import scrape [as alias]
class DentonBot(pyrc.Bot):
    def __init__(self, *args, **kwargs):
        super(DentonBot, self).__init__(*args, **kwargs)
        self.scraper = Scraper()

        # todo: add mangastream to these as well
        self.scraper.add_manga("One Piece", ["mangahere"])
        self.scraper.add_manga("Bleach", ["mangahere"])
        self.scraper.add_manga("Naruto", ["mangahere"])
        self.scraper.add_manga("Fairy Tail", ["mangahere"])

        self.scraper.add_manga("Toriko", ["mangahere"])
        self.scraper.add_manga("Cage of Eden", ["mangahere"])
        self.scraper.add_manga("Gamaran", ["mangahere"])
        self.scraper.add_manga("Magi", ["mangahere"])
        self.scraper.add_manga("Shaman King Flowers", ["mangahere"])
        self.scraper.add_manga("Soul Eater", ["mangahere"])
        self.scraper.add_manga("The Breaker: New Waves", ["mangahere"])
        self.scraper.add_manga("The World God Only Knows", ["mangahere"])
        self.scraper.add_manga("Until Death Do Us Part", ["mangahere"])
        self.scraper.add_manga("Witch Hunter", ["mangahere"])
        self.scraper.add_manga("Yotsubato!", ["mangahere"])

    @hooks.command(r"help|advice")
    def help(self, channel):
        self.message(channel, random.choice(advice))

    @hooks.command(r"list|registered")
    def registered(self, channel):
        manga = self.scraper.registered_manga()
        manga_str = ", ".join(manga)
        self.message(channel, "Registered manga include %s" % manga_str)

    @hooks.command("fetch manga")
    def fetch_manga(self, channel):
        results = self.scraper.get_all_manga()
        for (name, chapter, link) in results:
            self.message(channel, "%s %i: %s" % (name, chapter, link))

    @hooks.command(r"last (?P<manga_name>.*)")
    def last(self, channel, manga_name):
        manga_name = manga_name.strip()
        manga_name = aliases.get(manga_name, manga_name)
        manga_tuple = self.scraper.get_manga(manga_name)
        if manga_tuple and not manga_tuple[2]:
            self.message(channel, "Sorry, don't have the latest %s" % manga_tuple[0])
        elif manga_tuple:
            self.message(channel, "%s %i: %s" % manga_tuple)
        else:
            self.message(channel, "I'm not big into books")

    @hooks.interval(2 * 60 * 1000)  # every 2 minutes, assuming the interval is in milliseconds
    def scrape(self):
        results = self.scraper.scrape()
        for (name, chapter, link) in results:
            msg = "New %s (%i): %s" % (name, chapter, link)
            for channel in CHANNELS:
                self.message(channel, msg)

    def close(self):
        super(DentonBot, self).close()
Developer ID: CarlosFdez, Project: denton_manga_scraper, Code lines: 63, Source: main.py

Example 8: Scraper

# Required import: from scraper import Scraper [as alias]
# Or: from scraper.Scraper import scrape [as alias]
import time
import mysql.connector

from random import normalvariate
from arbitrage import Arbitrage
from scraper import Scraper

# initialize the database, games, and scraper
cnx = mysql.connector.connect(user='root',
                              password='Q!w2E#r4',
                              database='sports_betting')
cursor = cnx.cursor()
scraper = Scraper("sites.csv", "games.csv", cnx, cursor)

# scrape forever
while True:
    print 'scrape:', str(scraper.scrape_id)

    # get lines from each site
    lines = scraper.scrape()
    print 'lines:', str(lines)

    # look for arbitrage
    # arbitrage = Arbitrage(lines)
    # arbitrage.check()

    # chill: sleep roughly 5 seconds between scrapes
    time.sleep(normalvariate(5, 1))

# unreachable while the loop above runs forever
cursor.close()
cnx.close()
Developer ID: mikeurbach, Project: sports_betting, Code lines: 32, Source: main.py

Example 9: QApplication

# Required import: from scraper import Scraper [as alias]
# Or: from scraper.Scraper import scrape [as alias]
#!/usr/bin/env python2
from scraper import Scraper
from PyQt4.QtGui import QApplication
import sys


app = QApplication(sys.argv)

def dummy(self):
    # placeholder callback; a real caller would process the finished scrape here
    print "Stupid method"

scraper = Scraper(app, dummy)
scraper.scrape("http://www.boxoffice.com/statistics/movies/fast-five-2011", "test.csv")
app.exec_()


Developer ID: jamesjohnson92, Project: mm-crawl, Code lines: 14, Source: test.py
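Judging by the matching developer and project IDs, this is the test harness for the same mm-crawl project as Example 6: it drives a single Scraper against one boxoffice.com page instead of a pool.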


Note: The scraper.Scraper.scrape method examples in this article were compiled by 纯净天空 from open-source code and documentation platforms such as GitHub and MSDocs; the snippets were selected from open-source projects contributed by many developers. The source code is copyrighted by its original authors; consult each project's license before distributing or using it. Do not reproduce this article without permission.