This article collects typical usage examples of the Scraper.scrape method from the Python scraper module. If you have been wondering what Scraper.scrape does, how to call it, or what real-world usage looks like, the curated examples below should help. You can also explore the enclosing class, scraper.Scraper, for more context.
Nine code examples of Scraper.scrape are shown below, sorted by popularity by default. You can upvote the examples you like or find useful; your votes help the system recommend better Python code examples.
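Before the examples, here is the call pattern they all share, reduced to a minimal sketch (constructor arguments and the return value of scrape() vary from project to project, as the examples below demonstrate):

from scraper import Scraper

scraper = Scraper()        # constructor signatures differ per project
result = scraper.scrape()  # the return value is project-specific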
Example 1: execute
# Required import: from scraper import Scraper [as alias]
# Alternatively: from scraper.Scraper import scrape [as alias]
def execute(self):
    # Scrape the configured URL and either return the result
    # or hand it to the optional target callback.
    myScraper = Scraper(self.url, self.matchingDict)
    result = myScraper.scrape()
    if self.target is None:
        return result
    else:
        self.target(result, self.url)
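The method above evidently belongs to a task-like class that carries a URL, a matching dictionary, and an optional result callback. A minimal hypothetical host class (the name ScrapeTask and anything not referenced by execute are assumptions) might look like:

class ScrapeTask(object):
    def __init__(self, url, matchingDict, target=None):
        self.url = url                    # page to scrape (assumed)
        self.matchingDict = matchingDict  # extraction rules handed to Scraper
        self.target = target              # optional callback(result, url)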
Example 2: __init__
# Required import: from scraper import Scraper [as alias]
# Alternatively: from scraper.Scraper import scrape [as alias]
import os
import urllib2

class Downloader:
    def __init__(self, outdir):
        self.scrpr = Scraper()
        self.scrpr.scrape()
        self.size = 0
        self.outdir = outdir

    def getFileUrls(self):
        self.fileUrls = self.scrpr.getZipLinks()

    def getFileSize(self, url, urlobj):
        meta = urlobj.info()
        file_size = int(meta.getheaders("Content-Length")[0])
        self.size += file_size

    def openUrl(self, url):
        urlobj = urllib2.urlopen(url)
        return urlobj

    def iterateThroughFiles(self):
        # Download every zip link found by the scraper, grouped by year.
        self.getFileUrls()
        for url in self.fileUrls:
            urlobj = self.openUrl(url)
            [dir, fileName] = self.getFileYearAndName(url)
            dirPath = self.mkDirectory(dir)
            self.getFileSize(url, urlobj)
            self.saveFile(fileName, dirPath, urlobj)
        print "Total file size is : %d" % (self.size)

    def getFileYearAndName(self, url):
        # The URL layout is assumed to be .../<year>/<filename>
        return [url.split('/')[-2], url.split('/')[-1]]

    def mkDirectory(self, dir):
        if not os.path.exists(os.path.join(self.outdir, dir)):
            os.mkdir(os.path.join(self.outdir, dir))
        return os.path.join(self.outdir, dir)

    def saveFile(self, fileName, dirPath, urlobj):
        # Stream the response to disk in 8 KB chunks.
        fp = open(os.path.join(dirPath, fileName), 'wb')
        block_size = 8192
        while True:
            buffer = urlobj.read(block_size)
            if not buffer:
                break
            fp.write(buffer)
        fp.close()
        print "Downloaded : %s" % (os.path.join(dirPath, fileName))
Example 3: performUpdate
# Required import: from scraper import Scraper [as alias]
# Alternatively: from scraper.Scraper import scrape [as alias]
def performUpdate():
    scr = Scraper()
    try:
        # scrape() yields competitors in chunks; push each competitor's
        # completed count into the spreadsheet-backed Contestant model.
        for competitorListChunk in scr.scrape():
            for competitor in competitorListChunk:
                try:
                    Contestant.update_completed_count(competitor.username.lower(),
                                                      competitor.completedCount)
                except Exception as e:
                    print("ERR: Username most likely not found in spreadsheet {}. {}".format(
                        competitor.username, str(e)))
    except Exception:
        return
Example 4: main
# Required import: from scraper import Scraper [as alias]
# Alternatively: from scraper.Scraper import scrape [as alias]
def main():
    # Scrape up to 2500 listings from the sfbay region and persist them.
    scp = Scraper('sfbay')
    scp.scrape(2500)
    scp.save('create')
Example 5: callback
# Required import: from scraper import Scraper [as alias]
# Alternatively: from scraper.Scraper import scrape [as alias]
import json

def callback(ch, method, properties, body):
    # Message-queue consumer: pull the URL out of the JSON payload,
    # scrape it, and publish the result downstream.
    url = json.loads(body)['url']
    scraper = Scraper()
    result = scraper.scrape(url)
    publish_result(result)
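The (ch, method, properties, body) signature matches a RabbitMQ consumer callback as used by pika. Wiring it up might look like this sketch, which assumes a queue named 'urls' and pika's 0.x basic_consume signature:

import pika

connection = pika.BlockingConnection(pika.ConnectionParameters('localhost'))
channel = connection.channel()
channel.queue_declare(queue='urls')
# pika 1.x instead uses:
# channel.basic_consume(queue='urls', on_message_callback=callback)
channel.basic_consume(callback, queue='urls', no_ack=True)
channel.start_consuming()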
Example 6: range
# Required import: from scraper import Scraper [as alias]
# Alternatively: from scraper.Scraper import scrape [as alias]
print "Initialising scrapers"
for _ in range(0,SCRAPER_POOL):
s = Scraper(app,fetchNext)
try:
while True:
url = "http://www.boxoffice.com%s"%urlset.pop().strip()
split = urlparse.urlsplit(url)
outfile = "output/%s.csv"%split.path.strip().split("/")[-1]
if os.path.exists(outfile):
print "File already exists. Getting next..."
urlcount -= 1
else:
break
s.scrape(url,outfile)
except KeyError:
if urlcount == 0: sys.exit()
signal.signal(signal.SIGINT, signal.SIG_DFL)
app.exec_()
Example 7: DentonBot
# Required import: from scraper import Scraper [as alias]
# Alternatively: from scraper.Scraper import scrape [as alias]
class DentonBot(pyrc.Bot):
    def __init__(self, *args, **kwargs):
        super(DentonBot, self).__init__(*args, **kwargs)
        self.scraper = Scraper()
        # todo: add mangastream to these as well
        self.scraper.add_manga("One Piece", ["mangahere"])
        self.scraper.add_manga("Bleach", ["mangahere"])
        self.scraper.add_manga("Naruto", ["mangahere"])
        self.scraper.add_manga("Fairy Tail", ["mangahere"])
        self.scraper.add_manga("Toriko", ["mangahere"])
        self.scraper.add_manga("Cage of Eden", ["mangahere"])
        self.scraper.add_manga("Gamaran", ["mangahere"])
        self.scraper.add_manga("Magi", ["mangahere"])
        self.scraper.add_manga("Shaman King Flowers", ["mangahere"])
        self.scraper.add_manga("Soul Eater", ["mangahere"])
        self.scraper.add_manga("The Breaker: New Waves", ["mangahere"])
        self.scraper.add_manga("The World God Only Knows", ["mangahere"])
        self.scraper.add_manga("Until Death Do Us Part", ["mangahere"])
        self.scraper.add_manga("Witch Hunter", ["mangahere"])
        self.scraper.add_manga("Yotsubato!", ["mangahere"])

    @hooks.command(r"help|advice")
    def help(self, channel):
        self.message(channel, random.choice(advice))

    @hooks.command(r"list|registered")
    def registered(self, channel):
        manga = self.scraper.registered_manga()
        manga_str = ", ".join(manga)
        self.message(channel, "Registered manga include %s" % manga_str)

    @hooks.command("fetch manga")
    def fetch_manga(self, channel):
        results = self.scraper.get_all_manga()
        for (name, chapter, link) in results:
            self.message(channel, "%s %i: %s" % (name, chapter, link))

    @hooks.command(r"last (?P<manga_name>.*)")
    def last(self, channel, manga_name):
        manga_name = manga_name.strip()
        manga_name = aliases.get(manga_name, manga_name)
        manga_tuple = self.scraper.get_manga(manga_name)
        if manga_tuple and not manga_tuple[2]:
            self.message(channel, "Sorry, don't have the latest %s" % manga_tuple[0])
        elif manga_tuple:
            self.message(channel, "%s %i: %s" % manga_tuple)
        else:
            self.message(channel, "I'm not big into books")

    @hooks.interval(2 * 60 * 1000)
    def scrape(self):
        # Poll every two minutes and announce new chapters to all channels.
        results = self.scraper.scrape()
        for (name, chapter, link) in results:
            msg = "New %s (%i): %s" % (name, chapter, link)
            for channel in CHANNELS:
                self.message(channel, msg)

    def close(self):
        super(DentonBot, self).close()
Example 8: Scraper
# Required import: from scraper import Scraper [as alias]
# Alternatively: from scraper.Scraper import scrape [as alias]
import time
import mysql.connector
from random import normalvariate
from arbitrage import Arbitrage
from scraper import Scraper

# initialize the database, games, and scraper
cnx = mysql.connector.connect(user='root',
                              password='Q!w2E#r4',
                              database='sports_betting')
cursor = cnx.cursor()
scraper = Scraper("sites.csv", "games.csv", cnx, cursor)

# scrape forever
while True:
    print 'scrape:', str(scraper.scrape_id)
    # get lines from each site
    lines = scraper.scrape()
    print 'lines:', str(lines)
    # look for arbitrage
    # arbitrage = Arbitrage(lines)
    # arbitrage.check()
    # chill between scrapes
    time.sleep(normalvariate(5, 1))

# unreachable while the loop above runs forever
cursor.close()
cnx.close()
Example 9: QApplication
# Required import: from scraper import Scraper [as alias]
# Alternatively: from scraper.Scraper import scrape [as alias]
#!/usr/bin/env python2
from scraper import Scraper
from PyQt4.QtGui import QApplication
import sys

app = QApplication(sys.argv)

def dummy(self):
    # placeholder callback handed to the Scraper
    print "Stupid method"

scraper = Scraper(app, dummy)
scraper.scrape("http://www.boxoffice.com/statistics/movies/fast-five-2011", "test.csv")
app.exec_()