This article collects typical usage examples of the Python method scraper.Scraper.stop. If you are wondering what Scraper.stop does or how to use it, the curated code examples below should help. You can also explore further usage examples of the containing class, scraper.Scraper.
The following presents 3 code examples of the Scraper.stop method, sorted by popularity by default.
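Before the full examples, here is a minimal sketch of the basic call pattern, assuming (as the examples below do) that Scraper is constructed with a uid string and exposes run() and stop(); the exact constructor signature may differ between versions of the project.
import uuid
from scraper import Scraper

scraper = Scraper(str(uuid.uuid4()))  # assumed constructor: Scraper(uid), as in Example 1
scraper.run()                         # start the scraper
# ... interact with the running scraper ...
scraper.stop()                        # signal the scraper to stop when finished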
Example 1: main
# Required import: from scraper import Scraper [as alias]
# Or: from scraper.Scraper import stop [as alias]
def main():
    uid = str(uuid.uuid4())
    print("Creating Scraper() instance ...")
    scraper = Scraper(uid)
    scraper.run()
    print("Running tests ...")
    # typelink()
    test_typelink(scraper)
    # checkmatch()
    test_checkmatch(scraper)
    # getpagelinks()
    test_getpagelinks(scraper)
    # followlinks()
    test_followlinks(scraper)
    # get scraper status
    text_getstatus(scraper)
    scraper.stop()
    print("Done.")
Example 2: ScraperWrapper
# Required import: from scraper import Scraper [as alias]
# Or: from scraper.Scraper import stop [as alias]
class ScraperWrapper(threading.Thread):

    def __init__(self, address='localhost', exchange='barkingowl', DEBUG=False):
        threading.Thread.__init__(self)

        self.uid = str(uuid.uuid4())
        self.address = address
        self.exchange = exchange
        self.DEBUG = DEBUG
        self.interval = 1

        # create scraper instance
        self.scraper = Scraper(uid=self.uid, DEBUG=DEBUG)
        self.scraping = False

        # set up message bus
        self.respcon = pika.BlockingConnection(pika.ConnectionParameters(
            host=self.address))
        self.respchan = self.respcon.channel()
        self.respchan.exchange_declare(exchange=self.exchange, type='fanout')

        self.reqcon = pika.BlockingConnection(pika.ConnectionParameters(host=address))
        self.reqchan = self.reqcon.channel()
        self.reqchan.exchange_declare(exchange=exchange, type='fanout')
        result = self.reqchan.queue_declare(exclusive=True)
        queue_name = result.method.queue
        self.reqchan.queue_bind(exchange=exchange, queue=queue_name)
        self.reqchan.basic_consume(self.reqcallback, queue=queue_name, no_ack=True)

        if self.DEBUG:
            print("Scraper Wrapper INIT complete.")
    def run(self):
        # set up callbacks
        self.scraper.setFinishedCallback(self.scraperFinishedCallback)
        self.scraper.setStartedCallback(self.scraperStartedCallback)
        self.scraper.setBroadcastDocCallback(self.scraperBroadcastDocCallback)

        # broadcast availability
        self.broadcastavailable()
        self.reqchan.start_consuming()

    def stop(self):
        self.scraper.stop()
        self.reqchan.stop_consuming()
    def broadcastavailable(self):
        if self.scraper.status['busy'] == True:
            # we are currently scraping, so we are not available - don't broadcast
            return

        isodatetime = strftime("%Y-%m-%d %H:%M:%S")
        packet = {
            'availabledatetime': str(isodatetime)
        }
        payload = {
            'command': 'scraper_available',
            'sourceid': self.uid,
            'destinationid': 'broadcast',
            'message': packet
        }
        jbody = simplejson.dumps(payload)
        self.respchan.basic_publish(exchange=self.exchange, routing_key='', body=jbody)

        #
        # TODO: move this over to its own timer, no need to do it here.
        #
        if self.scraper.stopped():
            raise Exception("Scraper Wrapper Exiting")
        else:
            threading.Timer(self.interval, self.broadcastavailable).start()
    def broadcaststatus(self):
        isodatetime = strftime("%Y-%m-%d %H:%M:%S")
        packet = {
            'status': self.scraper.status,
            'urldata': self.scraper.status['urldata'],
            'statusdatetime': str(isodatetime)
        }
        payload = {
            'command': 'scraper_status',
            'sourceid': self.uid,
            'destinationid': 'broadcast',
            'message': packet
        }
        jbody = simplejson.dumps(payload)
        #time.sleep(.5)
        self.respchan.basic_publish(exchange=self.exchange, routing_key='', body=jbody)
    def broadcastsimplestatus(self):
        isodatetime = strftime("%Y-%m-%d %H:%M:%S")

        if self.scraper.status['urldata'] == {}:
            targeturl = 'null'
        else:
            targeturl = self.scraper.status['urldata']['targeturl']

        packet = {
            'busy': self.scraper.status['busy'],
            'linkcount': self.scraper.status['linkcount'],
            #......... the rest of this example is omitted here .........
Example 3: ScraperWrapper
# Required import: from scraper import Scraper [as alias]
# Or: from scraper.Scraper import stop [as alias]
class ScraperWrapper(object): #threading.Thread):

    def __init__(self,
                 address='localhost',
                 exchange='barkingowl',
                 heartbeat_interval=30,
                 url_parameters=None,
                 broadcast_interval=5,
                 uid=str(uuid.uuid4()),
                 DEBUG=False):

        #threading.Thread.__init__(self,name="ScraperWrapper : %s" % uid)

        self.uid = str(uuid.uuid4())
        self.address = address
        self.exchange = exchange
        self.heartbeat_interval = heartbeat_interval
        self.url_parameters = url_parameters
        self.broadcast_interval = broadcast_interval
        self.uid = uid
        self._DEBUG = DEBUG

        print("ScraperWrapper().__init__(): Creating scraper ...")
        self.scraper = Scraper(
            DEBUG = self._DEBUG,
        )
        self.scraping = False
        self.scraper_thread = None
        print("ScraperWrapper().__init__(): Scraper Created.")

        self.stopped = False

        self.bus_access = BusAccess(
            uid = self.uid,
            address = self.address,
            exchange = self.exchange,
            heartbeat_interval = self.heartbeat_interval,
            url_parameters = self.url_parameters,
            DEBUG = self._DEBUG,
        )
        self.bus_access.set_callback(
            callback = self._reqcallback,
        )

        #threading.Timer(self.interval, self.broadcast_available).start()
        #threading.Timer(self.interval, self.broadcast_simple_status).start()
        #self.broadcast_status()

        #log( "ScraperWrapper.__init__(): Scraper Wrapper INIT complete.", self.DEBUG )
    #def run(self):
    def start(self):
        self.scraper.set_callbacks(
            start_callback = self.scraper_started_callback,
            finished_callback = self.scraper_finished_callback,
            found_doc_callback = self.scraper_broadcast_document_callback,
            new_url_callback = None,
            bandwidth_limit_callback = None,
            memory_limit_callback = None,
            error_callback = None,
        )
        self.broadcast_status()
    def stop(self):
        self.bus_access.stop_listening()
        self.scraper.stop()
        self.stopped = True

    def reset_scraper(self):
        self.scraper.reset()
    def broadcast_status(self):
        if self._DEBUG == True:
            print("ScraperWrapper().broadcast_status(): Entering status loop.")

        while not self.scraping and not self.stopped:

            if self._DEBUG == True:
                print("ScraperWrapper.broadcast_status() sending status pulse ...")

            if self.scraping == False and self.scraper._data['working'] == False:
                packet = {
                    'available_datetime': str(datetime.datetime.now())
                }
                self.bus_access.send_message(
                    command = 'scraper_available',
                    destination_id = 'broadcast',
                    message = packet,
                )

            '''
            packet = {
                'working': self.scraper._data['working'],
                'seen_url_count': len(self.scraper._data['seen_urls']),
                #......... the rest of this example is omitted here .........