本文整理汇总了Python中scraper.Scraper.seturldata方法的典型用法代码示例。如果您正苦于以下问题:Python Scraper.seturldata方法的具体用法?Python Scraper.seturldata怎么用?Python Scraper.seturldata使用的例子?那么恭喜您, 这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在类scraper.Scraper
的用法示例。
在下文中一共展示了Scraper.seturldata方法的1个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Python代码示例。
示例1: ScraperWrapper
# 需要导入模块: from scraper import Scraper [as 别名]
# 或者: from scraper.Scraper import seturldata [as 别名]
#.........这里部分代码省略.........
else:
threading.Timer(self.interval, self.broadcastavailable).start()
def broadcaststatus(self):
    """Publish this scraper's full status to every listener on the fanout exchange.

    Builds the standard command envelope ('scraper_status', addressed to
    'broadcast') around a packet containing the scraper's complete status
    dict, the url data it is currently working, and a wall-clock timestamp,
    then publishes the JSON-encoded payload on the response channel.
    """
    isodatetime = strftime("%Y-%m-%d %H:%M:%S")
    packet = {
        'status': self.scraper.status,
        # BUG FIX: was self.status['urldata'] -- the wrapper itself has no
        # 'status' attribute; the url data lives on the scraper's status
        # dict, exactly as broadcastsimplestatus reads it.
        'urldata': self.scraper.status['urldata'],
        'statusdatetime': str(isodatetime)
    }
    payload = {
        'command': 'scraper_status',
        'sourceid': self.uid,
        'destinationid': 'broadcast',
        'message': packet
    }
    jbody = simplejson.dumps(payload)
    self.respchan.basic_publish(exchange=self.exchange, routing_key='', body=jbody)
def broadcastsimplestatus(self):
    """Broadcast a condensed status packet: busy flag, link counts and the
    current target url (the string 'null' when no url data is loaded)."""
    stamp = strftime("%Y-%m-%d %H:%M:%S")
    status = self.scraper.status
    # No url data yet -> report the literal string 'null' to listeners.
    if status['urldata'] == {}:
        target = 'null'
    else:
        target = status['urldata']['targeturl']
    envelope = {
        'command': 'scraper_status_simple',
        'sourceid': self.uid,
        'destinationid': 'broadcast',
        'message': {
            'busy': status['busy'],
            'linkcount': status['linkcount'],
            'processedlinkcount': len(status['processed']),
            'badlinkcount': len(status['badlinks']),
            'targeturl': target,
            'statusdatetime': str(stamp)
        }
    }
    self.respchan.basic_publish(exchange=self.exchange, routing_key='',
                                body=simplejson.dumps(envelope))
def scraperFinishedCallback(self, payload):
    """Relay the scraper's 'finished' payload onto the broadcast exchange."""
    encoded = simplejson.dumps(payload)
    self.respchan.basic_publish(
        exchange=self.exchange,
        routing_key='',
        body=encoded,
    )
def scraperStartedCallback(self, payload):
    """Forward the scraper's 'started' notification to all listeners."""
    body = simplejson.dumps(payload)
    self.respchan.basic_publish(exchange=self.exchange, routing_key='', body=body)
def scraperBroadcastDocCallback(self, payload):
    """Re-publish a document payload produced by the scraper on the fanout
    exchange so every consumer receives it."""
    self.respchan.basic_publish(
        exchange=self.exchange,
        routing_key='',
        body=simplejson.dumps(payload),
    )
def reqcallback(self, ch, method, properties, body):
    """AMQP message handler: decode a JSON command envelope and dispatch it.

    Known commands:
      url_dispatch       -- when addressed to this wrapper and it is idle,
                            hand the url data to the scraper and start it.
      scraper_finished   -- when emitted by our own scraper, mark idle.
      get_status         -- reply with the full status broadcast.
      get_status_simple  -- reply with the condensed status broadcast.
      shutdown           -- stop this wrapper (when addressed to it).
      global_shutdown    -- stop this wrapper unconditionally.

    Malformed or unexpected messages are reported (in DEBUG mode) and
    dropped so one bad message cannot kill the consumer loop.
    """
    try:
        response = simplejson.loads(body)
        command = response['command']
        if self.DEBUG:
            print("Processing Message:\n\t{0}".format(command))
        if command == 'url_dispatch':
            if response['destinationid'] == self.uid and not self.scraping:
                self.scraper.seturldata(response['message'])
                # Start the scraper thread only once; subsequent dispatches
                # just begin a new crawl on the already-running thread.
                if not self.scraper.started:
                    self.scraper.start()
                self.scraper.begin()
                self.scraping = True
        elif command == 'scraper_finished':
            # Only our own scraper's completion frees this wrapper.
            if response['sourceid'] == self.scraper.uid:
                self.scraping = False
        elif command == 'get_status':
            self.broadcaststatus()
        elif command == 'get_status_simple':
            self.broadcastsimplestatus()
        elif command == 'shutdown':
            if response['destinationid'] == self.uid:
                print("[{0}] Shutting Down Received".format(self.uid))
                self.stop()
        elif command == 'global_shutdown':
            print("Global Shutdown Received")
            self.stop()
    except Exception as err:
        # BUG FIX: was a bare 'except:' that also swallowed SystemExit and
        # KeyboardInterrupt and reported no detail; catch Exception only
        # and include the actual error in the DEBUG output.
        if self.DEBUG:
            print("Message Error: {0}".format(err))