This article collects and organizes typical usage examples of the scrapy.log.msg method in Python. If you have been wondering exactly what log.msg does, how to call it, or what real-world uses look like, the curated examples below may help. You can also explore further usage examples for scrapy.log, the module the method lives in.
The section below presents 15 code examples of the log.msg method, ordered by popularity.
Example 1: process_item
# Required import: from scrapy import log [as alias]
# Or: from scrapy.log import msg [as alias]
def process_item(self, item, spider):
"""
Scrape edilen her girdiyi veritabanina ekle. Bu method sayfa process edildikten, icerisindeki
bilgiler cekildikten ve Item objesi olusturulduktan sonra her seferinde cagriliyor.
:param item: Parse edilmis nesne
:type item: Scrapy item
:param spider: Su anda calisan, spiders/ dizini altinda belirtilen spiderlardan herhangi biri
:type spider: Scrapy spider
:return: Gonderilen Item
:rtype: Scrapy item
"""
log.msg('[%s] PROCESSING ITEM [item no: %s, baslik: %s]' %
(spider.name, item['girdi_id'], item['baslik']),
level=log.DEBUG)
girdi = Girdi(**item)
try:
session.add(girdi)
session.commit()
except:
session.rollback()
raise
finally:
session.close()
return item
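The pipeline assumes a SQLAlchemy session and a Girdi model ("girdi" is Turkish for "entry"), neither shown here. For Scrapy to call it, it must be registered; a minimal sketch, assuming the class is named GirdiPipeline in a pipelines module (both names hypothetical; older Scrapy versions used a plain list for this setting):

# settings.py -- hypothetical wiring; lower numbers run earlier in the chain
ITEM_PIPELINES = {
    'myproject.pipelines.GirdiPipeline': 300,
}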
Example 2: request_seen
# Required import: from scrapy import log [as alias]
# Or: from scrapy.log import msg [as alias]
def request_seen(self, request):
is_seen = is_request_seen(request)
if not is_seen:
log.msg('New URL: %s. Adding it to seen database' % request.url, log.DEBUG)
seen = Seen(fingerprint=request_fingerprint(request),
url=request.url,
last_crawl_time=datetime.now())
try:
session.add(seen)
session.commit()
except:
session.rollback()
raise
finally:
session.close()
else:
log.msg('[seen] "%s" is seen. Skipping.' % request.url, log.INFO)
return is_seen
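Neither the is_request_seen helper nor the Seen model is shown. A plausible sketch of the helper under the same SQLAlchemy assumptions (the myproject.models import is hypothetical):

# Hypothetical helper: look the fingerprint up in the table the filter writes to.
from scrapy.utils.request import request_fingerprint
from myproject.models import Seen, session  # hypothetical module

def is_request_seen(request):
    fp = request_fingerprint(request)
    return session.query(Seen).filter_by(fingerprint=fp).first() is not None

A dupefilter built this way is activated through Scrapy's DUPEFILTER_CLASS setting.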
Example 3: parse_hq_stock_k_1d
# Required import: from scrapy import log [as alias]
# Or: from scrapy.log import msg [as alias]
def parse_hq_stock_k_1d(self, response):
json_response = json.loads(response.body_as_unicode())
if 'success' not in json_response or json_response['success'] != 'true':
log.msg('parse_hq_stock_k_1d parse failed')
return
symbol = json_response['stock']['symbol']
if json_response['chartlist']:
for chart in json_response['chartlist']:
item = StockKLineDayItem()
item['symbol'] = symbol
item['day'] = parser.parse(chart['time']).replace(tzinfo=None)
item['open_price'] = chart['open']
item['close_price'] = chart['close']
item['low_price'] = chart['low']
item['high_price'] = chart['high']
item['delta_price'] = chart.get('chg', 0)
item['turn_rate'] = chart.get('turnrate', 0)
item['delta_percent'] = chart.get('percent', 0)
item['ma5'] = chart.get('ma5', None)
item['ma10'] = chart.get('ma10', None)
item['ma20'] = chart.get('ma20', None)
item['ma30'] = chart.get('ma30', None)
item['volume'] = chart.get('volume', 0)
yield item
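parser here is dateutil.parser, and replace(tzinfo=None) strips the offset so the value fits a naive DATETIME column. A quick illustration; the sample string is only an assumption about what the chart 'time' field looks like:

from dateutil import parser

ts = parser.parse('Fri Jun 27 00:00:00 +0800 2014')  # hypothetical sample value
print(ts.replace(tzinfo=None))  # 2014-06-27 00:00:00 -- offset discarded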
Example 4: warn
# Required import: from scrapy import log [as alias]
# Or: from scrapy.log import msg [as alias]
def warn(msg):
log.msg(str(msg), level=log.WARNING)
Example 5: info
# Required import: from scrapy import log [as alias]
# Or: from scrapy.log import msg [as alias]
def info(msg):
log.msg(str(msg), level=log.INFO)
Example 6: debug
# Required import: from scrapy import log [as alias]
# Or: from scrapy.log import msg [as alias]
def debug(msg):
log.msg(str(msg), level=log.DEBUG)
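Examples 4 to 6 wrap log.msg behind stdlib-style helper names, and the str() coercion means non-string arguments are accepted rather than raising. With the three wrappers in scope:

debug('fetching page %d' % 3)
info('crawl finished')
warn({'code': 503, 'url': 'http://example.com'})  # coerced via str()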
Example 7: process_request
# Required import: from scrapy import log [as alias]
# Or: from scrapy.log import msg [as alias]
def process_request(self, request, spider):
# TODO implement complex proxy providing algorithm
if self.use_proxy(request):
p = random.choice(PROXIES)
try:
request.meta['proxy'] = "http://%s" % p['ip_port']
print(request.meta['proxy'])
        except Exception as e:
            log.msg("Exception %s" % e, level=log.CRITICAL)
Example 8: process_request
# Required import: from scrapy import log [as alias]
# Or: from scrapy.log import msg [as alias]
def process_request(self, request, spider):
user_agent = UserAgent()
ua = user_agent.random
if ua:
log.msg('Current UserAgent: '+ua, level=log.INFO)
request.headers.setdefault('User-Agent', ua)
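UserAgent comes from the third-party fake_useragent package. Instantiating it inside process_request reloads its browser data on every request; building it once in __init__ is cheaper. A sketch of that variant (class name hypothetical):

from fake_useragent import UserAgent

class RandomUserAgentMiddleware(object):
    def __init__(self):
        self.ua = UserAgent()  # load the user-agent database once

    def process_request(self, request, spider):
        request.headers.setdefault('User-Agent', self.ua.random)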
Example 9: process_spider_output
# Required import: from scrapy import log [as alias]
# Or: from scrapy.log import msg [as alias]
def process_spider_output(self, response, result, spider):
for x in result:
if isinstance(x, Request) and hasattr(spider, 'disallow_urls'):
if self.should_follow(x, spider):
yield x
else:
log.msg("Filtered URL %s: " % (x.url),
level=log.DEBUG, spider=spider)
else:
yield x
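should_follow is not shown; a plausible implementation, assuming spider.disallow_urls is an iterable of URL substrings (an assumption about that project):

def should_follow(self, request, spider):
    # Hypothetical: drop requests whose URL contains any disallowed fragment.
    return not any(part in request.url for part in spider.disallow_urls)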
Example 10: stop
# Required import: from scrapy import log [as alias]
# Or: from scrapy.log import msg [as alias]
def stop(self):
if not self._started:
raise Exception('spider not started.')
elif self._stopped:
raise Exception('spider has already stopped')
else:
log.msg('Stop')
self._stopped = True
self.crawler.stop()
Example 11: parse_hq_stock_category
# Required import: from scrapy import log [as alias]
# Or: from scrapy.log import msg [as alias]
def parse_hq_stock_category(self, response):
json_response = json.loads(response.body_as_unicode())
if 'industryname' not in json_response:
        log.msg('parse_hq_stock_category parse failed')
        return
item = StockItem()
item['symbol'] = json_response['code']
item['xq_category'] = json_response['industryname']
item['zjh_category'] = ''
yield item
Example 12: parse_hq_stock
# Required import: from scrapy import log [as alias]
# Or: from scrapy.log import msg [as alias]
def parse_hq_stock(self, response):
for td in response.xpath('//table[@class="topTable"]/tr/td').extract():
td_selector = Selector(text=td)
name_list = td_selector.xpath('//td/text()').extract()
value_list = td_selector.xpath('//td/span/text()').extract()
if len(name_list) and len(value_list):
name = name_list[0]
value = value_list[0]
log.msg(name + '_' + value)
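Selector(text=td) re-parses each cell's HTML fragment from scratch; iterating the selectors directly and using relative XPath avoids that round-trip. A behavior-equivalent sketch of the same loop:

for td in response.xpath('//table[@class="topTable"]/tr/td'):
    name_list = td.xpath('./text()').extract()
    value_list = td.xpath('./span/text()').extract()
    if name_list and value_list:
        log.msg(name_list[0] + '_' + value_list[0])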
Example 13: process_response
# Required import: from scrapy import log [as alias]
# Or: from scrapy.log import msg [as alias]
def process_response(self, request, response, spider):
url = response.url
if response.status in [301, 307]:
log.msg("trying to redirect us: %s" % url, level=log.INFO)
reason = 'redirect %d' % response.status
return self._retry(request, reason, spider) or response
interval, redirect_url = get_meta_refresh(response)
# handle meta redirect
if redirect_url:
log.msg("trying to redirect us: %s" % url, level=log.INFO)
reason = 'meta'
return self._retry(request, reason, spider) or response
hxs = HtmlXPathSelector(response)
# test for captcha page
captcha = hxs.select(
".//input[contains(@id, 'captchacharacters')]").extract()
if captcha:
log.msg("captcha page %s" % url, level=log.INFO)
        reason = 'captcha'
return self._retry(request, reason, spider) or response
return response
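self._retry is inherited from Scrapy's RetryMiddleware, which this class presumably subclasses. Swapping it in for the stock middleware might look like this; the project path is hypothetical, and the scrapy.contrib path matches the pre-1.0 Scrapy this code targets:

# settings.py
DOWNLOADER_MIDDLEWARES = {
    'scrapy.contrib.downloadermiddleware.retry.RetryMiddleware': None,  # disable the stock one
    'myproject.middlewares.CaptchaRetryMiddleware': 550,  # hypothetical class
}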
Example 14: process_request
# Required import: from scrapy import log [as alias]
# Or: from scrapy.log import msg [as alias]
def process_request(self, request, spider):
# TODO implement complex proxy providing algorithm
if self.use_proxy(request):
p = random.choice(PROXIES)
try:
request.meta['proxy'] = "http://%s" % p['ip_port']
        except Exception as e:
            log.msg("Exception %s" % e, level=log.CRITICAL)
Example 15: parse_hq_stock_name_list
# Required import: from scrapy import log [as alias]
# Or: from scrapy.log import msg [as alias]
def parse_hq_stock_name_list(self, response):
json_response = json.loads(response.body_as_unicode())
if 'success' not in json_response or json_response['success'] != 'true':
log.msg('parse_hq_stock_name_list parse failed')
return
for stock in json_response['stocks']:
item = StockItem()
item['symbol'] = stock['symbol']
item['name'] = stock['name']
item['market'] = getmarket(stock['symbol'])
item['catelog'] = getcatelog(stock['symbol'])
yield item
request = scrapy.Request("http://xueqiu.com/stock/industry/stockList.json?type=1&code=%s&size=0" % (stock['symbol']),
cookies=self.get_cookies(),
callback=self.parse_hq_stock_category)
yield request
if item['market'] == 'PRE':
continue
request = scrapy.Request("http://xueqiu.com/v4/stock/quote.json?code=%s&_=1" % (stock['symbol']),
meta={'symbol': stock['symbol']},
cookies=self.get_cookies(),
callback=self.parse_hq_stock_basic)
yield request
request = scrapy.Request("http://xueqiu.com/S/%s" % stock['symbol'],
cookies=self.get_cookies(),
callback=self.parse_hq_stock)
# yield request
        import datetime
        from dateutil.relativedelta import relativedelta
        now = datetime.datetime.now()
        years_ago = now - relativedelta(years=1)
        datetime_to_timestamp = lambda dt: int((dt - datetime.datetime(1970, 1, 1)).total_seconds() * 1000)
        begin = datetime_to_timestamp(years_ago)
        end = datetime_to_timestamp(now)
request = scrapy.Request("http://xueqiu.com/stock/forchartk/stocklist.json?symbol=%s&period=1day&type=after&begin=%d&end=%d&_=1" % (stock['symbol'], begin, end),
cookies=self.get_cookies(),
callback=self.parse_hq_stock_k_1d)
yield request
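The forchartk endpoint takes begin and end as millisecond Unix timestamps, which is exactly what the datetime_to_timestamp lambda produces; note the whole block is loop-invariant and could be hoisted above the for loop. Checking the arithmetic on a fixed date:

import datetime

epoch = datetime.datetime(1970, 1, 1)
print(int((datetime.datetime(2014, 6, 27) - epoch).total_seconds() * 1000))
# -> 1403827200000 (milliseconds since the epoch)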