Python dispatcher.connect Method Code Examples

This article collects typical usage examples of the Python method scrapy.xlib.pydispatch.dispatcher.connect. If you are wondering what dispatcher.connect does, how to call it, or how it is used in real projects, the curated examples below should help. You can also explore further usage examples from the scrapy.xlib.pydispatch.dispatcher module.


The following presents 15 code examples of dispatcher.connect, ordered by popularity by default. You can upvote the examples you like or find useful; your feedback helps the system recommend better Python code examples.
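
To show the shape shared by most of the examples below, here is a minimal, self-contained sketch: a bound method is registered with dispatcher.connect in the spider's __init__, and Scrapy invokes it when the corresponding signal fires. The spider name and start URL are hypothetical placeholders.

from scrapy import Spider, signals
from scrapy.xlib.pydispatch import dispatcher

class DemoSpider(Spider):
    # Hypothetical spider, used only to illustrate the signal hookup
    name = 'demo'
    start_urls = ['http://example.com']

    def __init__(self, *args, **kwargs):
        super(DemoSpider, self).__init__(*args, **kwargs)
        # spider_closed fires once when this spider finishes crawling
        dispatcher.connect(self.spider_closed, signals.spider_closed)

    def spider_closed(self, spider):
        # A typical place to flush buffers or close database connections
        self.logger.info("Spider %s closed", spider.name)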

Example 1: __init__

# Required import: from scrapy.xlib.pydispatch import dispatcher [as alias]
# Or: from scrapy.xlib.pydispatch.dispatcher import connect [as alias]
def __init__(self, rule):
        dispatcher.connect(self.spider_opened, signals.spider_opened)
        dispatcher.connect(self.spider_closed, signals.spider_closed)
        self.rule = rule
        self.name = rule.name
        self.allowed_domains = rule.allowed_domains.split(',')
        self.start_urls = rule.start_urls.split(',')
        rule_list = []

        # If a `next page` XPath is configured, add a rule that follows pagination links
        if len(rule.next_page):
            rule_list.append(Rule(LinkExtractor(restrict_xpaths=rule.next_page), follow=True))

        rule_list.append(Rule(LinkExtractor(
            allow=rule.allow_url.split(','),
            unique=True),
            follow=True,
            callback='parse_item'))

        self.rules = tuple(rule_list)
        super(ProxySpiderSpider, self).__init__() 
Author: leeyis, Project: ip_proxy_pool, Lines: 23, Source: proxy_spider.py

Example 2: start_requests

# Required import: from scrapy.xlib.pydispatch import dispatcher [as alias]
# Or: from scrapy.xlib.pydispatch.dispatcher import connect [as alias]
def start_requests(self):
        """
        NOTE: This method is ONLY CALLED ONCE by Scrapy (to kick things off).
        Get the first url to crawl and return a Request object
        The response will be passed to self.parse, which will continue
        the process of parsing all the other generated URLs
        """
        if not self.args:
            # connect to mysql database
            self.url.connect()

            # grab the first URL to begin crawling
            start_url = self.url.next_url().next()
        else:
            start_url = self.start_urls[0]

        request = Request(start_url, dont_filter=True)

        # important to yield, not return
        yield request 
Author: santoshghimire, Project: AmazonScraping, Lines: 22, Source: adc_spider.py

Example 3: __init__

# Required import: from scrapy.xlib.pydispatch import dispatcher [as alias]
# Or: from scrapy.xlib.pydispatch.dispatcher import connect [as alias]
def __init__(self):
        dispatcher.connect(self.spider_opended, signals.spider_opened)
        dispatcher.connect(self.spider_closed, signals.spider_closed)
        dispatcher.connect(self.engine_stopped, signals.engine_stopped)
        dispatcher.connect(self.engine_started, signals.engine_started)

        # Current working directory (the scrapy_site project directory)
        self.curpath = os.getcwd()
        # Directory where per-spider message files are stored
        self.spidername_filepath = self.curpath + "/scrapy_site/msg/"

        # Load the keywords configured in keyword.conf
        self.keywordsDict = dict()
        self.getKeywords()

        # Load the configured website names
        self.webnamesDict = dict()
        self.getWebnames()

        # Collected messages
        self.msgDict = dict()

        SavePipeline.initCount = SavePipeline.initCount + 1 
Author: hl10502, Project: scrapy_site, Lines: 25, Source: pipelines.py

Example 4: main

# Required import: from scrapy.xlib.pydispatch import dispatcher [as alias]
# Or: from scrapy.xlib.pydispatch.dispatcher import connect [as alias]
def main():
    """Main routine for running the spider"""
    # set up a signal handler to catch scraped items
    def catch_item(sender, item, **kwargs):
        print "Item extracted:", item
    dispatcher.connect(catch_item, signal=signals.item_passed)

    settings = Settings()
    settings.set("USER_AGENT", "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_10_4) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/43.0.2357.134 Safari/537.36")
    settings.set("LOG_ENABLED", False)

    # set up the crawler
    from scrapy.crawler import CrawlerProcess

    crawler = CrawlerProcess(settings)

    # register the spider with the crawler
    crawler.crawl(EuropythonSpyder())

    # start scrapy
    print "STARTING ENGINE"
    crawler.start()  # start the crawler by running the defined spider
    print "ENGINE STOPPED"
Author: jmortega, Project: pydata_webscraping, Lines: 25, Source: EuropythonSpyder.py

Example 5: main

# Required import: from scrapy.xlib.pydispatch import dispatcher [as alias]
# Or: from scrapy.xlib.pydispatch.dispatcher import connect [as alias]
def main():
    """Main routine for running the spider"""
    # set up a signal handler to catch scraped items
    def catch_item(sender, item, **kwargs):
        print "Item extracted:", item
    dispatcher.connect(catch_item, signal=signals.item_passed)

    settings = Settings()
    settings.set("USER_AGENT", "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_10_4) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/43.0.2357.134 Safari/537.36")
    settings.set("LOG_ENABLED", False)

    # set up the crawler
    from scrapy.crawler import CrawlerProcess

    crawler = CrawlerProcess(settings)

    # register the spider with the crawler
    crawler.crawl(BloggerSpider())

    # start scrapy
    print "STARTING ENGINE"
    crawler.start()  # start the crawler by running the defined spider
    print "ENGINE STOPPED"
Author: jmortega, Project: pydata_webscraping, Lines: 25, Source: crawlerBlog.py

Example 6: main

# Required import: from scrapy.xlib.pydispatch import dispatcher [as alias]
# Or: from scrapy.xlib.pydispatch.dispatcher import connect [as alias]
def main():
    """Main routine for running the spider"""
    from scrapy.xlib.pydispatch import dispatcher

    # set up a signal handler to catch scraped items
    def catch_item(sender, item, **kwargs):
        print "Item extracted:", item
    dispatcher.connect(catch_item, signal=signals.item_passed)

    settings = Settings()
    settings.set("USER_AGENT", "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_10_4) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/43.0.2357.134 Safari/537.36")
    settings.set("LOG_ENABLED", False)

    # set up the crawler
    from scrapy.crawler import CrawlerProcess

    crawler = CrawlerProcess(settings)

    # define the spider for the crawler
    crawler.crawl(PydataSpiderDetails())

    print "STARTING ENGINE"
    crawler.start()  # start the crawler
    print "ENGINE STOPPED"
Author: jmortega, Project: pydata_webscraping, Lines: 26, Source: PydataSpiderDetails.py

Example 7: __init__

# Required import: from scrapy.xlib.pydispatch import dispatcher [as alias]
# Or: from scrapy.xlib.pydispatch.dispatcher import connect [as alias]
def __init__(self, *a, **kw):
        super(StackSpider, self).__init__(*a, **kw)
        self.time = datetime.datetime.now()
        self.congress = Congress()
        self.members = self.congress.searchAll("diputados")
        self.groups = self.congress.searchAll("grupos")
        dispatcher.connect(self.whenFinish, signals.spider_closed) 
Author: CIECODE-Madrid, Project: tipi-engine, Lines: 9, Source: initiatives.py

Example 8: __init__

# Required import: from scrapy.xlib.pydispatch import dispatcher [as alias]
# Or: from scrapy.xlib.pydispatch.dispatcher import connect [as alias]
def __init__(self, *a, **kw):
        """Attach a callback to the spider_closed signal"""
        super(Kijiji, self).__init__(*a, **kw)
        dispatcher.connect(self.spider_closed, signals.spider_closed)
        if USE_DB is True:
            self.open_database()
            if DRAW_ALL_DB is True and DRAW_NEW_AD_ONLY is False:
                # add already know marker
                for x in self.m_list:
                    self.add_marker(x, False) 
Author: sbourdelin, Project: scrapyjiji, Lines: 12, Source: scrapyjiji.py

Example 9: __init__

# Required import: from scrapy.xlib.pydispatch import dispatcher [as alias]
# Or: from scrapy.xlib.pydispatch.dispatcher import connect [as alias]
def __init__(self, *a, **kw):
        super(TianqiSpider, self).__init__(*a, **kw)
        dispatcher.connect(self.spider_closed, signals.spider_closed)

        self.sql = SqlHelper()
        self.weather_table_name = config.weather_table
        self.citys = []

        self.init() 
Author: awolfly9, Project: weather, Lines: 11, Source: tianqi.py

Example 10: __init__

# Required import: from scrapy.xlib.pydispatch import dispatcher [as alias]
# Or: from scrapy.xlib.pydispatch.dispatcher import connect [as alias]
def __init__(self):
        dispatcher.connect(self.spider_opened, signals.spider_opened)
        dispatcher.connect(self.spider_closed, signals.spider_closed) 
Author: mersanuzun, Project: alsam_mi_ki, Lines: 5, Source: pipelines.py

Example 11: __init__

# Required import: from scrapy.xlib.pydispatch import dispatcher [as alias]
# Or: from scrapy.xlib.pydispatch.dispatcher import connect [as alias]
def __init__(self):
        self.filename += settings.MARKET_NAME
        self.filename += ".db"
        self.filename = path.join(settings.DATABASE_DIR, self.filename)
        print self.filename
        self.conn = None
        dispatcher.connect(self.initialize, signals.engine_started)
        dispatcher.connect(self.finalize, signals.engine_stopped) 
Author: M157q, Project: Android-Repackaged-App-Detection-System, Lines: 10, Source: pipelines.py

Example 12: initialize

# Required import: from scrapy.xlib.pydispatch import dispatcher [as alias]
# Or: from scrapy.xlib.pydispatch.dispatcher import connect [as alias]
def initialize(self):
        if path.exists(self.filename):
            self.conn = sqlite3.connect(self.filename)
        else:
            self.create_table()
        self.conn.execute("PRAGMA journal_mode=WAL;")
        self.conn.commit() 
Author: M157q, Project: Android-Repackaged-App-Detection-System, Lines: 9, Source: pipelines.py

Example 13: create_table

# Required import: from scrapy.xlib.pydispatch import dispatcher [as alias]
# Or: from scrapy.xlib.pydispatch.dispatcher import connect [as alias]
def create_table(self):
        self.conn = sqlite3.connect(self.filename)
        self.conn.execute("create table apps( \
                id integer primary key autoincrement, \
                url varchar(100) not null unique, \
                downloaded int default 0)"
            )
        self.conn.commit() 
Author: M157q, Project: Android-Repackaged-App-Detection-System, Lines: 10, Source: pipelines.py

Example 14: __init__

# Required import: from scrapy.xlib.pydispatch import dispatcher [as alias]
# Or: from scrapy.xlib.pydispatch.dispatcher import connect [as alias]
def __init__(self, *args, **kwargs):
        super(FullDomainSpider, self).__init__(*args, **kwargs)
        self.allowed_domains = kwargs.get('allowed_domains').split(',')
        self.org = kwargs.get('org')
        self.start_urls = kwargs.get('start_urls').split(',')
        dispatcher.connect(self.spider_opened, signals.spider_opened)
        dispatcher.connect(self.spider_closed, signals.spider_closed) 
Author: ciscocsirt, Project: malspider, Lines: 9, Source: full_domain_spider.py

Example 15: spider_opened

# Required import: from scrapy.xlib.pydispatch import dispatcher [as alias]
# Or: from scrapy.xlib.pydispatch.dispatcher import connect [as alias]
def spider_opened(self, spider):
        self.conn = MySQLdb.connect(host=settings.MYSQL_HOST, db=settings.MYSQL_DB, user=settings.MYSQL_USER, passwd=settings.MYSQL_PASSWORD, charset='utf8', use_unicode=True)
        cursor = spider.conn.cursor()
        sql_str = "SELECT pattern from whitelist"
        cursor.execute(sql_str)
        self.custom_whitelist = cursor.fetchall()
        try:
            alexa_whitelist_file = pkgutil.get_data("malspider", "resources/alexa-1k-whitelist.csv").decode('ascii')
            self.alexa_whitelist = alexa_whitelist_file.splitlines()
        except Exception:
            log.msg("Error loading alexa whitelist...", level=log.ERROR)
Author: ciscocsirt, Project: malspider, Lines: 13, Source: full_domain_spider.py
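
Note: scrapy.xlib.pydispatch has been deprecated and is removed in recent Scrapy releases. On a modern Scrapy, the supported equivalent is to connect handlers through the crawler's SignalManager in from_crawler. A minimal sketch (the spider class here is hypothetical):

from scrapy import Spider, signals

class ModernSpider(Spider):
    name = 'modern_demo'

    @classmethod
    def from_crawler(cls, crawler, *args, **kwargs):
        spider = super(ModernSpider, cls).from_crawler(crawler, *args, **kwargs)
        # crawler.signals (a SignalManager) replaces scrapy.xlib.pydispatch.dispatcher
        crawler.signals.connect(spider.spider_closed, signal=signals.spider_closed)
        return spider

    def spider_closed(self, spider):
        self.logger.info("Spider %s closed", spider.name)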


Note: The scrapy.xlib.pydispatch.dispatcher.connect examples in this article were compiled by 纯净天空 from open source code and documentation platforms such as GitHub and MSDocs. The snippets are selected from open source projects contributed by their respective developers, and copyright of the source code remains with the original authors. Refer to the corresponding project's license before distributing or using the code; do not reproduce without permission.