當前位置: 首頁>>代碼示例>>Python>>正文


Python dispatcher.connect方法代碼示例

本文整理匯總了Python中scrapy.xlib.pydispatch.dispatcher.connect方法的典型用法代碼示例。如果您正苦於以下問題:Python dispatcher.connect方法的具體用法?Python dispatcher.connect怎麼用?Python dispatcher.connect使用的例子?那麼, 這裏精選的方法代碼示例或許可以為您提供幫助。您也可以進一步了解該方法所在scrapy.xlib.pydispatch.dispatcher的用法示例。


在下文中一共展示了dispatcher.connect方法的15個代碼示例,這些例子默認根據受歡迎程度排序。您可以為喜歡或者感覺有用的代碼點讚,您的評價將有助於系統推薦出更棒的Python代碼示例。

示例1: __init__

# 需要導入模塊: from scrapy.xlib.pydispatch import dispatcher [as 別名]
# 或者: from scrapy.xlib.pydispatch.dispatcher import connect [as 別名]
def __init__(self, rule):
        """Configure the spider from ``rule`` and build its crawl rules.

        Args:
            rule: Object exposing ``name``, ``allowed_domains``,
                ``start_urls``, ``next_page`` and ``allow_url``.
                ``allowed_domains``, ``start_urls`` and ``allow_url`` are
                split on commas, so they must be strings.
        """
        dispatcher.connect(self.spider_opened, signals.spider_opened)
        dispatcher.connect(self.spider_closed, signals.spider_closed)
        self.rule = rule
        self.name = rule.name
        self.allowed_domains = rule.allowed_domains.split(',')
        self.start_urls = rule.start_urls.split(',')
        rule_list = []

        # Follow "next page" links only when an XPath for them is configured.
        # A truthiness test also accepts None, which len() would reject with
        # a TypeError.
        if rule.next_page:
            rule_list.append(Rule(LinkExtractor(restrict_xpaths=rule.next_page), follow=True))

        # Links matching any of the allowed URL patterns are followed and
        # handed to the ``parse_item`` callback.
        rule_list.append(Rule(LinkExtractor(
            allow=rule.allow_url.split(','),
            unique=True),
            follow=True,
            callback='parse_item'))

        # NOTE(review): ``rules`` is assigned before the base initializer is
        # called; presumably the base class reads it there - keep this order.
        self.rules = tuple(rule_list)
        super(ProxySpiderSpider, self).__init__()
開發者ID:leeyis,項目名稱:ip_proxy_pool,代碼行數:23,代碼來源:proxy_spider.py

示例2: start_requests

# 需要導入模塊: from scrapy.xlib.pydispatch import dispatcher [as 別名]
# 或者: from scrapy.xlib.pydispatch.dispatcher import connect [as 別名]
def start_requests(self):
        """
        Yield the single request that starts the crawl.

        NOTE: This method is ONLY CALLED ONCE by Scrapy (to kick things off).
        Get the first url to crawl and return a Request object
        This will be parsed to self.parse which will continue
        the process of parsing all the other generated URLs
        """
        if not self.args:
            # connect to mysql database
            self.url.connect()

            # grab the first URL to begin crawling; the builtin next() works
            # on Python 2.6+ and Python 3, whereas the ``.next()`` method
            # only exists on Python 2 iterators
            start_url = next(self.url.next_url())
        else:
            start_url = self.start_urls[0]

        request = Request(start_url, dont_filter=True)

        # important to yield, not return
        yield request
開發者ID:santoshghimire,項目名稱:AmazonScraping,代碼行數:22,代碼來源:adc_spider.py

示例3: __init__

# 需要導入模塊: from scrapy.xlib.pydispatch import dispatcher [as 別名]
# 或者: from scrapy.xlib.pydispatch.dispatcher import connect [as 別名]
def __init__(self):
        """Subscribe to spider/engine signals and prepare the pipeline state."""
        dispatcher.connect(self.spider_opended, signal=signals.spider_opened)
        dispatcher.connect(self.spider_closed, signal=signals.spider_closed)
        dispatcher.connect(self.engine_stopped, signal=signals.engine_stopped)
        dispatcher.connect(self.engine_started, signal=signals.engine_started)

        # Working directory at construction time; the message directory
        # below is built relative to it.
        working_dir = os.getcwd()
        self.curpath = working_dir
        # <cwd>/scrapy_site/msg/
        self.spidername_filepath = working_dir + "/scrapy_site/msg/"

        # Keyword table, starts empty.
        # NOTE(review): presumably filled by getKeywords() - confirm.
        self.keywordsDict = {}
        self.getKeywords()

        # Web-name table, starts empty.
        # NOTE(review): presumably filled by getWebnames() - confirm.
        self.webnamesDict = {}
        self.getWebnames()

        # Message table, starts empty.
        self.msgDict = {}

        # Class-level counter of how many times this initializer has run.
        SavePipeline.initCount += 1
開發者ID:hl10502,項目名稱:scrapy_site,代碼行數:25,代碼來源:pipelines.py

示例4: main

# 需要導入模塊: from scrapy.xlib.pydispatch import dispatcher [as 別名]
# 或者: from scrapy.xlib.pydispatch.dispatcher import connect [as 別名]
def main():
	"""Main routine for running the spider.

	Connects a receiver that prints every item, configures the user agent,
	disables logging, and runs ``EuropythonSpyder`` in a ``CrawlerProcess``.
	"""
	# set up signal to catch items scraped
	# NOTE(review): ``item_passed`` is a legacy signal name; newer Scrapy
	# releases call it ``item_scraped`` - confirm against the installed version.
	def catch_item(sender, item, **kwargs):
		# One pre-formatted argument prints the same text on Python 2 and 3.
		print("Item extracted: %s" % (item,))
	dispatcher.connect(catch_item, signal=signals.item_passed)

	settings = Settings()
	settings.set("USER_AGENT", "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_10_4) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/43.0.2357.134 Safari/537.36")
	settings.set("LOG_ENABLED", False)

	# setup crawler
	from scrapy.crawler import CrawlerProcess

	crawler = CrawlerProcess(settings)

	# define the spider for the crawler
	crawler.crawl(EuropythonSpyder())

	# start scrapy
	print("STARTING ENGINE")
	crawler.start()  # start the crawler, which runs the spider defined above
	print("ENGINE STOPPED")
開發者ID:jmortega,項目名稱:pydata_webscraping,代碼行數:25,代碼來源:EuropythonSpyder.py

示例5: main

# 需要導入模塊: from scrapy.xlib.pydispatch import dispatcher [as 別名]
# 或者: from scrapy.xlib.pydispatch.dispatcher import connect [as 別名]
def main():
	"""Main routine for running the spider.

	Connects a receiver that prints every item, configures the user agent,
	disables logging, and runs ``BloggerSpider`` in a ``CrawlerProcess``.
	"""
	# set up signal to catch items scraped
	# NOTE(review): ``item_passed`` is a legacy signal name; newer Scrapy
	# releases call it ``item_scraped`` - confirm against the installed version.
	def catch_item(sender, item, **kwargs):
		# One pre-formatted argument prints the same text on Python 2 and 3.
		print("Item Extraido: %s" % (item,))
	dispatcher.connect(catch_item, signal=signals.item_passed)

	settings = Settings()
	settings.set("USER_AGENT", "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_10_4) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/43.0.2357.134 Safari/537.36")
	settings.set("LOG_ENABLED", False)

	# setup crawler
	from scrapy.crawler import CrawlerProcess

	crawler = CrawlerProcess(settings)

	# define the spider for the crawler
	crawler.crawl(BloggerSpider())

	# start scrapy
	print("STARTING ENGINE")
	crawler.start()  # start the crawler, which runs the spider defined above
	print("ENGINE STOPPED")
開發者ID:jmortega,項目名稱:pydata_webscraping,代碼行數:25,代碼來源:crawlerBlog.py

示例6: main

# 需要導入模塊: from scrapy.xlib.pydispatch import dispatcher [as 別名]
# 或者: from scrapy.xlib.pydispatch.dispatcher import connect [as 別名]
def main():
	"""Main routine for running the spider.

	Connects a receiver that prints every item, configures the user agent,
	disables logging, and runs ``PydataSpiderDetails`` in a ``CrawlerProcess``.
	"""
	from scrapy.xlib.pydispatch import dispatcher

	# set up signal to catch items scraped
	# NOTE(review): ``item_passed`` is a legacy signal name; newer Scrapy
	# releases call it ``item_scraped`` - confirm against the installed version.
	def catch_item(sender, item, **kwargs):
		# One pre-formatted argument prints the same text on Python 2 and 3.
		print("Item extracted: %s" % (item,))
	dispatcher.connect(catch_item, signal=signals.item_passed)

	settings = Settings()
	settings.set("USER_AGENT", "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_10_4) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/43.0.2357.134 Safari/537.36")
	settings.set("LOG_ENABLED", False)

	# setup crawler
	from scrapy.crawler import CrawlerProcess

	crawler = CrawlerProcess(settings)

	# define spider for the crawler
	crawler.crawl(PydataSpiderDetails())

	print("STARTING ENGINE")
	crawler.start()  # start the crawler
	print("ENGINE STOPPED")
開發者ID:jmortega,項目名稱:pydata_webscraping,代碼行數:26,代碼來源:PydataSpiderDetails.py

示例7: __init__

# 需要導入模塊: from scrapy.xlib.pydispatch import dispatcher [as 別名]
# 或者: from scrapy.xlib.pydispatch.dispatcher import connect [as 別名]
def __init__(self,*a, **kw):
        """Initialise the spider, load congress data and hook ``spider_closed``.

        Positional and keyword arguments are forwarded unchanged to the
        base-class initializer.
        """
        super(StackSpider, self).__init__(*a, **kw)
        # Timestamp taken when the spider object is created.
        self.time = datetime.datetime.now()
        congress = Congress()
        self.congress = congress
        # Two lookups through the same Congress object.
        self.members = congress.searchAll("diputados")
        self.groups = congress.searchAll("grupos")
        # Have whenFinish called on the spider_closed signal.
        dispatcher.connect(self.whenFinish, signal=signals.spider_closed)
開發者ID:CIECODE-Madrid,項目名稱:tipi-engine,代碼行數:9,代碼來源:initiatives.py

示例8: __init__

# 需要導入模塊: from scrapy.xlib.pydispatch import dispatcher [as 別名]
# 或者: from scrapy.xlib.pydispatch.dispatcher import connect [as 別名]
def __init__(self, *a, **kw):
        """Attach a callback to the spider_closed signal.

        When ``USE_DB`` is ``True`` the database is opened as well, and when
        additionally ``DRAW_ALL_DB`` is ``True`` and ``DRAW_NEW_AD_ONLY`` is
        ``False`` a marker is added for every entry of ``self.m_list``.
        """
        super(Kijiji, self).__init__(*a, **kw)
        dispatcher.connect(self.spider_closed, signal=signals.spider_closed)

        # Identity tests are deliberate: only the literal True/False count.
        if USE_DB is not True:
            return
        self.open_database()

        if DRAW_ALL_DB is not True or DRAW_NEW_AD_ONLY is not False:
            return
        # add the markers that are already known
        # NOTE(review): m_list is presumably populated by open_database() - confirm.
        for known in self.m_list:
            self.add_marker(known, False)
開發者ID:sbourdelin,項目名稱:scrapyjiji,代碼行數:12,代碼來源:scrapyjiji.py

示例9: __init__

# 需要導入模塊: from scrapy.xlib.pydispatch import dispatcher [as 別名]
# 或者: from scrapy.xlib.pydispatch.dispatcher import connect [as 別名]
def __init__(self, *a, **kw):
        """Initialise the spider, hook ``spider_closed`` and set up its state.

        Positional and keyword arguments are forwarded unchanged to the
        base-class initializer.
        """
        super(TianqiSpider, self).__init__(*a, **kw)
        dispatcher.connect(self.spider_closed, signal=signals.spider_closed)

        # SQL helper object and the weather table name taken from ``config``.
        self.sql = SqlHelper()
        self.weather_table_name = config.weather_table
        # Starts empty.
        self.citys = list()

        # Remaining setup lives in init().
        self.init()
開發者ID:awolfly9,項目名稱:weather,代碼行數:11,代碼來源:tianqi.py

示例10: __init__

# 需要導入模塊: from scrapy.xlib.pydispatch import dispatcher [as 別名]
# 或者: from scrapy.xlib.pydispatch.dispatcher import connect [as 別名]
def __init__(self):
        """Register the spider_opened and spider_closed signal handlers."""
        dispatcher.connect(self.spider_opened, signal=signals.spider_opened)
        dispatcher.connect(self.spider_closed, signal=signals.spider_closed)
開發者ID:mersanuzun,項目名稱:alsam_mi_ki,代碼行數:5,代碼來源:pipelines.py

示例11: __init__

# 需要導入模塊: from scrapy.xlib.pydispatch import dispatcher [as 別名]
# 或者: from scrapy.xlib.pydispatch.dispatcher import connect [as 別名]
def __init__(self):
        """Build the database file path and hook the engine start/stop signals.

        The path is ``<settings.DATABASE_DIR>/<filename><MARKET_NAME>.db``,
        where ``filename`` is whatever ``self.filename`` held before this call.
        The connection itself is not opened here; ``self.conn`` stays ``None``
        until ``initialize`` runs on ``engine_started``.
        """
        self.filename = path.join(
            settings.DATABASE_DIR,
            self.filename + settings.MARKET_NAME + ".db",
        )
        # Parenthesised single-argument form prints the same on Python 2 and 3.
        print(self.filename)
        self.conn = None
        dispatcher.connect(self.initialize, signals.engine_started)
        dispatcher.connect(self.finalize, signals.engine_stopped)
開發者ID:M157q,項目名稱:Android-Repackaged-App-Detection-System,代碼行數:10,代碼來源:pipelines.py

示例12: initialize

# 需要導入模塊: from scrapy.xlib.pydispatch import dispatcher [as 別名]
# 或者: from scrapy.xlib.pydispatch.dispatcher import connect [as 別名]
def initialize(self):
        """Open the SQLite database, creating it first when the file is missing.

        Afterwards the connection in ``self.conn`` is switched to WAL
        journaling and committed.
        """
        if not path.exists(self.filename):
            # No file yet: create_table() is responsible for setting self.conn.
            self.create_table()
        else:
            self.conn = sqlite3.connect(self.filename)

        db = self.conn
        db.execute("PRAGMA journal_mode=WAL;")
        db.commit()
開發者ID:M157q,項目名稱:Android-Repackaged-App-Detection-System,代碼行數:9,代碼來源:pipelines.py

示例13: create_table

# 需要導入模塊: from scrapy.xlib.pydispatch import dispatcher [as 別名]
# 或者: from scrapy.xlib.pydispatch.dispatcher import connect [as 別名]
def create_table(self):
        """Connect to ``self.filename`` and create the ``apps`` table.

        The connection is stored in ``self.conn`` and the change is committed.
        """
        connection = sqlite3.connect(self.filename)
        self.conn = connection
        # Adjacent literals reproduce the original statement text exactly,
        # including the indentation embedded in it.
        statement = (
            "create table apps( "
            "                id integer primary key autoincrement, "
            "                url varchar(100) not null unique, "
            "                downloaded int default 0)"
        )
        connection.execute(statement)
        connection.commit()
開發者ID:M157q,項目名稱:Android-Repackaged-App-Detection-System,代碼行數:10,代碼來源:pipelines.py

示例14: __init__

# 需要導入模塊: from scrapy.xlib.pydispatch import dispatcher [as 別名]
# 或者: from scrapy.xlib.pydispatch.dispatcher import connect [as 別名]
def __init__(self, *args, **kwargs):
        """Initialise the spider from keyword arguments and hook its signals.

        Reads ``allowed_domains`` and ``start_urls`` (comma-separated strings)
        and ``org`` from ``kwargs``. A missing ``allowed_domains`` or
        ``start_urls`` raises AttributeError, because ``None`` has no ``split``.
        """
        super(FullDomainSpider, self).__init__(*args, **kwargs)

        domains = kwargs.get('allowed_domains')
        self.allowed_domains = domains.split(',')
        self.org = kwargs.get('org')
        urls = kwargs.get('start_urls')
        self.start_urls = urls.split(',')

        dispatcher.connect(self.spider_opened, signal=signals.spider_opened)
        dispatcher.connect(self.spider_closed, signal=signals.spider_closed)
開發者ID:ciscocsirt,項目名稱:malspider,代碼行數:9,代碼來源:full_domain_spider.py

示例15: spider_opened

# 需要導入模塊: from scrapy.xlib.pydispatch import dispatcher [as 別名]
# 或者: from scrapy.xlib.pydispatch.dispatcher import connect [as 別名]
def spider_opened(self, spider):
        """Open the MySQL connection and load the custom and Alexa whitelists.

        Args:
            spider: The spider object passed in by the caller.

        Sets ``self.conn`` and ``self.custom_whitelist``; sets
        ``self.alexa_whitelist`` only when the packaged CSV loads. A failure
        to load it is logged and otherwise ignored.
        """
        self.conn = MySQLdb.connect(host=settings.MYSQL_HOST, db=settings.MYSQL_DB, user=settings.MYSQL_USER, passwd=settings.MYSQL_PASSWORD, charset='utf8', use_unicode=True)
        # Query through the connection opened just above. The original used
        # ``spider.conn``, which is the same object only when ``spider is self``.
        cursor = self.conn.cursor()
        try:
            sql_str = "SELECT pattern from whitelist"
            cursor.execute(sql_str)
            self.custom_whitelist = cursor.fetchall()
        finally:
            # Release the cursor even when the query fails.
            cursor.close()
        try:
            alexa_whitelist_file = pkgutil.get_data("malspider", "resources/alexa-1k-whitelist.csv").decode('ascii')
            self.alexa_whitelist = alexa_whitelist_file.splitlines()
        except Exception:
            # Best effort: still covers a missing resource (get_data returning
            # None) and decode errors, but no longer swallows SystemExit or
            # KeyboardInterrupt the way a bare ``except:`` did.
            log.msg("Error loading alexa whitelist...", level=log.ERROR)
開發者ID:ciscocsirt,項目名稱:malspider,代碼行數:13,代碼來源:full_domain_spider.py


注:本文中的scrapy.xlib.pydispatch.dispatcher.connect方法示例由純淨天空整理自Github/MSDocs等開源代碼及文檔管理平台,相關代碼片段篩選自各路編程大神貢獻的開源項目,源碼版權歸原作者所有,傳播和使用請參考對應項目的License;未經允許,請勿轉載。