

Python settings.get method code examples

This article collects typical usage examples of the settings.get method from scrapy.conf in Python. If you are wondering how to use settings.get in practice, what it does, or what calling it looks like, the curated examples below may help. You can also explore further usage examples of scrapy.conf.settings, the module in which the method lives.


The sections below present 15 code examples of the settings.get method, ordered by popularity by default.
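Before the examples, here is a minimal, self-contained sketch of the pattern they all share: import the global settings object from scrapy.conf and read a value with settings.get, optionally supplying a default. The MY_CUSTOM_FLAG name below is a hypothetical project-specific setting used purely for illustration, and the snippet assumes an older Scrapy release in which scrapy.conf is still importable.

# Minimal usage sketch; assumes a legacy Scrapy version where scrapy.conf still exists.
from scrapy.conf import settings

# Read a hypothetical project-specific setting, falling back to a default when it is absent.
custom_flag = settings.get('MY_CUSTOM_FLAG', False)

# Built-in settings can be read the same way.
user_agent = settings.get('USER_AGENT')

In Scrapy 1.0 and later, scrapy.conf is deprecated; the recommended way to reach the same values is through crawler.settings, typically obtained in a from_crawler class method.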

Example 1: __init__

# Required import: from scrapy.conf import settings [as alias]
# Or alternatively: from scrapy.conf.settings import get [as alias]
def __init__(self):
        """Class constructor."""
        self._fields_to_check = ['description', 'name', 'summary', 'reviews']
        self._minimum_monthly_discount = int(settings.get('MINIMUM_MONTHLY_DISCOUNT', 0))  # default 0 so int() never receives None
        self._minimum_weekly_discount = int(settings.get('MINIMUM_WEEKLY_DISCOUNT', 0))

        self._skip_list = settings.get('SKIP_LIST', None)

        self._cannot_have_regex = settings.get('CANNOT_HAVE', None)
        if self._cannot_have_regex:
            self._cannot_have_regex = re.compile(str(self._cannot_have_regex), re.IGNORECASE)

        self._must_have_regex = settings.get('MUST_HAVE', None)
        if self._must_have_regex:
            self._must_have_regex = re.compile(str(self._must_have_regex), re.IGNORECASE)

        self._web_browser = settings.get('WEB_BROWSER', None)
        if self._web_browser:
            self._web_browser += ' %s'  # append URL placeholder (%s) 
Developer: bashedev, Project: airbnb_scraper, Lines: 21, Source: pipelines.py

Example 2: process_request

# Required import: from scrapy.conf import settings [as alias]
# Or alternatively: from scrapy.conf.settings import get [as alias]
def process_request(self, request, spider):
        ua = random.choice(settings.get('USER_AGENT_LIST'))
        if ua:
            request.headers.setdefault('User-Agent', ua)


# class RetryChangeProxyMiddleware(RetryMiddleware):
# 	def _retry(self, request, reason, spider):
# 		log.msg('Changing proxy')
# 		tn = telnetlib.Telnet('127.0.0.1', 9051)
# 		tn.read_until("Escape character is '^]'.", 2)
# 		tn.write('AUTHENTICATE "267765"\r\n')
# 		tn.read_until("250 OK", 2)
# 		tn.write("signal NEWNYM\r\n")
# 		tn.read_until("250 OK", 2)
# 		tn.write("quit\r\n")
# 		tn.close()
# 		time.sleep(3)
# 		log.msg('Proxy changed')
# 		return RetryMiddleware._retry(self, request, reason, spider) 
Developer: parul1931, Project: amazon, Lines: 22, Source: middlewares.py

Example 3: find_item

# Required import: from scrapy.conf import settings [as alias]
# Or alternatively: from scrapy.conf.settings import get [as alias]
def find_item(self):
        '''
        Finds an item from the queue
        '''
        count = 0

        while count <= self.item_retries:
            item = self.queue.pop()
            if item:
                # very basic limiter
                time.sleep(1)
                return item
            # we want the spiders to get slightly out of sync
            # with each other for better performance
            time.sleep(random.random())
            count = count + 1

        return None 
Developer: WalnutATiie, Project: scrapy-cluster, Lines: 20, Source: distributed_scheduler.py

Example 4: get_link_extractor

# Required import: from scrapy.conf import settings [as alias]
# Or alternatively: from scrapy.conf.settings import get [as alias]
def get_link_extractor(self):
        return LinkExtractor(allow=r'^http://[a-z2-7]{16}.onion',
                             deny=[r'^https://blockchainbdgpzk.onion/address/',
                                   r'^https://blockchainbdgpzk.onion/tx/'],
                             deny_domains=settings.get('FAKE_DOMAINS')) 
Developer: ahmia, Project: ahmia-crawler, Lines: 7, Source: onionspider.py

Example 5: __init__

# Required import: from scrapy.conf import settings [as alias]
# Or alternatively: from scrapy.conf.settings import get [as alias]
def __init__(self, *args, **kwargs):
        self.rules = [Rule(self.get_link_extractor(),
                           callback=self.parse_item,
                           process_links=self.limit_links,
                           follow=True)]
        super(WebSpider, self).__init__(*args, **kwargs)
        target_sites = settings.get('TARGET_SITES')
        if target_sites and os.path.isfile(target_sites):
            # Read a list of URLs from file
            # Create the target file list
            with open(target_sites) as target_sites_file:
                # Make it to Python list
                self.start_urls = target_sites_file.read().splitlines()
                # Remove empty strings
                self.start_urls = [u for u in self.start_urls if u]
        else:
            self.start_urls = self.default_start_url 
Developer: ahmia, Project: ahmia-crawler, Lines: 19, Source: base.py

Example 6: main

# Required import: from scrapy.conf import settings [as alias]
# Or alternatively: from scrapy.conf.settings import get [as alias]
def main():
    """Run all spiders in their own threads.
    """
    # ensure we're in the root directory
    os.chdir(PROJECT_ROOT)
    # get the spider names
    spiders_module = settings.get('NEWSPIDER_MODULE').replace('.', os.sep)
    path = os.path.abspath(os.path.join(PROJECT_ROOT, spiders_module))
    spiders = glob('{path}/*.py'.format(**locals()))
    spiders = [
        os.path.basename(s)[:-3] for s in spiders
            if not s.endswith('__init__.py')]
    # start spiders in their own threads
    threads = []
    for spider in spiders:
        t = threading.Thread(target=worker, args=(spider,))
        threads.append(t)
        t.start() 
Developer: jamiebull1, Project: remotor, Lines: 20, Source: main.py

Example 7: __init__

# Required import: from scrapy.conf import settings [as alias]
# Or alternatively: from scrapy.conf.settings import get [as alias]
def __init__(self, file, include_headers_line=True, join_multivalued=',', **kwargs):
        """Class constructor."""
        fields_to_export = settings.get('FIELDS_TO_EXPORT', [])
        if fields_to_export:
            kwargs['fields_to_export'] = fields_to_export

        super().__init__(**kwargs)

        self.include_headers_line = include_headers_line
        self._workbook = openpyxl.workbook.Workbook()
        self._worksheet = self._workbook.active
        self._headers_not_written = True
        self._join_multivalued = join_multivalued
        self._filename = file.name
        file.close() 
Developer: bashedev, Project: airbnb_scraper, Lines: 17, Source: exporter.py

Example 8: serialize_field

# Required import: from scrapy.conf import settings [as alias]
# Or alternatively: from scrapy.conf.settings import get [as alias]
def serialize_field(self, field, name, value):
        serializer = field.get('serializer', self._join_if_needed)
        return serializer(value) 
Developer: bashedev, Project: airbnb_scraper, Lines: 5, Source: exporter.py

Example 9: process_item

# Required import: from scrapy.conf import settings [as alias]
# Or alternatively: from scrapy.conf.settings import get [as alias]
def process_item(self, item, spider):
        """Drop items not fitting parameters. Open in browser if specified. Return accepted items."""

        if self._skip_list and str(item['id']) in self._skip_list:
            raise DropItem('Item in skip list: {}'.format(item['id']))

        if self._minimum_monthly_discount and 'monthly_discount' in item:
            if item['monthly_discount'] < self._minimum_monthly_discount:
                raise DropItem('Monthly discount too low: {}'.format(item['monthly_discount']))

        if self._minimum_weekly_discount and 'weekly_discount' in item:
            if item['weekly_discount'] < self._minimum_weekly_discount:
                raise DropItem('Weekly discount too low: {}'.format(item['weekly_discount']))

        # check regexes
        if self._cannot_have_regex:
            for f in self._fields_to_check:
                v = str(item[f].encode('ASCII', 'replace'))
                if self._cannot_have_regex.search(v):
                    raise DropItem('Found: {}'.format(self._cannot_have_regex.pattern))

        if self._must_have_regex:
            has_must_haves = False
            for f in self._fields_to_check:
                v = str(item[f].encode('ASCII', 'replace'))
                if self._must_have_regex.search(v):
                    has_must_haves = True
                    break

            if not has_must_haves:
                raise DropItem('Not Found: {}'.format(self._must_have_regex.pattern))

        # open in browser
        if self._web_browser:
            webbrowser.get(self._web_browser).open(item['url'])

        return item 
Developer: bashedev, Project: airbnb_scraper, Lines: 39, Source: pipelines.py

Example 10: process_request

# Required import: from scrapy.conf import settings [as alias]
# Or alternatively: from scrapy.conf.settings import get [as alias]
def process_request(self, request, spider):
        proxy = settings.get('PROXY')
        logger.info("process request %s using proxy %s" % (request, proxy))
        request.meta['proxy'] = proxy
Developer: JiangFeng07, Project: feng-python-apply, Lines: 7, Source: Proxy.py

Example 11: random_ua

# Required import: from scrapy.conf import settings [as alias]
# Or alternatively: from scrapy.conf.settings import get [as alias]
def random_ua(self):
        # randomise user-agent from the list to reduce the chance of being banned
        ua = random.choice(settings.get('USER_AGENT_LIST'))
        if not ua:
            # fall back to a fixed user agent when the list yields an empty value
            ua = 'Mozilla/5.0 (Windows NT 6.3; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/41.0.2226.0 Safari/537.36'
        return ua
Developer: HashirZahir, Project: FIFA-Player-Ratings, Lines: 8, Source: fifa_spider.py

Example 12: process_request

# Required import: from scrapy.conf import settings [as alias]
# Or alternatively: from scrapy.conf.settings import get [as alias]
def process_request(self, request, spider):
        request.meta['proxy'] = settings.get('HTTP_PROXY')

Developer: keepCodingDream, Project: finTech, Lines: 6, Source: middlewares.py

Example 13: from_settings

# Required import: from scrapy.conf import settings [as alias]
# Or alternatively: from scrapy.conf.settings import get [as alias]
def from_settings(cls, settings):
        server = redis.Redis(host=settings.get('REDIS_HOST'),
                             port=settings.get('REDIS_PORT'))
        persist = settings.get('SCHEDULER_PERSIST', True)
        timeout = settings.get('DUPEFILTER_TIMEOUT', 600)
        retries = settings.get('SCHEDULER_ITEM_RETRIES', 3)

        return cls(server, persist, timeout, retries) 
Developer: WalnutATiie, Project: scrapy-cluster, Lines: 10, Source: distributed_scheduler.py

Example 14: from_crawler

# Required import: from scrapy.conf import settings [as alias]
# Or alternatively: from scrapy.conf.settings import get [as alias]
def from_crawler(cls, crawler, *args, **kwargs):
        spider = super(WebSpider, cls).from_crawler(crawler, *args, **kwargs)
        if settings.get('FULL_PAGERANK_COMPUTE', False):
            crawler.signals.connect(spider.on_idle, signals.spider_idle)
        return spider 
Developer: ahmia, Project: ahmia-crawler, Lines: 7, Source: base.py

Example 15: process_request

# Required import: from scrapy.conf import settings [as alias]
# Or alternatively: from scrapy.conf.settings import get [as alias]
def process_request(self, request, spider): # pylint:disable=unused-argument
        """Process incoming request."""
        parsed_uri = urlparse(request.url)
        domain = '{uri.scheme}://{uri.netloc}/'.format(uri=parsed_uri)
        if ".onion" in domain and ".onion." not in domain:
            tor_proxy_list = settings.get('HTTP_PROXY_TOR_PROXIES')
            request.meta['proxy'] = random.choice(tor_proxy_list)
        elif ".i2p" in domain and ".i2p." not in domain:
            if parsed_uri.scheme == "https":
                request.meta['proxy'] = settings.get('HTTPS_PROXY_I2P')
            else:
                request.meta['proxy'] = settings.get('HTTP_PROXY_I2P') 
Developer: ahmia, Project: ahmia-crawler, Lines: 14, Source: middleware.py


Note: The scrapy.conf.settings.get examples in this article were compiled by 純淨天空 from open-source code and documentation platforms such as GitHub and MSDocs. The code snippets were selected from open-source projects contributed by their respective developers, and copyright of the source code remains with the original authors. Please consult each project's license before distributing or using the code; do not reproduce without permission.