本文整理汇总了 Python 中 frontera.settings.Settings 类的典型用法代码示例。如果您正苦于以下问题：Python Settings 类的具体用法？Python Settings 怎么用？Python Settings 使用的例子？那么恭喜您，这里精选的类代码示例或许可以为您提供帮助。
在下文中一共展示了 Settings 类的 15 个代码示例，这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞，您的评价将有助于系统推荐出更棒的 Python 代码示例。
示例1: main
def main():
    """
    Command-line entry point for the broker.

    Reads the command-line options, loads the settings module named by
    ``--config``, resolves the bind address and base port (command line
    first, settings second) and starts the server.
    """
    arg_parser = ArgumentParser(description="Crawl frontier worker.")
    arg_parser.add_argument(
        '--config', type=str,
        help='Settings module name, should be accessible by import.')
    arg_parser.add_argument(
        '--address', type=str,
        help='Hostname, IP address or Wildcard * to bind. '
             'Default is 127.0.0.1. '
             'When binding to wildcard it defaults to IPv4.')
    arg_parser.add_argument(
        '--log-level', '-L', type=str, default='INFO',
        help='Log level, for ex. DEBUG, INFO, WARN, ERROR, FATAL. '
             'Default is INFO.')
    arg_parser.add_argument(
        '--port', type=int,
        help='Base port number, server will bind to 6 ports starting from '
             'base. Default is 5550')
    options = arg_parser.parse_args()
    config = Settings(module=options.config)

    # Any falsy command-line value (absent, empty string, 0) defers to the
    # value stored in the settings.
    bind_address = options.address or config.get("ZMQ_ADDRESS")
    base_port = options.port or config.get("ZMQ_BASE_PORT")

    broker = Server(bind_address, base_port)
    broker.logger.setLevel(options.log_level)
    broker.start()
示例2: sw_setup_filtered_links
def sw_setup_filtered_links(self):
    """
    Return a StrategyWorker built from fresh settings that use the
    sqlalchemy Distributed backend, the fake message bus and the
    FilteredLinksCrawlingStrategy.
    """
    overrides = (
        ('BACKEND', 'frontera.contrib.backends.sqlalchemy.Distributed'),
        ('MESSAGE_BUS', 'tests.mocks.message_bus.FakeMessageBus'),
        ('STRATEGY', 'tests.test_worker_strategy.FilteredLinksCrawlingStrategy'),
        ('SPIDER_LOG_CONSUMER_BATCH_SIZE', 100),
    )
    config = Settings()
    for option, value in overrides:
        setattr(config, option, value)
    return StrategyWorker(config, False)
示例3: __init__
def __init__(self):
    """
    Create a message bus and the producers/consumers used by the three
    roles exercised here: strategy worker ("sw"), db worker ("db") and
    spider.
    """
    config = Settings()
    config.set("SPIDER_FEED_PARTITIONS", 1)
    config.set("QUEUE_HOSTNAME_PARTITIONING", True)
    self.mb = MessageBus(config)

    # Strategy worker side: consumes the spider log, produces scoring log.
    spider_log = self.mb.spider_log()
    self.sw_sl_c = spider_log.consumer(partition_id=0, type="sw")
    scoring_log = self.mb.scoring_log()
    self.sw_us_p = scoring_log.producer()
    # NOTE(review): the pauses presumably give the bus time to settle
    # before the next group is created -- confirm.
    sleep(0.1)

    # DB worker side: consumes spider log and scoring log, produces the
    # spider feed.
    self.db_sl_c = spider_log.consumer(partition_id=None, type="db")
    self.db_us_c = scoring_log.consumer()
    spider_feed = self.mb.spider_feed()
    self.db_sf_p = spider_feed.producer()
    sleep(0.1)

    # Spider side: produces the spider log, consumes the spider feed.
    self.sp_sl_p = spider_log.producer()
    self.sp_sf_c = spider_feed.consumer(0)
    sleep(0.1)
示例4: sw_setup_add_seeds
def sw_setup_add_seeds(self):
    """
    Return a StrategyWorker built from fresh settings that use the
    sqlalchemy Distributed backend, the fake message bus and the mock
    CrawlingStrategy.
    """
    overrides = (
        ('BACKEND', 'frontera.contrib.backends.sqlalchemy.Distributed'),
        ('MESSAGE_BUS', 'tests.mocks.message_bus.FakeMessageBus'),
        ('SPIDER_LOG_CONSUMER_BATCH_SIZE', 100),
        ('STRATEGY', 'tests.mocks.components.CrawlingStrategy'),
    )
    config = Settings()
    for option, value in overrides:
        setattr(config, option, value)
    return StrategyWorker(config, True)
示例5: test_blocking_middleware
def test_blocking_middleware(self):
    """
    Check that FakeMiddlewareBlocking stops propagation: the fake
    middlewares up to and including it see the traffic, while the ones
    after it, the canonical solver and the backend see nothing.
    """
    config = Settings()
    config.BACKEND = 'tests.mocks.components.FakeBackend'
    config.MIDDLEWARES = [
        # Indices 0-1: regular frontera middlewares.
        'frontera.contrib.middlewares.domain.DomainMiddleware',
        'frontera.contrib.middlewares.fingerprint.UrlFingerprintMiddleware',
        # Indices 2-4: fake middlewares up to and including the blocker.
        'tests.mocks.components.FakeMiddleware',
        'tests.mocks.components.FakeMiddlewareModifySeeds',
        'tests.mocks.components.FakeMiddlewareBlocking',
        # Indices 5-6: fake middlewares placed after the blocker.
        'tests.mocks.components.FakeMiddlewareModifyResponse',
        'tests.mocks.components.FakeMiddlewareModifyLinks',
    ]
    config.CANONICAL_SOLVER = 'tests.mocks.components.FakeCanonicalSolver'
    config.STRATEGY = 'tests.mocks.components.CrawlingStrategy'
    manager = LocalFrontierManager.from_settings(config)

    SEEDS_FILE.seek(0)
    manager.add_seeds(SEEDS_FILE)
    crawled = Response(r1.url, request=r1)
    manager.page_crawled(crawled)
    manager.links_extracted(r1, links=[r2])
    manager.request_error(r3, 'error')

    def sizes(collections):
        # Length of every collection, in order.
        return [len(items) for items in collections]

    # Seeds, responses, links and errors never reached the backend.
    assert sizes(manager.backend.lists) == [0] * 4
    # The 3 seeds reached the fake middlewares at indices 2-4.
    assert [len(manager.middlewares[idx].requests) for idx in range(2, 5)] == [3] * 3
    # The error, response and link reached those same middlewares.
    assert [sizes(manager.middlewares[idx].lists[1:]) for idx in range(2, 5)] == [[1] * 3] * 3
    # Nothing reached the two middlewares after the blocker (indices 5-6)
    # or the canonical solver.
    assert [sizes(manager.middlewares[idx].lists) for idx in range(5, 7)] == [[0] * 4] * 2
    assert sizes(manager.canonicalsolver.lists) == [0] * 4
示例6: test_blocking_middleware
def test_blocking_middleware(self):
    """
    Check that FakeMiddlewareBlocking stops propagation: the first three
    middlewares see the traffic, while the last two, the canonical solver
    and the backend see nothing.
    """
    config = Settings()
    config.BACKEND = 'tests.mocks.components.FakeBackend'
    config.MIDDLEWARES = [
        # Indices 0-2: up to and including the blocker.
        'tests.mocks.components.FakeMiddleware',
        'tests.mocks.components.FakeMiddlewareModifySeeds',
        'tests.mocks.components.FakeMiddlewareBlocking',
        # Indices 3-4: placed after the blocker.
        'tests.mocks.components.FakeMiddlewareModifyResponse',
        'tests.mocks.components.FakeMiddlewareModifyLinks',
    ]
    config.CANONICAL_SOLVER = 'tests.mocks.components.FakeCanonicalSolver'
    manager = FrontierManager.from_settings(config)

    manager.add_seeds([r1, r2, r3])
    crawled = Response(r1.url, request=r1)
    manager.page_crawled(crawled)
    manager.links_extracted(r1, links=[r2])
    manager.request_error(r3, 'error')

    def sizes(collections):
        # Length of every collection, in order.
        return [len(items) for items in collections]

    # Seeds, responses, links and errors never reached the backend.
    assert sizes(manager.backend.lists) == [0] * 4
    # The 3 seeds reached the first three middlewares.
    assert [len(manager.middlewares[idx].seeds) for idx in range(3)] == [3] * 3
    # The error, response and link reached the first three middlewares.
    assert [sizes(manager.middlewares[idx].lists[1:]) for idx in range(3)] == [[1] * 3] * 3
    # Nothing reached the last two middlewares or the canonical solver.
    assert [sizes(manager.middlewares[idx].lists) for idx in range(3, 5)] == [[0] * 4] * 2
    assert sizes(manager.canonicalsolver.lists) == [0] * 4
示例7: strategy
def strategy(self):
    """
    Return the strategy object of a WorkerFrontierManager created in
    strategy-worker mode with the DummyCrawlingStrategy.
    """
    config = Settings()
    config.BACKEND = 'frontera.contrib.backends.sqlalchemy.Distributed'
    config.STRATEGY = 'tests.test_strategy.DummyCrawlingStrategy'
    worker_manager = WorkerFrontierManager.from_settings(
        config, db_worker=False, strategy_worker=True)
    # NOTE(review): the three objects below are constructed but never used
    # in this function -- confirm whether they can be dropped.
    message_stream = MessageBusStream()
    memory_states = MemoryStates(10)
    context = StatesContext(memory_states)
    return worker_manager.strategy
示例8: dbw_setup
def dbw_setup(self, distributed=False):
    """
    Return a DBWorker built on the fake message bus.

    :param distributed: when true the FakeDistributedBackend is used,
        otherwise the plain FakeBackend.
    """
    backend_path = ('tests.mocks.components.FakeDistributedBackend'
                    if distributed
                    else 'tests.mocks.components.FakeBackend')
    config = Settings()
    config.MAX_NEXT_REQUESTS = 64
    config.MESSAGE_BUS = 'tests.mocks.message_bus.FakeMessageBus'
    config.BACKEND = backend_path
    return DBWorker(config, True, True, False)
示例9: test_max_requests_reached
def test_max_requests_reached(self):
    """
    With MAX_REQUESTS = 2 and three queued requests, the first batch must
    contain exactly two of them, the next batch must be empty and the
    manager must report itself finished.
    """
    config = Settings()
    config.MAX_REQUESTS = 2
    manager = self.setup_frontier_manager(config)
    manager.backend.put_requests([r1, r2, r3])

    first_batch = set(manager.get_next_requests(10))
    # Any pair out of the three requests is acceptable.
    acceptable_pairs = ({r1, r2}, {r2, r3}, {r1, r3})
    assert any(first_batch == pair for pair in acceptable_pairs)
    assert manager.get_next_requests(10) == []
    assert manager.finished is True
示例10: test_feed_partitions_less_than_equal_partion_id_and_partion_id_less_than_zero
def test_feed_partitions_less_than_equal_partion_id_and_partion_id_less_than_zero(self):
    """
    ``mbb_setup`` must raise ValueError for a spider partition id that is
    greater than, equal to, or (being negative) below the valid range
    for SPIDER_FEED_PARTITIONS = 1.
    """
    config = Settings()
    config.SPIDER_FEED_PARTITIONS = 1
    # 2 -> greater than the partition count, 1 -> equal to it, -1 -> negative.
    for invalid_id in (2, 1, -1):
        config.SPIDER_PARTITION_ID = invalid_id
        with self.assertRaises(ValueError):
            self.mbb_setup(config)
示例11: __init__
def __init__(self, manager):
    """
    Set up the message bus, the encoder/decoder pair, the spider log
    producer, the spider feed consumer and the overused buffer.

    :param manager: object providing ``settings``, ``request_model``,
        ``response_model`` and ``logger``.
    """
    self._manager = manager
    # Work on a Settings object built from the manager's attributes.
    config = Settings(attributes=manager.settings.attributes)

    bus_class = load_object(config.get('MESSAGE_BUS'))
    self.mb = bus_class(config)

    self._encoder = Encoder(manager.request_model,
                            send_body=config.get('STORE_CONTENT'))
    self._decoder = Decoder(manager.request_model, manager.response_model)

    self.spider_log_producer = self.mb.spider_log().producer()
    feed = self.mb.spider_feed()
    self.partition_id = config.get('SPIDER_PARTITION_ID')
    self.consumer = feed.consumer(partition_id=self.partition_id)

    self._get_timeout = float(config.get('KAFKA_GET_TIMEOUT'))
    self._buffer = OverusedBuffer(self._get_next_requests,
                                  manager.logger.manager.debug)
示例12: seed_loader_setup
def seed_loader_setup(self, seeds_content=None):
    """
    Write a ``seeds.txt`` file under ``self.tmp_path`` and return a
    FileSeedLoader whose crawler settings point at it.

    :param seeds_content: text to put in the seeds file; when falsy, two
        default URLs are written instead.
    """
    fallback_seeds = """
https://www.example.com
https://www.scrapy.org
"""
    if not seeds_content:
        seeds_content = fallback_seeds

    seeds_file = os.path.join(self.tmp_path, 'seeds.txt')
    with open(seeds_file, 'wb') as handle:
        handle.write(seeds_content.encode('utf-8'))
    assert os.path.isfile(seeds_file)  # Failure of test itself

    config = Settings()
    config.SEEDS_SOURCE = seeds_file
    # Minimal stand-in for a crawler: a bare class carrying only `settings`.
    crawler = type('crawler', (object,), {'settings': config})
    return FileSeedLoader(crawler)
示例13: from_settings
def from_settings(cls, settings=None, db_worker=False, strategy_worker=False):
    """
    Build a :class:`FrontierManager <frontera.core.manager.FrontierManager>`
    from ``settings``.

    When no settings are passed,
    :ref:`frontier default settings <frontier-default-settings>` are used.
    ``db_worker`` and ``strategy_worker`` are forwarded to the manager
    unchanged.
    """
    cfg = Settings.object_from(settings)
    manager_kwargs = dict(
        request_model=cfg.REQUEST_MODEL,
        response_model=cfg.RESPONSE_MODEL,
        backend=cfg.BACKEND,
        logger=cfg.LOGGER,
        event_log_manager=cfg.EVENT_LOG_MANAGER,
        middlewares=cfg.MIDDLEWARES,
        test_mode=cfg.TEST_MODE,
        max_requests=cfg.MAX_REQUESTS,
        max_next_requests=cfg.MAX_NEXT_REQUESTS,
        auto_start=cfg.AUTO_START,
        settings=cfg,
        canonicalsolver=cfg.CANONICAL_SOLVER,
        db_worker=db_worker,
        strategy_worker=strategy_worker,
    )
    return FrontierManager(**manager_kwargs)
示例14: setup_environment
def setup_environment():
    """
    Parse the strategy worker command line, load the settings module and
    configure logging.

    The strategy class path and the partition id are taken from the
    command line when given, otherwise from the settings
    (``CRAWLING_STRATEGY`` and ``SCORING_PARTITION_ID``). The resolved
    partition id is written back to ``SCORING_PARTITION_ID``.

    :returns: ``(settings, strategy_class)`` tuple.
    :raises ValueError: if no strategy class path can be found, if no
        partition id can be found, or if the partition id is negative or
        not below ``SPIDER_LOG_PARTITIONS``.
    """
    parser = ArgumentParser(description="Frontera strategy worker.")
    parser.add_argument('--config', type=str, required=True,
                        help='Settings module name, should be accessible by import')
    parser.add_argument('--log-level', '-L', type=str, default='INFO',
                        help="Log level, for ex. DEBUG, INFO, WARN, ERROR, FATAL")
    parser.add_argument('--strategy', type=str,
                        help='Crawling strategy class path')
    parser.add_argument('--partition-id', type=int,
                        help="Instance partition id.")
    args = parser.parse_args()
    settings = Settings(module=args.config)

    strategy_classpath = args.strategy if args.strategy else settings.get('CRAWLING_STRATEGY')
    if not strategy_classpath:
        raise ValueError("Couldn't locate strategy class path. Please supply it either using command line option or "
                         "settings file.")
    strategy_class = load_object(strategy_classpath)

    # `is not None` rather than truthiness: partition id 0 is a valid
    # command-line value and must not fall back to the settings.
    partition_id = args.partition_id if args.partition_id is not None else settings.get('SCORING_PARTITION_ID')
    if partition_id is None:
        # Fail with a clear message instead of a TypeError from the range
        # check below.
        raise ValueError("Couldn't determine partition id. Please supply it either using command line option or "
                         "settings file.")
    if partition_id >= settings.get('SPIDER_LOG_PARTITIONS') or partition_id < 0:
        # The check rejects ids equal to SPIDER_LOG_PARTITIONS as well, so
        # the message says so.
        raise ValueError("Partition id (%d) cannot be less than zero or greater than or equal to "
                         "SPIDER_LOG_PARTITIONS." % partition_id)
    settings.set('SCORING_PARTITION_ID', partition_id)

    logging_config_path = settings.get("LOGGING_CONFIG")
    if logging_config_path and exists(logging_config_path):
        fileConfig(logging_config_path)
    else:
        # No usable logging config file: fall back to basic console logging
        # at the level requested on the command line.
        logging.basicConfig(level=args.log_level)
        logger.setLevel(args.log_level)
        logger.addHandler(CONSOLE)
    return settings, strategy_class
示例15: main
def main():
    """
    Command-line entry point for the broker.

    Reads the command-line options, loads the settings module named by
    ``--config``, resolves the hostname and base port (command line first,
    settings second) and starts the server.
    """
    arg_parser = ArgumentParser(description="Crawl frontier worker.")
    arg_parser.add_argument(
        '--config', type=str,
        help='Settings module name, should be accessible by import.')
    arg_parser.add_argument(
        '--hostname', type=str,
        help='Hostname or IP address to bind. Default is 127.0.0.1')
    arg_parser.add_argument(
        '--log-level', '-L', type=str, default='INFO',
        help='Log level, for ex. DEBUG, INFO, WARN, ERROR, FATAL. '
             'Default is INFO.')
    arg_parser.add_argument(
        '--port', type=int,
        help='Base port number, server will bind to 6 ports starting from '
             'base. Default is 5550')
    options = arg_parser.parse_args()
    config = Settings(module=options.config)

    # Any falsy command-line value (absent, empty string, 0) defers to the
    # value stored in the settings.
    bind_host = options.hostname or config.get("ZMQ_HOSTNAME")
    base_port = options.port or config.get("ZMQ_BASE_PORT")

    broker = Server(bind_host, base_port)
    broker.logger.setLevel(options.log_level)
    broker.start()