This article collects and summarizes typical usage examples of the Python method kafka.client_async.KafkaClient.poll. If you are wondering what exactly KafkaClient.poll does, how to call it, or how it is used in practice, the curated examples below should help; you can also explore the enclosing class, kafka.client_async.KafkaClient, for further context.
The following shows 11 code examples of KafkaClient.poll, ordered by popularity by default.
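Before diving in, here is a minimal sketch of the pattern most of the examples build on: KafkaClient.send() is asynchronous and returns a Future, and poll() is what actually drives network I/O until that Future resolves. The broker address and API version below are placeholder assumptions, not taken from any example.

from kafka.client_async import KafkaClient
from kafka.protocol.metadata import MetadataRequest

# Placeholder broker address; adjust for your cluster.
client = KafkaClient(bootstrap_servers='localhost:9092', api_version=(0, 9))

node_id = client.least_loaded_node()
while not client.ready(node_id):   # ready() only initiates the connection...
    client.poll()                  # ...poll() drives the handshake to completion

future = client.send(node_id, MetadataRequest[0]([]))  # async; returns a Future
client.poll(future=future)         # block until the future is resolved
if future.succeeded():
    print(future.value)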
Example 1: test_poll
# Required import: from kafka.client_async import KafkaClient [as alias]
# Or: from kafka.client_async.KafkaClient import poll [as alias]
def test_poll(mocker):
    mocker.patch.object(KafkaClient, '_bootstrap')
    metadata = mocker.patch.object(KafkaClient, '_maybe_refresh_metadata')
    _poll = mocker.patch.object(KafkaClient, '_poll')
    cli = KafkaClient(api_version=(0, 9))
    tasks = mocker.patch.object(cli._delayed_tasks, 'next_at')

    # metadata timeout wins
    metadata.return_value = 1000
    tasks.return_value = 2
    cli.poll()
    _poll.assert_called_with(1.0, sleep=True)

    # user timeout wins
    cli.poll(250)
    _poll.assert_called_with(0.25, sleep=True)

    # tasks timeout wins
    tasks.return_value = 0
    cli.poll(250)
    _poll.assert_called_with(0, sleep=True)

    # default is request_timeout_ms
    metadata.return_value = 1000000
    tasks.return_value = 10000
    cli.poll()
    _poll.assert_called_with(cli.config['request_timeout_ms'] / 1000.0,
                             sleep=True)
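What Example 1 pins down is poll()'s timeout selection: the effective wait is the smallest of the caller's timeout_ms, the metadata refresh deadline, the next delayed task, and request_timeout_ms. A hedged sketch of how that behaves against a live client (the broker address is a placeholder):

from kafka.client_async import KafkaClient

client = KafkaClient(bootstrap_servers='localhost:9092',
                     request_timeout_ms=40000)
# Blocks for at most ~0.25 s; returns sooner if a metadata refresh or an
# in-flight response becomes due first, exactly as the test asserts.
responses = client.poll(timeout_ms=250)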
Example 2: test_maybe_refresh_metadata_ttl
# Required import: from kafka.client_async import KafkaClient [as alias]
# Or: from kafka.client_async.KafkaClient import poll [as alias]
def test_maybe_refresh_metadata_ttl(mocker):
    mocker.patch.object(KafkaClient, '_bootstrap')
    _poll = mocker.patch.object(KafkaClient, '_poll')
    cli = KafkaClient(request_timeout_ms=9999999, retry_backoff_ms=2222)

    tasks = mocker.patch.object(cli._delayed_tasks, 'next_at')
    tasks.return_value = 9999999

    ttl = mocker.patch.object(cli.cluster, 'ttl')
    ttl.return_value = 1234

    cli.poll(timeout_ms=9999999, sleep=True)
    _poll.assert_called_with(1.234, sleep=True)
Example 3: test_maybe_refresh_metadata_backoff
# Required import: from kafka.client_async import KafkaClient [as alias]
# Or: from kafka.client_async.KafkaClient import poll [as alias]
import time

def test_maybe_refresh_metadata_backoff(mocker):
    mocker.patch.object(KafkaClient, '_bootstrap')
    _poll = mocker.patch.object(KafkaClient, '_poll')
    cli = KafkaClient(request_timeout_ms=9999999, retry_backoff_ms=2222)

    tasks = mocker.patch.object(cli._delayed_tasks, 'next_at')
    tasks.return_value = 9999999

    ttl = mocker.patch.object(cli.cluster, 'ttl')
    ttl.return_value = 0

    now = time.time()
    t = mocker.patch('time.time')
    t.return_value = now
    cli._last_no_node_available_ms = now * 1000

    cli.poll(timeout_ms=9999999, sleep=True)
    _poll.assert_called_with(2.222, sleep=True)
Example 4: test_maybe_refresh_metadata_update
# Required import: from kafka.client_async import KafkaClient [as alias]
# Or: from kafka.client_async.KafkaClient import poll [as alias]
from kafka.protocol.metadata import MetadataRequest

def test_maybe_refresh_metadata_update(mocker):
    mocker.patch.object(KafkaClient, '_bootstrap')
    _poll = mocker.patch.object(KafkaClient, '_poll')
    cli = KafkaClient(request_timeout_ms=9999999, retry_backoff_ms=2222)

    tasks = mocker.patch.object(cli._delayed_tasks, 'next_at')
    tasks.return_value = 9999999

    ttl = mocker.patch.object(cli.cluster, 'ttl')
    ttl.return_value = 0

    mocker.patch.object(cli, 'least_loaded_node', return_value='foobar')
    mocker.patch.object(cli, '_can_send_request', return_value=True)
    send = mocker.patch.object(cli, 'send')

    cli.poll(timeout_ms=9999999, sleep=True)
    _poll.assert_called_with(0, sleep=True)
    assert cli._metadata_refresh_in_progress
    request = MetadataRequest[0]([])
    send.assert_called_with('foobar', request)
Example 5: test_maybe_refresh_metadata_failure
# Required import: from kafka.client_async import KafkaClient [as alias]
# Or: from kafka.client_async.KafkaClient import poll [as alias]
import time

def test_maybe_refresh_metadata_failure(mocker):
    mocker.patch.object(KafkaClient, '_bootstrap')
    _poll = mocker.patch.object(KafkaClient, '_poll')
    cli = KafkaClient(request_timeout_ms=9999999, retry_backoff_ms=2222)

    tasks = mocker.patch.object(cli._delayed_tasks, 'next_at')
    tasks.return_value = 9999999

    ttl = mocker.patch.object(cli.cluster, 'ttl')
    ttl.return_value = 0

    mocker.patch.object(cli, 'least_loaded_node', return_value='foobar')

    now = time.time()
    t = mocker.patch('time.time')
    t.return_value = now

    cli.poll(timeout_ms=9999999, sleep=True)
    _poll.assert_called_with(0, sleep=True)
    assert cli._last_no_node_available_ms == now * 1000
    assert not cli._metadata_refresh_in_progress
Example 6: test_poll
# Required import: from kafka.client_async import KafkaClient [as alias]
# Or: from kafka.client_async.KafkaClient import poll [as alias]
def test_poll(mocker):
    mocker.patch.object(KafkaClient, '_bootstrap')
    metadata = mocker.patch.object(KafkaClient, '_maybe_refresh_metadata')
    _poll = mocker.patch.object(KafkaClient, '_poll')
    cli = KafkaClient(api_version=(0, 9))

    # metadata timeout wins
    metadata.return_value = 1000
    cli.poll()
    _poll.assert_called_with(1.0)

    # user timeout wins
    cli.poll(250)
    _poll.assert_called_with(0.25)

    # default is request_timeout_ms
    metadata.return_value = 1000000
    cli.poll()
    _poll.assert_called_with(cli.config['request_timeout_ms'] / 1000.0)
Example 7: OffsetsFetcherAsync
# Required import: from kafka.client_async import KafkaClient [as alias]
# Or: from kafka.client_async.KafkaClient import poll [as alias]
import copy
import logging

import kafka.errors as Errors
from kafka.client_async import KafkaClient
from kafka.future import Future
# GroupCoordinatorRequest lives in kafka.protocol.commit in the kafka-python
# versions these examples target; the original import line is not shown.
from kafka.protocol.commit import GroupCoordinatorRequest

# Module-level logger, as in the original module.
log = logging.getLogger(__name__)

class OffsetsFetcherAsync(object):

    DEFAULT_CONFIG = {
        'session_timeout_ms': 30000,
        'heartbeat_interval_ms': 3000,
        'retry_backoff_ms': 100,
        'api_version': (0, 9),
        'metric_group_prefix': ''
    }

    def __init__(self, **configs):
        self.config = copy.copy(self.DEFAULT_CONFIG)
        self.config.update(configs)
        self._client = KafkaClient(**self.config)
        self._coordinator_id = None
        self.group_id = configs['group_id']
        self.topic = configs['topic']

    def _ensure_coordinator_known(self):
        """Block until the coordinator for this group is known
        (and we have an active connection -- java client uses unsent queue).
        """
        while self._coordinator_unknown():
            # Prior to 0.8.2 there was no group coordinator
            # so we will just pick a node at random and treat
            # it as the "coordinator"
            if self.config['api_version'] < (0, 8, 2):
                self._coordinator_id = self._client.least_loaded_node()
                self._client.ready(self._coordinator_id)
                continue

            future = self._send_group_coordinator_request()
            self._client.poll(future=future)

            if future.failed():
                if isinstance(future.exception,
                              Errors.GroupCoordinatorNotAvailableError):
                    continue
                elif future.retriable():
                    metadata_update = self._client.cluster.request_update()
                    self._client.poll(future=metadata_update)
                else:
                    raise future.exception  # pylint: disable-msg=raising-bad-type

    def _coordinator_unknown(self):
        """Check if we know who the coordinator is and have an active connection

        Side-effect: reset _coordinator_id to None if connection failed

        Returns:
            bool: True if the coordinator is unknown
        """
        if self._coordinator_id is None:
            return True

        if self._client.is_disconnected(self._coordinator_id):
            self._coordinator_dead()
            return True

        return False

    def _coordinator_dead(self, error=None):
        """Mark the current coordinator as dead."""
        if self._coordinator_id is not None:
            log.warning("Marking the coordinator dead (node %s) for group %s: %s.",
                        self._coordinator_id, self.group_id, error)
            self._coordinator_id = None

    def _send_group_coordinator_request(self):
        """Discover the current coordinator for the group.

        Returns:
            Future: resolves to the node id of the coordinator
        """
        node_id = self._client.least_loaded_node()
        if node_id is None:
            return Future().failure(Errors.NoBrokersAvailable())

        log.debug("Sending group coordinator request for group %s to broker %s",
                  self.group_id, node_id)
        request = GroupCoordinatorRequest[0](self.group_id)
        future = Future()
        _f = self._client.send(node_id, request)
        _f.add_callback(self._handle_group_coordinator_response, future)
        # _failed_request is defined in the omitted remainder of the class.
        _f.add_errback(self._failed_request, node_id, request, future)
        return future

    def _handle_group_coordinator_response(self, future, response):
        log.debug("Received group coordinator response %s", response)
        if not self._coordinator_unknown():
            # We already found the coordinator, so ignore the request
            log.debug("Coordinator already known -- ignoring metadata response")
            future.success(self._coordinator_id)
            return

        error_type = Errors.for_code(response.error_code)
        if error_type is Errors.NoError:
            ok = self._client.cluster.add_group_coordinator(self.group_id, response)
            if not ok:
# ......... remainder of this example omitted .........
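Example 7 is truncated, but its core idiom is fully visible: every blocking step is implemented as poll(future=...) on a Future returned by an asynchronous send. A hedged usage sketch, assuming the omitted methods complete the class as in its original project (the server, group, and topic names are placeholders):

fetcher = OffsetsFetcherAsync(bootstrap_servers='localhost:9092',
                              group_id='my-group', topic='my-topic')
fetcher._ensure_coordinator_known()  # loops on poll(future=...) until resolved
print(fetcher._coordinator_id)       # node id of the group coordinator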
Example 8: KafkaConsumer
# Required import: from kafka.client_async import KafkaClient [as alias]
# Or: from kafka.client_async.KafkaClient import poll [as alias]
import six

class KafkaConsumer(six.Iterator):
    """Consume records from a Kafka cluster.

    The consumer will transparently handle the failure of servers in the Kafka
    cluster, and adapt as topic-partitions are created or migrate between
    brokers. It also interacts with the assigned kafka Group Coordinator node
    to allow multiple consumers to load balance consumption of topics (requires
    kafka >= 0.9.0.0).

    Arguments:
        *topics (str): optional list of topics to subscribe to. If not set,
            call subscribe() or assign() before consuming records.

    Keyword Arguments:
        bootstrap_servers: 'host[:port]' string (or list of 'host[:port]'
            strings) that the consumer should contact to bootstrap initial
            cluster metadata. This does not have to be the full node list.
            It just needs to have at least one broker that will respond to a
            Metadata API Request. Default port is 9092. If no servers are
            specified, will default to localhost:9092.
        client_id (str): a name for this client. This string is passed in
            each request to servers and can be used to identify specific
            server-side log entries that correspond to this client. Also
            submitted to GroupCoordinator for logging with respect to
            consumer group administration. Default: 'kafka-python-{version}'
        group_id (str or None): name of the consumer group to join for dynamic
            partition assignment (if enabled), and to use for fetching and
            committing offsets. If None, auto-partition assignment (via
            group coordinator) and offset commits are disabled.
            Default: 'kafka-python-default-group'
        key_deserializer (callable): Any callable that takes a
            raw message key and returns a deserialized key.
        value_deserializer (callable): Any callable that takes a
            raw message value and returns a deserialized value.
        fetch_min_bytes (int): Minimum amount of data the server should
            return for a fetch request, otherwise wait up to
            fetch_max_wait_ms for more data to accumulate. Default: 1.
        fetch_max_wait_ms (int): The maximum amount of time in milliseconds
            the server will block before answering the fetch request if
            there isn't sufficient data to immediately satisfy the
            requirement given by fetch_min_bytes. Default: 500.
        max_partition_fetch_bytes (int): The maximum amount of data
            per-partition the server will return. The maximum total memory
            used for a request = #partitions * max_partition_fetch_bytes.
            This size must be at least as large as the maximum message size
            the server allows or else it is possible for the producer to
            send messages larger than the consumer can fetch. If that
            happens, the consumer can get stuck trying to fetch a large
            message on a certain partition. Default: 1048576.
        request_timeout_ms (int): Client request timeout in milliseconds.
            Default: 40000.
        retry_backoff_ms (int): Milliseconds to backoff when retrying on
            errors. Default: 100.
        reconnect_backoff_ms (int): The amount of time in milliseconds to
            wait before attempting to reconnect to a given host.
            Default: 50.
        max_in_flight_requests_per_connection (int): Requests are pipelined
            to kafka brokers up to this number of maximum requests per
            broker connection. Default: 5.
        auto_offset_reset (str): A policy for resetting offsets on
            OffsetOutOfRange errors: 'earliest' will move to the oldest
            available message, 'latest' will move to the most recent. Any
            other value will raise the exception. Default: 'latest'.
        enable_auto_commit (bool): If true the consumer's offset will be
            periodically committed in the background. Default: True.
        auto_commit_interval_ms (int): milliseconds between automatic
            offset commits, if enable_auto_commit is True. Default: 5000.
        default_offset_commit_callback (callable): called as
            callback(offsets, response) response will be either an Exception
            or an OffsetCommitResponse struct. This callback can be used to
            trigger custom actions when a commit request completes.
        check_crcs (bool): Automatically check the CRC32 of the records
            consumed. This ensures no on-the-wire or on-disk corruption to
            the messages occurred. This check adds some overhead, so it may
            be disabled in cases seeking extreme performance. Default: True
        metadata_max_age_ms (int): The period of time in milliseconds after
            which we force a refresh of metadata even if we haven't seen any
            partition leadership changes to proactively discover any new
            brokers or partitions. Default: 300000
        partition_assignment_strategy (list): List of objects to use to
            distribute partition ownership amongst consumer instances when
            group management is used.
            Default: [RangePartitionAssignor, RoundRobinPartitionAssignor]
        heartbeat_interval_ms (int): The expected time in milliseconds
            between heartbeats to the consumer coordinator when using
            Kafka's group management feature. Heartbeats are used to ensure
            that the consumer's session stays active and to facilitate
            rebalancing when new consumers join or leave the group. The
            value must be set lower than session_timeout_ms, but typically
            should be set no higher than 1/3 of that value. It can be
            adjusted even lower to control the expected time for normal
            rebalances. Default: 3000
        session_timeout_ms (int): The timeout used to detect failures when
            using Kafka's group management facilities. Default: 30000
        max_poll_records (int): The maximum number of records returned in a
            single call to poll().
        receive_buffer_bytes (int): The size of the TCP receive buffer
            (SO_RCVBUF) to use when reading data. Default: None (relies on
            system defaults). The java client defaults to 32768.
        send_buffer_bytes (int): The size of the TCP send buffer
# ......... remainder of this example omitted .........
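The docstring above is configuration-heavy, so a short hedged sketch may help; it shows the most common instantiation, with topic, group, and server values as placeholders and everything else left at the documented defaults:

from kafka import KafkaConsumer

consumer = KafkaConsumer('my-topic',
                         bootstrap_servers='localhost:9092',
                         group_id='my-group',
                         auto_offset_reset='earliest')
# Iterator interface: blocks and yields ConsumerRecord objects as they arrive.
for message in consumer:
    print(message.topic, message.partition, message.offset, message.value)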
Example 9: __init__
# Required import: from kafka.client_async import KafkaClient [as alias]
# Or: from kafka.client_async.KafkaClient import poll [as alias]
import copy
import itertools

from kafka.client_async import KafkaClient
from kafka.protocol.admin import ListGroupsRequest, ListGroupsResponse
from kafka.protocol.commit import GroupCoordinatorRequest, GroupCoordinatorResponse

# The original module's import aliases are not shown in this excerpt;
# binding them to v0 of each protocol message is an assumption.
_ListGroupsRequest = ListGroupsRequest[0]
_ListGroupsResponse = ListGroupsResponse[0]
_GroupCoordinatorRequest = GroupCoordinatorRequest[0]
_GroupCoordinatorResponse = GroupCoordinatorResponse[0]

class KafkaConsumerLag:

    def __init__(self, bootstrap_servers):
        self.client = KafkaClient(bootstrap_servers=bootstrap_servers)
        self.client.check_version()

    def _send(self, broker_id, request, response_type=None):
        f = self.client.send(broker_id, request)
        response = self.client.poll(future=f)

        if response_type:
            if response and len(response) > 0:
                for r in response:
                    if isinstance(r, response_type):
                        return r
        else:
            if response and len(response) > 0:
                return response[0]

        return None

    def check(self, group_topics=None, discovery=None):
        """Return consumer statistics in the form:

        {
          "<group>": {
            "state": <str>,
            "topics": {
              "<topic>": {
                "consumer_lag": <int>,
                "partitions": {
                  "<partition>": {
                    "offset_first": <int>,
                    "offset_consumed": <int>,
                    "offset_last": <int>,
                    "lag": <int>
                  }
                }
              }
            }
          }
        }

        :param group_topics: consumer group IDs mapped to lists of topics
        :param discovery: whether to discover active consumer groups
        :return: consumer statistics
        """
        cluster = self.client.cluster
        brokers = cluster.brokers()

        # Consumer group ID -> list(topics)
        if group_topics is None:
            group_topics = {}

            if discovery is None:
                discovery = True
        else:
            group_topics = copy.deepcopy(group_topics)

        # Set of consumer group IDs
        consumer_groups = set(group_topics.keys())   # iterkeys() in the py2 original

        # Set of all known topics
        topics = set(itertools.chain(*group_topics.values()))  # itervalues() in the py2 original

        # Consumer group ID -> coordinating broker
        consumer_coordinator = {}

        # Coordinating broker -> list(consumer group IDs)
        coordinator_consumers = {}

        results = {}

        for consumer_group in group_topics.keys():
            results[consumer_group] = {'state': None, 'topics': {}}

        # Ensure connections to all brokers
        for broker in brokers:
            while not self.client.is_ready(broker.nodeId):
                self.client.ready(broker.nodeId)

        # Collect all active consumer groups
        if discovery:
            for broker in brokers:
                response = self._send(broker.nodeId, _ListGroupsRequest(), _ListGroupsResponse)
                if response:
                    for group in response.groups:
                        consumer_groups.add(group[0])

        # Identify which broker is coordinating each consumer group
        for group in consumer_groups:
            response = self._send(next(iter(brokers)).nodeId,
                                  _GroupCoordinatorRequest(group), _GroupCoordinatorResponse)
            if response:
                consumer_coordinator[group] = response.coordinator_id

                if response.coordinator_id not in coordinator_consumers:
                    coordinator_consumers[response.coordinator_id] = []
# ......... remainder of this example omitted .........
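A hedged usage sketch for Example 9, assuming the omitted remainder of check() fills in the result structure its docstring documents (the broker address is a placeholder):

lag_checker = KafkaConsumerLag(bootstrap_servers='localhost:9092')
stats = lag_checker.check()  # with no arguments, discovers all active groups
for group, info in stats.items():
    for topic, data in info['topics'].items():
        print(group, topic, data['consumer_lag'])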
Example 10: KafkaConsumer
# Required import: from kafka.client_async import KafkaClient [as alias]
# Or: from kafka.client_async.KafkaClient import poll [as alias]
# ......... earlier part of this example omitted .........
# (This excerpt starts inside KafkaConsumer.committed(); the class header,
# earlier methods, this method's signature, and the module-level `import time`
# are all in the omitted portion.)
            committed = self._subscription.assignment[partition].committed
            if committed is None:
                self._coordinator.refresh_committed_offsets_if_needed()
                committed = self._subscription.assignment[partition].committed
        else:
            commit_map = self._coordinator.fetch_committed_offsets([partition])
            if partition in commit_map:
                committed = commit_map[partition].offset
            else:
                committed = None
        return committed

    def topics(self):
        """Get all topic metadata topics the user is authorized to view.
        [Not Implemented Yet]

        Returns:
            {topic: [partition_info]}
        """
        raise NotImplementedError("TODO")

    def partitions_for_topic(self, topic):
        """Get metadata about the partitions for a given topic.

        Arguments:
            topic (str): topic to check

        Returns:
            set: partition ids
        """
        return self._client.cluster.partitions_for_topic(topic)

    def poll(self, timeout_ms=0):
        """Fetch data from assigned topics / partitions.

        Records are fetched and returned in batches by topic-partition.
        On each poll, consumer will try to use the last consumed offset as the
        starting offset and fetch sequentially. The last consumed offset can be
        manually set through seek(partition, offset) or automatically set as
        the last committed offset for the subscribed list of partitions.

        Incompatible with iterator interface -- use one or the other, not both.

        Arguments:
            timeout_ms (int, optional): milliseconds to spend waiting in poll if
                data is not available. If 0, returns immediately with any
                records that are available now. Must not be negative. Default: 0

        Returns:
            dict: topic to list of records since the last fetch for the
                subscribed list of topics and partitions
        """
        assert timeout_ms >= 0, 'Timeout must not be negative'
        assert self._iterator is None, 'Incompatible with iterator interface'

        # poll for new data until the timeout expires
        start = time.time()
        remaining = timeout_ms
        while True:
            records = self._poll_once(remaining)
            if records:
                # before returning the fetched records, we can send off the
                # next round of fetches and avoid block waiting for their
                # responses to enable pipelining while the user is handling the
                # fetched records.
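To make the poll() contract above concrete, here is a hedged caller-side loop built only on what the docstring guarantees (topic and server values are placeholders). Note the docstring's warning: use either poll() or the iterator interface, never both on the same consumer:

from kafka import KafkaConsumer

consumer = KafkaConsumer('my-topic', bootstrap_servers='localhost:9092')
while True:
    # dict of TopicPartition -> list of ConsumerRecord; empty dict on timeout
    batches = consumer.poll(timeout_ms=500)
    for tp, records in batches.items():
        for record in records:
            print(tp.topic, tp.partition, record.offset, record.value)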
Example 11: KafkaAdminClient
# Required import: from kafka.client_async import KafkaClient [as alias]
# Or: from kafka.client_async.KafkaClient import poll [as alias]
# ......... earlier part of this example omitted .........
# (This excerpt starts inside KafkaAdminClient's group coordinator lookup;
# the class header and the construction of gc_request are omitted.)
        gc_response = self._send_request_to_node(self._client.least_loaded_node(), gc_request)
        # use the extra error checking in add_group_coordinator() rather than
        # immediately returning the group coordinator.
        success = self._client.cluster.add_group_coordinator(group_id, gc_response)
        if not success:
            error_type = Errors.for_code(gc_response.error_code)
            assert error_type is not Errors.NoError

            # Note: When error_type.retriable, Java will retry... see
            # KafkaAdminClient's handleFindCoordinatorError method
            raise error_type(
                "Could not identify group coordinator for group_id '{}' from response '{}'."
                .format(group_id, gc_response))

        group_coordinator = self._client.cluster.coordinator_for_group(group_id)
        # will be None if the coordinator was never populated, which should never happen here
        assert group_coordinator is not None
        # will be -1 if add_group_coordinator() failed... but by this point the
        # error should have been raised.
        assert group_coordinator != -1
        return group_coordinator

    def _send_request_to_node(self, node_id, request):
        """Send a Kafka protocol message to a specific broker.

        Will block until the message result is received.

        :param node_id: The broker id to which to send the message.
        :param request: The message to send.
        :return: The Kafka protocol response for the message.
        :exception: The exception if the message could not be sent.
        """
        while not self._client.ready(node_id):
            # poll until the connection to broker is ready, otherwise send()
            # will fail with NodeNotReadyError
            self._client.poll()
        future = self._client.send(node_id, request)
        self._client.poll(future=future)
        if future.succeeded():
            return future.value
        else:
            raise future.exception  # pylint: disable-msg=raising-bad-type

    def _send_request_to_controller(self, request):
        """Send a Kafka protocol message to the cluster controller.

        Will block until the message result is received.

        :param request: The message to send.
        :return: The Kafka protocol response for the message.
        """
        tries = 2  # in case our cached self._controller_id is outdated
        while tries:
            tries -= 1
            response = self._send_request_to_node(self._controller_id, request)

            # In Java, the error fieldname is inconsistent:
            #  - CreateTopicsResponse / CreatePartitionsResponse uses topic_errors
            #  - DeleteTopicsResponse uses topic_error_codes
            # So this is a little brittle in that it assumes all responses have
            # one of these attributes and that they always unpack into
            # (topic, error_code) tuples.
            topic_error_tuples = (response.topic_errors if hasattr(response, 'topic_errors')
                                  else response.topic_error_codes)

            # Also small py2/py3 compatibility -- py3 can ignore extra values
            # during unpack via: for x, y, *rest in list_of_values. py2 cannot.
            # So for now we have to map across the list and explicitly drop any
            # extra values (usually the error_message)
            for topic, error_code in map(lambda e: e[:2], topic_error_tuples):