This article collects typical usage examples of Python's kafka.TopicPartition. If you have been wondering what kafka.TopicPartition does, how to use it, or what real-world code using it looks like, the curated examples below may help. You can also explore further usage examples from the kafka module itself.
The following presents 14 code examples of kafka.TopicPartition, sorted by popularity by default. You can upvote the examples you like or find useful; your feedback helps the system recommend better Python code examples.
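Before the examples, a minimal sketch of what TopicPartition is: a (topic, partition) named tuple that identifies a single partition in kafka-python's consumer API. The broker address and topic name below are placeholder assumptions.

from kafka import KafkaConsumer, TopicPartition

# TopicPartition is a plain namedtuple of (topic, partition); assign(),
# seek(), position(), and end_offsets() all take it as the partition handle.
consumer = KafkaConsumer(bootstrap_servers='localhost:9092')  # placeholder broker
tp = TopicPartition('my_topic', 0)  # placeholder topic
consumer.assign([tp])               # manual assignment, no consumer-group rebalancing
consumer.seek_to_beginning(tp)      # rewind this partition to its earliest offset
print(consumer.position(tp))        # current fetch offset for the partition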
Example 1: _query_backend
# Required import: import kafka [as alias]
# Or: from kafka import TopicPartition [as alias]
def _query_backend(self):
    consumer = KafkaConsumer(
        bootstrap_servers=KAFKA_HOST,
        value_deserializer=lambda v: JSONSerializer().loads(v.decode('utf-8'))
    )
    tp = TopicPartition(self.topic, 0)
    consumer.assign([tp])
    # position() on a freshly assigned consumer resets to the end offset
    # (auto_offset_reset defaults to 'latest'), so this equals the message
    # count when the log still starts at offset 0.
    count = consumer.position(tp)
    consumer.seek(tp, 0)
    metrics = []
    for i in range(count):
        metrics.append(next(consumer))
    return metrics
Example 2: assign_partitions
# Required import: import kafka [as alias]
# Or: from kafka import TopicPartition [as alias]
def assign_partitions(self, partitions):
    """
    Assign partitions to the consumer.
    Arguments:
      partitions(list) : list of [topic, partition] lists
        example : [["topic1", 1], ["topic2", 1]]
    Returns:
      result(bool) : False if an exception occurs, True otherwise
    """
    print_info("assigning partitions to consumer {}".format(partitions))
    topic_partitions = [TopicPartition(topic=tup[0], partition=tup[1]) for tup in partitions]
    try:
        self.kafka_consumer.assign(topic_partitions)
        result = True
    except KafkaError as exc:
        print_error("Exception during assigning partitions - {}".format(exc))
        result = False
    return result
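A possible call for this wrapper, assuming `client` is an instance of the surrounding class with a connected `kafka_consumer` (names are placeholders):

# each inner list is [topic, partition]
ok = client.assign_partitions([["topic1", 0], ["topic2", 1]])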
Example 3: seek_to_position
# Required import: import kafka [as alias]
# Or: from kafka import TopicPartition [as alias]
def seek_to_position(self, topic, partition, offset):
    """
    Seek to the given offset.
    Arguments:
      topic(str): topic name
      partition(int): partition number
      offset(int): offset number
    Returns:
      result(bool) : False if an exception occurs, True otherwise
    """
    print_info("seeking to position {}:{}:{}".format(topic, partition, offset))
    topic_partition = TopicPartition(topic=topic, partition=partition)
    try:
        self.kafka_consumer.seek(partition=topic_partition, offset=offset)
        result = True
    except KafkaError as exc:
        print_error("Exception during seek - {}".format(exc))
        result = False
    return result
Example 4: debug
# Required import: import kafka [as alias]
# Or: from kafka import TopicPartition [as alias]
def debug(self, topic):
    c = KafkaConsumer(bootstrap_servers=kafka_hosts, client_id=self._client_id,
                      group_id=None, api_version=(0, 10))
    # assign the topic's partitions manually
    partitions = c.partitions_for_topic(topic)
    if not partitions:
        raise Exception("Topic " + topic + " does not exist")
    c.assign([TopicPartition(topic, p) for p in partitions])
    # seek to beginning if needed
    c.seek_to_beginning()
    # fetch messages; an empty string is yielded at the end of every poll cycle
    try:
        while True:
            batches = c.poll(100)
            if batches:
                for p in batches:
                    for msg in batches[p]:
                        yield msg.value.decode('utf-8')
            yield ""
    finally:
        # runs when the caller stops iterating and the generator is closed
        c.close()
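Because `debug` yields an empty string at the end of every poll cycle, a caller can treat those empty yields as heartbeats. A hedged usage sketch, assuming `client` is an instance of the surrounding class and 'my_topic' exists:

# runs until interrupted; empty yields mark poll-cycle boundaries
for value in client.debug('my_topic'):
    if value == "":
        continue  # heartbeat between polls
    print(value)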
Example 5: test_offset_for_times
# Required import: import kafka [as alias]
# Or: from kafka import TopicPartition [as alias]
def test_offset_for_times(mocker):
    partitions = [kafka.TopicPartition('ut_topic', 0)]
    offsets_for_times = {tp: OffsetAndTimestamp(42, -1) for tp in partitions}
    positions = {tp: 747 for tp in partitions}
    mock = mocker.Mock()
    mock.offsets_for_times.return_value = offsets_for_times
    mock.position.side_effect = lambda tp: positions.get(tp, 0)
    # Uses the returned offset when offsets_for_times provides one
    offsets = client.offsets_for_times(mock, partitions, 987654321)
    assert len(offsets) == len(partitions)
    assert all(tp in offsets for tp in partitions)
    assert offsets[partitions[0]] == 42
    # When offsets_for_times returns None, falls back to the position at the end
    offsets_for_times[partitions[0]] = None
    offsets = client.offsets_for_times(mock, partitions, 987654321)
    assert len(offsets) == len(partitions)
    assert all(tp in offsets for tp in partitions)
    assert offsets[partitions[0]] == 747
Example 6: offsets_for_times
# Required import: import kafka [as alias]
# Or: from kafka import TopicPartition [as alias]
def offsets_for_times(consumer, partitions, timestamp):
    """Augment KafkaConsumer.offsets_for_times to never return None

    Parameters
    ----------
    consumer : kafka.KafkaConsumer
        This consumer must only be used for collecting metadata, and not
        consuming. APIs will be used that invalidate consuming.
    partitions : list of kafka.TopicPartition
    timestamp : number
        Timestamp, in seconds since unix epoch, to return offsets for.

    Returns
    -------
    dict from kafka.TopicPartition to integer offset
    """
    # Kafka uses millisecond timestamps
    timestamp_ms = int(timestamp * 1000)
    response = consumer.offsets_for_times({p: timestamp_ms for p in partitions})
    offsets = {}
    for tp, offset_and_timestamp in response.items():
        if offset_and_timestamp is None:
            # No messages exist after timestamp. Fetch latest offset.
            consumer.assign([tp])
            consumer.seek_to_end(tp)
            offsets[tp] = consumer.position(tp)
        else:
            offsets[tp] = offset_and_timestamp.offset
    return offsets
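A usage sketch for the helper above; the broker address, topic name, and timestamp are placeholder assumptions:

import kafka

# metadata-only consumer, as the docstring requires
consumer = kafka.KafkaConsumer(bootstrap_servers='localhost:9092')
partitions = [kafka.TopicPartition('my_topic', p)
              for p in consumer.partitions_for_topic('my_topic')]
offsets = offsets_for_times(consumer, partitions, 1577836800)  # 2020-01-01 UTC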
Example 7: offset_range_for_timestamp_range
# Required import: import kafka [as alias]
# Or: from kafka import TopicPartition [as alias]
def offset_range_for_timestamp_range(brokers, start, end, topic):
    """Determine OffsetRanges for a given timestamp range

    Parameters
    ----------
    brokers : list of str
        Kafka bootstrap servers
    start : number
        Unix timestamp in seconds
    end : number
        Unix timestamp in seconds
    topic : str
        Topic to fetch offsets for

    Returns
    -------
    list of OffsetRange or None
        Per-partition ranges of offsets to read
    """
    consumer = kafka.KafkaConsumer(bootstrap_servers=brokers)
    partitions = consumer.partitions_for_topic(topic)
    if partitions is None:
        # Topic does not exist.
        return None
    partitions = [kafka.TopicPartition(topic, p) for p in partitions]
    o_start = offsets_for_times(consumer, partitions, start)
    o_end = offsets_for_times(consumer, partitions, end)
    return [OffsetRange(tp, o_start[tp], o_end[tp]) for tp in partitions]
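The `OffsetRange` type itself is not shown in these excerpts. Judging by its use here and in Example 13 (`.tp`, `.start`, and `.end` attributes, with `.tp` unpackable into a TopicPartition), a plausible reconstruction is a simple namedtuple; the real project may define it differently:

from collections import namedtuple

# Reconstructed from usage in Examples 7 and 13; field names are inferred.
OffsetRange = namedtuple('OffsetRange', ['tp', 'start', 'end'])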
Example 8: pop_queries_for_worker
# Required import: import kafka [as alias]
# Or: from kafka import TopicPartition [as alias]
def pop_queries_for_worker(self, worker_id: str, batch_size: int) -> List[Query]:
    name = f'workers_{worker_id}_queries'
    query_consumer = KafkaConsumer(name,
                                   bootstrap_servers=self.connection_url,
                                   auto_offset_reset='earliest',
                                   group_id=QUERIES_QUEUE)
    partition = TopicPartition(name, 0)
    end_offset = query_consumer.end_offsets([partition]).get(partition)
    if not end_offset:
        # nothing to read, or the partition is unknown
        query_consumer.close()
        return []
    try:
        queries = []
        while True:
            record = next(query_consumer)
            queries.append(record.value)
            query_consumer.commit()
            if record.offset >= end_offset - 1 or len(queries) == batch_size:
                break
        queries = [pickle.loads(x) for x in queries]
        query_consumer.close()
        return queries
    except KafkaError:
        query_consumer.close()
        return []
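For this consumer to find anything, something must have produced pickled Query objects to the per-worker topic. A hypothetical producer-side counterpart, with the topic naming and pickle serialization inferred from the consumer above:

import pickle
from kafka import KafkaProducer

def push_queries_for_worker(connection_url, worker_id, queries):
    # mirror of pop_queries_for_worker: pickled values on a per-worker topic
    producer = KafkaProducer(bootstrap_servers=connection_url)
    for query in queries:
        producer.send(f'workers_{worker_id}_queries', pickle.dumps(query))
    producer.flush()
    producer.close()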
Example 9: get_latest_timestamp_order_from_topic
# Required import: import kafka [as alias]
# Or: from kafka import TopicPartition [as alias]
def get_latest_timestamp_order_from_topic(topic):
    consumer = KafkaConsumer(topic,
                             # client_id='fooltrader',
                             # group_id='fooltrader',
                             value_deserializer=lambda m: json.loads(m.decode('utf8')),
                             bootstrap_servers=[KAFKA_HOST])
    topic_partition = TopicPartition(topic=topic, partition=0)
    end_offset = consumer.end_offsets([topic_partition])[topic_partition]
    if end_offset > 0:
        # the partition is only assigned after a poll, and then we can seek
        consumer.poll(5, 1)
        consumer.seek(topic_partition, end_offset - 1)
        message = consumer.poll(10000, 500)
        msgs = message[topic_partition]
        if len(msgs) > 0:
            record = msgs[-1]
            timestamp = to_timestamp(record.value['timestamp'])
            order = None
            if 'order' in record.value:
                order = record.value['order']
            return timestamp, order
    return None, None
Example 10: seek
# Required import: import kafka [as alias]
# Or: from kafka import TopicPartition [as alias]
def seek(self, consumer, topic, partition):
    KafkaOffset = apps.get_model(app_label='logpipe', model_name='KafkaOffset')
    tp = kafka.TopicPartition(topic=topic, partition=partition)
    try:
        obj = KafkaOffset.objects.get(topic=topic, partition=partition)
        logger.debug('Seeking to offset "%s" on topic "%s", partition "%s"' % (obj.offset, topic, partition))
        consumer.client.seek(tp, obj.offset)
    except KafkaOffset.DoesNotExist:
        logger.debug('Seeking to beginning of topic "%s", partition "%s"' % (topic, partition))
        consumer.client.seek_to_beginning(tp)
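The `KafkaOffset` model is resolved at runtime via `apps.get_model`, so its definition is not shown here. From the `objects.get(topic=..., partition=...)` lookup and the `obj.offset` read above, a minimal sketch of such a Django model might look like this (field types are assumptions; the real logpipe model may differ):

from django.db import models

class KafkaOffset(models.Model):
    # fields inferred from the lookup in Example 10
    topic = models.CharField(max_length=200)
    partition = models.PositiveIntegerField()
    offset = models.PositiveIntegerField(default=0)

    class Meta:
        unique_together = ('topic', 'partition')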
Example 11: _get_topic_partitions
# Required import: import kafka [as alias]
# Or: from kafka import TopicPartition [as alias]
def _get_topic_partitions(self):
    p = []
    partitions = self.client.partitions_for_topic(self.topic_name)
    if not partitions:
        raise MissingTopicError('Could not find topic %s. Does it exist?' % self.topic_name)
    for partition in partitions:
        tp = kafka.TopicPartition(self.topic_name, partition=partition)
        p.append(tp)
    return p
Example 12: iter_records
# Required import: import kafka [as alias]
# Or: from kafka import TopicPartition [as alias]
def iter_records(self) -> Generator[Mapping, None, None]:
    consumer = kafka.KafkaConsumer(bootstrap_servers=self.brokers,
                                   group_id='mjolnir_msearch',
                                   enable_auto_commit=False,
                                   auto_offset_reset='latest',
                                   value_deserializer=lambda x: json.loads(x.decode('utf8')),
                                   api_version=mjolnir.kafka.BROKER_VERSION,
                                   # Msearch requests are relatively heavy at a few tens of ms each.
                                   # 50 requests at 50ms each gives us ~2.5s to process a batch. We
                                   # keep this low so kafka regularly gets re-pinged.
                                   max_poll_records=min(500, 50 * self.n_workers))
    consumer.subscribe([self.topic_work])
    # initialized outside the try block so the finally clause can always see it
    offsets = cast(Dict[kafka.TopicPartition, kafka.OffsetAndMetadata], dict())
    try:
        last_commit = 0.0
        offset_commit_interval_sec = 60
        while self.load_monitor.is_below_threshold:
            now = time.monotonic()
            if offsets and now - last_commit > offset_commit_interval_sec:
                consumer.commit_async(offsets)
                last_commit = now
                offsets = {}
            # By polling directly, rather than using the iter based api, we
            # have the opportunity to regularly re-check the load monitor
            # and transition out of the consuming state if needed.
            poll_response = consumer.poll(timeout_ms=60000)
            if not poll_response:
                continue
            with Metric.PROCESS_BATCH.time():
                for tp, records in poll_response.items():
                    for record in records:
                        self.load_monitor.notify()
                        yield record.value
                # Wait for all the work to complete
                self.work_queue.join()
                for tp, records in poll_response.items():
                    offsets[tp] = kafka.OffsetAndMetadata(records[-1].offset + 1, '')
                Metric.RECORDS_PROCESSED.inc(sum(len(x) for x in poll_response.values()))
    finally:
        if offsets:
            consumer.commit(offsets)
        consumer.close()
Example 13: kafka_to_rdd
# Required import: import kafka [as alias]
# Or: from kafka import TopicPartition [as alias]
def kafka_to_rdd(sc, client_config, offset_ranges):
    """Read ranges of kafka partitions into an RDD.

    Parameters
    ----------
    sc : pyspark.SparkContext
    client_config : ClientConfig
    offset_ranges : list of OffsetRange
        List of topic partitions along with ranges to read. Start
        and end of range are inclusive.

    Returns
    -------
    pyspark.RDD
        Contents of the specified offset_ranges
    """
    def read_offset_range(offset_range):
        if offset_range.end <= offset_range.start:
            # Raise exception?
            return
        # After a serialization round trip the TopicPartition fails an
        # isinstance check; re-instantiate so we have the expected type.
        tp = kafka.TopicPartition(*offset_range.tp)
        consumer = kafka.KafkaConsumer(bootstrap_servers=client_config.brokers,
                                       value_deserializer=lambda x: json.loads(x.decode('utf8')))
        try:
            consumer.assign([tp])
            consumer.seek(tp, offset_range.start)
            while True:
                poll_response = consumer.poll(timeout_ms=10000)
                if poll_response and tp in poll_response:
                    for message in poll_response[tp]:
                        if message.offset > offset_range.end:
                            break
                        yield message.value
                if consumer.position(tp) >= offset_range.end:
                    break
        finally:
            consumer.close()

    return (
        # TODO: This isn't the same as assigning each offset_range to a separate
        # partition, but it doesn't seem like pyspark allows us to do that. Often
        # enough this seems to achieve the same thing, but without guarantees.
        sc.parallelize(offset_ranges, len(offset_ranges))
        .flatMap(read_offset_range)
    )
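Examples 7 and 13 compose naturally: compute per-partition offset ranges for a time window, then read those ranges into an RDD. A hedged wiring sketch; `sc`, `client_config`, the topic name, and the timestamps are assumptions:

# 2020-01-01 to 2020-01-02 UTC, as unix timestamps in seconds
ranges = offset_range_for_timestamp_range(
    client_config.brokers, 1577836800, 1577923200, 'my_topic')
if ranges is not None:
    rdd = kafka_to_rdd(sc, client_config, ranges)
    print(rdd.count())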
Example 14: consume_topic_with_func
# Required import: import kafka [as alias]
# Or: from kafka import TopicPartition [as alias]
def consume_topic_with_func(self, topic, func):
    consumer = KafkaConsumer(topic,
                             client_id='fooltrader',
                             group_id=self.bot_name,
                             value_deserializer=lambda m: json.loads(m.decode('utf8')),
                             bootstrap_servers=[KAFKA_HOST])
    topic_partition = TopicPartition(topic=topic, partition=0)
    if self.start_timestamp:
        start_timestamp = int(self.start_timestamp.timestamp() * 1000)
        end_offset = consumer.end_offsets([topic_partition])[topic_partition]
        if end_offset == 0:
            self.logger.warning("topic:{} end offset:{}".format(topic, end_offset))
            self.logger.error("the topic:{} has no data, but you want to backtest".format(self.quote_topic))
            return
        # find the offset for start_timestamp
        offset_and_timestamp = consumer.offsets_for_times({topic_partition: start_timestamp})
        if offset_and_timestamp:
            offset_and_timestamp = offset_and_timestamp[topic_partition]
            if offset_and_timestamp:
                # the partition is only assigned after a poll, and then we can seek
                consumer.poll(5, 1)
                # move to the offset
                consumer.seek(topic_partition, offset_and_timestamp.offset)
                for message in consumer:
                    if 'timestamp' in message.value:
                        message_time = to_timestamp(message.value['timestamp'])
                    else:
                        message_time = to_timestamp(message.timestamp)
                    if self.end_timestamp and (message_time > self.end_timestamp):
                        consumer.close()
                        break
                    getattr(self, func)(message.value)
            else:
                latest_timestamp, _ = get_latest_timestamp_order_from_topic(self.quote_topic)
                self.logger.warning(
                    "start:{} is after the last record:{}".format(self.start_timestamp, latest_timestamp))