This page collects typical usage examples of the Python method kafka.KafkaConsumer.offsets. If you have been struggling with questions like "What exactly does KafkaConsumer.offsets do?", "How do I use KafkaConsumer.offsets?", or "Where can I find examples of it?", the hand-picked code samples below may help. You can also explore further usage examples of the containing class, kafka.KafkaConsumer.
The following shows 5 code examples of the KafkaConsumer.offsets method, sorted by popularity by default. You can upvote the examples you like or find useful; your votes help the system recommend better Python code examples.
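For orientation, here is a minimal sketch of calling the method. Note that offsets(), task_done() and set_topic_partitions(), used throughout the examples below, belong to the legacy kafka-python 0.9.x KafkaConsumer API and were removed in later releases; the broker address, topic and group names are placeholders:

from kafka import KafkaConsumer

consumer = KafkaConsumer('my-topic',                           # placeholder topic
                         group_id='my-group',                  # placeholder group
                         bootstrap_servers=['localhost:9092'])

# With no argument, offsets() returns the whole internal offsets struct as a
# dict of groups; with a group name ('fetch', 'commit', 'task_done' or
# 'highwater') it returns just that mapping of (topic, partition) -> offset.
print(consumer.offsets())
print(consumer.offsets('fetch'))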
Example 1: generator
# Required import: from kafka import KafkaConsumer [as alias]
# Or: from kafka.KafkaConsumer import offsets [as alias]
def generator():
    # topics, partitions, opts, start, kafka_pool, process_batch and
    # BATCH_SEPARATOR come from the enclosing scope; time is the stdlib
    # module and ConsumerTimeout lives in kafka.common in this API version.
    keep_alive_in_a_row = 0
    messages_read = 0
    # init batch
    messages_read_in_batch = 0
    current_batch = {partition: [] for partition in partitions}
    batch_start_time = time.time()
    with kafka_pool.kafka_client() as client:
        consumer = KafkaConsumer(topics,
                                 kafka_client=client,
                                 auto_commit_enable=False,
                                 consumer_timeout_ms=200)
        while True:
            try:
                message = consumer.next()
                # if we read a message, reset the keep-alive counter
                keep_alive_in_a_row = 0
                # put the message into the batch
                messages_read += 1
                messages_read_in_batch += 1
                current_batch[message.partition].append(message.value.decode('utf-8'))
            except ConsumerTimeout:
                pass
            # check if it's time to send the batch
            time_since_batch_start = time.time() - batch_start_time
            latest_offsets = consumer.offsets("fetch")
            if time_since_batch_start >= opts['batch_flush_timeout'] != 0 or \
                    messages_read_in_batch >= opts['batch_limit']:
                yield from process_batch(latest_offsets, current_batch)
                # if we hit the keep-alive count limit, close the stream
                if messages_read_in_batch == 0:
                    if keep_alive_in_a_row >= opts['batch_keep_alive_limit'] != -1:
                        break
                    keep_alive_in_a_row += 1
                # init a new batch
                messages_read_in_batch = 0
                current_batch = {partition: [] for partition in partitions}
                batch_start_time = time.time()
                yield BATCH_SEPARATOR
            # check if we reached the stream timeout or message count limit
            time_since_start = time.time() - start
            if time_since_start >= opts['stream_timeout'] > 0 or 0 < opts['stream_limit'] <= messages_read:
                if messages_read_in_batch > 0:
                    yield from process_batch(latest_offsets, current_batch)
                break
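In this example, consumer.offsets("fetch") snapshots the next fetch position per partition. The returned dict is keyed by (topic, partition) tuples, with the topic as bytes in this API version, as the topic.encode(...) lookup in Example 4 below also suggests. An illustrative shape, with a hypothetical topic name:

latest_offsets = consumer.offsets("fetch")
# e.g. {(b'my-topic', 0): 1042, (b'my-topic', 1): 987}
next_offset = latest_offsets[(b'my-topic', 0)]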
Example 2: testRestart
# Required import: from kafka import KafkaConsumer [as alias]
# Or: from kafka.KafkaConsumer import offsets [as alias]
def testRestart():
    '''test restart

    Does a group that has consumed messages from Kafka and committed
    the offsets get to read them again?
    -> In fact, it does not.
    '''
    topicsList = ['JOB_NGINX', 'JOB_BASIC', 'JOB_JMON', 'JOB_TOMCAT']
    brokerList = ['10.10.90.171:9092', '10.10.82.114:9092', '10.10.94.15:9092']
    groupId = '8_consumer_group'
    kc = KafkaConsumer(
        *topicsList,
        fetch_min_bytes=1024,
        group_id=groupId,
        bootstrap_servers=brokerList,
        consumer_timeout_ms=10 * 1000,
        auto_offset_reset='smallest'
    )
    print kc.offsets()
    # mark offset 2000 as done on each partition of JOB_BASIC
    # (KafkaMessage and ConsumerTimeout live in kafka.common in 0.9.x)
    for partition in [0, 1, 2]:
        consumerMsg = KafkaMessage('JOB_BASIC',
                                   partition, 2000, 'shit', 'shit')
        kc.task_done(consumerMsg)
    print kc.offsets()
    kc.set_topic_partitions(
        ('JOB_BASIC', 0, 2000),
        ('JOB_BASIC', 1, 2000),
        ('JOB_BASIC', 2, 1000))
    import pdb
    pdb.set_trace()
    while 1:
        try:
            for consumer in kc:
                print consumer
                print type(consumer)
                kc.task_done(consumer)
        except ConsumerTimeout:
            kc.commit()
            print 'xxxxxxxxxxxxxxxxx'
            continue
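The result the docstring reports follows from offset precedence: once a group has a committed offset, the consumer resumes from it, and auto_offset_reset='smallest' is only consulted when no committed offset exists (or the stored one is out of range). A minimal sketch of the two runs, against the legacy kafka-python 0.9.x API with a placeholder broker and group:

from kafka import KafkaConsumer
from kafka.common import ConsumerTimeout  # location in kafka-python 0.9.x

def run_once(group):
    kc = KafkaConsumer('JOB_BASIC',
                       group_id=group,
                       bootstrap_servers=['localhost:9092'],  # placeholder
                       consumer_timeout_ms=5000,
                       auto_offset_reset='smallest')
    count = 0
    try:
        for msg in kc:          # iteration raises ConsumerTimeout when idle
            kc.task_done(msg)
            count += 1
    except ConsumerTimeout:
        pass
    kc.commit()
    return count

print(run_once('demo-group'))  # first run: reads from the beginning
print(run_once('demo-group'))  # second run: resumes after the commit, reads 0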
Example 3: testOffset
# Required import: from kafka import KafkaConsumer [as alias]
# Or: from kafka.KafkaConsumer import offsets [as alias]
def testOffset():
    '''test kafka offset

    Can a group start reading from a specified position by setting the
    offset? No commit is performed here.
    '''
    topicsList = ['JOB_NGINX', 'JOB_BASIC', 'JOB_JMON', 'JOB_TOMCAT', 'JOB_KV']
    groupId = '8_consumer_group'
    brokerList = ['10.10.90.171:9092', '10.10.82.114:9092', '10.10.94.15:9092']
    kc = KafkaConsumer(
        *topicsList,
        fetch_min_bytes=1024,
        group_id=groupId,
        bootstrap_servers=brokerList,
        consumer_timeout_ms=10 * 1000
    )
    #offsetDict = kc.offsets()
    #print
    #print "commit:", offsetDict['commit']
    #print "task_done:", offsetDict['task_done']
    #print "fetch:", offsetDict['fetch']
    #print '==============================================='
    #while 1:
    #    try:
    #        for consumer in kc.fetch_messages():
    #            print consumer
    #            print type(consumer)
    #    except ConsumerTimeout:
    #        print 'xxxxxxxxxxxxxxxxx'
    #        continue
    print '+++++++++++++++++++++++++++++++'
    kc.set_topic_partitions(*topicsList)
    while 1:
        try:
            for consumer in kc:
                print consumer
                print type(consumer)
        except ConsumerTimeout:
            print 'xxxxxxxxxxxxxxxxx'
            break  # stop after a timeout so the final offsets are reachable
    offsetDict = kc.offsets()
    print
    print "commit:", offsetDict['commit']
    print "task_done:", offsetDict['task_done']
    print "fetch:", offsetDict['fetch']
Example 4: generator
# Required import: from kafka import KafkaConsumer [as alias]
# Or: from kafka.KafkaConsumer import offsets [as alias]
def generator():
    # topic, topics, partitions, opts, start, kafka_pool,
    # __create_stream_message, __measure_time and BATCH_SEPARATOR come from
    # the enclosing scope; json and datetime are the stdlib modules.
    keep_alive_in_a_row = 0
    messages_read = 0
    # init batch
    messages_read_in_batch = 0
    current_batch = {}
    for partition in partitions:
        current_batch[partition] = []
    batch_start_time = datetime.datetime.now()
    with kafka_pool.kafka_client() as client:
        consumer = KafkaConsumer(topics, kafka_client=client, auto_commit_enable=False, consumer_timeout_ms=200)
        while True:
            try:
                message = consumer.next()
                # if we read a message, reset the keep-alive counter
                keep_alive_in_a_row = 0
                # put the message into the batch
                messages_read += 1
                messages_read_in_batch += 1
                message_json = json.loads(message.value.decode("utf-8"))
                current_batch[message.partition].append(message_json)
            except ConsumerTimeout:
                pass
            # check if it's time to send the batch
            time_since_batch_start = datetime.datetime.now() - batch_start_time
            latest_offsets = consumer.offsets("fetch")
            if (
                time_since_batch_start.total_seconds() >= opts["batch_flush_timeout"] != 0
                or messages_read_in_batch >= opts["batch_limit"]
            ):
                for partition in partitions:
                    topic_partition = (topic.encode("UTF-8"), partition)
                    # send the messages we could read so far
                    if len(current_batch[partition]) > 0:
                        stream_message = __create_stream_message(
                            partition, latest_offsets[topic_partition], current_batch[partition]
                        )
                        with __measure_time(current_batch[partition], stream_message):
                            yield stream_message
                    # just send the keep-alive
                    else:
                        yield __create_stream_message(partition, latest_offsets[topic_partition])
                # if we hit the keep-alive count limit, close the stream
                if messages_read_in_batch == 0:
                    if keep_alive_in_a_row >= opts["batch_keep_alive_limit"] != -1:
                        break
                    keep_alive_in_a_row += 1
                # init a new batch
                messages_read_in_batch = 0
                current_batch = {}
                for partition in partitions:
                    current_batch[partition] = []
                batch_start_time = datetime.datetime.now()
                yield BATCH_SEPARATOR
            # check if we reached the stream timeout or message count limit
            time_since_start = datetime.datetime.now() - start
            if (
                time_since_start.total_seconds() >= opts["stream_timeout"] > 0
                or 0 < opts["stream_limit"] <= messages_read
            ):
                if messages_read_in_batch > 0:
                    for partition in partitions:
                        topic_partition = (topic.encode("UTF-8"), partition)
                        # send the messages we could read so far
                        if len(current_batch[partition]) > 0:
                            stream_message = __create_stream_message(
                                partition, latest_offsets[topic_partition], current_batch[partition]
                            )
                            with __measure_time(current_batch[partition], stream_message):
                                yield stream_message
                        # just send the keep-alive
                        else:
                            yield __create_stream_message(partition, latest_offsets[topic_partition])
                break
Example 5: KafkaMQ
# Required import: from kafka import KafkaConsumer [as alias]
# Or: from kafka.KafkaConsumer import offsets [as alias]
class KafkaMQ(MQBase):
    """
    kafka://sparkh1:9092,sparkh2:9092,sparkh3:9092?topics=xx&group_id=xx
    """
    # MQBase, Producer, Timeout, urlOptMaps, six, urlparse and parse_qsl
    # are provided by the surrounding module.
    _CLIENT_ID_ = 'PyThunderKafkaMQCli'
    _GROUP_ID_ = 'PyThunderKafkaMQCliGroup'
    maxLowTimeout = 0.1

    def __init__(self, url):
        super(KafkaMQ, self).__init__(url=url)
        consumerKwargs, producerKwargs = self.fromUrl(url)
        self._topic = consumerKwargs.pop('topic', '')
        topics = self._topic.split(',')
        self._consumer = KafkaConsumer(*topics, **consumerKwargs)
        self._producer = Producer(**producerKwargs)

    def qsize(self):
        # note: returns the consumer's offsets struct rather than an integer
        try:
            with Timeout(seconds=self.maxLowTimeout, exception=None):
                print('no offsets: {}'.format(self._get(noCommit=True)))
        except:
            pass
        localOffsets = self._consumer.offsets()
        return localOffsets

    def _get(self, noCommit=False):
        with Timeout(seconds=self.maxLowTimeout, exception=self.Empty):
            kafkaMessage = self._consumer.next()
            if noCommit:
                return kafkaMessage
            if kafkaMessage:
                try:
                    return kafkaMessage.value
                finally:
                    self._commit(kafkaMessage)
            else:
                raise self.Empty

    def _put(self, item):
        with Timeout(seconds=self.maxLowTimeout, exception=self.Full):
            status = self._producer.send_messages(self._topic, None, *[item])
            if status:
                return 10

    def _commit(self, message):
        try:
            self._consumer.task_done(message)
            self._consumer.commit()
        except:
            pass

    def close(self):
        try:
            del self._consumer
            self._producer.close()
            del self._producer
        except:
            pass

    def fromUrl(self, url):
        urlInfo = urlparse(url)
        qs = urlInfo.query or ''
        kwargs = dict()
        options = dict()
        options['hosts'] = urlInfo.netloc
        options['topic'] = urlInfo.path.strip('/')
        for name, value in six.iteritems(dict(parse_qsl(qs))):
            if value:
                options[name] = value
        self.maxSize = options.pop('maxSize', 10000)
        self.lazyLimit = options.pop('lazyLimit', True)
        options.setdefault('group', self._GROUP_ID_ + '-{}'.format(id(self)))
        options.setdefault('client', self._CLIENT_ID_)
        if urlInfo.scheme != 'kafka':
            raise AttributeError('scheme {} not supported'.format(urlInfo.scheme))
        for name, value in six.iteritems(options):
            # urlOptMaps maps a URL option to a (kwarg_name, caster) pair
            mirror = urlOptMaps.get(name)
            if mirror:
                value = mirror[1](value)
                if mirror[0] == 'bootstrap_servers':
                    value = value.split(',')
                kwargs[mirror[0]] = value
            else:
                kwargs[name] = value
        return kwargs, {
            'hosts': options.pop('hosts', '').split(','),
            'client_id': options.pop('client_id', self._CLIENT_ID_),
            'timeout': options.pop('timeout', 120)
        }
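A hypothetical usage sketch: it assumes the surrounding module supplies MQBase (including its Empty and Full exceptions), the Timeout context manager, the urlOptMaps option table, and a Producer such as kafka-python's SimpleProducer; the host names and topic are placeholders taken from the class docstring:

mq = KafkaMQ('kafka://sparkh1:9092,sparkh2:9092?topics=JOB_BASIC&group_id=demo')
mq._put(b'hello')    # produce a single message
print(mq._get())     # consume it; _commit() runs task_done() plus commit()
print(mq.qsize())    # dumps the consumer's local offsets struct
mq.close()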