This article collects typical usage examples of the Python method kafka.KafkaConsumer.task_done. If you have been wondering what KafkaConsumer.task_done does, how to use it, and where to find working examples, the curated code samples below should help. You can also explore further usage examples of the containing class, kafka.KafkaConsumer.
The following presents 14 code examples of the KafkaConsumer.task_done method, sorted by popularity by default.
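Before the individual examples, here is a minimal sketch of the pattern they all share, assuming a broker reachable at localhost:9092 and an existing topic named 'demo' (note that task_done and this constructor signature belong to the legacy kafka-python 0.9.x KafkaConsumer; the method was removed in later releases):
from kafka import KafkaConsumer

consumer = KafkaConsumer('demo',
                         group_id='demo-group',
                         bootstrap_servers=['localhost:9092'],
                         auto_commit_enable=False,
                         auto_offset_reset='smallest')
for message in consumer:
    # ... process message.value here ...
    # Mark the message as fully consumed so it is included in the next commit
    consumer.task_done(message)
    # With auto_commit_enable=False, offsets must be committed explicitly
    consumer.commit()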
Example 1: check_kafka_events
# Required import: from kafka import KafkaConsumer [as alias]
# Or: from kafka.KafkaConsumer import task_done [as alias]
def check_kafka_events():
global loopCondition
from kafka import KafkaConsumer, KafkaClient, SimpleProducer
warehouse_addr = Conf.getWareHouseAddr()
consumer = KafkaConsumer("%sResult"%wk.options.warehouse,
bootstrap_servers=[warehouse_addr],
group_id="cnlab",
auto_commit_enable=True,
auto_commit_interval_ms=30 * 1000,
auto_offset_reset='smallest')
while loopCondition:
for message in consumer.fetch_messages():
print "topic=%s, partition=%s, offset=%s, key=%s " % (message.topic, message.partition,
message.offset, message.key)
task = cPickle.loads(message.value)
if task.state == Task.TASK_FINISHED:
print "taskId:%s,success!!!:%s"%(task.id,task.result)
else:
print "taskId:%s,failed!!!"%task.id
consumer.task_done(message)
last_data_time = time.time()
if not loopCondition:
break
Example 2: advanceConsumer
# Required import: from kafka import KafkaConsumer [as alias]
# Or: from kafka.KafkaConsumer import task_done [as alias]
def advanceConsumer():
'''
    multiple topics with auto-commit of offsets
'''
consumer = KafkaConsumer('bamboo1', 'bamboo2',
bootstrap_servers=['10.1.200.63:9092'],
group_id = '8_consumer_group',
auto_commit_enable=True,
auto_commit_interval_ms=30 * 1000,
auto_offset_reset='smallest')
# initialize iteration
for message in consumer:
print("TOPIC:%s Partition:%d offset%d key=%s value=%s" % (\
message.topic, message.partition,
message.offset, message.key,
message.value))
consumer.task_done(message)
consumer.commit()
# Batch process interface
while True:
for m in consumer.fetch_messages():
print("===Topic:%s Partition:%d offset%d key=%s value=%s" % (\
message.topic, message.partition,
message.offset, message.key,
message.value))
consumer.task_done(m)
Example 3: consume
# Required import: from kafka import KafkaConsumer [as alias]
# Or: from kafka.KafkaConsumer import task_done [as alias]
def consume(self):
consumer = None
try:
print "\nCONSUMER TOPICS = " + str(self.listener_topics)
consumer = KafkaConsumer(*self.listener_topics,
client_id=self.name,
group_id='kafka',
bootstrap_servers=self.connection_string,
auto_offset_reset='smallest')
self._set_alive(True)
except Exception as e:
print "A consumer couldn't be created."
print e
while is_running(self.name):
for message in consumer.fetch_messages():
asset_success = True
message_success = True
if not is_running(self.name):
break
try:
try:
key = Key.objects.get(listener=Listener.objects.get(listener_topic=message.topic),
listener_key=message.key)
feature_data = json.loads(message.value)
for asset_type in ['photos', 'videos', 'sounds']:
if feature_data.get('properties').get(asset_type):
import urllib2
urls = []
for index, value in enumerate(feature_data.get('properties').get(asset_type)):
asset, created = write_asset(key, value, asset_type, feature_data.get('properties').get('{}_url'.format(asset_type))[index-1])
if not asset:
asset_success = False
else:
print "Asset {} was written.".format(value)
urls += [asset.asset_data.url]
feature_data['properties']['{}_url'.format(asset_type)] = urls
print "URLS:" + str(urls)
if not write_message(key, json.dumps(feature_data)):
message_success = False
else:
print "Message {} was written.".format(feature_data.get('properties').get('city'))
except Exception as e:
                    if 'DoesNotExist' in str(e):
continue
else:
print e
message_success = False
except KeyboardInterrupt:
break
if message_success and asset_success:
consumer.task_done(message)
consumer.commit()
consumer.close()
self._set_alive(False)
Example 4: create_consumer_group
# Required import: from kafka import KafkaConsumer [as alias]
# Or: from kafka.KafkaConsumer import task_done [as alias]
def create_consumer_group(topic, group_name, num_messages=1):
consumer = KafkaConsumer(
topic,
group_id=group_name,
auto_commit_enable=False,
bootstrap_servers=[KAFKA_URL],
auto_offset_reset='smallest')
for i in xrange(num_messages):
message = consumer.next()
consumer.task_done(message)
consumer.commit()
return consumer
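A hypothetical call site for this helper (the topic and group names are illustrative; KAFKA_URL comes from the snippet's own context):
consumer = create_consumer_group('my-topic', 'my-group', num_messages=5)
# offsets for the 5 consumed messages are now committed under 'my-group'
consumer.close()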
Example 5: __init__
# Required import: from kafka import KafkaConsumer [as alias]
# Or: from kafka.KafkaConsumer import task_done [as alias]
class KafkaEventsConsumer:
'''A generic kafka consumer of events'''
def __init__(self, topic, group_id, host, port, strategy):
        '''Create a new KafkaEventsConsumer instance
        topic    the kafka topic of interest
        group_id the consumer group for this consumer
        host     the ip address or host name of the kafka server, as a string
        port     the port the kafka server is listening on, as a string
        '''
        if not (isinstance(host, str) and
                isinstance(port, str) and
                isinstance(topic, str)):
            raise Exception("unexpected kafka server parameters provided")
self.consumer = KafkaConsumer(topic,
group_id=group_id,
bootstrap_servers=["%s:%s" % (host, port)],
consumer_timeout_ms=-1)
print("In KafkaEventsConsumer.__init__ Created KafkaConsumer instance: {0}".format(self.consumer))
self.topic = topic
self.strategy = strategy
self.worker = threading.Thread(target=lambda : self.collect_messages())
print("In KafkaEventsConsumer.__init__ Created worker thread: {0}".format(self.worker))
self.worker.start()
def collect_messages(self):
'''Collects raw messages from the consumer for storage in an instance queue'''
try:
print ("In KafkaEventsConsumer waiting for events delivery")
for msg in self.consumer:
print("KafkaConsumer collected 1 msg from topic: {0} in collect_messages".format(self.topic))
self.strategy.handle_message(msg)
# Mark this message as fully consumed
# so it can be included in the next commit
self.consumer.task_done(msg)
# Commit the message that was just consumed
self.consumer.commit()
except Exception as err:
print("Exception: {0}".format(err))
pass
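A hypothetical usage sketch for this class (the EchoStrategy class, topic name, and broker address below are illustrative, not taken from the original):
class EchoStrategy(object):
    '''Minimal strategy object exposing the handle_message hook the consumer expects'''
    def handle_message(self, msg):
        print("got %s:%d:%d" % (msg.topic, msg.partition, msg.offset))

consumer = KafkaEventsConsumer('events', 'echo-group', 'localhost', '9092', EchoStrategy())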
Example 6: main
# Required import: from kafka import KafkaConsumer [as alias]
# Or: from kafka.KafkaConsumer import task_done [as alias]
def main():
# more advanced consumer -- multiple topics w/ auto commit offset
# management
    consumer = KafkaConsumer('email',
                             bootstrap_servers=['localhost:9092'],
                             group_id='None',
                             auto_commit_enable=True,
                             auto_commit_interval_ms=30 * 1000,
                             auto_offset_reset='smallest')
# Infinite iteration
for m in consumer:
# do_some_work(m)
# Mark this message as fully consumed
# so it can be included in the next commit
#
# **messages that are not marked w/ task_done currently do not commit!
        # print 'm is ', m
        # example POST payload: {"body":{"aa":["1","2",3]}}
        try:
            msg = eval(m.value)
            print("email eval msg is %s" % str(msg.keys()))
        except:
            print("email error msg is %s" % m)
"""
import json
msg = json.loads(m.value)
print('json msg is %s' % (str(msg)))
print
"""
#format mail info entry
mailParam = formatEmailEntry(msg)
#send mail entry info
sendMail(mailParam)
#end send mail
#end format
consumer.task_done(m)
# If auto_commit_enable is False, remember to commit() periodically
consumer.commit()
# Batch process interface
while True:
        for m in consumer.fetch_messages():
# process_message(m)
consumer.task_done(m)
time.sleep(1)
Example 7: main
# Required import: from kafka import KafkaConsumer [as alias]
# Or: from kafka.KafkaConsumer import task_done [as alias]
def main():
signal.signal(signal.SIGINT, signal_handling)
# To consume messages
consumer = KafkaConsumer('test',
client_id='consumer-client-id',
group_id='consumer-script',
bootstrap_servers=['localhost:9092'],
# auto_offset_reset="smallest",
auto_commit_enable=True,
auto_commit_interval_ms=1000,
auto_commit_interval_messages=1)
for message in consumer:
# message value is raw byte string -- decode if necessary!
# e.g., for unicode: `message.value.decode('utf-8')`
print("{}:{}:{}: key={} value={}".format(message.topic,
message.partition,
message.offset,
message.key,
message.value.decode('utf-8')))
consumer.task_done(message)
Example 8: testRestart
# Required import: from kafka import KafkaConsumer [as alias]
# Or: from kafka.KafkaConsumer import task_done [as alias]
def testRestart():
    '''test restart
    After a given consumer group has consumed messages from kafka and committed,
    can it read those messages again?
    -> In fact, it cannot.
    '''
topicsList = ['JOB_NGINX', 'JOB_BASIC', 'JOB_JMON', 'JOB_TOMCAT']
brokerList = ['10.10.90.171:9092', '10.10.82.114:9092', '10.10.94.15:9092']
groupId = '8_consumer_group'
kc = KafkaConsumer(
*topicsList,
fetch_min_bytes = 1024,
group_id = groupId,
bootstrap_servers = brokerList,
consumer_timeout_ms = 10*1000,
auto_offset_reset='smallest'
)
print kc.offsets()
for partition in [0, 1, 2]:
consumerMsg = KafkaMessage('JOB_BASIC', \
partition, 2000, 'shit', 'shit')
kc.task_done(consumerMsg)
print kc.offsets()
kc.set_topic_partitions(
('JOB_BASIC', 0, 2000),
('JOB_BASIC', 1, 2000),
('JOB_BASIC', 2, 1000))
import pdb
pdb.set_trace()
while 1:
try:
for consumer in kc:
print consumer
print type(consumer)
kc.task_done(consumer)
except ConsumerTimeout:
kc.commit()
print 'xxxxxxxxxxxxxxxxx'
continue
Example 9: do_some_work
# Required import: from kafka import KafkaConsumer [as alias]
# Or: from kafka.KafkaConsumer import task_done [as alias]
#......... part of the code omitted here (the opening of the KafkaConsumer(...) call) .........
auto_commit_enable=True,
auto_commit_interval_ms=30 * 1000,
auto_offset_reset="smallest",
)
# Infinite iteration
for message in consumer:
# do_some_work(m)
print ("%s:%d:%d: key=%s value=%s" % (message.topic, message.partition, message.offset, message.key, message.value))
fd = log.getLogFile()
msg = generateLog(message.value)
if msg:
fd.write(msg)
fd.write("\n")
fd.flush()
# Mark this message as fully consumed
# so it can be included in the next commit
#
# **messages that are not marked w/ task_done currently do not commit!
consumer.task_done(message)
# If auto_commit_enable is False, remember to commit() periodically
consumer.commit()
# Batch process interface
while True:
    for m in consumer.fetch_messages():
        # process_message(m)
consumer.task_done(m)
Example 10: Kafka
# Required import: from kafka import KafkaConsumer [as alias]
# Or: from kafka.KafkaConsumer import task_done [as alias]
class Kafka(BaseThreadedModule):
"""
Simple kafka input.
Configuration template:
- Kafka:
brokers: # <type: list; is: required>
topics: # <type: string||list; is: required>
client_id: # <default: 'kafka.consumer.kafka'; type: string; is: optional>
group_id: # <default: None; type: None||string; is: optional>
fetch_message_max_bytes: # <default: 1048576; type: integer; is: optional>
fetch_min_bytes: # <default: 1; type: integer; is: optional>
fetch_wait_max_ms: # <default: 100; type: integer; is: optional>
refresh_leader_backoff_ms: # <default: 200; type: integer; is: optional>
socket_timeout_ms: # <default: 10000; type: integer; is: optional>
auto_offset_reset: # <default: 'largest'; type: string; is: optional>
auto_commit_enable: # <default: False; type: boolean; is: optional>
auto_commit_interval_ms: # <default: 60000; type: integer; is: optional>
consumer_timeout_ms: # <default: -1; type: integer; is: optional>
receivers:
- NextModule
"""
module_type = "input"
"""Set module type"""
can_run_forked = True
def configure(self, configuration):
# Call parent configure method.
BaseThreadedModule.configure(self, configuration)
self.auto_commit_enable = self.getConfigurationValue('auto_commit_enable')
def initAfterFork(self):
try:
self.consumer = KafkaConsumer(self.getConfigurationValue('topics'),
client_id=self.getConfigurationValue('client_id'),
group_id=self.getConfigurationValue('group_id'),
fetch_message_max_bytes=self.getConfigurationValue('fetch_message_max_bytes'),
fetch_min_bytes=self.getConfigurationValue('fetch_min_bytes'),
fetch_wait_max_ms=self.getConfigurationValue('fetch_wait_max_ms'),
refresh_leader_backoff_ms=self.getConfigurationValue('refresh_leader_backoff_ms'),
metadata_broker_list=self.getConfigurationValue('brokers'),
socket_timeout_ms=self.getConfigurationValue('socket_timeout_ms'),
auto_offset_reset=self.getConfigurationValue('auto_offset_reset'),
auto_commit_enable=self.getConfigurationValue('auto_commit_enable'),
auto_commit_interval_ms=self.getConfigurationValue('auto_commit_interval_ms'),
consumer_timeout_ms=self.getConfigurationValue('consumer_timeout_ms')
)
except:
etype, evalue, etb = sys.exc_info()
self.logger.error("Could not create kafka consumer. Exception: %s, Error: %s." % ( etype, evalue))
self.lumbermill.shutDown()
def run(self):
while self.alive:
for kafka_event in self.consumer:
event = DictUtils.getDefaultEventDict(dict={"topic": kafka_event.topic, "data": kafka_event.value}, caller_class_name=self.__class__.__name__)
self.sendEvent(event)
            if self.auto_commit_enable:
self.consumer.task_done(kafka_event)
Example 11: KafkaMessageSensor
# Required import: from kafka import KafkaConsumer [as alias]
# Or: from kafka.KafkaConsumer import task_done [as alias]
class KafkaMessageSensor(Sensor):
"""
Read multiple topics from Apache Kafka cluster and auto-commit offset (mark tasks as finished).
If responded topic message is JSON - try to convert it to object for reuse inside st2.
"""
TRIGGER = 'kafka.new_message'
DEFAULT_GROUP_ID = 'st2-sensor-group'
DEFAULT_CLIENT_ID = 'st2-kafka-consumer'
def __init__(self, sensor_service, config=None):
"""
Parse config variables, set defaults.
"""
super(KafkaMessageSensor, self).__init__(sensor_service=sensor_service, config=config)
self._logger = self._sensor_service.get_logger(__name__)
message_sensor = self._config.get('message_sensor')
if not message_sensor:
raise ValueError('[KafkaMessageSensor]: "message_sensor" config value is required!')
self._hosts = message_sensor.get('hosts')
if not self._hosts:
raise ValueError('[KafkaMessageSensor]: "message_sensor.hosts" config value is required!')
self._topics = set(message_sensor.get('topics', []))
if not self._topics:
raise ValueError('[KafkaMessageSensor]: "message_sensor.topics" should list at least one topic!')
# set defaults for empty values
self._group_id = message_sensor.get('group_id') or self.DEFAULT_GROUP_ID
self._client_id = message_sensor.get('client_id') or self.DEFAULT_CLIENT_ID
self._consumer = None
def setup(self):
"""
Create connection and initialize Kafka Consumer.
"""
self._logger.debug('[KafkaMessageSensor]: Initializing consumer ...')
self._consumer = KafkaConsumer(*self._topics,
client_id=self._client_id,
group_id=self._group_id,
bootstrap_servers=self._hosts,
deserializer_class=self._try_deserialize)
self._ensure_topics_existence()
def _ensure_topics_existence(self):
"""
Ensure that topics we're listening to exist.
Fetching metadata for a non-existent topic will automatically try to create it
with the default replication factor and number of partitions (default server config).
Otherwise Kafka server is not configured to auto-create topics and partitions.
"""
map(self._consumer._client.ensure_topic_exists, self._topics)
self._consumer.set_topic_partitions(*self._topics)
def run(self):
"""
Run infinite loop, continuously reading for Kafka message bus,
dispatch trigger with payload data if message received.
"""
self._logger.debug('[KafkaMessageSensor]: Entering into listen mode ...')
for message in self._consumer:
self._logger.debug(
"[KafkaMessageSensor]: Received %s:%d:%d: key=%s message=%s" %
(message.topic, message.partition,
message.offset, message.key, message.value)
)
payload = {
'topic': message.topic,
'partition': message.partition,
'offset': message.offset,
'key': message.key,
'message': message.value,
}
self._sensor_service.dispatch(trigger=self.TRIGGER, payload=payload)
# Mark this message as fully consumed
self._consumer.task_done(message)
self._consumer.commit()
def cleanup(self):
"""
Close connection, just to be sure.
"""
self._consumer._client.close()
def add_trigger(self, trigger):
pass
def update_trigger(self, trigger):
pass
def remove_trigger(self, trigger):
pass
@staticmethod
def _try_deserialize(body):
"""
Try to deserialize received message body.
#......... part of the code omitted here .........
Example 12: KafkaConsumer
# Required import: from kafka import KafkaConsumer [as alias]
# Or: from kafka.KafkaConsumer import task_done [as alias]
from kafka import KafkaConsumer
# more advanced consumer -- multiple topics w/ auto commit offset
# management
consumer = KafkaConsumer('topic1', 'topic2',
bootstrap_servers=['localhost:9092'],
group_id='my_consumer_group',
auto_commit_enable=True,
auto_commit_interval_ms=30 * 1000,
auto_offset_reset='smallest')
# Infinite iteration
for m in consumer:
# do_some_work(m)
# Mark this message as fully consumed
# so it can be included in the next commit
#
# **messages that are not marked w/ task_done currently do not commit!
consumer.task_done(m)
# If auto_commit_enable is False, remember to commit() periodically
consumer.commit()
# Batch process interface
while True:
    for m in consumer.fetch_messages():
# process_message(m)
consumer.task_done(m)
Example 13: GreenFeedConsumer
# Required import: from kafka import KafkaConsumer [as alias]
# Or: from kafka.KafkaConsumer import task_done [as alias]
class GreenFeedConsumer(threading.Thread):
"""
Greenlet based feed consumer
All callbacks are spawned as greenlets on a background thread
keyword arguments :
broker (list): List of initial broker nodes the consumer should contact to
bootstrap initial cluster metadata. This does not have to be the full node list.
It just needs to have at least one broker
    group (str): the name of the consumer group to join; offsets are fetched /
    committed under this group name.
    offset='smallest' : read all msgs from beginning of time; default is to read fresh
    commit_every_t_ms: how often (in milliseconds) to commit offsets to zookeeper
    kill_signal: the signal on which to shut down gracefully.
    wait_time_before_exit: how long to wait for green threads before exiting
"""
daemon = True
def __init__(self, broker, group, offset='largest', commit_every_t_ms=1000,
parts=None, kill_signal=signal.SIGTERM, wait_time_before_exit=10):
self.brokerurl = broker
self.kill_signal = kill_signal
self.exit_consumer = False
self.wait_time_before_exit = wait_time_before_exit
self.create_kill_signal_handler()
try:
self.cons = KafkaConsumer(bootstrap_servers=broker,
auto_offset_reset=offset,
auto_commit_enable=True,
auto_commit_interval_ms=commit_every_t_ms,
group_id=group
)
except KafkaUnavailableError:
log.critical( "\nCluster Unavailable %s : Check broker string\n", broker)
raise
except:
raise
self.topics = []
self.callbacks = {}
super(GreenFeedConsumer, self).__init__()
def add_topic(self, topic, todo , parts=None):
"""
Set the topic/partitions to consume
todo (callable) : callback for the topic
NOTE: Callback is for entire topic, if you call this for multiple
partitions for same topic with diff callbacks, only the last callback
is retained
topic : topic to listen to
parts (list) : tuple of the partitions to listen to
"""
self.callbacks[topic] = todo
if parts is None:
log.info(" GreenConsumer : adding topic %s ", topic)
self.topics.append(topic)
else:
for part in parts:
log.info(" GreenConsumer : adding topic %s %s", topic , part)
self.topics.append((topic,part))
self.cons._client.ensure_topic_exists(topic)
self.cons.set_topic_partitions(*self.topics)
def remove_topic(self, topic, parts=None):
try:
if parts is None:
self.topics.remove(topic)
else:
for part in parts:
self.topics.remove((topic,part))
except:
log.critical("GreenConsumer : no such topic %s", topic)
return
log.info(" GreenConsumer : removed topic %s", topic)
self.cons.set_topic_partitions(*self.topics)
def create_kill_signal_handler(self):
def set_stop_signal(signal, frame):
self.exit_consumer = True
signal.signal(self.kill_signal, set_stop_signal)
def wrap(self, callback, mesg):
callback(mesg.key, mesg.value)
self.cons.task_done(mesg)
#......... part of the code omitted here .........
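The run() loop of GreenFeedConsumer is omitted above; assuming it iterates the underlying consumer and dispatches each message through wrap(), a hypothetical usage sketch would look roughly like this (broker address, topic, and callback are illustrative):
def on_event(key, value):
    print 'received', key, value

cons = GreenFeedConsumer(['localhost:9092'], 'green-group', offset='smallest')
cons.add_topic('events', on_event)
cons.start()  # daemon thread; callbacks fire as messages arrive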
Example 14: KafkaMQ
# Required import: from kafka import KafkaConsumer [as alias]
# Or: from kafka.KafkaConsumer import task_done [as alias]
class KafkaMQ(MQBase):
"""
kafka://sparkh1:9092,sparkh2:9092,sparkh3:9092?topics=xx&group_id=xx
"""
_CLIENT_ID_ = 'PyThunderKafkaMQCli'
_GROUP_ID_ = 'PyThunderKafkaMQCliGroup'
maxLowTimeout = 0.1
def __init__(self, url):
super(KafkaMQ, self).__init__(url=url)
consumerKwargs, producerKwargs = self.fromUrl(url)
self._topic = consumerKwargs.pop('topic', '')
topics = self._topic.split(',')
self._consumer = KafkaConsumer(*topics, **consumerKwargs)
self._producer = Producer(**producerKwargs)
def qsize(self):
try:
            with Timeout(seconds=self.maxLowTimeout, exception=None):
print('no offsets: {}'.format(self._get(noCommit=True)))
except:
pass
localOffsets = self._consumer.offsets()
return localOffsets
def _get(self, noCommit=False):
with Timeout(seconds=self.maxLowTimeout, exception=self.Empty):
kafkaMessage = self._consumer.next()
if noCommit:
return kafkaMessage
if kafkaMessage:
try:
return kafkaMessage.value
finally:
self._commit(kafkaMessage)
else:
raise self.Empty
def _put(self, item):
with Timeout(seconds=self.maxLowTimeout, exception=self.Full):
status = self._producer.send_messages(self._topic, None, *[item])
if status:
return 10
def _commit(self, message):
try:
self._consumer.task_done(message)
self._consumer.commit()
except:
pass
def close(self):
try:
del self._consumer
self._producer.close()
del self._producer
except:
pass
def fromUrl(self, url):
urlInfo = urlparse(url)
        qs = urlInfo.query or ''
kwargs = dict()
options = dict()
options['hosts'] = urlInfo.netloc
options['topic'] = urlInfo.path.strip('/')
for name, value in six.iteritems(dict(parse_qsl(qs))):
if value:
options[name] = value
self.maxSize = options.pop('maxSize', 10000)
self.lazyLimit = options.pop('lazyLimit', True)
options.setdefault('group', self._GROUP_ID_ + '-{}'.format(id(self)))
# options.setdefault('group')
options.setdefault('client', self._CLIENT_ID_)
if urlInfo.scheme != 'kafka':
raise AttributeError('schema {} not supported'.format(urlInfo.scheme))
else:
for name, value in six.iteritems(options):
mirror = urlOptMaps.get(name)
if mirror:
value = mirror[1](value)
                    if mirror[0] == 'bootstrap_servers':
value = value.split(',')
kwargs[mirror[0]] = value
else:
kwargs[name] = value
return kwargs, {
'hosts': options.pop('hosts', '').split(','),
'client_id': options.pop('client_id', self._CLIENT_ID_),
'timeout': options.pop('timeout', 120)
}