This article collects typical usage examples of the kafka.SimpleConsumer.commit method in Python. If you have been wondering what exactly SimpleConsumer.commit does, how to call it, or simply want to see it used in real code, the curated samples below should help. You can also read further about the class this method belongs to, kafka.SimpleConsumer.
Below are 14 code examples of SimpleConsumer.commit, sorted by popularity by default. You can upvote the examples you like or find useful; your ratings help the system recommend better Python code examples.
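Before the collected examples, here is a minimal sketch of the pattern most of them share: create a client, attach a SimpleConsumer with auto-commit disabled, process a batch of messages, then call commit() to persist the consumed offsets. The broker address, group, and topic names are placeholders, and the snippet assumes the legacy kafka-python API (KafkaClient/SimpleConsumer) used throughout these examples.

from kafka import KafkaClient, SimpleConsumer

# Placeholder broker, group, and topic -- adjust for your environment.
client = KafkaClient("localhost:9092")
consumer = SimpleConsumer(client, "example-group", "example-topic",
                          auto_commit=False)  # we will commit manually

messages = consumer.get_messages(count=100, block=False)
for message in messages:
    # each item is an OffsetAndMessage; the payload is in message.message.value
    print(message.message.value)

if messages:
    consumer.commit()  # record the consumed offsets for this consumer group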
Example 1: consume_topic
# Required import: from kafka import SimpleConsumer [as alias]
# Or: from kafka.SimpleConsumer import commit [as alias]
def consume_topic(topic, group, output_dir, frequency):
    global timestamp, tempfile_path, tempfile
    print "Consuming from topic '%s' in consumer group %s into %s..." % (topic, group, output_dir)

    # get timestamp
    timestamp = standardized_timestamp(frequency)
    kafka_consumer = SimpleConsumer(kafka, group, topic, max_buffer_size=1310720000)

    # open file for writing
    tempfile_path = "/tmp/kafka_stockTwits_%s_%s_%s_%s.dat" % (topic, group, timestamp, batch_counter)
    tempfile = open(tempfile_path, "w")
    log_has_at_least_one = False  # did we log at least one entry?

    while True:
        messages = kafka_consumer.get_messages(count=1000, block=False)  # get 1000 messages at a time, non-blocking
        if not messages:
            os.system("sleep 300s")  # sleep 5 minutes
            continue
        # each item is an OffsetAndMessage(offset=43, message=Message(magic=0, attributes=0, key=None, value='some message'))
        for message in messages:
            log_has_at_least_one = True
            tempfile.write(message.message.value + "\n")
            if tempfile.tell() > 10000000:  # file size > 10MB
                flush_to_hdfs(output_dir, topic)
                kafka_consumer.commit()  # save position in the kafka queue

    # exit loop
    if log_has_at_least_one:
        flush_to_hdfs(output_dir, topic)
        kafka_consumer.commit()  # save position in the kafka queue
    return 0
Example 2: consume_topic
# Required import: from kafka import SimpleConsumer [as alias]
# Or: from kafka.SimpleConsumer import commit [as alias]
def consume_topic(topic, group, output_dir, frequency):
    global timestamp, tempfile_path, tempfile
    print "Consumer Loading topic '%s' in consumer group %s into %s..." % (topic, group, output_dir)
    timestamp = standardized_timestamp(frequency)
    kafka_consumer = SimpleConsumer(kafka, group, topic, max_buffer_size=1310720000)

    # open file for writing
    tempfile_path = "/tmp/kafka_%s_%s_%s_%s.txt" % (topic, group, timestamp, batch_counter)
    tempfile = open(tempfile_path, "w")
    # log_has_at_least_one = False  # did we log at least one entry?

    while True:
        # get 100 messages at a time, non-blocking
        messages = kafka_consumer.get_messages(count=100, block=False)
        if not messages:
            # print "no messages to read"
            continue  # if no messages are received, wait until there are more
        for message in messages:
            # log_has_at_least_one = True
            # print(message.message.value)
            # tempfile.write(message.message.value + "\n")  # lose the '\n'?
            tempfile.write(message.message.value)
            if tempfile.tell() > 120000000:  # file size > 120MB
                print "Note: file is large enough to write to hdfs. Writing now..."
                flush_to_hdfs(output_dir, topic)
                kafka_consumer.commit()  # inform zookeeper of position in the kafka queue
Example 3: consume_save
# Required import: from kafka import SimpleConsumer [as alias]
# Or: from kafka.SimpleConsumer import commit [as alias]
def consume_save(group, topic):
    tmp_save = open(tmp_file_path, "w")
    kafka_consumer = SimpleConsumer(kafka, group, topic)
    messages = kafka_consumer.get_messages(count=1000, block=False)
    if not messages:
        print "Consumer didn't read any messages"
    for message in messages:
        tmp_save.write(message.message.value + "\n")
        # print message.message.value + "\n"
    kafka_consumer.commit()  # inform zookeeper of position in the kafka queue
    print ".... ... .. .."
    print "Message from topic \"%s\" consumed \n" % topic
Example 4: consume_save
# Required import: from kafka import SimpleConsumer [as alias]
# Or: from kafka.SimpleConsumer import commit [as alias]
def consume_save(group, topic):
    i = 0
    tmp_save = open(tmp_file_path, "w")
    while True:
        kafka_consumer = SimpleConsumer(kafka, group, topic)
        messages = kafka_consumer.get_messages(count=1000, block=False)
        # if not messages:
        #     print "Consumer didn't read any messages"
        for message in messages:
            tmp_save.write(message.message.value + "\n")
            print message.message.value + "\n"
        # file size > 20MB
        if tmp_save.tell() > 20000000:
            push_to_hdfs(tmp_file_path)
        kafka_consumer.commit()  # inform zookeeper of position in the kafka queue
Example 5: KafkaSpout
# Required import: from kafka import SimpleConsumer [as alias]
# Or: from kafka.SimpleConsumer import commit [as alias]
class KafkaSpout(Spout):

    def initialize(self, stormconf, context):
        # self.words = itertools.cycle(['dog', 'cat', 'zebra', 'elephant'])
        self.kafka = KafkaClient("cloud.soumet.com:9092")
        self.consumer = SimpleConsumer(self.kafka, "storm", "realtime", max_buffer_size=1310720000)

    def next_tuple(self):
        for message in self.consumer.get_messages(count=500, block=False):  # , timeout=1):
            # transaction_data = TransactionFull()
            # transaction_data.ParseFromString(base64.b64decode(message.message.value))
            # self.emit([transaction_data])
            self.emit([message.message.value])
        self.consumer.commit()
Example 6: consume_topic
# Required import: from kafka import SimpleConsumer [as alias]
# Or: from kafka.SimpleConsumer import commit [as alias]
def consume_topic(callback_url, consumer_group, topic):
    consumer = None
    try:
        consumer = SimpleConsumer(self.kafka, consumer_group, topic, auto_commit=False)
        messages_read = 0
        # we can't read messages infinitely here as we have
        # a lot of topics/subscribers (many more than the threadpool size)
        while messages_read < self.max_read_messages_per_cycle:
            # get one message and monitor the time
            start = monitoring.start_time_measure()
            message = consumer.get_message(block=False)
            ms_elapsed = monitoring.stop_time_measure(start)
            self.metrics['kafka_read'].add({'topic': topic}, ms_elapsed)
            # if we don't have messages for this topic/subscriber - quit and give others a chance
            if message is None:
                logging.info('No messages for topic: %s and callback: %s, quitting the thread', topic, callback_url)
                break
            try:
                event = json.loads(message.message.value.decode('utf-8'))
                response_status = self.forward_event(callback_url, event, topic)
                # if the status is success - mark the message as consumed by this subscriber
                if 200 <= response_status < 300:
                    consumer.commit()
                else:
                    logging.info('Received error response from consumer: %s', response_status)
            except:
                logging.error("Exception while sending event to consumer")
                logging.error(traceback.format_exc())
            finally:
                messages_read += 1
        return messages_read
    except UnknownTopicOrPartitionError:
        logging.error('Adding %s to skip list', topic)
    except:
        logging.exception('failed to create kafka client')
    finally:
        if consumer is not None:
            consumer.stop()
Example 7: consume_topic
# Required import: from kafka import SimpleConsumer [as alias]
# Or: from kafka.SimpleConsumer import commit [as alias]
def consume_topic(topic, group, output_dir, frequency):
    global timestamp, tempfile_path, tempfile
    print "Consuming from topic '%s' in consumer group %s into %s..." % (topic, group, output_dir)

    # get timestamp
    kafka_consumer = SimpleConsumer(kafka, group, topic, max_buffer_size=1310720000)

    while True:
        messages = kafka_consumer.get_messages(count=1000, block=False)  # get 1000 messages at a time, non-blocking
        if not messages:
            os.system("sleep 30s")
            continue
            # break
        # each item is an OffsetAndMessage(offset=43, message=Message(magic=0, attributes=0, key=None, value='some message'))
        for message in messages:
            print message
        kafka_consumer.commit()  # save position in the kafka queue

    # exit loop
    if log_has_at_least_one:
        flush_to_hdfs(output_dir, topic)
        kafka_consumer.commit()  # save position in the kafka queue
    return 0
Example 8: test_simple_consumer_commit_does_not_raise
# Required import: from kafka import SimpleConsumer [as alias]
# Or: from kafka.SimpleConsumer import commit [as alias]
def test_simple_consumer_commit_does_not_raise(self):
    client = MagicMock()
    client.get_partition_ids_for_topic.return_value = [0, 1]

    def mock_offset_fetch_request(group, payloads, **kwargs):
        return [OffsetFetchResponsePayload(p.topic, p.partition, 0, b'', 0) for p in payloads]

    client.send_offset_fetch_request.side_effect = mock_offset_fetch_request

    def mock_offset_commit_request(group, payloads, **kwargs):
        raise FailedPayloadsError(payloads[0])

    client.send_offset_commit_request.side_effect = mock_offset_commit_request

    consumer = SimpleConsumer(client, group='foobar',
                              topic='topic', partitions=[0, 1],
                              auto_commit=False)

    # Mock internal commit check
    consumer.count_since_commit = 10

    # This should not raise an exception
    self.assertFalse(consumer.commit(partitions=[0, 1]))
Example 9: _run
# Required import: from kafka import SimpleConsumer [as alias]
# Or: from kafka.SimpleConsumer import commit [as alias]
def _run(self):
    pcount = 0
    pause = False
    while True:
        try:
            if pause:
                gevent.sleep(2)
                pause = False
            self._logger.error("New KafkaClient %s" % self._topic)
            self._kfk = KafkaClient(self._brokers, "kc-" + self._topic)
            try:
                consumer = SimpleConsumer(self._kfk, self._group, self._topic,
                                          buffer_size=4096*4, max_buffer_size=4096*32)
            except Exception as ex:
                template = "Consumer Failure {0} occurred. Arguments:\n{1!r}"
                messag = template.format(type(ex).__name__, ex.args)
                self._logger.error("Error: %s trace %s" % (messag, traceback.format_exc()))
                raise RuntimeError(messag)

            self._logger.error("Starting %s" % self._topic)

            # Find the offset of the last message that has been queued
            consumer.seek(-1, 2)
            try:
                mi = consumer.get_message(timeout=0.1)
                consumer.commit()
            except common.OffsetOutOfRangeError:
                mi = None
            self._logger.info("Last Queued for %s is %s" % (self._topic, str(mi)))

            # start reading from the last previously processed message
            if mi is not None:
                consumer.seek(-1, 1)
            else:
                consumer.seek(0, 0)

            if self._limit:
                raise gevent.GreenletExit

            while True:
                try:
                    mlist = consumer.get_messages(10, timeout=0.5)
                    if not self.msg_handler(mlist):
                        raise gevent.GreenletExit
                    consumer.commit()
                    pcount += len(mlist)
                except TypeError as ex:
                    self._logger.error("Type Error: %s trace %s" % (str(ex.args), traceback.format_exc()))
                    gevent.sleep(0.1)
                except common.FailedPayloadsError as ex:
                    self._logger.error("Payload Error: %s" % str(ex.args))
                    gevent.sleep(0.1)
        except gevent.GreenletExit:
            break
        except AssertionError as ex:
            self._partoffset = ex
            break
        except Exception as ex:
            template = "An exception of type {0} occurred. Arguments:\n{1!r}"
            messag = template.format(type(ex).__name__, ex.args)
            self._logger.error("%s : traceback %s" % (messag, traceback.format_exc()))
            self.stop_partition()
            pause = True

    self._logger.error("Stopping %s pcount %d" % (self._topic, pcount))
    partdb = self.stop_partition()
    return self._partoffset, partdb
Example 10: str
# Required import: from kafka import SimpleConsumer [as alias]
# Or: from kafka.SimpleConsumer import commit [as alias]
# (excerpt from the main loop of a consumer test harness; the enclosing while/try block is not shown)
            ph.start()
            workers[int(mm.key)] = ph
        elif mm.value == "stop":
            if workers.has_key(int(mm.key)):
                ph = workers[int(mm.key)]
                gevent.kill(ph)
                res, db = ph.get()
                print "Returned " + str(res)
                print "State :"
                for k, v in db.iteritems():
                    print "%s -> %s" % (k, str(v))
                del workers[int(mm.key)]
        else:
            end_ready = True
            cons.commit()
            gevent.sleep(2)
            break
    except TypeError:
        gevent.sleep(0.1)
    except common.FailedPayloadsError as ex:
        print "Payload Error: " + str(ex.args)
        gevent.sleep(0.1)

lw = []
for key, value in workers.iteritems():
    gevent.kill(value)
    lw.append(value)
gevent.joinall(lw)
print "Ending Consumers"
Example 11: _run
# Required import: from kafka import SimpleConsumer [as alias]
# Or: from kafka.SimpleConsumer import commit [as alias]
def _run(self):
    pcount = 0
    while True:
        try:
            self._logger.info("New KafkaClient %d" % self._partition)
            kafka = KafkaClient(self._brokers, str(os.getpid()))
            try:
                consumer = SimpleConsumer(kafka, self._group, self._topic,
                                          buffer_size=4096*4, max_buffer_size=4096*32)
            except Exception as ex:
                template = "Consumer Failure {0} occurred. Arguments:\n{1!r}"
                messag = template.format(type(ex).__name__, ex.args)
                self._logger.info("%s" % messag)
                raise RuntimeError(messag)

            self._logger.info("Starting %d" % self._partition)

            # Find the offset of the last message that has been queued
            consumer.seek(0, 2)
            try:
                mi = consumer.get_message(timeout=0.1)
                consumer.commit()
            except common.OffsetOutOfRangeError:
                mi = None
            self._logger.info("Last Queued for %d is %s" % (self._partition, str(mi)))

            self.start_partition()
            # start reading from the last previously processed message
            consumer.seek(0, 1)

            if self._limit:
                raise gevent.GreenletExit

            while True:
                try:
                    mm = consumer.get_message(timeout=None)
                    if mm is None:
                        continue
                    self._logger.debug("%d Reading offset %d" % (self._partition, mm.offset))
                    consumer.commit()
                    pcount += 1
                    if not self.msg_handler(mm):
                        self._logger.info("%d could not handle %s" % (self._partition, str(mm)))
                        raise gevent.GreenletExit
                except TypeError:
                    gevent.sleep(0.1)
                except common.FailedPayloadsError as ex:
                    self._logger.info("Payload Error: %s" % str(ex.args))
                    gevent.sleep(0.1)
        except gevent.GreenletExit:
            break
        except Exception as ex:
            template = "An exception of type {0} occurred. Arguments:\n{1!r}"
            messag = template.format(type(ex).__name__, ex.args)
            self._logger.info("%s : traceback %s" % (messag, traceback.format_exc()))
            self.stop_partition()
            gevent.sleep(2)

    self._logger.info("Stopping %d pcount %d" % (self._partition, pcount))
    return self._partoffset, self._partdb
Example 12: _run
# Required import: from kafka import SimpleConsumer [as alias]
# Or: from kafka.SimpleConsumer import commit [as alias]
def _run(self):
    pcount = 0
    while True:
        try:
            self._logger.error("New KafkaClient %d" % self._partition)
            self._kfk = KafkaClient(self._brokers, str(os.getpid()))
            try:
                consumer = SimpleConsumer(self._kfk, self._group, self._topic,
                                          buffer_size=4096*4, max_buffer_size=4096*32)
            except Exception as ex:
                template = "Consumer Failure {0} occurred. Arguments:\n{1!r}"
                messag = template.format(type(ex).__name__, ex.args)
                self._logger.info("%s" % messag)
                raise RuntimeError(messag)

            self._logger.error("Starting %d" % self._partition)

            # Find the offset of the last message that has been queued
            consumer.seek(0, 2)
            try:
                mi = consumer.get_message(timeout=0.1)
                consumer.commit()
            except common.OffsetOutOfRangeError:
                mi = None
            self._logger.info("Last Queued for %d is %s" % (self._partition, str(mi)))

            # start reading from the last previously processed message
            if mi is not None:
                consumer.seek(0, 1)
            else:
                consumer.seek(0, 0)

            if self._limit:
                raise gevent.GreenletExit

            while True:
                try:
                    self.resource_check()
                    mlist = consumer.get_messages(10, timeout=0.2)
                    for mm in mlist:
                        if mm is None:
                            continue
                        self._logger.debug("%d Reading offset %d" % (self._partition, mm.offset))
                        consumer.commit()
                        pcount += 1
                        if not self.msg_handler(mm):
                            self._logger.info("%d could not handle %s" % (self._partition, str(mm)))
                            raise gevent.GreenletExit
                except TypeError as ex:
                    self._logger.error("Type Error: %s trace %s" % (str(ex.args), traceback.format_exc()))
                    gevent.sleep(0.1)
                except common.FailedPayloadsError as ex:
                    self._logger.error("Payload Error: %s" % str(ex.args))
                    gevent.sleep(0.1)
        except gevent.GreenletExit:
            break
        except AssertionError as ex:
            self._partoffset = ex
            break
        except Exception as ex:
            template = "An exception of type {0} occurred. Arguments:\n{1!r}"
            messag = template.format(type(ex).__name__, ex.args)
            self._logger.error("%s : traceback %s" % (messag, traceback.format_exc()))
            self.stop_partition()
            gevent.sleep(2)

    partdb = {}
    for coll in self._uvedb.keys():
        partdb[coll] = {}
        for gen in self._uvedb[coll].keys():
            partdb[coll][gen] = {}
            for tab in self._uvedb[coll][gen].keys():
                for rkey in self._uvedb[coll][gen][tab].keys():
                    uk = tab + ":" + rkey
                    partdb[coll][gen][uk] = \
                        set(self._uvedb[coll][gen][tab][rkey].keys())

    self._logger.error("Stopping %d pcount %d" % (self._partition, pcount))
    self.stop_partition()
    return self._partoffset, partdb
Example 13: KafkaClient
# Required import: from kafka import SimpleConsumer [as alias]
# Or: from kafka.SimpleConsumer import commit [as alias]
from __future__ import absolute_import, print_function  # , unicode_literals

import itertools
import base64
import sys

from streamparse.spout import Spout
from kafka import KafkaClient, SimpleProducer, SimpleConsumer
# from kafka.client import KafkaClient
# from kafka.consumer import SimpleConsumer

kafka = KafkaClient("cloud.soumet.com:9092")
kafka_consumer = SimpleConsumer(kafka, "storm", "realtime", max_buffer_size=1310720000)

for message in kafka_consumer.get_messages(count=5000, block=False):  # , block=True, timeout=4):
    print(message.message.value)

kafka_consumer.commit()
Example 14: AbstractPersister
# Required import: from kafka import SimpleConsumer [as alias]
# Or: from kafka.SimpleConsumer import commit [as alias]
class AbstractPersister(threading.Thread):

    def __init__(self, kafka_conf, influxdb_conf):
        super(AbstractPersister, self).__init__()

        kafka = KafkaClient(kafka_conf.uri)
        self._consumer = SimpleConsumer(kafka,
                                        kafka_conf.group_id,
                                        kafka_conf.topic,
                                        # Set to True even though we actually do
                                        # the commits manually. Needed to
                                        # initialize offsets correctly.
                                        auto_commit=True,
                                        # Make these values None so that the
                                        # manual commit will do the actual commit.
                                        # Needed so that offsets are initialized
                                        # correctly. If not done, then restarts
                                        # will reread messages from the beginning
                                        # of the queue.
                                        auto_commit_every_n=None,
                                        auto_commit_every_t=None,
                                        iter_timeout=1)
        self._influxdb_client = InfluxDBClient(influxdb_conf.ip_address,
                                               influxdb_conf.port,
                                               influxdb_conf.user,
                                               influxdb_conf.password,
                                               influxdb_conf.database_name)
        self._max_wait_time_secs = kafka_conf.max_wait_time_seconds
        self._batch_size = kafka_conf.batch_size
        self._kafka_topic = kafka_conf.topic
        self._json_body = []
        self._last_flush = datetime.now()

    @abc.abstractmethod
    def process_message(self, message):
        pass

    def _flush(self):
        if self._json_body:
            self._influxdb_client.write_points(self._json_body)
            self._consumer.commit()
            LOG.info("processed {} messages from topic '{}'".format(
                len(self._json_body), self._kafka_topic))
            self._json_body = []
            self._last_flush = datetime.now()

    def run(self):
        try:
            while True:
                delta_time = datetime.now() - self._last_flush
                if delta_time.seconds > self._max_wait_time_secs:
                    self._flush()
                for message in self._consumer:
                    try:
                        self._json_body.append(self.process_message(message))
                    except Exception:
                        LOG.exception('Error processing message. Message is '
                                      'being dropped. {}'.format(message))
                    if len(self._json_body) >= self._batch_size:
                        self._flush()
        except:
            LOG.exception(
                'Persister encountered fatal exception processing messages. '
                'Shutting down all threads and exiting')
            os._exit(1)