

Python SimpleConsumer.commit Method Code Examples

This article collects typical usage examples of the kafka.SimpleConsumer.commit method in Python. If you are wondering what SimpleConsumer.commit does, how to call it, or what real-world uses look like, the curated examples below should help. You can also explore further usage examples for the enclosing class, kafka.SimpleConsumer.


The following presents 14 code examples of the SimpleConsumer.commit method, sorted by popularity by default.
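The examples below all follow the same basic pattern: create a SimpleConsumer from a KafkaClient, fetch a batch of messages, process them, then call commit() to record the consumer group's position. Here is a minimal sketch of that pattern; the broker address localhost:9092, group my-group, and topic my-topic are placeholders chosen for illustration, not values from any of the projects below.

from kafka import KafkaClient, SimpleConsumer

kafka = KafkaClient("localhost:9092")  # placeholder broker address
consumer = SimpleConsumer(kafka, "my-group", "my-topic", auto_commit=False)

# fetch up to 10 messages without blocking, then process them
for message in consumer.get_messages(count=10, block=False):
    print(message.message.value)

consumer.commit()  # persist this group's offsets so a restart resumes from here
kafka.close()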

Example 1: consume_topic

# Module to import: from kafka import SimpleConsumer [as alias]
# Or: from kafka.SimpleConsumer import commit [as alias]
def consume_topic(topic, group, output_dir, frequency):
    global timestamp, tempfile_path, tempfile
    print "Consuming from topic '%s' in consumer group %s into %s..." % (topic, group, output_dir)

    #get timestamp
    timestamp = standardized_timestamp(frequency)
    kafka_consumer = SimpleConsumer(kafka, group, topic, max_buffer_size=1310720000)
    
    #open file for writing
    tempfile_path = "/tmp/kafka_stockTwits_%s_%s_%s_%s.dat" % (topic, group, timestamp, batch_counter)
    tempfile = open(tempfile_path,"w")
    log_has_at_least_one = False #did we log at least one entry?
    while True:
        messages = kafka_consumer.get_messages(count=1000, block=False)  # get up to 1000 messages at a time, non-blocking
        if not messages:
            os.system("sleep 300s")  # no new messages; sleep 5 minutes before polling again
            continue
        for message in messages: #OffsetAndMessage(offset=43, message=Message(magic=0, attributes=0, key=None, value='some message'))
            log_has_at_least_one = True
            #print(message.message.value)
            tempfile.write(message.message.value + "\n")
        if tempfile.tell() > 10000000:  # file size > 10MB
            flush_to_hdfs(output_dir, topic)
        kafka_consumer.commit() #save position in the kafka queue
    # exit loop (note: unreachable, because the loop above never breaks)
    if log_has_at_least_one:
        flush_to_hdfs(output_dir, topic)
    kafka_consumer.commit() #save position in the kafka queue
    return 0
Developer: devbhosale, Project: HashtagCashtag, Lines: 32, Source: writeToHdfsStockTwits.py

Example 2: consume_topic

# Module to import: from kafka import SimpleConsumer [as alias]
# Or: from kafka.SimpleConsumer import commit [as alias]
def consume_topic(topic, group, output_dir, frequency):
    global timestamp, tempfile_path, tempfile
    print "Consumer Loading topic '%s' in consumer group %s into %s..." % (topic, group, output_dir)
    timestamp = standardized_timestamp(frequency)
    kafka_consumer = SimpleConsumer(kafka, group, topic, max_buffer_size=1310720000)

    #open file for writing
    tempfile_path = "/tmp/kafka_%s_%s_%s_%s.txt" % (topic, group, timestamp, batch_counter)
    tempfile = open(tempfile_path, "w")
    #log_has_at_least_one = False #did we log at least one entry?
    while True:
        # get up to 100 messages at a time, non-blocking
        messages = kafka_consumer.get_messages(count=100, block=False)
        if not messages:
            #print "no messages to read"
            continue   # no messages yet; poll again
        for message in messages:
            #log_has_at_least_one = True
            #print(message.message.value)
            #tempfile.write(message.message.value + "\n")    # lose the '\n'?
            tempfile.write(message.message.value)
        if tempfile.tell() > 120000000:  # file size > 120MB
            print "Note: file is large enough to write to hdfs. Writing now..."
            flush_to_hdfs(output_dir, topic)
        kafka_consumer.commit()  # inform zookeeper of position in the kafka queue
Developer: agilemobiledev, Project: Insight-TrafficJam, Lines: 27, Source: consumer.py

Example 3: consume_save

# Module to import: from kafka import SimpleConsumer [as alias]
# Or: from kafka.SimpleConsumer import commit [as alias]
def consume_save(group, topic):
    tmp_save = open(tmp_file_path, "w")
    kafka_consumer = SimpleConsumer(kafka, group, topic)
    messages = kafka_consumer.get_messages(count=1000, block=False)
    if not messages:
        print "Consumer didn't read any messages"
    for message in messages:
        tmp_save.write(message.message.value + "\n")
#        print message.message.value + "\n"
    kafka_consumer.commit()  # inform zookeeper of position in the kafka queue
    print ".... ... .. .."
    print "Message from topic \"%s\" consumed \n" % topic
Developer: filmonhg, Project: draft_InsightProject, Lines: 14, Source: consumer.py

Example 4: consume_save

# Module to import: from kafka import SimpleConsumer [as alias]
# Or: from kafka.SimpleConsumer import commit [as alias]
def consume_save(group, topic):
    i = 0
    tmp_save = open(tmp_file_path, "w")
    while True:
        kafka_consumer = SimpleConsumer(kafka, group, topic)
        messages = kafka_consumer.get_messages(count=1000, block=False)
#        if not messages:
#            print "Consumer didn't read any messages"
        for message in messages:
            tmp_save.write(message.message.value + "\n")
            print message.message.value + "\n"
        if tmp_save.tell() > 20000000:  # file size > 20MB
            push_to_hdfs(tmp_file_path)
        kafka_consumer.commit()  # inform zookeeper of position in the kafka queue
Developer: filmonhg, Project: Trucker, Lines: 17, Source: realtime_consumer_inbound.py

Example 5: KafkaSpout

# Module to import: from kafka import SimpleConsumer [as alias]
# Or: from kafka.SimpleConsumer import commit [as alias]
class KafkaSpout(Spout):

	def initialize(self, stormconf, context):
		# self.words = itertools.cycle(['dog', 'cat',
		# 								'zebra', 'elephant'])
		self.kafka = KafkaClient("cloud.soumet.com:9092")
		self.consumer = SimpleConsumer(self.kafka, "storm", "realtime", max_buffer_size=1310720000)
		



	def next_tuple(self):
		for message in self.consumer.get_messages(count=500, block=False):#, timeout=1):
			#transaction_data = TransactionFull()
			#transaction_data.ParseFromString(base64.b64decode(message.message.value))
			#self.emit([transaction_data])
			self.emit([message.message.value])
		self.consumer.commit()
Developer: ajmssc, Project: bitcoin-inspector, Lines: 20, Source: kafkaconsumer.py

Example 6: consume_topic

# Module to import: from kafka import SimpleConsumer [as alias]
# Or: from kafka.SimpleConsumer import commit [as alias]
        def consume_topic(callback_url, consumer_group, topic):
            consumer = None
            try:
                consumer = SimpleConsumer(self.kafka, consumer_group, topic, auto_commit=False)
                messages_read = 0

                # we can't read messages infinitely here as we have
                # a lot of topics/subscribers (much more than threadpool size)
                while messages_read < self.max_read_messages_per_cycle:

                    # get one message and monitor the time
                    start = monitoring.start_time_measure()
                    message = consumer.get_message(block=False)
                    ms_elapsed = monitoring.stop_time_measure(start)
                    self.metrics['kafka_read'].add({'topic': topic}, ms_elapsed)

                    # if we don't have messages for this topic/subscriber - quit and give chance to others
                    if message is None:
                        logging.info('No messages for topic: %s and callback: %s, quitting the thread', topic, callback_url)
                        break

                    try:
                        event = json.loads(message.message.value.decode('utf-8'))
                        response_status = self.forward_event(callback_url, event, topic)

                        # if status is success - mark message as consumed by this subscriber
                        if 200 <= response_status < 300:
                            consumer.commit()
                        else:
                            logging.info('Received error response from consumer: %s', response_status)
                    except:
                        logging.error("Exception while sending event to consumer")
                        logging.error(traceback.format_exc())
                    finally:
                        messages_read += 1
                return messages_read

            except UnknownTopicOrPartitionError:
                logging.error('Adding %s to skip list', topic)
            except:
                logging.exception('failed to create kafka client')
            finally:
                if consumer is not None:
                    consumer.stop()
Developer: AlexanderYastrebov, Project: nakadi, Lines: 46, Source: background.py

Example 7: consume_topic

# Module to import: from kafka import SimpleConsumer [as alias]
# Or: from kafka.SimpleConsumer import commit [as alias]
def consume_topic(topic, group, output_dir, frequency):
    global timestamp, tempfile_path, tempfile
    print "Consuming from topic '%s' in consumer group %s into %s..." % (topic, group, output_dir)
    #get timestamp
    kafka_consumer = SimpleConsumer(kafka, group, topic, max_buffer_size=1310720000)
    
    while True:
        messages = kafka_consumer.get_messages(count=1000, block=False)  # get up to 1000 messages at a time, non-blocking
        if not messages:
            os.system("sleep 30s")  # no new messages; sleep 30 seconds before polling again
            continue
        for message in messages: #OffsetAndMessage(offset=43, message=Message(magic=0, attributes=0, key=None, value='some message'))
            print message
        kafka_consumer.commit() #save position in the kafka queue
    # exit loop (note: unreachable, because the loop above never breaks)
    if log_has_at_least_one:
        flush_to_hdfs(output_dir, topic)
    kafka_consumer.commit() #save position in the kafka queue
    return 0
Developer: devbhosale, Project: HashtagCashtag, Lines: 22, Source: printScreenTwitter.py

Example 8: test_simple_consumer_commit_does_not_raise

# Module to import: from kafka import SimpleConsumer [as alias]
# Or: from kafka.SimpleConsumer import commit [as alias]
    def test_simple_consumer_commit_does_not_raise(self):
        client = MagicMock()
        client.get_partition_ids_for_topic.return_value = [0, 1]

        def mock_offset_fetch_request(group, payloads, **kwargs):
            return [OffsetFetchResponsePayload(p.topic, p.partition, 0, b'', 0) for p in payloads]

        client.send_offset_fetch_request.side_effect = mock_offset_fetch_request

        def mock_offset_commit_request(group, payloads, **kwargs):
            raise FailedPayloadsError(payloads[0])

        client.send_offset_commit_request.side_effect = mock_offset_commit_request

        consumer = SimpleConsumer(client, group='foobar',
                                  topic='topic', partitions=[0, 1],
                                  auto_commit=False)

        # Mock internal commit check
        consumer.count_since_commit = 10

        # This should not raise an exception
        self.assertFalse(consumer.commit(partitions=[0, 1]))
Developer: Abhishek-Dutta, Project: kafka-python, Lines: 25, Source: test_consumer.py

Example 9: _run

# Module to import: from kafka import SimpleConsumer [as alias]
# Or: from kafka.SimpleConsumer import commit [as alias]
    def _run(self):
        pcount = 0
        pause = False
        while True:
            try:
                if pause:
                    gevent.sleep(2)
                    pause = False
                self._logger.error("New KafkaClient %s" % self._topic)
                self._kfk = KafkaClient(self._brokers , "kc-" + self._topic)
                try:
                    consumer = SimpleConsumer(self._kfk, self._group, self._topic, buffer_size = 4096*4, max_buffer_size=4096*32)
                    #except:
                except Exception as ex:
                    template = "Consumer Failure {0} occurred. Arguments:\n{1!r}"
                    messag = template.format(type(ex).__name__, ex.args)
                    self._logger.error("Error: %s trace %s" % \
                        (messag, traceback.format_exc()))
                    raise RuntimeError(messag)

                self._logger.error("Starting %s" % self._topic)

                # Find the offset of the last message that has been queued
                consumer.seek(-1,2)
                try:
                    mi = consumer.get_message(timeout=0.1)
                    consumer.commit()
                except common.OffsetOutOfRangeError:
                    mi = None
                #import pdb; pdb.set_trace()
                self._logger.info("Last Queued for %s is %s" % \
                                  (self._topic,str(mi)))

                # start reading from last previously processed message
                if mi != None:
                    consumer.seek(-1,1)
                else:
                    consumer.seek(0,0)

                if self._limit:
                    raise gevent.GreenletExit

                while True:
                    try:
                        mlist = consumer.get_messages(10,timeout=0.5)
                        if not self.msg_handler(mlist):
                            raise gevent.GreenletExit
                        consumer.commit()
                        pcount += len(mlist) 
                    except TypeError as ex:
                        self._logger.error("Type Error: %s trace %s" % \
                                (str(ex.args), traceback.format_exc()))
                        gevent.sleep(0.1)
                    except common.FailedPayloadsError as ex:
                        self._logger.error("Payload Error: %s" %  str(ex.args))
                        gevent.sleep(0.1)
            except gevent.GreenletExit:
                break
            except AssertionError as ex:
                self._partoffset = ex
                break
            except Exception as ex:
                template = "An exception of type {0} occurred. Arguments:\n{1!r}"
                messag = template.format(type(ex).__name__, ex.args)
                self._logger.error("%s : traceback %s" % \
                                  (messag, traceback.format_exc()))
                self.stop_partition()
                pause = True

        self._logger.error("Stopping %s pcount %d" % (self._topic, pcount))
        partdb = self.stop_partition()
        return self._partoffset, partdb
Developer: gtesseyre, Project: contrail-controller, Lines: 74, Source: partition_handler.py

Example 10: str

# Module to import: from kafka import SimpleConsumer [as alias]
# Or: from kafka.SimpleConsumer import commit [as alias]
                        ph.start()
                        workers[int(mm.key)] = ph
                elif mm.value == "stop":
                    #import pdb; pdb.set_trace()
                    if workers.has_key(int(mm.key)):
                        ph = workers[int(mm.key)]
                        gevent.kill(ph)
                        res,db = ph.get()
                        print "Returned " + str(res)
                        print "State :"
                        for k,v in db.iteritems():
                            print "%s -> %s" % (k,str(v)) 
                        del workers[int(mm.key)]
                else:
                    end_ready = True
                    cons.commit()
                    gevent.sleep(2)
                    break
        except TypeError:
            gevent.sleep(0.1)
        except common.FailedPayloadsError as ex:
            print "Payload Error: " + str(ex.args)
            gevent.sleep(0.1)
    lw=[]
    for key, value in workers.iteritems():
        gevent.kill(value)
        lw.append(value)

    gevent.joinall(lw)
    print "Ending Consumers"
Developer: gtesseyre, Project: contrail-controller, Lines: 32, Source: partition_handler.py

Example 11: _run

# Module to import: from kafka import SimpleConsumer [as alias]
# Or: from kafka.SimpleConsumer import commit [as alias]
    def _run(self):
        pcount = 0
        while True:
            try:
                self._logger.info("New KafkaClient %d" % self._partition)
                kafka = KafkaClient(self._brokers ,str(os.getpid()))
                try:
                    consumer = SimpleConsumer(kafka, self._group, self._topic, buffer_size = 4096*4, max_buffer_size=4096*32)
                    #except:
                except Exception as ex:
                    template = "Consumer Failure {0} occurred. Arguments:\n{1!r}"
                    messag = template.format(type(ex).__name__, ex.args)
                    self._logger.info("%s" % messag)
                    raise RuntimeError(messag)

                self._logger.info("Starting %d" % self._partition)

                # Find the offset of the last message that has been queued
                consumer.seek(0,2)
                try:
                    mi = consumer.get_message(timeout=0.1)
                    consumer.commit()
                except common.OffsetOutOfRangeError:
                    mi = None
                #import pdb; pdb.set_trace()
                self._logger.info("Last Queued for %d is %s" % \
                                  (self._partition,str(mi)))
                self.start_partition()

                # start reading from last previously processed message
                consumer.seek(0,1)

                if self._limit:
                    raise gevent.GreenletExit

                while True:
                    try:
                        mm = consumer.get_message(timeout=None)
                        if mm is None:
                            continue
                        self._logger.debug("%d Reading offset %d" % (self._partition, mm.offset))
                        consumer.commit()
                        pcount += 1
                        if not self.msg_handler(mm):
                            self._logger.info("%d could not handle %s" % (self._partition, str(mm)))
                            raise gevent.GreenletExit
                    except TypeError:
                        gevent.sleep(0.1)
                    except common.FailedPayloadsError as ex:
                        self._logger.info("Payload Error: %s" %  str(ex.args))
                        gevent.sleep(0.1)
            except gevent.GreenletExit:
                break
            except Exception as ex:
                template = "An exception of type {0} occurred. Arguments:\n{1!r}"
                messag = template.format(type(ex).__name__, ex.args)
                self._logger.info("%s : traceback %s" % \
                                  (messag, traceback.format_exc()))
                self.stop_partition()
                gevent.sleep(2)
        self._logger.info("Stopping %d pcount %d" % (self._partition, pcount))
        return self._partoffset, self._partdb
Developer: GIC-de, Project: contrail-controller, Lines: 64, Source: partition_handler.py

Example 12: _run

# Module to import: from kafka import SimpleConsumer [as alias]
# Or: from kafka.SimpleConsumer import commit [as alias]
    def _run(self):
        pcount = 0
        while True:
            try:
                self._logger.error("New KafkaClient %d" % self._partition)
                self._kfk = KafkaClient(self._brokers ,str(os.getpid()))
                try:
                    consumer = SimpleConsumer(self._kfk, self._group, self._topic, buffer_size = 4096*4, max_buffer_size=4096*32)
                    #except:
                except Exception as ex:
                    template = "Consumer Failure {0} occurred. Arguments:\n{1!r}"
                    messag = template.format(type(ex).__name__, ex.args)
                    self._logger.info("%s" % messag)
                    raise RuntimeError(messag)

                self._logger.error("Starting %d" % self._partition)

                # Find the offset of the last message that has been queued
                consumer.seek(0,2)
                try:
                    mi = consumer.get_message(timeout=0.1)
                    consumer.commit()
                except common.OffsetOutOfRangeError:
                    mi = None
                #import pdb; pdb.set_trace()
                self._logger.info("Last Queued for %d is %s" % \
                                  (self._partition,str(mi)))

                # start reading from last previously processed message
                if mi != None:
                    consumer.seek(0,1)
                else:
                    consumer.seek(0,0)

                if self._limit:
                    raise gevent.GreenletExit

                while True:
                    try:
                        self.resource_check()
                        mlist = consumer.get_messages(10,timeout=0.2)
                        for mm in mlist:
                            if mm is None:
                                continue
                            self._logger.debug("%d Reading offset %d" % \
                                    (self._partition, mm.offset))
                            consumer.commit()
                            pcount += 1
                            if not self.msg_handler(mm):
                                self._logger.info("%d could not handle %s" % (self._partition, str(mm)))
                                raise gevent.GreenletExit
                    except TypeError as ex:
                        self._logger.error("Type Error: %s trace %s" % \
                                (str(ex.args), traceback.format_exc()))
                        gevent.sleep(0.1)
                    except common.FailedPayloadsError as ex:
                        self._logger.error("Payload Error: %s" %  str(ex.args))
                        gevent.sleep(0.1)
            except gevent.GreenletExit:
                break
            except AssertionError as ex:
                self._partoffset = ex
                break
            except Exception as ex:
                template = "An exception of type {0} occurred. Arguments:\n{1!r}"
                messag = template.format(type(ex).__name__, ex.args)
                self._logger.error("%s : traceback %s" % \
                                  (messag, traceback.format_exc()))
                self.stop_partition()
                gevent.sleep(2)

        partdb = {}
        for coll in self._uvedb.keys():
            partdb[coll] = {}
            for gen in self._uvedb[coll].keys():
                partdb[coll][gen] = {}
                for tab in self._uvedb[coll][gen].keys():
                    for rkey in self._uvedb[coll][gen][tab].keys():
                        uk = tab + ":" + rkey
                        partdb[coll][gen][uk] = \
                            set(self._uvedb[coll][gen][tab][rkey].keys())

        self._logger.error("Stopping %d pcount %d" % (self._partition, pcount))
        self.stop_partition()
        return self._partoffset, partdb
Developer: chihchum, Project: contrail-controller, Lines: 87, Source: partition_handler.py

Example 13: KafkaClient

# Module to import: from kafka import SimpleConsumer [as alias]
# Or: from kafka.SimpleConsumer import commit [as alias]
from __future__ import absolute_import, print_function#, unicode_literals

import itertools
from streamparse.spout import Spout

import base64
import sys



from kafka import KafkaClient, SimpleProducer, SimpleConsumer
#from kafka.client import KafkaClient
#from kafka.consumer import SimpleConsumer


kafka = KafkaClient("cloud.soumet.com:9092")
kafka_consumer = SimpleConsumer(kafka, "storm", "realtime", max_buffer_size=1310720000)

for message in kafka_consumer.get_messages(count=5000, block=False):
    print(message.message.value)

kafka_consumer.commit()
Developer: ajmssc, Project: bitcoin-inspector, Lines: 24, Source: storm_transactions.py

Example 14: AbstractPersister

# Module to import: from kafka import SimpleConsumer [as alias]
# Or: from kafka.SimpleConsumer import commit [as alias]
class AbstractPersister(threading.Thread):
    def __init__(self, kafka_conf, influxdb_conf):

        super(AbstractPersister, self).__init__()

        kafka = KafkaClient(kafka_conf.uri)
        self._consumer = SimpleConsumer(kafka,
                                        kafka_conf.group_id,
                                        kafka_conf.topic,
                                        # Set to true even though we actually do
                                        # the commits manually. Needed to
                                        # initialize
                                        # offsets correctly.
                                        auto_commit=True,
                                        # Make these values None so that the
                                        # manual commit will do the actual
                                        # commit.
                                        # Needed so that offsets are initialized
                                        # correctly. If not done, then restarts
                                        # will reread messages from beginning of
                                        # the queue.
                                        auto_commit_every_n=None,
                                        auto_commit_every_t=None,
                                        iter_timeout=1)

        self._influxdb_client = InfluxDBClient(influxdb_conf.ip_address,
                                               influxdb_conf.port,
                                               influxdb_conf.user,
                                               influxdb_conf.password,
                                               influxdb_conf.database_name)

        self._max_wait_time_secs = kafka_conf.max_wait_time_seconds
        self._batch_size = kafka_conf.batch_size
        self._kafka_topic = kafka_conf.topic

        self._json_body = []
        self._last_flush = datetime.now()

    @abc.abstractmethod
    def process_message(self, message):
        pass

    def _flush(self):

        if self._json_body:
            self._influxdb_client.write_points(self._json_body)
            self._consumer.commit()
            LOG.info("processed {} messages from topic '{}'".format(
                len(self._json_body), self._kafka_topic))
            self._json_body = []
        self._last_flush = datetime.now()

    def run(self):

        try:

            while True:

                delta_time = datetime.now() - self._last_flush
                if delta_time.seconds > self._max_wait_time_secs:
                    self._flush()

                for message in self._consumer:
                    try:
                        self._json_body.append(self.process_message(message))
                    except Exception:
                        LOG.exception('Error processing message. Message is '
                                      'being dropped. {}'.format(message))
                    if len(self._json_body) >= self._batch_size:
                        self._flush()

        except:
            LOG.exception(
                'Persister encountered fatal exception processing messages. '
                'Shutting down all threads and exiting')
            os._exit(1)
Developer: carriercomm, Project: monasca-persister, Lines: 78, Source: persister.py


Note: The kafka.SimpleConsumer.commit method examples in this article were compiled by 纯净天空 from open-source code and documentation platforms such as GitHub and MSDocs; the code snippets were selected from open-source projects contributed by their respective developers. The source code is copyrighted by its original authors; please consult each project's License before distributing or using it. Do not reproduce without permission.