This article collects typical usage examples of the Python kafka.SimpleConsumer class. If you are unsure what SimpleConsumer is for or how to use it, the curated class examples below should help.
The sections that follow show 15 code examples of the SimpleConsumer class, sorted by popularity by default.
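All of the examples target the legacy kafka-python (pre-1.0) API, in which a SimpleConsumer wraps a KafkaClient connection. As a point of reference, here is a minimal sketch of that workflow; the broker address, group, and topic names are placeholders rather than values taken from any example below:

from kafka import KafkaClient, SimpleConsumer

client = KafkaClient('localhost:9092')                    # placeholder broker address
consumer = SimpleConsumer(client, 'my-group', 'my-topic')

# Fetch a batch without blocking; each item exposes its payload
# at message.message.value
for message in consumer.get_messages(count=10, block=False):
    print(message.message.value)

consumer.commit()   # record the consumer group's offsets
client.close()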
Example 1: consume_topic

def consume_topic(topic, group, output_dir, frequency):
    global timestamp, tempfile_path, tempfile
    print "Consumer Loading topic '%s' in consumer group %s into %s..." % (topic, group, output_dir)

    timestamp = standardized_timestamp(frequency)
    kafka_consumer = SimpleConsumer(kafka, group, topic, max_buffer_size=1310720000)

    # open file for writing
    tempfile_path = "/tmp/kafka_%s_%s_%s_%s.txt" % (topic, group, timestamp, batch_counter)
    tempfile = open(tempfile_path, "w")
    # log_has_at_least_one = False  # did we log at least one entry?

    while True:
        # get up to 100 messages at a time, non-blocking
        messages = kafka_consumer.get_messages(count=100, block=False)
        if not messages:
            # print "no messages to read"
            continue  # if no messages were received, poll again

        for message in messages:
            # log_has_at_least_one = True
            # print(message.message.value)
            # tempfile.write(message.message.value + "\n")  # lose the '\n'?
            tempfile.write(message.message.value)

        if tempfile.tell() > 120000000:  # file size > 120 MB
            print "Note: file is large enough to write to HDFS. Writing now..."
            flush_to_hdfs(output_dir, topic)

        kafka_consumer.commit()  # inform ZooKeeper of the position in the Kafka queue
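This function assumes a module-level kafka client plus the standardized_timestamp, flush_to_hdfs, and batch_counter globals from the original script. A call might look like the following; every argument value here is made up for illustration:

consume_topic("tweets", "hdfs-loader", "/user/hdfs/tweets", "hourly")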
Example 2: assert_message_count
def assert_message_count(self, topic, check_count, timeout=10,
                         partitions=None, at_least=False):
    hosts = ','.join(['%s:%d' % (broker.host, broker.port)
                      for broker in self.brokers])

    client = KafkaClient(hosts)
    consumer = SimpleConsumer(client, None, topic,
                              partitions=partitions,
                              auto_commit=False,
                              iter_timeout=timeout)

    started_at = time.time()
    pending = consumer.pending(partitions)

    # Keep checking if it isn't immediately correct, subject to timeout
    while pending < check_count and (time.time() - started_at < timeout):
        pending = consumer.pending(partitions)
        time.sleep(0.5)

    consumer.stop()
    client.close()

    if pending < check_count:
        self.fail('Too few pending messages: found %d, expected %d' %
                  (pending, check_count))
    elif pending > check_count and not at_least:
        self.fail('Too many pending messages: found %d, expected %d' %
                  (pending, check_count))

    return True
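In a test this helper would typically be invoked right after producing a known number of messages, for example self.assert_message_count(self.topic, 100, partitions=[0]) (illustrative values).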
Example 3: test_switch_leader_simple_consumer
def test_switch_leader_simple_consumer(self):
    producer = Producer(self.client, async=False)
    consumer = SimpleConsumer(self.client, None, self.topic, partitions=None, auto_commit=False, iter_timeout=10)
    self._send_random_messages(producer, self.topic, 0, 2)
    consumer.get_messages()
    self._kill_leader(self.topic, 0)
    consumer.get_messages()
Example 4: assert_message_count
def assert_message_count(self, topic, check_count, timeout=10,
                         partitions=None, at_least=False):
    hosts = ','.join(['%s:%d' % (broker.host, broker.port)
                      for broker in self.brokers])

    client = SimpleClient(hosts, timeout=2)
    consumer = SimpleConsumer(client, None, topic,
                              partitions=partitions,
                              auto_commit=False,
                              iter_timeout=timeout)

    started_at = time.time()
    pending = -1
    while pending < check_count and (time.time() - started_at < timeout):
        try:
            pending = consumer.pending(partitions)
        except FailedPayloadsError:
            pass
        time.sleep(0.5)

    consumer.stop()
    client.close()

    if pending < check_count:
        self.fail('Too few pending messages: found %d, expected %d' %
                  (pending, check_count))
    elif pending > check_count and not at_least:
        self.fail('Too many pending messages: found %d, expected %d' %
                  (pending, check_count))

    return True
Example 5: test_ts
def test_ts(self):
    kafka = KafkaClient(config.get("kafka.host1") + "," + config.get("kafka.host2"))

    # consumer = SimpleConsumer(kafka, "my-group112", "test")
    consumer = SimpleConsumer(kafka, self.GROUP_NAME, self.KAFKA_TOPIC,
                              fetch_size_bytes=3000000, buffer_size=2000000000, max_buffer_size=2000000000)

    while True:
        print("HELLO")
        # Prepare data for insert and copy to S3
        # data_str = StringIO()
        count = 0
        # last_offset = 2

        consumer.seek(2, 0)
        for message in consumer.get_messages(count=100, block=False, timeout=0.1):
            count += 1
            print(message.message.value)
            # # Write tweets to StringIO
            # self.write_to_data_str(message, data_str)
            # # Store batch tweets to S3
            # self.write_to_s3(data_str, last_offset)

        if count != 100:
            break
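The seek(2, 0) call above positions the consumer two messages past the earliest available offset: in the SimpleConsumer API the second argument is a whence flag, with 0 meaning relative to the head of the queue and 2 meaning relative to the tail (compare Example 9, which uses seek(0, 2) to skip straight to new messages). A small sketch with placeholder names:

consumer = SimpleConsumer(kafka, "my-group", "my-topic")
consumer.seek(0, 0)   # rewind to the earliest available message
consumer.seek(0, 2)   # jump to the tail; only messages produced from now on are fetched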
Example 6: consume
def consume(kafka_host):
    kafka = KafkaClient(kafka_host)
    consumer = SimpleConsumer(kafka, 'fetcher', cfg['kafka']['pages'])
    producer = SimpleProducer(kafka)
    consumer.max_buffer_size = 20 * 1024 * 1024

    for msg in consumer:
        page = json.loads(msg.message.value)
        process(page, producer)

    kafka.close()
Example 7: consume_save
def consume_save(group, topic):
    # tmp_save = open(tmp_file_path, "w")
    while True:
        kafka_consumer = SimpleConsumer(kafka, group, topic)
        messages = kafka_consumer.get_messages(count=1000, block=False)
        if not messages:
            print "Consumer didn't read any messages"

        for message in messages:
            # tmp_save.write(message.message.value + "\n")
            print message.message.value + "\n"
Example 8: consume_save

def consume_save(group, topic):
    tmp_save = open(tmp_file_path, "w")
    kafka_consumer = SimpleConsumer(kafka, group, topic)
    messages = kafka_consumer.get_messages(count=1000, block=False)
    if not messages:
        print "Consumer didn't read any messages"

    for message in messages:
        tmp_save.write(message.message.value + "\n")
        # print message.message.value + "\n"

    kafka_consumer.commit()  # inform ZooKeeper of the position in the Kafka queue
    print ".... ... .. .."
    print "Messages from topic \"%s\" consumed \n" % topic
Example 9: setup_capture_new_messages_consumer
def setup_capture_new_messages_consumer(topic):
    """Seeks to the tail of the topic then returns a function that can
    consume messages from that point.
    """
    kafka = KafkaClient(get_config().cluster_config.broker_list)
    group = str('data_pipeline_clientlib_test')
    consumer = SimpleConsumer(kafka, group, topic, max_buffer_size=_ONE_MEGABYTE)
    consumer.seek(0, 2)  # seek to the tail: 0 is the offset, 2 means relative to the tail

    yield consumer

    kafka.close()
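Because the function yields the consumer, it is presumably registered as a context manager or fixture in the original test library. Usage would then look roughly like the sketch below; the topic name and the publishing helper are assumptions, not part of the source:

with setup_capture_new_messages_consumer('my.topic') as consumer:
    publish_test_message()   # hypothetical helper that produces one message
    messages = consumer.get_messages(count=10, block=True, timeout=5)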
Example 10: run
def run(self, options=None):
    # try:
    # Create table if it doesn't exist in the database
    if self.REDSHIFT.if_table_exists(self.TABLE_NAME) is False:
        self.REDSHIFT.execute(self.CREATE_TRACKING_TABLE)

    kafka = KafkaClient(config.get("kafka.host1") + "," + config.get("kafka.host2"))
    consumer = SimpleConsumer(kafka, self.GROUP_NAME, self.KAFKA_TOPIC, fetch_size_bytes=3000000,
                              buffer_size=2000000000, max_buffer_size=2000000000)

    while True:
        # Prepare data for insert and copy to S3
        data_str = StringIO()
        csv_str = StringIO()
        count = 0

        # Get offset from previous read
        s3_last_offset = self.get_s3_offset()
        last_offset = self.REDSHIFT.select(self.GET_OFFSET_QUERY)[0][0]
        last_offset = last_offset if last_offset else 0

        # Resolve difference in offset (S3 offset does not carry over from day to day)
        if s3_last_offset > last_offset:
            last_offset = s3_last_offset
            self.REDSHIFT.execute(self.UPDATE_OFFSET_QUERY % (self.GROUP_NAME, self.PARTITION, last_offset))
        print(last_offset)

        # Read from offset
        consumer.seek(last_offset, 0)
        for message in consumer.get_messages(count=self.BATCH_SIZE, block=False, timeout=5):
            # Write tweets to StringIO
            self.write_to_data_str(message, data_str, csv_str)
            count += 1
            last_offset += 1

        # Store batch tweets to S3
        self.write_to_s3(data_str, csv_str, last_offset)

        # Track Kafka offset
        self.REDSHIFT.execute(self.UPDATE_OFFSET_QUERY % (self.GROUP_NAME, self.PARTITION, last_offset))

        if count != self.BATCH_SIZE:
            break
Example 11: test_simple_consumer_failed_payloads
def test_simple_consumer_failed_payloads(self):
    client = MagicMock()
    consumer = SimpleConsumer(client, group=None,
                              topic='topic', partitions=[0, 1],
                              auto_commit=False)

    def failed_payloads(payload):
        return FailedPayloadsError(payload)

    client.send_fetch_request.side_effect = self.fail_requests_factory(failed_payloads)

    # This should not raise an exception
    consumer.get_messages(5)
Example 12: HBaseServer

class HBaseServer(threading.Thread):
    """
    HBase thread that continuously reads from a Kafka queue
    """

    def __init__(self, kafka_url, kafka_topic, hbase_url, hbase_thrift_port, hbase_table):
        threading.Thread.__init__(self)
        self.kafka = KafkaClient(kafka_url)
        self.cons = SimpleConsumer(self.kafka, None, kafka_topic)
        self.cons.seek(0, 2)
        self.hbase_connect = happybase.Connection(hbase_url, hbase_thrift_port)
        self.car_table = self.hbase_connect.table(hbase_table)
        self.server_on_flag = True
        self.m = None
        self.payload = None
        self.vin = None
        self.time = None
        self.data = None
        self.row_key = None
        self.count = 0

    def run(self):
        while self.server_on_flag:
            self.m = self.cons.get_message(block=False)
            if self.m is not None:
                self.payload = json.loads(self.m.message.value)
                self.vin = str(self.payload['vin'])
                self.time = str(self.payload['timestamp'])
                self.data = str(self.payload['data'])
                self.row_key = self.vin + self.time
                try:
                    self.car_table.put(self.vin, {'user:mostrecent': self.time})
                    self.car_table.put(self.row_key, {'car:data': self.data})
                    self.count = self.count + 1
                    logger.info('HBase Server: key: %s, table: %s, car{data: %s}. Message number: %s',
                                self.row_key, 'rvi', self.data, str(self.count))
                except Exception as e:
                    logger.info('%s, data push into HBase unsuccessful...', e)
            else:
                sleep(0.2)  # 1/5 would be integer division (0) on Python 2 and busy-spin

    def shutdown(self):
        self.server_on_flag = False
        logger.info('HBase Server shutting down...')
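A rough sketch of how the thread could be wired up; the broker address, topic, HBase host, Thrift port, and table name are all illustrative assumptions:

server = HBaseServer('localhost:9092', 'rvi', 'localhost', 9090, 'car_data')
server.start()      # begins draining the Kafka topic into HBase
# ... later ...
server.shutdown()
server.join()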
Example 13: test_simple_consumer_reset_partition_offset
def test_simple_consumer_reset_partition_offset(self):
    client = MagicMock()

    def mock_offset_request(payloads, **kwargs):
        raise FailedPayloadsError(payloads[0])

    client.send_offset_request.side_effect = mock_offset_request

    consumer = SimpleConsumer(client, group='foobar',
                              topic='topic', partitions=[0, 1],
                              auto_commit=False)

    # This should not raise an exception
    self.assertEqual(consumer.reset_partition_offset(0), None)
Example 14: RVIConsumer

class RVIConsumer(threading.Thread):

    def __init__(self, kafka_addr, topic, vin, web_url):
        threading.Thread.__init__(self)
        self.kafka = KafkaClient(kafka_addr)
        self.cons = SimpleConsumer(self.kafka, None, topic)
        self.cons.seek(0, 2)
        self.vin = vin
        self.web_url = web_url
        self.flag = True
        self.count = 0
        self.sleep_count = 0
        self.headers = {'Content-Type': 'application/json'}

    def is_running(self):
        return self.flag

    def run(self):
        while self.flag:
            # cons = SimpleConsumer(kafka, None, 'rvi')
            m = self.cons.get_message(block=False)
            if m is not None:
                payload = json.loads(m.message.value)
                if payload['vin'] == self.vin:
                    self.sleep_count = 0
                    payloadtoweb = json.dumps(m.message.value)
                    r = requests.post(self.web_url, data=payloadtoweb, headers=self.headers)
                    if r.status_code == 200:  # compare with ==, not 'is'
                        print m.message.value + " sent successfully\n"
                    else:
                        print "%s is not available, status code:%d...shutting down now..." % (self.web_url, r.status_code)
                        self.shutdown()
            else:
                if self.sleep_count > 100000:
                    print "No new data for %s... Timing out" % self.vin
                    self.shutdown()
                time.sleep(0.2)  # 1/5 would be integer division (0) on Python 2
                self.sleep_count = self.sleep_count + 1

    def shutdown(self):
        self.flag = False
        requests.post(self.web_url, data=json.dumps({'vin': self.vin, 'data': 'EOM'}), headers=self.headers)
        print "%s consumer thread shutting down" % self.vin
Example 15: consume_save

def consume_save(group, topic):
    i = 0
    tmp_save = open(tmp_file_path, "w")
    while True:
        kafka_consumer = SimpleConsumer(kafka, group, topic)
        messages = kafka_consumer.get_messages(count=1000, block=False)
        # if not messages:
        #     print "Consumer didn't read any messages"

        for message in messages:
            tmp_save.write(message.message.value + "\n")
            print message.message.value + "\n"

        # file size > 20 MB
        if tmp_save.tell() > 20000000:
            push_to_hdfs(tmp_file_path)

        kafka_consumer.commit()  # inform ZooKeeper of the position in the Kafka queue