

Python SimpleConsumer.get_messages Method Code Examples

This article collects typical usage examples of the kafka.SimpleConsumer.get_messages method in Python. If you are wondering how to call SimpleConsumer.get_messages, how it behaves, or what it looks like in real code, the curated examples below should help. You can also explore further usage examples of the class it belongs to, kafka.SimpleConsumer.


The following presents 15 code examples of the SimpleConsumer.get_messages method, sorted by popularity by default.
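Before the project examples, here is a minimal sketch of the typical call pattern, written against the legacy kafka-python SimpleConsumer API used throughout this page; the broker address, group name, and topic name are placeholders chosen for illustration, not taken from any of the projects below:

from kafka import KafkaClient, SimpleConsumer

# Placeholder broker address, consumer group, and topic (assumptions for this sketch)
client = KafkaClient("localhost:9092")
consumer = SimpleConsumer(client, "example-group", "example-topic", auto_commit=False)

# Fetch up to 10 messages without blocking; returns a list of OffsetAndMessage tuples
for offmsg in consumer.get_messages(count=10, block=False):
    print(offmsg.message.value)

consumer.commit()  # record the consumer position explicitly, since auto_commit is disabled
client.close()

get_messages also accepts a timeout (in seconds) when block=True, as several of the examples below demonstrate.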

Example 1: test_switch_leader_simple_consumer

# Required import: from kafka import SimpleConsumer [as alias]
# Alternatively: from kafka.SimpleConsumer import get_messages [as alias]
    def test_switch_leader_simple_consumer(self):
        producer = Producer(self.client, async=False)
        consumer = SimpleConsumer(self.client, None, self.topic, partitions=None, auto_commit=False, iter_timeout=10)
        self._send_random_messages(producer, self.topic, 0, 2)
        consumer.get_messages()
        self._kill_leader(self.topic, 0)
        consumer.get_messages()
Developer: jianbin-wei, Project: kafka-python, Lines: 9, Source: test_failover_integration.py

Example 2: test_simple_consumer_failed_payloads

# Required import: from kafka import SimpleConsumer [as alias]
# Alternatively: from kafka.SimpleConsumer import get_messages [as alias]
    def test_simple_consumer_failed_payloads(self):
        client = MagicMock()
        consumer = SimpleConsumer(client, group=None,
                                  topic='topic', partitions=[0, 1],
                                  auto_commit=False)

        def failed_payloads(payload):
            return FailedPayloadsError(payload)

        client.send_fetch_request.side_effect = self.fail_requests_factory(failed_payloads)

        # This should not raise an exception
        consumer.get_messages(5)
Developer: Abhishek-Dutta, Project: kafka-python, Lines: 15, Source: test_consumer.py

Example 3: consume_topic

# Required import: from kafka import SimpleConsumer [as alias]
# Alternatively: from kafka.SimpleConsumer import get_messages [as alias]
def consume_topic(topic, group, output_dir, frequency):
    global timestamp, tempfile_path, tempfile
    print "Consumer Loading topic '%s' in consumer group %s into %s..." % (topic, group, output_dir)
    timestamp = standardized_timestamp(frequency)
    kafka_consumer = SimpleConsumer(kafka, group, topic, max_buffer_size=1310720000)

    #open file for writing
    tempfile_path = "/tmp/kafka_%s_%s_%s_%s.txt" % (topic, group, timestamp, batch_counter)
    tempfile = open(tempfile_path, "w")
    #log_has_at_least_one = False #did we log at least one entry?
    while True:
        # get up to 100 messages at a time, non-blocking
        messages = kafka_consumer.get_messages(count=100, block=False)
        if not messages:
            #print "no messages to read"
            continue   # If no messages are received, wait until there are more
        for message in messages:
            #log_has_at_least_one = True
            #print(message.message.value)
            #tempfile.write(message.message.value + "\n")    # lose the '\n'?
            tempfile.write(message.message.value)
        if tempfile.tell() > 120000000:  # file size > 120MB
            print "Note: file is large enough to write to hdfs. Writing now..."
            flush_to_hdfs(output_dir, topic)
        kafka_consumer.commit()  # inform zookeeper of position in the kafka queue
Developer: agilemobiledev, Project: Insight-TrafficJam, Lines: 27, Source: consumer.py

Example 4: KafkaConsumer

# Required import: from kafka import SimpleConsumer [as alias]
# Alternatively: from kafka.SimpleConsumer import get_messages [as alias]
class KafkaConsumer(object):
    def __init__(self, conf):
        self.log = logging.getLogger(__name__)
        self.client = KafkaClient(conf["kafka_server"])
        self.total_inserts = 0
        self.inserts = 0
        self.listenstore = None


    def start_listens(self, listenstore):
        self.listenstore = listenstore
        return self.start(b"listen-group", b"listens")


    def start(self, group_name, topic_name):
        self.group_name = group_name
        self.topic_name = topic_name
        self.log.info("KafkaConsumer subscribed to %s -> %s" % (group_name, topic_name))
        self.consumer = SimpleConsumer(self.client, self.group_name, self.topic_name)

        t0 = 0
        last_offset = -1
        while True:
            listens = []
            if t0 == 0:
                t0 = time()

            messages = self.consumer.get_messages(count=CASSANDRA_BATCH_SIZE, block=True, timeout=KAFKA_READ_TIMEOUT)
            for message in messages:
                try:
                    data = ujson.loads(message.message.value)
                    listens.append(Listen.from_json(data))
                except ValueError as e:
                    self.log.error("Cannot parse JSON: %s\n'%s'" % (str(e), message.message.value))
                    continue

                last_offset = message.offset

            if listens:
                broken = True
                while broken:
                    try:
                        self.listenstore.insert_batch(listens)
                        broken = False
                    except ValueError as e:
                        self.log.error("Cannot insert listens: %s" % unicode(e))
                        broken = False
                    except NoHostAvailable as e:
                        self.log.error("Cannot insert listens: %s. Sleeping, trying again." % unicode(e))
                        sleep(5)


            self.inserts += len(messages)
            if self.inserts >= REPORT_FREQUENCY:
                t1 = time()
                self.total_inserts += self.inserts
                self.log.info("Inserted %d rows in %.1fs (%.2f listens/sec). Total %d rows. last offset: %d" % \
                    (self.inserts, t1 - t0, self.inserts / (t1 - t0), self.total_inserts, last_offset))
                self.inserts = 0
                t0 = 0
Developer: LordSputnik, Project: listenbrainz-server, Lines: 62, Source: kafkaconsumer.py

Example 5: test_ts

# Required import: from kafka import SimpleConsumer [as alias]
# Alternatively: from kafka.SimpleConsumer import get_messages [as alias]
    def test_ts(self):

        kafka = KafkaClient(config.get("kafka.host1") + "," + config.get("kafka.host2"))

        # consumer = SimpleConsumer(kafka, "my-group112", "test")
        consumer = SimpleConsumer(kafka, self.GROUP_NAME, self.KAFKA_TOPIC,
                                  fetch_size_bytes=3000000, buffer_size=2000000000, max_buffer_size=2000000000)

        while True:
            print("HELLO")
            # Prepare data for insert and copy to S3
            # data_str = StringIO()
            count = 0
            # last_offset = 2

            consumer.seek(2, 0)

            for message in consumer.get_messages(count=100, block=False, timeout=0.1):
                count += 1

                print(message.message.value)

            #     # Write tweets to StringIO
            #     self.write_to_data_str(message, data_str)

            # # Store batch tweets to S3
            # self.write_to_s3(data_str, last_offset)

            if count != 100:
                break
Developer: eatseng, Project: insight, Lines: 32, Source: location_streams.py

Example 6: ScoringWorker

# Required import: from kafka import SimpleConsumer [as alias]
# Alternatively: from kafka.SimpleConsumer import get_messages [as alias]
class ScoringWorker(object):
    def __init__(self, settings, strategy_module):
        kafka = KafkaClient(settings.get('KAFKA_LOCATION'))
        self._producer = SimpleProducer(kafka, codec=CODEC_SNAPPY)
        partition_id = settings.get('SCORING_PARTITION_ID')
        if partition_id == None or type(partition_id) != int:
            raise AttributeError("Scoring worker partition id isn't set.")
        self._in_consumer = SimpleConsumer(kafka,
                                       settings.get('SCORING_GROUP'),
                                       settings.get('INCOMING_TOPIC'),
                                       buffer_size=1048576,
                                       max_buffer_size=10485760,
                                       partitions=[partition_id])

        self._manager = FrontierManager.from_settings(settings)
        self._decoder = Decoder(self._manager.request_model, self._manager.response_model)
        self._encoder = Encoder(self._manager.request_model)

        self.consumer_batch_size = settings.get('CONSUMER_BATCH_SIZE', 128)
        self.outgoing_topic = settings.get('SCORING_TOPIC')
        self.strategy = strategy_module.CrawlingStrategy()
        self.backend = self._manager.backend
        self.stats = {}
        self.cache_flush_counter = 0
        self.job_id = 0


    def work(self):
        consumed = 0
        batch = []
        fingerprints = set()
        try:
            for m in self._in_consumer.get_messages(count=self.consumer_batch_size, block=True, timeout=1.0):
                try:
                    msg = self._decoder.decode(m.message.value)
                except (KeyError, TypeError), e:
                    logger.error("Decoding error: %s", e)
                    continue
                else:
                    type = msg[0]
                    batch.append(msg)
                    if type == 'add_seeds':
                        _, seeds = msg
                        fingerprints.update(map(lambda x: x.meta['fingerprint'], seeds))
                        continue

                    if type == 'page_crawled':
                        _, response, links = msg
                        fingerprints.add(response.meta['fingerprint'])
                        fingerprints.update(map(lambda x: x.meta['fingerprint'], links))
                        continue

                    if type == 'request_error':
                        _, request, error = msg
                        fingerprints.add(request.meta['fingerprint'])
                        continue

                    raise TypeError('Unknown message type %s' % type)
                finally:
                    consumed += 1
Developer: vu3jej, Project: distributed-frontera, Lines: 62, Source: score.py

Example 7: test_simple_consumer_unknown_topic_partition

# Required import: from kafka import SimpleConsumer [as alias]
# Alternatively: from kafka.SimpleConsumer import get_messages [as alias]
    def test_simple_consumer_unknown_topic_partition(self):
        client = MagicMock()
        consumer = SimpleConsumer(client, group=None,
                                  topic='topic', partitions=[0, 1],
                                  auto_commit=False)

        # Mock so that only the first request gets a valid response
        def unknown_topic_partition(request):
            return FetchResponsePayload(request.topic, request.partition,
                                 UnknownTopicOrPartitionError.errno, -1, ())

        client.send_fetch_request.side_effect = self.fail_requests_factory(unknown_topic_partition)

        # This should not raise an exception
        with self.assertRaises(UnknownTopicOrPartitionError):
            consumer.get_messages(20)
Developer: Abhishek-Dutta, Project: kafka-python, Lines: 18, Source: test_consumer.py

Example 8: consume_topic

# Required import: from kafka import SimpleConsumer [as alias]
# Alternatively: from kafka.SimpleConsumer import get_messages [as alias]
def consume_topic(topic, group, output_dir, frequency):
    global timestamp, tempfile_path, tempfile
    print "Consuming from topic '%s' in consumer group %s into %s..." % (topic, group, output_dir)

    #get timestamp
    timestamp = standardized_timestamp(frequency)
    kafka_consumer = SimpleConsumer(kafka, group, topic, max_buffer_size=1310720000)
    
    #open file for writing
    tempfile_path = "/tmp/kafka_stockTwits_%s_%s_%s_%s.dat" % (topic, group, timestamp, batch_counter)
    tempfile = open(tempfile_path,"w")
    log_has_at_least_one = False #did we log at least one entry?
    while True:
        messages = kafka_consumer.get_messages(count=1000, block=False) #get up to 1000 messages at a time, non-blocking
        if not messages:
            os.system("sleep 300s") # sleep 5 mins
            continue
           
        for message in messages: #OffsetAndMessage(offset=43, message=Message(magic=0, attributes=0, key=None, value='some message'))
            log_has_at_least_one = True
            #print(message.message.value)
            tempfile.write(message.message.value + "\n")
        if tempfile.tell() > 10000000: #10000000: #file size > 10MB
            flush_to_hdfs(output_dir, topic)
        kafka_consumer.commit() #save position in the kafka queue
    #exit loop
    if log_has_at_least_one:
        flush_to_hdfs(output_dir, topic)
    kafka_consumer.commit() #save position in the kafka queue
    return 0
Developer: devbhosale, Project: HashtagCashtag, Lines: 32, Source: writeToHdfsStockTwits.py

Example 9: consume_save

# Required import: from kafka import SimpleConsumer [as alias]
# Alternatively: from kafka.SimpleConsumer import get_messages [as alias]
def consume_save(group,topic):
#	tmp_save=open(tmp_file_path,"w")
	while True:
		kafka_consumer=SimpleConsumer(kafka,group,topic)
		messages= kafka_consumer.get_messages(count=1000, block=False)
		if not messages:
			print "Consumer didn't read any messages"
		for message in messages:
	#		tmp_save.write( message.message.value+"\n")
			print message.message.value+"\n"
Developer: filmonhg, Project: draft_InsightProject, Lines: 12, Source: realtime_consumer.py

Example 10: test_simple_consumer_leader_change

# Required import: from kafka import SimpleConsumer [as alias]
# Alternatively: from kafka.SimpleConsumer import get_messages [as alias]
    def test_simple_consumer_leader_change(self):
        client = MagicMock()
        consumer = SimpleConsumer(client, group=None,
                                  topic='topic', partitions=[0, 1],
                                  auto_commit=False)

        # Mock so that only the first request gets a valid response
        def not_leader(request):
            return FetchResponsePayload(request.topic, request.partition,
                                 NotLeaderForPartitionError.errno, -1, ())

        client.send_fetch_request.side_effect = self.fail_requests_factory(not_leader)

        # This should not raise an exception
        consumer.get_messages(20)

        # client should have updated metadata
        self.assertGreaterEqual(client.reset_topic_metadata.call_count, 1)
        self.assertGreaterEqual(client.load_metadata_for_topics.call_count, 1)
Developer: Abhishek-Dutta, Project: kafka-python, Lines: 21, Source: test_consumer.py

Example 11: consume_save

# Required import: from kafka import SimpleConsumer [as alias]
# Alternatively: from kafka.SimpleConsumer import get_messages [as alias]
def consume_save(group,topic):
	tmp_save=open(tmp_file_path,"w")
	kafka_consumer=SimpleConsumer(kafka,group,topic)
	messages= kafka_consumer.get_messages(count=1000, block=False)
	if not messages:
		print "Consumer didn't read any messages"
	for message in messages:
		tmp_save.write( message.message.value+"\n")
#		print message.message.value+"\n"
	kafka_consumer.commit() # inform zookeeper of position in the kafka queue
	print ".... ... .. .."
	print "Message from topic \"%s\" consumed \n" % topic
Developer: filmonhg, Project: draft_InsightProject, Lines: 14, Source: consumer.py

Example 12: run

# Required import: from kafka import SimpleConsumer [as alias]
# Alternatively: from kafka.SimpleConsumer import get_messages [as alias]
    def run(self, options=None):

        # try:

        # Create table if it doesn't exist in the database
        if self.REDSHIFT.if_table_exists(self.TABLE_NAME) is False:
            self.REDSHIFT.execute(self.CREATE_TRACKING_TABLE)

        kafka = KafkaClient(config.get("kafka.host1") + "," + config.get("kafka.host2"))

        consumer = SimpleConsumer(kafka, self.GROUP_NAME, self.KAFKA_TOPIC, fetch_size_bytes=3000000,
                                  buffer_size=2000000000, max_buffer_size=2000000000)

        while True:

            # Prepare data for insert and copy to S3
            data_str = StringIO()
            csv_str = StringIO()
            count = 0

            # Get Offset from previous read
            s3_last_offset = self.get_s3_offset()

            (last_offset) = self.REDSHIFT.select(self.GET_OFFSET_QUERY)[0][0]
            last_offset = last_offset if last_offset else 0

            # Resolve difference in offset (s3 offset does not carry over from day to day)
            if s3_last_offset > last_offset:
                last_offset = s3_last_offset
                self.REDSHIFT.execute(self.UPDATE_OFFSET_QUERY % (self.GROUP_NAME, self.PARTITION, last_offset))

            print(last_offset)

            # Read from Offset
            consumer.seek(last_offset, 0)

            for message in consumer.get_messages(count=self.BATCH_SIZE, block=False, timeout=5):

                # Write tweets to StringIO
                self.write_to_data_str(message, data_str, csv_str)

                count += 1
                last_offset += 1

            # Store batch tweets to S3
            self.write_to_s3(data_str, csv_str, last_offset)

            # Track Kafka Offset
            self.REDSHIFT.execute(self.UPDATE_OFFSET_QUERY % (self.GROUP_NAME, self.PARTITION, last_offset))

            if count != self.BATCH_SIZE:
                break
Developer: eatseng, Project: insight, Lines: 54, Source: location_streams.py

Example 13: Consumer

# Required import: from kafka import SimpleConsumer [as alias]
# Alternatively: from kafka.SimpleConsumer import get_messages [as alias]
class Consumer(BaseStreamConsumer):
    """
    Used in DB and SW worker. SW consumes per partition.
    """
    def __init__(self, conn, topic, group, partition_id):
        self._conn = conn
        self._group = group
        self._topic = topic
        self._partition_ids = [partition_id] if partition_id is not None else None

        self._cons = None
        self._connect_consumer()

    def _connect_consumer(self):
        if self._cons is None:
            try:
                self._cons = SimpleConsumer(
                    self._conn,
                    self._group,
                    self._topic,
                    partitions=self._partition_ids,
                    buffer_size=1048576,
                    max_buffer_size=10485760)
            except BrokerResponseError:
                self._cons = None
                logger.warning("Could not connect consumer to Kafka server")
                return False
        return True

    def get_messages(self, timeout=0.1, count=1):
        if not self._connect_consumer():
            yield
        while True:
            try:
                for offmsg in self._cons.get_messages(
                        count,
                        timeout=timeout):
                    try:
                        yield offmsg.message.value
                    except ValueError:
                        logger.warning(
                            "Could not decode {0} message: {1}".format(
                                self._topic,
                                offmsg.message.value))
            except Exception as err:
                logger.warning("Error %s" % err)
            finally:
                break

    def get_offset(self):
        return 0
Developer: lopuhin, Project: frontera, Lines: 53, Source: kafkabus.py

Example 14: validate_samza_job

# Required import: from kafka import SimpleConsumer [as alias]
# Alternatively: from kafka.SimpleConsumer import get_messages [as alias]
def validate_samza_job():
    """
    Validates that negate-number negated all messages, and sent the output to
    samza-test-topic-output.
    """
    logger.info("Running validate_samza_job")
    kafka = util.get_kafka_client()
    kafka.ensure_topic_exists(TEST_OUTPUT_TOPIC)
    consumer = SimpleConsumer(kafka, "samza-test-group", TEST_OUTPUT_TOPIC)
    messages = consumer.get_messages(count=NUM_MESSAGES, block=True, timeout=300)
    message_count = len(messages)
    assert NUM_MESSAGES == message_count, "Expected {0} lines, but found {1}".format(NUM_MESSAGES, message_count)
    for message in map(lambda m: m.message.value, messages):
        assert int(message) < 0, "Expected negative integer but received {0}".format(message)
    kafka.close()
Developer: ItsMadPig, Project: samza, Lines: 17, Source: smoke_tests.py

Example 15: consume_save

# Required import: from kafka import SimpleConsumer [as alias]
# Alternatively: from kafka.SimpleConsumer import get_messages [as alias]
def consume_save(group, topic):
    i = 0
    tmp_save = open(tmp_file_path, "w")
    while True:
        kafka_consumer = SimpleConsumer(kafka, group, topic)
        messages = kafka_consumer.get_messages(count=1000, block=False)
#        if not messages:
#            print "Consumer didn't read any messages"
        for message in messages:
            tmp_save.write(message.message.value + "\n")
            print message.message.value + "\n"
        # file size > 20MB
        if tmp_save.tell() > 20000000:
            push_to_hdfs(tmp_file_path)
        kafka_consumer.commit()  # inform zookeeper of position in the kafka queue
Developer: filmonhg, Project: Trucker, Lines: 17, Source: realtime_consumer_inbound.py


Note: The kafka.SimpleConsumer.get_messages examples in this article were compiled by 纯净天空 from open-source code and documentation platforms such as GitHub and MSDocs. The snippets were selected from open-source projects contributed by many developers; copyright remains with the original authors. Please consult each project's license before distributing or using the code, and do not reproduce this article without permission.