This article collects typical usage examples of the Python method kafka.consumer.SimpleConsumer.get_message. If you are unsure what SimpleConsumer.get_message does or how to call it, the curated code examples below should help; you can also look further into the containing class, kafka.consumer.SimpleConsumer.
Six code examples of SimpleConsumer.get_message are shown below, sorted by popularity by default.
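Before the full examples, here is a minimal, self-contained sketch of the call pattern they all share. The broker address, consumer group, and topic names are placeholders, not values taken from any of the examples.

from kafka.client import KafkaClient
from kafka.consumer import SimpleConsumer

client = KafkaClient("localhost:9092")                         # placeholder broker address
consumer = SimpleConsumer(client, "example-group", "example-topic")

# Fetch one message; block for up to 5 seconds, then return None if nothing arrived.
message = consumer.get_message(block=True, timeout=5)
if message is not None:
    print(message.message.value)   # raw payload of the OffsetAndMessage

client.close()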
Example 1: run
# Required import: from kafka.consumer import SimpleConsumer
# Or: from kafka.consumer.SimpleConsumer import get_message
def run(self):
    client = KafkaClient("10.206.216.13:19092,10.206.212.14:19092,10.206.209.25:19092")
    # manual offset handling: auto_commit disabled, consuming only this worker's partitions
    consumer = SimpleConsumer(client, "test-group", "jiketest",
                              auto_commit=False, partitions=self.part)
    consumer.seek(0, 0)   # start from the earliest available offset
    while True:
        # block for up to 60 seconds waiting for the next message
        message = consumer.get_message(True, 60)
        self.__offset = message.offset
        print message.message.value
Example 2: ZKConsumer
# Required import: from kafka.consumer import SimpleConsumer
# Or: from kafka.consumer.SimpleConsumer import get_message
# ... (earlier part of this class omitted) ...
            return

        self.consumer = SimpleConsumer(self.client, self.group, self.topic,
                                       partitions=my_partitions,
                                       **self.consumer_kwargs)
        self.consumer.provide_partition_info()
        self.logger.info("Consumer connected to Kafka: %s", self.consumer.offsets)

    def stop(self):
        if self.consumer is not None:
            self.logger.info('Stopping Kafka consumer')
            self.consumer.stop()
            self.consumer = None
        if self.client is not None:
            self.logger.info('Stopping Kafka client')
            self.client.close()
            self.client = None
        if self.zk is not None:
            self.logger.info('Stopping ZooKeeper client')
            if self.zkp is not None and not self.zkp.failed:
                self.zkp.finish()
            self.zk.stop()
            self.zkp = None
            self.zk = None

    def commit(self, partitions=None):
        """
        Commit offsets for this consumer

        partitions: list of partitions to commit, default is to commit
                    all of them
        """
        if self.consumer is None:
            return
        self.logger.debug('Begin committing offsets for partitions: %s',
                          partitions if partitions else 'All')
        self.consumer.commit(partitions)
        self.logger.debug('End committing offsets for partitions: %s',
                          partitions if partitions else 'All')

    def pending(self, partitions=None):
        """
        Gets the pending message count

        partitions: list of partitions to check for, default is to check all
        """
        return self.consumer.pending(partitions)

    def provide_partition_info(self):
        """
        Indicates that partition info must be returned by the consumer
        """
        self.consumer.provide_partition_info()

    def seek(self, offset, whence):
        """
        Alter the current offset in the consumer, similar to fseek

        offset: how much to modify the offset
        whence: where to modify it from
                0 is relative to the earliest available offset (head)
                1 is relative to the current offset
                2 is relative to the latest known offset (tail)
        """
        self.consumer.seek(offset, whence)

    def get_messages(self, count=1, block=True, timeout=0.1):
        """
        Fetch the specified number of messages

        count: Indicates the maximum number of messages to be fetched
        block: If True, the API will block till some messages are fetched.
        timeout: If block is True, the function will block for the specified
                 time (in seconds) until count messages is fetched. If None,
                 it will block forever.
        """
        if self.consumer is None:
            return []
        else:
            try:
                messages = self.consumer.get_messages(count, block, timeout)
                if not messages and self.zkp.failed:
                    raise FailedPayloadsError
                return messages
            except FailedPayloadsError as err:
                msg = 'Failed to retrieve payload, restarting consumer'
                self.logger.exception(msg)
                raise err

    def get_message(self, block=True, timeout=0.1, get_partition_info=None):
        return self.consumer.get_message(block, timeout, get_partition_info)

    def _get_message(self, block=True, timeout=0.1, get_partition_info=None,
                     update_offset=True):
        return self.consumer._get_message(block, timeout, get_partition_info,
                                          update_offset)

    def __iter__(self):
        for msg in self.consumer:
            yield msg
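The seek and get_messages semantics documented in this wrapper mirror those of the underlying SimpleConsumer. A minimal sketch of the same calls made directly against SimpleConsumer (the broker, group, and topic names are assumptions, not part of the ZKConsumer example):

from kafka.client import KafkaClient
from kafka.consumer import SimpleConsumer

client = KafkaClient("localhost:9092")          # placeholder broker
consumer = SimpleConsumer(client, "zk-demo-group", "demo-topic")
consumer.provide_partition_info()               # messages now come back as (partition, message) pairs

consumer.seek(0, 0)                             # whence=0: rewind to the earliest available offset
for partition, msg in consumer.get_messages(count=10, block=True, timeout=2.0):
    print("partition %d offset %d: %r" % (partition, msg.offset, msg.message.value))

consumer.commit()                               # commit offsets for all partitions
client.close()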
Example 3: KafkaSpiderMixin
# Required import: from kafka.consumer import SimpleConsumer
# Or: from kafka.consumer.SimpleConsumer import get_message
class KafkaSpiderMixin(object):
    """
    Mixin class to implement reading urls from a kafka queue.

    :type kafka_topic: str
    """
    kafka_topic = None

    def process_kafka_message(self, message):
        """
        Tell this spider how to extract urls from a kafka message

        :param message: A Kafka message object
        :type message: kafka.common.OffsetAndMessage
        :rtype: str or None
        """
        if not message:
            return None
        return message.message.value

    def setup_kafka(self, settings):
        """Setup kafka connection and idle signal.

        This should be called after the spider has set its crawler object.

        :param settings: The current Scrapy settings being used
        :type settings: scrapy.settings.Settings
        """
        if not hasattr(self, 'topic') or not self.topic:
            self.topic = '%s-starturls' % self.name
        hosts = settings.get('SCRAPY_KAFKA_HOSTS', ['localhost:9092'])
        consumer_group = settings.get('SCRAPY_KAFKA_SPIDER_CONSUMER_GROUP', 'scrapy-kafka')
        _kafka = KafkaClient(hosts)
        # wait at most 1 sec for more messages, otherwise continue
        self.consumer = SimpleConsumer(_kafka, consumer_group, self.topic,
                                       auto_commit=True, iter_timeout=1.0)
        # the idle signal is called when the spider has no requests left;
        # that's when we schedule new requests from the kafka topic
        self.crawler.signals.connect(self.spider_idle, signal=signals.spider_idle)
        self.crawler.signals.connect(self.item_scraped, signal=signals.item_scraped)
        self.log("Reading URLs from kafka topic '%s'" % self.topic)

    def next_request(self):
        """
        Returns a request to be scheduled.

        :rtype: str or None
        """
        message = self.consumer.get_message(True)
        url = self.process_kafka_message(message)
        if not url:
            return None
        return self.make_requests_from_url(url)

    def schedule_next_request(self):
        """Schedules a request if available"""
        req = self.next_request()
        if req:
            self.crawler.engine.crawl(req, spider=self)

    def spider_idle(self):
        """Schedules a request if available, otherwise waits."""
        self.schedule_next_request()
        raise DontCloseSpider

    def item_scraped(self, *args, **kwargs):
        """Avoids waiting for the spider to idle before scheduling the next request"""
        self.schedule_next_request()
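A spider would typically mix this class in and call setup_kafka() once its crawler object is set. The following is only a sketch of one way to wire it up; the spider name, topic, and from_crawler hook are illustrative assumptions, not part of the original example:

from scrapy.spiders import Spider

class DemoKafkaSpider(KafkaSpiderMixin, Spider):
    name = 'demo'
    topic = 'demo-starturls'            # hypothetical topic; overrides the '%s-starturls' default

    @classmethod
    def from_crawler(cls, crawler, *args, **kwargs):
        spider = super(DemoKafkaSpider, cls).from_crawler(crawler, *args, **kwargs)
        spider.setup_kafka(crawler.settings)    # connect the consumer and the idle/item signals
        return spider

    def parse(self, response):
        yield {'url': response.url}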
Example 4: __init__
# Required import: from kafka.consumer import SimpleConsumer
# Or: from kafka.consumer.SimpleConsumer import get_message
class zk_client:
    def __init__(self, topics, zk_hosts='127.0.0.1:2181',
                 consumer_group=kafka_consts.CONSUMER_GROUP):
        self.zk_hosts = zk_hosts
        self.kafka_client = None
        self.consumer_group = consumer_group
        self.lock = Lock()
        self.zk_st_watcher = zk_states_watcher()
        self.consumer_id = uuid1().hex
        self.consumer_ids = [self.consumer_id]
        self.consumer_id_path = '{}/{}/{}'.format(kafka_consts.CONSUMER_PATH,
                                                  self.consumer_group, 'ids')
        try:
            self.zoo_cl = KazooClient(self.zk_hosts)
            self.zoo_cl.add_listener(self.zk_st_watcher)
            self.broker_details = {}
            self.zoo_cl.start()
            sleep(1)
            self._init(topics)
        except Exception as e:
            logging.exception(e)

    def register(self):
        ret = False
        while not ret:
            ret = self.create_ephemeralpath(self.consumer_id_path + '/' + self.consumer_id)
            if not ret:
                sleep(1)

    def _init(self, topics):
        ret = False
        while not ret:
            ret = self.create_newpath(kafka_consts.CONSUMER_PATH + '/' + self.consumer_group)
            if not ret:
                sleep(1)
        ret = False
        while not ret:
            ret = self.create_newpath(kafka_consts.CONSUMER_PATH + '/' + self.consumer_group +
                                      '/ids')
            if not ret:
                sleep(1)
        self.register()
        self.get_consumer_list()
        self.populate_broker_info()
        temptopics = [x.strip() for x in topics]
        self.topics = []
        for t in temptopics:
            if t != '' and t not in self.topics:
                self.topics.append(t)
        if not self.topics:
            raise ValueError('no topics passed')
        ret = False
        broker_ports = []
        with self.lock:
            for brid in self.broker_details:
                broker_port = self.broker_details[brid]
                broker_ports.append('{}:{}'.format(broker_port['host'], broker_port['port']))
        self.kafka_client = nsclient(broker_ports)
        self.topic_part_ids = {}
        for topic in topics:
            pids = self.kafka_client.get_partition_ids_for_topic(topic)
            self.topic_part_ids[topic] = pids
        self.consumed = {}
        self.rebalance_consumers()
        try:
            topic_partitions = {t: None for t in self.topics}
            self.kconsumer = SimpleConsumer(self.kafka_client, self.consumer_group, None,
                                            topic_partitions=self.consumed.copy())
        except Exception as e:
            logging.exception(e)
            sys.exit(1)

    def get_message(self):
        try:
            return self.kconsumer.get_message(timeout=1, get_partition_info=True)
        except Exception as e:
            logging.exception(e)
            return None

    @synchronized
    def populate_broker_info(self):
        brokers = self.get_brokerids()
        self.broker_details.clear()
        for brid in brokers:
            try:
                brdetails = self.get_data(kafka_consts.BROKER_ID_PATH + '/' + brid)
                if brdetails is None:
                    continue
                brjson = json.loads(brdetails[0])
                self.broker_details[brid] = brjson
            except Exception as e:
                logging.exception(e)

    def create_newpath(self, path):
        '''
        Create the znode path if it does not already exist
# ... (remaining code omitted) ...
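When get_partition_info=True is passed, as in the wrapper above, SimpleConsumer.get_message returns a (partition, message) pair rather than the bare message. A minimal sketch of unpacking that return value (broker, group, and topic names are placeholders):

from kafka.client import KafkaClient
from kafka.consumer import SimpleConsumer

client = KafkaClient("localhost:9092")          # placeholder broker
consumer = SimpleConsumer(client, "zk-client-group", "demo-topic")

result = consumer.get_message(timeout=1, get_partition_info=True)
if result is not None:
    partition, offset_and_message = result
    print("partition %d offset %d: %r" % (partition, offset_and_message.offset,
                                          offset_and_message.message.value))

client.close()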
Example 5: KafkaClient
# Required import: from kafka.consumer import SimpleConsumer
# Or: from kafka.consumer.SimpleConsumer import get_message
from kafka.client import KafkaClient
from kafka.consumer import SimpleConsumer
from kafka.producer import SimpleProducer, KeyedProducer
import logging
logging.basicConfig(
    format='%(asctime)s.%(msecs)s:%(name)s:%(thread)d:%(levelname)s:%(process)d:%(message)s',
    level=logging.DEBUG
)
kafka = KafkaClient("localhost:9092")
kafka.send_offset_fetch_request('test-group')
# To consume messages
consumer = SimpleConsumer(kafka, "test-group", "test-topic", auto_commit_every_n=1, iter_timeout=10)
# while True:
# for message in consumer:
# print(message)
# consumer.commit()
print consumer.get_message()
# consumer.commit(partitions=[0])
kafka.close()
Example 6: main
# Required import: from kafka.consumer import SimpleConsumer
# Or: from kafka.consumer.SimpleConsumer import get_message
import json
import logging
import time
import traceback

from docopt import docopt

from kafka.client import KafkaClient
from kafka.consumer import SimpleConsumer


def main():
    """kafkadump: Kafka topic dump utility for debugging.

    Usage:
        kafkadump list --host=<host>
        kafkadump dump <topic> --host=<host> [--consumer=<consumer>]

    Examples:

        List all the topics on your local Kafka instance:

            python kafkadump.py list --host=<kafkahost>:9092

        Dump the contents of a single topic starting from offset 0:

            python kafkadump.py dump test.crawled_firehose --host=<kafkahost>:9092

        Use CTRL+C (SIGINT, KeyboardInterrupt) to stop it from polling Kafka.
        It will end by printing the total records serviced and the raw output
        of the most recent record.

    Options:
        -h --host <host>            Kafka host name where Kafka cluster will be resolved
        -c --consumer <consumer>    Consumer group ID to use for reading messages
    """
    args = docopt(main.__doc__)
    host = args["--host"]

    logging.basicConfig()

    print "=> Connecting to {0}...".format(host)
    kafka = KafkaClient(host)
    print "=> Connected."

    if args["list"]:
        for topic in kafka.topic_partitions.keys():
            print topic
        return 0
    elif args["dump"]:
        topic = args["<topic>"]
        consumer_id = args["--consumer"] or "default"
        consumer = SimpleConsumer(kafka, consumer_id, topic,
                                  buffer_size=1024*100,        # 100kb
                                  fetch_size_bytes=1024*100,   # 100kb
                                  max_buffer_size=None         # eliminate big message errors
                                  )
        consumer.seek(0, 0)
        num_records = 0
        total_bytes = 0
        item = None
        while True:
            try:
                message = consumer.get_message()
                if message is None:
                    time.sleep(1)
                    continue
                val = message.message.value
                item = json.loads(val)
                body_bytes = len(item)
                print item
                num_records = num_records + 1
                total_bytes = total_bytes + body_bytes
            except:
                traceback.print_exc()
                break
        total_mbs = float(total_bytes) / (1024*1024)
        print
        if item is not None:
            print json.dumps(item, indent=4)
        if num_records == 0:
            num_records = 1
        print num_records, "records", total_mbs, "megabytes", (float(total_bytes) / num_records / 1024), "kb per msg"
        kafka.close()
        return 0
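The excerpt stops at the final return; to run it as a standalone script you would add the usual entry-point guard. This completion is not shown in the original and is only a sketch:

import sys

if __name__ == '__main__':
    sys.exit(main())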