This article collects typical usage examples of the Python kafka.KafkaConsumer.next method. If you are wondering what KafkaConsumer.next does or how to use it, the curated code examples below may help. You can also explore other usages of its containing class, kafka.KafkaConsumer.
A total of 12 code examples of KafkaConsumer.next are shown below, sorted by popularity by default. You can upvote the examples you like or find useful; your votes help the system recommend better Python code examples.
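Before the examples, here is a minimal sketch of the pattern most of them share, assuming the legacy kafka-python 0.9.x API that these examples target: in that API, KafkaConsumer.next() blocks for up to consumer_timeout_ms and then raises ConsumerTimeout. The topic name and broker address are placeholders, and the ConsumerTimeout import path is an assumption based on those old releases.

# Minimal sketch (assumed legacy kafka-python 0.9.x API; placeholder topic/broker).
from kafka import KafkaConsumer
from kafka.common import ConsumerTimeout  # assumed location of the exception in 0.9.x

consumer = KafkaConsumer('example-topic',              # hypothetical topic
                         group_id='example-group',
                         bootstrap_servers=['localhost:9092'],
                         consumer_timeout_ms=1000)

try:
    while True:
        message = consumer.next()   # fetch the next message, or time out
        print(message.partition, message.offset, message.value)
except ConsumerTimeout:
    # no message arrived within consumer_timeout_ms
    pass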
Example 1: test_process_change
# Required import: from kafka import KafkaConsumer [as alias]
# Or: from kafka.KafkaConsumer import next [as alias]
def test_process_change(self):
    consumer = KafkaConsumer(
        topics.CASE,
        group_id='test-consumer',
        bootstrap_servers=[settings.KAFKA_URL],
        consumer_timeout_ms=100,
    )
    pillow = ChangeFeedPillow(self._fake_couch, kafka=get_kafka_client(), checkpoint=None)
    document = {
        'doc_type': 'CommCareCase',
        'type': 'mother',
        'domain': 'kafka-test-domain',
    }
    pillow.process_change(Change(id='test-id', sequence_id='3', document=document))

    message = consumer.next()
    change_meta = change_meta_from_kafka_message(message.value)
    self.assertEqual(COUCH, change_meta.data_source_type)
    self.assertEqual(self._fake_couch.dbname, change_meta.data_source_name)
    self.assertEqual('test-id', change_meta.document_id)
    self.assertEqual(document['doc_type'], change_meta.document_type)
    self.assertEqual(document['type'], change_meta.document_subtype)
    self.assertEqual(document['domain'], change_meta.domain)
    self.assertEqual(False, change_meta.is_deletion)

    with self.assertRaises(ConsumerTimeout):
        consumer.next()
Example 2: ChangeFeedPillowTest
# Required import: from kafka import KafkaConsumer [as alias]
# Or: from kafka.KafkaConsumer import next [as alias]
class ChangeFeedPillowTest(SimpleTestCase):
    # note: these tests require a valid kafka setup running

    def setUp(self):
        self._fake_couch = FakeCouchDb()
        self._fake_couch.dbname = 'test-couchdb'
        with trap_extra_setup(KafkaUnavailableError):
            self.consumer = KafkaConsumer(
                topics.CASE,
                group_id='test-consumer',
                bootstrap_servers=[settings.KAFKA_URL],
                consumer_timeout_ms=100,
            )
        self.pillow = ChangeFeedPillow(
            'fake-changefeed-pillow-id', self._fake_couch, kafka=get_kafka_client(), checkpoint=None
        )

    def test_process_change(self):
        document = {
            'doc_type': 'CommCareCase',
            'type': 'mother',
            'domain': 'kafka-test-domain',
        }
        self.pillow.process_change(Change(id='test-id', sequence_id='3', document=document))

        message = self.consumer.next()
        change_meta = change_meta_from_kafka_message(message.value)
        self.assertEqual(COUCH, change_meta.data_source_type)
        self.assertEqual(self._fake_couch.dbname, change_meta.data_source_name)
        self.assertEqual('test-id', change_meta.document_id)
        self.assertEqual(document['doc_type'], change_meta.document_type)
        self.assertEqual(document['type'], change_meta.document_subtype)
        self.assertEqual(document['domain'], change_meta.domain)
        self.assertEqual(False, change_meta.is_deletion)

        with self.assertRaises(ConsumerTimeout):
            self.consumer.next()

    def test_process_change_with_unicode_domain(self):
        document = {
            'doc_type': 'CommCareCase',
            'type': 'mother',
            'domain': u'हिंदी',
        }
        self.pillow.process_change(Change(id='test-id', sequence_id='3', document=document))
        message = self.consumer.next()
        change_meta = change_meta_from_kafka_message(message.value)
        self.assertEqual(document['domain'], change_meta.domain)

    def test_no_domain(self):
        document = {
            'doc_type': 'CommCareCase',
            'type': 'mother',
            'domain': None,
        }
        self.pillow.process_change(Change(id='test-id', sequence_id='3', document=document))
        message = self.consumer.next()
        change_meta = change_meta_from_kafka_message(message.value)
        self.assertEqual(document['domain'], change_meta.domain)
Example 3: run
# Required import: from kafka import KafkaConsumer [as alias]
# Or: from kafka.KafkaConsumer import next [as alias]
def run(self):
    cons = KafkaConsumer(bootstrap_servers=['localhost:9092'], consumer_timeout_ms=1000)
    cons.set_topic_partitions(('first', 0, 0), ('first', 1, 0))
    count = 0
    # per-partition message counters; initialized before the loop so the
    # summary below works even if the first next() times out
    res = dict({0: 0, 1: 0})
    try:
        line = cons.next()
        while line:
            res[line.partition] += 1
            line = cons.next()
    except ConsumerTimeout:
        print 'done fetching'
    for k in res.keys():
        print 'messages:', k, ' : ', res[k]
Example 4: generator
# Required import: from kafka import KafkaConsumer [as alias]
# Or: from kafka.KafkaConsumer import next [as alias]
def generator():
    keep_alive_in_a_row = 0
    messages_read = 0

    # init batch
    messages_read_in_batch = 0
    current_batch = {partition: [] for partition in partitions}
    batch_start_time = time.time()

    with kafka_pool.kafka_client() as client:
        consumer = KafkaConsumer(topics,
                                 kafka_client=client,
                                 auto_commit_enable=False,
                                 consumer_timeout_ms=200)
        while True:
            try:
                message = consumer.next()

                # if we read the message - reset keep alive counter
                keep_alive_in_a_row = 0

                # put message to batch
                messages_read += 1
                messages_read_in_batch += 1
                current_batch[message.partition].append(message.value.decode('utf-8'))
            except ConsumerTimeout:
                pass

            # check if it's time to send the batch
            time_since_batch_start = time.time() - batch_start_time
            latest_offsets = consumer.offsets("fetch")
            if time_since_batch_start >= opts['batch_flush_timeout'] != 0 or \
                    messages_read_in_batch >= opts['batch_limit']:

                yield from process_batch(latest_offsets, current_batch)

                # if we hit keep alive count limit - close the stream
                if messages_read_in_batch == 0:
                    if keep_alive_in_a_row >= opts['batch_keep_alive_limit'] != -1:
                        break
                    keep_alive_in_a_row += 1

                # init new batch
                messages_read_in_batch = 0
                current_batch = {partition: [] for partition in partitions}
                batch_start_time = time.time()

                yield BATCH_SEPARATOR

            # check if we reached the stream timeout or message count limit
            time_since_start = time.time() - start
            if time_since_start >= opts['stream_timeout'] > 0 or 0 < opts['stream_limit'] <= messages_read:
                if messages_read_in_batch > 0:
                    yield from process_batch(latest_offsets, current_batch)
                break
Example 5: KafkaReader
# Required import: from kafka import KafkaConsumer [as alias]
# Or: from kafka.KafkaConsumer import next [as alias]
class KafkaReader():
    def __init__(self, topic):
        # To consume latest messages and auto-commit offsets
        self.consumer = KafkaConsumer(group_id='my-group',
                                      bootstrap_servers='localhost:9092',
                                      enable_auto_commit=True)
        self.consumer.subscribe(topics=[topic])
        self.buffer = []

    def read_message(self):
        # print len(self.buffer)
        # if len(self.buffer) == 0:
        #     self.get_batch()
        # if len(self.buffer) > 0:
        #     return self.buffer.pop(0)
        # else:
        #     print "11"
        #     return None
        return self.consumer.next()

    def get_batch(self):
        for message in self.consumer:
            # message value and key are raw bytes -- decode if necessary!
            # e.g., for unicode: `message.value.decode('utf-8')`
            self.buffer.append(message)
        print self.buffer
Example 6: kafkaConsumer
# Required import: from kafka import KafkaConsumer [as alias]
# Or: from kafka.KafkaConsumer import next [as alias]
class kafkaConsumer():
    def __init__(self):
        self.var = 1
        self.consumer = KafkaConsumer('first')
        # self.client = MongoClient("172.31.46.29:27017")
        self.client = MongoClient("localhost:27017")
        self.db = self.client.datapipeline

    def runKafkaConsumer(self):
        try:
            while True:
                # fetch the next message and persist it to MongoDB
                tmp = self.consumer.next()
                self.db.data0.insert({"offset": tmp.offset, "partition": tmp.partition,
                                      "topic": tmp.topic, "value": tmp.value})
        except:
            sys.exit()

    def checkMongo(self):
        print self.db.data0.find().next()
Example 7: create_consumer_group
# Required import: from kafka import KafkaConsumer [as alias]
# Or: from kafka.KafkaConsumer import next [as alias]
def create_consumer_group(topic, group_name, num_messages=1):
    consumer = KafkaConsumer(
        topic,
        group_id=group_name,
        auto_commit_enable=False,
        bootstrap_servers=[KAFKA_URL],
        auto_offset_reset='smallest')
    for i in xrange(num_messages):
        message = consumer.next()
        consumer.task_done(message)
    consumer.commit()
    return consumer
Example 8: TweetParse
# Required import: from kafka import KafkaConsumer [as alias]
# Or: from kafka.KafkaConsumer import next [as alias]
class TweetParse(Spout):
    def __init__(self, storm_conf, context):
        self.kafka_conf = json.loads(open('kafka_conf.json').read())
        self.kafka_consumer = KafkaConsumer(self.kafka_conf["kafka_topic"],
                                            bootstrap_servers=self.kafka_conf["kafka_server"])
        self.kafka_producer = KafkaProducer(value_serializer=lambda v: json.dumps(v),
                                            bootstrap_servers=self.kafka_conf["kafka_server"])

    def next_tuple(self):
        message = self.kafka_consumer.next()
        Spout.emit(message)
        return message

    def _send_ouput(self, message):
        self.kafka_producer.send(self._conf["kafka"]["tweeter-topic-parsed"], message)
Example 9: KafkaConsumer
# Required import: from kafka import KafkaConsumer [as alias]
# Or: from kafka.KafkaConsumer import next [as alias]
__author__ = 'jchugh'
from config import config
from kafka import KafkaConsumer

kafka_consumer = KafkaConsumer(bootstrap_servers=[config.get('kafka', 'hosts')],
                               client_id=config.get('kafka', 'client_id'),
                               group_id=config.get('kafka', 'group_id'),
                               auto_commit_enable=config.getboolean('kafka', 'auto_commit'),
                               auto_offset_reset=config.get('kafka', 'auto_offset_reset'),
                               deserializer_class=lambda msg: msg.decode('utf-8'),
                               auto_commit_interval_ms=config.getint('kafka', 'auto_commit_time_ms'))

kafka_consumer.set_topic_partitions(config.get('kafka', 'output_topic'))

while True:
    data = kafka_consumer.next()
    print data.value
Example 10: generator
# Required import: from kafka import KafkaConsumer [as alias]
# Or: from kafka.KafkaConsumer import next [as alias]
def generator():
    messages_read = 0
    keep_alive_in_a_row = 0
    current_batch = []
    batch_start_time = datetime.datetime.now()

    with kafka_pool.kafka_client() as client:
        consumer = KafkaConsumer((topic, partition, start_from),
                                 kafka_client=client,
                                 auto_commit_enable=False,
                                 consumer_timeout_ms=200)
        while True:
            try:
                message = consumer.next()

                # if we read the message - reset keep alive counter
                keep_alive_in_a_row = 0

                # put message to batch
                messages_read += 1
                message_json = json.loads(message.value.decode('utf-8'))
                current_batch.append(message_json)
            except ConsumerTimeout:
                pass

            # check if it's time to send the batch
            time_since_batch_start = datetime.datetime.now() - batch_start_time
            if time_since_batch_start.total_seconds() >= opts['batch_flush_timeout'] != 0 or len(current_batch) >= opts['batch_limit']:

                # send the messages we could read so far
                if len(current_batch) > 0:
                    stream_message = __create_stream_message(partition, start_from + messages_read, current_batch)
                    with __measure_time(current_batch, stream_message):
                        yield stream_message

                # just send the keep alive
                else:
                    # if we hit keep alive count limit - close the stream
                    if keep_alive_in_a_row >= opts['batch_keep_alive_limit'] != -1:
                        break

                    # send keep alive message
                    keep_alive_in_a_row += 1
                    yield __create_stream_message(partition, start_from + messages_read)

                # init new batch
                current_batch = []
                batch_start_time = datetime.datetime.now()

            # check if we reached the stream timeout or message count limit
            time_since_start = datetime.datetime.now() - start
            if time_since_start.total_seconds() >= opts['stream_timeout'] > 0 or 0 < opts['stream_limit'] <= messages_read:
                # flush what we have read so far (if any) and close the stream
                if len(current_batch) > 0:
                    stream_message = __create_stream_message(partition, start_from + messages_read, current_batch)
                    with __measure_time(current_batch, stream_message):
                        yield stream_message
                break
Example 11: generator
# Required import: from kafka import KafkaConsumer [as alias]
# Or: from kafka.KafkaConsumer import next [as alias]
def generator():
    keep_alive_in_a_row = 0
    messages_read = 0

    # init batch
    messages_read_in_batch = 0
    current_batch = {}
    for partition in partitions:
        current_batch[partition] = []
    batch_start_time = datetime.datetime.now()

    with kafka_pool.kafka_client() as client:
        consumer = KafkaConsumer(topics, kafka_client=client, auto_commit_enable=False, consumer_timeout_ms=200)
        while True:
            try:
                message = consumer.next()

                # if we read the message - reset keep alive counter
                keep_alive_in_a_row = 0

                # put message to batch
                messages_read += 1
                messages_read_in_batch += 1
                message_json = json.loads(message.value.decode("utf-8"))
                current_batch[message.partition].append(message_json)
            except ConsumerTimeout:
                pass

            # check if it's time to send the batch
            time_since_batch_start = datetime.datetime.now() - batch_start_time
            latest_offsets = consumer.offsets("fetch")
            if (
                time_since_batch_start.total_seconds() >= opts["batch_flush_timeout"] != 0
                or messages_read_in_batch >= opts["batch_limit"]
            ):
                for partition in partitions:
                    topic_partition = (topic.encode("UTF-8"), partition)

                    # send the messages we could read so far
                    if len(current_batch[partition]) > 0:
                        stream_message = __create_stream_message(
                            partition, latest_offsets[topic_partition], current_batch[partition]
                        )
                        with __measure_time(current_batch[partition], stream_message):
                            yield stream_message

                    # just send the keep alive
                    else:
                        yield __create_stream_message(partition, latest_offsets[topic_partition])

                # if we hit keep alive count limit - close the stream
                if messages_read_in_batch == 0:
                    if keep_alive_in_a_row >= opts["batch_keep_alive_limit"] != -1:
                        break
                    keep_alive_in_a_row += 1

                # init new batch
                messages_read_in_batch = 0
                current_batch = {}
                for partition in partitions:
                    current_batch[partition] = []
                batch_start_time = datetime.datetime.now()

                yield BATCH_SEPARATOR

            # check if we reached the stream timeout or message count limit
            time_since_start = datetime.datetime.now() - start
            if (
                time_since_start.total_seconds() >= opts["stream_timeout"] > 0
                or 0 < opts["stream_limit"] <= messages_read
            ):
                if messages_read_in_batch > 0:
                    for partition in partitions:
                        topic_partition = (topic.encode("UTF-8"), partition)

                        # send the messages we could read so far
                        if len(current_batch[partition]) > 0:
                            stream_message = __create_stream_message(
                                partition, latest_offsets[topic_partition], current_batch[partition]
                            )
                            with __measure_time(current_batch[partition], stream_message):
                                yield stream_message

                        # just send the keep alive
                        else:
                            yield __create_stream_message(partition, latest_offsets[topic_partition])
                break
Example 12: KafkaMQ
# Required import: from kafka import KafkaConsumer [as alias]
# Or: from kafka.KafkaConsumer import next [as alias]
class KafkaMQ(MQBase):
    """
    kafka://sparkh1:9092,sparkh2:9092,sparkh3:9092?topics=xx&group_id=xx
    """
    _CLIENT_ID_ = 'PyThunderKafkaMQCli'
    _GROUP_ID_ = 'PyThunderKafkaMQCliGroup'
    maxLowTimeout = 0.1

    def __init__(self, url):
        super(KafkaMQ, self).__init__(url=url)
        consumerKwargs, producerKwargs = self.fromUrl(url)
        self._topic = consumerKwargs.pop('topic', '')
        topics = self._topic.split(',')
        self._consumer = KafkaConsumer(*topics, **consumerKwargs)
        self._producer = Producer(**producerKwargs)

    def qsize(self):
        try:
            with Timeout(seconds=self.maxLowTimeout, exception=None):
                print('no offsets: {}'.format(self._get(noCommit=True)))
        except:
            pass
        localOffsets = self._consumer.offsets()
        return localOffsets

    def _get(self, noCommit=False):
        with Timeout(seconds=self.maxLowTimeout, exception=self.Empty):
            kafkaMessage = self._consumer.next()
            if noCommit:
                return kafkaMessage
            if kafkaMessage:
                try:
                    return kafkaMessage.value
                finally:
                    self._commit(kafkaMessage)
            else:
                raise self.Empty

    def _put(self, item):
        with Timeout(seconds=self.maxLowTimeout, exception=self.Full):
            status = self._producer.send_messages(self._topic, None, *[item])
            if status:
                return 10

    def _commit(self, message):
        try:
            self._consumer.task_done(message)
            self._consumer.commit()
        except:
            pass

    def close(self):
        try:
            del self._consumer
            self._producer.close()
            del self._producer
        except:
            pass

    def fromUrl(self, url):
        urlInfo = urlparse(url)
        qs = urlInfo.query and urlInfo.query or ''
        kwargs = dict()
        options = dict()
        options['hosts'] = urlInfo.netloc
        options['topic'] = urlInfo.path.strip('/')
        for name, value in six.iteritems(dict(parse_qsl(qs))):
            if value:
                options[name] = value
        self.maxSize = options.pop('maxSize', 10000)
        self.lazyLimit = options.pop('lazyLimit', True)
        options.setdefault('group', self._GROUP_ID_ + '-{}'.format(id(self)))
        # options.setdefault('group')
        options.setdefault('client', self._CLIENT_ID_)
        if urlInfo.scheme != 'kafka':
            raise AttributeError('schema {} not supported'.format(urlInfo.scheme))
        else:
            for name, value in six.iteritems(options):
                mirror = urlOptMaps.get(name)
                if mirror:
                    value = mirror[1](value)
                    if mirror[0] == 'bootstrap_servers':
                        value = value.split(',')
                    kwargs[mirror[0]] = value
                else:
                    kwargs[name] = value
        return kwargs, {
            'hosts': options.pop('hosts', '').split(','),
            'client_id': options.pop('client_id', self._CLIENT_ID_),
            'timeout': options.pop('timeout', 120)
        }