This article collects typical usage examples of the Python method kafka.KafkaConsumer.assign. If you have been wondering what KafkaConsumer.assign does and how to use it, the curated code examples below may help. You can also explore the containing class, kafka.KafkaConsumer, for related usage.
The following presents 13 code examples of the KafkaConsumer.assign method, sorted by popularity by default.
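Before the examples, a minimal sketch of what assign() does may be helpful: unlike subscribe(), which joins a consumer group and lets Kafka balance partitions across members, assign() pins the consumer to an explicit partition list. The broker address and topic name below are placeholders, not taken from any example:

from kafka import KafkaConsumer, TopicPartition

# Hypothetical broker and topic, for illustration only.
consumer = KafkaConsumer(bootstrap_servers='localhost:9092')

# assign() takes an explicit list of TopicPartition objects;
# no consumer-group rebalancing is involved.
consumer.assign([TopicPartition('demo-topic', 0)])

for record in consumer:
    print(record.partition, record.offset, record.value)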
Example 1: commit_offsets_in_kafka
# Required import: from kafka import KafkaConsumer [as alias]
# Or: from kafka.KafkaConsumer import assign [as alias]
import logging
import time
from kafka import KafkaConsumer, TopicPartition

def commit_offsets_in_kafka(broker, group_name, group_dict):
    cons = KafkaConsumer(bootstrap_servers=broker, group_id=group_name)
    for topic_name, topic_dict in group_dict.items():
        for partition, offset in topic_dict.items():
            logging.info(
                "Committing {} {} to topic {} and partition number {}".format(
                    group_name, offset, topic_name, partition))
            tp = TopicPartition(topic_name, int(partition))
            cons.assign([tp])
            cons.seek(tp, int(offset))
            # commit it
            cons.commit()
            time.sleep(8)
    cons.close()
    time.sleep(1)
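A side note on Example 1: commit() in kafka-python also accepts an explicit offsets mapping, which avoids the assign-then-seek dance when all you want is to store an offset. A minimal sketch with placeholder names; note that older kafka-python releases define OffsetAndMetadata as (offset, metadata), while newer ones add a leader_epoch field:

from kafka import KafkaConsumer, TopicPartition
from kafka.structs import OffsetAndMetadata

# Placeholder broker, group, topic, and offset.
consumer = KafkaConsumer(bootstrap_servers='localhost:9092', group_id='my-group')
tp = TopicPartition('my-topic', 0)
consumer.assign([tp])

# Commit offset 42 directly, without seeking first.
# (The two-field OffsetAndMetadata assumes an older kafka-python;
# newer versions also take a leader_epoch argument.)
consumer.commit({tp: OffsetAndMetadata(42, None)})
consumer.close()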
Example 2: func
# Required import: from kafka import KafkaConsumer [as alias]
# Or: from kafka.KafkaConsumer import assign [as alias]
from kafka import KafkaConsumer, TopicPartition

def func(topic, partition):
    i = 0
    consumer = KafkaConsumer(bootstrap_servers='104.154.53.184:6667', group_id='grp-5327',
                             auto_offset_reset='earliest', consumer_timeout_ms=10000)
    consumer.assign([TopicPartition(topic, partition)])
    for msg in consumer:
        i = i + 1
    print(i)
Example 3: start
# Required import: from kafka import KafkaConsumer [as alias]
# Or: from kafka.KafkaConsumer import assign [as alias]
def start(self):
    kafka_brokers = '{0}:{1}'.format(self._server, self._port)
    consumer = KC(bootstrap_servers=[kafka_brokers], group_id=self._topic)
    partition = [TopicPartition(self._topic, int(self._id))]
    consumer.assign(partitions=partition)
    consumer.poll()
    return consumer
Example 4: Consumer
# Required import: from kafka import KafkaConsumer [as alias]
# Or: from kafka.KafkaConsumer import assign [as alias]
class Consumer(BaseStreamConsumer):
    """
    Used in DB and SW worker. SW consumes per partition.
    """
    def __init__(self, location, enable_ssl, cert_path, topic, group, partition_id):
        self._location = location
        self._group = group
        self._topic = topic
        kwargs = _prepare_kafka_ssl_kwargs(cert_path) if enable_ssl else {}
        self._consumer = KafkaConsumer(
            bootstrap_servers=self._location,
            group_id=self._group,
            max_partition_fetch_bytes=10485760,
            consumer_timeout_ms=100,
            client_id="%s-%s" % (self._topic, str(partition_id) if partition_id is not None else "all"),
            request_timeout_ms=120 * 1000,
            heartbeat_interval_ms=10000,
            **kwargs
        )
        # explicitly cause the consumer to bootstrap the cluster metadata
        self._consumer.topics()
        if partition_id is not None:
            self._partitions = [TopicPartition(self._topic, partition_id)]
            self._consumer.assign(self._partitions)
        else:
            self._partitions = [TopicPartition(self._topic, pid) for pid in self._consumer.partitions_for_topic(self._topic)]
            self._consumer.subscribe(topics=[self._topic])

    def get_messages(self, timeout=0.1, count=1):
        result = []
        while count > 0:
            try:
                m = next(self._consumer)
                result.append(m.value)
                count -= 1
            except StopIteration:
                break
        return result

    def get_offset(self, partition_id):
        for tp in self._partitions:
            if tp.partition == partition_id:
                return self._consumer.position(tp)
        raise KeyError("Can't find partition %d" % partition_id)

    def close(self):
        self._consumer.commit()
        self._consumer.close()
Example 5: poll
# Required import: from kafka import KafkaConsumer [as alias]
# Or: from kafka.KafkaConsumer import assign [as alias]
def poll(topic, offset=0, partition=0, hostname=None, port_num=None, max_timeout=100):
    hostname, port_num = insure_host_port(hostname, port_num)
    server = hostname + ':' + str(port_num)
    topic_partition = TopicPartition(topic, partition)
    consumer = KafkaConsumer(bootstrap_servers=server, group_id=None)
    consumer.assign([topic_partition])
    consumer.seek(topic_partition, offset)
    # poll() returns a dict keyed by TopicPartition; collect the record batches
    msgs = list(consumer.poll(max_timeout).values())
    consumer.close()
    if len(msgs) > 0:
        return msgs[0]
    else:
        return {}
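Note that KafkaConsumer.poll() returns a dict mapping each TopicPartition to a list of ConsumerRecord objects, so Example 5 actually returns a record batch rather than a single message. A minimal sketch of unpacking that structure, with placeholder names:

from kafka import KafkaConsumer, TopicPartition

# Placeholder broker and topic.
consumer = KafkaConsumer(bootstrap_servers='localhost:9092')
tp = TopicPartition('demo-topic', 0)
consumer.assign([tp])
consumer.seek(tp, 0)

# poll() returns {TopicPartition: [ConsumerRecord, ...]}
batches = consumer.poll(timeout_ms=1000)
for record in batches.get(tp, []):
    print(record.offset, record.value)
consumer.close()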
Example 6: run
# Required import: from kafka import KafkaConsumer [as alias]
# Or: from kafka.KafkaConsumer import assign [as alias]
def run(self):
    global useavro, useextra, schema_id, sslEnable
    print("start Consumer")
    if useavro:
        topic = "avro.log.localtest"
    else:
        topic = "raw.log.localtest"
    print("on topic %s" % topic)
    if sslEnable:
        print("setting up SSL to PROTOCOL_TLSv1")
        ctx = ssl.SSLContext(ssl.PROTOCOL_TLSv1)
        ctx.load_cert_chain(certfile="../ca-cert", keyfile="../ca-key", password="test1234")
        consumer = KafkaConsumer(bootstrap_servers=["ip6-localhost:9093"], security_protocol="SASL_SSL",
                                 ssl_context=ctx, sasl_mechanism="PLAIN", sasl_plain_username="test",
                                 sasl_plain_password="test", group_id="test")
    else:
        consumer = KafkaConsumer(bootstrap_servers=["ip6-localhost:9092"])
    consumer.assign([TopicPartition(topic, 0)])
    ## Skip the consumer to the head of the log - this is a personal choice.
    ## It means we lose messages from while the py consumer was off.
    ## Not a problem for testing purposes.
    #consumer.seek(0,2)
    for message in consumer:
        print('-' * 60)
        try:
            consume_message(message)
        except Exception:
            print('error')
            print('-' * 60)
            traceback.print_exc(file=sys.stdout)
            print('-' * 60)
Example 7: kafka_consumer_test
# Required import: from kafka import KafkaConsumer [as alias]
# Or: from kafka.KafkaConsumer import assign [as alias]
def kafka_consumer_test():
    topic_name = 'topic_test'
    bootstrap_servers = ['localhost:9092']
    # consumer = KafkaConsumer(topic_name, bootstrap_servers=bootstrap_servers, group_id='test_group', auto_offset_reset='earliest')
    consumer = KafkaConsumer(bootstrap_servers=bootstrap_servers, group_id='test_group', auto_offset_reset='earliest')
    # enable_auto_commit=True (the default) is required to resume consumption where it left off;
    # the brokers then store the committed offsets for this group_id.
    # auto_offset_reset='earliest' (default is 'latest') only takes effect when there is no valid stored offset.
    partition_set = consumer.partitions_for_topic(topic_name)
    partitions = [TopicPartition(topic_name, partition_idx) for partition_idx in partition_set]
    consumer.assign(partitions)
    topic_partition_set = consumer.assignment()
    #consumer.seek_to_beginning()  # move the offset to the first value stored in the cluster (not necessarily 0);
    #                              # with no arguments, applies to every partition assigned to the consumer
    #consumer.seek_to_end()  # move the offset to the first value not yet consumed;
    #                        # with no arguments, applies to every partition assigned to the consumer
    for topic_partition in topic_partition_set:
        offset = consumer.position(topic_partition)
        print("partition: %d, offset: %d" % (topic_partition.partition, offset))
    #consumer.seek(topic_partition, offset)  # avoid setting this value by hand
    for msg in consumer:
        print("topic:%s, partition:%d, offset:%d: key=%s value=%s" % (msg.topic, msg.partition, msg.offset, msg.key, msg.value.decode("utf-8")))
Example 8: KafkaConsumer
# Required import: from kafka import KafkaConsumer [as alias]
# Or: from kafka.KafkaConsumer import assign [as alias]
import sys, os, re
import json
from kafka import KafkaConsumer, TopicPartition

consumer = KafkaConsumer()
consumer.assign([TopicPartition('test', 0)])
consumer.seek_to_beginning()

for message in consumer:
    message_bytes = message.value
    message_string = message_bytes.decode()
    message_object = json.loads(message_string)
    print(message_object)
Example 9: CheckKafka
# Required import: from kafka import KafkaConsumer [as alias]
# Or: from kafka.KafkaConsumer import assign [as alias]
#......... part of the code omitted here .........
        try:
            if list_partitions:
                if self.topic:
                    self.print_topic_partitions(self.topic)
                else:
                    for topic in self.get_topics():
                        self.print_topic_partitions(topic)
                sys.exit(ERRORS['UNKNOWN'])
        except KafkaError:
            raise CriticalError(self.exception_msg())
        self.partition = self.get_opt('partition')
        # technically optional, will hash to a random partition, but need to know which partition to get offset
        # if self.partition is not None:
        validate_int(self.partition, "partition", 0, 10000)
        self.topic_partition = TopicPartition(self.topic, self.partition)
        self.acks = self.get_opt('acks')
        try:
            self.acks = int(self.acks)
        except ValueError:
            pass
        log_option('acks', self.acks)
        self.validate_thresholds()

    def subscribe(self):
        self.consumer = KafkaConsumer(
            #self.topic,
            bootstrap_servers=self.brokers,
            # client_id=self.client_id,
            # group_id=self.group_id,
            request_timeout_ms=self.timeout_ms
        )
        #key_serializer
        #value_serializer
        # this is only a guess as Kafka doesn't expose its API version
        #log.debug('kafka api version: %s', self.consumer.config['api_version'])
        log.debug('partition assignments: {0}'.format(self.consumer.assignment()))
        # log.debug('subscribing to topic \'{0}\' partition \'{1}\''.format(self.topic, self.partition))
        # self.consumer.subscribe(TopicPartition(self.topic, self.partition))
        # log.debug('partition assignments: {0}'.format(self.consumer.assignment()))
        log.debug('assigning partition {0} to consumer'.format(self.partition))
        # self.consumer.assign([self.partition])
        self.consumer.assign([self.topic_partition])
        log.debug('partition assignments: {0}'.format(self.consumer.assignment()))
        log.debug('getting current offset')
        # see also highwater, committed, seek_to_end
        self.start_offset = self.consumer.position(self.topic_partition)
        if self.start_offset is None:
            # don't do this, I've seen a scenario where None is returned and all messages are read again; better to fail
            # log.warn('consumer position returned None, resetting to zero')
            # self.start_offset = 0
            raise UnknownError('Kafka Consumer reported current starting offset = {0}'.format(self.start_offset))
        log.debug('recorded starting offset \'{0}\''.format(self.start_offset))
        # self.consumer.pause()

    def publish(self):
        log.debug('creating producer')
        self.producer = KafkaProducer(
            bootstrap_servers=self.brokers,
            client_id=self.client_id,
            acks=self.acks,
            batch_size=0,
            max_block_ms=self.timeout_ms,
            request_timeout_ms=self.timeout_ms
        )
        #key_serializer
        #value_serializer
        log.debug('producer.send()')
        self.producer.send(
            self.topic,
            key=self.key,
            partition=self.partition,
            value=self.publish_message
        )
        log.debug('producer.flush()')
        self.producer.flush()

    def consume(self):
        self.consumer.assign([self.topic_partition])
        log.debug('consumer.seek({0})'.format(self.start_offset))
        self.consumer.seek(self.topic_partition, self.start_offset)
        # self.consumer.resume()
        log.debug('consumer.poll(timeout_ms={0})'.format(self.timeout_ms))
        obj = self.consumer.poll(timeout_ms=self.timeout_ms)
        log.debug('msg object returned: %s', obj)
        msg = None
        try:
            for consumer_record in obj[self.topic_partition]:
                if consumer_record.key == self.key:
                    msg = consumer_record.value
                    break
        except KeyError:
            raise UnknownError('TopicPartition key was not found in response')
        if msg is None:
            raise UnknownError("failed to find matching consumer record with key '{0}'".format(self.key))
        return msg
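Example 9's subscribe/publish/consume trio implements a produce-consume health check: record the current position, publish a keyed message, seek back, and poll until the key comes back. A condensed sketch of the same pattern under stated assumptions (placeholder broker/topic names, no CLI or threshold plumbing):

import uuid
from kafka import KafkaConsumer, KafkaProducer, TopicPartition

def kafka_roundtrip_ok(brokers='localhost:9092', topic='healthcheck',
                       partition=0, timeout_ms=10000):
    # Return True if a keyed message written to Kafka can be read back.
    tp = TopicPartition(topic, partition)
    key = uuid.uuid4().bytes

    consumer = KafkaConsumer(bootstrap_servers=brokers)
    consumer.assign([tp])
    start_offset = consumer.position(tp)  # remember where the log ends now

    producer = KafkaProducer(bootstrap_servers=brokers, acks=1)
    producer.send(topic, key=key, partition=partition, value=b'canary')
    producer.flush()

    consumer.seek(tp, start_offset)       # read back only what was just written
    records = consumer.poll(timeout_ms=timeout_ms).get(tp, [])
    consumer.close()
    producer.close()
    return any(r.key == key for r in records)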
Example 10: KafkaChangeFeed
# Required import: from kafka import KafkaConsumer [as alias]
# Or: from kafka.KafkaConsumer import assign [as alias]
class KafkaChangeFeed(ChangeFeed):
    """
    Kafka-based implementation of a ChangeFeed
    """
    sequence_format = 'json'

    def __init__(self, topics, client_id, strict=False, num_processes=1, process_num=0):
        """
        Create a change feed listener for a list of kafka topics, a client ID, and partition.

        See http://kafka.apache.org/documentation.html#introduction for a description of what these are.
        """
        self._topics = topics
        self._client_id = client_id
        self._processed_topic_offsets = {}
        self.strict = strict
        self.num_processes = num_processes
        self.process_num = process_num
        self._consumer = None

    def __str__(self):
        return 'KafkaChangeFeed: topics: {}, client: {}'.format(self._topics, self._client_id)

    @property
    def topics(self):
        return self._topics

    def _get_single_topic_or_fail(self):
        if len(self._topics) != 1:
            raise ValueError("This function requires a single topic but found {}!".format(self._topics))
        return self._topics[0]

    def iter_changes(self, since, forever):
        """
        Since must be a dictionary of topic partition offsets.
        """
        timeout = float('inf') if forever else MIN_TIMEOUT
        start_from_latest = since is None
        reset = 'largest' if start_from_latest else 'smallest'
        self._init_consumer(timeout, auto_offset_reset=reset)

        since = self._filter_offsets(since)
        # a special value of since=None will start from the end of the change stream
        if since is not None and (not isinstance(since, dict) or not since):
            raise ValueError("'since' must be None or a topic offset dictionary")

        if not start_from_latest:
            if self.strict:
                validate_offsets(since)
            checkpoint_topics = {tp[0] for tp in since}
            extra_topics = checkpoint_topics - set(self._topics)
            if extra_topics:
                raise ValueError("'since' contains extra topics: {}".format(list(extra_topics)))
            self._processed_topic_offsets = copy(since)
            # Tell the consumer to start from offsets that were passed in
            for topic_partition, offset in since.items():
                self.consumer.seek(TopicPartition(topic_partition[0], topic_partition[1]), int(offset))

        try:
            for message in self.consumer:
                self._processed_topic_offsets[(message.topic, message.partition)] = message.offset
                yield change_from_kafka_message(message)
        except StopIteration:
            assert not forever, 'Kafka pillow should not timeout when waiting forever!'
            # no need to do anything since this is just telling us we've reached the end of the feed

    def get_current_checkpoint_offsets(self):
        # the way kafka works, the checkpoint should increment by 1 because
        # querying the feed is inclusive of the value passed in.
        latest_offsets = self.get_latest_offsets()
        ret = {}
        for topic_partition, sequence in self.get_processed_offsets().items():
            if sequence == latest_offsets[topic_partition]:
                # this topic and partition is totally up to date and if we add 1
                # then kafka will give us an offset out of range error.
                # not adding 1 to the partition means that we may process this
                # change again later, but that should be OK
                sequence = latest_offsets[topic_partition]
            else:
                sequence += 1
            ret[topic_partition] = sequence
        return self._filter_offsets(ret)

    def get_processed_offsets(self):
        return copy(self._processed_topic_offsets)

    def get_latest_offsets(self):
        return self.consumer.end_offsets(self.consumer.assignment())

    def get_latest_offsets_json(self):
        return json.loads(kafka_seq_to_str(self.get_latest_offsets()))

    def get_latest_offsets_as_checkpoint_value(self):
        return self.get_latest_offsets()

    @property
    def consumer(self):
#......... part of the code omitted here .........
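The comment in get_current_checkpoint_offsets carries the key reasoning: each processed offset is advanced by one to form the next checkpoint, except when it already equals the log-end offset, since advancing past the end would trigger an offset-out-of-range error. A standalone sketch of that arithmetic with hypothetical offset dictionaries:

def next_checkpoints(processed, latest):
    # processed and latest map (topic, partition) -> offset; hypothetical inputs.
    checkpoints = {}
    for tp, offset in processed.items():
        if offset == latest[tp]:
            # already at the log end: possibly re-reading one change later
            # is preferable to an offset-out-of-range error
            checkpoints[tp] = offset
        else:
            checkpoints[tp] = offset + 1
    return checkpoints

# Partition 0 is caught up, partition 1 is behind:
print(next_checkpoints({('t', 0): 10, ('t', 1): 5},
                       {('t', 0): 10, ('t', 1): 9}))
# -> {('t', 0): 10, ('t', 1): 6}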
Example 11: ClusterZookeeper
# Required import: from kafka import KafkaConsumer [as alias]
# Or: from kafka.KafkaConsumer import assign [as alias]
class ClusterZookeeper(object):
    def __init__(self, zookeeper_hosts, kafka_hosts):
        self.groups_dict = {}
        self.topics_dict = {}
        self.brokers_list = []
        self.consumer = KafkaConsumer(bootstrap_servers=kafka_hosts.split(','))
        self.zk = KazooClient(hosts=zookeeper_hosts)
        self.zk.add_listener(self.keep_start)
        self.zk.start()
        if self.zk.exists('/consumers') is None or self.zk.exists('/brokers') is None:
            raise ValueError(zookeeper_hosts + ' is not a ZooKeeper for Kafka')
        ChildrenWatch(self.zk, '/consumers', self.groups_watch)
        ChildrenWatch(self.zk, '/brokers/topics', self.topics_watch)
        ChildrenWatch(self.zk, '/brokers/ids/', self.brokers_watch)
        t = threading.Thread(target=self.latest, name=kafka_hosts)
        t.setDaemon(True)
        t.start()

    # make sure the connection stays usable
    def keep_start(self, client_status):
        if client_status != 'CONNECTED':
            try:
                self.zk.start()
            except Exception:
                pass

    # watch the consumers node
    def groups_watch(self, children):
        for group in [group for group in self.groups_dict.keys() if group not in children]:
            self.groups_dict.pop(group)
        for group in [group for group in children if group not in self.groups_dict.keys()]:
            owners_p = '/consumers/' + group + '/owners'
            if self.zk.exists(owners_p) is None:
                continue
            g_o_t = GroupOwnersTopic()
            self.groups_dict[group] = g_o_t
            ChildrenWatch(self.zk, owners_p, g_o_t.g_topic_watch)

    # watch the topics node
    def topics_watch(self, children):
        for topic in [topic for topic in self.topics_dict.keys() if topic not in children]:
            self.topics_dict.pop(topic)
        for topic in [topic for topic in children if topic not in self.topics_dict.keys()]:
            t_v = TopicValue()
            self.topics_dict[topic] = t_v
            DataWatch(self.zk, '/brokers/topics/' + topic, t_v.topic_watch)
            t_v.topic_partition = [TopicPartition(topic, p) for p in self.consumer.partitions_for_topic(topic)]

    # watch the broker nodes
    def brokers_watch(self, children):
        self.brokers_list = children

    def close_zk(self):
        try:
            self.zk.remove_listener(self.keep_start)
            self.zk.stop()
            self.zk.close()
        except Exception:
            pass

    def latest(self):
        while True:
            # time.sleep(0.1)
            time.sleep(0.001)
            for k, v in self.topics_dict.items():
                try:
                    partitions = v.topic_partition
                    self.consumer.assign(partitions)
                    self.consumer.seek_to_end(*partitions)
                    log_offset = reduce(lambda x, y: x + y, [self.consumer.position(p) for p in partitions])
                    now_timestamp = int(time.mktime(time.localtime()))
                    if 'timestamp' in v.__dict__ and v.timestamp is not None:
                        v.speed = (log_offset - v.off_set) / (now_timestamp - v.timestamp)
                    v.timestamp = now_timestamp
                    v.off_set = log_offset
                except Exception as e:
                    pass
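Example 11's latest() loop derives log-end offsets by assigning, seeking to the end, and summing position() per partition. kafka-python (1.3 and later) also exposes end_offsets(), which returns the same numbers in one call without moving the consumer's position; a minimal sketch with placeholder names:

from kafka import KafkaConsumer, TopicPartition

# Placeholder broker and topic.
consumer = KafkaConsumer(bootstrap_servers='localhost:9092')
partitions = [TopicPartition('demo-topic', p)
              for p in consumer.partitions_for_topic('demo-topic')]

# end_offsets() fetches log-end offsets without assigning or seeking.
end = consumer.end_offsets(partitions)
print('total log-end offset across partitions:', sum(end.values()))
consumer.close()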
Example 12: KafkaConsumer
# Required import: from kafka import KafkaConsumer [as alias]
# Or: from kafka.KafkaConsumer import assign [as alias]
from kafka import KafkaConsumer, TopicPartition

topic_name = "test"
consumer = KafkaConsumer(bootstrap_servers=['u1401.ambari.apache.org:6667', 'u1402.ambari.apache.org:6667', 'u1403.ambari.apache.org:6667'])
partitions = [TopicPartition(topic_name, partition) for partition in consumer.partitions_for_topic(topic_name) if partition < 5]
consumer.assign(partitions)
consumer.seek_to_beginning()

for message in consumer:
    print(message)
Example 13: Consumer
# Required import: from kafka import KafkaConsumer [as alias]
# Or: from kafka.KafkaConsumer import assign [as alias]
class Consumer(BaseStreamConsumer):
    """
    Used in DB and SW worker. SW consumes per partition.
    """
    def __init__(self, location, topic, group, partition_id):
        self._location = location
        self._group = group
        self._topic = topic
        self._consumer = KafkaConsumer(
            bootstrap_servers=self._location,
            group_id=self._group,
            max_partition_fetch_bytes=10485760,
            consumer_timeout_ms=100,
            client_id="%s-%s" % (self._topic, str(partition_id) if partition_id is not None else "all"),
            request_timeout_ms=120 * 1000,
        )

        if partition_id is not None:
            self._partition_ids = [TopicPartition(self._topic, partition_id)]
            self._consumer.assign(self._partition_ids)
        else:
            self._partition_ids = [TopicPartition(self._topic, pid) for pid in self._consumer.partitions_for_topic(self._topic)]
            self._consumer.subscribe(topics=[self._topic])
            if self._consumer._use_consumer_group():
                self._consumer._coordinator.ensure_coordinator_known()
                self._consumer._coordinator.ensure_active_group()

        self._consumer._update_fetch_positions(self._partition_ids)
        self._start_looping_call()

    def _start_looping_call(self, interval=60):
        def errback(failure):
            logger.exception(failure.value)
            if failure.frames:
                logger.critical(str("").join(format_tb(failure.getTracebackObject())))
        self._poll_task = LoopingCall(self._poll_client)
        self._poll_task.start(interval).addErrback(errback)

    def _poll_client(self):
        self._consumer._client.poll()

    def get_messages(self, timeout=0.1, count=1):
        result = []
        while count > 0:
            try:
                m = next(self._consumer)
                result.append(m.value)
                count -= 1
            except StopIteration:
                break
        return result

    def get_offset(self, partition_id):
        for tp in self._partition_ids:
            if tp.partition == partition_id:
                return self._consumer.position(tp)
        raise KeyError("Can't find partition %d" % partition_id)

    def close(self):
        self._poll_task.stop()
        self._consumer.commit()
        # keep the kafka client event loop running a bit longer so the commit executes
        tries = 3
        while tries:
            self.get_messages()
            sleep(2.0)
            tries -= 1
        self._consumer.close()