This article collects typical usage examples of the Python method kafka.KafkaConsumer.assign. If you have been wondering what KafkaConsumer.assign does and how to use it, the curated code examples below may help. You can also explore the containing class, kafka.KafkaConsumer, for related usage.
The following presents 13 code examples of the KafkaConsumer.assign method, sorted by popularity by default.
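Before the examples, a minimal sketch of what assign() does may be helpful: unlike subscribe(), which joins a consumer group and lets Kafka balance partitions across members, assign() pins the consumer to an explicit partition list. The broker address and topic name below are placeholders, not taken from any example:

from kafka import KafkaConsumer, TopicPartition

# Hypothetical broker and topic, for illustration only.
consumer = KafkaConsumer(bootstrap_servers='localhost:9092')

# assign() takes an explicit list of TopicPartition objects;
# no consumer-group rebalancing is involved.
consumer.assign([TopicPartition('demo-topic', 0)])

for record in consumer:
    print(record.partition, record.offset, record.value)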
Example 1: commit_offsets_in_kafka
# Required import: from kafka import KafkaConsumer [as alias]
# Or: from kafka.KafkaConsumer import assign [as alias]
import logging
import time
from kafka import KafkaConsumer, TopicPartition

def commit_offsets_in_kafka(broker, group_name, group_dict):
    cons = KafkaConsumer(bootstrap_servers=broker, group_id=group_name)
    for topic_name, topic_dict in group_dict.items():
        for partition, offset in topic_dict.items():
            logging.info(
                "Committing {} {} to topic {} and partition number {}".format(
                    group_name, offset, topic_name, partition))
            tp = TopicPartition(topic_name, int(partition))
            cons.assign([tp])
            cons.seek(tp, int(offset))
            # commit it
            cons.commit()
            time.sleep(8)
    cons.close()
    time.sleep(1)
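A side note on Example 1: commit() in kafka-python also accepts an explicit offsets mapping, which avoids the assign-then-seek dance when all you want is to store an offset. A minimal sketch with placeholder names; note that older kafka-python releases define OffsetAndMetadata as (offset, metadata), while newer ones add a leader_epoch field:

from kafka import KafkaConsumer, TopicPartition
from kafka.structs import OffsetAndMetadata

# Placeholder broker, group, topic, and offset.
consumer = KafkaConsumer(bootstrap_servers='localhost:9092', group_id='my-group')
tp = TopicPartition('my-topic', 0)
consumer.assign([tp])

# Commit offset 42 directly, without seeking first.
# (The two-field OffsetAndMetadata assumes an older kafka-python;
# newer versions also take a leader_epoch argument.)
consumer.commit({tp: OffsetAndMetadata(42, None)})
consumer.close()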
Example 2: func
# Required import: from kafka import KafkaConsumer [as alias]
# Or: from kafka.KafkaConsumer import assign [as alias]
from kafka import KafkaConsumer, TopicPartition

def func(topic, partition):
    i = 0
    consumer = KafkaConsumer(bootstrap_servers='104.154.53.184:6667', group_id='grp-5327',
                             auto_offset_reset='earliest', consumer_timeout_ms=10000)
    consumer.assign([TopicPartition(topic, partition)])
    for msg in consumer:
        i = i + 1
    print(i)
Example 3: start
# Required import: from kafka import KafkaConsumer [as alias]
# Or: from kafka.KafkaConsumer import assign [as alias]
def start(self):
    kafka_brokers = '{0}:{1}'.format(self._server, self._port)
    consumer = KC(bootstrap_servers=[kafka_brokers], group_id=self._topic)
    partition = [TopicPartition(self._topic, int(self._id))]
    consumer.assign(partitions=partition)
    consumer.poll()
    return consumer
Example 4: Consumer
# Required import: from kafka import KafkaConsumer [as alias]
# Or: from kafka.KafkaConsumer import assign [as alias]
class Consumer(BaseStreamConsumer):
    """
    Used in DB and SW worker. SW consumes per partition.
    """
    def __init__(self, location, enable_ssl, cert_path, topic, group, partition_id):
        self._location = location
        self._group = group
        self._topic = topic
        kwargs = _prepare_kafka_ssl_kwargs(cert_path) if enable_ssl else {}
        self._consumer = KafkaConsumer(
            bootstrap_servers=self._location,
            group_id=self._group,
            max_partition_fetch_bytes=10485760,
            consumer_timeout_ms=100,
            client_id="%s-%s" % (self._topic, str(partition_id) if partition_id is not None else "all"),
            request_timeout_ms=120 * 1000,
            heartbeat_interval_ms=10000,
            **kwargs
        )
        # explicitly cause the consumer to bootstrap the cluster metadata
        self._consumer.topics()
        if partition_id is not None:
            self._partitions = [TopicPartition(self._topic, partition_id)]
            self._consumer.assign(self._partitions)
        else:
            self._partitions = [TopicPartition(self._topic, pid) for pid in self._consumer.partitions_for_topic(self._topic)]
            self._consumer.subscribe(topics=[self._topic])

    def get_messages(self, timeout=0.1, count=1):
        result = []
        while count > 0:
            try:
                m = next(self._consumer)
                result.append(m.value)
                count -= 1
            except StopIteration:
                break
        return result

    def get_offset(self, partition_id):
        for tp in self._partitions:
            if tp.partition == partition_id:
                return self._consumer.position(tp)
        raise KeyError("Can't find partition %d" % partition_id)

    def close(self):
        self._consumer.commit()
        self._consumer.close()
Example 5: poll
# Required import: from kafka import KafkaConsumer [as alias]
# Or: from kafka.KafkaConsumer import assign [as alias]
def poll(topic, offset=0, partition=0, hostname=None, port_num=None, max_timeout=100):
    hostname, port_num = insure_host_port(hostname, port_num)
    server = hostname + ':' + str(port_num)
    topic_partition = TopicPartition(topic, partition)
    consumer = KafkaConsumer(bootstrap_servers=server, group_id=None)
    consumer.assign([topic_partition])
    consumer.seek(topic_partition, offset)
    # poll() returns a dict keyed by TopicPartition; collect the record batches
    msgs = list(consumer.poll(max_timeout).values())
    consumer.close()
    if len(msgs) > 0:
        return msgs[0]
    else:
        return {}
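Note that KafkaConsumer.poll() returns a dict mapping each TopicPartition to a list of ConsumerRecord objects, so Example 5 actually returns a record batch rather than a single message. A minimal sketch of unpacking that structure, with placeholder names:

from kafka import KafkaConsumer, TopicPartition

# Placeholder broker and topic.
consumer = KafkaConsumer(bootstrap_servers='localhost:9092')
tp = TopicPartition('demo-topic', 0)
consumer.assign([tp])
consumer.seek(tp, 0)

# poll() returns {TopicPartition: [ConsumerRecord, ...]}
batches = consumer.poll(timeout_ms=1000)
for record in batches.get(tp, []):
    print(record.offset, record.value)
consumer.close()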
Example 6: run
# Required import: from kafka import KafkaConsumer [as alias]
# Or: from kafka.KafkaConsumer import assign [as alias]
def run(self):
    global useavro, useextra, schema_id, sslEnable
    print("start Consumer")
    if useavro:
        topic = "avro.log.localtest"
    else:
        topic = "raw.log.localtest"
    print("on topic %s" % topic)
    if sslEnable:
        print("setting up SSL to PROTOCOL_TLSv1")
        ctx = ssl.SSLContext(ssl.PROTOCOL_TLSv1)
        ctx.load_cert_chain(certfile="../ca-cert", keyfile="../ca-key", password="test1234")
        consumer = KafkaConsumer(bootstrap_servers=["ip6-localhost:9093"], security_protocol="SASL_SSL",
                                 ssl_context=ctx, sasl_mechanism="PLAIN", sasl_plain_username="test",
                                 sasl_plain_password="test", group_id="test")
    else:
        consumer = KafkaConsumer(bootstrap_servers=["ip6-localhost:9092"])
    consumer.assign([TopicPartition(topic, 0)])
    ## Skip the consumer to the head of the log - this is a personal choice.
    ## It means we lose messages from while the py consumer was off.
    ## Not a problem for testing purposes.
    #consumer.seek(0,2)
    for message in consumer:
        print('-' * 60)
        try:
            consume_message(message)
        except Exception:
            print('error')
            print('-' * 60)
            traceback.print_exc(file=sys.stdout)
            print('-' * 60)
Example 7: kafka_consumer_test
# Required import: from kafka import KafkaConsumer [as alias]
# Or: from kafka.KafkaConsumer import assign [as alias]
def kafka_consumer_test():
    topic_name = 'topic_test'
    bootstrap_servers = ['localhost:9092']
    # consumer = KafkaConsumer(topic_name, bootstrap_servers=bootstrap_servers, group_id='test_group', auto_offset_reset='earliest')
    consumer = KafkaConsumer(bootstrap_servers=bootstrap_servers, group_id='test_group', auto_offset_reset='earliest')
    # enable_auto_commit=True (the default) is required to resume consumption where it left off;
    # the brokers then store the committed offsets for this group_id.
    # auto_offset_reset='earliest' (default is 'latest') only takes effect when there is no valid stored offset.
    partition_set = consumer.partitions_for_topic(topic_name)
    partitions = [TopicPartition(topic_name, partition_idx) for partition_idx in partition_set]
    consumer.assign(partitions)
    topic_partition_set = consumer.assignment()
    #consumer.seek_to_beginning()  # move the offset to the first value stored in the cluster (not necessarily 0);
    #                              # with no arguments, applies to every partition assigned to the consumer
    #consumer.seek_to_end()  # move the offset to the first value not yet consumed;
    #                        # with no arguments, applies to every partition assigned to the consumer
    for topic_partition in topic_partition_set:
        offset = consumer.position(topic_partition)
        print("partition: %d, offset: %d" % (topic_partition.partition, offset))
    #consumer.seek(topic_partition, offset)  # avoid setting this value by hand
    for msg in consumer:
        print("topic:%s, partition:%d, offset:%d: key=%s value=%s" % (msg.topic, msg.partition, msg.offset, msg.key, msg.value.decode("utf-8")))
Example 8: KafkaConsumer
# Required import: from kafka import KafkaConsumer [as alias]
# Or: from kafka.KafkaConsumer import assign [as alias]
import sys, os, re
import json
from kafka import KafkaConsumer, TopicPartition

consumer = KafkaConsumer()
consumer.assign([TopicPartition('test', 0)])
consumer.seek_to_beginning()

for message in consumer:
    message_bytes = message.value
    message_string = message_bytes.decode()
    message_object = json.loads(message_string)
    print(message_object)
Example 9: CheckKafka
# Required import: from kafka import KafkaConsumer [as alias]
# Or: from kafka.KafkaConsumer import assign [as alias]
#......... part of the code omitted here .........
        try:
            if list_partitions:
                if self.topic:
                    self.print_topic_partitions(self.topic)
                else:
                    for topic in self.get_topics():
                        self.print_topic_partitions(topic)
                sys.exit(ERRORS['UNKNOWN'])
        except KafkaError:
            raise CriticalError(self.exception_msg())
        self.partition = self.get_opt('partition')
        # technically optional, will hash to a random partition, but need to know which partition to get offset
        # if self.partition is not None:
        validate_int(self.partition, "partition", 0, 10000)
        self.topic_partition = TopicPartition(self.topic, self.partition)
        self.acks = self.get_opt('acks')
        try:
            self.acks = int(self.acks)
        except ValueError:
            pass
        log_option('acks', self.acks)
        self.validate_thresholds()

    def subscribe(self):
        self.consumer = KafkaConsumer(
            #self.topic,
            bootstrap_servers=self.brokers,
            # client_id=self.client_id,
            # group_id=self.group_id,
            request_timeout_ms=self.timeout_ms
        )
        #key_serializer
        #value_serializer
        # this is only a guess as Kafka doesn't expose its API version
        #log.debug('kafka api version: %s', self.consumer.config['api_version'])
        log.debug('partition assignments: {0}'.format(self.consumer.assignment()))
        # log.debug('subscribing to topic \'{0}\' partition \'{1}\''.format(self.topic, self.partition))
        # self.consumer.subscribe(TopicPartition(self.topic, self.partition))
        # log.debug('partition assignments: {0}'.format(self.consumer.assignment()))
        log.debug('assigning partition {0} to consumer'.format(self.partition))
        # self.consumer.assign([self.partition])
        self.consumer.assign([self.topic_partition])
        log.debug('partition assignments: {0}'.format(self.consumer.assignment()))
        log.debug('getting current offset')
        # see also highwater, committed, seek_to_end
        self.start_offset = self.consumer.position(self.topic_partition)
        if self.start_offset is None:
            # don't do this, I've seen a scenario where None is returned and all messages are read again; better to fail
            # log.warn('consumer position returned None, resetting to zero')
            # self.start_offset = 0
            raise UnknownError('Kafka Consumer reported current starting offset = {0}'.format(self.start_offset))
        log.debug('recorded starting offset \'{0}\''.format(self.start_offset))
        # self.consumer.pause()

    def publish(self):
        log.debug('creating producer')
        self.producer = KafkaProducer(
            bootstrap_servers=self.brokers,
            client_id=self.client_id,
            acks=self.acks,
            batch_size=0,
            max_block_ms=self.timeout_ms,
            request_timeout_ms=self.timeout_ms
        )
        #key_serializer
        #value_serializer
        log.debug('producer.send()')
        self.producer.send(
            self.topic,
            key=self.key,
            partition=self.partition,
            value=self.publish_message
        )
        log.debug('producer.flush()')
        self.producer.flush()

    def consume(self):
        self.consumer.assign([self.topic_partition])
        log.debug('consumer.seek({0})'.format(self.start_offset))
        self.consumer.seek(self.topic_partition, self.start_offset)
        # self.consumer.resume()
        log.debug('consumer.poll(timeout_ms={0})'.format(self.timeout_ms))
        obj = self.consumer.poll(timeout_ms=self.timeout_ms)
        log.debug('msg object returned: %s', obj)
        msg = None
        try:
            for consumer_record in obj[self.topic_partition]:
                if consumer_record.key == self.key:
                    msg = consumer_record.value
                    break
        except KeyError:
            raise UnknownError('TopicPartition key was not found in response')
        if msg is None:
            raise UnknownError("failed to find matching consumer record with key '{0}'".format(self.key))
        return msg
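Example 9's subscribe/publish/consume trio implements a produce-consume health check: record the current position, publish a keyed message, seek back, and poll until the key comes back. A condensed sketch of the same pattern under stated assumptions (placeholder broker/topic names, no CLI or threshold plumbing):

import uuid
from kafka import KafkaConsumer, KafkaProducer, TopicPartition

def kafka_roundtrip_ok(brokers='localhost:9092', topic='healthcheck',
                       partition=0, timeout_ms=10000):
    # Return True if a keyed message written to Kafka can be read back.
    tp = TopicPartition(topic, partition)
    key = uuid.uuid4().bytes

    consumer = KafkaConsumer(bootstrap_servers=brokers)
    consumer.assign([tp])
    start_offset = consumer.position(tp)  # remember where the log ends now

    producer = KafkaProducer(bootstrap_servers=brokers, acks=1)
    producer.send(topic, key=key, partition=partition, value=b'canary')
    producer.flush()

    consumer.seek(tp, start_offset)       # read back only what was just written
    records = consumer.poll(timeout_ms=timeout_ms).get(tp, [])
    consumer.close()
    producer.close()
    return any(r.key == key for r in records)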
Example 10: KafkaChangeFeed
# Required import: from kafka import KafkaConsumer [as alias]
# Or: from kafka.KafkaConsumer import assign [as alias]
class KafkaChangeFeed(ChangeFeed):
    """
    Kafka-based implementation of a ChangeFeed
    """
    sequence_format = 'json'

    def __init__(self, topics, client_id, strict=False, num_processes=1, process_num=0):
        """
        Create a change feed listener for a list of kafka topics, a client ID, and partition.

        See http://kafka.apache.org/documentation.html#introduction for a description of what these are.
        """
        self._topics = topics
        self._client_id = client_id
        self._processed_topic_offsets = {}
        self.strict = strict
        self.num_processes = num_processes
        self.process_num = process_num
        self._consumer = None

    def __str__(self):
        return 'KafkaChangeFeed: topics: {}, client: {}'.format(self._topics, self._client_id)

    @property
    def topics(self):
        return self._topics

    def _get_single_topic_or_fail(self):
        if len(self._topics) != 1:
            raise ValueError("This function requires a single topic but found {}!".format(self._topics))
        return self._topics[0]

    def iter_changes(self, since, forever):
        """
        Since must be a dictionary of topic partition offsets.
        """
        timeout = float('inf') if forever else MIN_TIMEOUT
        start_from_latest = since is None
        reset = 'largest' if start_from_latest else 'smallest'
        self._init_consumer(timeout, auto_offset_reset=reset)

        since = self._filter_offsets(since)
        # a special value of since=None will start from the end of the change stream
        if since is not None and (not isinstance(since, dict) or not since):
            raise ValueError("'since' must be None or a topic offset dictionary")

        if not start_from_latest:
            if self.strict:
                validate_offsets(since)
            checkpoint_topics = {tp[0] for tp in since}
            extra_topics = checkpoint_topics - set(self._topics)
            if extra_topics:
                raise ValueError("'since' contains extra topics: {}".format(list(extra_topics)))
            self._processed_topic_offsets = copy(since)
            # Tell the consumer to start from offsets that were passed in
            for topic_partition, offset in since.items():
                self.consumer.seek(TopicPartition(topic_partition[0], topic_partition[1]), int(offset))

        try:
            for message in self.consumer:
                self._processed_topic_offsets[(message.topic, message.partition)] = message.offset
                yield change_from_kafka_message(message)
        except StopIteration:
            assert not forever, 'Kafka pillow should not timeout when waiting forever!'
            # no need to do anything since this is just telling us we've reached the end of the feed

    def get_current_checkpoint_offsets(self):
        # the way kafka works, the checkpoint should increment by 1 because
        # querying the feed is inclusive of the value passed in.
        latest_offsets = self.get_latest_offsets()
        ret = {}
        for topic_partition, sequence in self.get_processed_offsets().items():
            if sequence == latest_offsets[topic_partition]:
                # this topic and partition is totally up to date and if we add 1
                # then kafka will give us an offset out of range error.
                # not adding 1 to the partition means that we may process this
                # change again later, but that should be OK
                sequence = latest_offsets[topic_partition]
            else:
                sequence += 1
            ret[topic_partition] = sequence
        return self._filter_offsets(ret)

    def get_processed_offsets(self):
        return copy(self._processed_topic_offsets)

    def get_latest_offsets(self):
        return self.consumer.end_offsets(self.consumer.assignment())

    def get_latest_offsets_json(self):
        return json.loads(kafka_seq_to_str(self.get_latest_offsets()))

    def get_latest_offsets_as_checkpoint_value(self):
        return self.get_latest_offsets()

    @property
    def consumer(self):
#......... part of the code omitted here .........
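The comment in get_current_checkpoint_offsets carries the key reasoning: each processed offset is advanced by one to form the next checkpoint, except when it already equals the log-end offset, since advancing past the end would trigger an offset-out-of-range error. A standalone sketch of that arithmetic with hypothetical offset dictionaries:

def next_checkpoints(processed, latest):
    # processed and latest map (topic, partition) -> offset; hypothetical inputs.
    checkpoints = {}
    for tp, offset in processed.items():
        if offset == latest[tp]:
            # already at the log end: possibly re-reading one change later
            # is preferable to an offset-out-of-range error
            checkpoints[tp] = offset
        else:
            checkpoints[tp] = offset + 1
    return checkpoints

# Partition 0 is caught up, partition 1 is behind:
print(next_checkpoints({('t', 0): 10, ('t', 1): 5},
                       {('t', 0): 10, ('t', 1): 9}))
# -> {('t', 0): 10, ('t', 1): 6}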
Example 11: ClusterZookeeper
# Required import: from kafka import KafkaConsumer [as alias]
# Or: from kafka.KafkaConsumer import assign [as alias]
class ClusterZookeeper(object):
    def __init__(self, zookeeper_hosts, kafka_hosts):
        self.groups_dict = {}
        self.topics_dict = {}
        self.brokers_list = []
        self.consumer = KafkaConsumer(bootstrap_servers=kafka_hosts.split(','))
        self.zk = KazooClient(hosts=zookeeper_hosts)
        self.zk.add_listener(self.keep_start)
        self.zk.start()
        if self.zk.exists('/consumers') is None or self.zk.exists('/brokers') is None:
            raise ValueError(zookeeper_hosts + ' is not a ZooKeeper for Kafka')
        ChildrenWatch(self.zk, '/consumers', self.groups_watch)
        ChildrenWatch(self.zk, '/brokers/topics', self.topics_watch)
        ChildrenWatch(self.zk, '/brokers/ids/', self.brokers_watch)
        t = threading.Thread(target=self.latest, name=kafka_hosts)
        t.setDaemon(True)
        t.start()

    # make sure the connection stays usable
    def keep_start(self, client_status):
        if client_status != 'CONNECTED':
            try:
                self.zk.start()
            except Exception:
                pass

    # watch the consumers node
    def groups_watch(self, children):
        for group in [group for group in self.groups_dict.keys() if group not in children]:
            self.groups_dict.pop(group)
        for group in [group for group in children if group not in self.groups_dict.keys()]:
            owners_p = '/consumers/' + group + '/owners'
            if self.zk.exists(owners_p) is None:
                continue
            g_o_t = GroupOwnersTopic()
            self.groups_dict[group] = g_o_t
            ChildrenWatch(self.zk, owners_p, g_o_t.g_topic_watch)

    # watch the topics node
    def topics_watch(self, children):
        for topic in [topic for topic in self.topics_dict.keys() if topic not in children]:
            self.topics_dict.pop(topic)
        for topic in [topic for topic in children if topic not in self.topics_dict.keys()]:
            t_v = TopicValue()
            self.topics_dict[topic] = t_v
            DataWatch(self.zk, '/brokers/topics/' + topic, t_v.topic_watch)
            t_v.topic_partition = [TopicPartition(topic, p) for p in self.consumer.partitions_for_topic(topic)]

    # watch the broker nodes
    def brokers_watch(self, children):
        self.brokers_list = children

    def close_zk(self):
        try:
            self.zk.remove_listener(self.keep_start)
            self.zk.stop()
            self.zk.close()
        except Exception:
            pass

    def latest(self):
        while True:
            # time.sleep(0.1)
            time.sleep(0.001)
            for k, v in self.topics_dict.items():
                try:
                    partitions = v.topic_partition
                    self.consumer.assign(partitions)
                    self.consumer.seek_to_end(*partitions)
                    log_offset = reduce(lambda x, y: x + y, [self.consumer.position(p) for p in partitions])
                    now_timestamp = int(time.mktime(time.localtime()))
                    if 'timestamp' in v.__dict__ and v.timestamp is not None:
                        v.speed = (log_offset - v.off_set) / (now_timestamp - v.timestamp)
                    v.timestamp = now_timestamp
                    v.off_set = log_offset
                except Exception as e:
                    pass
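Example 11's latest() loop derives log-end offsets by assigning, seeking to the end, and summing position() per partition. kafka-python (1.3 and later) also exposes end_offsets(), which returns the same numbers in one call without moving the consumer's position; a minimal sketch with placeholder names:

from kafka import KafkaConsumer, TopicPartition

# Placeholder broker and topic.
consumer = KafkaConsumer(bootstrap_servers='localhost:9092')
partitions = [TopicPartition('demo-topic', p)
              for p in consumer.partitions_for_topic('demo-topic')]

# end_offsets() fetches log-end offsets without assigning or seeking.
end = consumer.end_offsets(partitions)
print('total log-end offset across partitions:', sum(end.values()))
consumer.close()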
Example 12: KafkaConsumer
# Required import: from kafka import KafkaConsumer [as alias]
# Or: from kafka.KafkaConsumer import assign [as alias]
from kafka import KafkaConsumer, TopicPartition

topic_name = "test"
consumer = KafkaConsumer(bootstrap_servers=['u1401.ambari.apache.org:6667', 'u1402.ambari.apache.org:6667', 'u1403.ambari.apache.org:6667'])
partitions = [TopicPartition(topic_name, partition) for partition in consumer.partitions_for_topic(topic_name) if partition < 5]
consumer.assign(partitions)
consumer.seek_to_beginning()

for message in consumer:
    print(message)
Example 13: Consumer
# Required import: from kafka import KafkaConsumer [as alias]
# Or: from kafka.KafkaConsumer import assign [as alias]
class Consumer(BaseStreamConsumer):
    """
    Used in DB and SW worker. SW consumes per partition.
    """
    def __init__(self, location, topic, group, partition_id):
        self._location = location
        self._group = group
        self._topic = topic
        self._consumer = KafkaConsumer(
            bootstrap_servers=self._location,
            group_id=self._group,
            max_partition_fetch_bytes=10485760,
            consumer_timeout_ms=100,
            client_id="%s-%s" % (self._topic, str(partition_id) if partition_id is not None else "all"),
            request_timeout_ms=120 * 1000,
        )

        if partition_id is not None:
            self._partition_ids = [TopicPartition(self._topic, partition_id)]
            self._consumer.assign(self._partition_ids)
        else:
            self._partition_ids = [TopicPartition(self._topic, pid) for pid in self._consumer.partitions_for_topic(self._topic)]
            self._consumer.subscribe(topics=[self._topic])
            if self._consumer._use_consumer_group():
                self._consumer._coordinator.ensure_coordinator_known()
                self._consumer._coordinator.ensure_active_group()

        self._consumer._update_fetch_positions(self._partition_ids)
        self._start_looping_call()

    def _start_looping_call(self, interval=60):
        def errback(failure):
            logger.exception(failure.value)
            if failure.frames:
                logger.critical(str("").join(format_tb(failure.getTracebackObject())))
        self._poll_task = LoopingCall(self._poll_client)
        self._poll_task.start(interval).addErrback(errback)

    def _poll_client(self):
        self._consumer._client.poll()

    def get_messages(self, timeout=0.1, count=1):
        result = []
        while count > 0:
            try:
                m = next(self._consumer)
                result.append(m.value)
                count -= 1
            except StopIteration:
                break
        return result

    def get_offset(self, partition_id):
        for tp in self._partition_ids:
            if tp.partition == partition_id:
                return self._consumer.position(tp)
        raise KeyError("Can't find partition %d" % partition_id)

    def close(self):
        self._poll_task.stop()
        self._consumer.commit()
        # keep the kafka client event loop running a bit longer so the commit executes
        tries = 3
        while tries:
            self.get_messages()
            sleep(2.0)
            tries -= 1
        self._consumer.close()