

Python SimpleConsumer.seek Method Code Examples

This article collects typical usage examples of the kafka.SimpleConsumer.seek method in Python. If you are wondering exactly what SimpleConsumer.seek does, how to call it, or what real code that uses it looks like, the curated examples below should help. You can also explore further usage examples of the containing class, kafka.SimpleConsumer.


The following presents 15 code examples of the SimpleConsumer.seek method, sorted by popularity by default.
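Before the individual examples, here is a minimal sketch of the seek(offset, whence) call itself. In the legacy kafka-python SimpleConsumer API it follows file-style semantics: whence=0 treats offset as an absolute position, whence=1 is relative to the current position, and whence=2 is relative to the end of the log. The broker address, group, and topic names below are placeholder assumptions, not values taken from the examples.

from kafka import KafkaClient, SimpleConsumer

kafka = KafkaClient("localhost:9092")                  # hypothetical broker address
consumer = SimpleConsumer(kafka, "demo-group", "demo-topic")

consumer.seek(0, 0)    # absolute: rewind to the beginning of the topic
consumer.seek(0, 2)    # relative to the end: jump to the tail and read only new messages
consumer.seek(-10, 2)  # relative to the end: re-read the last 10 messages
consumer.seek(5, 1)    # relative to the current position: skip 5 messages ahead

for message in consumer.get_messages(count=10, block=False, timeout=1):
    print(message.offset, message.message.value)

kafka.close()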

Example 1: test_ts

# Required import: from kafka import SimpleConsumer [as alias]
# Or: from kafka.SimpleConsumer import seek [as alias]
    def test_ts(self):

        kafka = KafkaClient(config.get("kafka.host1") + "," + config.get("kafka.host2"))

        # consumer = SimpleConsumer(kafka, "my-group112", "test")
        consumer = SimpleConsumer(kafka, self.GROUP_NAME, self.KAFKA_TOPIC,
                                  fetch_size_bytes=3000000, buffer_size=2000000000, max_buffer_size=2000000000)

        while True:
            print("HELLO")
            # Prepare data for insert and copy to S3
            # data_str = StringIO()
            count = 0
            # last_offset = 2

            consumer.seek(2, 0)  # absolute seek (whence=0): start reading from offset 2

            for message in consumer.get_messages(count=100, block=False, timeout=0.1):
                count += 1

                print(message.message.value)

            #     # Write tweets to StringIO
            #     self.write_to_data_str(message, data_str)

            # # Store batch tweets to S3
            # self.write_to_s3(data_str, last_offset)

            if count != 100:
                break
Developer: eatseng, Project: insight, Lines of code: 32, Source file: location_streams.py

Example 2: setup_capture_new_messages_consumer

# Required import: from kafka import SimpleConsumer [as alias]
# Or: from kafka.SimpleConsumer import seek [as alias]
def setup_capture_new_messages_consumer(topic):
    """Seeks to the tail of the topic then returns a function that can
    consume messages from that point.
    """
    kafka = KafkaClient(get_config().cluster_config.broker_list)
    group = str('data_pipeline_clientlib_test')
    consumer = SimpleConsumer(kafka, group, topic, max_buffer_size=_ONE_MEGABYTE)
    consumer.seek(0, 2)  # seek to the tail: offset 0 relative to the end of the log (whence=2)

    yield consumer

    kafka.close()
Developer: joshSzep, Project: data_pipeline, Lines of code: 14, Source file: kafka_docker.py
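Example 2 is a generator: it yields the freshly positioned consumer and only closes the Kafka connection after the caller finishes, so in practice it is wrapped as a context manager or test fixture. A usage sketch under that assumption (the contextlib wrapping and the topic name are illustrative, not part of the original project):

from contextlib import contextmanager

# Hypothetical wrapper around the generator shown above
capture_new_messages = contextmanager(setup_capture_new_messages_consumer)

with capture_new_messages("my.test.topic") as consumer:   # placeholder topic name
    # only messages produced after the seek(0, 2) call above are visible here
    for message in consumer.get_messages(count=10, block=False):
        print(message.message.value)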

Example 3: run

# Required import: from kafka import SimpleConsumer [as alias]
# Or: from kafka.SimpleConsumer import seek [as alias]
    def run(self, options=None):

        # try:

        # Create table if it doesn't exist in the database
        if self.REDSHIFT.if_table_exists(self.TABLE_NAME) is False:
            self.REDSHIFT.execute(self.CREATE_TRACKING_TABLE)

        kafka = KafkaClient(config.get("kafka.host1") + "," + config.get("kafka.host2"))

        consumer = SimpleConsumer(kafka, self.GROUP_NAME, self.KAFKA_TOPIC, fetch_size_bytes=3000000,
                                  buffer_size=2000000000, max_buffer_size=2000000000)

        while True:

            # Prepare data for insert and copy to S3
            data_str = StringIO()
            csv_str = StringIO()
            count = 0

            # Get Offset from previous read
            s3_last_offset = self.get_s3_offset()

            (last_offset) = self.REDSHIFT.select(self.GET_OFFSET_QUERY)[0][0]
            last_offset = last_offset if last_offset else 0

            # Resolve difference in offset (s3 offset does not carry over from day to day)
            if s3_last_offset > last_offset:
                last_offset = s3_last_offset
                self.REDSHIFT.execute(self.UPDATE_OFFSET_QUERY % (self.GROUP_NAME, self.PARTITION, last_offset))

            print(last_offset)

            # Read from Offset
            consumer.seek(last_offset, 0)

            for message in consumer.get_messages(count=self.BATCH_SIZE, block=False, timeout=5):

                # Write tweets to StringIO
                self.write_to_data_str(message, data_str, csv_str)

                count += 1
                last_offset += 1

            # Store batch tweets to S3
            self.write_to_s3(data_str, csv_str, last_offset)

            # Track Kafka Offset
            self.REDSHIFT.execute(self.UPDATE_OFFSET_QUERY % (self.GROUP_NAME, self.PARTITION, last_offset))

            if count != self.BATCH_SIZE:
                break
Developer: eatseng, Project: insight, Lines of code: 54, Source file: location_streams.py
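The heart of Example 3 is checkpointed consumption: load the last processed offset from durable storage, seek to it with whence=0 (absolute), process a batch, then persist the new offset. A stripped-down sketch of just that loop, with the Redshift/S3 calls replaced by hypothetical load_offset, save_offset, and handle callables:

def consume_from_checkpoint(consumer, load_offset, save_offset, handle, batch_size=1000):
    # load_offset() -> int or None, save_offset(int), and handle(message) are supplied by the caller
    last_offset = load_offset() or 0
    while True:
        consumer.seek(last_offset, 0)      # absolute seek (whence=0) to the saved position
        batch = consumer.get_messages(count=batch_size, block=False, timeout=5)
        for message in batch:
            handle(message)
            last_offset += 1
        save_offset(last_offset)           # persist progress before fetching the next batch
        if len(batch) < batch_size:        # short batch: the topic is drained for now
            break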

Example 4: HBaseServer

# Required import: from kafka import SimpleConsumer [as alias]
# Or: from kafka.SimpleConsumer import seek [as alias]
class HBaseServer(threading.Thread):
    """
    HBase thread that will continuously read from Kafka queue
    """

    def __init__(self, kafka_url, kafka_topic, hbase_url, hbase_thrift_port, hbase_table):
        threading.Thread.__init__(self)
        
        self.kafka = KafkaClient(kafka_url)
        self.cons = SimpleConsumer(self.kafka, None, kafka_topic)
        self.cons.seek(0,2)
        
        self.hbase_connect = happybase.Connection(hbase_url,hbase_thrift_port)
        self.car_table = self.hbase_connect.table(hbase_table)
        
        self.server_on_flag = True        
        self.m = None
        self.payload = None
        self.vin = None
        self.time = None
        self.data = None
        self.row_key = None
        self.count = 0

    def run(self):
        while self.server_on_flag:

            self.m = self.cons.get_message(block=False)
           
            if (self.m is not None):
                self.payload = json.loads(self.m.message.value)
                self.vin = str(self.payload['vin'])
                self.time = str(self.payload['timestamp'])
                self.data = str(self.payload['data'])
                
                self.row_key = self.vin+self.time
                try:
                    self.car_table.put(self.vin,{'user:mostrecent':self.time})
                    self.car_table.put(self.row_key,{'car:data':self.data})
                    self.count = self.count + 1
                    logger.info('HBase Server: key: %s, table: %s, car{data: %s}. Message number: %s', self.row_key, 'rvi', self.data, str(self.count))     
           
                except Exception as e:
                    logger.info('%s,Data Push into HBase unsuccessful...', e)

            else:
                sleep(0.2)  # 1/5 of a second; the original 1/5 is integer division (0) under Python 2

    def shutdown(self):
        self.server_on_flag = False
        logger.info('HBase Server shutting down...')
Developer: PDXostc, Project: rvi_backend, Lines of code: 53, Source file: hbaseserver.py

Example 5: RVIConsumer

# Required import: from kafka import SimpleConsumer [as alias]
# Or: from kafka.SimpleConsumer import seek [as alias]
class RVIConsumer(threading.Thread):

    def __init__(self, kafka_addr, topic, vin, web_url):
        threading.Thread.__init__(self)

        self.kafka = KafkaClient(kafka_addr) #kafka_addr
        self.cons = SimpleConsumer(self.kafka, None, topic)
        self.cons.seek(0,2)

        self.vin = vin
        self.web_url = web_url 
        self.flag = True
        self.count = 0
        self.sleep_count = 0
        self.headers = {'Content-Type' : 'application/json'}

    def is_running(self):
        return self.flag
        
    def run(self):
        while self.flag:
            
            #cons = SimpleConsumer(kafka, None, 'rvi')
            m = self.cons.get_message(block=False)
            if (m is not None):
                payload = json.loads(m.message.value)

                if(payload['vin'] == self.vin):
                    self.sleep_count = 0 
                    payloadtoweb = json.dumps(m.message.value)
                    r = requests.post(self.web_url, data=payloadtoweb, headers=self.headers) 
                    if r.status_code == 200:  # compare with ==, not identity
                        print m.message.value + " sent successfully\n"        
                    else: 
                        print "%s is not available, status code:%d...shutting down now..."%(self.web_url,r.status_code)
                        self.shutdown()       

            else:
                if (self.sleep_count > 100000):
                    print "No new data for %s... Timing out" % self.vin
                    self.shutdown()

                time.sleep(0.2)  # 1/5 of a second; the original 1/5 is integer division (0) under Python 2
                self.sleep_count = self.sleep_count + 1

    def shutdown(self):
        self.flag = False    
        requests.post(self.web_url, data=json.dumps({'vin':self.vin, 'data':'EOM'}), headers=self.headers) 
        print "%s consumer thread shutting down" % self.vin 
Developer: PDXostc, Project: rvi_big-data2_dashboard, Lines of code: 51, Source file: rviwebconsumer.py

Example 6: read_kafka

# Required import: from kafka import SimpleConsumer [as alias]
# Or: from kafka.SimpleConsumer import seek [as alias]
def read_kafka():
    """
    read socialSignal, keep if none are zero, save to mongo social/socialSignal
    :return:
    """
    msg_buffer = dict()
    ids = set()
    in_kafka = KafkaClient(settings.IN_SOCIAL_SIGNAL_KAFKA)
    consumer = SimpleConsumer(in_kafka, 'comment.pages1', 'comment.pages', max_buffer_size=20 * 1024 * 1024,
                              fetch_size_bytes=2 * 1024 * 1024, buffer_size=2 * 1024 * 1024)
    consumer.seek(0, 0)

    for msg in consumer:
        if "001WxC6D" in msg.message.value:
            print msg.message.value
Developer: razhong, Project: randy, Lines of code: 17, Source file: comment.pages.py

Example 7: Consumer

# Required import: from kafka import SimpleConsumer [as alias]
# Or: from kafka.SimpleConsumer import seek [as alias]
class Consumer(Thread):
    def __init__(self, args=()):
        super(Consumer, self).__init__()
        self.host = args[0]
        self.port = args[1]
        self.topic = args[2]
        print '[KafkaConsumer] host: {0}, port: {1}, topic: {2}'.format(self.host, self.port, self.topic)
        self.consumer = None
        self.consumer_keep_run = True
        self.consumer_paused = False
        self.consumer_subscribers = []

    def run(self):
        client = kafka_client(self.host, self.port)
        self.consumer = SimpleConsumer(client, None, self.topic)
        self.consumer.seek(0, 1)  # whence=1: offset 0 relative to the current position (stay where we are)

        while self.consumer_keep_run:
            print '[KafkaConsumer] looping..'
            if not self.consumer_paused:
                for message in self.consumer.get_messages(block=False):
                    offset = message.offset
                    value = message.message.value
                    j_encoded = json.dumps({'offset': offset, 'message': value})
                    print '[KafkaConsumer] {}'.format(j_encoded)

                    for subscriber in self.consumer_subscribers:
                        IOLoop.instance().add_callback(partial(subscriber.send_message, j_encoded))
            time.sleep(1)

    def pause_consumer(self, paused):
        self.consumer_paused = paused

    def stop_consumer(self):
        self.consumer_keep_run = False

    def add_subscriber(self, subscriber):
        self.consumer_subscribers.append(subscriber)

    def remove_subscriber(self, subscriber):
        self.consumer_subscribers.remove(subscriber)

    def get_subscribers_length(self):
        length = len(self.consumer_subscribers)
        return length

    def get_subscribers(self):
        return self.consumer_subscribers
Developer: doddyph, Project: django-tornado-demo, Lines of code: 50, Source file: kafka_usage.py

Example 8: kafka_stream

# Required import: from kafka import SimpleConsumer [as alias]
# Or: from kafka.SimpleConsumer import seek [as alias]
def kafka_stream():
    # global visualization_topic 
    # topic = visualization_topic 
    # print "DEBUG stream topic: " + topic
    topic = "web"
    kafka = KafkaClient("localhost:9092")
    consumer = SimpleConsumer(kafka, "python", topic)
    consumer.seek(offset=0, whence=2)
    # topic = None

    def gen():
        for message in consumer:
            yield 'data: %s\n\n' %str(message.message.value)

    print "DEBUG: Kafka Stream Connected"
    return Response(gen(), mimetype="text/event-stream")
Developer: ZhuangER, Project: social-network-analysi-with-lambda-architecture, Lines of code: 18, Source file: app.py

Example 9: read_kafka

# Required import: from kafka import SimpleConsumer [as alias]
# Or: from kafka.SimpleConsumer import seek [as alias]
def read_kafka(docid):
    """
    read socialSignal, keep if none are zero, save to mongo social/socialSignal
    :return:
    """
    msg_buffer = dict()
    ids = set()
    in_kafka = KafkaClient(settings.IN_SOCIAL_SIGNAL_KAFKA)
    consumer = SimpleConsumer(in_kafka, 'test0', TOPIC, max_buffer_size=20 * 1024 * 1024,
                              fetch_size_bytes=2 * 1024 * 1024, buffer_size=2 * 1024 * 1024)
    consumer.seek(6000000, 0)

    for msg in consumer:
        if msg.offset % 100000 == 0:
            print 'working on ', msg.offset
        if docid in msg.message.value:
            print msg.message.value
Developer: razhong, Project: randy, Lines of code: 19, Source file: kafka_test.py

Example 10: main

# Required import: from kafka import SimpleConsumer [as alias]
# Or: from kafka.SimpleConsumer import seek [as alias]
def main():
    """
    Usage:
        dump_to_mongodb dump <topic> --host=<host> [--consumer=<consumer>]
    """
    args = docopt(main.__doc__)
    host = args["--host"]

    print "=> Connecting to {0}...".format(host)
    logger.info("=> Connecting to {0}...".format(host))
    kafka = KafkaClient(host)
    print "=> Connected."
    logger.info("=> Connected.")
    if args["dump"]:
        topic = args["<topic>"]
        consumer_id = args["--consumer"] or "dump_to_mongodb"
        consumer = SimpleConsumer(kafka, consumer_id, topic,
                                  buffer_size=1024*200,      # 200 KB
                                  fetch_size_bytes=1024*200, # 200 KB
                                  max_buffer_size=None       # eliminate big message errors
                                  )
        consumer.seek(0, 1)
        while True:
            try:
                message = consumer.get_message()
                if message is None:
                    time.sleep(1)
                    continue
                val = message.message.value
                logger.info("message.message.value== %s " % val)
                print('val==', val)
                try:
                    item = json.loads(val)
                except:
                    continue
                if 'meta' in item and 'collection_name' in item['meta']:
                    _insert_item_to_monggodb(item)
            except:
                traceback.print_exc()
                break
        kafka.close()
        return 0
Developer: mtaziz, Project: jaycluster, Lines of code: 44, Source file: dump_to_mongodb.py

Example 11: event_consumer

# Required import: from kafka import SimpleConsumer [as alias]
# Or: from kafka.SimpleConsumer import seek [as alias]
def event_consumer(kafka_config, lock, trigger_manager):
    kafka_url = kafka_config['url']
    group = kafka_config['event_group']
    # read from the 'transformed_events_topic' in the future
    # reading events sent from API POST Event now
    topic = kafka_config['events_topic']
    kafka = KafkaClient(kafka_url)
    consumer = SimpleConsumer(kafka,
                              group,
                              topic,
                              auto_commit=True,
                              # auto_commit_every_n=None,
                              # auto_commit_every_t=None,
                              # iter_timeout=1,
                              fetch_size_bytes=kafka_config[
                                  'events_fetch_size_bytes'],
                              buffer_size=kafka_config['events_buffer_size'],
                              max_buffer_size=kafka_config['events_max_buffer_size'])
    consumer.seek(0, 2)

    for e in consumer:
        log.debug('Received an event')
        offset, message = e
        envelope = json.loads(message.value)
        event = envelope['event']
        # convert iso8601 string to a datetime for winchester
        # Note: the distiller knows how to convert these, based on
        # event_definitions.yaml
        if 'timestamp' in event:
            event['timestamp'] = iso8601.parse_date(
                event['timestamp'],
                default_timezone=None)
        if 'launched_at' in event:
            event['launched_at'] = iso8601.parse_date(
                event['launched_at'],
                default_timezone=None)

        lock.acquire()
        trigger_manager.add_event(event)
        lock.release()
Developer: oneilcin, Project: monasca-event, Lines of code: 42, Source file: event_processor.py

Example 12: pipe_stream_definition_consumer

# Required import: from kafka import SimpleConsumer [as alias]
# Or: from kafka.SimpleConsumer import seek [as alias]
def pipe_stream_definition_consumer(kafka_config, lock, pipe):
    kafka_url = kafka_config['url']
    group = kafka_config['stream_def_pipe_group']
    topic = kafka_config['stream_def_topic']
    kafka = KafkaClient(kafka_url)
    consumer = SimpleConsumer(kafka,
                              group,
                              topic,
                              auto_commit=True,
                              # auto_commit_every_n=None,
                              # auto_commit_every_t=None,
                              # iter_timeout=1,
                              fetch_size_bytes=kafka_config[
                                  'events_fetch_size_bytes'],
                              buffer_size=kafka_config['events_buffer_size'],
                              max_buffer_size=kafka_config['events_max_buffer_size'])

    consumer.seek(0, 2)

    for s in consumer:
        offset, message = s
        stream_def = json.loads(message.value)

        if 'stream-definition-created' in stream_def:
            log.debug('Received a stream-definition-created event')
            stream_create = event_processor.stream_def_to_winchester_format(
                stream_def['stream-definition-created'])
            lock.acquire()
            pipe.add_trigger_definition(stream_create)
            lock.release()
        elif 'stream-definition-deleted' in stream_def:
            log.debug('Received a stream-definition-deleted event')
            name = event_processor.stream_unique_name(
                stream_def['stream-definition-deleted'])
            lock.acquire()
            pipe.delete_trigger_definition(name)
            lock.release()
        else:
            log.error('Unknown event received on stream_def_topic')
Developer: oneilcin, Project: monasca-event, Lines of code: 41, Source file: pipeline_processor.py

Example 13: event_consumer

# Required import: from kafka import SimpleConsumer [as alias]
# Or: from kafka.SimpleConsumer import seek [as alias]
    def event_consumer(self, conf, lock, trigger_manager):
        kafka_url = conf.kafka.url
        group = conf.kafka.event_group
        topic = conf.kafka.events_topic
        kafka = KafkaClient(kafka_url)
        consumer = SimpleConsumer(kafka, group, topic, auto_commit=True)

        consumer.seek(0, 2)

        statsd = monascastatsd.Client(name="monasca", dimensions=self.dimensions)
        events_consumed = statsd.get_counter("events_consumed")
        events_persisted = statsd.get_counter("events_persisted")

        for e in consumer:
            log.debug("Received an event")
            events_consumed.increment()
            offset, message = e
            envelope = json.loads(message.value)
            event = envelope["event"]

            if "timestamp" in event:
                event["timestamp"] = iso8601.parse_date(event["timestamp"], default_timezone=None)

            lock.acquire()
            try:
                # should have add_event return True or False
                prev_saved_events = trigger_manager.saved_events
                trigger_manager.add_event(event)
                if trigger_manager.saved_events > prev_saved_events:
                    events_persisted.increment()
                else:
                    log.warning("Invalid or Duplicate Event. " "Could not add_event to mysql.")
            except Exception as e:
                log.exception(e)
            finally:
                lock.release()
Developer: oneilcin, Project: monasca-events-engine, Lines of code: 38, Source file: event_processor.py

Example 14: TestRedisMonitor

# Required import: from kafka import SimpleConsumer [as alias]
# Or: from kafka.SimpleConsumer import seek [as alias]
class TestRedisMonitor(TestCase):

    maxDiff = None
    queue_key = "link:istresearch.com:queue"

    def setUp(self):
        self.redis_monitor = RedisMonitor("localsettings.py")
        self.redis_monitor.settings = self.redis_monitor.wrapper.load("localsettings.py")
        self.redis_monitor.logger = MagicMock()
        self.redis_monitor.settings['KAFKA_TOPIC_PREFIX'] = "demo_test"
        self.redis_monitor.settings['STATS_TOTAL'] = False
        self.redis_monitor.settings['STATS_PLUGINS'] = False
        self.redis_monitor.settings['PLUGINS'] = {
            'plugins.info_monitor.InfoMonitor': None,
            'plugins.stop_monitor.StopMonitor': None,
            'plugins.expire_monitor.ExpireMonitor': None,
            'tests.tests_online.CustomMonitor': 100,
        }
        self.redis_monitor.redis_conn = redis.Redis(
            host=self.redis_monitor.settings['REDIS_HOST'],
            port=self.redis_monitor.settings['REDIS_PORT'])

        self.redis_monitor._load_plugins()
        self.redis_monitor.stats_dict = {}

        self.kafka_conn = KafkaClient(self.redis_monitor.settings[
                                      'KAFKA_HOSTS'])
        self.kafka_conn.ensure_topic_exists("demo_test.outbound_firehose")

        self.consumer = SimpleConsumer(
            self.kafka_conn,
            "demo-id",
            "demo_test.outbound_firehose"
        )

    def test_process_item(self):
        # we only want to go to the end now, not after this test is run
        self.consumer.seek(0, 2)

        # set the info flag
        key = "info-test:blah"
        value = "ABC123"
        self.redis_monitor.redis_conn.set(key, value)

        # process the request
        plugin = self.redis_monitor.plugins_dict.items()[0][1]
        self.redis_monitor._process_plugin(plugin)

        # ensure the key is gone
        self.assertEquals(self.redis_monitor.redis_conn.get(key), None)

    def test_sent_to_kafka(self):
        success = {
            u'info-test': "ABC123",
            u"appid": u"someapp"
        }

        # ensure it was sent out to kafka
        message_count = 0
        for message in self.consumer.get_messages():
            if message is None:
                break
            else:
                the_dict = json.loads(message.message.value)
                self.assertEquals(success, the_dict)
                message_count += 1

        self.assertEquals(message_count, 1)
Developer: 01-, Project: scrapy-cluster, Lines of code: 70, Source file: tests_online.py

Example 15: _run

# Required import: from kafka import SimpleConsumer [as alias]
# Or: from kafka.SimpleConsumer import seek [as alias]
    def _run(self):
        pcount = 0
        pause = False
        while True:
            try:
                if pause:
                    gevent.sleep(2)
                    pause = False
                self._logger.error("New KafkaClient %s" % self._topic)
                self._kfk = KafkaClient(self._brokers , "kc-" + self._topic)
                try:
                    consumer = SimpleConsumer(self._kfk, self._group, self._topic, buffer_size = 4096*4, max_buffer_size=4096*32)
                    #except:
                except Exception as ex:
                    template = "Consumer Failure {0} occured. Arguments:\n{1!r}"
                    messag = template.format(type(ex).__name__, ex.args)
                    self._logger.error("Error: %s trace %s" % \
                        (messag, traceback.format_exc()))
                    raise RuntimeError(messag)

                self._logger.error("Starting %s" % self._topic)

                # Find the offset of the last message that has been queued
                consumer.seek(-1,2)
                try:
                    mi = consumer.get_message(timeout=0.1)
                    consumer.commit()
                except common.OffsetOutOfRangeError:
                    mi = None
                #import pdb; pdb.set_trace()
                self._logger.info("Last Queued for %s is %s" % \
                                  (self._topic,str(mi)))

                # start reading from last previously processed message
                if mi != None:
                    consumer.seek(-1,1)
                else:
                    consumer.seek(0,0)

                if self._limit:
                    raise gevent.GreenletExit

                while True:
                    try:
                        mlist = consumer.get_messages(10,timeout=0.5)
                        if not self.msg_handler(mlist):
                            raise gevent.GreenletExit
                        consumer.commit()
                        pcount += len(mlist) 
                    except TypeError as ex:
                        self._logger.error("Type Error: %s trace %s" % \
                                (str(ex.args), traceback.format_exc()))
                        gevent.sleep(0.1)
                    except common.FailedPayloadsError as ex:
                        self._logger.error("Payload Error: %s" %  str(ex.args))
                        gevent.sleep(0.1)
            except gevent.GreenletExit:
                break
            except AssertionError as ex:
                self._partoffset = ex
                break
            except Exception as ex:
                template = "An exception of type {0} occured. Arguments:\n{1!r}"
                messag = template.format(type(ex).__name__, ex.args)
                self._logger.error("%s : traceback %s" % \
                                  (messag, traceback.format_exc()))
                self.stop_partition()
                pause = True

        self._logger.error("Stopping %s pcount %d" % (self._topic, pcount))
        partdb = self.stop_partition()
        return self._partoffset, partdb
Developer: gtesseyre, Project: contrail-controller, Lines of code: 74, Source file: partition_handler.py
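The seek calls in this final example combine all three whence modes: seek(-1, 2) positions the consumer one message before the end so the most recent queued message can be inspected, and the consumer then either steps back one message with seek(-1, 1) so that message is re-read, or rewinds to the start with seek(0, 0) if nothing has been queued yet. A compact sketch of just that positioning logic (error handling such as OffsetOutOfRangeError is omitted, and the consumer is assumed to be built as in the examples above):

def position_at_last_queued(consumer):
    """Position the consumer so reading resumes from the last queued message, if any."""
    consumer.seek(-1, 2)                        # one message before the end of the log
    last = consumer.get_message(timeout=0.1)    # peek at the most recent message
    if last is not None:
        consumer.seek(-1, 1)                    # step back one so it is read again
    else:
        consumer.seek(0, 0)                     # nothing queued yet: start from the beginning
    return last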


Note: The kafka.SimpleConsumer.seek examples in this article were compiled by 纯净天空 (vimsky) from GitHub, MSDocs, and other open-source code and documentation platforms. The code snippets were selected from open-source projects contributed by various developers, and the source code copyright belongs to the original authors. Please refer to the corresponding project's license before distributing or using the code; do not reproduce without permission.