

Python consumer.SimpleConsumer Class Code Examples

This article collects typical usage examples of the Python class kafka.consumer.SimpleConsumer, drawn from open-source projects. If you are unsure what SimpleConsumer is for or how to use it, the curated class examples below should help.


The sections below present 15 code examples of the SimpleConsumer class, ordered by popularity.
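
All of the examples use SimpleConsumer from kafka-python's legacy API (later releases deprecate it in favor of KafkaConsumer). As a baseline for reading them, here is a minimal, self-contained sketch; the broker address, group, and topic names are hypothetical:

from kafka import KafkaClient
from kafka.consumer import SimpleConsumer

client = KafkaClient("localhost:9092")  # hypothetical broker
consumer = SimpleConsumer(client, "my-group", "my-topic")

# Iterating a SimpleConsumer yields OffsetAndMessage objects:
# msg.offset is the offset, msg.message.value the raw payload bytes.
for msg in consumer:
    print(msg.offset, msg.message.value)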

Example 1: run

    def run(self):
        client = KafkaClient(self.bootstrap_server, client_id='commandline')
        consumer = SimpleConsumer(client, self.group, self.topic, auto_commit_every_n=1, buffer_size=160,
                                  auto_commit=True)

        for message in consumer:
            now = datetime.now()
            print("%s: %s" % (now, message))
            consumer.commit()
Author: danielricart, Project: kafka-client-demo, Lines: 9, Source: consumer.py
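
For reference, with auto_commit=True the consumer already commits offsets on its own every auto_commit_every_n messages, so the per-message commit() above is redundant (though harmless). A hedged sketch of the commit- and buffer-related constructor parameters that recur throughout these examples; the values shown are illustrative, not recommendations:

from kafka import KafkaClient
from kafka.consumer import SimpleConsumer

client = KafkaClient("localhost:9092")  # hypothetical broker
consumer = SimpleConsumer(
    client, "my-group", "my-topic",
    auto_commit=True,          # commit offsets automatically
    auto_commit_every_n=100,   # ...after every 100 messages
    auto_commit_every_t=5000,  # ...or every 5000 ms, whichever comes first
    buffer_size=4096,          # initial fetch buffer size in bytes
    max_buffer_size=32768,     # grow up to this size; None means unbounded
)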

Example 2: main

def main():
    client = KafkaClient("localhost:9092")
    consumer = SimpleConsumer(client, "test-group", "twitter_raw")
    consumer.seek(0, 2)  # move to the tail of the topic

    num = 0
    for message in consumer:
        print("redis publish: %d" % num)
        num += 1
        try:
            # pickled payloads are raw bytes; unpickle them directly
            # rather than decoding as UTF-8 (pickle data is not valid UTF-8)
            data_depickled = pickle.loads(message.message.value)
        except Exception:
            continue
        # print data_depickled
        # {  
        #    'text':'@_LulaMoore me hamas perra',
        #    'created_at':datetime.datetime(2015, 10, 9, 23, 36, 49),
        #    'source':u'Twitter Web Client',
        #    'lang:':u'es',
        #    'place':{  
        #       'country_code':u'AR',
        #       'coordinates':[  
        #          [  
        #             -68.176283,
        #             -38.984724
        #          ],
        #          [  
        #             -68.176283,
        #             -38.921051
        #          ],
        #          [  
        #             -68.015162,
        #             -38.921051
        #          ],
        #          [  
        #             -68.015162,
        #             -38.984724
        #          ]
        #       ]
        #    },
        #    'user':{  
        #       'statuses_count':15067,
        #       'name':u'Dama negra *\uffe6*',
        #       'friends_count':390,
        #       'created_at':datetime.datetime(2014, 3, 15, 2, 37, 10),
        #       'profile_image_url': u'http://pbs.twimg.com/profile_images/652333268256313344/x9K9Nlys_normal.jpg',
        #       'followers_count':384,
        #       'id':2390242428
        #    },
        #    'id':652628813935980544
        # }

        ### process data here ###
        # text = data_depickled['text']
        filtered_data = data_filter(data_depickled)
        data_pickled = pickle.dumps(filtered_data)
        redis.publish('tweets_processed', data_pickled)
Author: krist-jin, Project: tweets-map, Lines: 57, Source: simpleSub.py
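
Two details of this example carry over to several others. First, consumer.seek(offset, whence) interprets whence like a file seek: 0 is relative to the earliest available offset, 1 to the current position, 2 to the tail, so seek(0, 2) skips the backlog and reads only new messages. Second, pickled payloads are binary and should reach pickle.loads as raw bytes; decoding them as UTF-8 is what forced the try/except above. A minimal round-trip sketch under the same assumptions (hypothetical broker and topic):

import pickle

from kafka import KafkaClient
from kafka.consumer import SimpleConsumer

client = KafkaClient("localhost:9092")
consumer = SimpleConsumer(client, "test-group", "twitter_raw")
consumer.seek(0, 2)  # whence=2: position relative to the tail

for msg in consumer:
    tweet = pickle.loads(msg.message.value)  # raw bytes, no .decode()
    print(tweet.get('text'))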

Example 3: blocking_consumer

    def blocking_consumer(self, message_consume_function, parse_json, topic_group, topic_name):
        print("starting blocking consumer with topic group %s and topic name %s" % (topic_group, topic_name))
        consumer = SimpleConsumer(self.client, topic_group, topic_name)
        consumer.seek(0, 2)  # move to the tail of the topic

        for message in consumer:
            message = parse_json(message)
            print("=============" + str(message) + "============")
            message_consume_function(message)
            print("called message consume function")
Author: pombredanne, Project: splash-kafka, Lines: 10, Source: kafkaclient.py
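
A hedged usage sketch for this helper; the callback, the parser, and the kafka_client instance (an object of the class defining blocking_consumer, with self.client already connected) are all hypothetical:

import json

def handle(message):
    # hypothetical downstream processing
    print("consumed:", message)

def parse_json(offset_and_message):
    # unwrap the OffsetAndMessage and decode the JSON payload
    return json.loads(offset_and_message.message.value)

kafka_client.blocking_consumer(handle, parse_json, "my-group", "my-topic")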

Example 4: main

def main():
    client = KafkaClient("localhost:9092")
    consumer = SimpleConsumer(client, "test-group", "twitter_raw")
    consumer.seek(0, 2)  # move to the tail of the topic

    for message in consumer:
        # pickled payloads are raw bytes; unpickle them directly
        data_depickled = pickle.loads(message.message.value)
        print(data_depickled)
Author: krist-jin, Project: tweets-map, Lines: 10, Source: testSub.py

Example 5: run

    def run(self):
        client = KafkaClient("10.206.216.13:19092,10.206.212.14:19092,10.206.209.25:19092")
        consumer = SimpleConsumer(client, "test-group", "jiketest", auto_commit=False, partitions=self.part)

        consumer.seek(0, 0)  # whence=0: start from the earliest available offset

        while True:
            message = consumer.get_message(True, 60)
            self.__offset = message.offset
            print(message.message.value)
Author: 2lusy, Project: learndemo, Lines: 10, Source: partition_consumer.py
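
Note that get_message(True, 60) blocks for at most 60 seconds and, in the legacy API, returns None when the timeout expires without a message, so the unguarded message.offset above can raise AttributeError on a quiet topic. A guarded variant of the same loop:

        while True:
            message = consumer.get_message(block=True, timeout=60)
            if message is None:
                continue  # timed out with nothing to read; poll again
            self.__offset = message.offset
            print(message.message.value)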

Example 6: Consumer

class Consumer(object):
    def __init__(self, addr, group, topic):
        """Initialize Consumer with kafka broker IP, group, and topic."""
        self.client = KafkaClient(addr)
        self.consumer = SimpleConsumer(self.client, group, topic,
                                       max_buffer_size=1310720000)
        self.temp_file_path = None
        self.temp_file = None
        self.hadoop_path = "/insight/artsy/geo"
        self.topic = topic
        self.group = group
        self.block_cnt = 0

    def consume_topic(self, output_dir):
        """Consumes a stream of messages from the "post_geo_activity" topic.
        Code template from https://github.com/ajmssc/bitcoin-inspector.git
        """
        timestamp = time.strftime('%Y%m%d%H%M%S')
        
        # open file for writing
        self.temp_file_path = "%s/kafka_%s_%s_%s.dat" % (output_dir,self.topic,self.group,timestamp)
        self.temp_file = open(self.temp_file_path,"w")

        while True:
            try:
                # get 1000 messages at a time, non blocking
                messages = self.consumer.get_messages(count=1000, block=False)
                for message in messages:
                    self.temp_file.write(message.message.value + "\n")

                # file size > 20MB
                if self.temp_file.tell() > 20000000:
                    self.flush_to_hdfs(output_dir)

                self.consumer.commit()
            except:
                # move to tail of kafka topic if consumer is referencing
                # unknown offset
                self.consumer.seek(0, 2)


    def flush_to_hdfs(self, output_dir):
        """Flushes the 20MB file into HDFS."""
        self.temp_file.close()
        timestamp = time.strftime('%Y%m%d%H%M%S')
        hadoop_fullpath = "%s/%s_%s_%s.dat" % (self.hadoop_path, self.group,self.topic, timestamp)

        print "Block {}: Flushing data file to HDFS => {}".format(str(self.block_cnt),hadoop_fullpath)
        self.block_cnt += 1
        os.system("hdfs dfs -put %s %s" % (self.temp_file_path, hadoop_fullpath)) # save from local to hdfs
        os.remove(self.temp_file_path) # remove temp local file
        timestamp = time.strftime('%Y%m%d%H%M%S')
        self.temp_file_path = "%s/kafka_%s_%s_%s.dat" % (output_dir,self.topic,self.group,timestamp)
        self.temp_file = open(self.temp_file_path, "w")
Author: keiraqz, Project: artmosphere, Lines: 54, Source: hdfs_consumer.py
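
A hedged usage sketch for this class; the broker address, group, topic, and staging directory are hypothetical, and an hdfs CLI is assumed on the PATH. The design batches up to 1000 messages per non-blocking fetch and rolls the local staging file into HDFS once it passes roughly 20 MB:

if __name__ == '__main__':
    consumer = Consumer("localhost:9092", "hdfs-group", "post_geo_activity")
    consumer.consume_topic("/tmp/kafka_staging")  # local staging directory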

Example 7: KafkaDatawakeLookaheadSpout

class KafkaDatawakeLookaheadSpout(Spout):
    group = 'datawake-crawler-out-consumer'.encode()

    def __init__(self):
        Spout.__init__(self)
        self.queue = None

    def initialize(self, stormconf, context):
        try:
            settings = all_settings.get_settings(stormconf['topology.deployment'])
            self.topic = settings['crawler-out-topic'].encode()
            self.conn_pool = settings['conn_pool'].encode()
            self.log('KafkaDatawakeLookaheadSpout initialized with topic =' + self.topic + ' conn_pool=' + self.conn_pool)
            self.kafka = KafkaClient(self.conn_pool)
            self.consumer = SimpleConsumer(self.kafka, self.group, self.topic, max_buffer_size=None)
            self.consumer.seek(0, 2)  # move to the tail of the queue
        except:
            self.log("KafkaDatawakeLookaheadSpout initialize error", level='error')
            self.log(traceback.format_exc(), level='error')
            raise


    def next_tuple(self):
        """
        input message:
            dict(
                 id = input['id'],
                 appid = input['appid'],
                 url = url,
                 status_code = response.getcode(),
                 status_msg = 'Success',
                 timestamp = response.info()['date'],
                 links_found = links,
                 raw_html =  html,
                 attrs = input['attrs']
            )
        :return:  (url, status, headers, flags, body, timestamp, source,context)
        """

        offsetAndMessage = self.consumer.get_messages(timeout=None)[0]
        message = offsetAndMessage.message.value

        crawled = json.loads(message)
        safeurl = crawled['url'].encode('utf-8', 'ignore')
        self.log("Lookahead spout received id: " + crawled['id'] + " url: " + safeurl)
        context = {
            'source': 'datawake-lookahead',
            'userId': crawled['attrs']['userId'],
            'org': crawled['attrs']['org'],
            'domain': crawled['attrs']['domain'],
            'url': crawled['url']
        }
        self.emit([crawled['url'], crawled['status_code'], '', '', crawled['raw_html'], crawled['timestamp'], context['source'], context])
Author: diffeo, Project: Datawake, Lines: 53, Source: kafka_spouts.py

Example 8: spiderIdle

    def spiderIdle(self, spider):
        consumer = SimpleConsumer(self.kafka_conn, "test", "commands")
        for msg in consumer.get_messages():
            print(msg.message.value)
            if msg.message.value == spider.name + "_stop":
                print "stop"
                spider.spider_pause()
                # spider.close(spider,'ok')
                # self.scrapy.engine.close_spider(spider, 'closespider_itemcount')

            if msg.message.value == spider.name + "_start":
                # self.scrapy.engine.scraper.open_spider(spider)
                spider.spider_resume()
Author: tongji1907, Project: chique, Lines: 13, Source: kafka_controller.py
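
get_messages() is called here with its defaults (count=1, block=True, timeout=0.1), so each idle callback drains at most one pending command. A hedged variant of the method body that drains a larger batch without blocking (the count value is illustrative):

    def spiderIdle(self, spider):
        consumer = SimpleConsumer(self.kafka_conn, "test", "commands")
        # fetch up to 100 queued control messages without blocking
        for msg in consumer.get_messages(count=100, block=False):
            command = msg.message.value
            if command == spider.name + "_stop":
                spider.spider_pause()
            elif command == spider.name + "_start":
                spider.spider_resume()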

Example 9: __init__

    def __init__(self, conn_pool, topic, group):
        self.conn_pool = conn_pool
        self.topic = topic
        self.group = group
        self.kafka = KafkaClient(self.conn_pool)
        self.consumer = SimpleConsumer(self.kafka, self.group, self.topic, max_buffer_size=None)
        self.consumer.seek(0, 2)  # move to the tail of the queue
Author: Sotera, Project: Datawake-Legacy, Lines: 7, Source: kafka_consumer.py

Example 10: Consumer

class Consumer(object):

    def __init__(self, addr):
        self.client = KafkaClient(addr)
        self.topic = "steps_data_part4"
        self.consumer_group = 's3_consumer' 
        self.consumer = SimpleConsumer(self.client, self.consumer_group, self.topic)

    def consume_message(self):
        while True:
            timestamp = time.strftime('%Y%m%d%H%M%S')
            temp_file_name = "%s_%s_%s.dat" %(self.topic, self.consumer_group, timestamp)
            temp_file = open("/home/ubuntu/rankMyStep/kafka/"+temp_file_name,"w")
            messages = self.consumer.get_messages(count=1000, block=False)
            for msg in messages:
                print(msg.message.value + "\n")
                temp_file.write(msg.message.value + "\n")
            self.save_to_s3(temp_file_name)

    def save_to_s3(self, file_name):
        mybucket = "anurag-raw-data-store"
        aws_access_key = os.getenv('AWS_ACCESS_KEY_ID', 'default')
        aws_secret_access_key = os.getenv('AWS_SECRET_ACCESS_KEY', 'default')
        s3_client = boto3.client('s3')
        s3_client.upload_file("/home/ubuntu/rankMyStep/kafka/"+file_name, 
                              mybucket,"rankmysteps/"+file_name)
        os.remove("/home/ubuntu/rankMyStep/kafka/"+file_name)
Author: bigdata2, Project: rankMySteps, Lines: 27, Source: kafka_s3_consumer.py
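
As written, consume_message never commits offsets and writes (and uploads) a file on every pass even when no messages arrived. A hedged variant that skips empty batches and commits only after a successful upload; it assumes the example's module-level imports (time, os, boto3) and keeps its paths:

    def consume_message(self):
        while True:
            messages = self.consumer.get_messages(count=1000, block=False)
            if not messages:
                time.sleep(1)  # nothing pending; avoid a busy loop
                continue
            timestamp = time.strftime('%Y%m%d%H%M%S')
            temp_file_name = "%s_%s_%s.dat" % (self.topic, self.consumer_group, timestamp)
            with open("/home/ubuntu/rankMyStep/kafka/" + temp_file_name, "w") as temp_file:
                for msg in messages:
                    temp_file.write(msg.message.value + "\n")
            self.save_to_s3(temp_file_name)
            self.consumer.commit()  # record progress only after the upload succeeds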

Example 11: __init__

    def __init__(self, addr, group, topic):
        self.client = KafkaClient(addr)
        self.consumer = SimpleConsumer(self.client, group, topic, max_buffer_size=1310720000)
        self.temp_file_path = None
        self.temp_file = None
        self.topic = topic
        self.group = group
        self.block_cnt = 0
Author: raveendrau, Project: FantasySportsLeagues, Lines: 8, Source: kafka_hdfs_consumer.py

Example 12: KafkaDatawakeVisitedSpout

class KafkaDatawakeVisitedSpout(Spout):
    group = 'datawake-visited-consumer'.encode()

    def __init__(self):
        Spout.__init__(self)
        self.queue = None

    def initialize(self, stormconf, context):
        try:
            settings = all_settings.get_settings(stormconf['topology.deployment'])
            self.topic = settings['visited-topic'].encode()
            self.conn_pool = settings['conn_pool'].encode()
            self.log('KafkaDatawakeVisitedSpout initialized with topic =' + self.topic + ' conn_pool=' + self.conn_pool)
            self.kafka = KafkaClient(self.conn_pool)
            self.kafka.ensure_topic_exists(self.topic)
            self.consumer = SimpleConsumer(self.kafka, self.group, self.topic, max_buffer_size=None)
            self.consumer.seek(0, 2)  # move to the tail of the queue
        except:
            self.log("KafkaDatawakeVisitedSpout initialize error", level='error')
            self.log(traceback.format_exc(), level='error')
            raise

    def next_tuple(self):
        """
        input:  (timestamp,org,domain,user_id,url,html)
        :return:  (url, status, headers, flags, body, timestamp, source,context)
        """
        try:
            for message in self.consumer:
                self.log("msg")
                self.log(message)
                # iterating the consumer yields OffsetAndMessage objects;
                # split the null-delimited payload, not the wrapper
                (timestamp, org, domain, userId, url, html) = message.message.value.split('\0')
                context = {
                    'source': 'datawake-visited',
                    'domain': domain
                }
                self.emit([url, '', '', '', html, timestamp, context['source'], context])
        except:
            self.log(traceback.format_exc(), level='error')

    def fail(self, tup_id):
        pass
Author: Sotera, Project: datawake-prefetch, Lines: 44, Source: kafka_spouts.py

Example 13: CrawlerSpout

class CrawlerSpout(Spout):

    group = 'datawake-crawler-in-consumer'.encode()


    def initialize(self, stormconf, context):
        try:
            settings = all_settings.get_settings(stormconf['topology.deployment'])
            self.topic = settings['crawler-in-topic'].encode()
            self.conn_pool = settings['conn_pool'].encode()
            self.log('CrawlerSpout initialized with topic =' + self.topic + ' conn_pool=' + self.conn_pool)
            self.kafka = KafkaClient(self.conn_pool)
            self.kafka.ensure_topic_exists(self.topic)
            self.consumer = SimpleConsumer(self.kafka, self.group, self.topic, max_buffer_size=None, fetch_size_bytes=2000000)
            self.consumer.seek(0, 2)  # move to the tail of the queue
        except:
            self.log("CrawlerSpout initialize error",level='error')
            self.log(traceback.format_exc(),level='error')
            raise

    def next_tuple(self):
        """
        input message:
             json.dumps(dict(
                    id = 'abcdefg', #TODO generate UUID,
                    appid = self.appid,
                    url = url,
                    priority = 50,
                    depth = 0,
                    attrs  = dict(
                        userId = context['userId'],
                        org =  context['org'],
                        domain = context['domain']
                    )
                ))
        :return:
        """
        try:
            for message in self.consumer:
                # iterating yields OffsetAndMessage; decode the payload, not the wrapper
                to_crawl = json.loads(message.message.value)
                self.emit([to_crawl])
        except:
            self.log(traceback.format_exc(),level='error')
Author: Sotera, Project: datawake-prefetch, Lines: 43, Source: crawler_spout.py

Example 14: __init__

    def __init__(self, addr, group, topic):
        """Initialize Consumer with kafka broker IP, group, and topic."""
        self.client = KafkaClient(addr)
        self.consumer = SimpleConsumer(self.client, group, topic, max_buffer_size=1310720000)
        self.temp_file_path = None
        self.temp_file = None
        self.hadoop_path = "/user/parking_data/history"
        self.topic = topic
        self.group = group
        self.block_cnt = 0
Author: suhashm, Project: ParkMate, Lines: 10, Source: kafka_to_hdfs.py

Example 15: __init__

    def __init__(self, addr, group, topic):
        self.client = KafkaClient(addr)
        self.consumer = SimpleConsumer(self.client, group, topic, max_buffer_size=1310720000)
        self.temp_file_path = None
        self.temp_file = None
        self.hadoop_path = "/user/AdReport/%s/history" % (topic)
        self.cached_path = "/user/AdReport/%s/cached" % (topic)
        self.topic = topic
        self.group = group
        self.block_cnt = 0
Author: prarthanabhattarai, Project: AdReportProject, Lines: 10, Source: kafka_consumer_bids.py


Note: the kafka.consumer.SimpleConsumer class examples in this article were compiled by 纯净天空 from GitHub, MSDocs, and other open-source code and documentation platforms. The snippets are drawn from open-source projects contributed by their respective authors; copyright remains with the original authors, and any redistribution or use should follow the corresponding project's license. Do not reproduce without permission.