This article collects typical usage examples of the Python kafka.SimpleConsumer class. If you are unsure what SimpleConsumer is for or how to use it, the curated class examples below should help.
The sections that follow show 15 code examples of the SimpleConsumer class, sorted by popularity by default.
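All of the examples target the legacy kafka-python (pre-1.0) API, in which a SimpleConsumer wraps a KafkaClient connection. As a point of reference, here is a minimal sketch of that workflow; the broker address, group, and topic names are placeholders rather than values taken from any example below:

from kafka import KafkaClient, SimpleConsumer

client = KafkaClient('localhost:9092')                    # placeholder broker address
consumer = SimpleConsumer(client, 'my-group', 'my-topic')

# Fetch a batch without blocking; each item exposes its payload
# at message.message.value
for message in consumer.get_messages(count=10, block=False):
    print(message.message.value)

consumer.commit()   # record the consumer group's offsets
client.close()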
Example 1: consume_topic

def consume_topic(topic, group, output_dir, frequency):
    global timestamp, tempfile_path, tempfile
    print "Consumer Loading topic '%s' in consumer group %s into %s..." % (topic, group, output_dir)

    timestamp = standardized_timestamp(frequency)
    kafka_consumer = SimpleConsumer(kafka, group, topic, max_buffer_size=1310720000)

    # open file for writing
    tempfile_path = "/tmp/kafka_%s_%s_%s_%s.txt" % (topic, group, timestamp, batch_counter)
    tempfile = open(tempfile_path, "w")
    # log_has_at_least_one = False  # did we log at least one entry?

    while True:
        # get up to 100 messages at a time, non-blocking
        messages = kafka_consumer.get_messages(count=100, block=False)
        if not messages:
            # print "no messages to read"
            continue  # if no messages were received, poll again

        for message in messages:
            # log_has_at_least_one = True
            # print(message.message.value)
            # tempfile.write(message.message.value + "\n")  # lose the '\n'?
            tempfile.write(message.message.value)

        if tempfile.tell() > 120000000:  # file size > 120 MB
            print "Note: file is large enough to write to HDFS. Writing now..."
            flush_to_hdfs(output_dir, topic)

        kafka_consumer.commit()  # inform ZooKeeper of the position in the Kafka queue
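This function assumes a module-level kafka client plus the standardized_timestamp, flush_to_hdfs, and batch_counter globals from the original script. A call might look like the following; every argument value here is made up for illustration:

consume_topic("tweets", "hdfs-loader", "/user/hdfs/tweets", "hourly")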
Example 2: assert_message_count
def assert_message_count(self, topic, check_count, timeout=10,
                         partitions=None, at_least=False):
    hosts = ','.join(['%s:%d' % (broker.host, broker.port)
                      for broker in self.brokers])

    client = KafkaClient(hosts)
    consumer = SimpleConsumer(client, None, topic,
                              partitions=partitions,
                              auto_commit=False,
                              iter_timeout=timeout)

    started_at = time.time()
    pending = consumer.pending(partitions)

    # Keep checking if it isn't immediately correct, subject to timeout
    while pending < check_count and (time.time() - started_at < timeout):
        pending = consumer.pending(partitions)
        time.sleep(0.5)

    consumer.stop()
    client.close()

    if pending < check_count:
        self.fail('Too few pending messages: found %d, expected %d' %
                  (pending, check_count))
    elif pending > check_count and not at_least:
        self.fail('Too many pending messages: found %d, expected %d' %
                  (pending, check_count))

    return True
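In a test this helper would typically be invoked right after producing a known number of messages, for example self.assert_message_count(self.topic, 100, partitions=[0]) (illustrative values).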
Example 3: test_switch_leader_simple_consumer
def test_switch_leader_simple_consumer(self):
    producer = Producer(self.client, async=False)
    consumer = SimpleConsumer(self.client, None, self.topic, partitions=None, auto_commit=False, iter_timeout=10)
    self._send_random_messages(producer, self.topic, 0, 2)
    consumer.get_messages()
    self._kill_leader(self.topic, 0)
    consumer.get_messages()
Example 4: assert_message_count
def assert_message_count(self, topic, check_count, timeout=10,
                         partitions=None, at_least=False):
    hosts = ','.join(['%s:%d' % (broker.host, broker.port)
                      for broker in self.brokers])

    client = SimpleClient(hosts, timeout=2)
    consumer = SimpleConsumer(client, None, topic,
                              partitions=partitions,
                              auto_commit=False,
                              iter_timeout=timeout)

    started_at = time.time()
    pending = -1
    while pending < check_count and (time.time() - started_at < timeout):
        try:
            pending = consumer.pending(partitions)
        except FailedPayloadsError:
            pass
        time.sleep(0.5)

    consumer.stop()
    client.close()

    if pending < check_count:
        self.fail('Too few pending messages: found %d, expected %d' %
                  (pending, check_count))
    elif pending > check_count and not at_least:
        self.fail('Too many pending messages: found %d, expected %d' %
                  (pending, check_count))

    return True
Example 5: test_ts
def test_ts(self):
    kafka = KafkaClient(config.get("kafka.host1") + "," + config.get("kafka.host2"))

    # consumer = SimpleConsumer(kafka, "my-group112", "test")
    consumer = SimpleConsumer(kafka, self.GROUP_NAME, self.KAFKA_TOPIC,
                              fetch_size_bytes=3000000, buffer_size=2000000000, max_buffer_size=2000000000)

    while True:
        print("HELLO")
        # Prepare data for insert and copy to S3
        # data_str = StringIO()
        count = 0
        # last_offset = 2

        consumer.seek(2, 0)
        for message in consumer.get_messages(count=100, block=False, timeout=0.1):
            count += 1
            print(message.message.value)
            # # Write tweets to StringIO
            # self.write_to_data_str(message, data_str)
            # # Store batch tweets to S3
            # self.write_to_s3(data_str, last_offset)

        if count != 100:
            break
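The seek(2, 0) call above positions the consumer two messages past the earliest available offset: in the SimpleConsumer API the second argument is a whence flag, with 0 meaning relative to the head of the queue and 2 meaning relative to the tail (compare Example 9, which uses seek(0, 2) to skip straight to new messages). A small sketch with placeholder names:

consumer = SimpleConsumer(kafka, "my-group", "my-topic")
consumer.seek(0, 0)   # rewind to the earliest available message
consumer.seek(0, 2)   # jump to the tail; only messages produced from now on are fetched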
Example 6: consume
def consume(kafka_host):
    kafka = KafkaClient(kafka_host)
    consumer = SimpleConsumer(kafka, 'fetcher', cfg['kafka']['pages'])
    producer = SimpleProducer(kafka)
    consumer.max_buffer_size = 20 * 1024 * 1024

    for msg in consumer:
        page = json.loads(msg.message.value)
        process(page, producer)

    kafka.close()
Example 7: consume_save
def consume_save(group, topic):
    # tmp_save = open(tmp_file_path, "w")
    while True:
        kafka_consumer = SimpleConsumer(kafka, group, topic)
        messages = kafka_consumer.get_messages(count=1000, block=False)
        if not messages:
            print "Consumer didn't read any messages"

        for message in messages:
            # tmp_save.write(message.message.value + "\n")
            print message.message.value + "\n"
Example 8: consume_save

def consume_save(group, topic):
    tmp_save = open(tmp_file_path, "w")
    kafka_consumer = SimpleConsumer(kafka, group, topic)
    messages = kafka_consumer.get_messages(count=1000, block=False)
    if not messages:
        print "Consumer didn't read any messages"

    for message in messages:
        tmp_save.write(message.message.value + "\n")
        # print message.message.value + "\n"

    kafka_consumer.commit()  # inform ZooKeeper of the position in the Kafka queue
    print ".... ... .. .."
    print "Messages from topic \"%s\" consumed \n" % topic
Example 9: setup_capture_new_messages_consumer
def setup_capture_new_messages_consumer(topic):
    """Seeks to the tail of the topic then returns a function that can
    consume messages from that point.
    """
    kafka = KafkaClient(get_config().cluster_config.broker_list)
    group = str('data_pipeline_clientlib_test')
    consumer = SimpleConsumer(kafka, group, topic, max_buffer_size=_ONE_MEGABYTE)
    consumer.seek(0, 2)  # seek to the tail: 0 is the offset, 2 means relative to the tail

    yield consumer

    kafka.close()
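Because the function yields the consumer, it is presumably registered as a context manager or fixture in the original test library. Usage would then look roughly like the sketch below; the topic name and the publishing helper are assumptions, not part of the source:

with setup_capture_new_messages_consumer('my.topic') as consumer:
    publish_test_message()   # hypothetical helper that produces one message
    messages = consumer.get_messages(count=10, block=True, timeout=5)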
Example 10: run
def run(self, options=None):
    # try:
    # Create table if it doesn't exist in the database
    if self.REDSHIFT.if_table_exists(self.TABLE_NAME) is False:
        self.REDSHIFT.execute(self.CREATE_TRACKING_TABLE)

    kafka = KafkaClient(config.get("kafka.host1") + "," + config.get("kafka.host2"))
    consumer = SimpleConsumer(kafka, self.GROUP_NAME, self.KAFKA_TOPIC, fetch_size_bytes=3000000,
                              buffer_size=2000000000, max_buffer_size=2000000000)

    while True:
        # Prepare data for insert and copy to S3
        data_str = StringIO()
        csv_str = StringIO()
        count = 0

        # Get offset from previous read
        s3_last_offset = self.get_s3_offset()
        last_offset = self.REDSHIFT.select(self.GET_OFFSET_QUERY)[0][0]
        last_offset = last_offset if last_offset else 0

        # Resolve difference in offset (S3 offset does not carry over from day to day)
        if s3_last_offset > last_offset:
            last_offset = s3_last_offset
            self.REDSHIFT.execute(self.UPDATE_OFFSET_QUERY % (self.GROUP_NAME, self.PARTITION, last_offset))
        print(last_offset)

        # Read from offset
        consumer.seek(last_offset, 0)
        for message in consumer.get_messages(count=self.BATCH_SIZE, block=False, timeout=5):
            # Write tweets to StringIO
            self.write_to_data_str(message, data_str, csv_str)
            count += 1
            last_offset += 1

        # Store batch tweets to S3
        self.write_to_s3(data_str, csv_str, last_offset)

        # Track Kafka offset
        self.REDSHIFT.execute(self.UPDATE_OFFSET_QUERY % (self.GROUP_NAME, self.PARTITION, last_offset))

        if count != self.BATCH_SIZE:
            break
Example 11: test_simple_consumer_failed_payloads
def test_simple_consumer_failed_payloads(self):
    client = MagicMock()
    consumer = SimpleConsumer(client, group=None,
                              topic='topic', partitions=[0, 1],
                              auto_commit=False)

    def failed_payloads(payload):
        return FailedPayloadsError(payload)

    client.send_fetch_request.side_effect = self.fail_requests_factory(failed_payloads)

    # This should not raise an exception
    consumer.get_messages(5)
Example 12: HBaseServer

class HBaseServer(threading.Thread):
    """
    HBase thread that continuously reads from a Kafka queue
    """

    def __init__(self, kafka_url, kafka_topic, hbase_url, hbase_thrift_port, hbase_table):
        threading.Thread.__init__(self)
        self.kafka = KafkaClient(kafka_url)
        self.cons = SimpleConsumer(self.kafka, None, kafka_topic)
        self.cons.seek(0, 2)
        self.hbase_connect = happybase.Connection(hbase_url, hbase_thrift_port)
        self.car_table = self.hbase_connect.table(hbase_table)
        self.server_on_flag = True
        self.m = None
        self.payload = None
        self.vin = None
        self.time = None
        self.data = None
        self.row_key = None
        self.count = 0

    def run(self):
        while self.server_on_flag:
            self.m = self.cons.get_message(block=False)
            if self.m is not None:
                self.payload = json.loads(self.m.message.value)
                self.vin = str(self.payload['vin'])
                self.time = str(self.payload['timestamp'])
                self.data = str(self.payload['data'])
                self.row_key = self.vin + self.time
                try:
                    self.car_table.put(self.vin, {'user:mostrecent': self.time})
                    self.car_table.put(self.row_key, {'car:data': self.data})
                    self.count = self.count + 1
                    logger.info('HBase Server: key: %s, table: %s, car{data: %s}. Message number: %s',
                                self.row_key, 'rvi', self.data, str(self.count))
                except Exception as e:
                    logger.info('%s, data push into HBase unsuccessful...', e)
            else:
                sleep(0.2)  # 1/5 would be integer division (0) on Python 2 and busy-spin

    def shutdown(self):
        self.server_on_flag = False
        logger.info('HBase Server shutting down...')
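A rough sketch of how the thread could be wired up; the broker address, topic, HBase host, Thrift port, and table name are all illustrative assumptions:

server = HBaseServer('localhost:9092', 'rvi', 'localhost', 9090, 'car_data')
server.start()      # begins draining the Kafka topic into HBase
# ... later ...
server.shutdown()
server.join()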
Example 13: test_simple_consumer_reset_partition_offset
def test_simple_consumer_reset_partition_offset(self):
    client = MagicMock()

    def mock_offset_request(payloads, **kwargs):
        raise FailedPayloadsError(payloads[0])

    client.send_offset_request.side_effect = mock_offset_request

    consumer = SimpleConsumer(client, group='foobar',
                              topic='topic', partitions=[0, 1],
                              auto_commit=False)

    # This should not raise an exception
    self.assertEqual(consumer.reset_partition_offset(0), None)
Example 14: RVIConsumer

class RVIConsumer(threading.Thread):

    def __init__(self, kafka_addr, topic, vin, web_url):
        threading.Thread.__init__(self)
        self.kafka = KafkaClient(kafka_addr)
        self.cons = SimpleConsumer(self.kafka, None, topic)
        self.cons.seek(0, 2)
        self.vin = vin
        self.web_url = web_url
        self.flag = True
        self.count = 0
        self.sleep_count = 0
        self.headers = {'Content-Type': 'application/json'}

    def is_running(self):
        return self.flag

    def run(self):
        while self.flag:
            # cons = SimpleConsumer(kafka, None, 'rvi')
            m = self.cons.get_message(block=False)
            if m is not None:
                payload = json.loads(m.message.value)
                if payload['vin'] == self.vin:
                    self.sleep_count = 0
                    payloadtoweb = json.dumps(m.message.value)
                    r = requests.post(self.web_url, data=payloadtoweb, headers=self.headers)
                    if r.status_code == 200:  # compare with ==, not 'is'
                        print m.message.value + " sent successfully\n"
                    else:
                        print "%s is not available, status code:%d...shutting down now..." % (self.web_url, r.status_code)
                        self.shutdown()
            else:
                if self.sleep_count > 100000:
                    print "No new data for %s... Timing out" % self.vin
                    self.shutdown()
                time.sleep(0.2)  # 1/5 would be integer division (0) on Python 2
                self.sleep_count = self.sleep_count + 1

    def shutdown(self):
        self.flag = False
        requests.post(self.web_url, data=json.dumps({'vin': self.vin, 'data': 'EOM'}), headers=self.headers)
        print "%s consumer thread shutting down" % self.vin
Example 15: consume_save

def consume_save(group, topic):
    i = 0
    tmp_save = open(tmp_file_path, "w")
    while True:
        kafka_consumer = SimpleConsumer(kafka, group, topic)
        messages = kafka_consumer.get_messages(count=1000, block=False)
        # if not messages:
        #     print "Consumer didn't read any messages"

        for message in messages:
            tmp_save.write(message.message.value + "\n")
            print message.message.value + "\n"

        # file size > 20 MB
        if tmp_save.tell() > 20000000:
            push_to_hdfs(tmp_file_path)

        kafka_consumer.commit()  # inform ZooKeeper of the position in the Kafka queue