This article collects typical usage examples of the kafka.SimpleConsumer.get_messages method in Python. If you are wondering what SimpleConsumer.get_messages does, how to call it, or want to see it used in context, the curated examples below should help. You can also read more about its containing class, kafka.SimpleConsumer.
The following shows 15 code examples of SimpleConsumer.get_messages, sorted by popularity by default. You can upvote the examples you like or find useful; your feedback helps the system recommend better Python code examples.
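Before diving into the examples, here is a minimal, self-contained sketch of the typical call pattern. It assumes a broker reachable at localhost:9092; the consumer group and topic names are placeholders chosen for illustration, not taken from any example below.
from kafka import KafkaClient, SimpleConsumer

# Placeholder broker address, consumer group and topic -- adjust for your setup
client = KafkaClient("localhost:9092")
consumer = SimpleConsumer(client, "example-group", "example-topic", auto_commit=False)

# Fetch up to 10 messages without blocking; get_messages returns a list of
# OffsetAndMessage tuples, so the payload is accessed via .message.value
for offset_and_message in consumer.get_messages(count=10, block=False):
    print(offset_and_message.message.value)

client.close()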
Example 1: test_switch_leader_simple_consumer
# Required import: from kafka import SimpleConsumer [as alias]
# Or alternatively: from kafka.SimpleConsumer import get_messages [as alias]
def test_switch_leader_simple_consumer(self):
    producer = Producer(self.client, async=False)
    consumer = SimpleConsumer(self.client, None, self.topic,
                              partitions=None, auto_commit=False, iter_timeout=10)
    self._send_random_messages(producer, self.topic, 0, 2)
    consumer.get_messages()
    self._kill_leader(self.topic, 0)
    consumer.get_messages()
Example 2: test_simple_consumer_failed_payloads
# Required import: from kafka import SimpleConsumer [as alias]
# Or alternatively: from kafka.SimpleConsumer import get_messages [as alias]
def test_simple_consumer_failed_payloads(self):
    client = MagicMock()
    consumer = SimpleConsumer(client, group=None,
                              topic='topic', partitions=[0, 1],
                              auto_commit=False)

    def failed_payloads(payload):
        return FailedPayloadsError(payload)
    client.send_fetch_request.side_effect = self.fail_requests_factory(failed_payloads)

    # This should not raise an exception
    consumer.get_messages(5)
Example 3: consume_topic
# Required import: from kafka import SimpleConsumer [as alias]
# Or alternatively: from kafka.SimpleConsumer import get_messages [as alias]
def consume_topic(topic, group, output_dir, frequency):
    global timestamp, tempfile_path, tempfile
    print "Consumer Loading topic '%s' in consumer group %s into %s..." % (topic, group, output_dir)
    timestamp = standardized_timestamp(frequency)
    kafka_consumer = SimpleConsumer(kafka, group, topic, max_buffer_size=1310720000)

    # open file for writing
    tempfile_path = "/tmp/kafka_%s_%s_%s_%s.txt" % (topic, group, timestamp, batch_counter)
    tempfile = open(tempfile_path, "w")
    #log_has_at_least_one = False  # did we log at least one entry?

    while True:
        # get up to 100 messages at a time, non-blocking
        messages = kafka_consumer.get_messages(count=100, block=False)
        if not messages:
            #print "no messages to read"
            continue  # if no messages were received, wait until there are more
        for message in messages:
            #log_has_at_least_one = True
            #print(message.message.value)
            #tempfile.write(message.message.value + "\n")  # lose the '\n'?
            tempfile.write(message.message.value)
        if tempfile.tell() > 120000000:  # file size > 120MB
            print "Note: file is large enough to write to hdfs. Writing now..."
            flush_to_hdfs(output_dir, topic)
            kafka_consumer.commit()  # inform zookeeper of position in the kafka queue
Example 4: KafkaConsumer
# Required import: from kafka import SimpleConsumer [as alias]
# Or alternatively: from kafka.SimpleConsumer import get_messages [as alias]
class KafkaConsumer(object):
    def __init__(self, conf):
        self.log = logging.getLogger(__name__)
        self.client = KafkaClient(conf["kafka_server"])
        self.total_inserts = 0
        self.inserts = 0
        self.listenstore = None

    def start_listens(self, listenstore):
        self.listenstore = listenstore
        return self.start(b"listen-group", b"listens")

    def start(self, group_name, topic_name):
        self.group_name = group_name
        self.topic_name = topic_name
        self.log.info("KafkaConsumer subscribed to %s -> %s" % (group_name, topic_name))
        self.consumer = SimpleConsumer(self.client, self.group_name, self.topic_name)

        t0 = 0
        last_offset = -1
        while True:
            listens = []
            if t0 == 0:
                t0 = time()

            messages = self.consumer.get_messages(count=CASSANDRA_BATCH_SIZE, block=True, timeout=KAFKA_READ_TIMEOUT)
            for message in messages:
                try:
                    data = ujson.loads(message.message.value)
                    listens.append(Listen.from_json(data))
                except ValueError as e:
                    self.log.error("Cannot parse JSON: %s\n'%s'" % (str(e), message.message.value))
                    continue
                last_offset = message.offset

            if listens:
                broken = True
                while broken:
                    try:
                        self.listenstore.insert_batch(listens)
                        broken = False
                    except ValueError as e:
                        self.log.error("Cannot insert listens: %s" % unicode(e))
                        broken = False
                    except NoHostAvailable as e:
                        self.log.error("Cannot insert listens: %s. Sleeping, trying again." % unicode(e))
                        sleep(5)

            self.inserts += len(messages)
            if self.inserts >= REPORT_FREQUENCY:
                t1 = time()
                self.total_inserts += self.inserts
                self.log.info("Inserted %d rows in %.1fs (%.2f listens/sec). Total %d rows. last offset: %d" %
                              (self.inserts, t1 - t0, self.inserts / (t1 - t0), self.total_inserts, last_offset))
                self.inserts = 0
                t0 = 0
Example 5: test_ts
# Required import: from kafka import SimpleConsumer [as alias]
# Or alternatively: from kafka.SimpleConsumer import get_messages [as alias]
def test_ts(self):
    kafka = KafkaClient(config.get("kafka.host1") + "," + config.get("kafka.host2"))
    # consumer = SimpleConsumer(kafka, "my-group112", "test")
    consumer = SimpleConsumer(kafka, self.GROUP_NAME, self.KAFKA_TOPIC,
                              fetch_size_bytes=3000000, buffer_size=2000000000, max_buffer_size=2000000000)
    while True:
        print("HELLO")
        # Prepare data for insert and copy to S3
        # data_str = StringIO()
        count = 0
        # last_offset = 2
        consumer.seek(2, 0)
        for message in consumer.get_messages(count=100, block=False, timeout=0.1):
            count += 1
            print(message.message.value)
            # # Write tweets to StringIO
            # self.write_to_data_str(message, data_str)
        # # Store batch tweets to S3
        # self.write_to_s3(data_str, last_offset)
        if count != 100:
            break
Example 6: ScoringWorker
# Required import: from kafka import SimpleConsumer [as alias]
# Or alternatively: from kafka.SimpleConsumer import get_messages [as alias]
class ScoringWorker(object):
    def __init__(self, settings, strategy_module):
        kafka = KafkaClient(settings.get('KAFKA_LOCATION'))
        self._producer = SimpleProducer(kafka, codec=CODEC_SNAPPY)
        partition_id = settings.get('SCORING_PARTITION_ID')
        if partition_id == None or type(partition_id) != int:
            raise AttributeError("Scoring worker partition id isn't set.")
        self._in_consumer = SimpleConsumer(kafka,
                                           settings.get('SCORING_GROUP'),
                                           settings.get('INCOMING_TOPIC'),
                                           buffer_size=1048576,
                                           max_buffer_size=10485760,
                                           partitions=[partition_id])

        self._manager = FrontierManager.from_settings(settings)
        self._decoder = Decoder(self._manager.request_model, self._manager.response_model)
        self._encoder = Encoder(self._manager.request_model)

        self.consumer_batch_size = settings.get('CONSUMER_BATCH_SIZE', 128)
        self.outgoing_topic = settings.get('SCORING_TOPIC')
        self.strategy = strategy_module.CrawlingStrategy()
        self.backend = self._manager.backend
        self.stats = {}
        self.cache_flush_counter = 0
        self.job_id = 0

    def work(self):
        consumed = 0
        batch = []
        fingerprints = set()
        try:
            for m in self._in_consumer.get_messages(count=self.consumer_batch_size, block=True, timeout=1.0):
                try:
                    msg = self._decoder.decode(m.message.value)
                except (KeyError, TypeError), e:
                    logger.error("Decoding error: %s", e)
                    continue
                else:
                    type = msg[0]
                    batch.append(msg)
                    if type == 'add_seeds':
                        _, seeds = msg
                        fingerprints.update(map(lambda x: x.meta['fingerprint'], seeds))
                        continue
                    if type == 'page_crawled':
                        _, response, links = msg
                        fingerprints.add(response.meta['fingerprint'])
                        fingerprints.update(map(lambda x: x.meta['fingerprint'], links))
                        continue
                    if type == 'request_error':
                        _, request, error = msg
                        fingerprints.add(request.meta['fingerprint'])
                        continue
                    raise TypeError('Unknown message type %s' % type)
                finally:
                    consumed += 1
Example 7: test_simple_consumer_unknown_topic_partition
# Required import: from kafka import SimpleConsumer [as alias]
# Or alternatively: from kafka.SimpleConsumer import get_messages [as alias]
def test_simple_consumer_unknown_topic_partition(self):
    client = MagicMock()
    consumer = SimpleConsumer(client, group=None,
                              topic='topic', partitions=[0, 1],
                              auto_commit=False)

    # Mock so that only the first request gets a valid response
    def unknown_topic_partition(request):
        return FetchResponsePayload(request.topic, request.partition,
                                    UnknownTopicOrPartitionError.errno, -1, ())
    client.send_fetch_request.side_effect = self.fail_requests_factory(unknown_topic_partition)

    # This should surface the UnknownTopicOrPartitionError
    with self.assertRaises(UnknownTopicOrPartitionError):
        consumer.get_messages(20)
Example 8: consume_topic
# Required import: from kafka import SimpleConsumer [as alias]
# Or alternatively: from kafka.SimpleConsumer import get_messages [as alias]
def consume_topic(topic, group, output_dir, frequency):
    global timestamp, tempfile_path, tempfile
    print "Consuming from topic '%s' in consumer group %s into %s..." % (topic, group, output_dir)

    # get timestamp
    timestamp = standardized_timestamp(frequency)
    kafka_consumer = SimpleConsumer(kafka, group, topic, max_buffer_size=1310720000)

    # open file for writing
    tempfile_path = "/tmp/kafka_stockTwits_%s_%s_%s_%s.dat" % (topic, group, timestamp, batch_counter)
    tempfile = open(tempfile_path, "w")
    log_has_at_least_one = False  # did we log at least one entry?

    while True:
        messages = kafka_consumer.get_messages(count=1000, block=False)  # get up to 1000 messages at a time, non-blocking
        if not messages:
            os.system("sleep 300s")  # sleep 5 minutes
            continue
        for message in messages:  # OffsetAndMessage(offset=43, message=Message(magic=0, attributes=0, key=None, value='some message'))
            log_has_at_least_one = True
            #print(message.message.value)
            tempfile.write(message.message.value + "\n")
        if tempfile.tell() > 10000000:  # file size > 10MB
            flush_to_hdfs(output_dir, topic)
            kafka_consumer.commit()  # save position in the kafka queue

    # exit loop
    if log_has_at_least_one:
        flush_to_hdfs(output_dir, topic)
        kafka_consumer.commit()  # save position in the kafka queue
    return 0
Example 9: consume_save
# Required import: from kafka import SimpleConsumer [as alias]
# Or alternatively: from kafka.SimpleConsumer import get_messages [as alias]
def consume_save(group, topic):
    # tmp_save = open(tmp_file_path, "w")
    while True:
        kafka_consumer = SimpleConsumer(kafka, group, topic)
        messages = kafka_consumer.get_messages(count=1000, block=False)
        if not messages:
            print "Consumer didn't read any messages"
        for message in messages:
            # tmp_save.write(message.message.value + "\n")
            print message.message.value + "\n"
Example 10: test_simple_consumer_leader_change
# Required import: from kafka import SimpleConsumer [as alias]
# Or alternatively: from kafka.SimpleConsumer import get_messages [as alias]
def test_simple_consumer_leader_change(self):
    client = MagicMock()
    consumer = SimpleConsumer(client, group=None,
                              topic='topic', partitions=[0, 1],
                              auto_commit=False)

    # Mock so that only the first request gets a valid response
    def not_leader(request):
        return FetchResponsePayload(request.topic, request.partition,
                                    NotLeaderForPartitionError.errno, -1, ())
    client.send_fetch_request.side_effect = self.fail_requests_factory(not_leader)

    # This should not raise an exception
    consumer.get_messages(20)

    # client should have updated metadata
    self.assertGreaterEqual(client.reset_topic_metadata.call_count, 1)
    self.assertGreaterEqual(client.load_metadata_for_topics.call_count, 1)
Example 11: consume_save
# Required import: from kafka import SimpleConsumer [as alias]
# Or alternatively: from kafka.SimpleConsumer import get_messages [as alias]
def consume_save(group, topic):
    tmp_save = open(tmp_file_path, "w")
    kafka_consumer = SimpleConsumer(kafka, group, topic)
    messages = kafka_consumer.get_messages(count=1000, block=False)
    if not messages:
        print "Consumer didn't read any messages"
    for message in messages:
        tmp_save.write(message.message.value + "\n")
        # print message.message.value + "\n"
    kafka_consumer.commit()  # inform zookeeper of position in the kafka queue
    print ".... ... .. .."
    print "Message from topic \"%s\" consumed \n" % topic
Example 12: run
# Required import: from kafka import SimpleConsumer [as alias]
# Or alternatively: from kafka.SimpleConsumer import get_messages [as alias]
def run(self, options=None):
    # Create the tracking table if it doesn't exist in the database
    if self.REDSHIFT.if_table_exists(self.TABLE_NAME) is False:
        self.REDSHIFT.execute(self.CREATE_TRACKING_TABLE)

    kafka = KafkaClient(config.get("kafka.host1") + "," + config.get("kafka.host2"))
    consumer = SimpleConsumer(kafka, self.GROUP_NAME, self.KAFKA_TOPIC, fetch_size_bytes=3000000,
                              buffer_size=2000000000, max_buffer_size=2000000000)

    while True:
        # Prepare data for insert and copy to S3
        data_str = StringIO()
        csv_str = StringIO()
        count = 0

        # Get offset from previous read
        s3_last_offset = self.get_s3_offset()
        last_offset = self.REDSHIFT.select(self.GET_OFFSET_QUERY)[0][0]
        last_offset = last_offset if last_offset else 0

        # Resolve difference in offset (S3 offset does not carry over from day to day)
        if s3_last_offset > last_offset:
            last_offset = s3_last_offset
            self.REDSHIFT.execute(self.UPDATE_OFFSET_QUERY % (self.GROUP_NAME, self.PARTITION, last_offset))
        print(last_offset)

        # Read from offset
        consumer.seek(last_offset, 0)
        for message in consumer.get_messages(count=self.BATCH_SIZE, block=False, timeout=5):
            # Write tweets to StringIO
            self.write_to_data_str(message, data_str, csv_str)
            count += 1
            last_offset += 1

        # Store batch of tweets to S3
        self.write_to_s3(data_str, csv_str, last_offset)

        # Track Kafka offset
        self.REDSHIFT.execute(self.UPDATE_OFFSET_QUERY % (self.GROUP_NAME, self.PARTITION, last_offset))

        if count != self.BATCH_SIZE:
            break
Example 13: Consumer
# Required import: from kafka import SimpleConsumer [as alias]
# Or alternatively: from kafka.SimpleConsumer import get_messages [as alias]
class Consumer(BaseStreamConsumer):
    """
    Used in DB and SW worker. SW consumes per partition.
    """
    def __init__(self, conn, topic, group, partition_id):
        self._conn = conn
        self._group = group
        self._topic = topic
        self._partition_ids = [partition_id] if partition_id is not None else None

        self._cons = None
        self._connect_consumer()

    def _connect_consumer(self):
        if self._cons is None:
            try:
                self._cons = SimpleConsumer(
                    self._conn,
                    self._group,
                    self._topic,
                    partitions=self._partition_ids,
                    buffer_size=1048576,
                    max_buffer_size=10485760)
            except BrokerResponseError:
                self._cons = None
                logger.warning("Could not connect consumer to Kafka server")
                return False
        return True

    def get_messages(self, timeout=0.1, count=1):
        if not self._connect_consumer():
            yield
        while True:
            try:
                for offmsg in self._cons.get_messages(
                        count,
                        timeout=timeout):
                    try:
                        yield offmsg.message.value
                    except ValueError:
                        logger.warning(
                            "Could not decode {0} message: {1}".format(
                                self._topic,
                                offmsg.message.value))
            except Exception as err:
                logger.warning("Error %s" % err)
            finally:
                break

    def get_offset(self):
        return 0
Example 14: validate_samza_job
# Required import: from kafka import SimpleConsumer [as alias]
# Or alternatively: from kafka.SimpleConsumer import get_messages [as alias]
def validate_samza_job():
    """
    Validates that negate-number negated all messages, and sent the output to
    samza-test-topic-output.
    """
    logger.info("Running validate_samza_job")
    kafka = util.get_kafka_client()
    kafka.ensure_topic_exists(TEST_OUTPUT_TOPIC)
    consumer = SimpleConsumer(kafka, "samza-test-group", TEST_OUTPUT_TOPIC)
    messages = consumer.get_messages(count=NUM_MESSAGES, block=True, timeout=300)
    message_count = len(messages)
    assert NUM_MESSAGES == message_count, "Expected {0} lines, but found {1}".format(NUM_MESSAGES, message_count)
    for message in map(lambda m: m.message.value, messages):
        assert int(message) < 0, "Expected negative integer but received {0}".format(message)
    kafka.close()
Example 15: consume_save
# Required import: from kafka import SimpleConsumer [as alias]
# Or alternatively: from kafka.SimpleConsumer import get_messages [as alias]
def consume_save(group, topic):
    i = 0
    tmp_save = open(tmp_file_path, "w")
    while True:
        kafka_consumer = SimpleConsumer(kafka, group, topic)
        messages = kafka_consumer.get_messages(count=1000, block=False)
        # if not messages:
        #     print "Consumer didn't read any messages"
        for message in messages:
            tmp_save.write(message.message.value + "\n")
            print message.message.value + "\n"
        # file size > 20MB
        if tmp_save.tell() > 20000000:
            push_to_hdfs(tmp_file_path)
            kafka_consumer.commit()  # inform zookeeper of position in the kafka queue