This article collects typical usage examples of the Python method kafka.SimpleConsumer.seek. If you are unsure what SimpleConsumer.seek does, how to call it, or want to see it used in context, the curated code samples below may help. You can also explore further usage examples of the containing class, kafka.SimpleConsumer.
The following presents 15 code examples of SimpleConsumer.seek, sorted by popularity by default.
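Before the examples, here is a minimal sketch of the seek(offset, whence) call they all rely on. In this older kafka-python SimpleConsumer API, whence=0 is interpreted relative to the earliest available offset (effectively an absolute position for a topic that starts at offset 0), whence=1 relative to the current position, and whence=2 relative to the latest offset (the tail). The broker address, consumer group, and topic below are placeholder values, not taken from any particular example.

from kafka import KafkaClient, SimpleConsumer

# Placeholder broker/group/topic names -- adjust for your own cluster.
client = KafkaClient("localhost:9092")
consumer = SimpleConsumer(client, "example-group", "example-topic")

consumer.seek(0, 2)    # jump to the tail: only messages produced from now on
consumer.seek(-10, 1)  # step back 10 messages from the current position
consumer.seek(0, 0)    # jump back to the earliest available offset

# Drain up to 10 messages without blocking indefinitely.
for message in consumer.get_messages(count=10, block=False, timeout=1):
    print("%d: %s" % (message.offset, message.message.value))

client.close()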
Example 1: test_ts
# Required module import: from kafka import SimpleConsumer [as alias]
# Or: from kafka.SimpleConsumer import seek [as alias]
def test_ts(self):
    kafka = KafkaClient(config.get("kafka.host1") + "," + config.get("kafka.host2"))
    # consumer = SimpleConsumer(kafka, "my-group112", "test")
    consumer = SimpleConsumer(kafka, self.GROUP_NAME, self.KAFKA_TOPIC,
                              fetch_size_bytes=3000000, buffer_size=2000000000, max_buffer_size=2000000000)
    while True:
        print("HELLO")
        # Prepare data for insert and copy to S3
        # data_str = StringIO()
        count = 0
        # last_offset = 2
        consumer.seek(2, 0)
        for message in consumer.get_messages(count=100, block=False, timeout=0.1):
            count += 1
            print(message.message.value)
            # # Write tweets to StringIO
            # self.write_to_data_str(message, data_str)
            # # Store batch tweets to S3
            # self.write_to_s3(data_str, last_offset)
        if count != 100:
            break
Example 2: setup_capture_new_messages_consumer
# Required module import: from kafka import SimpleConsumer [as alias]
# Or: from kafka.SimpleConsumer import seek [as alias]
def setup_capture_new_messages_consumer(topic):
    """Seeks to the tail of the topic then returns a function that can
    consume messages from that point.
    """
    kafka = KafkaClient(get_config().cluster_config.broker_list)
    group = str('data_pipeline_clientlib_test')
    consumer = SimpleConsumer(kafka, group, topic, max_buffer_size=_ONE_MEGABYTE)
    consumer.seek(0, 2)  # seek to tail, 0 is the offset, and 2 is the tail
    yield consumer
    kafka.close()
Example 3: run
# Required module import: from kafka import SimpleConsumer [as alias]
# Or: from kafka.SimpleConsumer import seek [as alias]
def run(self, options=None):
    # try:
    # Create table if it doesn't exist in the database
    if self.REDSHIFT.if_table_exists(self.TABLE_NAME) is False:
        self.REDSHIFT.execute(self.CREATE_TRACKING_TABLE)
    kafka = KafkaClient(config.get("kafka.host1") + "," + config.get("kafka.host2"))
    consumer = SimpleConsumer(kafka, self.GROUP_NAME, self.KAFKA_TOPIC, fetch_size_bytes=3000000,
                              buffer_size=2000000000, max_buffer_size=2000000000)
    while True:
        # Prepare data for insert and copy to S3
        data_str = StringIO()
        csv_str = StringIO()
        count = 0
        # Get Offset from previous read
        s3_last_offset = self.get_s3_offset()
        last_offset = self.REDSHIFT.select(self.GET_OFFSET_QUERY)[0][0]
        last_offset = last_offset if last_offset else 0
        # Resolve difference in offset (s3 offset does not carry over from day to day)
        if s3_last_offset > last_offset:
            last_offset = s3_last_offset
            self.REDSHIFT.execute(self.UPDATE_OFFSET_QUERY % (self.GROUP_NAME, self.PARTITION, last_offset))
        print(last_offset)
        # Read from Offset
        consumer.seek(last_offset, 0)
        for message in consumer.get_messages(count=self.BATCH_SIZE, block=False, timeout=5):
            # Write tweets to StringIO
            self.write_to_data_str(message, data_str, csv_str)
            count += 1
            last_offset += 1
        # Store batch tweets to S3
        self.write_to_s3(data_str, csv_str, last_offset)
        # Track Kafka Offset
        self.REDSHIFT.execute(self.UPDATE_OFFSET_QUERY % (self.GROUP_NAME, self.PARTITION, last_offset))
        if count != self.BATCH_SIZE:
            break
Example 4: HBaseServer
# Required module import: from kafka import SimpleConsumer [as alias]
# Or: from kafka.SimpleConsumer import seek [as alias]
class HBaseServer(threading.Thread):
    """
    HBase thread that will continuously read from Kafka queue
    """

    def __init__(self, kafka_url, kafka_topic, hbase_url, hbase_thrift_port, hbase_table):
        threading.Thread.__init__(self)
        self.kafka = KafkaClient(kafka_url)
        self.cons = SimpleConsumer(self.kafka, None, kafka_topic)
        self.cons.seek(0, 2)
        self.hbase_connect = happybase.Connection(hbase_url, hbase_thrift_port)
        self.car_table = self.hbase_connect.table(hbase_table)
        self.server_on_flag = True
        self.m = None
        self.payload = None
        self.vin = None
        self.time = None
        self.data = None
        self.row_key = None
        self.count = 0

    def run(self):
        while self.server_on_flag:
            self.m = self.cons.get_message(block=False)
            if self.m is not None:
                self.payload = json.loads(self.m.message.value)
                self.vin = str(self.payload['vin'])
                self.time = str(self.payload['timestamp'])
                self.data = str(self.payload['data'])
                self.row_key = self.vin + self.time
                try:
                    self.car_table.put(self.vin, {'user:mostrecent': self.time})
                    self.car_table.put(self.row_key, {'car:data': self.data})
                    self.count = self.count + 1
                    logger.info('HBase Server: key: %s, table: %s, car{data: %s}. Message number: %s',
                                self.row_key, 'rvi', self.data, str(self.count))
                except Exception as e:
                    logger.info('%s, Data Push into HBase unsuccessful...', e)
            else:
                sleep(0.2)  # pause briefly when the queue is empty

    def shutdown(self):
        self.server_on_flag = False
        logger.info('HBase Server shutting down...')
Example 5: RVIConsumer
# Required module import: from kafka import SimpleConsumer [as alias]
# Or: from kafka.SimpleConsumer import seek [as alias]
class RVIConsumer(threading.Thread):

    def __init__(self, kafka_addr, topic, vin, web_url):
        threading.Thread.__init__(self)
        self.kafka = KafkaClient(kafka_addr)  # kafka_addr
        self.cons = SimpleConsumer(self.kafka, None, topic)
        self.cons.seek(0, 2)
        self.vin = vin
        self.web_url = web_url
        self.flag = True
        self.count = 0
        self.sleep_count = 0
        self.headers = {'Content-Type': 'application/json'}

    def is_running(self):
        return self.flag

    def run(self):
        while self.flag:
            # cons = SimpleConsumer(kafka, None, 'rvi')
            m = self.cons.get_message(block=False)
            if m is not None:
                payload = json.loads(m.message.value)
                if payload['vin'] == self.vin:
                    self.sleep_count = 0
                    payloadtoweb = json.dumps(m.message.value)
                    r = requests.post(self.web_url, data=payloadtoweb, headers=self.headers)
                    if r.status_code == 200:
                        print m.message.value + " sent successfully\n"
                    else:
                        print "%s is not available, status code:%d...shutting down now..." % (self.web_url, r.status_code)
                        self.shutdown()
            else:
                if self.sleep_count > 100000:
                    print "No new data for %s... Timing out" % self.vin
                    self.shutdown()
                time.sleep(0.2)  # pause briefly between polls
                self.sleep_count = self.sleep_count + 1

    def shutdown(self):
        self.flag = False
        requests.post(self.web_url, data=json.dumps({'vin': self.vin, 'data': 'EOM'}), headers=self.headers)
        print "%s consumer thread shutting down" % self.vin
Example 6: read_kafka
# Required module import: from kafka import SimpleConsumer [as alias]
# Or: from kafka.SimpleConsumer import seek [as alias]
def read_kafka():
    """
    read socialSignal, keep if non are zero, save to mongo social/socialSignal
    :return:
    """
    msg_buffer = dict()
    ids = set()
    in_kafka = KafkaClient(settings.IN_SOCIAL_SIGNAL_KAFKA)
    consumer = SimpleConsumer(in_kafka, 'comment.pages1', 'comment.pages', max_buffer_size=20 * 1024 * 1024,
                              fetch_size_bytes=2 * 1024 * 1024, buffer_size=2 * 1024 * 1024)
    consumer.seek(0, 0)
    for msg in consumer:
        if "001WxC6D" in msg.message.value:
            print msg.message.value
Example 7: Consumer
# Required module import: from kafka import SimpleConsumer [as alias]
# Or: from kafka.SimpleConsumer import seek [as alias]
class Consumer(Thread):

    def __init__(self, args=()):
        super(Consumer, self).__init__()
        self.host = args[0]
        self.port = args[1]
        self.topic = args[2]
        print '[KafkaConsumer] host: {0}, port: {1}, topic: {2}'.format(self.host, self.port, self.topic)
        self.consumer = None
        self.consumer_keep_run = True
        self.consumer_paused = False
        self.consumer_subscribers = []

    def run(self):
        client = kafka_client(self.host, self.port)
        self.consumer = SimpleConsumer(client, None, self.topic)
        self.consumer.seek(0, 1)
        while self.consumer_keep_run:
            print '[KafkaConsumer] looping..'
            if not self.consumer_paused:
                for message in self.consumer.get_messages(block=False):
                    offset = message.offset
                    value = message.message.value
                    j_encoded = json.dumps({'offset': offset, 'message': value})
                    print '[KafkaConsumer] {}'.format(j_encoded)
                    for subscriber in self.consumer_subscribers:
                        IOLoop.instance().add_callback(partial(subscriber.send_message, j_encoded))
            time.sleep(1)

    def pause_consumer(self, paused):
        self.consumer_paused = paused

    def stop_consumer(self):
        self.consumer_keep_run = False

    def add_subscriber(self, subscriber):
        self.consumer_subscribers.append(subscriber)

    def remove_subscriber(self, subscriber):
        self.consumer_subscribers.remove(subscriber)

    def get_subscribers_length(self):
        length = len(self.consumer_subscribers)
        return length

    def get_subscribers(self):
        return self.consumer_subscribers
Example 8: kafka_stream
# Required module import: from kafka import SimpleConsumer [as alias]
# Or: from kafka.SimpleConsumer import seek [as alias]
def kafka_stream():
    # global visualization_topic
    # topic = visualization_topic
    # print "DEBUG stream topic: " + topic
    topic = "web"
    kafka = KafkaClient("localhost:9092")
    consumer = SimpleConsumer(kafka, "python", topic)
    consumer.seek(offset=0, whence=2)
    # topic = None

    def gen():
        for message in consumer:
            yield 'data: %s\n\n' % str(message.message.value)

    print "DEBUG: Kafka Stream Connected"
    return Response(gen(), mimetype="text/event-stream")
Example 9: read_kafka
# Required module import: from kafka import SimpleConsumer [as alias]
# Or: from kafka.SimpleConsumer import seek [as alias]
def read_kafka(docid):
    """
    read socialSignal, keep if non are zero, save to mongo social/socialSignal
    :return:
    """
    msg_buffer = dict()
    ids = set()
    in_kafka = KafkaClient(settings.IN_SOCIAL_SIGNAL_KAFKA)
    consumer = SimpleConsumer(in_kafka, 'test0', TOPIC, max_buffer_size=20 * 1024 * 1024,
                              fetch_size_bytes=2 * 1024 * 1024, buffer_size=2 * 1024 * 1024)
    consumer.seek(6000000, 0)
    for msg in consumer:
        if msg.offset % 100000 == 0:
            print 'working on ', msg.offset
        if docid in msg.message.value:
            print msg.message.value
Example 10: main
# Required module import: from kafka import SimpleConsumer [as alias]
# Or: from kafka.SimpleConsumer import seek [as alias]
def main():
    """
    Usage:
        dump_to_mongodb dump <topic> --host=<host> [--consumer=<consumer>]
    """
    args = docopt(main.__doc__)
    host = args["--host"]
    print "=> Connecting to {0}...".format(host)
    logger.info("=> Connecting to {0}...".format(host))
    kafka = KafkaClient(host)
    print "=> Connected."
    logger.info("=> Connected.")
    if args["dump"]:
        topic = args["<topic>"]
        consumer_id = args["--consumer"] or "dump_to_mongodb"
        consumer = SimpleConsumer(kafka, consumer_id, topic,
                                  buffer_size=1024 * 200,       # 200 KB
                                  fetch_size_bytes=1024 * 200,  # 200 KB
                                  max_buffer_size=None)         # eliminate big message errors
        consumer.seek(0, 1)
        while True:
            try:
                message = consumer.get_message()
                if message is None:
                    time.sleep(1)
                    continue
                val = message.message.value
                logger.info("message.message.value== %s " % val)
                print('val==', val)
                try:
                    item = json.loads(val)
                except:
                    continue
                if 'meta' in item and 'collection_name' in item['meta']:
                    _insert_item_to_monggodb(item)
            except:
                traceback.print_exc()
                break
    kafka.close()
    return 0
Example 11: event_consumer
# Required module import: from kafka import SimpleConsumer [as alias]
# Or: from kafka.SimpleConsumer import seek [as alias]
def event_consumer(kafka_config, lock, trigger_manager):
    kafka_url = kafka_config['url']
    group = kafka_config['event_group']
    # read from the 'transformed_events_topic' in the future
    # reading events sent from API POST Event now
    topic = kafka_config['events_topic']
    kafka = KafkaClient(kafka_url)
    consumer = SimpleConsumer(kafka,
                              group,
                              topic,
                              auto_commit=True,
                              # auto_commit_every_n=None,
                              # auto_commit_every_t=None,
                              # iter_timeout=1,
                              fetch_size_bytes=kafka_config['events_fetch_size_bytes'],
                              buffer_size=kafka_config['events_buffer_size'],
                              max_buffer_size=kafka_config['events_max_buffer_size'])
    consumer.seek(0, 2)

    for e in consumer:
        log.debug('Received an event')
        offset, message = e
        envelope = json.loads(message.value)
        event = envelope['event']

        # convert iso8601 string to a datetime for winchester
        # Note: the distiller knows how to convert these, based on
        # event_definitions.yaml
        if 'timestamp' in event:
            event['timestamp'] = iso8601.parse_date(
                event['timestamp'],
                default_timezone=None)
        if 'launched_at' in event:
            event['launched_at'] = iso8601.parse_date(
                event['launched_at'],
                default_timezone=None)

        lock.acquire()
        trigger_manager.add_event(event)
        lock.release()
Example 12: pipe_stream_definition_consumer
# Required module import: from kafka import SimpleConsumer [as alias]
# Or: from kafka.SimpleConsumer import seek [as alias]
def pipe_stream_definition_consumer(kafka_config, lock, pipe):
    kafka_url = kafka_config['url']
    group = kafka_config['stream_def_pipe_group']
    topic = kafka_config['stream_def_topic']
    kafka = KafkaClient(kafka_url)
    consumer = SimpleConsumer(kafka,
                              group,
                              topic,
                              auto_commit=True,
                              # auto_commit_every_n=None,
                              # auto_commit_every_t=None,
                              # iter_timeout=1,
                              fetch_size_bytes=kafka_config['events_fetch_size_bytes'],
                              buffer_size=kafka_config['events_buffer_size'],
                              max_buffer_size=kafka_config['events_max_buffer_size'])
    consumer.seek(0, 2)

    for s in consumer:
        offset, message = s
        stream_def = json.loads(message.value)

        if 'stream-definition-created' in stream_def:
            log.debug('Received a stream-definition-created event')
            stream_create = event_processor.stream_def_to_winchester_format(
                stream_def['stream-definition-created'])
            lock.acquire()
            pipe.add_trigger_definition(stream_create)
            lock.release()
        elif 'stream-definition-deleted' in stream_def:
            log.debug('Received a stream-definition-deleted event')
            name = event_processor.stream_unique_name(
                stream_def['stream-definition-deleted'])
            lock.acquire()
            pipe.delete_trigger_definition(name)
            lock.release()
        else:
            log.error('Unknown event received on stream_def_topic')
Example 13: event_consumer
# Required module import: from kafka import SimpleConsumer [as alias]
# Or: from kafka.SimpleConsumer import seek [as alias]
def event_consumer(self, conf, lock, trigger_manager):
    kafka_url = conf.kafka.url
    group = conf.kafka.event_group
    topic = conf.kafka.events_topic
    kafka = KafkaClient(kafka_url)
    consumer = SimpleConsumer(kafka, group, topic, auto_commit=True)
    consumer.seek(0, 2)

    statsd = monascastatsd.Client(name="monasca", dimensions=self.dimensions)
    events_consumed = statsd.get_counter("events_consumed")
    events_persisted = statsd.get_counter("events_persisted")

    for e in consumer:
        log.debug("Received an event")
        events_consumed.increment()
        offset, message = e
        envelope = json.loads(message.value)
        event = envelope["event"]
        if "timestamp" in event:
            event["timestamp"] = iso8601.parse_date(event["timestamp"], default_timezone=None)
        lock.acquire()
        try:
            # should have add_event return True or False
            prev_saved_events = trigger_manager.saved_events
            trigger_manager.add_event(event)
            if trigger_manager.saved_events > prev_saved_events:
                events_persisted.increment()
            else:
                log.warning("Invalid or Duplicate Event. Could not add_event to mysql.")
        except Exception as e:
            log.exception(e)
        finally:
            lock.release()
Example 14: TestRedisMonitor
# Required module import: from kafka import SimpleConsumer [as alias]
# Or: from kafka.SimpleConsumer import seek [as alias]
class TestRedisMonitor(TestCase):

    maxDiff = None
    queue_key = "link:istresearch.com:queue"

    def setUp(self):
        self.redis_monitor = RedisMonitor("localsettings.py")
        self.redis_monitor.settings = self.redis_monitor.wrapper.load("localsettings.py")
        self.redis_monitor.logger = MagicMock()
        self.redis_monitor.settings['KAFKA_TOPIC_PREFIX'] = "demo_test"
        self.redis_monitor.settings['STATS_TOTAL'] = False
        self.redis_monitor.settings['STATS_PLUGINS'] = False
        self.redis_monitor.settings['PLUGINS'] = {
            'plugins.info_monitor.InfoMonitor': None,
            'plugins.stop_monitor.StopMonitor': None,
            'plugins.expire_monitor.ExpireMonitor': None,
            'tests.tests_online.CustomMonitor': 100,
        }
        self.redis_monitor.redis_conn = redis.Redis(
            host=self.redis_monitor.settings['REDIS_HOST'],
            port=self.redis_monitor.settings['REDIS_PORT'])
        self.redis_monitor._load_plugins()
        self.redis_monitor.stats_dict = {}

        self.kafka_conn = KafkaClient(self.redis_monitor.settings['KAFKA_HOSTS'])
        self.kafka_conn.ensure_topic_exists("demo_test.outbound_firehose")
        self.consumer = SimpleConsumer(
            self.kafka_conn,
            "demo-id",
            "demo_test.outbound_firehose"
        )

    def test_process_item(self):
        # we only want to go to the end now, not after this test is run
        self.consumer.seek(0, 2)

        # set the info flag
        key = "info-test:blah"
        value = "ABC123"
        self.redis_monitor.redis_conn.set(key, value)

        # process the request
        plugin = self.redis_monitor.plugins_dict.items()[0][1]
        self.redis_monitor._process_plugin(plugin)

        # ensure the key is gone
        self.assertEquals(self.redis_monitor.redis_conn.get(key), None)

    def test_sent_to_kafka(self):
        success = {
            u'info-test': "ABC123",
            u"appid": u"someapp"
        }

        # ensure it was sent out to kafka
        message_count = 0
        for message in self.consumer.get_messages():
            if message is None:
                break
            else:
                the_dict = json.loads(message.message.value)
                self.assertEquals(success, the_dict)
                message_count += 1

        self.assertEquals(message_count, 1)
Example 15: _run
# Required module import: from kafka import SimpleConsumer [as alias]
# Or: from kafka.SimpleConsumer import seek [as alias]
def _run(self):
    pcount = 0
    pause = False
    while True:
        try:
            if pause:
                gevent.sleep(2)
                pause = False
            self._logger.error("New KafkaClient %s" % self._topic)
            self._kfk = KafkaClient(self._brokers, "kc-" + self._topic)
            try:
                consumer = SimpleConsumer(self._kfk, self._group, self._topic,
                                          buffer_size=4096 * 4, max_buffer_size=4096 * 32)
            # except:
            except Exception as ex:
                template = "Consumer Failure {0} occured. Arguments:\n{1!r}"
                messag = template.format(type(ex).__name__, ex.args)
                self._logger.error("Error: %s trace %s" % \
                    (messag, traceback.format_exc()))
                raise RuntimeError(messag)

            self._logger.error("Starting %s" % self._topic)

            # Find the offset of the last message that has been queued
            consumer.seek(-1, 2)
            try:
                mi = consumer.get_message(timeout=0.1)
                consumer.commit()
            except common.OffsetOutOfRangeError:
                mi = None
            # import pdb; pdb.set_trace()
            self._logger.info("Last Queued for %s is %s" % \
                (self._topic, str(mi)))

            # start reading from last previously processed message
            if mi is not None:
                consumer.seek(-1, 1)
            else:
                consumer.seek(0, 0)

            if self._limit:
                raise gevent.GreenletExit

            while True:
                try:
                    mlist = consumer.get_messages(10, timeout=0.5)
                    if not self.msg_handler(mlist):
                        raise gevent.GreenletExit
                    consumer.commit()
                    pcount += len(mlist)
                except TypeError as ex:
                    self._logger.error("Type Error: %s trace %s" % \
                        (str(ex.args), traceback.format_exc()))
                    gevent.sleep(0.1)
                except common.FailedPayloadsError as ex:
                    self._logger.error("Payload Error: %s" % str(ex.args))
                    gevent.sleep(0.1)
        except gevent.GreenletExit:
            break
        except AssertionError as ex:
            self._partoffset = ex
            break
        except Exception as ex:
            template = "An exception of type {0} occured. Arguments:\n{1!r}"
            messag = template.format(type(ex).__name__, ex.args)
            self._logger.error("%s : traceback %s" % \
                (messag, traceback.format_exc()))
            self.stop_partition()
            pause = True

    self._logger.error("Stopping %s pcount %d" % (self._topic, pcount))
    partdb = self.stop_partition()
    return self._partoffset, partdb