This page collects typical usage examples of the Python method kafka.consumer.SimpleConsumer.commit. If you have been wondering what exactly SimpleConsumer.commit does, how to call it, or what real code that uses it looks like, the hand-picked examples below should help. You can also explore further usage examples of the containing class, kafka.consumer.SimpleConsumer.
Eight code examples of the SimpleConsumer.commit method are shown below, sorted by popularity by default. You can upvote the examples you like or find useful; your ratings help the system recommend better Python code examples.
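Before diving into the collected examples, here is a minimal sketch of the pattern they all share: create a KafkaClient and a SimpleConsumer, consume a batch of messages, then call commit() to persist the consumed offsets for the consumer group. The sketch assumes the legacy kafka-python API used throughout this page; the broker address, group, and topic names are placeholders.
from kafka import KafkaClient
from kafka.consumer import SimpleConsumer

client = KafkaClient("localhost:9092")                     # placeholder broker address
consumer = SimpleConsumer(client, "my-group", "my-topic")  # placeholder group and topic

while True:
    # Fetch up to 100 messages without blocking; returns an empty list when caught up.
    messages = consumer.get_messages(count=100, block=False)
    for message in messages:
        print(message.message.value)
    if messages:
        # Persist the offsets consumed so far for this consumer group.
        consumer.commit()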
Example 1: run
# Required module import: from kafka.consumer import SimpleConsumer [as alias]
# Or: from kafka.consumer.SimpleConsumer import commit [as alias]
def run(self):
client = KafkaClient(self.bootstrap_server, client_id='commandline')
consumer = SimpleConsumer(client, self.group, self.topic, auto_commit_every_n=1, buffer_size=160,
auto_commit=True)
for message in consumer:
now = datetime.now()
print("%s: %s" % (now, message))
consumer.commit()
Example 2: Consumer
# Required module import: from kafka.consumer import SimpleConsumer [as alias]
# Or: from kafka.consumer.SimpleConsumer import commit [as alias]
class Consumer(object):
def __init__(self, addr, group, topic):
"""Initialize Consumer with kafka broker IP, group, and topic."""
self.client = KafkaClient(addr)
self.consumer = SimpleConsumer(self.client, group, topic,
max_buffer_size=1310720000)
self.temp_file_path = None
self.temp_file = None
self.hadoop_path = "/insight/artsy/geo"
self.topic = topic
self.group = group
self.block_cnt = 0
def consume_topic(self, output_dir):
"""Consumes a stream of messages from the "post_geo_activity" topic.
Code template from https://github.com/ajmssc/bitcoin-inspector.git
"""
timestamp = time.strftime('%Y%m%d%H%M%S')
# open file for writing
self.temp_file_path = "%s/kafka_%s_%s_%s.dat" % (output_dir,self.topic,self.group,timestamp)
self.temp_file = open(self.temp_file_path,"w")
while True:
try:
# get 1000 messages at a time, non blocking
messages = self.consumer.get_messages(count=1000, block=False)
for message in messages:
self.temp_file.write(message.message.value + "\n")
# file size > 20MB
if self.temp_file.tell() > 20000000:
self.flush_to_hdfs(output_dir)
self.consumer.commit()
except:
# move to tail of kafka topic if consumer is referencing
# unknown offset
self.consumer.seek(0, 2)
def flush_to_hdfs(self, output_dir):
"""Flushes the 20MB file into HDFS."""
self.temp_file.close()
timestamp = time.strftime('%Y%m%d%H%M%S')
hadoop_fullpath = "%s/%s_%s_%s.dat" % (self.hadoop_path, self.group,self.topic, timestamp)
print "Block {}: Flushing data file to HDFS => {}".format(str(self.block_cnt),hadoop_fullpath)
self.block_cnt += 1
os.system("hdfs dfs -put %s %s" % (self.temp_file_path, hadoop_fullpath)) # save from local to hdfs
os.remove(self.temp_file_path) # remove temp local file
timestamp = time.strftime('%Y%m%d%H%M%S')
self.temp_file_path = "%s/kafka_%s_%s_%s.dat" % (output_dir,self.topic,self.group,timestamp)
self.temp_file = open(self.temp_file_path, "w")
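A minimal, hypothetical driver for Example 2's Consumer might look like the sketch below. The broker address, group name, and output directory are placeholders; only the topic name, post_geo_activity, comes from the example's docstring.
if __name__ == "__main__":
    # All arguments are placeholders; Consumer is the class defined in Example 2.
    consumer = Consumer("localhost:9092", "hdfs-loader", "post_geo_activity")
    # Runs until interrupted: buffers messages into ~20MB local files and pushes
    # each block to HDFS before committing the consumed offsets.
    consumer.consume_topic("/home/ubuntu/kafka_blocks")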
Example 3: Consumer
# Required module import: from kafka.consumer import SimpleConsumer [as alias]
# Or: from kafka.consumer.SimpleConsumer import commit [as alias]
class Consumer(object):
def __init__(self, addr, group, topic):
self.client = KafkaClient(addr)
self.consumer = SimpleConsumer(self.client, group, topic, max_buffer_size=1310720000, auto_offset_reset='smallest')
self.temp_file_path = None
self.temp_file = None
self.topic = topic
self.group = group
self.block_cnt = 0
def consume_topic(self):
timestamp = time.strftime('%Y%m%d%H%M%S')
#open file for writing
self.temp_file_path = "/home/ubuntu/datamill/kafka_%s_%s_%s.dat" % (self.topic, self.group, timestamp)
self.temp_file = open(self.temp_file_path,"w")
header = 'experiment_id,job_id,results_file,package_id,package_name,worker_id,config_id,replicate_no,setup_time,run_time,collect_time,hw_cpu_arch,hw_cpu_mhz,hw_gpu_mhz,hw_num_cpus,hw_page_sz,hw_ram_mhz,hw_ram_sz,sw_address_randomization,sw_autogroup,sw_compiler,sw_drop_caches,sw_env_padding,sw_filesystem,sw_freq_scaling,sw_link_order,sw_opt_flag,sw_swap,sw_sys_time'
self.temp_file.write(header + "\n")
while True:
try:
messages = self.consumer.get_messages(count=100, block=False)
for message in messages:
self.temp_file.write(message.message.value + "\n")
if self.temp_file.tell() > 20000:
self.save_to_hdfs()
self.consumer.commit()
except:
self.consumer.seek(0, 2)
self.consumer.commit()
def save_to_hdfs(self):
self.temp_file.close()
timestamp = time.strftime('%Y%m%d%H%M%S')
hadoop_path = "/datamill/%s_%s_%s.csv" % (self.group, self.topic, timestamp)
print "Block " + str(self.block_cnt) + ": Saving file to HDFS " + hadoop_path
self.block_cnt += 1
# place blocked messages into history and cached folders on hdfs
os.system("hdfs dfs -put %s %s" % (self.temp_file_path, hadoop_path))
os.remove(self.temp_file_path)
timestamp = time.strftime('%Y%m%d%H%M%S')
self.temp_file_path = "/home/ubuntu/datamill/kafka_%s_%s_%s.dat" % (self.topic, self.group, timestamp)
self.temp_file = open(self.temp_file_path, "w")
Example 4: ZKConsumer
# Required module import: from kafka.consumer import SimpleConsumer [as alias]
# Or: from kafka.consumer.SimpleConsumer import commit [as alias]
#......... part of the code omitted here .........
client_id=self.zkp._identifier)
else:
if self.consumer is None or \
sorted(my_partitions) != sorted(self.consumer.offsets.keys()):
self.logger.warn('Partitions changed, restarting Kafka consumer.')
self.consumer.stop()
else:
self.logger.info('Partitions unchanged, not restarting Kafka consumer.')
return
self.consumer = SimpleConsumer(self.client, self.group, self.topic,
partitions=my_partitions,
**self.consumer_kwargs)
self.consumer.provide_partition_info()
self.logger.info("Consumer connected to Kafka: %s", self.consumer.offsets)
def stop(self):
if self.consumer is not None:
self.logger.info('Stopping Kafka consumer')
self.consumer.stop()
self.consumer = None
if self.client is not None:
self.logger.info('Stopping Kafka client')
self.client.close()
self.client = None
if self.zk is not None:
self.logger.info('Stopping ZooKeeper client')
if self.zkp is not None and not self.zkp.failed:
self.zkp.finish()
self.zk.stop()
self.zkp = None
self.zk = None
def commit(self, partitions=None):
"""
Commit offsets for this consumer
partitions: list of partitions to commit, default is to commit
all of them
"""
if self.consumer is None:
return
self.logger.debug('Begin committing offsets for partitions: %s',
partitions if partitions else 'All')
self.consumer.commit(partitions)
self.logger.debug('End committing offsets for partitions: %s',
partitions if partitions else 'All')
def pending(self, partitions=None):
"""
Gets the pending message count
partitions: list of partitions to check for, default is to check all
"""
return self.consumer.pending(partitions)
def provide_partition_info(self):
"""
Indicates that partition info must be returned by the consumer
"""
self.consumer.provide_partition_info()
def seek(self, offset, whence):
"""
Alter the current offset in the consumer, similar to fseek
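Example 4's excerpt omits the ZKConsumer constructor, so the class cannot be instantiated from the code shown here alone. As a hedged illustration only, a helper like the sketch below could exercise the commit wrapper on an already-constructed instance; the function name and the logging are assumptions, not part of the original.
def checkpoint(zk_consumer, partitions=None):
    # zk_consumer is assumed to be an already-constructed, connected ZKConsumer.
    backlog = zk_consumer.pending(partitions)   # messages not yet consumed
    print("pending before commit: %s" % backlog)
    # Delegates to SimpleConsumer.commit(); commit() returns quietly if the
    # underlying Kafka consumer has already been stopped (see the guard above).
    zk_consumer.commit(partitions)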
Example 5: Consumer
# Required module import: from kafka.consumer import SimpleConsumer [as alias]
# Or: from kafka.consumer.SimpleConsumer import commit [as alias]
class Consumer(object):
"""Kafka consumer class with functions to consume messages to HDFS.
Messages are blocked into 20MB files and transferred to HDFS
Attributes:
client: string representing IP:port of the kafka broker
consumer: Consumer object specifying the client group, and topic
temp_file_path: location of the 20MB file to be appended to before
transfer to HDFS
temp_file: File object opened from temp_file_path
topic: String representing the topic on Kafka
group: String representing the Kafka consumer group to be associated
with
block_cnt: integer representing the block count for print statements
"""
def __init__(self, addr, group, topic):
"""Initialize Consumer with kafka broker IP, group, and topic."""
self.client = KafkaClient(addr)
self.consumer = SimpleConsumer(self.client, group, topic, max_buffer_size=1310720000)
self.temp_file_path = None
self.temp_file = None
self.hadoop_path = "/user/parking_data/history"
self.topic = topic
self.group = group
self.block_cnt = 0
def consume_topic(self, output_dir):
"""Consumes a stream of messages from the "messages" topic.
Code template from https://github.com/ajmssc/bitcoin-inspector.git
Args:
output_dir: string representing the directory to store the 20MB
before transferring to HDFS
Returns:
None
"""
timestamp = time.strftime("%Y%m%d%H%M%S")
# open file for writing
self.temp_file_path = "%s/kafka_%s_%s_%s.dat" % (output_dir, self.topic, self.group, timestamp)
self.temp_file = open(self.temp_file_path, "w")
# while True:
for ii in range(0, 2):
try:
# get 1000 messages at a time, non blocking
messages = self.consumer.get_messages(count=1000, block=False)
# OffsetAndMessage(offset=43, message=Message(magic=0,
# attributes=0, key=None, value='some message'))
for message in messages:
self.temp_file.write(message.message.value + "\n")
# file size > 20MB
if self.temp_file.tell() > 20000000:
self.flush_to_hdfs(output_dir)
self.consumer.commit()
except:
# move to tail of kafka topic if consumer is referencing
# unknown offset
self.consumer.seek(0, 2)
def flush_to_hdfs(self, output_dir):
"""Flushes the 20MB file into HDFS.
Code template from https://github.com/ajmssc/bitcoin-inspector.git
Flushes the file into HDFS folders
Args:
output_dir: string representing the directory to store the 20MB
before transferring to HDFS
Returns:
None
"""
self.temp_file.close()
timestamp = time.strftime("%Y%m%d%H%M%S")
hadoop_fullpath = "%s/%s_%s_%s.dat" % (self.hadoop_path, self.group, self.topic, timestamp)
print "Block {}: Flushing 20MB file to HDFS => {}".format(str(self.block_cnt), hadoop_fullpath)
self.block_cnt += 1
# place blocked messages into history and cached folders on hdfs
print ("hdfs dfs -put %s %s" % (self.temp_file_path, hadoop_fullpath))
os.system("sudo hdfs dfs -put %s %s" % (self.temp_file_path, hadoop_fullpath))
# os.system("sudo -u hdfs hdfs dfs -put %s %s" % (self.temp_file_path,
# cached_fullpath))
os.remove(self.temp_file_path)
timestamp = time.strftime("%Y%m%d%H%M%S")
self.temp_file_path = "%s/kafka_%s_%s_%s.dat" % (output_dir, self.topic, self.group, timestamp)
self.temp_file = open(self.temp_file_path, "w")
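Most of the Consumer examples on this page share one design: messages are buffered into a local file, and commit() is called only after that file has been handed to HDFS, so a crash before the flush replays messages instead of losing them. Condensed into a single hedged sketch (the names are illustrative and not taken from any one example):
def drain_once(consumer, temp_file, flush_to_hdfs, max_bytes=20000000):
    # consumer: a SimpleConsumer; temp_file: an open local file;
    # flush_to_hdfs: a callable that ships the file to HDFS. All are illustrative.
    for message in consumer.get_messages(count=1000, block=False):
        temp_file.write(message.message.value + "\n")
    if temp_file.tell() > max_bytes:
        flush_to_hdfs()    # move the buffered block into HDFS first ...
        consumer.commit()  # ... then mark the consumed offsets as processed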
Example 6: Consumer
# Required module import: from kafka.consumer import SimpleConsumer [as alias]
# Or: from kafka.consumer.SimpleConsumer import commit [as alias]
class Consumer(object):
def __init__(self, addr, group, topic):
self.client = KafkaClient(addr)
self.consumer = SimpleConsumer(self.client, group, topic, max_buffer_size=1310720000)
self.temp_file_path = None
self.temp_file = None
self.hadoop_path = "/user/AdReport/%s/history" %(topic)
self.cached_path = "/user/AdReport/%s/cached" % (topic)
self.topic = topic
self.group = group
self.block_cnt = 0
def consume_topic(self, output_dir):
timestamp = time.strftime('%Y%m%d%H%M%S')
#open file for writing
self.temp_file_path = "%s/kafka_%s_%s_%s.dat" % (output_dir,
self.topic,
self.group,
timestamp)
self.temp_file = open(self.temp_file_path,"w")
print(self.temp_file)
#one_entry = False
while True:
try:
messages = self.consumer.get_messages(count=10, block=False)
#OffsetAndMessage(offset=43, message=Message(magic=0,
# attributes=0, key=None, value='some message'))
for message in messages:
print (message)
#one_entry = True
#print (self.temp_file.tell())
self.temp_file.write(message.message.value + "\n")
if self.temp_file.tell() > 2000000:
self.save_to_hdfs(output_dir)
self.consumer.commit()
except:
self.consumer.seek(0, 2)
#if one_entry:
#print ("sending to hdfs")
#self.save_to_hdfs(output_dir, self.topic)
#self.consumer.commit()
def save_to_hdfs(self, output_dir):
print ("Saving file to hdfs")
self.temp_file.close()
print ("Closed open file")
timestamp = time.strftime('%Y%m%d%H%M%S')
hadoop_fullpath = "%s/%s_%s_%s.dat" % (self.hadoop_path, self.group,
self.topic, timestamp)
cached_fullpath = "%s/%s_%s_%s.dat" % (self.cached_path, self.group,
self.topic, timestamp)
#print ("Block " + str(self.block_cnt) + ": Saving file to HDFS " + hadoop_fullpath)
self.block_cnt += 1
# place blocked messages into history and cached folders on hdfs
os.system("sudo -u ubuntu /usr/local/hadoop/bin/hdfs dfs -put %s %s" % (self.temp_file_path,
hadoop_fullpath))
os.system("sudo -u ubuntu /usr/local/hadoop/bin/hdfs dfs -put %s %s" % (self.temp_file_path,
cached_fullpath))
os.remove(self.temp_file_path)
timestamp = time.strftime('%Y%m%d%H%M%S')
self.temp_file_path = "%s/kafka_%s_%s_%s.dat" % (output_dir,
self.topic,
self.group,
timestamp)
self.temp_file = open(self.temp_file_path, "w")
Example 7: Consumer
# Required module import: from kafka.consumer import SimpleConsumer [as alias]
# Or: from kafka.consumer.SimpleConsumer import commit [as alias]
class Consumer(object):
def __init__(self, addr, group, topic):
self.client = KafkaClient(addr)
self.consumer = SimpleConsumer(self.client, group, topic, max_buffer_size=1310720000)
self.temp_file_path = None
self.temp_file = None
self.topic = topic
self.group = group
self.block_cnt = 0
def consume_topic(self, output_dir):
timestamp = time.strftime('%Y%m%d%H%M%S')
#open file for writing
self.temp_file_path = "/home/ubuntu/FantasyFootball/ingestion/kafka_%s_%s_%s.dat" % (self.topic, self.group, timestamp)
self.temp_file = open(self.temp_file_path,"w")
one_entry = False
while True:
try:
messages = self.consumer.get_messages(count=100, block=False)
#OffsetAndMessage(offset=43, message=Message(magic=0,
# attributes=0, key=None, value='some message'))
for message in messages:
one_entry = True
self.temp_file.write(message.message.value + "\n")
if self.temp_file.tell() > 2000:
self.save_to_hdfs(output_dir)
self.consumer.commit()
except:
self.consumer.seek(0, 2)
if one_entry:
self.save_to_hdfs(output_dir)
self.consumer.commit()
def save_to_hdfs(self, output_dir):
self.temp_file.close()
timestamp = time.strftime('%Y%m%d%H%M%S')
hadoop_path = "/user/solivero/playerpoints/history/%s_%s_%s.dat" % (self.group, self.topic, timestamp)
cached_path = "/user/solivero/playerpoints/cached/%s_%s_%s.dat" % (self.group, self.topic, timestamp)
print "Block " + str(self.block_cnt) + ": Saving file to HDFS " + hadoop_path
self.block_cnt += 1
# place blocked messages into history and cached folders on hdfs
os.system("sudo -u hdfs hdfs dfs -put %s %s" % (self.temp_file_path,hadoop_path))
os.system("sudo -u hdfs hdfs dfs -put %s %s" % (self.temp_file_path,cached_path))
os.remove(self.temp_file_path)
timestamp = time.strftime('%Y%m%d%H%M%S')
self.temp_file_path = "/home/ubuntu/fantasyfootball/ingestion/kafka_%s_%s_%s.dat" % (self.topic, self.group, timestamp)
self.temp_file = open(self.temp_file_path, "w")
Example 8: Consumer
# Required module import: from kafka.consumer import SimpleConsumer [as alias]
# Or: from kafka.consumer.SimpleConsumer import commit [as alias]
class Consumer(object):
def __init__(self, addr, group, topic):
self.client = KafkaClient(addr)
self.consumer = SimpleConsumer(self.client, group, topic,
max_buffer_size=1310720000)
self.temp_file_path = None
self.temp_file = None
self.topic = topic
self.group = group
self.block_cnt = 0
os.system ( "hdfs dfs -mkdir /data2" )
def consume_topic(self, output_dir):
if not os.path.isdir(output_dir): os.makedirs(output_dir)
timestamp = time.strftime('%Y%m%d%H%M%S')
self.temp_file_path = "%s/kafka_%s_%s_%s.dat" % (output_dir,
self.topic,
self.group,
timestamp)
self.temp_file = open(self.temp_file_path,"w")
while True:
try:
# get 1000 messages at a time, non blocking
messages = self.consumer.get_messages(count=1000, block=False)
# OffsetAndMessage(offset=43, message=Message(magic=0,
# attributes=0, key=None, value='some message'))
for message in messages:
self.temp_file.write(message.message.value + "\n")
# file size > 40MB
if self.temp_file.tell() > 40000000:
self.flush_to_hdfs(output_dir)
self.consumer.commit()
except:
# move to tail of kafka topic if consumer is referencing
# unknown offset
self.consumer.seek(0, 2)
def flush_to_hdfs(self, output_dir):
self.temp_file.close()
timestamp = time.strftime('%Y%m%d%H%M%S')
print "Block {}: Flushing 40MB file to HDFS => /data2".format(str(self.block_cnt))
self.block_cnt += 1
# place blocked messages into history and cached folders on hdfs
os.system("hdfs dfs -copyFromLocal %s %s" % (self.temp_file_path,
"/data2"))
os.remove(self.temp_file_path)
timestamp = time.strftime('%Y%m%d%H%M%S')
self.temp_file_path = "%s/kafka_%s_%s_%s.dat" % (output_dir,
self.topic,
self.group,
timestamp)
self.temp_file = open(self.temp_file_path, "w")