本文整理汇总了Python中kafka.KafkaConsumer.get_partition_offsets方法的典型用法代码示例。如果您正苦于以下问题：Python KafkaConsumer.get_partition_offsets方法的具体用法？Python KafkaConsumer.get_partition_offsets怎么用？Python KafkaConsumer.get_partition_offsets使用的例子？那么恭喜您，这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在类kafka.KafkaConsumer的用法示例。
在下文中一共展示了KafkaConsumer.get_partition_offsets方法的1个代码示例，这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞，您的评价将有助于系统推荐出更棒的Python代码示例。
示例1: main
# 需要导入模块: from kafka import KafkaConsumer [as 别名]
# 或者: from kafka.KafkaConsumer import get_partition_offsets [as 别名]
def main():
# R0915: "too many statements in function (>50)"
# pylint: disable=R0915
if len(sys.argv) != 8:
print "Wrong number of arguments"
usage()
(kafka_topic, kafka_broker, mysql_host, mysql_port, mysql_user, mysql_password, mysql_table) = sys.argv[1:8]
sql_db = MySQLdb.connect(
host = mysql_host,
port = int(mysql_port),
user = mysql_user,
passwd = mysql_password)
query = sql_db.cursor()
client = KafkaClient(kafka_broker)
consumer = KafkaConsumer(kafka_topic, metadata_broker_list = [kafka_broker],
auto_commit_enable = False,
auto_offset_reset='smallest')
last_offsets = {}
partition_ids = client.get_partition_ids_for_topic(kafka_topic)
for partition in partition_ids:
offsets = consumer.get_partition_offsets(kafka_topic, partition, -1, 1)
print offsets
# Don't really understand this format, so put in asserts
# (Pdb) consumer.get_partition_offsets("appdb.bclab1.tivo.com", 0, -1, 1)
# (15471)
assert len(offsets) == 1
assert offsets[0] > 0
next_offset = offsets[0]
last_offset = next_offset - 1
last_offsets[partition] = last_offset
finished_partitions = set()
print last_offsets
count = 0
# mapping from primary key tuples, to row data
insert_batch = {}
insert_sql = None
for m in consumer:
if m.partition in finished_partitions:
continue
count += 1
payload = m.value
(first_line, rest) = payload.split("\r\n", 1)
(_notused, header_len, _body_len) = first_line.split(" ")
header_len = int(header_len)
body = rest[header_len:]
primary_key_str = m.key
# import pdb; pdb.set_trace()
primary_keys = json.loads(primary_key_str)
primary_tuples = sorted(primary_keys.items())
sorted_primary_key_names = [ k for (k,v) in primary_tuples ]
sorted_primary_key_values = [ int(v) for (k,v) in primary_tuples ]
if len(body) > 0:
# This is a write
data = json.loads(body)
# date fields have to be turned from a number back into a datetime object
date_fields = ['createDate', 'endTime', 'expectedDeletion', 'startTime', 'updateDate',
'availabilityWindowStart', 'availabilityWindowEnd',
'entitlementWindowStart', 'entitlementWindowEnd']
for d in date_fields:
if d not in data:
continue
val = data[d]
if val is None:
continue
if val == -62170156800000:
# this is hacky and a sign that i'm doing something wrong, I think.
val = "0000-00-00 00:00:00"
else:
val = val/1000
import datetime;
val = datetime.datetime.utcfromtimestamp(val)
data[d] = val
keys = [ k for (k, v) in sorted(data.items()) ]
values = [ v for (k, v) in sorted(data.items()) ]
keys_wo_primary = [ k for (k, v) in sorted(data.items()) ]
for p in sorted_primary_key_names:
keys_wo_primary.remove(p)
#.........这里部分代码省略.........