This article collects typical usage examples of the Python class avro.datafile.DataFileReader. If you are wondering what exactly the DataFileReader class does, how to use it, or what working examples look like, the curated class examples below may help.
A total of 15 code examples of the DataFileReader class are shown below, sorted by popularity by default. You can upvote the examples you like or find useful; your feedback helps the system recommend better Python code examples.
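Before looking at the collected examples, the snippet below is a minimal sketch of the usual DataFileWriter/DataFileReader round trip. The schema, file name, and records here are illustrative assumptions for this sketch only and are not taken from the examples that follow.

import avro.schema
from avro.datafile import DataFileReader, DataFileWriter
from avro.io import DatumReader, DatumWriter

# Hypothetical record schema, used only for this sketch.
schema = avro.schema.parse("""
{"type": "record", "name": "User",
 "fields": [{"name": "name", "type": "string"},
            {"name": "age",  "type": "int"}]}
""")

# Write a couple of records to an Avro container file.
writer = DataFileWriter(open("users.avro", "wb"), DatumWriter(), schema)
writer.append({"name": "Alice", "age": 30})
writer.append({"name": "Bob", "age": 25})
writer.close()

# Read them back; DataFileReader is iterable and yields plain dicts.
reader = DataFileReader(open("users.avro", "rb"), DatumReader())
for user in reader:
    print(user)
reader.close()

Because the writer schema is embedded in the container file, DataFileReader only needs a binary file-like object and a DatumReader; no schema has to be passed in to read the records back.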
Example 1: read
def read(self, format):
    time_start = time.time()
    if format == 'json':
        with open('./output/output.json') as file:
            json.loads(file.read())
    if format == 'jsch':
        with open('./output/output.json') as file:
            validate(json.loads(file.read()), self._schema_json)
    elif format == 'avro':
        reader = DataFileReader(open('./output/output.avro', 'r'), DatumReader())
        for user in reader:
            pass
        reader.close()
    elif format == 'protobuf':
        with open('./output/output.pb', 'rb') as file:
            addressbook_pb2.AddressBook().ParseFromString(file.read())
    elif format == 'gzjson':
        with gzip.open('./output/output.jsz', 'rb') as file:
            json.loads(file.read())
    time_end = time.time()
    return time_end - time_start
Example 2: deserializeDataFromFile2Str
def deserializeDataFromFile2Str(inputFile):
    logging.debug("Deserializing file:" + inputFile)
    reader = DataFileReader(open(inputFile, "r"), DatumReader())
    data = ""
    for item in reader:
        data = data + str(item)
    reader.close()
    return data
Example 3: generic_dataframe
def generic_dataframe(self, df, avro_schema, assert_fns=None):
    """Generic test running function for arbitrary avro schemas.

    Writes a dataframe containing the records to avro.
    Reads back and compares with the original.
    """
    print(avro_schema)
    cyavro.write_avro_file_from_dataframe(df, self.filename,
                                          json.dumps(avro_schema),
                                          codec='null')
    if assert_fns is None:
        assert_fns = {}

    df_read = cyavro.read_avro_file_as_dataframe(self.filename)

    import avro.schema
    from avro.datafile import DataFileReader, DataFileWriter
    from avro.io import DatumReader, DatumWriter

    with open(self.filename, 'rb') as fo:
        reader = DataFileReader(fo, DatumReader())
        records = []
        for user in reader:
            records.append(user)
        df_reference = pd.DataFrame(records)
        reader.close()

    success = True
    for col in avro_schema["fields"]:
        colname = col['name']
        assert_fn = assert_fns.get(colname, np.testing.assert_array_equal)

        def print_fail_header(s):
            print('#' * len(s))
            print("FAIL: Column {}".format(col))
            print('#' * len(s))
            print(s)

        try:
            assert_fn(df_read[colname], df[colname])
        except AssertionError:
            print_fail_header("Failed for cyavro read comparison {}\n".format(col))
            traceback.print_exc(file=sys.stdout)
            success = False

        try:
            assert_fn(df_reference[colname], df[colname])
        except AssertionError:
            print_fail_header("Failed for cyavro write comparison {}\n".format(col))
            traceback.print_exc(file=sys.stdout)
            success = False

    assert success
Example 4: testRead
def testRead(filename):
    fd = open(filename, 'rb')
    datum_writer = DatumReader()
    freader = DataFileReader(fd, datum_writer)
    for datum in freader:
        print datum['name'], datum['company']
        print datum['website']
        print
    freader.close()
Example 5: testRead
def testRead(filename):
    fd = open(filename, 'rb')
    datum = DatumReader()
    reader = DataFileReader(fd, datum)
    for record in reader:
        print record['name'], record['age']
    reader.close()
Example 6: main
def main():
    if len(sys.argv) < 3:
        print "Usage:", sys.argv[0]
        print "add [num of events to add] filename"
        print "list filename"
        exit(1)

    command = sys.argv[1]
    if command == 'add':
        noEvents = sys.argv[2]
        filename = sys.argv[3]

        # load existing events
        existingEvents = {}
        try:
            reader = DataFileReader(open(filename, "rb"), DatumReader())
            existingEvents = reader
            reader.close()
        except IOError:
            print filename + ": Could not open file. Creating a new one."

        # Write back out to disk
        try:
            schema = avro.schema.parse(open("etc/userevent.avsc").read())
            f = open(filename, "w")
            writer = DataFileWriter(f, DatumWriter(), schema)

            # Append new user events
            for i in range(0, int(noEvents)):
                newEvent = createUserEvent()
                print newEvent
                writer.append(newEvent)
            writer.close()
            print "Wrote {0} user events".format(noEvents)
        except IOError:
            print filename + ": Could not save file."
    elif command == 'list':
        listAllUserEvents(sys.argv[2])
    else:
        print "Unregistered command. Exiting"
        sys.exit(1)
Example 7: loadOldData
def loadOldData(filename):
    oldDataDict = dict()
    if not os.path.isfile(filename):
        return oldDataDict
    reader = DataFileReader(open(filename, "r"), DatumReader())
    for weight in reader:
        oldDataDict[weight["site"]] = weight["weight"]
    reader.close()
    return oldDataDict
Example 8: main
def main():
    try:
        opts, args = getopt.getopt(sys.argv[1:], "hi:s:", ["help", "input-file=",
                                                           "schema="])
    except getopt.GetoptError as err:
        # print help information and exit:
        print str(err)  # will print something like "option -a not recognized"
        usage(sys.argv[0])
        sys.exit(2)

    avro_file = None
    avro_schema_file = None
    required_cl = 0

    for o, a in opts:
        if o in ("-h", "--help"):
            usage(sys.argv[0])
            sys.exit()
        elif o in ("-i", "--input-file"):
            required_cl += 1
            avro_file = a
        elif o in ("-s", "--schema"):
            avro_schema_file = a
        else:
            assert False, "unhandled option"

    if required_cl < 1:
        print "ERROR: Missing required argument"
        usage(sys.argv[0])
        sys.exit(1)

    if not avro_schema_file:
        reader = DataFileReader(open(avro_file, "r"), DatumReader())
        for datum in reader:
            print datum
        reader.close()
    else:
        reader_schema = open(avro_schema_file, "r")
        avro_schema = reader_schema.read()
        reader_schema.close()
        parsed_avro_schema = avro.schema.parse(avro_schema)
        with open(avro_file, "rb") as reader_data:
            inputio = io.BytesIO(reader_data.read())
            decoder = avro.io.BinaryDecoder(inputio)
            reader = avro.io.DatumReader(parsed_avro_schema)
            while inputio.tell() < len(inputio.getvalue()):
                avro_datum = reader.read(decoder)
                print avro_datum
            reader_data.close()
Example 9: listAllUserEvents
def listAllUserEvents(filename):
    try:
        reader = DataFileReader(open(filename, "r"), DatumReader())
        for event in reader:
            # Query uuids of events
            print "event id: {0}, event data extra fields: {1}".format(event["uuid"], event["eventData"]["otherEventData"])
        reader.close()
    except IOError:
        print filename + ": Could not open file. Exiting"
        sys.exit(1)
Example 10: main
def main():
    """Start of execution"""
    # combine the schemas
    known_schemas = avro.schema.Names()
    types_schema = LoadAvsc("parameter_types.avsc", known_schemas)
    param_schema = LoadAvsc("parameter.avsc", known_schemas)
    print json.dumps(param_schema.to_json(avro.schema.Names()), indent=2)
    # test the schema works
    param_file = open("parameters.avro", "w")
    writer = DataFileWriter(param_file, DatumWriter(), param_schema)
    param_1 = {"name": "test", "description": "An Avro test.", "type": "int"}
    param_2 = {"name": "test", "description": "An Avro test.", "type": "boolean"}
    writer.append(param_1)
    writer.append(param_2)
    writer.close()
    reader = DataFileReader(open("parameters.avro", "r"), DatumReader())
    for parameter in reader:
        print parameter
    reader.close()
Example 11: readAndWriteAvro
def readAndWriteAvro():
    """Unlike Java, avro does not let you generate code for Tweet in Python,
    so the only way to read and write data is without using code generation."""
    # Read the schema
    schema = avro.schema.parse(open("tweet.avsc").read())
    # Write some data
    writer = DataFileWriter(open("tweets.avro", "w"), DatumWriter(), schema)
    writer.append({"tweetId": 5, "user": "user5", "text": "Tweeting from python as well"})
    writer.close()
    # Read the same data back
    tweets = DataFileReader(open("tweets.avro", "r"), DatumReader())
    for tweet in tweets:
        print tweet
    tweets.close()
Example 12: handle
def handle(self):
    data = self.request.recv(8024).strip()
    data = StringIO(data)
    reader = DataFileReader(data, DatumReader())
    for fileData in reader:
        id = fileData['id']
        data = fileData['data']
        print fileData
        if not fileDict.has_key(id):
            fileDict[id] = open("./" + id, "w")
        f = fileDict[id]
        f.write(data)
        f.flush()
    reader.close()
Example 13: read
def read(fin, fout=None, nrecords=0):
    "Read the given avro file according to its schema and dump its content to stdout"
    reader = DataFileReader(open(fin, "r"), DatumReader())
    fobj = open(fout, 'w') if fout else None
    count = 0
    if fobj:
        fobj.write("[\n")
    for rec in reader:
        if fobj:
            if count:
                fobj.write(",\n")
            fobj.write(json.dumps(rec))
        else:
            pprint.pprint(rec)
        if nrecords and count >= nrecords:
            break
        count += 1
    if fobj:
        fobj.write("]\n")
        fobj.close()
    reader.close()
Example 14: _get_jc_for_avro_input
def _get_jc_for_avro_input(self, file_in, job_conf):
    jc = dict(job_conf)
    if self.avro_input:
        jc[AVRO_INPUT] = self.avro_input
        reader = DataFileReader(file_in, DatumReader())
        schema = reader.get_meta('avro.schema')
        file_in.seek(0)
        if self.avro_input == 'v':
            jc[AVRO_VALUE_INPUT_SCHEMA] = schema
        elif self.avro_input == 'k':
            jc[AVRO_KEY_INPUT_SCHEMA] = schema
        else:
            schema_obj = json.loads(schema)
            for field in schema_obj['fields']:
                if field['name'] == 'key':
                    key_schema = field['type']
                else:
                    value_schema = field['type']
            jc[AVRO_KEY_INPUT_SCHEMA] = json.dumps(key_schema)
            jc[AVRO_VALUE_INPUT_SCHEMA] = json.dumps(value_schema)
    return jc
Example 15: readFile
def readFile():
    reader = DataFileReader(open("part-00000.avro", "r"), DatumReader())
    for user in reader:
        print user
    reader.close()