Python datafile.DataFileReader类代码示例

本文整理汇总了Python中avro.datafile.DataFileReader类的典型用法代码示例。如果您正苦于以下问题：Python DataFileReader类的具体用法？Python DataFileReader怎么用？Python DataFileReader使用的例子？那么, 这里精选的类代码示例或许可以为您提供帮助。

在下文中一共展示了DataFileReader类的15个代码示例，这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞，您的评价将有助于系统推荐出更棒的Python代码示例。

示例1: read

  def read(self, format):
    time_start = time.time()

    if format == 'json':
      with open('./output/output.json') as file:
        json.loads(file.read())

    if format == 'jsch':
      with open('./output/output.json') as file:
        validate(json.loads(file.read()), self._schema_json)

    elif format == 'avro':
      reader = DataFileReader(open('./output/output.avro', 'r'), DatumReader())
      for user in reader:
        pass
      reader.close()

    elif format == 'protobuf':
      with open('./output/output.pb', 'rb') as file:
        addressbook_pb2.AddressBook().ParseFromString(file.read())

    elif format == 'gzjson':
      with gzip.open('./output/output.jsz', 'rb') as file:
        json.loads(file.read())

    time_end = time.time()

    return time_end - time_start

开发者ID:john-minted，项目名称:soa-benchmarks，代码行数:28，代码来源:benchmark.py

示例2: deserializeDataFromFile2Str

def deserializeDataFromFile2Str(inputFile):
	logging.debug("Deserializing file:"+inputFile)
	reader = DataFileReader(open(inputFile, "r"), DatumReader())
	data=""
	for item in reader:
		data=data+str(item)
	reader.close()
	return data

开发者ID:magsv，项目名称:m_etp_services，代码行数:8，代码来源:m_avro_utils.py

示例3: generic_dataframe

    def generic_dataframe(self, df, avro_schema, assert_fns=None):
        """Generic test running function for arbitrary avro schemas.

        Writes a dataframe containing the records to avro.

        Reads back and compares with the original
        """
        print(avro_schema)

        cyavro.write_avro_file_from_dataframe(df, self.filename,
                                              json.dumps(avro_schema),
                                              codec='null'
                                              )

        if assert_fns is None:
            assert_fns = {}

        df_read = cyavro.read_avro_file_as_dataframe(self.filename)

        import avro.schema
        from avro.datafile import DataFileReader, DataFileWriter
        from avro.io import DatumReader, DatumWriter

        with open(self.filename, 'rb') as fo:
            reader = DataFileReader(fo, DatumReader())
            records = []
            for user in reader:
                records.append(user)
            df_reference = pd.DataFrame(records)
            reader.close()

        success = True

        for col in avro_schema["fields"]:
            colname = col['name']
            assert_fn = assert_fns.get(colname, np.testing.assert_array_equal)

            def print_fail_header(s):
                print('#' * len(s))
                print("FAIL: Column {}".format(col))
                print('#' * len(s))
                print(s)

            try:
                assert_fn(df_read[colname], df[colname])
            except AssertionError:
                print_fail_header("Failed for cyavro read comparison  {}\n".format(col))
                traceback.print_exc(file=sys.stdout)
                success = False

            try:
                assert_fn(df_reference[colname], df[colname])
            except AssertionError:
                print_fail_header("Failed for cyavro write comparison {}\n".format(col))
                traceback.print_exc(file=sys.stdout)
                success = False

        assert success

开发者ID:MaxPoint，项目名称:cyavro，代码行数:58，代码来源:basic_types.py

示例4: testRead

def testRead(filename):
    fd = open(filename, 'rb')
    datum_writer = DatumReader()
    freader = DataFileReader(fd, datum_writer)
    for datum in freader:
        print datum['name'], datum['company']
        print datum['website']
        print
    freader.close()

开发者ID:3rwww1，项目名称:Hadoop，代码行数:9，代码来源:example1.py

示例5: testRead

def testRead(filename):
    fd = open(filename, 'rb')

    datum = DatumReader()
    reader = DataFileReader(fd, datum)

    for record in reader:
        print record['name'], record['age']

    reader.close()

开发者ID:3rwww1，项目名称:Hadoop，代码行数:10，代码来源:test-file.py

示例6: main

def main():

    if len(sys.argv) < 3:
        print "Usage:", sys.argv[0]
        print "add [num of events to add] filename"
        print "list filename"
        exit(1)

    command = sys.argv[1]

    if command == 'add':

        noEvents = sys.argv[2]
        filename = sys.argv[3]

        # load existing events

        existingEvents = {}

        try:
            reader = DataFileReader(open(filename, "rb"), DatumReader())
            existingEvents = reader
            reader.close()
        except IOError:
            print filename + ": Could not open file.  Creating a new one."

        # Write back out to disk

        try:

            schema = avro.schema.parse(open("etc/userevent.avsc").read())

            f = open(filename, "w")
            writer = DataFileWriter(f, DatumWriter(), schema)

            # Append new user events

            for i in range(0, int(noEvents)):
                newEvent = createUserEvent()
                print newEvent
                writer.append(newEvent)

            writer.close()

            print "Wrote {0} user events".format(noEvents)
        except IOError:
            print filename + ": Could not save file."

    elif command == 'list':

        listAllUserEvents(sys.argv[2])

    else:
        print "Unregistered command. Exiting"
        sys.exit(1)

开发者ID:tomford，项目名称:bbm-misc，代码行数:55，代码来源:process_user_events_avro.py

示例7: loadOldData

def loadOldData(filename):
    oldDataDict = dict()

    if not os.path.isfile(filename):
        return oldDataDict

    reader = DataFileReader(open(filename, "r"), DatumReader())
    for weight in reader:
        oldDataDict[weight["site"]] = weight["weight"]
    reader.close()

    return oldDataDict

开发者ID:vrdel，项目名称:argo-egi-connectors，代码行数:12，代码来源:weights-vapor-connector.py

示例8: main

def main():
	try:
		opts, args = getopt.getopt(sys.argv[1:], "hi:s:", ["help", "input-file=",
						"schema="])
	except getopt.GetoptError as err:
		# print help information and exit:
		print str(err) # will print something like "option -a not recognized"
		usage(sys.argv[0])
		sys.exit(2)

	avro_file = None
	avro_schema_file = None

	required_cl = 0

	for o, a in opts:
		if o in ("-h", "--help"):
			usage(sys.argv[0])
			sys.exit()
		elif o in ("-i", "--input-file"):
			required_cl += 1
            		avro_file = a
		elif o in ("-s", "--schema"):
			avro_schema_file = a
		else:
			assert False, "unhandled option"

	if (required_cl < 1): 
		print "ERROR: Missing required argument"
		usage(sys.argv[0])
		sys.exit(1)

	if not avro_schema_file:
		reader = DataFileReader(open(avro_file, "r"), DatumReader())
		for datum in reader:
			print datum
		reader.close()
	else:
		reader_schema = open(avro_schema_file, "r")
		avro_schema = reader_schema.read()
		reader_schema.close()
		parsed_avro_schema = avro.schema.parse(avro_schema)

		with open(avro_file, "rb") as reader_data:
			inputio = io.BytesIO(reader_data.read())
			decoder = avro.io.BinaryDecoder(inputio)
			reader = avro.io.DatumReader(parsed_avro_schema)
			while inputio.tell() < len(inputio.getvalue()):
				avro_datum = reader.read(decoder)
				print avro_datum
		reader_data.close()

开发者ID:CodethinkLabs，项目名称:pmacct，代码行数:51，代码来源:avro_file_decoder.py

示例9: listAllUserEvents

def listAllUserEvents(filename):

    try:

        reader = DataFileReader(open(filename, "r"), DatumReader())
        for event in reader:

            # Query uuids of events
            print "event id: {0}, event data extra fields: {1}".format(event["uuid"], event["eventData"]["otherEventData"])

        reader.close()
    except IOError:
        print filename + ": Could not open file.  Exiting"
        sys.exit(1)

开发者ID:tomford，项目名称:bbm-misc，代码行数:14，代码来源:process_user_events_avro.py

示例10: main

def main():
  """Start of execution"""
  #combine the schemas 
  known_schemas = avro.schema.Names()
  types_schema = LoadAvsc("parameter_types.avsc", known_schemas)
  param_schema = LoadAvsc("parameter.avsc", known_schemas)
  print json.dumps(param_schema.to_json(avro.schema.Names()), indent=2) 
  #test the schema works 
  param_file = open("parameters.avro", "w")
  writer = DataFileWriter(param_file, DatumWriter(), param_schema)
  param_1 = {"name": "test", "description":"An Avro test.", "type":"int"}
  param_2 = {"name": "test", "description":"An Avro test.", "type":"boolean"}
  writer.append(param_1)
  writer.append(param_2)
  writer.close()
  reader = DataFileReader(open("parameters.avro", "r"), DatumReader())
  for parameter in reader:
      print parameter
  reader.close()

开发者ID:ivernaloo，项目名称:avro，代码行数:19，代码来源:avro_test.py

示例11: readAndWriteAvro

def readAndWriteAvro():
    """ Unlike java, avro does not let you generate
        code for Tweet in python. So only way to read and write
        data is without using code generation"""

    #Read the schema
    schema = avro.schema.parse(open("tweet.avsc").read())


    #write some data
    writer = DataFileWriter(open("tweets.avro", "w"), DatumWriter(), schema)
    writer.append({"tweetId": 5, "user": "user5", "text" : "Tweeting from python as well"})
    writer.close()

    #read the same data
    tweets = DataFileReader(open("tweets.avro", "r"), DatumReader())
    for tweet in tweets:
        print tweet
    tweets.close()

开发者ID:atinsood，项目名称:HESDataAnalyticsFinalProject，代码行数:19，代码来源:tweetExample.py

示例12: handle

    def handle(self):
        data = self.request.recv(8024).strip()
        data = StringIO(data)

        reader = DataFileReader(data, DatumReader())
        for fileData in reader:
            id = fileData['id']
            data = fileData['data']

            print fileData

            if not fileDict.has_key(id):
                fileDict[id] = open("./" + id, "w")

            f = fileDict[id]

            f.write(data)
            f.flush()
        reader.close()

开发者ID:team-herring，项目名称:herring-box，代码行数:19，代码来源:05.FileProtocolServer.py

示例13: read

def read(fin, fout=None, nrecords=0):
    "Read given avro file according to its schema and dump on stdout its content"
    reader = DataFileReader(open(fin, "r"), DatumReader())
    fobj = open(fout, 'w') if fout else None
    count = 0
    if  fobj:
        fobj.write("[\n")
    for rec in reader:
        if  fobj:
            if  count:
                fobj.write(",\n")
            fobj.write(json.dumps(rec))
        else:
            pprint.pprint(rec)
        if  nrecords and count >= nrecords:
            break
        count += 1
    if  fobj:
        fobj.write("]\n")
        fobj.close()
    reader.close()

开发者ID:yuyiguo，项目名称:WMArchive，代码行数:21，代码来源:pyavro.py

示例14: _get_jc_for_avro_input

    def _get_jc_for_avro_input(self, file_in, job_conf):

        jc = dict(job_conf)
        if self.avro_input:
            jc[AVRO_INPUT] = self.avro_input
            reader = DataFileReader(file_in, DatumReader())
            schema = reader.get_meta('avro.schema')
            file_in.seek(0)
            if self.avro_input == 'v':
                jc[AVRO_VALUE_INPUT_SCHEMA] = schema
            elif self.avro_input == 'k':
                jc[AVRO_KEY_INPUT_SCHEMA] = schema
            else:
                schema_obj = json.loads(schema)
                for field in schema_obj['fields']:
                    if field['name'] == 'key':
                        key_schema = field['type']
                    else:
                        value_schema = field['type']
                jc[AVRO_KEY_INPUT_SCHEMA] = json.dumps(key_schema)
                jc[AVRO_VALUE_INPUT_SCHEMA] = json.dumps(value_schema)

        return jc

开发者ID:kikkomep，项目名称:pydoop，代码行数:23，代码来源:simulator.py

示例15: readFile

def readFile():
    reader = DataFileReader(open("part-00000.avro", "r"), DatumReader())
    for user in reader:
        print user
    reader.close()

开发者ID:eselyavka，项目名称:python，代码行数:5，代码来源:test_avro.py

注：本文中的avro.datafile.DataFileReader类示例由纯净天空整理自Github/MSDocs等开源代码及文档管理平台，相关代码片段筛选自各路编程大神贡献的开源项目，源码版权归原作者所有，传播和使用请参考对应项目的License；未经允许，请勿转载。