

Java AvroParquetWriter Class Code Examples

This article collects and summarizes typical usage examples of the Java class org.apache.parquet.avro.AvroParquetWriter. If you are wondering what AvroParquetWriter is for, how to use it, or what it looks like in real code, the curated examples below should help.


The AvroParquetWriter class belongs to the org.apache.parquet.avro package. Nine code examples for the class are shown below, sorted by popularity by default. You can upvote the examples you find useful; your feedback helps the system recommend better Java code examples.
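
Before diving in, here is a minimal, self-contained sketch of the builder-style usage pattern that most of the examples below follow. The inline schema and the output path /tmp/pair.parquet are placeholder assumptions for illustration, not values taken from any of the projects cited on this page.

import org.apache.avro.Schema;
import org.apache.avro.generic.GenericData;
import org.apache.avro.generic.GenericRecord;
import org.apache.hadoop.fs.Path;
import org.apache.parquet.avro.AvroParquetWriter;
import org.apache.parquet.hadoop.ParquetWriter;
import org.apache.parquet.hadoop.metadata.CompressionCodecName;

public class AvroParquetWriterSketch {
  public static void main(String[] args) throws Exception {
    // A tiny inline Avro record schema; any record schema works here.
    Schema schema = new Schema.Parser().parse(
        "{\"type\":\"record\",\"name\":\"Pair\",\"fields\":["
            + "{\"name\":\"left\",\"type\":\"string\"},"
            + "{\"name\":\"right\",\"type\":\"string\"}]}");

    GenericRecord record = new GenericData.Record(schema);
    record.put("left", "L");
    record.put("right", "R");

    // Builder API: configure schema and compression, then build the writer.
    try (ParquetWriter<GenericRecord> writer = AvroParquetWriter
        .<GenericRecord>builder(new Path("/tmp/pair.parquet"))
        .withSchema(schema)
        .withCompressionCodec(CompressionCodecName.SNAPPY)
        .build()) {
      writer.write(record);
    }
  }
}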

Example 1: initWriter

import org.apache.parquet.avro.AvroParquetWriter; // import the required package/class
private ParquetWriterWrapper initWriter(Path file, int recordLimit, final FileSystem fileSystem,
    final Configuration conf, final CompressionCodecName compression, final int blockSize,
    final int pageSize) throws IOException {
  numRecords.set(0);
  String[] columnNames = converterDescriptor.getColumnConverters().stream()
      .map(columnConverterDescriptor -> columnConverterDescriptor.getColumnName())
      .toArray(size -> new String[size]);

  Schema[] columnTypes = converterDescriptor.getColumnConverters().stream()
      .map(columnConverterDescriptor -> columnConverterDescriptor.getTypeDescriptor())
      .toArray(size -> new Schema[size]);

  avroRecord = ParquetUtils.createAvroRecordSchema(getTableName(), columnNames, columnTypes);

  // TODO: confirm whether the writer can write to HDFS without an explicit FileSystem instance
  writer = AvroParquetWriter.<GenericRecord>builder(file).withCompressionCodec(compression)
      .withPageSize(pageSize).withConf(conf).withSchema(avroRecord).build();

  return this;
}
 
Developer ID: ampool, Project: monarch, Lines: 21, Source: ParquetWriterWrapper.java

Example 2: generateAvroPrimitiveTypes

import org.apache.parquet.avro.AvroParquetWriter; // import the required package/class
static File generateAvroPrimitiveTypes(File parentDir, String filename, int nrows, Date date) throws IOException {
  File f = new File(parentDir, filename);
  Schema schema = new Schema.Parser().parse(Resources.getResource("PrimitiveAvro.avsc").openStream());
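  // Note: this AvroParquetWriter(Path, Schema) constructor is deprecated in recent
  // parquet-mr releases in favor of the builder API shown in other examples.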
  AvroParquetWriter<GenericRecord> writer = new AvroParquetWriter<GenericRecord>(new Path(f.getPath()), schema);
  try {
    DateFormat format = new SimpleDateFormat("yy-MMM-dd:hh.mm.ss.SSS aaa");
    for (int i = 0; i < nrows; i++) {
      GenericData.Record record = new GenericRecordBuilder(schema)
              .set("mynull", null)
              .set("myboolean", i % 2 == 0)
              .set("myint", 1 + i)
              .set("mylong", 2L + i)
              .set("myfloat", 3.1f + i)
              .set("mydouble", 4.1 + i)
              .set("mydate", format.format(new Date(date.getTime() - (i * 1000 * 3600))))
              .set("myuuid", UUID.randomUUID())
              .set("mystring", "hello world: " + i)
              .set("myenum", i % 2 == 0 ? "a" : "b")
              .build();
      writer.write(record);
    }
  } finally {
    writer.close();
  }
  return f;
}
 
Developer ID: h2oai, Project: h2o-3, Lines: 27, Source: ParseTestParquet.java

Example 3: createDataFile

import org.apache.parquet.avro.AvroParquetWriter; // import the required package/class
private static Path createDataFile() throws IOException {
    File parquetFile = File.createTempFile("test-", "." + FILE_EXTENSION);
    readerSchema = new Schema.Parser().parse(
            ParquetFileReaderTest.class.getResourceAsStream("/file/reader/schemas/people.avsc"));
    projectionSchema = new Schema.Parser().parse(
            ParquetFileReaderTest.class.getResourceAsStream("/file/reader/schemas/people_projection.avsc"));

    try (ParquetWriter<GenericRecord> writer = AvroParquetWriter.<GenericRecord>builder(new Path(parquetFile.toURI()))
            .withConf(fs.getConf()).withWriteMode(ParquetFileWriter.Mode.OVERWRITE).withSchema(readerSchema).build()) {

        IntStream.range(0, NUM_RECORDS).forEach(index -> {
            GenericRecord datum = new GenericData.Record(readerSchema);
            datum.put(FIELD_INDEX, index);
            datum.put(FIELD_NAME, String.format("%d_name_%s", index, UUID.randomUUID()));
            datum.put(FIELD_SURNAME, String.format("%d_surname_%s", index, UUID.randomUUID()));
            try {
                OFFSETS_BY_INDEX.put(index, Long.valueOf(index));
                writer.write(datum);
            } catch (IOException ioe) {
                throw new RuntimeException(ioe);
            }
        });
    }
    Path path = new Path(new Path(fsUri), parquetFile.getName());
    fs.moveFromLocalFile(new Path(parquetFile.getAbsolutePath()), path);
    return path;
}
 
Developer ID: mmolimar, Project: kafka-connect-fs, Lines: 28, Source: ParquetFileReaderTest.java

Example 4: getParquetFileStream

import org.apache.parquet.avro.AvroParquetWriter; // import the required package/class
public ParquetWriter<GenericRecord> getParquetFileStream() throws IOException {
  Schema avroSchema = getAvroSchema();
  Path file = new Path("/tmp/data/EmployeeData" + fileIndex++ + ".parquet");
  // build a Parquet writer for the Avro schema
  ParquetWriter<GenericRecord> parquetWriter =
      AvroParquetWriter.<GenericRecord>builder(file).withSchema(avroSchema).build();

  return parquetWriter;
}
 
Developer ID: ampool, Project: monarch, Lines: 10, Source: MTableCDCParquetListener.java

Example 5: write

import org.apache.parquet.avro.AvroParquetWriter; // import the required package/class
/**
 * Writes Avro-format data to a Parquet file.
 *
 * @param parquetPath path of the target Parquet file
 */
public void write(String parquetPath) {
    Schema.Parser parser = new Schema.Parser();
    try {
        Schema schema = parser.parse(AvroParquetOperation.class.getClassLoader().getResourceAsStream("StringPair.avsc"));
        GenericRecord datum = new GenericData.Record(schema);
        datum.put("left", "L");
        datum.put("right", "R");

        Path path = new Path(parquetPath);
        System.out.println(path);
        AvroParquetWriter<GenericRecord> writer = new AvroParquetWriter<GenericRecord>(path, schema);
        writer.write(datum);
        writer.close();
    } catch (IOException e) {
        e.printStackTrace();
    }
}
 
Developer ID: mumuhadoop, Project: mumu-parquet, Lines: 23, Source: AvroParquetOperation.java

Example 6: call

import org.apache.parquet.avro.AvroParquetWriter; // import the required package/class
@Override
public Job call() throws Exception {
  // We're explicitly disabling speculative execution
  conf.set("mapreduce.map.speculative", "false");
  conf.set("mapreduce.map.maxattempts", "1");
  MapreduceUtils.addJarsToJob(conf,
    SemanticVersion.class,
    ParquetWriter.class,
    AvroParquetWriter.class,
    FsInput.class,
    CompressionCodec.class,
    ParquetProperties.class,
    BytesInput.class
  );

  Job job = Job.getInstance(conf);

  // IO formats
  job.setInputFormatClass(AvroParquetInputFormat.class);
  job.setOutputFormatClass(NullOutputFormat.class);

  // Mapper & job output
  job.setMapperClass(AvroParquetConvertMapper.class);
  job.setOutputKeyClass(NullWritable.class);
  job.setOutputValueClass(NullWritable.class);

  // It's a map-only job
  job.setNumReduceTasks(0);

  // General configuration
  job.setJarByClass(getClass());

  return job;
}
 
Developer ID: streamsets, Project: datacollector, Lines: 35, Source: AvroParquetConvertCreator.java

Example 7: AvroParquetFileWriter

import org.apache.parquet.avro.AvroParquetWriter; // import the required package/class
public AvroParquetFileWriter(LogFilePath logFilePath, CompressionCodec codec) throws IOException {
    Path path = new Path(logFilePath.getLogFilePath());
    LOG.debug("Creating Brand new Writer for path {}", path);
    CompressionCodecName codecName = CompressionCodecName
            .fromCompressionCodec(codec != null ? codec.getClass() : null);
    topic = logFilePath.getTopic();
    // blockSize, pageSize, enableDictionary, and validation are left at their defaults
    writer = AvroParquetWriter.builder(path)
            .withSchema(schemaRegistryClient.getSchema(topic))
            .withCompressionCodec(codecName)
            .build();
}
 
Developer ID: pinterest, Project: secor, Lines: 13, Source: AvroParquetFileReaderWriterFactory.java

Example 8: run

import org.apache.parquet.avro.AvroParquetWriter; // import the required package/class
@Override
@SuppressWarnings("unchecked")
public int run() throws IOException {
  Preconditions.checkArgument(targets != null && targets.size() == 1,
      "CSV path is required.");

  if (header != null) {
    // if a header is given on the command line, don't assume one is in the file
    noHeader = true;
  }

  CSVProperties props = new CSVProperties.Builder()
      .delimiter(delimiter)
      .escape(escape)
      .quote(quote)
      .header(header)
      .hasHeader(!noHeader)
      .linesToSkip(linesToSkip)
      .charset(charsetName)
      .build();

  String source = targets.get(0);

  Schema csvSchema;
  if (avroSchemaFile != null) {
    csvSchema = Schemas.fromAvsc(open(avroSchemaFile));
  } else {
    Set<String> required = ImmutableSet.of();
    if (requiredFields != null) {
      required = ImmutableSet.copyOf(requiredFields);
    }

    String filename = new File(source).getName();
    String recordName;
    if (filename.contains(".")) {
      recordName = filename.substring(0, filename.indexOf("."));
    } else {
      recordName = filename;
    }

    csvSchema = AvroCSV.inferNullableSchema(
        recordName, open(source), props, required);
  }

  long count = 0;
  try (AvroCSVReader<Record> reader = new AvroCSVReader<>(
      open(source), props, csvSchema, Record.class, true)) {
    CompressionCodecName codec = Codecs.parquetCodec(compressionCodecName);
    try (ParquetWriter<Record> writer = AvroParquetWriter
        .<Record>builder(qualifiedPath(outputPath))
        .withWriterVersion(v2 ? PARQUET_2_0 : PARQUET_1_0)
        .withWriteMode(overwrite ?
            ParquetFileWriter.Mode.OVERWRITE : ParquetFileWriter.Mode.CREATE)
        .withCompressionCodec(codec)
        .withDictionaryEncoding(true)
        .withDictionaryPageSize(dictionaryPageSize)
        .withPageSize(pageSize)
        .withRowGroupSize(rowGroupSize)
        .withDataModel(GenericData.get())
        .withConf(getConf())
        .withSchema(csvSchema)
        .build()) {
      for (Record record : reader) {
        writer.write(record);
        count += 1;
      }
    } catch (RuntimeException e) {
      throw new RuntimeException("Failed on record " + count, e);
    }
  }

  return 0;
}
 
Developer ID: apache, Project: parquet-mr, Lines: 73, Source: ConvertCSVCommand.java

Example 9: run

import org.apache.parquet.avro.AvroParquetWriter; // import the required package/class
@Override
@SuppressWarnings("unchecked")
public int run() throws IOException {
  Preconditions.checkArgument(targets != null && targets.size() == 1,
      "A data file is required.");

  String source = targets.get(0);

  CompressionCodecName codec = Codecs.parquetCodec(compressionCodecName);

  Schema schema;
  if (avroSchemaFile != null) {
    schema = Schemas.fromAvsc(open(avroSchemaFile));
  } else {
    schema = getAvroSchema(source);
  }
  Schema projection = filterSchema(schema, columns);

  Path outPath = qualifiedPath(outputPath);
  FileSystem outFS = outPath.getFileSystem(getConf());
  if (overwrite && outFS.exists(outPath)) {
    console.debug("Deleting output file {} (already exists)", outPath);
    outFS.delete(outPath);
  }

  Iterable<Record> reader = openDataFile(source, projection);
  boolean threw = true;
  long count = 0;
  try {
    try (ParquetWriter<Record> writer = AvroParquetWriter
        .<Record>builder(qualifiedPath(outputPath))
        .withWriterVersion(v2 ? PARQUET_2_0 : PARQUET_1_0)
        .withConf(getConf())
        .withCompressionCodec(codec)
        .withRowGroupSize(rowGroupSize)
        .withDictionaryPageSize(dictionaryPageSize < 64 ? 64 : dictionaryPageSize)
        .withDictionaryEncoding(dictionaryPageSize != 0)
        .withPageSize(pageSize)
        .withDataModel(GenericData.get())
        .withSchema(projection)
        .build()) {
      for (Record record : reader) {
        writer.write(record);
        count += 1;
      }
    }
    threw = false;
  } catch (RuntimeException e) {
    throw new RuntimeException("Failed on record " + count, e);
  } finally {
    if (reader instanceof Closeable) {
      Closeables.close((Closeable) reader, threw);
    }
  }

  return 0;
}
 
开发者ID:apache,项目名称:parquet-mr,代码行数:58,代码来源:ConvertCommand.java


Note: The org.apache.parquet.avro.AvroParquetWriter class examples in this article were compiled by 纯净天空 from open-source code and documentation platforms such as GitHub and MSDocs. The snippets were selected from open-source projects contributed by various developers; copyright remains with the original authors. Please refer to each project's license before redistributing or using the code, and do not republish without permission.