

Java GroupWriteSupport Class Code Examples

This article collects typical usage examples of the Java class org.apache.parquet.hadoop.example.GroupWriteSupport. If you are unsure what GroupWriteSupport does or how to use it, the curated examples below should help.


GroupWriteSupport belongs to the org.apache.parquet.hadoop.example package. Fifteen code examples of the class are shown below, ordered by popularity.
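Before the individual examples, here is a minimal, self-contained sketch of the typical pattern: register the schema on the Hadoop Configuration via GroupWriteSupport.setSchema, then write Group records through a ParquetWriter. The class name, output path, and schema below are illustrative assumptions, not taken from any one project.

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.parquet.example.data.Group;
import org.apache.parquet.example.data.simple.SimpleGroupFactory;
import org.apache.parquet.hadoop.ParquetWriter;
import org.apache.parquet.hadoop.example.ExampleParquetWriter;
import org.apache.parquet.hadoop.example.GroupWriteSupport;
import org.apache.parquet.schema.MessageType;
import org.apache.parquet.schema.MessageTypeParser;

public class GroupWriteSupportSketch {
  public static void main(String[] args) throws Exception {
    MessageType schema = MessageTypeParser.parseMessageType(
        "message example { required int32 id; required binary name (UTF8); }");
    Configuration conf = new Configuration();
    // GroupWriteSupport picks the schema up from the Configuration when the writer is initialized
    GroupWriteSupport.setSchema(schema, conf);

    ParquetWriter<Group> writer = ExampleParquetWriter
        .builder(new Path("/tmp/example.parquet")) // illustrative output path
        .withConf(conf)
        .build();
    try {
      SimpleGroupFactory factory = new SimpleGroupFactory(schema);
      writer.write(factory.newGroup().append("id", 1).append("name", "alice"));
    } finally {
      writer.close();
    }
  }
}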

Example 1: generateEmptyWithSchema

import org.apache.parquet.hadoop.example.GroupWriteSupport; // import the required package/class
static File generateEmptyWithSchema(File parentDir, String filename) throws IOException {
  File f = new File(parentDir, filename);

  Configuration conf = new Configuration();
  MessageType schema = parseMessageType(
      "message test { "
          + "required int32 int32_field; "
          + "required int64 int64_field; "
          + "required float float_field; "
          + "required double double_field; "
          + "required int64 timestamp_field (TIMESTAMP_MILLIS);"
          + "} ");
  GroupWriteSupport.setSchema(schema, conf);
  SimpleGroupFactory fact = new SimpleGroupFactory(schema);
  ParquetWriter<Group> writer = new ParquetWriter<Group>(new Path(f.getPath()), new GroupWriteSupport(),
      UNCOMPRESSED, 1024, 1024, 512, false, false, ParquetProperties.WriterVersion.PARQUET_2_0, conf);
  writer.close();

  return f;
}
 
Developer: h2oai | Project: h2o-3 | Lines: 21 | Source: ParseTestParquet.java

Example 2: generateSparseParquetFile

import org.apache.parquet.hadoop.example.GroupWriteSupport; // import the required package/class
static File generateSparseParquetFile(File parentDir, String filename, int nrows) throws IOException {
  File f = new File(parentDir, filename);

  Configuration conf = new Configuration();
  MessageType schema = parseMessageType(
          "message test { optional int32 int32_field; optional binary string_field (UTF8); required int32 row; optional int32 int32_field2; } ");
  GroupWriteSupport.setSchema(schema, conf);
  SimpleGroupFactory fact = new SimpleGroupFactory(schema);
  ParquetWriter<Group> writer = new ParquetWriter<Group>(new Path(f.getPath()), new GroupWriteSupport(),
          UNCOMPRESSED, 1024, 1024, 512, true, false, ParquetProperties.WriterVersion.PARQUET_2_0, conf);
  try {
    for (int i = 0; i < nrows; i++) {
      Group g = fact.newGroup();
      if (i % 10 == 0) { g = g.append("int32_field", i); }
      if (i % 10 == 0) { g = g.append("string_field", "CAT_" + (i % 10)); }
      if (i % 10 == 0) { g = g.append("int32_field2", i); }
      writer.write(g.append("row", i));
    }
  } finally {
    writer.close();
  }
  return f;
}
 
Developer: h2oai | Project: h2o-3 | Lines: 24 | Source: ParseTestParquet.java

Example 3: generateParquetFileWithNullCharacters

import org.apache.parquet.hadoop.example.GroupWriteSupport; // import the required package/class
static File generateParquetFileWithNullCharacters(File parentDir, String filename, int nrows) throws IOException {
  File f = new File(parentDir, filename);

  Configuration conf = new Configuration();
  MessageType schema = parseMessageType(
          "message test { optional binary cat_field (UTF8); } ");
  GroupWriteSupport.setSchema(schema, conf);
  SimpleGroupFactory fact = new SimpleGroupFactory(schema);
  ParquetWriter<Group> writer = new ParquetWriter<Group>(new Path(f.getPath()), new GroupWriteSupport(),
          UNCOMPRESSED, 1024, 1024, 512, true, false, ParquetProperties.WriterVersion.PARQUET_2_0, conf);
  try {
    for (int i = 0; i < nrows; i++) {
      Group g = fact.newGroup();
      String value = i == 66 ? "CAT_0_weird\0" : "CAT_" + (i % 10);
      writer.write(g.append("cat_field", value));
    }
  } finally {
    writer.close();
  }
  return f;
}
 
Developer: h2oai | Project: h2o-3 | Lines: 22 | Source: ParseTestParquet.java

Example 4: runMapReduceJob

import org.apache.parquet.hadoop.example.GroupWriteSupport; // import the required package/class
private void runMapReduceJob(CompressionCodecName codec) throws IOException, ClassNotFoundException, InterruptedException {

    final FileSystem fileSystem = parquetPath.getFileSystem(conf);
    fileSystem.delete(parquetPath, true);
    fileSystem.delete(outputPath, true);
    {
      jobConf.setInputFormat(TextInputFormat.class);
      TextInputFormat.addInputPath(jobConf, inputPath);
      jobConf.setNumReduceTasks(0);

      jobConf.setOutputFormat(DeprecatedParquetOutputFormat.class);
      DeprecatedParquetOutputFormat.setCompression(jobConf, codec);
      DeprecatedParquetOutputFormat.setOutputPath(jobConf, parquetPath);
      DeprecatedParquetOutputFormat.setWriteSupportClass(jobConf, GroupWriteSupport.class);
      GroupWriteSupport.setSchema(MessageTypeParser.parseMessageType(writeSchema), jobConf);

      jobConf.setMapperClass(DeprecatedMapper.class);
      mapRedJob = JobClient.runJob(jobConf);
    }
  }
 
Developer: apache | Project: parquet-mr | Lines: 21 | Source: DeprecatedOutputFormatTest.java

Example 5: prepareFile

import org.apache.parquet.hadoop.example.GroupWriteSupport; // import the required package/class
@BeforeClass
public static void prepareFile() throws IOException {
  cleanup();

  GroupWriteSupport.setSchema(schema, conf);
  SimpleGroupFactory f = new SimpleGroupFactory(schema);
  ParquetWriter<Group> writer = ExampleParquetWriter.builder(file)
      .withWriterVersion(PARQUET_1_0)
      .withCompressionCodec(GZIP)
      .withRowGroupSize(1024*1024)
      .withPageSize(1024)
      .enableDictionaryEncoding()
      .withDictionaryPageSize(2*1024)
      .withConf(conf)
      .build();
  writeData(f, writer);
}
 
Developer: apache | Project: parquet-mr | Lines: 18 | Source: DictionaryFilterTest.java

Example 6: readFile

import org.apache.parquet.hadoop.example.GroupWriteSupport; // import the required package/class
public static List<Group> readFile(File f, Filter filter) throws IOException {
  Configuration conf = new Configuration();
  GroupWriteSupport.setSchema(schema, conf);

  ParquetReader<Group> reader =
      ParquetReader.builder(new GroupReadSupport(), new Path(f.getAbsolutePath()))
                   .withConf(conf)
                   .withFilter(filter)
                   .build();

  Group current;
  List<Group> users = new ArrayList<Group>();

  current = reader.read();
  while (current != null) {
    users.add(current);
    current = reader.read();
  }

  return users;
}
 
Developer: apache | Project: parquet-mr | Lines: 22 | Source: PhoneBookWriter.java
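As a side note, the Filter argument above comes from parquet's filter2 API. A hedged usage sketch, assuming PhoneBookWriter's schema contains an int64 "id" column:

import org.apache.parquet.filter2.compat.FilterCompat;
import org.apache.parquet.filter2.predicate.FilterApi;
import org.apache.parquet.filter2.predicate.FilterPredicate;

// match only the rows whose id equals 123
FilterPredicate pred = FilterApi.eq(FilterApi.longColumn("id"), 123L);
List<Group> matches = readFile(f, FilterCompat.get(pred));
// FilterCompat.NOOP reads every row back unfiltered
List<Group> everything = readFile(f, FilterCompat.NOOP);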

Example 7: writeAndTest

import org.apache.parquet.hadoop.example.GroupWriteSupport; // import the required package/class
public static void writeAndTest(WriteContext context) throws IOException {
  // Create the configuration, and then apply the schema to our configuration.
  Configuration configuration = new Configuration();
  GroupWriteSupport.setSchema(context.schema, configuration);
  GroupWriteSupport groupWriteSupport = new GroupWriteSupport();

  // Create the writer properties
  final int blockSize = context.blockSize;
  final int pageSize = context.pageSize;
  final int dictionaryPageSize = pageSize;
  final boolean enableDictionary = context.enableDictionary;
  final boolean enableValidation = context.enableValidation;
  ParquetProperties.WriterVersion writerVersion = context.version;
  CompressionCodecName codec = CompressionCodecName.UNCOMPRESSED;

  ParquetWriter<Group> writer = new ParquetWriter<Group>(context.fsPath,
      groupWriteSupport, codec, blockSize, pageSize, dictionaryPageSize,
      enableDictionary, enableValidation, writerVersion, configuration);

  context.write(writer);
  writer.close();

  context.test();

  context.path.delete();
}
 
Developer: apache | Project: parquet-mr | Lines: 27 | Source: TestStatistics.java

Example 8: run

import org.apache.parquet.hadoop.example.GroupWriteSupport; // import the required package/class
public void run() {
    Configuration conf = new Configuration();
    int blockSize = 1 * 1024;
    int pageSize = 1 * 1024;
    int dictionaryPageSize = 512;
    boolean enableDictionary = false;
    boolean validating = false;
    Path basePath = new Path("file:///Users/Jelly/Developer/test");
    MessageType schema = MessageTypeParser.parseMessageType("message test {" +
            "required binary id; " +
            "required binary content; " +
            "required int64 int64_field; " +
            "}");
    GroupWriteSupport writeSupport = new GroupWriteSupport();
    GroupWriteSupport.setSchema(schema, conf); // setSchema is static; call it via the class, not the instance
    SimpleGroupFactory groupFactory = new SimpleGroupFactory(schema);

    try {
        ParquetWriter<Group> parquetWriter = new ParquetWriter<Group>(
                basePath,
                writeSupport,
                CompressionCodecName.UNCOMPRESSED,
                blockSize, pageSize, dictionaryPageSize,
                enableDictionary,
                validating,
                ParquetProperties.WriterVersion.PARQUET_2_0,
                conf);
        for (int i = 0; i < 50000; i++) {
            parquetWriter.write(groupFactory.newGroup()
                    .append("id", "10")
                    .append("content", "test" + i)
                    .append("int64_field", Long.valueOf(i)));
        }
        parquetWriter.close();
    } catch (IOException e) {
        e.printStackTrace();
    }
}
 
Developer: dbiir | Project: RealtimeAnalysis | Lines: 39 | Source: ParquetWriterThread.java

Example 9: test

import org.apache.parquet.hadoop.example.GroupWriteSupport; // import the required package/class
@Test
public void test() throws IOException
{
    Type name = new PrimitiveType(Type.Repetition.REQUIRED, PrimitiveType.PrimitiveTypeName.BINARY, "name");
    Type age = new PrimitiveType(Type.Repetition.REQUIRED, PrimitiveType.PrimitiveTypeName.INT32, "age");
    Type score = new PrimitiveType(Type.Repetition.REQUIRED, PrimitiveType.PrimitiveTypeName.DOUBLE, "score");
    Type student = new MessageType("student", Arrays.asList(name, age, score));
    MessageType schema = new MessageType("student", student);

    int blockSize = 256 * 1024 * 1024;
    int pageSize = 6 * 1024;
    int dictionaryPageSize = 512;
    boolean enableDictionary = false;
    boolean validating = false;

    GroupWriteSupport groupWriteSupport = new GroupWriteSupport();
    SimpleGroupFactory simpleGroupFactory = new SimpleGroupFactory(schema);

    Configuration conf = new Configuration();
    conf.set("fs.hdfs.impl", DistributedFileSystem.class.getName());
    Path path = new Path("hdfs://127.0.0.1:9000/student.parquet");
    GroupWriteSupport.setSchema(schema, conf); // setSchema is static; call it via the class, not the instance
    ParquetWriter<Group> parquetWriter = new ParquetWriter<Group>(
            path,
            groupWriteSupport,
            CompressionCodecName.UNCOMPRESSED,
            blockSize,
            pageSize,
            dictionaryPageSize,
            enableDictionary,
            validating,
            ParquetProperties.WriterVersion.PARQUET_2_0,
            conf);
    // the original snippet never closed the writer; close it so the Parquet footer gets written
    parquetWriter.close();
}
 
Developer: dbiir | Project: RealtimeAnalysis | Lines: 36 | Source: ParquetWriterTest.java

Example 10: initWriter

import org.apache.parquet.hadoop.example.GroupWriteSupport; // import the required package/class
public static ParquetWriter<Group> initWriter(MessageType schema, String fileName) throws IOException {

    GroupWriteSupport.setSchema(schema, conf);

    ParquetWriter<Group> writer =
        new ParquetWriter<Group>(initFile(fileName),
            ParquetFileWriter.Mode.OVERWRITE,
            new GroupWriteSupport(),
            CompressionCodecName.SNAPPY,
            1024,
            1024,
            512,
            true, // enable dictionary encoding,
            false,
            ParquetProperties.WriterVersion.PARQUET_1_0, conf
        );

    return writer;
  }
 
Developer: axbaretto | Project: drill | Lines: 20 | Source: ParquetSimpleTestFileGenerator.java
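A brief usage sketch for initWriter (initFile and conf are helpers/fields of the generator class; the schema string here is an illustrative assumption):

MessageType schema = MessageTypeParser.parseMessageType(
    "message simple { required int32 id; }");
ParquetWriter<Group> writer = initWriter(schema, "simple.parquet");
writer.write(new SimpleGroupFactory(schema).newGroup().append("id", 1));
writer.close();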

Example 11: generateParquetFile

import org.apache.parquet.hadoop.example.GroupWriteSupport; // import the required package/class
static File generateParquetFile(File parentDir, String filename, int nrows, Date date) throws IOException {
  File f = new File(parentDir, filename);

  Configuration conf = new Configuration();
  MessageType schema = parseMessageType(
      "message test { "
          + "required int32 int32_field; "
          + "required int64 int64_field; "
          + "required float float_field; "
          + "required double double_field; "
          + "required int64 timestamp_field (TIMESTAMP_MILLIS);"
          + "} ");
  GroupWriteSupport.setSchema(schema, conf);
  SimpleGroupFactory fact = new SimpleGroupFactory(schema);
  ParquetWriter<Group> writer = new ParquetWriter<Group>(new Path(f.getPath()), new GroupWriteSupport(),
      UNCOMPRESSED, 1024, 1024, 512, true, false, ParquetProperties.WriterVersion.PARQUET_2_0, conf);
  try {
    for (int i = 0; i < nrows; i++) {
      writer.write(fact.newGroup()
          .append("int32_field", 32 + i)
          .append("int64_field", 64L + i)
          .append("float_field", 1.0f + i)
          .append("double_field", 2.0d + i)
          .append("timestamp_field", date.getTime() + (i * 117))
      );
    }
  } finally {
    writer.close();
  }
  return f;
}
 
Developer: h2oai | Project: h2o-3 | Lines: 32 | Source: ParseTestParquet.java

Example 12: testWriteReadStatisticsAllNulls

import org.apache.parquet.hadoop.example.GroupWriteSupport; // import the required package/class
@Test
public void testWriteReadStatisticsAllNulls() throws Exception {
  // this test assumes statistics will be read
  Assume.assumeTrue(!shouldIgnoreStatistics(Version.FULL_VERSION, BINARY));

  File testFile = temp.newFile();
  testFile.delete();

  writeSchema = "message example {\n" +
          "required binary content (UTF8);\n" +
          "}";

  Path path = new Path(testFile.toURI());

  MessageType schema = MessageTypeParser.parseMessageType(writeSchema);
  Configuration configuration = new Configuration();
  configuration.setBoolean("parquet.strings.signed-min-max.enabled", true);
  GroupWriteSupport.setSchema(schema, configuration);

  ParquetWriter<Group> writer = new ParquetWriter<Group>(path, configuration, new GroupWriteSupport());

  Group r1 = new SimpleGroup(schema);
  writer.write(r1);
  writer.close();

  ParquetMetadata readFooter = ParquetFileReader.readFooter(configuration, path);

  // assert the statistics object is not empty
  org.apache.parquet.column.statistics.Statistics stats = readFooter.getBlocks().get(0).getColumns().get(0).getStatistics();
  assertFalse("is empty: " + stats, stats.isEmpty());
  // assert the number of nulls are correct for the first block
  assertEquals("nulls: " + stats, 1, stats.getNumNulls());
}
 
Developer: apache | Project: parquet-mr | Lines: 34 | Source: TestParquetFileWriter.java

Example 13: writeFile

import org.apache.parquet.hadoop.example.GroupWriteSupport; // import the required package/class
private static void writeFile(File out, Configuration conf, boolean useSchema2) throws IOException {
  if (!useSchema2) {
    GroupWriteSupport.setSchema(schema, conf);
  } else {
    GroupWriteSupport.setSchema(schema2, conf);
  }
  SimpleGroupFactory f = new SimpleGroupFactory(schema);

  Map<String, String> extraMetaData = new HashMap<String, String>();
  extraMetaData.put("schema_num", useSchema2 ? "2" : "1" );

  ParquetWriter<Group> writer = ExampleParquetWriter
      .builder(new Path(out.getAbsolutePath()))
      .withConf(conf)
      .withExtraMetaData(extraMetaData)
      .build();

  for (int i = 0; i < 1000; i++) {
    Group g = f.newGroup()
        .append("binary_field", "test" + i)
        .append("int32_field", i)
        .append("int64_field", (long) i)
        .append("boolean_field", i % 2 == 0)
        .append("float_field", (float) i)
        .append("double_field", (double) i)
        .append("flba_field", "foo");

    if (!useSchema2) {
      g = g.append("int96_field", Binary.fromConstantByteArray(new byte[12]));
    }

    writer.write(g);
  }
  writer.close();
}
 
Developer: apache | Project: parquet-mr | Lines: 36 | Source: TestMergeMetadataFiles.java

Example 14: setUp

import org.apache.parquet.hadoop.example.GroupWriteSupport; // import the required package/class
@Before
public void setUp() throws Exception {
  parquetOutputFormat = new ParquetOutputFormat(new GroupWriteSupport());

  GroupWriteSupport.setSchema(MessageTypeParser.parseMessageType(writeSchema), conf);
  expectedPoolSize = Math.round((double)
      ManagementFactory.getMemoryMXBean().getHeapMemoryUsage().getMax() *
      MemoryManager.DEFAULT_MEMORY_POOL_RATIO);

  long rowGroupSize = expectedPoolSize / 2;
  conf.setLong(ParquetOutputFormat.BLOCK_SIZE, rowGroupSize);

  // the memory manager is not initialized until a writer is created
  createWriter(0).close(null);
}
 
Developer: apache | Project: parquet-mr | Lines: 16 | Source: TestMemoryManager.java

Example 15: writeToFile

import org.apache.parquet.hadoop.example.GroupWriteSupport; // import the required package/class
public static void writeToFile(File f, List<User> users) throws IOException {
  Configuration conf = new Configuration();
  GroupWriteSupport.setSchema(schema, conf);

  ParquetWriter<Group> writer = new ParquetWriter<Group>(new Path(f.getAbsolutePath()), conf, new GroupWriteSupport());
  for (User u : users) {
    writer.write(groupFromUser(u));
  }
  writer.close();
}
 
Developer: apache | Project: parquet-mr | Lines: 11 | Source: PhoneBookWriter.java
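writeToFile pairs with the readFile helper shown in Example 6. A minimal round-trip sketch, assuming a List<User> named users has been built elsewhere (the path is illustrative):

File f = new File("/tmp/phonebook.parquet");
writeToFile(f, users);
List<Group> all = readFile(f, FilterCompat.NOOP); // read everything back unfiltered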


Note: the org.apache.parquet.hadoop.example.GroupWriteSupport examples on this page were collected from open-source projects hosted on GitHub and similar platforms. Copyright of each snippet remains with its original authors; consult the corresponding project's license before redistributing or reusing the code.