当前位置: 首页>>代码示例>>Java>>正文


Java SimpleGroupFactory类代码示例

本文整理汇总了Java中org.apache.parquet.example.data.simple.SimpleGroupFactory的典型用法代码示例。如果您正苦于以下问题:Java SimpleGroupFactory类的具体用法?Java SimpleGroupFactory怎么用?Java SimpleGroupFactory使用的例子?那么恭喜您, 这里精选的类代码示例或许可以为您提供帮助。


SimpleGroupFactory类属于org.apache.parquet.example.data.simple包,在下文中一共展示了SimpleGroupFactory类的15个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Java代码示例。

示例1: main

import org.apache.parquet.example.data.simple.SimpleGroupFactory; //导入依赖的package包/类
/**
 * Generates four Parquet test files under {@code drill/}: simple, complex,
 * simple-nullable and complex-nullable.
 *
 * <p>One {@link SimpleGroupFactory} and one writer are created per schema. All
 * writers are managed by try-with-resources so that every writer that was
 * successfully opened is closed even if a later {@code initWriter} call or a
 * write fails (previously they were closed only on the success path).
 * Resources are closed in reverse declaration order; the files are independent
 * of each other, so the order does not matter.
 *
 * @param args ignored
 * @throws IOException if a writer cannot be created, written to, or closed
 */
public static void main(String[] args) throws IOException {

    SimpleGroupFactory sgf = new SimpleGroupFactory(simpleSchema);
    GroupFactory gf = new SimpleGroupFactory(complexSchema);
    SimpleGroupFactory sngf = new SimpleGroupFactory(simpleNullableSchema);
    GroupFactory ngf = new SimpleGroupFactory(complexNullableSchema);

    try (ParquetWriter<Group> simpleWriter = initWriter(simpleSchema, "drill/parquet_test_file_simple");
         ParquetWriter<Group> complexWriter = initWriter(complexSchema, "drill/parquet_test_file_complex");
         ParquetWriter<Group> simpleNullableWriter = initWriter(simpleNullableSchema, "drill/parquet_test_file_simple_nullable");
         ParquetWriter<Group> complexNullableWriter = initWriter(complexNullableSchema, "drill/parquet_test_file_complex_nullable")) {

      // NOTE(review): the trailing boolean presumably selects the nullable
      // variant of the data set -- confirm against writeSimpleValues/writeComplexValues.
      ParquetSimpleTestFileGenerator.writeSimpleValues(sgf, simpleWriter, false);
      ParquetSimpleTestFileGenerator.writeSimpleValues(sngf, simpleNullableWriter, true);
      ParquetSimpleTestFileGenerator.writeComplexValues(gf, complexWriter, false);
      ParquetSimpleTestFileGenerator.writeComplexValues(ngf, complexNullableWriter, true);
    }

  }
 
开发者ID:axbaretto,项目名称:drill,代码行数:24,代码来源:ParquetSimpleTestFileGenerator.java

示例2: generateEmptyWithSchema

import org.apache.parquet.example.data.simple.SimpleGroupFactory; //导入依赖的package包/类
/**
 * Creates a Parquet file that carries a schema but contains no records.
 *
 * <p>The writer is opened and closed immediately without any {@code write}
 * call, so only the schema/footer is produced. The unused group factory that
 * the previous version instantiated has been removed: no group is ever built
 * here.
 *
 * @param parentDir directory in which the file is created
 * @param filename  name of the file inside {@code parentDir}
 * @return the {@link File} handle of the generated Parquet file
 * @throws IOException if the writer cannot be created or closed
 */
static File generateEmptyWithSchema(File parentDir, String filename) throws IOException {
  File f = new File(parentDir, filename);

  Configuration conf = new Configuration();
  MessageType schema = parseMessageType(
      "message test { "
          + "required int32 int32_field; "
          + "required int64 int64_field; "
          + "required float float_field; "
          + "required double double_field; "
          + "required int64 timestamp_field (TIMESTAMP_MILLIS);"
          + "} ");
  GroupWriteSupport.setSchema(schema, conf);
  ParquetWriter<Group> writer = new ParquetWriter<Group>(new Path(f.getPath()), new GroupWriteSupport(),
      UNCOMPRESSED, 1024, 1024, 512, false, false, ParquetProperties.WriterVersion.PARQUET_2_0, conf);
  // Intentionally no writes: closing right away yields a schema-only file.
  writer.close();

  return f;
}
 
开发者ID:h2oai,项目名称:h2o-3,代码行数:21,代码来源:ParseTestParquet.java

示例3: generateSparseParquetFile

import org.apache.parquet.example.data.simple.SimpleGroupFactory; //导入依赖的package包/类
/**
 * Writes a Parquet file of {@code nrows} records in which the optional columns
 * are populated only on every tenth row (row index divisible by 10); the
 * required {@code row} column is always set to the row index.
 *
 * @param parentDir directory in which the file is created
 * @param filename  name of the file inside {@code parentDir}
 * @param nrows     number of records to write
 * @return the {@link File} handle of the generated Parquet file
 * @throws IOException if writing or closing the file fails
 */
static File generateSparseParquetFile(File parentDir, String filename, int nrows) throws IOException {
  File target = new File(parentDir, filename);

  Configuration conf = new Configuration();
  MessageType schema = parseMessageType(
          "message test { optional int32 int32_field; optional binary string_field (UTF8); required int32 row; optional int32 int32_field2; } ");
  GroupWriteSupport.setSchema(schema, conf);
  SimpleGroupFactory groupFactory = new SimpleGroupFactory(schema);
  ParquetWriter<Group> out = new ParquetWriter<Group>(new Path(target.getPath()), new GroupWriteSupport(),
          UNCOMPRESSED, 1024, 1024, 512, true, false, ParquetProperties.WriterVersion.PARQUET_2_0, conf);
  try {
    for (int rowIdx = 0; rowIdx < nrows; rowIdx++) {
      Group record = groupFactory.newGroup();
      boolean populateOptionals = rowIdx % 10 == 0;
      if (populateOptionals) {
        // All three optional fields share the same condition, so they are set together.
        record = record.append("int32_field", rowIdx);
        record = record.append("string_field", "CAT_" + (rowIdx % 10));
        record = record.append("int32_field2", rowIdx);
      }
      record = record.append("row", rowIdx);
      out.write(record);
    }
  } finally {
    out.close();
  }
  return target;
}
 
开发者ID:h2oai,项目名称:h2o-3,代码行数:24,代码来源:ParseTestParquet.java

示例4: generateParquetFileWithNullCharacters

import org.apache.parquet.example.data.simple.SimpleGroupFactory; //导入依赖的package包/类
/**
 * Writes a Parquet file with a single optional UTF8 column {@code cat_field}.
 * Every row holds {@code "CAT_" + (i % 10)}, except row 66, whose value ends
 * with a NUL character.
 *
 * @param parentDir directory in which the file is created
 * @param filename  name of the file inside {@code parentDir}
 * @param nrows     number of records to write
 * @return the {@link File} handle of the generated Parquet file
 * @throws IOException if writing or closing the file fails
 */
static File generateParquetFileWithNullCharacters(File parentDir, String filename, int nrows) throws IOException {
  File target = new File(parentDir, filename);

  Configuration conf = new Configuration();
  MessageType schema = parseMessageType(
          "message test { optional binary cat_field (UTF8); } ");
  GroupWriteSupport.setSchema(schema, conf);
  SimpleGroupFactory groupFactory = new SimpleGroupFactory(schema);
  ParquetWriter<Group> out = new ParquetWriter<Group>(new Path(target.getPath()), new GroupWriteSupport(),
          UNCOMPRESSED, 1024, 1024, 512, true, false, ParquetProperties.WriterVersion.PARQUET_2_0, conf);
  try {
    for (int rowIdx = 0; rowIdx < nrows; rowIdx++) {
      String category;
      if (rowIdx == 66) {
        // The one row carrying an embedded NUL character.
        category = "CAT_0_weird\0";
      } else {
        category = "CAT_" + (rowIdx % 10);
      }
      Group record = groupFactory.newGroup();
      out.write(record.append("cat_field", category));
    }
  } finally {
    out.close();
  }
  return target;
}
 
开发者ID:h2oai,项目名称:h2o-3,代码行数:22,代码来源:ParseTestParquet.java

示例5: testReadUsingRequestedSchemaWithIncompatibleField

import org.apache.parquet.example.data.simple.SimpleGroupFactory; //导入依赖的package包/类
/**
 * Writes one group with an optional INT32 field {@code e} and then reads it
 * back requesting {@code e} as BINARY; the read is expected to fail with a
 * {@link ParquetDecodingException} carrying the exact incompatibility message.
 */
@Test
public void testReadUsingRequestedSchemaWithIncompatibleField(){
  MessageType fileSchema = new MessageType("schema",
          new PrimitiveType(OPTIONAL, INT32, "e"));
  MemPageStore pageStore = new MemPageStore(1);
  SimpleGroupFactory factory = new SimpleGroupFactory(fileSchema);
  Group written = factory.newGroup().append("e", 4);
  writeGroups(fileSchema, pageStore, written);

  try {
    // Same field name, different primitive type: must be rejected.
    MessageType requestedSchema = new MessageType("schema",
            new PrimitiveType(OPTIONAL, BINARY, "e"));
    readGroups(pageStore, fileSchema, requestedSchema, 1);
    fail("should have thrown an incompatible schema exception");
  } catch (ParquetDecodingException e) {
    String expectedMessage = "The requested schema is not compatible with the file schema. incompatible types: optional binary e != optional int32 e";
    assertEquals(expectedMessage, e.getMessage());
  }
}
 
开发者ID:apache,项目名称:parquet-mr,代码行数:18,代码来源:TestColumnIO.java

示例6: testReadUsingSchemaWithRequiredFieldThatWasOptional

import org.apache.parquet.example.data.simple.SimpleGroupFactory; //导入依赖的package包/类
/**
 * Writes one group with an optional INT32 field {@code e} and then reads it
 * back requesting {@code e} as REQUIRED; the read is expected to fail with a
 * {@link ParquetDecodingException} carrying the exact incompatibility message.
 */
@Test
public void testReadUsingSchemaWithRequiredFieldThatWasOptional(){
  MessageType fileSchema = new MessageType("schema",
          new PrimitiveType(OPTIONAL, INT32, "e"));
  MemPageStore pageStore = new MemPageStore(1);
  SimpleGroupFactory factory = new SimpleGroupFactory(fileSchema);
  Group written = factory.newGroup().append("e", 4);
  writeGroups(fileSchema, pageStore, written);

  try {
    // Same field and type, but tightened from optional to required: must be rejected.
    MessageType requestedSchema = new MessageType("schema",
            new PrimitiveType(REQUIRED, INT32, "e"));
    readGroups(pageStore, fileSchema, requestedSchema, 1);
    fail("should have thrown an incompatible schema exception");
  } catch (ParquetDecodingException e) {
    String expectedMessage = "The requested schema is not compatible with the file schema. incompatible types: required int32 e != optional int32 e";
    assertEquals(expectedMessage, e.getMessage());
  }
}
 
开发者ID:apache,项目名称:parquet-mr,代码行数:18,代码来源:TestColumnIO.java

示例7: testReadUsingProjectedSchema

import org.apache.parquet.example.data.simple.SimpleGroupFactory; //导入依赖的package包/类
/**
 * Writes one group with required INT32 fields {@code a=1} and {@code b=2},
 * then reads it back through a projection that contains only {@code b}
 * (declared optional) and validates that the single value read is {@code 2}.
 */
@Test
public void testReadUsingProjectedSchema(){
  MessageType originalSchema = new MessageType("schema",
          new PrimitiveType(REQUIRED, INT32, "a"),
          new PrimitiveType(REQUIRED, INT32, "b")
  );
  MessageType projectedSchema = new MessageType("schema",
          new PrimitiveType(OPTIONAL, INT32, "b")
  );
  MemPageStore pageStore = new MemPageStore(1);
  SimpleGroupFactory factory = new SimpleGroupFactory(originalSchema);
  Group written = factory.newGroup().append("a", 1).append("b", 2);
  writeGroups(originalSchema, pageStore, written);

  // Copy the read result into a fresh list before validating it.
  List<Group> readBack = new ArrayList<Group>(
          readGroups(pageStore, originalSchema, projectedSchema, 1));
  Object[][] expectedRows = new Object[][] {
          {2},
  };
  validateGroups(readBack, expectedRows);
}
 
开发者ID:apache,项目名称:parquet-mr,代码行数:23,代码来源:TestColumnIO.java

示例8: testOneOfEach

import org.apache.parquet.example.data.simple.SimpleGroupFactory; //导入依赖的package包/类
/**
 * Builds a single group that sets each of the fields {@code a}..{@code h} of
 * the {@code oneOfEach} schema with a value of a different Java type and runs
 * it through {@code testSchema}.
 */
@Test
public void testOneOfEach() {
  MessageType schema = MessageTypeParser.parseMessageType(oneOfEach);
  GroupFactory factory = new SimpleGroupFactory(schema);

  Group record = factory.newGroup();
  record = record.append("a", 1L);
  record = record.append("b", 2);
  record = record.append("c", 3.0f);
  record = record.append("d", 4.0d);
  record = record.append("e", true);
  record = record.append("f", Binary.fromString("6"));
  record = record.append("g", new NanoTime(1234, System.currentTimeMillis() * 1000));
  record = record.append("h", Binary.fromString("abc"));

  testSchema(schema, Arrays.asList(record));
}
 
开发者ID:apache,项目名称:parquet-mr,代码行数:17,代码来源:TestColumnIO.java

示例9: writeData

import org.apache.parquet.example.data.simple.SimpleGroupFactory; //导入依赖的package包/类
/**
 * Writes {@code nElements} groups to {@code writer} and then closes it.
 *
 * <p>Field values cycle through {@code ALPHABET}, {@code intValues} and
 * {@code longValues}. {@code fallback_binary_field} holds a single alphabet
 * letter for the first half of the rows and a random UUID string for the rest.
 *
 * <p>The writer is closed in a {@code finally} block so that it is released
 * even when a write fails (previously a failed write leaked the writer).
 *
 * @param f      factory used to create each group
 * @param writer destination writer; always closed by this method
 * @throws IOException if writing or closing fails
 */
private static void writeData(SimpleGroupFactory f, ParquetWriter<Group> writer) throws IOException {
  try {
    for (int i = 0; i < nElements; i++) {
      int index = i % ALPHABET.length();

      Group group = f.newGroup()
          .append("binary_field", ALPHABET.substring(index, index+1))
          .append("single_value_field", "sharp")
          .append("int32_field", intValues[i % intValues.length])
          .append("int64_field", longValues[i % longValues.length])
          .append("double_field", toDouble(intValues[i % intValues.length]))
          .append("float_field", toFloat(intValues[i % intValues.length]))
          .append("plain_int32_field", i)
          .append("fallback_binary_field", i < (nElements / 2) ?
              ALPHABET.substring(index, index+1) : UUID.randomUUID().toString());

      writer.write(group);
    }
  } finally {
    writer.close();
  }
}
 
开发者ID:apache,项目名称:parquet-mr,代码行数:20,代码来源:DictionaryFilterTest.java

示例10: prepareFile

import org.apache.parquet.example.data.simple.SimpleGroupFactory; //导入依赖的package包/类
/**
 * One-time fixture setup: runs {@code cleanup()}, registers the schema in the
 * configuration, builds a GZIP-compressed, dictionary-encoded
 * {@code PARQUET_1_0} writer for {@code file} and hands it to
 * {@code writeData} together with a group factory for the schema.
 *
 * @throws IOException if the writer cannot be built or the data cannot be written
 */
@BeforeClass
public static void prepareFile() throws IOException {
  cleanup();

  GroupWriteSupport.setSchema(schema, conf);
  SimpleGroupFactory groupFactory = new SimpleGroupFactory(schema);

  ParquetWriter<Group> fixtureWriter =
      ExampleParquetWriter.builder(file)
          .withWriterVersion(PARQUET_1_0)
          .withCompressionCodec(GZIP)
          .withRowGroupSize(1024*1024)
          .withPageSize(1024)
          .enableDictionaryEncoding()
          .withDictionaryPageSize(2*1024)
          .withConf(conf)
          .build();

  writeData(groupFactory, fixtureWriter);
}
 
开发者ID:apache,项目名称:parquet-mr,代码行数:18,代码来源:DictionaryFilterTest.java

示例11: run

import org.apache.parquet.example.data.simple.SimpleGroupFactory; //导入依赖的package包/类
/**
 * Writes 50000 records ({@code id}, {@code content}, {@code int64_field}) to a
 * Parquet file at a fixed local path.
 *
 * <p>The writer is opened with try-with-resources so that it is closed even
 * when a write fails (previously a failed write skipped {@code close()}). It
 * is also created with an explicit type argument instead of a raw type, and
 * the static {@code GroupWriteSupport.setSchema} is invoked statically.
 *
 * <p>I/O failures are reported via {@code printStackTrace()} and otherwise
 * ignored, exactly as before, since {@code run()} cannot throw checked
 * exceptions.
 */
public void run() {
    Configuration conf = new Configuration();
    int blockSize = 1 * 1024;
    int pageSize = 1 * 1024;
    int dictionaryPageSize = 512;
    boolean enableDictionary = false;
    boolean validating = false;
    Path basePath = new Path("file:///Users/Jelly/Developer/test");
    MessageType schema = MessageTypeParser.parseMessageType("message test {" +
            "required binary id; " +
            "required binary content; " +
            "required int64 int64_field; " +
            "}");
    GroupWriteSupport writeSupport = new GroupWriteSupport();
    // setSchema is a static method; call it on the class rather than the instance.
    GroupWriteSupport.setSchema(schema, conf);
    SimpleGroupFactory groupFactory = new SimpleGroupFactory(schema);

    try (ParquetWriter<Group> parquetWriter = new ParquetWriter<Group>(
            basePath,
            writeSupport,
            CompressionCodecName.UNCOMPRESSED,
            blockSize, pageSize, dictionaryPageSize,
            enableDictionary,
            validating,
            ParquetProperties.WriterVersion.PARQUET_2_0,
            conf)) {
        for (int i = 0; i < 50000; i++) {
            parquetWriter.write(groupFactory.newGroup()
                    .append("id", "10")
                    .append("content", "test" + i)
                    .append("int64_field", (long) i));
        }
    } catch (IOException e) {
        // Best-effort: report and return, matching the original behavior.
        e.printStackTrace();
    }
}
 
开发者ID:dbiir,项目名称:RealtimeAnalysis,代码行数:39,代码来源:ParquetWriterThread.java

示例12: test

import org.apache.parquet.example.data.simple.SimpleGroupFactory; //导入依赖的package包/类
/**
 * Opens a Parquet writer for {@code hdfs://127.0.0.1:9000/student.parquet}
 * with a "student" schema (name, age, score) and closes it without writing
 * any record.
 *
 * <p>The writer is now closed explicitly; previously it was never closed,
 * which leaked the underlying stream and left the file without a finalized
 * footer. The group factory that was instantiated but never used has been
 * removed, and the static {@code GroupWriteSupport.setSchema} is invoked
 * statically.
 *
 * @throws IOException if the writer cannot be created or closed
 */
@Test
public void test() throws IOException
{
    Type name = new PrimitiveType(Type.Repetition.REQUIRED, PrimitiveType.PrimitiveTypeName.BINARY, "name");
    Type age = new PrimitiveType(Type.Repetition.REQUIRED, PrimitiveType.PrimitiveTypeName.INT32, "age");
    Type score = new PrimitiveType(Type.Repetition.REQUIRED, PrimitiveType.PrimitiveTypeName.DOUBLE, "score");
    // NOTE(review): this wraps a "student" message inside another "student"
    // message; a flat schema built directly from (name, age, score) may have
    // been intended -- confirm before relying on the file layout.
    Type student = new MessageType("student", Arrays.asList(name, age, score));
    MessageType schema = new MessageType("student", student);

    int blockSize = 256 * 1024 * 1024;
    int pageSize = 6 * 1024;
    int dictionaryPageSize = 512;
    boolean enableDictionary = false;
    boolean validating = false;

    GroupWriteSupport groupWriteSupport = new GroupWriteSupport();

    Configuration conf = new Configuration();
    conf.set("fs.hdfs.impl", DistributedFileSystem.class.getName());
    Path path = new Path("hdfs://127.0.0.1:9000/student.parquet");
    GroupWriteSupport.setSchema(schema, conf);
    ParquetWriter parquetWriter = new ParquetWriter(
            path,
            groupWriteSupport,
            CompressionCodecName.UNCOMPRESSED,
            blockSize,
            pageSize,
            dictionaryPageSize,
            enableDictionary,
            validating,
            ParquetProperties.WriterVersion.PARQUET_2_0,
            conf);
    // Nothing is written; closing finalizes the file and releases the stream.
    parquetWriter.close();

}
 
开发者ID:dbiir,项目名称:RealtimeAnalysis,代码行数:36,代码来源:ParquetWriterTest.java

示例13: test

import org.apache.parquet.example.data.simple.SimpleGroupFactory; //导入依赖的package包/类
/**
 * Creates a group from {@code schema} and prints, for every column path
 * returned by {@code schema.getPaths()}, its length followed by each of its
 * segments on separate lines.
 */
@Test
public void test() {
    SimpleGroupFactory factory = new SimpleGroupFactory(schema);
    // The group itself is not inspected; only its creation is exercised.
    Group created = factory.newGroup();
    for (String[] columnPath : schema.getPaths()) {
        System.out.println(columnPath.length);
        for (int depth = 0; depth < columnPath.length; depth++) {
            System.out.println(columnPath[depth]);
        }
    }
}
 
开发者ID:dbiir,项目名称:RealtimeAnalysis,代码行数:12,代码来源:SchemaTest.java

示例14: generateParquetFile

import org.apache.parquet.example.data.simple.SimpleGroupFactory; //导入依赖的package包/类
/**
 * Writes a Parquet file of {@code nrows} records with one column per numeric
 * type plus a millisecond timestamp column.
 *
 * <p>Row {@code i} holds {@code 32 + i}, {@code 64L + i}, {@code 1.0f + i},
 * {@code 2.0d + i}, and {@code date.getTime() + i * 117}. The timestamp offset
 * is computed in {@code long} arithmetic; the previous {@code int}
 * multiplication {@code i * 117} would overflow for very large row counts
 * before being widened.
 *
 * @param parentDir directory in which the file is created
 * @param filename  name of the file inside {@code parentDir}
 * @param nrows     number of records to write
 * @param date      base timestamp for {@code timestamp_field}
 * @return the {@link File} handle of the generated Parquet file
 * @throws IOException if writing or closing the file fails
 */
static File generateParquetFile(File parentDir, String filename, int nrows, Date date) throws IOException {
  File f = new File(parentDir, filename);

  Configuration conf = new Configuration();
  MessageType schema = parseMessageType(
      "message test { "
          + "required int32 int32_field; "
          + "required int64 int64_field; "
          + "required float float_field; "
          + "required double double_field; "
          + "required int64 timestamp_field (TIMESTAMP_MILLIS);"
          + "} ");
  GroupWriteSupport.setSchema(schema, conf);
  SimpleGroupFactory fact = new SimpleGroupFactory(schema);
  ParquetWriter<Group> writer = new ParquetWriter<Group>(new Path(f.getPath()), new GroupWriteSupport(),
      UNCOMPRESSED, 1024, 1024, 512, true, false, ParquetProperties.WriterVersion.PARQUET_2_0, conf);
  try {
    for (int i = 0; i < nrows; i++) {
      writer.write(fact.newGroup()
          .append("int32_field", 32 + i)
          .append("int64_field", 64L + i)
          .append("float_field", 1.0f + i)
          .append("double_field", 2.0d + i)
          // 117L forces long multiplication so the offset cannot overflow int.
          .append("timestamp_field", date.getTime() + (i * 117L))
      );
    }
  } finally {
    writer.close();
  }
  return f;
}
 
开发者ID:h2oai,项目名称:h2o-3,代码行数:32,代码来源:ParseTestParquet.java

示例15: GroupRecordConverter

import org.apache.parquet.example.data.simple.SimpleGroupFactory; //导入依赖的package包/类
/**
 * Creates a record converter for the given schema.
 *
 * <p>A {@link SimpleGroupFactory} is built for {@code schema}, and the root
 * converter is an anonymous {@code SimpleGroupConverter} subclass that obtains
 * a new group from that factory each time {@code start()} is invoked.
 *
 * @param schema the message type used for both the group factory and the root converter
 */
public GroupRecordConverter(MessageType schema) {
  this.simpleGroupFactory = new SimpleGroupFactory(schema);
  // Root converter: constructed with a null first argument and index 0.
  // NOTE(review): presumably "no parent" for the top-level converter -- confirm
  // against the SimpleGroupConverter constructor.
  this.root = new SimpleGroupConverter(null, 0, schema) {
    @Override
    public void start() {
      // Replace the converter's current group with a fresh one from the factory.
      this.current = simpleGroupFactory.newGroup();
    }

    @Override
    public void end() {
      // Intentionally empty: nothing is done here when end() is called.
    }
  };
}
 
开发者ID:apache,项目名称:parquet-mr,代码行数:14,代码来源:GroupRecordConverter.java


注:本文中的org.apache.parquet.example.data.simple.SimpleGroupFactory类示例由纯净天空整理自Github/MSDocs等开源代码及文档管理平台,相关代码片段筛选自各路编程大神贡献的开源项目,源码版权归原作者所有,传播和使用请参考对应项目的License;未经允许,请勿转载。