本文整理汇总了Java中org.apache.parquet.example.data.simple.SimpleGroupFactory类的典型用法代码示例。如果您正苦于以下问题:Java SimpleGroupFactory类的具体用法?Java SimpleGroupFactory怎么用?Java SimpleGroupFactory使用的例子?那么恭喜您, 这里精选的类代码示例或许可以为您提供帮助。
SimpleGroupFactory类属于org.apache.parquet.example.data.simple包,在下文中一共展示了SimpleGroupFactory类的15个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Java代码示例。
示例1: main
import org.apache.parquet.example.data.simple.SimpleGroupFactory; //导入依赖的package包/类
/**
 * Generates four Parquet test files for Drill: simple and complex record
 * shapes, each in a non-nullable and a nullable variant.
 *
 * @throws IOException if any writer fails to create or write its file
 */
public static void main(String[] args) throws IOException {
    // One group factory per schema variant.
    SimpleGroupFactory simpleFactory = new SimpleGroupFactory(simpleSchema);
    SimpleGroupFactory simpleNullableFactory = new SimpleGroupFactory(simpleNullableSchema);
    GroupFactory complexFactory = new SimpleGroupFactory(complexSchema);
    GroupFactory complexNullableFactory = new SimpleGroupFactory(complexNullableSchema);

    // One writer per output file.
    ParquetWriter<Group> simpleWriter = initWriter(simpleSchema, "drill/parquet_test_file_simple");
    ParquetWriter<Group> complexWriter = initWriter(complexSchema, "drill/parquet_test_file_complex");
    ParquetWriter<Group> simpleNullableWriter = initWriter(simpleNullableSchema, "drill/parquet_test_file_simple_nullable");
    ParquetWriter<Group> complexNullableWriter = initWriter(complexNullableSchema, "drill/parquet_test_file_complex_nullable");

    // The boolean flag selects nullable-value generation.
    ParquetSimpleTestFileGenerator.writeSimpleValues(simpleFactory, simpleWriter, false);
    ParquetSimpleTestFileGenerator.writeSimpleValues(simpleNullableFactory, simpleNullableWriter, true);
    ParquetSimpleTestFileGenerator.writeComplexValues(complexFactory, complexWriter, false);
    ParquetSimpleTestFileGenerator.writeComplexValues(complexNullableFactory, complexNullableWriter, true);

    simpleWriter.close();
    complexWriter.close();
    simpleNullableWriter.close();
    complexNullableWriter.close();
}
示例2: generateEmptyWithSchema
import org.apache.parquet.example.data.simple.SimpleGroupFactory; //导入依赖的package包/类
/**
 * Writes a Parquet file that carries the five-column test schema but contains
 * zero rows (only the file metadata/footer is produced).
 *
 * @param parentDir directory in which to create the file
 * @param filename  name of the file to create under {@code parentDir}
 * @return the created file
 * @throws IOException if the writer cannot create or close the file
 */
static File generateEmptyWithSchema(File parentDir, String filename) throws IOException {
    File f = new File(parentDir, filename);
    Configuration conf = new Configuration();
    MessageType schema = parseMessageType(
        "message test { "
            + "required int32 int32_field; "
            + "required int64 int64_field; "
            + "required float float_field; "
            + "required double double_field; "
            + "required int64 timestamp_field (TIMESTAMP_MILLIS);"
            + "} ");
    GroupWriteSupport.setSchema(schema, conf);
    // Fix: removed an unused SimpleGroupFactory local — no rows are ever
    // written, so no group factory is needed here.
    ParquetWriter<Group> writer = new ParquetWriter<Group>(new Path(f.getPath()), new GroupWriteSupport(),
        UNCOMPRESSED, 1024, 1024, 512, false, false, ParquetProperties.WriterVersion.PARQUET_2_0, conf);
    // Close immediately: an empty file with a valid schema/footer is the goal.
    writer.close();
    return f;
}
示例3: generateSparseParquetFile
import org.apache.parquet.example.data.simple.SimpleGroupFactory; //导入依赖的package包/类
/**
 * Writes a Parquet file in which three optional columns are populated only on
 * every 10th row, while the required "row" column is set on every row.
 *
 * @param parentDir directory in which to create the file
 * @param filename  name of the file to create under {@code parentDir}
 * @param nrows     number of rows to write
 * @return the created file
 * @throws IOException on any write failure
 */
static File generateSparseParquetFile(File parentDir, String filename, int nrows) throws IOException {
    File outFile = new File(parentDir, filename);
    Configuration conf = new Configuration();
    MessageType schema = parseMessageType(
        "message test { optional int32 int32_field; optional binary string_field (UTF8); required int32 row; optional int32 int32_field2; } ");
    GroupWriteSupport.setSchema(schema, conf);
    SimpleGroupFactory factory = new SimpleGroupFactory(schema);
    ParquetWriter<Group> writer = new ParquetWriter<Group>(new Path(outFile.getPath()), new GroupWriteSupport(),
        UNCOMPRESSED, 1024, 1024, 512, true, false, ParquetProperties.WriterVersion.PARQUET_2_0, conf);
    try {
        for (int i = 0; i < nrows; i++) {
            Group record = factory.newGroup();
            if (i % 10 == 0) {
                // Sparse columns: only 1 row in 10 carries values. Note that
                // i % 10 is always 0 inside this branch, so the category is
                // always "CAT_0" (kept as-is to preserve the original data).
                record = record.append("int32_field", i)
                    .append("string_field", "CAT_" + (i % 10))
                    .append("int32_field2", i);
            }
            writer.write(record.append("row", i));
        }
    } finally {
        writer.close();
    }
    return outFile;
}
示例4: generateParquetFileWithNullCharacters
import org.apache.parquet.example.data.simple.SimpleGroupFactory; //导入依赖的package包/类
/**
 * Writes a single-column Parquet file of categorical strings where exactly one
 * row (row 66) contains an embedded NUL ({@code \0}) character.
 *
 * @param parentDir directory in which to create the file
 * @param filename  name of the file to create under {@code parentDir}
 * @param nrows     number of rows to write
 * @return the created file
 * @throws IOException on any write failure
 */
static File generateParquetFileWithNullCharacters(File parentDir, String filename, int nrows) throws IOException {
    File outFile = new File(parentDir, filename);
    Configuration conf = new Configuration();
    MessageType schema = parseMessageType(
        "message test { optional binary cat_field (UTF8); } ");
    GroupWriteSupport.setSchema(schema, conf);
    SimpleGroupFactory factory = new SimpleGroupFactory(schema);
    ParquetWriter<Group> writer = new ParquetWriter<Group>(new Path(outFile.getPath()), new GroupWriteSupport(),
        UNCOMPRESSED, 1024, 1024, 512, true, false, ParquetProperties.WriterVersion.PARQUET_2_0, conf);
    try {
        for (int i = 0; i < nrows; i++) {
            // Row 66 is the special case with the NUL byte; everything else
            // cycles through CAT_0 .. CAT_9.
            String value = (i == 66) ? "CAT_0_weird\0" : "CAT_" + (i % 10);
            writer.write(factory.newGroup().append("cat_field", value));
        }
    } finally {
        writer.close();
    }
    return outFile;
}
示例5: testReadUsingRequestedSchemaWithIncompatibleField
import org.apache.parquet.example.data.simple.SimpleGroupFactory; //导入依赖的package包/类
@Test
public void testReadUsingRequestedSchemaWithIncompatibleField(){
    // File schema: a single optional INT32 column "e", with one record written.
    MessageType fileSchema = new MessageType("schema",
        new PrimitiveType(OPTIONAL, INT32, "e"));
    MemPageStore pageStore = new MemPageStore(1);
    SimpleGroupFactory factory = new SimpleGroupFactory(fileSchema);
    writeGroups(fileSchema, pageStore, factory.newGroup().append("e", 4));

    // Request the same column as BINARY — the reader must reject the type change.
    MessageType requestedSchema = new MessageType("schema",
        new PrimitiveType(OPTIONAL, BINARY, "e"));
    try {
        readGroups(pageStore, fileSchema, requestedSchema, 1);
        fail("should have thrown an incompatible schema exception");
    } catch (ParquetDecodingException e) {
        assertEquals("The requested schema is not compatible with the file schema. incompatible types: optional binary e != optional int32 e", e.getMessage());
    }
}
示例6: testReadUsingSchemaWithRequiredFieldThatWasOptional
import org.apache.parquet.example.data.simple.SimpleGroupFactory; //导入依赖的package包/类
@Test
public void testReadUsingSchemaWithRequiredFieldThatWasOptional(){
    // File schema: "e" is OPTIONAL on disk, with one record written.
    MessageType fileSchema = new MessageType("schema",
        new PrimitiveType(OPTIONAL, INT32, "e"));
    MemPageStore pageStore = new MemPageStore(1);
    SimpleGroupFactory factory = new SimpleGroupFactory(fileSchema);
    writeGroups(fileSchema, pageStore, factory.newGroup().append("e", 4));

    // Requesting the column as REQUIRED must fail: a repetition cannot be
    // tightened from optional to required at read time.
    MessageType requestedSchema = new MessageType("schema",
        new PrimitiveType(REQUIRED, INT32, "e"));
    try {
        readGroups(pageStore, fileSchema, requestedSchema, 1);
        fail("should have thrown an incompatible schema exception");
    } catch (ParquetDecodingException e) {
        assertEquals("The requested schema is not compatible with the file schema. incompatible types: required int32 e != optional int32 e", e.getMessage());
    }
}
示例7: testReadUsingProjectedSchema
import org.apache.parquet.example.data.simple.SimpleGroupFactory; //导入依赖的package包/类
@Test
public void testReadUsingProjectedSchema(){
    // File schema has two required columns; the projection keeps only "b".
    MessageType fullSchema = new MessageType("schema",
        new PrimitiveType(REQUIRED, INT32, "a"),
        new PrimitiveType(REQUIRED, INT32, "b")
    );
    MessageType projectedSchema = new MessageType("schema",
        new PrimitiveType(OPTIONAL, INT32, "b")
    );
    MemPageStore pageStore = new MemPageStore(1);
    SimpleGroupFactory factory = new SimpleGroupFactory(fullSchema);
    writeGroups(fullSchema, pageStore, factory.newGroup().append("a", 1).append("b", 2));

    // Reading back through the projection must yield only column "b" (= 2).
    List<Group> groups = new ArrayList<Group>(readGroups(pageStore, fullSchema, projectedSchema, 1));
    Object[][] expected = {
        {2},
    };
    validateGroups(groups, expected);
}
示例8: testOneOfEach
import org.apache.parquet.example.data.simple.SimpleGroupFactory; //导入依赖的package包/类
@Test
public void testOneOfEach() {
    // Build one record carrying a value of every primitive type in the
    // "oneOfEach" schema, then round-trip it through testSchema.
    MessageType parsedSchema = MessageTypeParser.parseMessageType(oneOfEach);
    GroupFactory factory = new SimpleGroupFactory(parsedSchema);
    Group record = factory.newGroup()
        .append("a", 1L)                                                   // int64
        .append("b", 2)                                                    // int32
        .append("c", 3.0f)                                                 // float
        .append("d", 4.0d)                                                 // double
        .append("e", true)                                                 // boolean
        .append("f", Binary.fromString("6"))                               // binary
        .append("g", new NanoTime(1234, System.currentTimeMillis() * 1000))
        .append("h", Binary.fromString("abc"));
    testSchema(parsedSchema, Arrays.asList(record));
}
示例9: writeData
import org.apache.parquet.example.data.simple.SimpleGroupFactory; //导入依赖的package包/类
/**
 * Writes {@code nElements} rows, cycling through the class-level value pools
 * (ALPHABET, intValues, longValues). The "fallback_binary_field" switches from
 * repeating letters to random UUIDs halfway through so that dictionary
 * encoding falls back for that column. Closes the writer when done.
 *
 * @param f      factory matching the writer's schema
 * @param writer destination writer; closed by this method
 * @throws IOException on any write failure
 */
private static void writeData(SimpleGroupFactory f, ParquetWriter<Group> writer) throws IOException {
    for (int i = 0; i < nElements; i++) {
        int letterIndex = i % ALPHABET.length();
        String letter = ALPHABET.substring(letterIndex, letterIndex + 1);
        int intValue = intValues[i % intValues.length];
        boolean firstHalf = i < (nElements / 2);

        Group row = f.newGroup()
            .append("binary_field", letter)
            .append("single_value_field", "sharp")
            .append("int32_field", intValue)
            .append("int64_field", longValues[i % longValues.length])
            .append("double_field", toDouble(intValue))
            .append("float_field", toFloat(intValue))
            .append("plain_int32_field", i)
            .append("fallback_binary_field", firstHalf ? letter : UUID.randomUUID().toString());
        writer.write(row);
    }
    writer.close();
}
示例10: prepareFile
import org.apache.parquet.example.data.simple.SimpleGroupFactory; //导入依赖的package包/类
@BeforeClass
public static void prepareFile() throws IOException {
    // Start from a clean slate, then produce the dictionary-encoded test file
    // that the test methods read back.
    cleanup();
    GroupWriteSupport.setSchema(schema, conf);
    SimpleGroupFactory groupFactory = new SimpleGroupFactory(schema);
    ParquetWriter<Group> writer = ExampleParquetWriter.builder(file)
        .withConf(conf)
        .withWriterVersion(PARQUET_1_0)
        .withCompressionCodec(GZIP)
        .withRowGroupSize(1024 * 1024)
        .withPageSize(1024)
        .withDictionaryPageSize(2 * 1024)
        .enableDictionaryEncoding()
        .build();
    writeData(groupFactory, writer);
}
示例11: run
import org.apache.parquet.example.data.simple.SimpleGroupFactory; //导入依赖的package包/类
/**
 * Writes 50,000 three-column rows (binary id, binary content, int64) to a
 * local test file using the example Group object model.
 */
public void run() {
    Configuration conf = new Configuration();
    int blockSize = 1 * 1024;
    int pageSize = 1 * 1024;
    int dictionaryPageSize = 512;
    boolean enableDictionary = false;
    boolean validating = false;
    Path basePath = new Path("file:///Users/Jelly/Developer/test");
    MessageType schema = MessageTypeParser.parseMessageType("message test {" +
        "required binary id; " +
        "required binary content; " +
        "required int64 int64_field; " +
        "}");
    GroupWriteSupport writeSupport = new GroupWriteSupport();
    // Fix: setSchema is static — call it on the class, not through an instance.
    GroupWriteSupport.setSchema(schema, conf);
    SimpleGroupFactory groupFactory = new SimpleGroupFactory(schema);
    // Fix: try-with-resources guarantees the writer is closed even when a
    // write fails; the original leaked it on the exception path. Also use the
    // parameterized ParquetWriter<Group> instead of the raw type.
    try (ParquetWriter<Group> parquetWriter = new ParquetWriter<Group>(
            basePath,
            writeSupport,
            CompressionCodecName.UNCOMPRESSED,
            blockSize, pageSize, dictionaryPageSize,
            enableDictionary,
            validating,
            ParquetProperties.WriterVersion.PARQUET_2_0,
            conf)) {
        for (int i = 0; i < 50000; i++) {
            parquetWriter.write(groupFactory.newGroup()
                .append("id", "10")
                .append("content", "test" + i)
                .append("int64_field", (long) i));  // primitive cast avoids boxing
        }
    } catch (IOException e) {
        // Preserved behavior: log and continue (run() declares no checked exceptions).
        e.printStackTrace();
    }
}
示例12: test
import org.apache.parquet.example.data.simple.SimpleGroupFactory; //导入依赖的package包/类
/**
 * Creates a Parquet writer against an HDFS path with a nested "student"
 * schema. No rows are written; the test only verifies that writer creation
 * (and close) succeeds.
 *
 * @throws IOException if the writer cannot be created or closed
 */
@Test
public void test() throws IOException
{
    // Three required primitive fields nested under a "student" group.
    Type name = new PrimitiveType(Type.Repetition.REQUIRED, PrimitiveType.PrimitiveTypeName.BINARY, "name");
    Type age = new PrimitiveType(Type.Repetition.REQUIRED, PrimitiveType.PrimitiveTypeName.INT32, "age");
    Type score = new PrimitiveType(Type.Repetition.REQUIRED, PrimitiveType.PrimitiveTypeName.DOUBLE, "score");
    Type student = new MessageType("student", Arrays.asList(name, age, score));
    MessageType schema = new MessageType("student", student);
    int blockSize = 256 * 1024 * 1024;
    int pageSize = 6 * 1024;
    int dictionaryPageSize = 512;
    boolean enableDictionary = false;
    boolean validating = false;
    GroupWriteSupport groupWriteSupport = new GroupWriteSupport();
    // Fix: removed an unused SimpleGroupFactory local — no rows are written.
    Configuration conf = new Configuration();
    conf.set("fs.hdfs.impl", DistributedFileSystem.class.getName());
    Path path = new Path("hdfs://127.0.0.1:9000/student.parquet");
    // Fix: setSchema is static — call it on the class, not through an instance.
    GroupWriteSupport.setSchema(schema, conf);
    // Fix: the writer was previously never closed, leaking the stream and
    // leaving the output without a valid Parquet footer. Parameterize the raw
    // ParquetWriter type while we're at it.
    ParquetWriter<Group> parquetWriter = new ParquetWriter<Group>(
        path,
        groupWriteSupport,
        CompressionCodecName.UNCOMPRESSED,
        blockSize,
        pageSize,
        dictionaryPageSize,
        enableDictionary,
        validating,
        ParquetProperties.WriterVersion.PARQUET_2_0,
        conf);
    parquetWriter.close();
}
示例13: test
import org.apache.parquet.example.data.simple.SimpleGroupFactory; //导入依赖的package包/类
@Test
public void test() {
SimpleGroupFactory simpleGroupFactory = new SimpleGroupFactory(schema);
Group group = simpleGroupFactory.newGroup();
for ( String[] s: schema.getPaths()) {
System.out.println(s.length);
for (String ss: s) {
System.out.println(ss);
}
}
}
示例14: generateParquetFile
import org.apache.parquet.example.data.simple.SimpleGroupFactory; //导入依赖的package包/类
/**
 * Writes a Parquet file of {@code nrows} rows where every column is a simple
 * function of the row index; timestamps start at {@code date} and advance by
 * 117 ms per row.
 *
 * @param parentDir directory in which to create the file
 * @param filename  name of the file to create under {@code parentDir}
 * @param nrows     number of rows to write
 * @param date      base timestamp for the timestamp_field column
 * @return the created file
 * @throws IOException on any write failure
 */
static File generateParquetFile(File parentDir, String filename, int nrows, Date date) throws IOException {
    File outFile = new File(parentDir, filename);
    Configuration conf = new Configuration();
    MessageType schema = parseMessageType(
        "message test { "
            + "required int32 int32_field; "
            + "required int64 int64_field; "
            + "required float float_field; "
            + "required double double_field; "
            + "required int64 timestamp_field (TIMESTAMP_MILLIS);"
            + "} ");
    GroupWriteSupport.setSchema(schema, conf);
    SimpleGroupFactory factory = new SimpleGroupFactory(schema);
    ParquetWriter<Group> writer = new ParquetWriter<Group>(new Path(outFile.getPath()), new GroupWriteSupport(),
        UNCOMPRESSED, 1024, 1024, 512, true, false, ParquetProperties.WriterVersion.PARQUET_2_0, conf);
    try {
        for (int i = 0; i < nrows; i++) {
            Group row = factory.newGroup();
            row.append("int32_field", 32 + i);
            row.append("int64_field", 64L + i);
            row.append("float_field", 1.0f + i);
            row.append("double_field", 2.0d + i);
            row.append("timestamp_field", date.getTime() + (i * 117));
            writer.write(row);
        }
    } finally {
        writer.close();
    }
    return outFile;
}
示例15: GroupRecordConverter
import org.apache.parquet.example.data.simple.SimpleGroupFactory; //导入依赖的package包/类
/**
 * Builds the root of the converter tree used to assemble records of the given
 * schema into example {@code Group} objects.
 *
 * @param schema the message schema of the records to materialize
 */
public GroupRecordConverter(MessageType schema) {
// Factory that allocates a fresh SimpleGroup for every record.
this.simpleGroupFactory = new SimpleGroupFactory(schema);
// Root converter: start() creates the group that field converters populate
// (via the inherited "current" field); end() is a no-op because the group is
// already complete when the record ends.
this.root = new SimpleGroupConverter(null, 0, schema) {
@Override
public void start() {
this.current = simpleGroupFactory.newGroup();
}
@Override
public void end() {
}
};
}