This article collects typical usage examples of the Java class org.apache.parquet.example.data.Group. If you are unsure what the Group class is for or how to use it, the curated examples below should help.
The Group class belongs to the org.apache.parquet.example.data package. Fifteen code examples are shown below, sorted by popularity by default. Upvote the examples you find useful; your feedback helps the system recommend better Java samples.
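Before diving into the examples, here is a minimal, self-contained sketch of the Group API itself (the schema and field names below are invented for illustration): a Group is an in-memory record shaped by a MessageType, created through a GroupFactory and accessed with typed append/get methods.

import org.apache.parquet.example.data.Group;
import org.apache.parquet.example.data.simple.SimpleGroupFactory;
import org.apache.parquet.schema.MessageType;
import org.apache.parquet.schema.MessageTypeParser;

public class GroupBasics {
  public static void main(String[] args) {
    // Define a schema, build a factory over it, and create a record.
    MessageType schema = MessageTypeParser.parseMessageType(
        "message example { required int32 id; optional binary name (UTF8); }");
    SimpleGroupFactory factory = new SimpleGroupFactory(schema);
    Group group = factory.newGroup()
        .append("id", 42)
        .append("name", "alice");
    // Getters take a field name plus a repetition index (0 for the first value).
    System.out.println(group.getInteger("id", 0));  // 42
    System.out.println(group.getString("name", 0)); // alice
  }
}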
Example 1: run
import org.apache.parquet.example.data.Group; // import the required package/class

@Override
public int run(String[] args) throws Exception {
  if (args.length != 2) {
    System.err.printf("Usage: %s [generic options] <input> <output>\n",
        getClass().getSimpleName());
    ToolRunner.printGenericCommandUsage(System.err);
    return -1;
  }
  Job job = new Job(getConf(), "Text to Parquet");
  job.setJarByClass(getClass());
  FileInputFormat.addInputPath(job, new Path(args[0]));
  FileOutputFormat.setOutputPath(job, new Path(args[1]));
  job.setMapperClass(TextToParquetMapper.class);
  job.setNumReduceTasks(0); // map-only job
  job.setOutputFormatClass(AvroParquetOutputFormat.class);
  AvroParquetOutputFormat.setSchema(job, SCHEMA);
  job.setOutputKeyClass(Void.class);
  job.setOutputValueClass(Group.class);
  return job.waitForCompletion(true) ? 0 : 1;
}
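The TextToParquetMapper referenced above is not shown. A minimal sketch of what such a mapper could look like, assuming it emits one Group per input line (GROUP_SCHEMA and the field name "line" are invented here); note that a Group-valued job would more typically pair with ExampleOutputFormat/GroupWriteSupport than with the Avro output format configured above:

import java.io.IOException;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.parquet.example.data.Group;
import org.apache.parquet.example.data.simple.SimpleGroupFactory;
import org.apache.parquet.schema.MessageType;
import org.apache.parquet.schema.MessageTypeParser;

// Hypothetical mapper: one Group per text line. GROUP_SCHEMA is an assumption;
// the actual SCHEMA used by the job above is not shown.
public class TextToParquetMapper extends Mapper<LongWritable, Text, Void, Group> {

  private static final MessageType GROUP_SCHEMA = MessageTypeParser.parseMessageType(
      "message line { required binary line (UTF8); }");

  private final SimpleGroupFactory factory = new SimpleGroupFactory(GROUP_SCHEMA);

  @Override
  protected void map(LongWritable key, Text value, Context context)
      throws IOException, InterruptedException {
    context.write(null, factory.newGroup().append("line", value.toString()));
  }
}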
Example 2: main
import org.apache.parquet.example.data.Group; // import the required package/class

public static void main(String[] args) throws IOException {
  SimpleGroupFactory sgf = new SimpleGroupFactory(simpleSchema);
  GroupFactory gf = new SimpleGroupFactory(complexSchema);
  SimpleGroupFactory sngf = new SimpleGroupFactory(simpleNullableSchema);
  GroupFactory ngf = new SimpleGroupFactory(complexNullableSchema);
  ParquetWriter<Group> simpleWriter = initWriter(simpleSchema, "drill/parquet_test_file_simple");
  ParquetWriter<Group> complexWriter = initWriter(complexSchema, "drill/parquet_test_file_complex");
  ParquetWriter<Group> simpleNullableWriter = initWriter(simpleNullableSchema, "drill/parquet_test_file_simple_nullable");
  ParquetWriter<Group> complexNullableWriter = initWriter(complexNullableSchema, "drill/parquet_test_file_complex_nullable");
  ParquetSimpleTestFileGenerator.writeSimpleValues(sgf, simpleWriter, false);
  ParquetSimpleTestFileGenerator.writeSimpleValues(sngf, simpleNullableWriter, true);
  ParquetSimpleTestFileGenerator.writeComplexValues(gf, complexWriter, false);
  ParquetSimpleTestFileGenerator.writeComplexValues(ngf, complexNullableWriter, true);
  simpleWriter.close();
  complexWriter.close();
  simpleNullableWriter.close();
  complexNullableWriter.close();
}
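The initWriter helper is not included in the snippet. A rough sketch, assuming it wires a GroupWriteSupport into the same deprecated ParquetWriter constructor used in Examples 3-5 below (the codec, buffer sizes, and writer version are guesses):

private static ParquetWriter<Group> initWriter(MessageType schema, String fileName) throws IOException {
  Configuration conf = new Configuration();
  // GroupWriteSupport reads the schema from the Configuration.
  GroupWriteSupport.setSchema(schema, conf);
  return new ParquetWriter<Group>(new Path(fileName), new GroupWriteSupport(),
      UNCOMPRESSED, 1024, 1024, 512, true, false,
      ParquetProperties.WriterVersion.PARQUET_1_0, conf);
}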
Example 3: generateEmptyWithSchema
import org.apache.parquet.example.data.Group; // import the required package/class

static File generateEmptyWithSchema(File parentDir, String filename) throws IOException {
  File f = new File(parentDir, filename);
  Configuration conf = new Configuration();
  MessageType schema = parseMessageType(
      "message test { "
          + "required int32 int32_field; "
          + "required int64 int64_field; "
          + "required float float_field; "
          + "required double double_field; "
          + "required int64 timestamp_field (TIMESTAMP_MILLIS);"
          + "} ");
  GroupWriteSupport.setSchema(schema, conf);
  SimpleGroupFactory fact = new SimpleGroupFactory(schema); // unused: no rows are written
  ParquetWriter<Group> writer = new ParquetWriter<Group>(new Path(f.getPath()), new GroupWriteSupport(),
      UNCOMPRESSED, 1024, 1024, 512, false, false, ParquetProperties.WriterVersion.PARQUET_2_0, conf);
  writer.close(); // closing immediately yields a file containing only schema and footer
  return f;
}
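As a quick sanity check (a sketch, not part of the original test), reading the file back with the example GroupReadSupport should yield no records, since only the schema and footer were written:

ParquetReader<Group> reader = ParquetReader.builder(new GroupReadSupport(), new Path(f.getPath())).build();
try {
  // No row groups exist, so the very first read() returns null.
  assert reader.read() == null;
} finally {
  reader.close();
}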
Example 4: generateSparseParquetFile
import org.apache.parquet.example.data.Group; // import the required package/class

static File generateSparseParquetFile(File parentDir, String filename, int nrows) throws IOException {
  File f = new File(parentDir, filename);
  Configuration conf = new Configuration();
  MessageType schema = parseMessageType(
      "message test { optional int32 int32_field; optional binary string_field (UTF8); required int32 row; optional int32 int32_field2; } ");
  GroupWriteSupport.setSchema(schema, conf);
  SimpleGroupFactory fact = new SimpleGroupFactory(schema);
  ParquetWriter<Group> writer = new ParquetWriter<Group>(new Path(f.getPath()), new GroupWriteSupport(),
      UNCOMPRESSED, 1024, 1024, 512, true, false, ParquetProperties.WriterVersion.PARQUET_2_0, conf);
  try {
    for (int i = 0; i < nrows; i++) {
      Group g = fact.newGroup();
      // The optional fields are populated only on every 10th row.
      if (i % 10 == 0) { g = g.append("int32_field", i); }
      if (i % 10 == 0) { g = g.append("string_field", "CAT_" + (i % 10)); }
      if (i % 10 == 0) { g = g.append("int32_field2", i); }
      writer.write(g.append("row", i));
    }
  } finally {
    writer.close();
  }
  return f;
}
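When such a sparse file is read back, an absent optional field shows up as a field repetition count of zero. A hedged sketch of how a consumer might check this:

ParquetReader<Group> reader = ParquetReader.builder(new GroupReadSupport(), new Path(f.getPath())).build();
Group g;
while ((g = reader.read()) != null) {
  int row = g.getInteger("row", 0);
  // int32_field was appended only on every 10th row.
  if (g.getFieldRepetitionCount("int32_field") > 0) {
    System.out.println(row + " -> " + g.getInteger("int32_field", 0));
  }
}
reader.close();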
Example 5: generateParquetFileWithNullCharacters
import org.apache.parquet.example.data.Group; // import the required package/class

static File generateParquetFileWithNullCharacters(File parentDir, String filename, int nrows) throws IOException {
  File f = new File(parentDir, filename);
  Configuration conf = new Configuration();
  MessageType schema = parseMessageType(
      "message test { optional binary cat_field (UTF8); } ");
  GroupWriteSupport.setSchema(schema, conf);
  SimpleGroupFactory fact = new SimpleGroupFactory(schema);
  ParquetWriter<Group> writer = new ParquetWriter<Group>(new Path(f.getPath()), new GroupWriteSupport(),
      UNCOMPRESSED, 1024, 1024, 512, true, false, ParquetProperties.WriterVersion.PARQUET_2_0, conf);
  try {
    for (int i = 0; i < nrows; i++) {
      Group g = fact.newGroup();
      // Row 66 gets a value with an embedded NUL character.
      String value = i == 66 ? "CAT_0_weird\0" : "CAT_" + (i % 10);
      writer.write(g.append("cat_field", value));
    }
  } finally {
    writer.close();
  }
  return f;
}
Example 6: read
import org.apache.parquet.example.data.Group; // import the required package/class

private void read(Path parquetFile, int nRows, Blackhole blackhole) throws IOException {
  ParquetReader<Group> reader = ParquetReader.builder(new GroupReadSupport(), parquetFile).withConf(configuration).build();
  for (int i = 0; i < nRows; i++) {
    Group group = reader.read();
    // The JMH Blackhole consumes each value to prevent dead-code elimination.
    blackhole.consume(group.getBinary("binary_field", 0));
    blackhole.consume(group.getInteger("int32_field", 0));
    blackhole.consume(group.getLong("int64_field", 0));
    blackhole.consume(group.getBoolean("boolean_field", 0));
    blackhole.consume(group.getFloat("float_field", 0));
    blackhole.consume(group.getDouble("double_field", 0));
    blackhole.consume(group.getBinary("flba_field", 0));
    blackhole.consume(group.getInt96("int96_field", 0));
  }
  reader.close();
}
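The benchmark's schema is defined elsewhere; for reference, a schema consistent with the getters above would look roughly like this (the required repetition and the fixed-length width of 10 are assumptions):

MessageType schema = MessageTypeParser.parseMessageType(
    "message test { "
        + "required binary binary_field; "
        + "required int32 int32_field; "
        + "required int64 int64_field; "
        + "required boolean boolean_field; "
        + "required float float_field; "
        + "required double double_field; "
        + "required fixed_len_byte_array(10) flba_field; "
        + "required int96 int96_field; "
        + "} ");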
Example 7: toString
import org.apache.parquet.example.data.Group; // import the required package/class

public String toString(String indent) {
  String result = "";
  int i = 0;
  for (Type field : schema.getFields()) {
    String name = field.getName();
    List<Object> values = data[i];
    ++i;
    if (values != null && values.size() > 0) {
      for (Object value : values) {
        result += indent + name;
        if (value == null) {
          result += ": NULL\n";
        } else if (value instanceof Group) {
          // Recurse into nested groups with increased indentation.
          result += "\n" + ((SimpleGroup) value).toString(indent + "  ");
        } else {
          result += ": " + value.toString() + "\n";
        }
      }
    }
  }
  return result;
}
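A short usage sketch (this method appears to come from SimpleGroup, whose no-argument toString() delegates to it with an empty indent; the schema with int32 fields "a" and "b" is invented here):

SimpleGroupFactory factory = new SimpleGroupFactory(schema);
Group g = factory.newGroup().append("a", 1).append("b", 2);
System.out.print(((SimpleGroup) g).toString(""));
// prints:
// a: 1
// b: 2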
Example 8: testReadUsingProjectedSchema
import org.apache.parquet.example.data.Group; // import the required package/class

@Test
public void testReadUsingProjectedSchema() {
  MessageType originalSchema = new MessageType("schema",
      new PrimitiveType(REQUIRED, INT32, "a"),
      new PrimitiveType(REQUIRED, INT32, "b")
  );
  MessageType projectedSchema = new MessageType("schema",
      new PrimitiveType(OPTIONAL, INT32, "b")
  );
  MemPageStore store = new MemPageStore(1);
  SimpleGroupFactory groupFactory = new SimpleGroupFactory(originalSchema);
  writeGroups(originalSchema, store, groupFactory.newGroup().append("a", 1).append("b", 2));
  {
    List<Group> groups = new ArrayList<Group>();
    groups.addAll(readGroups(store, originalSchema, projectedSchema, 1));
    // Only the projected column "b" is materialized.
    Object[][] expected = {
        {2},
    };
    validateGroups(groups, expected);
  }
}
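The writeGroups and readGroups helpers are not shown. Hedged sketches of what they might do (the ColumnWriteStoreV1 constructor arguments are assumptions and vary across Parquet versions): writing routes each Group through a GroupWriter into the MemPageStore, and reading materializes Groups back through a GroupRecordConverter, projecting via the requested schema.

private void writeGroups(MessageType schema, MemPageStore store, Group... groups) {
  MessageColumnIO columnIO = new ColumnIOFactory(true).getColumnIO(schema);
  ColumnWriteStoreV1 columns =
      new ColumnWriteStoreV1(store, 1024, 1024, false, ParquetProperties.WriterVersion.PARQUET_1_0);
  GroupWriter groupWriter = new GroupWriter(columnIO.getRecordWriter(columns), schema);
  for (Group group : groups) {
    groupWriter.write(group);
  }
  columns.flush(); // push buffered values into the page store
}

private List<Group> readGroups(MemPageStore store, MessageType fileSchema,
                               MessageType requestedSchema, int n) {
  MessageColumnIO columnIO = new ColumnIOFactory(true).getColumnIO(requestedSchema, fileSchema);
  RecordReader<Group> recordReader =
      columnIO.getRecordReader(store, new GroupRecordConverter(requestedSchema));
  List<Group> groups = new ArrayList<Group>();
  for (int i = 0; i < n; i++) {
    groups.add(recordReader.read());
  }
  return groups;
}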
Example 9: validateGroups
import org.apache.parquet.example.data.Group; // import the required package/class

private void validateGroups(List<Group> groups1, Object[][] e1) {
  Iterator<Group> i1 = groups1.iterator();
  for (int i = 0; i < e1.length; i++) {
    Object[] objects = e1[i];
    Group next = i1.next();
    for (int j = 0; j < objects.length; j++) {
      Object object = objects[j];
      if (object == null) {
        assertEquals(0, next.getFieldRepetitionCount(j));
      } else {
        assertEquals("looking for r[" + i + "][" + j + "][0]=" + object, 1, next.getFieldRepetitionCount(j));
        assertEquals(object, next.getInteger(j, 0));
      }
    }
  }
}
Example 10: testOneOfEach
import org.apache.parquet.example.data.Group; // import the required package/class

@Test
public void testOneOfEach() {
  MessageType oneOfEachSchema = MessageTypeParser.parseMessageType(oneOfEach);
  GroupFactory gf = new SimpleGroupFactory(oneOfEachSchema);
  Group g1 = gf.newGroup()
      .append("a", 1L)
      .append("b", 2)
      .append("c", 3.0f)
      .append("d", 4.0d)
      .append("e", true)
      .append("f", Binary.fromString("6"))
      .append("g", new NanoTime(1234, System.currentTimeMillis() * 1000))
      .append("h", Binary.fromString("abc"));
  testSchema(oneOfEachSchema, Arrays.asList(g1));
}
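The oneOfEach schema string is defined outside this snippet; a definition consistent with the values appended above (int64 for a, int32 for b, then float, double, boolean, binary, int96 for the NanoTime, and a 3-byte fixed array for "abc") would be:

String oneOfEach =
    "message Document { "
        + "required int64 a; "
        + "required int32 b; "
        + "required float c; "
        + "required double d; "
        + "required boolean e; "
        + "required binary f; "
        + "required int96 g; "
        + "required fixed_len_byte_array(3) h; "
        + "} ";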
Example 11: testFilterOnInteger
import org.apache.parquet.example.data.Group; // import the required package/class

@Test
public void testFilterOnInteger() {
  MessageColumnIO columnIO = new ColumnIOFactory(true).getColumnIO(schema);
  MemPageStore memPageStore = writeTestRecords(columnIO, 1);
  // Get first record
  RecordMaterializer<Group> recordConverter = new GroupRecordConverter(schema);
  RecordReaderImplementation<Group> recordReader = (RecordReaderImplementation<Group>)
      columnIO.getRecordReader(memPageStore, recordConverter, FilterCompat.get(column("DocId", equalTo(10L))));
  readOne(recordReader, "r2 filtered out", r1);
  // Get second record
  recordReader = (RecordReaderImplementation<Group>)
      columnIO.getRecordReader(memPageStore, recordConverter,
          FilterCompat.get(column("DocId", equalTo(20L))));
  readOne(recordReader, "r1 filtered out", r2);
}
Example 12: testApplyFunctionFilterOnLong
import org.apache.parquet.example.data.Group; // import the required package/class

@Test
public void testApplyFunctionFilterOnLong() {
  MessageColumnIO columnIO = new ColumnIOFactory(true).getColumnIO(schema);
  MemPageStore memPageStore = writeTestRecords(columnIO, 1);
  // Get first record
  RecordMaterializer<Group> recordConverter = new GroupRecordConverter(schema);
  RecordReaderImplementation<Group> recordReader = (RecordReaderImplementation<Group>)
      columnIO.getRecordReader(memPageStore, recordConverter,
          FilterCompat.get(column("DocId", equalTo(10L))));
  readOne(recordReader, "r2 filtered out", r1);
  // Get second record
  recordReader = (RecordReaderImplementation<Group>)
      columnIO.getRecordReader(memPageStore, recordConverter,
          FilterCompat.get(column("DocId", applyFunctionToLong(new LongGreaterThan15Predicate()))));
  readOne(recordReader, "r1 filtered out", r2);
}
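LongGreaterThan15Predicate is not shown; a minimal sketch, assuming the old parquet.filter.ColumnPredicates function-predicate API (the threshold of 15 is implied only by the name):

public static class LongGreaterThan15Predicate implements ColumnPredicates.LongPredicateFunction {
  @Override
  public boolean functionToApply(long input) {
    return input > 15;
  }
}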
Example 13: testPaged
import org.apache.parquet.example.data.Group; // import the required package/class

@Test
public void testPaged() {
  MessageColumnIO columnIO = new ColumnIOFactory(true).getColumnIO(schema);
  MemPageStore memPageStore = writeTestRecords(columnIO, 6);
  RecordMaterializer<Group> recordConverter = new GroupRecordConverter(schema);
  RecordReaderImplementation<Group> recordReader = (RecordReaderImplementation<Group>)
      columnIO.getRecordReader(memPageStore, recordConverter,
          FilterCompat.get(page(4, 4)));
  List<Group> all = readAll(recordReader);
  assertEquals("expecting records " + all, 4, all.size());
  for (int i = 0; i < all.size(); i++) {
    assertEquals("expecting record", (i % 2 == 0 ? r2 : r1).toString(), all.get(i).toString());
  }
}
Example 14: testFilteredAndPaged
import org.apache.parquet.example.data.Group; // import the required package/class

@Test
public void testFilteredAndPaged() {
  MessageColumnIO columnIO = new ColumnIOFactory(true).getColumnIO(schema);
  MemPageStore memPageStore = writeTestRecords(columnIO, 8);
  RecordMaterializer<Group> recordConverter = new GroupRecordConverter(schema);
  RecordReaderImplementation<Group> recordReader = (RecordReaderImplementation<Group>)
      columnIO.getRecordReader(memPageStore, recordConverter,
          FilterCompat.get(and(column("DocId", equalTo(10L)), page(2, 4))));
  List<Group> all = readAll(recordReader);
  assertEquals("expecting 4 records " + all, 4, all.size());
  for (int i = 0; i < all.size(); i++) {
    assertEquals("expecting record1", r1.toString(), all.get(i).toString());
  }
}
Example 15: testFilteredOrPaged
import org.apache.parquet.example.data.Group; // import the required package/class

@Test
public void testFilteredOrPaged() {
  MessageColumnIO columnIO = new ColumnIOFactory(true).getColumnIO(schema);
  MemPageStore memPageStore = writeTestRecords(columnIO, 8);
  RecordMaterializer<Group> recordConverter = new GroupRecordConverter(schema);
  RecordReaderImplementation<Group> recordReader = (RecordReaderImplementation<Group>)
      columnIO.getRecordReader(memPageStore, recordConverter,
          FilterCompat.get(or(column("DocId", equalTo(10L)),
              column("DocId", equalTo(20L)))));
  List<Group> all = readAll(recordReader);
  // 8 copies each of r1 and r2 are written, and the OR filter matches both.
  assertEquals("expecting 16 records " + all, 16, all.size());
  for (int i = 0; i < all.size() / 2; i++) {
    assertEquals("expecting record1", r1.toString(), all.get(2 * i).toString());
    assertEquals("expecting record2", r2.toString(), all.get(2 * i + 1).toString());
  }
}
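Finally, the readOne and readAll helpers used throughout Examples 11-15 are not shown. Hedged sketches, assuming the record reader yields null once no further records pass the filter:

private void readOne(RecordReader<Group> recordReader, String message, Group expected) {
  Group actual = recordReader.read();
  assertEquals(message, expected.toString(), actual.toString());
}

private List<Group> readAll(RecordReader<Group> recordReader) {
  List<Group> result = new ArrayList<Group>();
  Group g;
  while ((g = recordReader.read()) != null) {
    result.add(g);
  }
  return result;
}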