本文整理汇总了Java中parquet.schema.MessageTypeParser.parseMessageType方法的典型用法代码示例。如果您正苦于以下问题:Java MessageTypeParser.parseMessageType方法的具体用法?Java MessageTypeParser.parseMessageType怎么用?Java MessageTypeParser.parseMessageType使用的例子?那么恭喜您, 这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在类parquet.schema.MessageTypeParser的用法示例。
在下文中一共展示了MessageTypeParser.parseMessageType方法的15个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Java代码示例。
示例1: initScanFilter
import parquet.schema.MessageTypeParser; //导入方法依赖的package包/类
/**
 * Initializes the scan filter column list from the read schema carried on the scan.
 *
 * <p>Reads the schema string stored under {@code HConstants.SCAN_TABLE_SCHEMA},
 * parses it into a Parquet {@code MessageType}, and records each field name
 * (with the "cf:" prefix stripped) into {@code filterColumns}. A null scan or a
 * missing/empty schema attribute is treated as "no filter" and returns quietly.
 *
 * @param scan the scan whose attribute carries the serialized table schema; may be null
 */
public void initScanFilter(Scan scan) {
    if (scan == null) {
        return; // nothing to filter on
    }
    // BUG FIX: the original called new String(scan.getAttribute(...)) BEFORE the
    // null checks, so a missing attribute threw NPE before the guard could run.
    byte[] rawSchema = scan.getAttribute(HConstants.SCAN_TABLE_SCHEMA);
    if (rawSchema == null) {
        return;
    }
    String schema = new String(rawSchema); // NOTE(review): platform charset — presumably UTF-8, confirm with writer side
    if (schema.isEmpty()) {
        return;
    }
    try {
        MessageType readSchema = MessageTypeParser.parseMessageType(schema);
        for (Type type : readSchema.getFields()) {
            String columnName = type.getName();
            // Fields are stored as "cf:<qualifier>"; strip the prefix to get the real column name.
            if (columnName.startsWith("cf")) {
                columnName = columnName.substring(3);
            }
            filterColumns.add(columnName.getBytes());
        }
    } catch (Exception e) {
        //TODO: send the exception back to the client
        // Pass the exception as the cause so the stack trace is preserved
        // (the original concatenated it into the message, losing the trace).
        LOG.error("parse the message schema error", e);
    }
}
示例2: createParquetWriter
import parquet.schema.MessageTypeParser; //导入方法依赖的package包/类
/**
 * Creates a writer for a Parquet-backed store file ({@code PStoreFile}).
 *
 * <p>The table schema is looked up from the table descriptor under
 * {@code HConstants.SCHEMA}; without it no writer can be built.
 *
 * @param meta extra key/value metadata to embed in the Parquet file footer
 * @return a configured writer, or {@code null} when the schema is missing
 *         or the writer could not be created
 */
public PStoreFile.Writer createParquetWriter(Map<String, String> meta) {
    String schemaStr =
        Bytes.toString(getHRegion().getTableDesc().getValue(HConstants.SCHEMA.getBytes()));
    if (schemaStr == null) {
        LOG.error("No schema found! for " + this.getTableName());
        return null; // early return: nothing sensible to build without a schema
    }
    MessageType schema = MessageTypeParser.parseMessageType(schemaStr);
    Path filePath = new Path(fs.createTempName() + ".parquet");
    try {
        return new PStoreFile.WriterBuilder(conf, getFileSystem(), schema, filePath)
            .addMetaData(meta)
            .build();
    } catch (IOException ioe) {
        // Say what we were doing and keep the stack trace
        // (the original logged only the bare exception object).
        LOG.error("Failed to create parquet writer at " + filePath, ioe);
        return null;
    }
}
示例3: getParquetInputSplit
import parquet.schema.MessageTypeParser; //导入方法依赖的package包/类
/**
 * Builds a {@code ParquetInputSplit} for this file covering all row groups.
 *
 * <p>The split length counts only the compressed bytes of column chunks that
 * belong to the requested schema; the end offset is the byte just past the
 * last row group.
 *
 * @param requestedSchema      parquet schema string selecting the columns to read
 * @param readSupportMetadata  currently unused here — NOTE(review): confirm whether
 *                             it should be forwarded to the split
 */
public ParquetInputSplit getParquetInputSplit(FileStatus fileStatus, String requestedSchema, Map<String, String> readSupportMetadata) throws IOException {
    MessageType requested = MessageTypeParser.parseMessageType(requestedSchema);
    // Hoisted: the original re-fetched getRowGroups() on every loop iteration.
    List<BlockMetaData> rowGroups = this.getRowGroups();
    long length = 0;
    for (BlockMetaData block : rowGroups) {
        for (ColumnChunkMetaData column : block.getColumns()) {
            if (requested.containsPath(column.getPath().toArray())) {
                length += column.getTotalSize();
            }
        }
    }
    BlockMetaData lastRowGroup = rowGroups.get(rowGroups.size() - 1);
    long end = lastRowGroup.getStartingPos() + lastRowGroup.getTotalByteSize();
    long[] rowGroupOffsets = new long[rowGroups.size()];
    for (int i = 0; i < rowGroupOffsets.length; i++) {
        rowGroupOffsets[i] = rowGroups.get(i).getStartingPos();
    }
    return new ParquetInputSplit(
        fileStatus.getPath(),
        hdfsBlock.getOffset(),
        end,
        length,
        hdfsBlock.getHosts(),
        rowGroupOffsets
    );
}
示例4: end
import parquet.schema.MessageTypeParser; //导入方法依赖的package包/类
/**
 * Sums the compressed on-disk size of every column chunk in {@code blocks}
 * whose path is part of the requested schema.
 */
private static long end(List<BlockMetaData> blocks, String requestedSchema) {
    MessageType requested = MessageTypeParser.parseMessageType(requestedSchema);
    long total = 0;
    for (BlockMetaData rowGroup : blocks) {
        for (ColumnChunkMetaData chunk : rowGroup.getColumns()) {
            boolean selected = requested.containsPath(chunk.getPath().toArray());
            if (selected) {
                total += chunk.getTotalSize();
            }
        }
    }
    return total;
}
示例5: main
import parquet.schema.MessageTypeParser; //导入方法依赖的package包/类
/**
 * Example: write 10,000 records to an HDFS parquet file.
 *
 * <p>Every field in the schema is {@code required}, so each written record
 * must populate all four of them.
 */
public static void main(String[] args) throws IOException {
    Path root = new Path("hdfs://10.214.208.11:9000/parquet/"); // target directory
    Configuration configuration = new Configuration();
    MessageType schema = MessageTypeParser.parseMessageType( // parquet file schema
        " message people { " +
        "required binary rowkey;" +
        "required binary cf:name;" +
        "required binary cf:age;" +
        "required int64 timestamp;" +
        " }");
    GroupWriteSupport.setSchema(schema, configuration);
    SimpleGroupFactory sfg = new SimpleGroupFactory(schema);
    Path file = new Path(root, "people002.parquet");
    Map<String, String> meta = new HashMap<String, String>();
    meta.put("startkey", "1");
    meta.put("endkey", "2");
    ParquetWriter<Group> writer = new ParquetWriter<Group>(
        file,
        new GroupWriteSupport(meta),
        CompressionCodecName.UNCOMPRESSED,
        1024,   // row group (block) size
        1024,   // page size
        512,    // dictionary page size
        true,   // enable dictionary encoding
        false,  // disable validation
        ParquetProperties.WriterVersion.PARQUET_1_0,
        configuration);
    try {
        // BUG FIX: the original appended only "name"/"age" (fields that do not
        // exist in the schema) and built one fully-populated group it never
        // wrote. Populate all four required fields on every record instead.
        for (int i = 0; i < 10000; ++i) {
            writer.write(
                sfg.newGroup()
                    .append("rowkey", String.valueOf(i))
                    .append("cf:name", "wangxiaoyi" + i)
                    .append("cf:age", String.valueOf(i))
                    .append("timestamp", System.currentTimeMillis()));
        }
    } finally {
        writer.close(); // always release the HDFS output stream
    }
}
示例6: testInitWithoutSpecifyingRequestSchema
import parquet.schema.MessageTypeParser; //导入方法依赖的package包/类
/** When no request schema is configured, init() must fall back to the full file schema. */
@Test
public void testInitWithoutSpecifyingRequestSchema() throws Exception {
    GroupReadSupport s = new GroupReadSupport();
    Configuration configuration = new Configuration();
    Map<String, String> keyValueMetaData = new HashMap<String, String>();
    MessageType fileSchema = MessageTypeParser.parseMessageType(fullSchemaStr);
    ReadSupport.ReadContext context = s.init(configuration, keyValueMetaData, fileSchema);
    // JUnit convention: expected first, actual second (arguments were reversed).
    assertEquals(fileSchema, context.getRequestedSchema());
}
示例7: testInitWithPartialSchema
import parquet.schema.MessageTypeParser; //导入方法依赖的package包/类
/** When a partial read schema is configured, init() must request exactly that subset. */
@Test
public void testInitWithPartialSchema() {
    GroupReadSupport s = new GroupReadSupport();
    Configuration configuration = new Configuration();
    Map<String, String> keyValueMetaData = new HashMap<String, String>();
    MessageType fileSchema = MessageTypeParser.parseMessageType(fullSchemaStr);
    MessageType partialSchema = MessageTypeParser.parseMessageType(partialSchemaStr);
    configuration.set(ReadSupport.PARQUET_READ_SCHEMA, partialSchemaStr);
    ReadSupport.ReadContext context = s.init(configuration, keyValueMetaData, fileSchema);
    // JUnit convention: expected first, actual second (arguments were reversed).
    assertEquals(partialSchema, context.getRequestedSchema());
}
示例8: setUp
import parquet.schema.MessageTypeParser; //导入方法依赖的package包/类
/** Builds ten synthetic row groups (10 rows each) plus a minimal one-column file schema. */
@Before
public void setUp() {
    final int groupCount = 10;
    final int rowsPerGroup = 10;
    blocks = new ArrayList<BlockMetaData>();
    for (int g = 0; g < groupCount; g++) {
        blocks.add(newBlock(g * rowsPerGroup, rowsPerGroup));
    }
    schema = MessageTypeParser.parseMessageType("message doc { required binary foo; }");
    fileMetaData = new FileMetaData(schema, new HashMap<String, String>(), "parquet-mr");
}
示例9: testWriteMode
import parquet.schema.MessageTypeParser; //导入方法依赖的package包/类
/**
 * Verifies ParquetFileWriter's write modes against an existing file:
 * CREATE must refuse to clobber it, OVERWRITE must succeed.
 */
@Test
public void testWriteMode() throws Exception {
    File testDir = new File("target/test/TestParquetFileWriter/");
    testDir.mkdirs();
    File testFile = new File(testDir, "testParquetFile").getAbsoluteFile();
    testFile.createNewFile(); // pre-create so CREATE mode has something to collide with
    MessageType schema = MessageTypeParser.parseMessageType(
        "message m { required group a {required binary b;} required group "
        + "c { required int64 d; }}");
    Configuration conf = new Configuration();
    Path path = new Path(testFile.toURI());
    try {
        // CREATE over an existing file must throw.
        boolean exceptionThrown = false;
        try {
            // Writer intentionally discarded: the original kept an unused local.
            new ParquetFileWriter(conf, schema, path, ParquetFileWriter.Mode.CREATE);
        } catch (IOException expected) {
            exceptionThrown = true;
        }
        assertTrue(exceptionThrown);
        // OVERWRITE over the same file must not throw.
        exceptionThrown = false;
        try {
            new ParquetFileWriter(conf, schema, path, ParquetFileWriter.Mode.OVERWRITE);
        } catch (IOException unexpected) {
            exceptionThrown = true;
        }
        assertTrue(!exceptionThrown);
    } finally {
        testFile.delete(); // clean up even when an assertion fails
    }
}
示例10: testWriteReadStatisticsAllNulls
import parquet.schema.MessageTypeParser; //导入方法依赖的package包/类
/**
 * Writes one record that leaves its only column unset and verifies the footer
 * statistics record exactly one null for the first block's first column.
 */
@Test
public void testWriteReadStatisticsAllNulls() throws Exception {
    File testFile = new File("target/test/TestParquetFileWriter/testParquetFile").getAbsoluteFile();
    testFile.delete(); // start from a clean slate
    writeSchema = "message example {\n" +
        "required binary content;\n" +
        "}";
    Path path = new Path(testFile.toURI());
    MessageType schema = MessageTypeParser.parseMessageType(writeSchema);
    Configuration configuration = new Configuration();
    GroupWriteSupport.setSchema(schema, configuration);
    ParquetWriter<Group> writer = new ParquetWriter<Group>(path, configuration, new GroupWriteSupport(null));
    // An empty group: "content" is never set, so the written value is null.
    Group r1 = new SimpleGroup(schema);
    writer.write(r1);
    writer.close();
    ParquetMetadata readFooter = ParquetFileReader.readFooter(configuration, path);
    // Extracted once: the original repeated this deep getter chain in both asserts.
    ColumnChunkMetaData firstColumn = readFooter.getBlocks().get(0).getColumns().get(0);
    // The statistics object must be populated...
    assertTrue(!firstColumn.getStatistics().isEmpty());
    // ...and count the single null we wrote.
    assertEquals(1, firstColumn.getStatistics().getNumNulls());
}
示例11: testTajoToParquetConversion
import parquet.schema.MessageTypeParser; //导入方法依赖的package包/类
/**
 * Asserts that converting {@code tajoSchema} with TajoSchemaConverter yields a
 * Parquet schema whose text form equals {@code schemaString}.
 */
private void testTajoToParquetConversion(
    Schema tajoSchema, String schemaString) throws Exception {
    MessageType actual = new TajoSchemaConverter().convert(tajoSchema);
    MessageType expected = MessageTypeParser.parseMessageType(schemaString);
    String failureMessage = "converting " + actual + " to " + schemaString;
    assertEquals(failureMessage, expected.toString(), actual.toString());
}
示例12: getSchema
import parquet.schema.MessageTypeParser; //导入方法依赖的package包/类
/**
 * Reads the Parquet schema string from the configuration and parses it.
 *
 * @param configuration configuration holding the schema under
 *        {@code PARQUET_SCHEMA_PROPERTY_NAME}
 * @return the parsed message type
 * @throws IllegalStateException if the schema property has not been set
 *         (the original passed null into the parser, producing an opaque NPE)
 */
public static MessageType getSchema(Configuration configuration) {
    String schemaString = configuration.get(PARQUET_SCHEMA_PROPERTY_NAME);
    if (schemaString == null) {
        throw new IllegalStateException(
            "Parquet schema property '" + PARQUET_SCHEMA_PROPERTY_NAME
            + "' is not set in the configuration");
    }
    return MessageTypeParser.parseMessageType(schemaString);
}
示例13: getResultFromGroup
import parquet.schema.MessageTypeParser; //导入方法依赖的package包/类
/**
 * Converts a Parquet {@link Group} record into a list of HBase {@link Cell}s.
 *
 * <p>The row key comes from the group's "rowkey" field; every field of the
 * group's type becomes a cell in family "cf".
 * NOTE(review): the loop iterates ALL fields, which presumably includes
 * "rowkey" itself — confirm whether it should be skipped.
 *
 * @param group the parquet record to convert
 * @return one cell per field of the group
 */
public List<Cell> getResultFromGroup(Group group) {
    // Removed dead code: the original built a SimpleGroupFactory over
    // parseMessageType("") (which cannot yield a valid schema) and discarded
    // the result of newGroup().
    List<Cell> cells = new LinkedList<>();
    byte[] row = group.getBinary("rowkey", 0).getBytes();
    GroupType type = group.getType();
    for (Type t : type.getFields()) {
        byte[] value = group.getBinary(t.getName(), 0).getBytes();
        Cell cell = new KeyValue(row, "cf".getBytes(), t.getName().getBytes(), value);
        cells.add(cell);
    }
    return cells;
}
示例14: getStoreScanner
import parquet.schema.MessageTypeParser; //导入方法依赖的package包/类
/**
 * Builds a merged scanner of parquet type over the memstore, the memstore
 * snapshot, and the on-disk parquet files, positioned at the scan's start row.
 *
 * @param scan   the scan; its SCAN_TABLE_SCHEMA attribute (optional) selects the read schema
 * @param readPt read point for the store scanner
 * @return the combined store scanner
 */
public RecordScanner getStoreScanner(Scan scan, long readPt) {
    RecordScanner scanner = null;
    lock.readLock().lock();
    try {
        List<RecordScanner> scanners = new LinkedList<>();
        byte[] startRow = scan.getStartRow();
        // Memstore scanner: only useful when the memstore may hold rows >= startRow.
        RecordScanner memScanner = this.pMemStore.getScanner(scan);
        if (pMemStore.size() > 0 && pMemStore.getEndKey() != null
            && Bytes.compareTo(startRow, this.pMemStore.getEndKey()) <= 0) {
            memScanner.seek(startRow);
            if (memScanner.hasNext()) {
                scanners.add(memScanner);
            }
        }
        // Memstore snapshot scanner.
        RecordScanner snapshotScanner = pMemStore.getSnapshotScanner(scan);
        snapshotScanner.seek(startRow);
        if (snapshotScanner.hasNext()) {
            scanners.add(snapshotScanner);
        }
        // BUG FIX: the original did new String(scan.getAttribute(...)) before its
        // null check, so a missing schema attribute threw NPE and made the
        // sSchema != null guard unreachable. Check the raw bytes first.
        MessageType readSchema = null;
        byte[] rawSchema = scan.getAttribute(HConstants.SCAN_TABLE_SCHEMA);
        //TODO: verify the schema
        if (rawSchema != null) {
            readSchema = MessageTypeParser.parseMessageType(new String(rawSchema));
        }
        scanners.addAll(loadPFileScanner(startRow, readSchema));
        scanner = new PStoreScanner(this, scan, readPt, scanners);
    } finally {
        lock.readLock().unlock();
    }
    return scanner;
}
示例15: test
import parquet.schema.MessageTypeParser; //导入方法依赖的package包/类
@Test
public void test() throws Exception {
// Round-trips a single V2 data page through ColumnChunkPageWriteStore and
// asserts that every recorded attribute survives the write/read cycle.
// NOTE(review): conf, initialSize, compressor() and intValue() are defined
// elsewhere in this test class.
Path file = new Path("target/test/TestColumnChunkPageWriteStore/test.parquet");
Path root = file.getParent();
FileSystem fs = file.getFileSystem(conf);
// Start from a clean directory so stale files can't affect the read-back.
if (fs.exists(root)) {
fs.delete(root, true);
}
fs.mkdirs(root);
// One repeated binary column is enough to exercise rep/def levels.
MessageType schema = MessageTypeParser.parseMessageType("message test { repeated binary bar; }");
ColumnDescriptor col = schema.getColumns().get(0);
Encoding dataEncoding = PLAIN;
int valueCount = 10;
// Sentinel ints for the three page sections; distinct so a mix-up is detected.
int d = 1;
int r = 2;
int v = 3;
BytesInput definitionLevels = BytesInput.fromInt(d);
BytesInput repetitionLevels = BytesInput.fromInt(r);
Statistics<?> statistics = new BinaryStatistics();
BytesInput data = BytesInput.fromInt(v);
int rowCount = 5;
int nullCount = 1;
// Write phase: one row group containing one GZIP-compressed V2 page.
{
ParquetFileWriter writer = new ParquetFileWriter(conf, schema, file);
writer.start();
writer.startBlock(rowCount);
{
ColumnChunkPageWriteStore store = new ColumnChunkPageWriteStore(compressor(GZIP), schema, initialSize);
PageWriter pageWriter = store.getPageWriter(col);
pageWriter.writePageV2(
rowCount, nullCount, valueCount,
repetitionLevels, definitionLevels,
dataEncoding, data,
statistics);
store.flushToFileWriter(writer);
}
writer.endBlock();
writer.end(new HashMap<String, String>());
}
// Read phase: re-open the file and verify the page came back intact.
{
ParquetMetadata footer = ParquetFileReader.readFooter(conf, file, NO_FILTER);
ParquetFileReader reader = new ParquetFileReader(conf, file, footer.getBlocks(), schema.getColumns());
PageReadStore rowGroup = reader.readNextRowGroup();
PageReader pageReader = rowGroup.getPageReader(col);
DataPageV2 page = (DataPageV2) pageReader.readPage();
// Counts, levels, encoding, payload and statistics must all match what was written.
assertEquals(rowCount, page.getRowCount());
assertEquals(nullCount, page.getNullCount());
assertEquals(valueCount, page.getValueCount());
assertEquals(d, intValue(page.getDefinitionLevels()));
assertEquals(r, intValue(page.getRepetitionLevels()));
assertEquals(dataEncoding, page.getDataEncoding());
assertEquals(v, intValue(page.getData()));
assertEquals(statistics.toString(), page.getStatistics().toString());
reader.close();
}
}