

Java MessageTypeParser Class Code Examples

This article collects typical usage examples of the Java class parquet.schema.MessageTypeParser. If you have been wondering what MessageTypeParser is for, how to use it, or where to find working examples, the curated class examples below should help.


The MessageTypeParser class belongs to the parquet.schema package. 15 code examples of the class are shown below, sorted by popularity by default. You can upvote the examples you like or find useful; your feedback helps the system recommend better Java code samples.
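As a quick orientation before the examples: the core entry point is the static MessageTypeParser.parseMessageType(String) method, which parses a textual schema definition into a MessageType. Below is a minimal, self-contained sketch; the class name and schema string are illustrative, not taken from the projects cited on this page.

import parquet.schema.MessageType;
import parquet.schema.MessageTypeParser;

public class ParseSchemaDemo {
    public static void main(String[] args) {
        // Parse a textual Parquet schema into a MessageType object.
        MessageType schema = MessageTypeParser.parseMessageType(
                "message example {\n" +
                "  required binary name;\n" +
                "  required int64 age;\n" +
                "}");
        System.out.println(schema);                 // prints the normalized schema text
        System.out.println(schema.getFieldCount()); // 2 top-level fields
    }
}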

Example 1: initScanFilter

import parquet.schema.MessageTypeParser; // import the required package/class
/**
 * Initialize the scan filter with the read schema.
 * @param scan the scan carrying the table schema attribute
 */
public void initScanFilter(Scan scan) {
    if (scan == null || scan.getAttribute(HConstants.SCAN_TABLE_SCHEMA) == null) {
        return; // nothing to do without a schema attribute
    }
    String schema = new String(scan.getAttribute(HConstants.SCAN_TABLE_SCHEMA));
    try {
        if (!schema.isEmpty()) {
            MessageType readSchema = MessageTypeParser.parseMessageType(schema);
            List<Type> types = readSchema.getFields();
            for (Type type : types) {
                String columnName = type.getName();
                if (columnName.startsWith("cf")) { // strip the "cf:" prefix to get the real column name
                    columnName = columnName.substring(3);
                }
                filterColumns.add(columnName.getBytes());
            }
        }
    } catch (Exception e) {
        //TODO: send the exception back to the client
        LOG.error("Failed to parse the message schema", e);
    }
}
 
Developer: grokcoder | Project: pbase | Lines: 25 | Source: PMemStoreImpl.java
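A side note on the column-name handling in example 1: given a hypothetical read schema like the one below, type.getName() returns names such as cf:name, and substring(3) strips the three-character "cf:" prefix, so the bare column names end up in filterColumns.

// Hypothetical schema for illustration; not from the pbase source.
MessageType readSchema = MessageTypeParser.parseMessageType(
        "message scan { required binary cf:name; required binary cf:age; }");
// getFields() yields cf:name and cf:age; after substring(3) the
// filter columns are the bytes of "name" and "age".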

Example 2: runMapReduceJob

import parquet.schema.MessageTypeParser; // import the required package/class
private void runMapReduceJob(CompressionCodecName codec) throws IOException, ClassNotFoundException, InterruptedException {

    final FileSystem fileSystem = parquetPath.getFileSystem(conf);
    fileSystem.delete(parquetPath, true);
    fileSystem.delete(outputPath, true);
    {
        jobConf.setInputFormat(TextInputFormat.class);
        TextInputFormat.addInputPath(jobConf, inputPath);
        jobConf.setNumReduceTasks(0);

        jobConf.setOutputFormat(DeprecatedParquetOutputFormat.class);
        DeprecatedParquetOutputFormat.setCompression(jobConf, codec);
        DeprecatedParquetOutputFormat.setOutputPath(jobConf, parquetPath);
        DeprecatedParquetOutputFormat.setWriteSupportClass(jobConf, GroupWriteSupport.class);
        GroupWriteSupport.setSchema(MessageTypeParser.parseMessageType(writeSchema), jobConf);

        jobConf.setMapperClass(DeprecatedMapper.class);
        mapRedJob = JobClient.runJob(jobConf);
    }
}
 
Developer: grokcoder | Project: pbase | Lines: 21 | Source: DeprecatedOutputFormatTest.java
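The writeSchema field referenced by this job is defined elsewhere in the test class and does not appear on this page; a plausible, purely illustrative definition would be:

// Hypothetical value for the writeSchema field used above.
private String writeSchema = "message example {\n" +
        "required int32 line;\n" +
        "required binary content;\n" +
        "}";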

Example 3: run

import parquet.schema.MessageTypeParser; // import the required package/class
/**
 * The MapReduce driver - setup and launch the job.
 *
 * @param args the command-line arguments
 * @return the process exit code
 * @throws Exception if something goes wrong
 */
public int run(final String[] args) throws Exception {

  Cli cli = Cli.builder().setArgs(args).addOptions(CliCommonOpts.MrIoOpts.values()).build();
  int result = cli.runCmd();

  if (result != 0) {
    return result;
  }

  Path inputPath = new Path(cli.getArgValueAsString(CliCommonOpts.MrIoOpts.INPUT));
  Path outputPath = new Path(cli.getArgValueAsString(CliCommonOpts.MrIoOpts.OUTPUT));

  Configuration conf = super.getConf();

  Job job = new Job(conf);
  job.setJarByClass(ExampleParquetMapReduce.class);

  job.setInputFormatClass(ExampleInputFormat.class);
  FileInputFormat.setInputPaths(job, inputPath);

  job.setMapperClass(Map.class);
  job.setReducerClass(Reduce.class);

  job.setMapOutputKeyClass(Text.class);
  job.setMapOutputValueClass(DoubleWritable.class);

  job.setOutputFormatClass(ExampleOutputFormat.class);
  FileOutputFormat.setOutputPath(job, outputPath);
  ExampleOutputFormat.setSchema(
      job,
      MessageTypeParser.parseMessageType(writeSchema));

  return job.waitForCompletion(true) ? 0 : 1;
}
 
Developer: Hanmourang | Project: hiped2 | Lines: 42 | Source: ExampleParquetMapReduce.java

Example 4: createParquetWriter

import parquet.schema.MessageTypeParser; // import the required package/class
/**
 * Create a writer for a Parquet PStoreFile.
 * @return {@link org.apache.hadoop.hbase.regionserver.PStoreFile.Writer}, or null if the table has no schema
 */
public PStoreFile.Writer createParquetWriter(Map<String, String> meta) {

    PStoreFile.Writer writer = null;
    MessageType schema = null;

    String schemaStr = Bytes.toString(getHRegion().getTableDesc().getValue(HConstants.SCHEMA.getBytes()));
    if (schemaStr != null) {
        schema = MessageTypeParser.parseMessageType(schemaStr);
    } else {
        LOG.error("No schema found for " + this.getTableName());
        return null;
    }

    Path filePath = new Path(fs.createTempName() + ".parquet");

    try {
        writer = new PStoreFile.WriterBuilder(conf, getFileSystem(), schema, filePath)
                .addMetaData(meta)
                .build();
    } catch (IOException ioe) {
        LOG.error("Failed to create parquet writer", ioe);
    }
    return writer;
}
 
Developer: grokcoder | Project: pbase | Lines: 32 | Source: HStore.java

Example 5: getParquetInputSplit

import parquet.schema.MessageTypeParser; // import the required package/class
public ParquetInputSplit getParquetInputSplit(FileStatus fileStatus, String requestedSchema, Map<String, String> readSupportMetadata) throws IOException {
    MessageType requested = MessageTypeParser.parseMessageType(requestedSchema);
    long length = 0;

    for (BlockMetaData block : this.getRowGroups()) {
        List<ColumnChunkMetaData> columns = block.getColumns();
        for (ColumnChunkMetaData column : columns) {
            if (requested.containsPath(column.getPath().toArray())) {
                length += column.getTotalSize();
            }
        }
    }

    BlockMetaData lastRowGroup = this.getRowGroups().get(this.getRowGroupCount() - 1);
    long end = lastRowGroup.getStartingPos() + lastRowGroup.getTotalByteSize();

    long[] rowGroupOffsets = new long[this.getRowGroupCount()];
    for (int i = 0; i < rowGroupOffsets.length; i++) {
        rowGroupOffsets[i] = this.getRowGroups().get(i).getStartingPos();
    }

    return new ParquetInputSplit(
            fileStatus.getPath(),
            hdfsBlock.getOffset(),
            end,
            length,
            hdfsBlock.getHosts(),
            rowGroupOffsets
    );
}
 
Developer: grokcoder | Project: pbase | Lines: 31 | Source: ParquetInputFormat.java

Example 6: end

import parquet.schema.MessageTypeParser; // import the required package/class
// Sums the compressed size of the column chunks selected by the requested schema.
private static long end(List<BlockMetaData> blocks, String requestedSchema) {
    MessageType requested = MessageTypeParser.parseMessageType(requestedSchema);
    long length = 0;

    for (BlockMetaData block : blocks) {
        List<ColumnChunkMetaData> columns = block.getColumns();
        for (ColumnChunkMetaData column : columns) {
            if (requested.containsPath(column.getPath().toArray())) {
                length += column.getTotalSize();
            }
        }
    }
    return length;
}
 
Developer: grokcoder | Project: pbase | Lines: 15 | Source: ParquetInputSplit.java

Example 7: main

import parquet.schema.MessageTypeParser; // import the required package/class
public static void main(String[] args) throws IOException {

    Path root = new Path("hdfs://10.214.208.11:9000/parquet/"); // directory path

    Configuration configuration = new Configuration();

    // the schema of the parquet file
    MessageType schema = MessageTypeParser.parseMessageType(
            " message people { " +
                    "required binary rowkey;" +
                    "required binary cf:name;" +
                    "required binary cf:age;" +
                    "required int64 timestamp;" +
            " }");

    GroupWriteSupport.setSchema(schema, configuration);

    SimpleGroupFactory sfg = new SimpleGroupFactory(schema);
    Path file = new Path(root, "people002.parquet");

    Map<String, String> meta = new HashMap<String, String>();
    meta.put("startkey", "1");
    meta.put("endkey", "2");

    ParquetWriter<Group> writer = new ParquetWriter<Group>(
            file,
            new GroupWriteSupport(meta),
            CompressionCodecName.UNCOMPRESSED,
            1024,   // block size
            1024,   // page size
            512,    // dictionary page size
            true,   // enable dictionary encoding
            false,  // disable validation
            ParquetProperties.WriterVersion.PARQUET_1_0,
            configuration);

    // write one hand-built record first
    Group group = sfg.newGroup().append("rowkey", "1")
            .append("cf:name", "wangxiaoyi")
            .append("cf:age", "24")
            .append("timestamp", System.currentTimeMillis());
    writer.write(group);

    // every required field of the schema must be populated in each record
    for (int i = 0; i < 10000; ++i) {
        writer.write(
                sfg.newGroup()
                        .append("rowkey", String.valueOf(i))
                        .append("cf:name", "wangxiaoyi" + i)
                        .append("cf:age", String.valueOf(i))
                        .append("timestamp", System.currentTimeMillis()));
    }
    writer.close();
}
 
Developer: grokcoder | Project: pbase | Lines: 56 | Source: TestParquetWrite.java
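To verify what example 7 wrote, here is a minimal read-back sketch. It assumes the parquet.hadoop.ParquetReader and parquet.hadoop.example.GroupReadSupport classes from the same parquet-mr generation; it is not part of the original example.

import parquet.hadoop.ParquetReader;              // assumed available in this parquet-mr generation
import parquet.hadoop.example.GroupReadSupport;

// Read the file back record by record ("file" is the Path from example 7).
ParquetReader<Group> reader = new ParquetReader<Group>(file, new GroupReadSupport());
Group g;
while ((g = reader.read()) != null) {
    System.out.println(g.getString("cf:name", 0)); // first value of the cf:name field
}
reader.close();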

Example 8: testInitWithoutSpecifyingRequestSchema

import parquet.schema.MessageTypeParser; // import the required package/class
@Test
public void testInitWithoutSpecifyingRequestSchema() throws Exception {
    GroupReadSupport s = new GroupReadSupport();
    Configuration configuration = new Configuration();
    Map<String, String> keyValueMetaData = new HashMap<String, String>();
    MessageType fileSchema = MessageTypeParser.parseMessageType(fullSchemaStr);

    ReadSupport.ReadContext context = s.init(configuration, keyValueMetaData, fileSchema);
    assertEquals(context.getRequestedSchema(), fileSchema);
}
 
Developer: grokcoder | Project: pbase | Lines: 11 | Source: GroupReadSupportTest.java

Example 9: testInitWithPartialSchema

import parquet.schema.MessageTypeParser; // import the required package/class
@Test
public void testInitWithPartialSchema() {
    GroupReadSupport s = new GroupReadSupport();
    Configuration configuration = new Configuration();
    Map<String, String> keyValueMetaData = new HashMap<String, String>();
    MessageType fileSchema = MessageTypeParser.parseMessageType(fullSchemaStr);
    MessageType partialSchema = MessageTypeParser.parseMessageType(partialSchemaStr);
    configuration.set(ReadSupport.PARQUET_READ_SCHEMA, partialSchemaStr);

    ReadSupport.ReadContext context = s.init(configuration, keyValueMetaData, fileSchema);
    assertEquals(context.getRequestedSchema(), partialSchema);
}
 
Developer: grokcoder | Project: pbase | Lines: 13 | Source: GroupReadSupportTest.java
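Examples 8 and 9 reference fullSchemaStr and partialSchemaStr without showing their definitions; hypothetical values consistent with both tests (the partial schema must be a projection of the full schema) would be:

// Illustrative only; the real GroupReadSupportTest defines its own strings.
private String fullSchemaStr = "message example {\n" +
        "required int32 line;\n" +
        "optional binary content;\n" +
        "}";
private String partialSchemaStr = "message example {\n" +
        "required int32 line;\n" +
        "}";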

Example 10: setUp

import parquet.schema.MessageTypeParser; // import the required package/class
@Before
public void setUp() {
    blocks = new ArrayList<BlockMetaData>();
    for (int i = 0; i < 10; i++) {
        blocks.add(newBlock(i * 10, 10));
    }
    schema = MessageTypeParser.parseMessageType("message doc { required binary foo; }");
    fileMetaData = new FileMetaData(schema, new HashMap<String, String>(), "parquet-mr");
}
 
Developer: grokcoder | Project: pbase | Lines: 10 | Source: TestInputFormat.java

Example 11: testWriteMode

import parquet.schema.MessageTypeParser; // import the required package/class
@Test
public void testWriteMode() throws Exception {
    File testDir = new File("target/test/TestParquetFileWriter/");
    testDir.mkdirs();
    File testFile = new File(testDir, "testParquetFile");
    testFile = testFile.getAbsoluteFile();
    testFile.createNewFile();
    MessageType schema = MessageTypeParser.parseMessageType(
            "message m { required group a {required binary b;} required group "
                    + "c { required int64 d; }}");
    Configuration conf = new Configuration();

    ParquetFileWriter writer = null;
    boolean exceptionThrown = false;
    Path path = new Path(testFile.toURI());
    try {
        // CREATE mode must fail because the target file already exists
        writer = new ParquetFileWriter(conf, schema, path,
                ParquetFileWriter.Mode.CREATE);
    } catch (IOException ioe1) {
        exceptionThrown = true;
    }
    assertTrue(exceptionThrown);
    exceptionThrown = false;
    try {
        // OVERWRITE mode must succeed on an existing file
        writer = new ParquetFileWriter(conf, schema, path,
                ParquetFileWriter.Mode.OVERWRITE);
    } catch (IOException ioe2) {
        exceptionThrown = true;
    }
    assertFalse(exceptionThrown);
    testFile.delete();
}
 
Developer: grokcoder | Project: pbase | Lines: 33 | Source: TestParquetFileWriter.java

Example 12: testWriteReadStatisticsAllNulls

import parquet.schema.MessageTypeParser; // import the required package/class
@Test
public void testWriteReadStatisticsAllNulls() throws Exception {

    File testFile = new File("target/test/TestParquetFileWriter/testParquetFile").getAbsoluteFile();
    testFile.delete();

    writeSchema = "message example {\n" +
            "required binary content;\n" +
            "}";

    Path path = new Path(testFile.toURI());

    MessageType schema = MessageTypeParser.parseMessageType(writeSchema);
    Configuration configuration = new Configuration();
    GroupWriteSupport.setSchema(schema, configuration);

    ParquetWriter<Group> writer = new ParquetWriter<Group>(path, configuration, new GroupWriteSupport(null));

    Group r1 = new SimpleGroup(schema);
    writer.write(r1);
    writer.close();

    ParquetMetadata readFooter = ParquetFileReader.readFooter(configuration, path);

    // assert the statistics object is not empty
    assertFalse(readFooter.getBlocks().get(0).getColumns().get(0).getStatistics().isEmpty());
    // assert the number of nulls is correct for the first block
    assertEquals(1, readFooter.getBlocks().get(0).getColumns().get(0).getStatistics().getNumNulls());
}
 
Developer: grokcoder | Project: pbase | Lines: 30 | Source: TestParquetFileWriter.java

Example 13: setUp

import parquet.schema.MessageTypeParser; // import the required package/class
@Before
public void setUp() {
    GroupWriteSupport.setSchema(MessageTypeParser.parseMessageType(writeSchema), conf);
    expectPoolSize = Math.round(ManagementFactory.getMemoryMXBean().getHeapMemoryUsage()
            .getMax() * MemoryManager.DEFAULT_MEMORY_POOL_RATIO);
    rowGroupSize = (int) Math.floor(expectPoolSize / 2);
    conf.setInt(ParquetOutputFormat.BLOCK_SIZE, rowGroupSize);
    codec = CompressionCodecName.UNCOMPRESSED;
}
 
Developer: grokcoder | Project: pbase | Lines: 10 | Source: TestMemoryManager.java

Example 14: runMapReduceJob

import parquet.schema.MessageTypeParser; // import the required package/class
private void runMapReduceJob(CompressionCodecName codec) throws IOException, ClassNotFoundException, InterruptedException {

    final FileSystem fileSystem = parquetPath.getFileSystem(conf);
    fileSystem.delete(parquetPath, true);
    fileSystem.delete(outputPath, true);
    {
        // write job: text in, parquet out
        writeJob = new Job(conf, "write");
        TextInputFormat.addInputPath(writeJob, inputPath);
        writeJob.setInputFormatClass(TextInputFormat.class);
        writeJob.setNumReduceTasks(0);
        ExampleOutputFormat.setCompression(writeJob, codec);
        ExampleOutputFormat.setOutputPath(writeJob, parquetPath);
        writeJob.setOutputFormatClass(ExampleOutputFormat.class);
        writeJob.setMapperClass(ReadMapper.class);
        ExampleOutputFormat.setSchema(
                writeJob,
                MessageTypeParser.parseMessageType(writeSchema));
        writeJob.submit();
        waitForJob(writeJob);
    }
    {
        // read job: parquet in (via the deprecated mapred API), text out
        jobConf.set(ReadSupport.PARQUET_READ_SCHEMA, readSchema);
        jobConf.set(ParquetInputFormat.READ_SUPPORT_CLASS, GroupReadSupport.class.getCanonicalName());
        jobConf.setInputFormat(MyDeprecatedInputFormat.class);
        MyDeprecatedInputFormat.setInputPaths(jobConf, parquetPath);
        jobConf.setOutputFormat(org.apache.hadoop.mapred.TextOutputFormat.class);
        org.apache.hadoop.mapred.TextOutputFormat.setOutputPath(jobConf, outputPath);
        jobConf.setMapperClass(DeprecatedWriteMapper.class);
        jobConf.setNumReduceTasks(0);
        mapRedJob = JobClient.runJob(jobConf);
    }
}
 
Developer: grokcoder | Project: pbase | Lines: 34 | Source: DeprecatedInputFormatTest.java

Example 15: testTajoToParquetConversion

import parquet.schema.MessageTypeParser; // import the required package/class
private void testTajoToParquetConversion(
    Schema tajoSchema, String schemaString) throws Exception {
  TajoSchemaConverter converter = new TajoSchemaConverter();
  MessageType schema = converter.convert(tajoSchema);
  MessageType expected = MessageTypeParser.parseMessageType(schemaString);
  assertEquals("converting " + schema + " to " + schemaString,
               expected.toString(), schema.toString());
}
 
Developer: gruter | Project: tajo-cdh | Lines: 9 | Source: TestSchemaConverter.java


Note: The parquet.schema.MessageTypeParser class examples in this article were compiled by 纯净天空 from open-source code and documentation platforms such as GitHub and MSDocs. The code snippets were selected from open-source projects contributed by various developers; copyright remains with the original authors. Refer to the corresponding project's license before distributing or using the code, and do not republish without permission.