Java ParquetInputSplit Class Code Examples

This article collects typical usage examples of the Java class parquet.hadoop.ParquetInputSplit. If you are wondering what ParquetInputSplit does and how to use it, the curated class code examples below should help.


The ParquetInputSplit class belongs to the parquet.hadoop package. Seven code examples are shown below, sorted by popularity.
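
For orientation before the examples: you rarely construct a ParquetInputSplit yourself. ParquetInputFormat computes the splits when a MapReduce job plans its input, one split per group of row groups. Below is a minimal, hedged driver sketch using the old parquet.hadoop namespace that this article covers; the wiring is illustrative and not taken from any of the projects cited here.

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.mapreduce.Job;
import parquet.hadoop.ParquetInputFormat;
import parquet.hadoop.example.GroupReadSupport;

public class ParquetJobSketch {
    public static void main(String[] args) throws Exception {
        Job job = Job.getInstance(new Configuration(), "read-parquet");
        job.setJarByClass(ParquetJobSketch.class);

        // ParquetInputFormat creates ParquetInputSplit instances internally;
        // each split covers one or more row groups of an input file.
        job.setInputFormatClass(ParquetInputFormat.class);
        ParquetInputFormat.setReadSupportClass(job, GroupReadSupport.class);
        ParquetInputFormat.setInputPaths(job, new Path(args[0]));

        // Mapper, reducer and output settings omitted; see Example 4 below
        // for a complete mapper that consumes the resulting splits.
        System.exit(job.waitForCompletion(true) ? 0 : 1);
    }
}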

Example 1: getSplits

import parquet.hadoop.ParquetInputSplit; // import the required package/class
@Override
public InputSplit[] getSplits(JobConf job, int numSplits) throws IOException {
    if (isTaskSideMetaData(job)) {
        // Task-side metadata: let the superclass produce plain file splits;
        // footers are read later by each task.
        return super.getSplits(job, numSplits);
    }

    // Client-side metadata: read the footers up front and let the real
    // (new-API) input format compute row-group based splits.
    List<Footer> footers = getFooters(job);
    List<ParquetInputSplit> splits = realInputFormat.getSplits(job, footers);
    if (splits == null) {
        return null;
    }
    // Wrap each new-API ParquetInputSplit so it can be consumed through the
    // deprecated mapred API.
    InputSplit[] resultSplits = new InputSplit[splits.size()];
    int i = 0;
    for (ParquetInputSplit split : splits) {
        resultSplits[i++] = new ParquetInputSplitWrapper(split);
    }
    return resultSplits;
}
 
Developer: grokcoder | Project: pbase | Lines: 19 | Source: DeprecatedParquetInputFormat.java
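
The isTaskSideMetaData helper referenced above is not part of the snippet. In parquet-mr, task-side metadata mode is controlled by the parquet.task.side.metadata configuration key; here is a hedged sketch of an equivalent check (the actual pbase implementation may differ):

// Hedged sketch, not the verbatim pbase code: when task-side metadata is
// enabled, footers are read by the tasks themselves, so the client can
// hand out plain file splits from the superclass.
public static boolean isTaskSideMetaData(JobConf job) {
    return job.getBoolean("parquet.task.side.metadata", true);
}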

Example 2: createRecordReader

import parquet.hadoop.ParquetInputSplit; // import the required package/class
@Test
public void createRecordReader() throws Exception {

  String parquetFilePath = getClass().getClassLoader().getResource( "sample.pqt" ).toExternalForm();

  PentahoParquetInputFormat pentahoParquetInputFormat = new PentahoParquetInputFormat();
  pentahoParquetInputFormat.setInputFile( parquetFilePath );
  SchemaDescription schema = pentahoParquetInputFormat.readSchema( parquetFilePath );

  pentahoParquetInputFormat.setSchema( schema );

  // Construct a ParquetInputSplit by hand: spy an empty instance and inject
  // the row-group offsets and file path the record reader needs.
  ParquetInputSplit parquetInputSplit = Mockito.spy( ParquetInputSplit.class );
  Whitebox.setInternalState( parquetInputSplit, "rowGroupOffsets", new long[] { 4 } );
  Whitebox.setInternalState( parquetInputSplit, "file", new Path( parquetFilePath ) );

  PentahoInputSplitImpl pentahoInputSplit = new PentahoInputSplitImpl( parquetInputSplit );

  IPentahoInputFormat.IPentahoRecordReader recordReader =
      pentahoParquetInputFormat.createRecordReader( pentahoInputSplit );

  Assert.assertNotNull( "recordReader should NOT be null!", recordReader );
  Assert.assertTrue( "recordReader should be instance of IPentahoInputFormat.IPentahoRecordReader",
    recordReader instanceof IPentahoInputFormat.IPentahoRecordReader );
}
 
Developer: pentaho | Project: pentaho-hadoop-shims | Lines: 25 | Source: PentahoParquetInputFormatTest.java
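
Spying plus Whitebox is one way to obtain a split in a test. Parquet-mr versions that carry the rowGroupOffsets field (the 1.6.x line) also expose a constructor that takes the offsets directly; a hedged sketch, assuming that constructor is available in the shim's parquet version:

// Hedged alternative to the reflection above; constructor signature assumed
// from parquet-mr 1.6.x: (file, start, end, length, hosts, rowGroupOffsets).
long fileLength = 1024L; // hypothetical file size, for the sketch only
ParquetInputSplit parquetInputSplit = new ParquetInputSplit(
    new Path( parquetFilePath ),
    0,                  // start of the split within the file
    fileLength,         // end
    fileLength,         // length
    new String[ 0 ],    // no preferred hosts for a local test file
    new long[] { 4 } ); // row-group offsets, as in the test above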

Example 3: readCreatedParquetFile

import parquet.hadoop.ParquetInputSplit; // import the required package/class
private IPentahoInputFormat.IPentahoRecordReader readCreatedParquetFile( String parquetFilePath ) {

  IPentahoInputFormat.IPentahoRecordReader recordReader = null;
  try {
    PentahoParquetInputFormat pentahoParquetInputFormat = new PentahoParquetInputFormat();
    pentahoParquetInputFormat.setInputFile( parquetFilePath );
    SchemaDescription schema = pentahoParquetInputFormat.readSchema( parquetFilePath );
    pentahoParquetInputFormat.setSchema( schema );

    // Same reflection trick as in Example 2: inject the row-group offsets
    // and the file path into a spied ParquetInputSplit.
    ParquetInputSplit parquetInputSplit = Mockito.spy( ParquetInputSplit.class );
    Whitebox.setInternalState( parquetInputSplit, "rowGroupOffsets", new long[] { 4 } );
    Whitebox.setInternalState( parquetInputSplit, "file", new org.apache.hadoop.fs.Path( parquetFilePath ) );
    PentahoInputSplitImpl pentahoInputSplit = new PentahoInputSplitImpl( parquetInputSplit );

    recordReader = pentahoParquetInputFormat.createRecordReader( pentahoInputSplit );
  } catch ( Exception e ) {
    e.printStackTrace();
  }
  return recordReader;
}
 
Developer: pentaho | Project: pentaho-hadoop-shims | Lines: 21 | Source: PentahoParquetRecordWriterTest.java

Example 4: map

import parquet.hadoop.ParquetInputSplit; // import the required package/class
@Override
public void map(LongWritable key, Group value, Context context) throws IOException, InterruptedException {
    NullWritable outKey = NullWritable.get();
    if (expectedFields == null) {
        // Get the file schema (which may differ from the fields present in a
        // particular record) from the input split.
        String fileSchema = ((ParquetInputSplit) context.getInputSplit()).getFileSchema();
        RecordSchema schema = new RecordSchema(fileSchema);
        expectedFields = schema.getFields();
    }

    // There is no public accessor for the column values in a Group, so
    // extract them from its string representation ("name: value" per line).
    String line = value.toString();
    String[] fields = line.split("\n");

    StringBuilder csv = new StringBuilder();
    boolean hasContent = false;
    int i = 0;
    // Look for each expected column.
    Iterator<FieldDescription> it = expectedFields.iterator();
    while (it.hasNext()) {
        if (hasContent) {
            csv.append(',');
        }
        String name = it.next().name;
        if (fields.length > i) {
            String[] parts = fields[i].split(": ");
            // We assume the columns appear in schema order, but some fields
            // may be missing from a given record.
            if (parts[0].equals(name)) {
                boolean mustQuote = (parts[1].contains(",") || parts[1].contains("'"));
                if (mustQuote) {
                    csv.append('"');
                }
                csv.append(parts[1]);
                if (mustQuote) {
                    csv.append('"');
                }
                hasContent = true;
                i++;
            }
        }
    }
    context.write(outKey, new Text(csv.toString()));
}
 
Developer: cloudera | Project: parquet-examples | Lines: 46 | Source: TestReadParquet.java
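
The mapper above is driven by parquet's example input format, which delivers each row as a Group. Below is a hedged sketch of a matching job setup: the input/output class names come from parquet-mr's example package, the mapper's inner-class name (ReadRequestMap) is assumed from the upstream example, and the real Cloudera driver may differ.

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.NullWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
import org.apache.hadoop.mapreduce.lib.output.TextOutputFormat;
import parquet.hadoop.example.ExampleInputFormat;

public class ReadParquetDriver {
    public static void main(String[] args) throws Exception {
        Job job = Job.getInstance(new Configuration(), "parquet-to-csv");
        job.setJarByClass(ReadParquetDriver.class);

        // ExampleInputFormat wraps ParquetInputFormat with GroupReadSupport,
        // so the mapper sees each row as a parquet.example.data.Group and can
        // reach the ParquetInputSplit through context.getInputSplit().
        job.setInputFormatClass(ExampleInputFormat.class);
        job.setMapperClass(TestReadParquet.ReadRequestMap.class); // mapper shown above
        job.setNumReduceTasks(0); // map-only: one CSV line per input row
        job.setOutputFormatClass(TextOutputFormat.class);
        job.setOutputKeyClass(NullWritable.class);
        job.setOutputValueClass(Text.class);

        ExampleInputFormat.addInputPath(job, new Path(args[0]));
        FileOutputFormat.setOutputPath(job, new Path(args[1]));
        System.exit(job.waitForCompletion(true) ? 0 : 1);
    }
}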

Example 5: ParquetInputSplitWrapper

import parquet.hadoop.ParquetInputSplit; // import the required package/class
public ParquetInputSplitWrapper(ParquetInputSplit realSplit) {
    this.realSplit = realSplit;
}
 
Developer: grokcoder | Project: pbase | Lines: 4 | Source: DeprecatedParquetInputFormat.java

Example 6: readFields

import parquet.hadoop.ParquetInputSplit; // import the required package/class
@Override
public void readFields(DataInput in) throws IOException {
    // Deserialize by delegating to a freshly created ParquetInputSplit.
    realSplit = new ParquetInputSplit();
    realSplit.readFields(in);
}
 
Developer: grokcoder | Project: pbase | Lines: 6 | Source: DeprecatedParquetInputFormat.java
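
Examples 5 and 6 show two members of the same adapter class used in Example 1. For context, here is a hedged reconstruction of the whole wrapper: only the constructor and readFields appear in the snippets above; the remaining methods are the delegations that org.apache.hadoop.mapred.InputSplit requires, and they may differ from the actual pbase code.

// Hedged reconstruction; nested inside DeprecatedParquetInputFormat.
// Assumed imports: java.io.DataInput, java.io.DataOutput, java.io.IOException,
// org.apache.hadoop.mapred.InputSplit, parquet.hadoop.ParquetInputSplit.
static class ParquetInputSplitWrapper implements InputSplit {

    ParquetInputSplit realSplit;

    // No-arg constructor so the framework can instantiate and deserialize it.
    public ParquetInputSplitWrapper() {
    }

    public ParquetInputSplitWrapper(ParquetInputSplit realSplit) {
        this.realSplit = realSplit;
    }

    @Override
    public void readFields(DataInput in) throws IOException {
        realSplit = new ParquetInputSplit();
        realSplit.readFields(in);
    }

    @Override
    public void write(DataOutput out) throws IOException {
        realSplit.write(out); // ParquetInputSplit is itself a Writable
    }

    @Override
    public long getLength() throws IOException {
        try {
            return realSplit.getLength();
        } catch (Exception e) { // new-API splits may declare InterruptedException
            throw new IOException(e);
        }
    }

    @Override
    public String[] getLocations() throws IOException {
        try {
            return realSplit.getLocations();
        } catch (Exception e) {
            throw new IOException(e);
        }
    }
}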

Example 7: test11

import parquet.hadoop.ParquetInputSplit; // import the required package/class
@Test
public void test11() throws IOException, InterruptedException {
    // verify locations in order
    ArrayList<InputSplit> rawSplits = new ArrayList<InputSplit>();

    // the first split is a ParquetInputSplit
    rawSplits.add(new ParquetInputSplit(new Path("path1"), 0, 100,
            new String[] { "l1", "l2", "l3" },
            new ArrayList<BlockMetaData>(), "", "",
            new HashMap<String, String>(), new HashMap<String, String>()));
    // the second split is a plain FileSplit
    rawSplits.add(new FileSplit(new Path("path2"), 0, 400, new String[] {
            "l5", "l6", "l1" }));

    List<InputSplit> result = pigInputFormat.getPigSplits(rawSplits, 0, ok,
            null, true, conf);

    // Pig combines the two splits into one PigSplit
    Assert.assertEquals(result.size(), 1);

    for (InputSplit split : result) {
        PigSplit pigSplit = (PigSplit) split;

        // write to a byte array output stream
        ByteArrayOutputStream outputStream = new ByteArrayOutputStream();

        DataOutput out = new DataOutputStream(outputStream);
        pigSplit.write(out);
        // restore the pig split from the byte array
        ByteArrayInputStream inputStream = new ByteArrayInputStream(
                outputStream.toByteArray());

        DataInput in = new DataInputStream(inputStream);
        PigSplit anotherSplit = new PigSplit();
        anotherSplit.setConf(conf);
        anotherSplit.readFields(in);

        Assert.assertEquals(500, anotherSplit.getLength());

        Assert.assertEquals(2, anotherSplit.getNumPaths());
        Assert.assertEquals("parquet.hadoop.ParquetInputSplit",
                (anotherSplit.getWrappedSplit(0).getClass().getName()));
        Assert.assertEquals(
                "org.apache.hadoop.mapreduce.lib.input.FileSplit",
                (anotherSplit.getWrappedSplit(1).getClass().getName()));
    }
}
 
Developer: sigmoidanalytics | Project: spork | Lines: 48 | Source: TestSplitCombine.java


Note: The parquet.hadoop.ParquetInputSplit class examples in this article were compiled by 纯净天空 from open-source code and documentation platforms such as GitHub and MSDocs. The snippets were selected from open-source projects contributed by various developers; copyright of the source code remains with the original authors. Consult each project's License before distributing or reusing the code, and do not republish this article without permission.