当前位置: 首页>>代码示例>>Java>>正文


Java Binary类代码示例

本文整理汇总了Java中parquet.io.api.Binary的典型用法代码示例。如果您正苦于以下问题：Java Binary类的具体用法？Java Binary怎么用？Java Binary使用的例子？那么恭喜您，这里精选的类代码示例或许可以为您提供帮助。


Binary类属于parquet.io.api包，在下文中一共展示了Binary类的15个代码示例，这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞，您的评价将有助于系统推荐出更棒的Java代码示例。

示例1: readField

import parquet.io.api.Binary; //导入依赖的package包/类
/**
 * Reads the next run of values into the nullable var-binary vector.
 *
 * <p>With dictionary encoding each value is pulled from the dictionary reader and copied into
 * the vector one at a time. Otherwise the parent implementation does the copy and this method
 * only records the per-value byte length afterwards.
 *
 * @param recordsToReadInThisPass number of records requested for the current pass
 */
@Override
protected void readField(long recordsToReadInThisPass) {
  this.bytebuf = pageReader.pageData;

  if (!usingDictionary) {
    super.readField(recordsToReadInThisPass);
    // TODO - replace this with fixed binary type in drill
    // Until then the length of every value has to be written explicitly.
    final int bytesPerValue = dataTypeLengthInBits / 8;
    int idx = 0;
    while (idx < recordsToReadInThisPass) {
      castedVector.getMutator().setValueLengthSafe(valuesReadInCurrentPass + idx, bytesPerValue);
      idx++;
    }
    return;
  }

  final NullableVarBinaryVector.Mutator vectorMutator = castedVector.getMutator();
  for (int idx = 0; idx < recordsReadInThisIteration; idx++) {
    final Binary dictValue = pageReader.dictionaryValueReader.readBytes();
    vectorMutator.setSafe(valuesReadInCurrentPass + idx, dictValue.toByteBuffer(), 0,
        dictValue.length());
  }
  // Advance the writer index by the bytes just consumed. A later page may not be
  // dictionary-encoded, in which case the non-dictionary path above runs and the parent's
  // readField needs the writer index to already be correct.
  final int currentWriterIndex = castedBaseVector.getBuffer().writerIndex();
  castedBaseVector.getBuffer().setIndex(0, currentWriterIndex + (int) readLength);
}
 
开发者ID:skhalifa,项目名称:QDrill,代码行数:27,代码来源:NullableFixedByteAlignedReaders.java

示例2: getCustomerDataSet

import parquet.io.api.Binary; //导入依赖的package包/类
/**
 * Creates the customer input DataSet from Parquet files read through the Thrift read support.
 *
 * <p>The job configuration is given a column filter of {@code ID;MKTSEGMENT} and a filter
 * predicate {@code MKTSEGMENT == "AUTOMOBILE"} before the input is created from
 * {@code customerPath}.
 *
 * @param env execution environment used to create the input
 * @return the DataSet created from the configured Hadoop input format
 * @throws IOException declared by the job/input-path setup calls
 */
private static DataSet<Tuple2<Void, CustomerTable>> getCustomerDataSet(ExecutionEnvironment env)
	throws IOException {
	final Job customerJob = Job.getInstance();

	ParquetInputFormat.setReadSupportClass(customerJob, ThriftReadSupport.class);
	customerJob.getConfiguration().set("parquet.thrift.column.filter", "ID;MKTSEGMENT");

	final HadoopInputFormat customerInput = new HadoopInputFormat(
		new ParquetThriftInputFormat(), Void.class, CustomerTable.class, customerJob);

	// Keep only rows whose market segment equals "AUTOMOBILE".
	final FilterPredicate automobileOnly =
		eq(binaryColumn("MKTSEGMENT"), Binary.fromString("AUTOMOBILE"));
	ParquetInputFormat.setFilterPredicate(customerJob.getConfiguration(), automobileOnly);

	ParquetThriftInputFormat.addInputPath(customerJob, new Path(customerPath));

	return env.createInput(customerInput);
}
 
开发者ID:FelixNeutatz,项目名称:parquet-flinktacular,代码行数:22,代码来源:TPCHQuery3Parquet.java

示例3: readThrift

import parquet.io.api.Binary; //导入依赖的package包/类
/**
 * Creates a DataSet of {@code Person} records from Parquet files read via Thrift.
 *
 * <p>Sets a column filter on the job configuration and registers a filter predicate
 * {@code name == "Felix"} before creating the input from {@code inputPath}.
 *
 * @param env execution environment used to create the input
 * @param inputPath location of the Parquet input
 * @return the DataSet created from the configured Hadoop input format
 * @throws IOException declared by the job/input-path setup calls
 */
public static DataSet<Tuple2<Void, Person>> readThrift(ExecutionEnvironment env, String inputPath)
	throws IOException {
	final Job personJob = Job.getInstance();

	final HadoopInputFormat personInput = new HadoopInputFormat(
		new ParquetThriftInputFormat(), Void.class, Person.class, personJob);

	// Schema projection: the filter string names name, id, email and phone/number.
	// NOTE(review): an earlier comment claimed id and email are NOT read, which does not match
	// this filter string - confirm the semantics of "parquet.thrift.column.filter".
	personJob.getConfiguration().set("parquet.thrift.column.filter", "name;id;email;phone/number");

	FileInputFormat.addInputPath(personJob, new Path(inputPath));

	// Predicate push-down: only persons whose name equals "Felix".
	final FilterPredicate nameIsFelix = eq(binaryColumn("name"), Binary.fromString("Felix"));
	ParquetInputFormat.setFilterPredicate(personJob.getConfiguration(), nameIsFelix);

	return env.createInput(personInput);
}
 
开发者ID:FelixNeutatz,项目名称:parquet-flinktacular,代码行数:22,代码来源:ParquetThriftExample.java

示例4: addBinary

import parquet.io.api.Binary; //导入依赖的package包/类
/**
 * Adds a binary value to the parent container.
 *
 * <p>Without a configured fixed class the raw bytes are added directly. Otherwise the bytes
 * are passed to the fixed-class constructor and the resulting object is added.
 *
 * @param value the binary value to add
 * @throws IllegalArgumentException if a fixed class is set but its constructor is missing
 * @throws RuntimeException wrapping any exception raised while constructing or adding the
 *     fixed instance
 */
@Override
final public void addBinary(Binary value) {
  if (fixedClass == null) {
    parent.add(value.getBytes());
    return;
  }

  if (fixedClassCtor == null) {
    throw new IllegalArgumentException(
        "fixedClass specified but fixedClassCtor is null.");
  }

  try {
    final Object fixedInstance = fixedClassCtor.newInstance(value.getBytes());
    parent.add(fixedInstance);
  } catch (Exception e) {
    throw new RuntimeException(e);
  }
}
 
开发者ID:Datasio,项目名称:cascalog-avro-parquet,代码行数:18,代码来源:HMAvroConverter.java

示例5: dateFromInt96

import parquet.io.api.Binary; //导入依赖的package包/类
/**
 * Converts a 12-byte INT96 value into a millisecond timestamp.
 *
 * <p>Bytes 0-7 are assembled little-endian into the nanoseconds within the day and bytes 8-11
 * little-endian into the Julian day number. The result is the day offset from
 * {@code JULIAN_DAY_OF_EPOCH} in milliseconds plus the time of day truncated to milliseconds.
 *
 * @param value binary holding exactly 12 bytes
 * @return milliseconds relative to {@code JULIAN_DAY_OF_EPOCH}
 * @throws RuntimeException if the value is not 12 bytes long
 */
private static long dateFromInt96( Binary value ) {
  final byte[] raw = value.getBytes();
  if ( raw.length != 12 ) {
    throw new RuntimeException( "Invalid byte array length for INT96" );
  }

  // Most significant byte first: shift the accumulator up and OR in the next lower byte.
  long nanosOfDay = 0L;
  for ( int pos = 7; pos >= 0; pos-- ) {
    nanosOfDay = ( nanosOfDay << 8 ) | ( raw[pos] & 0xFFL );
  }

  int julianDayNumber = 0;
  for ( int pos = 11; pos >= 8; pos-- ) {
    julianDayNumber = ( julianDayNumber << 8 ) | ( raw[pos] & 0xFF );
  }

  final long millisOfDay = nanosOfDay / 1000000;
  return ( julianDayNumber - JULIAN_DAY_OF_EPOCH ) * ( 24L * 60L * 60L * 1000L ) + millisOfDay;
}
 
开发者ID:pentaho,项目名称:pentaho-hadoop-shims,代码行数:19,代码来源:ParquetConverter.java

示例6: addBinary

import parquet.io.api.Binary; //导入依赖的package包/类
/**
 * Adds a binary value to the record under {@code name}.
 *
 * <p>A {@code null} byte array is stored as {@code null}. Otherwise the value is decoded with
 * {@code UTF8_DECODER} (presumably a UTF-8 {@code CharsetDecoder} - confirm at its declaration)
 * and stored as a String; if that attempt fails, the raw bytes are stored instead.
 *
 * @param value the binary value to add
 */
@Override
public void addBinary(Binary value) {
  byte[] data = value.getBytes();
  if (data == null) {
    record.add(name, null);
    return;
  }

  try {
    CharBuffer buffer = UTF8_DECODER.decode(value.toByteBuffer());
    record.add(name, buffer.toString());
    return;
  } catch (Exception ignored) {
    // Deliberate best-effort: a value that cannot be stored as text falls through to the
    // raw-bytes path below. Only Exceptions are absorbed here; Errors such as
    // OutOfMemoryError are no longer swallowed.
  }

  record.add(name, data);
}
 
开发者ID:wesleypeck,项目名称:parquet-tools,代码行数:20,代码来源:SimpleRecordConverter.java

示例7: addBinary

import parquet.io.api.Binary; //导入依赖的package包/类
@Override
public void addBinary(Binary value) {
  holder.buffer = buf = buf.reallocIfNeeded(value.length());
  buf.setBytes(0, value.toByteBuffer());
  holder.start = 0;
  holder.end = value.length();
  writer.write(holder);
}
 
开发者ID:skhalifa,项目名称:QDrill,代码行数:9,代码来源:DrillParquetGroupConverter.java

示例8: readField

import parquet.io.api.Binary; //导入依赖的package包/类
/**
 * Reads the next run of fixed-width values into the var-binary vector.
 *
 * <p>First updates the per-iteration bookkeeping fields (record count, read length in bits and
 * in bytes). Dictionary-encoded values are then copied one by one from the dictionary reader;
 * otherwise the parent implementation performs the read. Finally the byte length of each value
 * is written into the vector.
 *
 * @param recordsToReadInThisPass number of records requested for the current pass
 */
@Override
protected void readField(long recordsToReadInThisPass) {

  // Records handled now: whatever is left in the current page, capped by what is still
  // outstanding for this pass.
  recordsReadInThisIteration = Math.min(pageReader.currentPageCount
      - pageReader.valuesRead, recordsToReadInThisPass - valuesReadInCurrentPass);
  readLengthInBits = recordsReadInThisIteration * dataTypeLengthInBits;
  // Round up to whole bytes.
  readLength = (int) Math.ceil(readLengthInBits / 8.0);

  if (usingDictionary) {
    VarBinaryVector.Mutator mutator =  castedVector.getMutator();
    Binary currDictValToWrite = null;
    for (int i = 0; i < recordsReadInThisIteration; i++){
      currDictValToWrite = pageReader.dictionaryValueReader.readBytes();
      mutator.setSafe(valuesReadInCurrentPass + i, currDictValToWrite.toByteBuffer(), 0,
          currDictValToWrite.length());
    }
    // Set the write Index. The next page that gets read might be a page that does not use dictionary encoding
    // and we will go into the else condition below. The readField method of the parent class requires the
    // writer index to be set correctly.
    int writerIndex = castedVector.getBuffer().writerIndex();
    castedVector.getBuffer().setIndex(0, writerIndex + (int)readLength);
  } else {
    super.readField(recordsToReadInThisPass);
  }

  // TODO - replace this with fixed binary type in drill
  // now we need to write the lengths of each value
  // NOTE(review): this loop is bounded by recordsToReadInThisPass while the dictionary copy
  // above is bounded by recordsReadInThisIteration - confirm the differing bounds are intended.
  int byteLength = dataTypeLengthInBits / 8;
  for (int i = 0; i < recordsToReadInThisPass; i++) {
    castedVector.getMutator().setValueLengthSafe(valuesReadInCurrentPass + i, byteLength);
  }
}
 
开发者ID:skhalifa,项目名称:QDrill,代码行数:33,代码来源:ParquetFixedWidthDictionaryReaders.java

示例9: testAllFilter

import parquet.io.api.Binary; //导入依赖的package包/类
/**
 * Verifies that a predicate matching no record yields an empty result list.
 */
@Test
public void testAllFilter() throws Exception {
    // No entry in the phone book file is expected to carry this name.
    FilterPredicate noMatchPredicate = eq(binaryColumn("name"), Binary.fromString("no matches"));

    List<Group> matches = PhoneBookWriter.readFile(phonebookFile, FilterCompat.get(noMatchPredicate));

    ArrayList<Group> expected = new ArrayList<Group>();
    assertEquals(expected, matches);
}
 
开发者ID:grokcoder,项目名称:pbase,代码行数:10,代码来源:TestRecordLevelFilters.java

示例10: keep

import parquet.io.api.Binary; //导入依赖的package包/类
/**
 * Keeps only non-null values whose UTF-8 string form starts with "p".
 *
 * @param value candidate value, may be {@code null}
 * @return {@code true} if the value is non-null and its string form starts with "p"
 */
@Override
public boolean keep(Binary value) {
    return value != null && value.toStringUsingUTF8().startsWith("p");
}
 
开发者ID:grokcoder,项目名称:pbase,代码行数:8,代码来源:TestRecordLevelFilters.java

示例11: readValues

import parquet.io.api.Binary; //导入依赖的package包/类
/**
 * Reads {@code valueNumber} entries into the block builder.
 *
 * <p>An entry is written as a VARCHAR slice only when its definition level equals the column's
 * maximum definition level and the value read is non-empty. In every other case a null is
 * appended.
 *
 * @param valuesReader source of the binary values
 * @param valueNumber number of entries to process
 * @param definitionReader source of the definition level for each entry
 */
@Override
public void readValues(ValuesReader valuesReader, int valueNumber, ParquetLevelReader definitionReader)
{
    int remaining = valueNumber;
    while (remaining > 0) {
        remaining--;

        boolean written = false;
        if (definitionReader.readLevel() == descriptor.getMaxDefinitionLevel()) {
            Binary current = valuesReader.readBytes();
            if (current.length() != 0) {
                VARCHAR.writeSlice(blockBuilder, Slices.wrappedBuffer(current.getBytes()));
                written = true;
            }
        }

        if (!written) {
            blockBuilder.appendNull();
        }
    }
}
 
开发者ID:y-lan,项目名称:presto,代码行数:15,代码来源:ParquetBinaryBuilder.java

示例12: addBinary

import parquet.io.api.Binary; //导入依赖的package包/类
/**
 * Stores a binary value for the current field and marks the field as non-null.
 *
 * <p>Fields typed TIMESTAMP are converted to milliseconds and stored in {@code longs}. All
 * other fields are stored as a slice wrapping the value's bytes.
 *
 * @param value the binary value for the current field
 */
@Override
public void addBinary(Binary value)
{
    nulls[fieldIndex] = false;

    if (types[fieldIndex] != TIMESTAMP) {
        slices[fieldIndex] = wrappedBuffer(value.getBytes());
        return;
    }

    longs[fieldIndex] = ParquetTimestampUtils.getTimestampMillis(value);
}
 
开发者ID:y-lan,项目名称:presto,代码行数:12,代码来源:ParquetHiveRecordCursor.java

示例13: getTimestampMillis

import parquet.io.api.Binary; //导入依赖的package包/类
/**
 * Decodes a binary-encoded Parquet timestamp (12 bytes: time-of-day nanos followed by the
 * Julian day, both little endian) into a GMT timestamp.
 *
 * @param timestampBinary INT96 parquet timestamp
 * @return timestamp in millis, GMT timezone
 * @throws PrestoException with {@code HIVE_BAD_DATA} if the value is not exactly 12 bytes long
 */
public static long getTimestampMillis(Binary timestampBinary)
{
    int actualLength = timestampBinary.length();
    if (actualLength != 12) {
        throw new PrestoException(HIVE_BAD_DATA, "Parquet timestamp must be 12 bytes, actual " + actualLength);
    }

    byte[] raw = timestampBinary.getBytes();

    // The encoding is little endian, so bytes are handed over in reverse order.
    long timeOfDayNanos = Longs.fromBytes(raw[7], raw[6], raw[5], raw[4], raw[3], raw[2], raw[1], raw[0]);
    int julianDay = Ints.fromBytes(raw[11], raw[10], raw[9], raw[8]);

    long millisOfDay = timeOfDayNanos / NANOS_PER_MILLISECOND;
    return julianDayToMillis(julianDay) + millisOfDay;
}
 
开发者ID:y-lan,项目名称:presto,代码行数:20,代码来源:ParquetTimestampUtils.java

示例14: testInvalidBinaryLength

import parquet.io.api.Binary; //导入依赖的package包/类
/**
 * Verifies that decoding a timestamp of the wrong length is rejected.
 *
 * <p>An 8-byte value must raise a {@code PrestoException} carrying the {@code HIVE_BAD_DATA}
 * error code and the expected message. The test fails if no exception is thrown at all.
 */
@Test
public void testInvalidBinaryLength()
{
    byte[] invalidLengthBinaryTimestamp = new byte[8];
    try {
        getTimestampMillis(Binary.fromByteArray(invalidLengthBinaryTimestamp));
    }
    catch (PrestoException e) {
        assertEquals(e.getErrorCode(), HIVE_BAD_DATA.toErrorCode());
        assertEquals(e.getMessage(), "Parquet timestamp must be 12 bytes, actual 8");
        return;
    }
    // Reaching this point means the invalid input was silently accepted. AssertionError is not
    // a PrestoException, so it cannot be mistaken for the expected failure.
    throw new AssertionError("Expected PrestoException for an 8-byte timestamp, but none was thrown");
}
 
开发者ID:y-lan,项目名称:presto,代码行数:13,代码来源:TestParquetTimestampUtils.java

示例15: assertTimestampCorrect

import parquet.io.api.Binary; //导入依赖的package包/类
/**
 * Round-trips a timestamp through the binary nano-time encoding and checks that decoding it
 * yields the original millisecond value.
 *
 * @param timestampString timestamp text accepted by {@code Timestamp.valueOf}
 */
private static void assertTimestampCorrect(String timestampString)
{
    Timestamp expected = Timestamp.valueOf(timestampString);
    Binary encoded = NanoTimeUtils.getNanoTime(expected, false).toBinary();
    assertEquals(getTimestampMillis(encoded), expected.getTime());
}
 
开发者ID:y-lan,项目名称:presto,代码行数:8,代码来源:TestParquetTimestampUtils.java


注：本文中的parquet.io.api.Binary类示例由纯净天空整理自GitHub/MSDocs等开源代码及文档管理平台，相关代码片段筛选自各路编程大神贡献的开源项目，源码版权归原作者所有，传播和使用请参考对应项目的License；未经允许，请勿转载。