本文整理汇总了Java中parquet.io.api.Binary类的典型用法代码示例。如果您正苦于以下问题:Java Binary类的具体用法?Java Binary怎么用?Java Binary使用的例子?那么恭喜您, 这里精选的类代码示例或许可以为您提供帮助。
Binary类属于parquet.io.api包,在下文中一共展示了Binary类的15个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Java代码示例。
示例1: readField
import parquet.io.api.Binary; //导入依赖的package包/类
@Override
// Reads `recordsToReadInThisPass` values for a nullable variable-length binary column.
// Dictionary-encoded pages are materialized value-by-value through the dictionary reader;
// plain pages are delegated to the parent class, after which per-value lengths are written.
protected void readField(long recordsToReadInThisPass) {
// Point at the raw page bytes for this pass.
this.bytebuf = pageReader.pageData;
if (usingDictionary) {
NullableVarBinaryVector.Mutator mutator = castedVector.getMutator();
Binary currDictValToWrite;
// Copy each dictionary-decoded value into the vector at the running offset.
for (int i = 0; i < recordsReadInThisIteration; i++){
currDictValToWrite = pageReader.dictionaryValueReader.readBytes();
mutator.setSafe(valuesReadInCurrentPass + i, currDictValToWrite.toByteBuffer(), 0,
currDictValToWrite.length());
}
// Set the write Index. The next page that gets read might be a page that does not use dictionary encoding
// and we will go into the else condition below. The readField method of the parent class requires the
// writer index to be set correctly.
int writerIndex = castedBaseVector.getBuffer().writerIndex();
castedBaseVector.getBuffer().setIndex(0, writerIndex + (int)readLength);
} else {
super.readField(recordsToReadInThisPass);
// TODO - replace this with fixed binary type in drill
// for now we need to write the lengths of each value
// NOTE(review): values are fixed-width here, so every entry gets the same byte length.
int byteLength = dataTypeLengthInBits / 8;
for (int i = 0; i < recordsToReadInThisPass; i++) {
castedVector.getMutator().setValueLengthSafe(valuesReadInCurrentPass + i, byteLength);
}
}
}
示例2: getCustomerDataSet
import parquet.io.api.Binary; //导入依赖的package包/类
/**
 * Builds a Flink DataSet over the Parquet/Thrift customer table, projecting only the
 * ID and MKTSEGMENT columns and pushing down a filter for the "AUTOMOBILE" segment.
 */
private static DataSet<Tuple2<Void, CustomerTable>> getCustomerDataSet(ExecutionEnvironment env) throws
		IOException {
	Job job = Job.getInstance();
	ParquetInputFormat.setReadSupportClass(job, ThriftReadSupport.class);
	// Schema projection: read only the columns we actually need.
	job.getConfiguration().set("parquet.thrift.column.filter", "ID;MKTSEGMENT");
	ParquetThriftInputFormat.addInputPath(job, new Path(customerPath));
	// Push down the market-segment predicate so non-matching row groups are skipped.
	BinaryColumn segmentColumn = binaryColumn("MKTSEGMENT");
	FilterPredicate automobileOnly = eq(segmentColumn, Binary.fromString("AUTOMOBILE"));
	ParquetInputFormat.setFilterPredicate(job.getConfiguration(), automobileOnly);
	HadoopInputFormat inputFormat = new HadoopInputFormat(new ParquetThriftInputFormat(), Void.class,
			CustomerTable.class, job);
	return env.createInput(inputFormat);
}
示例3: readThrift
import parquet.io.api.Binary; //导入依赖的package包/类
/**
 * Reads Person records from Parquet/Thrift files at {@code inputPath}, projecting a
 * subset of columns and pushing down a name == "Felix" predicate.
 */
public static DataSet<Tuple2<Void, Person>> readThrift(ExecutionEnvironment env, String inputPath) throws
		IOException {
	Job job = Job.getInstance();
	// schema projection: don't read attributes id and email
	job.getConfiguration().set("parquet.thrift.column.filter", "name;id;email;phone/number");
	FileInputFormat.addInputPath(job, new Path(inputPath));
	// push down predicates: get all persons with name = "Felix"
	BinaryColumn nameColumn = binaryColumn("name");
	FilterPredicate felixOnly = eq(nameColumn, Binary.fromString("Felix"));
	ParquetInputFormat.setFilterPredicate(job.getConfiguration(), felixOnly);
	HadoopInputFormat inputFormat = new HadoopInputFormat(new ParquetThriftInputFormat(), Void.class, Person
			.class, job);
	return env.createInput(inputFormat);
}
示例4: addBinary
import parquet.io.api.Binary; //导入依赖的package包/类
@Override
// Converts an incoming binary value: raw bytes when no fixed class is configured,
// otherwise an instance of the Avro "fixed" class built via its byte[] constructor.
public final void addBinary(Binary value) {
	if (fixedClass == null) {
		parent.add(value.getBytes());
		return;
	}
	if (fixedClassCtor == null) {
		throw new IllegalArgumentException(
				"fixedClass specified but fixedClassCtor is null.");
	}
	try {
		// Wrap the bytes in the configured fixed type via reflection.
		parent.add(fixedClassCtor.newInstance(value.getBytes()));
	} catch (Exception e) {
		throw new RuntimeException(e);
	}
}
示例5: dateFromInt96
import parquet.io.api.Binary; //导入依赖的package包/类
/**
 * Decodes a 12-byte INT96 Parquet timestamp into epoch milliseconds.
 * Layout: bytes 0-7 are nanoseconds within the day, bytes 8-11 the Julian day,
 * both stored little endian.
 */
private static long dateFromInt96( Binary value ) {
  byte[] raw = value.getBytes();
  if ( raw.length != 12 ) {
    throw new RuntimeException( "Invalid byte array length for INT96" );
  }
  // Assemble the little-endian fields most-significant byte first.
  long timeOfDayNanos = 0L;
  for ( int i = 7; i >= 0; i-- ) {
    timeOfDayNanos = ( timeOfDayNanos << 8 ) | ( raw[i] & 255 );
  }
  int julianDay = 0;
  for ( int i = 11; i >= 8; i-- ) {
    julianDay = ( julianDay << 8 ) | ( raw[i] & 255 );
  }
  // Days since the Julian epoch -> millis since the Unix epoch, plus millis within the day.
  return ( julianDay - JULIAN_DAY_OF_EPOCH ) * 24L * 60L * 60L * 1000L + timeOfDayNanos / 1000000;
}
示例6: addBinary
import parquet.io.api.Binary; //导入依赖的package包/类
@Override
// Stores a binary value on the record: null stays null, valid UTF-8 becomes a String,
// anything else is kept as raw bytes.
public void addBinary(Binary value) {
	byte[] data = value.getBytes();
	if (data == null) {
		record.add(name, null);
		return;
	}
	// Fix: the original re-checked `data != null` here (always true after the early
	// return above) and called value.getBytes() a second time for the fallback.
	try {
		// Best effort: decode as UTF-8 and store as a String when possible.
		CharBuffer buffer = UTF8_DECODER.decode(value.toByteBuffer());
		record.add(name, buffer.toString());
	} catch (Throwable th) {
		// Deliberate best-effort: bytes that are not valid UTF-8 (or any decode/add
		// failure) fall back to being stored as raw bytes.
		record.add(name, data);
	}
}
示例7: addBinary
import parquet.io.api.Binary; //导入依赖的package包/类
@Override
// Copies the binary value into the Drill buffer and hands a VarBinaryHolder to the writer.
public void addBinary(Binary value) {
// reallocIfNeeded may return a NEW buffer; keep `buf` and the holder pointing at the same one.
holder.buffer = buf = buf.reallocIfNeeded(value.length());
buf.setBytes(0, value.toByteBuffer());
// Holder describes the [start, end) byte range of the value inside the buffer.
holder.start = 0;
holder.end = value.length();
writer.write(holder);
}
示例8: readField
import parquet.io.api.Binary; //导入依赖的package包/类
@Override
// Reads `recordsToReadInThisPass` values for a (non-nullable) variable-length binary column.
// Dictionary pages are decoded value-by-value; plain pages are delegated to the parent reader.
protected void readField(long recordsToReadInThisPass) {
// Clamp this iteration to what remains in the current page and in the requested pass.
recordsReadInThisIteration = Math.min(pageReader.currentPageCount
- pageReader.valuesRead, recordsToReadInThisPass - valuesReadInCurrentPass);
readLengthInBits = recordsReadInThisIteration * dataTypeLengthInBits;
readLength = (int) Math.ceil(readLengthInBits / 8.0);
if (usingDictionary) {
VarBinaryVector.Mutator mutator = castedVector.getMutator();
Binary currDictValToWrite = null;
// Copy each dictionary-decoded value into the vector at the running offset.
for (int i = 0; i < recordsReadInThisIteration; i++){
currDictValToWrite = pageReader.dictionaryValueReader.readBytes();
mutator.setSafe(valuesReadInCurrentPass + i, currDictValToWrite.toByteBuffer(), 0,
currDictValToWrite.length());
}
// Set the write Index. The next page that gets read might be a page that does not use dictionary encoding
// and we will go into the else condition below. The readField method of the parent class requires the
// writer index to be set correctly.
int writerIndex = castedVector.getBuffer().writerIndex();
castedVector.getBuffer().setIndex(0, writerIndex + (int)readLength);
} else {
super.readField(recordsToReadInThisPass);
}
// TODO - replace this with fixed binary type in drill
// now we need to write the lengths of each value
// NOTE(review): unlike the nullable variant (example 1), this length loop runs after BOTH
// branches, i.e. also for dictionary pages — confirm that is intended.
int byteLength = dataTypeLengthInBits / 8;
for (int i = 0; i < recordsToReadInThisPass; i++) {
castedVector.getMutator().setValueLengthSafe(valuesReadInCurrentPass + i, byteLength);
}
}
示例9: testAllFilter
import parquet.io.api.Binary; //导入依赖的package包/类
@Test
// A predicate that matches no record must yield an empty result set.
public void testAllFilter() throws Exception {
	BinaryColumn nameColumn = binaryColumn("name");
	FilterPredicate nothingMatches = eq(nameColumn, Binary.fromString("no matches"));
	List<Group> result = PhoneBookWriter.readFile(phonebookFile, FilterCompat.get(nothingMatches));
	assertEquals(new ArrayList<Group>(), result);
}
示例10: keep
import parquet.io.api.Binary; //导入依赖的package包/类
@Override
// Null values never match; otherwise keep values whose UTF-8 string form starts with "p".
public boolean keep(Binary value) {
	return value != null && value.toStringUsingUTF8().startsWith("p");
}
示例11: readValues
import parquet.io.api.Binary; //导入依赖的package包/类
@Override
// Materializes `valueNumber` VARCHAR values into the block builder. A value is present only
// when its definition level reaches the column's max; empty or absent values become nulls.
public void readValues(ValuesReader valuesReader, int valueNumber, ParquetLevelReader definitionReader)
{
    for (int i = 0; i < valueNumber; i++) {
        Binary binary = null;
        if (definitionReader.readLevel() == descriptor.getMaxDefinitionLevel()) {
            binary = valuesReader.readBytes();
        }
        if (binary != null && binary.length() != 0) {
            VARCHAR.writeSlice(blockBuilder, Slices.wrappedBuffer(binary.getBytes()));
        }
        else {
            blockBuilder.appendNull();
        }
    }
}
示例12: addBinary
import parquet.io.api.Binary; //导入依赖的package包/类
@Override
// Marks the field as non-null and routes the value: TIMESTAMP columns are decoded from
// INT96 into epoch millis, everything else is stored as a raw byte slice.
public void addBinary(Binary value)
{
    nulls[fieldIndex] = false;
    if (types[fieldIndex] != TIMESTAMP) {
        slices[fieldIndex] = wrappedBuffer(value.getBytes());
    }
    else {
        longs[fieldIndex] = ParquetTimestampUtils.getTimestampMillis(value);
    }
}
示例13: getTimestampMillis
import parquet.io.api.Binary; //导入依赖的package包/类
/**
 * Decodes a binary-encoded INT96 parquet timestamp (12 bytes: 8-byte nanos-of-day
 * followed by 4-byte Julian day, both little endian) into a GMT timestamp.
 *
 * @param timestampBinary INT96 parquet timestamp
 * @return timestamp in millis, GMT timezone
 * @throws PrestoException if the binary value is not exactly 12 bytes
 */
public static long getTimestampMillis(Binary timestampBinary)
{
    if (timestampBinary.length() != 12) {
        throw new PrestoException(HIVE_BAD_DATA, "Parquet timestamp must be 12 bytes, actual " + timestampBinary.length());
    }
    byte[] raw = timestampBinary.getBytes();
    // little endian encoding - need to invert byte order
    long nanosOfDay = Longs.fromBytes(raw[7], raw[6], raw[5], raw[4], raw[3], raw[2], raw[1], raw[0]);
    int julianDay = Ints.fromBytes(raw[11], raw[10], raw[9], raw[8]);
    return julianDayToMillis(julianDay) + (nanosOfDay / NANOS_PER_MILLISECOND);
}
示例14: testInvalidBinaryLength
import parquet.io.api.Binary; //导入依赖的package包/类
@Test
// An 8-byte value must be rejected: INT96 timestamps are exactly 12 bytes.
public void testInvalidBinaryLength()
{
    byte[] invalidLengthBinaryTimestamp = new byte[8];
    try {
        getTimestampMillis(Binary.fromByteArray(invalidLengthBinaryTimestamp));
        // Fix: the original test passed silently when no exception was thrown.
        throw new AssertionError("expected PrestoException for 8-byte timestamp");
    }
    catch (PrestoException e) {
        assertEquals(e.getErrorCode(), HIVE_BAD_DATA.toErrorCode());
        assertEquals(e.getMessage(), "Parquet timestamp must be 12 bytes, actual 8");
    }
}
示例15: assertTimestampCorrect
import parquet.io.api.Binary; //导入依赖的package包/类
// Round-trips a timestamp string through the Hive INT96 encoder and asserts that
// getTimestampMillis recovers the original epoch millis.
private static void assertTimestampCorrect(String timestampString)
{
    Timestamp expected = Timestamp.valueOf(timestampString);
    Binary int96 = NanoTimeUtils.getNanoTime(expected, false).toBinary();
    assertEquals(getTimestampMillis(int96), expected.getTime());
}