This article collects typical usage examples of the Java class org.apache.hadoop.hive.serde2.columnar.BytesRefWritable. If you are wondering what BytesRefWritable is for and how to use it in your own code, the curated examples below should help.
The BytesRefWritable class belongs to the org.apache.hadoop.hive.serde2.columnar package. Fifteen code examples are shown below, sorted by popularity by default.
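Before the examples, a minimal sketch of what BytesRefWritable represents: it wraps a byte[] together with a start offset and a length, so consumers read the referenced slice via getData()/getStart()/getLength() rather than assuming the array starts at index 0 and is fully used. The constructor and accessors used here are the same ones exercised by the examples that follow; the wrapper class name, the sample string, and the offsets are illustrative assumptions only.

import java.io.IOException;
import java.nio.charset.StandardCharsets;
import org.apache.hadoop.hive.serde2.columnar.BytesRefWritable;

public class BytesRefWritableSketch {
    public static void main(String[] args) throws IOException {
        byte[] buffer = "hello columnar world".getBytes(StandardCharsets.UTF_8);

        // Wrap the 8-byte slice starting at offset 6 ("columnar") without copying the buffer.
        BytesRefWritable ref = new BytesRefWritable(buffer, 6, 8);

        // Always honor the start offset and length when reading the bytes back;
        // getData() is declared to throw IOException for lazily decompressed data.
        String value = new String(ref.getData(), ref.getStart(), ref.getLength(), StandardCharsets.UTF_8);
        System.out.println(value); // prints "columnar"
    }
}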
Example 1: getSampleData

import org.apache.hadoop.hive.serde2.columnar.BytesRefWritable; // import the required package/class

private List<Object> getSampleData(RCFile.Reader reader) throws Exception {
    List<Object> sampleData = new ArrayList<Object>();
    LongWritable rowID = new LongWritable(0);
    BytesRefArrayWritable cols = new BytesRefArrayWritable();
    while (reader.next(rowID)) {
        reader.getCurrentRow(cols);
        BytesRefWritable brw = null;
        StringBuilder builder = new StringBuilder();
        builder.append("{");
        for (int i = 0; i < cols.size() - 1; i++) {
            brw = cols.get(i);
            builder.append("\"col" + i + "\":" + "\"" + Bytes.toString(brw.getData(), brw.getStart(),
                    brw.getLength()) + "\",");
        }
        brw = cols.get(cols.size() - 1);
        builder.append("\"col" + (cols.size() - 1) + "\":" + "\"" + Bytes.toString(brw.getData(), brw.getStart(),
                brw.getLength()) + "\"}");
        sampleData.add(builder.toString());
    }
    return sampleData;
}
Example 2: createRCFile

import org.apache.hadoop.hive.serde2.columnar.BytesRefWritable; // import the required package/class

private void createRCFile(final String fileName, final int numRecords,
        final int maxColumns) throws IOException {
    // Write the sequence file
    SequenceFile.Metadata metadata = getMetadataForRCFile();
    Configuration conf = new Configuration();
    conf.set(RCFile.COLUMN_NUMBER_CONF_STR, String.valueOf(maxColumns));
    Path inputFile = dfs.makeQualified(new Path(testDirectory, fileName));
    RCFile.Writer rcFileWriter = new RCFile.Writer(dfs, conf, inputFile, null,
            metadata, null);
    for (int row = 0; row < numRecords; row++) {
        BytesRefArrayWritable dataWrite = new BytesRefArrayWritable(maxColumns);
        dataWrite.resetValid(maxColumns);
        for (int column = 0; column < maxColumns; column++) {
            Text sampleText = new Text("ROW-NUM:" + row + ", COLUMN-NUM:" + column);
            ByteArrayDataOutput dataOutput = ByteStreams.newDataOutput();
            sampleText.write(dataOutput);
            dataWrite.set(column, new BytesRefWritable(dataOutput.toByteArray()));
        }
        rcFileWriter.append(dataWrite);
    }
    rcFileWriter.close();
}
Example 3: parseBooleanColumn

import org.apache.hadoop.hive.serde2.columnar.BytesRefWritable; // import the required package/class

private void parseBooleanColumn(int column)
{
    // don't include column number in message because it causes boxing which is expensive here
    checkArgument(!isPartitionColumn[column], "Column is a partition key");
    loaded[column] = true;
    if (hiveColumnIndexes[column] >= value.size()) {
        // this partition may contain fewer fields than what's declared in the schema
        // this happens when additional columns are added to the hive table after a partition has been created
        nulls[column] = true;
    }
    else {
        BytesRefWritable fieldData = value.unCheckedGet(hiveColumnIndexes[column]);
        byte[] bytes;
        try {
            bytes = fieldData.getData();
        }
        catch (IOException e) {
            throw Throwables.propagate(e);
        }
        int start = fieldData.getStart();
        int length = fieldData.getLength();
        parseBooleanColumn(column, bytes, start, length);
    }
}
Example 4: parseLongColumn

import org.apache.hadoop.hive.serde2.columnar.BytesRefWritable; // import the required package/class

private void parseLongColumn(int column)
{
    // don't include column number in message because it causes boxing which is expensive here
    checkArgument(!isPartitionColumn[column], "Column is a partition key");
    loaded[column] = true;
    if (hiveColumnIndexes[column] >= value.size()) {
        // this partition may contain fewer fields than what's declared in the schema
        // this happens when additional columns are added to the hive table after a partition has been created
        nulls[column] = true;
    }
    else {
        BytesRefWritable fieldData = value.unCheckedGet(hiveColumnIndexes[column]);
        byte[] bytes;
        try {
            bytes = fieldData.getData();
        }
        catch (IOException e) {
            throw Throwables.propagate(e);
        }
        int start = fieldData.getStart();
        int length = fieldData.getLength();
        parseLongColumn(column, bytes, start, length);
    }
}
Example 5: parseDoubleColumn

import org.apache.hadoop.hive.serde2.columnar.BytesRefWritable; // import the required package/class

private void parseDoubleColumn(int column)
{
    // don't include column number in message because it causes boxing which is expensive here
    checkArgument(!isPartitionColumn[column], "Column is a partition key");
    loaded[column] = true;
    if (hiveColumnIndexes[column] >= value.size()) {
        // this partition may contain fewer fields than what's declared in the schema
        // this happens when additional columns are added to the hive table after a partition has been created
        nulls[column] = true;
    }
    else {
        BytesRefWritable fieldData = value.unCheckedGet(hiveColumnIndexes[column]);
        byte[] bytes;
        try {
            bytes = fieldData.getData();
        }
        catch (IOException e) {
            throw Throwables.propagate(e);
        }
        int start = fieldData.getStart();
        int length = fieldData.getLength();
        parseDoubleColumn(column, bytes, start, length);
    }
}
Example 6: parseStringColumn

import org.apache.hadoop.hive.serde2.columnar.BytesRefWritable; // import the required package/class

private void parseStringColumn(int column)
{
    // don't include column number in message because it causes boxing which is expensive here
    checkArgument(!isPartitionColumn[column], "Column is a partition key");
    loaded[column] = true;
    if (hiveColumnIndexes[column] >= value.size()) {
        // this partition may contain fewer fields than what's declared in the schema
        // this happens when additional columns are added to the hive table after a partition has been created
        nulls[column] = true;
    }
    else {
        BytesRefWritable fieldData = value.unCheckedGet(hiveColumnIndexes[column]);
        byte[] bytes;
        try {
            bytes = fieldData.getData();
        }
        catch (IOException e) {
            throw Throwables.propagate(e);
        }
        int start = fieldData.getStart();
        int length = fieldData.getLength();
        parseStringColumn(column, bytes, start, length);
    }
}
Example 7: parseObjectColumn

import org.apache.hadoop.hive.serde2.columnar.BytesRefWritable; // import the required package/class

private void parseObjectColumn(int column)
{
    // don't include column number in message because it causes boxing which is expensive here
    checkArgument(!isPartitionColumn[column], "Column is a partition key");
    loaded[column] = true;
    if (hiveColumnIndexes[column] >= value.size()) {
        // this partition may contain fewer fields than what's declared in the schema
        // this happens when additional columns are added to the hive table after a partition has been created
        nulls[column] = true;
    }
    else {
        BytesRefWritable fieldData = value.unCheckedGet(hiveColumnIndexes[column]);
        byte[] bytes;
        try {
            bytes = fieldData.getData();
        }
        catch (IOException e) {
            throw Throwables.propagate(e);
        }
        int start = fieldData.getStart();
        int length = fieldData.getLength();
        parseObjectColumn(column, bytes, start, length);
    }
}
Example 8: load

import org.apache.hadoop.hive.serde2.columnar.BytesRefWritable; // import the required package/class

@Override
public void load(LazyFixedWidthBlock block)
{
    if (loaded) {
        return;
    }
    try {
        BytesRefArrayWritable columnBatch = batch.getColumn(fieldId);
        int positionInBatch = batch.getPositionInBatch();
        int batchSize = block.getPositionCount();
        boolean[] isNull = new boolean[batchSize];
        long[] vector = new long[batchSize];
        for (int i = 0; i < batchSize; i++) {
            BytesRefWritable writable = columnBatch.unCheckedGet(i + positionInBatch);
            byte[] bytes = writable.getData();
            int start = writable.getStart();
            int length = writable.getLength();
            if (length == 0 || isNull(bytes, start, length)) {
                isNull[i] = true;
            }
            else {
                vector[i] = parseLong(bytes, start, length);
            }
        }
        block.setNullVector(isNull);
        block.setRawSlice(wrappedLongArray(vector));
        loaded = true;
    }
    catch (IOException e) {
        throw Throwables.propagate(e);
    }
}
Example 9: load

import org.apache.hadoop.hive.serde2.columnar.BytesRefWritable; // import the required package/class

@Override
public void load(LazyFixedWidthBlock block)
{
    if (loaded) {
        return;
    }
    try {
        BytesRefArrayWritable columnBatch = batch.getColumn(fieldId);
        int positionInBatch = batch.getPositionInBatch();
        int positionCount = block.getPositionCount();
        boolean[] isNull = new boolean[positionCount];
        boolean[] vector = new boolean[positionCount];
        for (int i = 0; i < positionCount; i++) {
            BytesRefWritable writable = columnBatch.unCheckedGet(i + positionInBatch);
            int length = writable.getLength();
            if (length != 0) {
                byte[] bytes = writable.getData();
                int start = writable.getStart();
                vector[i] = bytes[start] != 0;
            }
            else {
                isNull[i] = true;
            }
        }
        block.setNullVector(isNull);
        block.setRawSlice(wrappedBooleanArray(vector, 0, positionCount));
        loaded = true;
    }
    catch (IOException e) {
        throw Throwables.propagate(e);
    }
}
Example 10: writeRCFileTest

import org.apache.hadoop.hive.serde2.columnar.BytesRefWritable; // import the required package/class

private static int writeRCFileTest(FileSystem fs, int rowCount, Path file, int columnNum,
        CompressionCodec codec, int columnCount) throws IOException {
    fs.delete(file, true);
    int rowsWritten = 0;
    resetRandomGenerators();
    RCFileOutputFormat.setColumnNumber(conf, columnNum);
    RCFile.Writer writer = new RCFile.Writer(fs, conf, file, null, codec);
    byte[][] columnRandom;
    BytesRefArrayWritable bytes = new BytesRefArrayWritable(columnNum);
    columnRandom = new byte[columnNum][];
    for (int i = 0; i < columnNum; i++) {
        BytesRefWritable cu = new BytesRefWritable();
        bytes.set(i, cu);
    }
    for (int i = 0; i < rowCount; i++) {
        nextRandomRow(columnRandom, bytes, columnCount);
        rowsWritten++;
        writer.append(bytes);
    }
    writer.close();
    return rowsWritten;
}
Example 11: writeRCFileTest

import org.apache.hadoop.hive.serde2.columnar.BytesRefWritable; // import the required package/class

private static int writeRCFileTest(FileSystem fs, int rowCount, Path file, int columnNum,
        CompressionCodec codec, int columnCount) throws IOException {
    fs.delete(file, true);
    int rowsWritten = 0;
    RCFileOutputFormat.setColumnNumber(conf, columnNum);
    RCFile.Writer writer = new RCFile.Writer(fs, conf, file, null, codec);
    byte[][] columnRandom;
    BytesRefArrayWritable bytes = new BytesRefArrayWritable(columnNum);
    columnRandom = new byte[columnNum][];
    for (int i = 0; i < columnNum; i++) {
        BytesRefWritable cu = new BytesRefWritable();
        bytes.set(i, cu);
    }
    for (int i = 0; i < rowCount; i++) {
        bytes.resetValid(columnRandom.length);
        for (int j = 0; j < columnRandom.length; j++) {
            columnRandom[j] = "Sample value".getBytes();
            bytes.get(j).set(columnRandom[j], 0, columnRandom[j].length);
        }
        rowsWritten++;
        writer.append(bytes);
    }
    writer.close();
    return rowsWritten;
}
Example 12: readRowWise

import org.apache.hadoop.hive.serde2.columnar.BytesRefWritable; // import the required package/class

private boolean readRowWise(final RCFile.Reader reader, final Record record)
        throws IOException {
    LongWritable rowID = new LongWritable();
    while (true) {
        boolean next;
        try {
            next = reader.next(rowID);
        } catch (EOFException ex) {
            // We have hit EOF of the stream
            break;
        }
        if (!next) {
            break;
        }
        incrementNumRecords();
        Record outputRecord = record.copy();
        BytesRefArrayWritable rowBatchBytes = new BytesRefArrayWritable();
        rowBatchBytes.resetValid(columns.size());
        reader.getCurrentRow(rowBatchBytes);
        // Read all the configured columns and set them in the output record
        for (RCFileColumn rcColumn : columns) {
            BytesRefWritable columnBytes = rowBatchBytes.get(rcColumn.getInputField());
            outputRecord.put(rcColumn.getOutputField(), updateColumnValue(rcColumn, columnBytes));
        }
        // pass record to next command in chain:
        if (!getChild().process(outputRecord)) {
            return false;
        }
    }
    return true;
}
Example 13: readColumnWise

import org.apache.hadoop.hive.serde2.columnar.BytesRefWritable; // import the required package/class

private boolean readColumnWise(RCFile.Reader reader, Record record) throws IOException {
    for (RCFileColumn rcColumn : columns) {
        reader.sync(0);
        reader.resetBuffer();
        while (true) {
            boolean next;
            try {
                next = reader.nextBlock();
            } catch (EOFException ex) {
                // We have hit EOF of the stream
                break;
            }
            if (!next) {
                break;
            }
            BytesRefArrayWritable rowBatchBytes = reader.getColumn(rcColumn.getInputField(), null);
            for (int rowIndex = 0; rowIndex < rowBatchBytes.size(); rowIndex++) {
                incrementNumRecords();
                Record outputRecord = record.copy();
                BytesRefWritable rowBytes = rowBatchBytes.get(rowIndex);
                outputRecord.put(rcColumn.getOutputField(), updateColumnValue(rcColumn, rowBytes));
                // pass record to next command in chain:
                if (!getChild().process(outputRecord)) {
                    return false;
                }
            }
        }
    }
    return true;
}
Example 14: updateColumnValue

import org.apache.hadoop.hive.serde2.columnar.BytesRefWritable; // import the required package/class

private Writable updateColumnValue(RCFileColumn column, BytesRefWritable bytesRef) throws IOException {
    Writable newColumnValue = column.newWritable();
    // Small optimization to bypass DataInput read if the column writable is
    // BytesRefWritable
    if (newColumnValue.getClass() == BytesRefWritable.class) {
        newColumnValue = bytesRef;
    } else {
        byte[] currentRowBytes = Arrays.copyOfRange(bytesRef.getData(),
                bytesRef.getStart(), bytesRef.getStart() + bytesRef.getLength());
        DataInput dataInput = ByteStreams.newDataInput(currentRowBytes);
        newColumnValue.readFields(dataInput);
    }
    return newColumnValue;
}
Example 15: writeRCFileTest

import org.apache.hadoop.hive.serde2.columnar.BytesRefWritable; // import the required package/class

private static int writeRCFileTest(FileSystem fs, int rowCount, Path file,
        int columnNum, CompressionCodec codec, int columnCount)
        throws IOException {
    fs.delete(file, true);
    int rowsWritten = 0;
    resetRandomGenerators();
    RCFileOutputFormat.setColumnNumber(conf, columnNum);
    RCFile.Writer writer = new RCFile.Writer(fs, conf, file, null, codec);
    byte[][] columnRandom;
    BytesRefArrayWritable bytes = new BytesRefArrayWritable(columnNum);
    columnRandom = new byte[columnNum][];
    for (int i = 0; i < columnNum; i++) {
        BytesRefWritable cu = new BytesRefWritable();
        bytes.set(i, cu);
    }
    for (int i = 0; i < rowCount; i++) {
        nextRandomRow(columnRandom, bytes, columnCount);
        rowsWritten++;
        writer.append(bytes);
    }
    writer.close();
    return rowsWritten;
}