本文整理汇总了Java中org.apache.hadoop.io.Text.getBytes方法的典型用法代码示例。如果您正苦于以下问题:Java Text.getBytes方法的具体用法?Java Text.getBytes怎么用?Java Text.getBytes使用的例子?那么恭喜您,这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在类org.apache.hadoop.io.Text的用法示例。
在下文中一共展示了Text.getBytes方法的10个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Java代码示例。
示例1: find
import org.apache.hadoop.io.Text; //导入方法依赖的package包/类
/**
 * Locates the first occurrence of {@code delimiter} in {@code text} that is
 * not enclosed in double quotes.
 *
 * <p>The scan starts at byte offset {@code start} and covers only the valid
 * bytes of the text (up to {@code text.getLength()}). Each double-quote byte
 * toggles a "quoted" state, and delimiter bytes seen while that state is on
 * are skipped. The quoted state is always off when the scan begins.
 *
 * @param text the text being searched
 * @param delimiter the delimiter byte to look for
 * @param start the byte index at which to start searching
 * @return the byte index of the first unquoted delimiter at or after
 *         {@code start}, or -1 if there is none
 */
public int find(Text text, byte delimiter, int start) {
  final byte[] data = text.getBytes();
  final int end = text.getLength();
  boolean quoted = false;
  for (int i = start; i < end; i++) {
    final byte current = data[i];
    if (current == '\"') {
      quoted = !quoted;
    }
    // The quote check runs first, so a closing quote that is itself the
    // delimiter is reported, while an opening one is not.
    if (!quoted && current == delimiter) {
      return i;
    }
  }
  return -1;
}
示例2: lowLevelFastqRead
import org.apache.hadoop.io.Text; //导入方法依赖的package包/类
/**
 * Reads one FASTQ record: the ID line goes into {@code readName}, and the ID
 * line plus the following three lines (sequence, separator, quality) are
 * appended to {@code value}.
 *
 * <p>{@code readName} is cleared before reading; {@code value} is only
 * appended to, so callers presumably clear it beforehand (not visible here).
 *
 * @param readName receives the ID line of the record
 * @param value receives the ID line followed by the remaining three lines
 * @return {@code false} if the first read reports zero (treated as end of
 *         input), {@code true} once a full record has been appended
 * @throws IOException if reading a line fails
 * @throws RuntimeException if the ID line is empty or does not start with '@'
 */
protected boolean lowLevelFastqRead(Text readName, Text value) throws IOException {
  // ID line
  readName.clear();
  long skipped = appendLineInto(readName, true);
  if (skipped == 0) {
    return false; // EOF
  }
  // Text.clear() only resets the length: the backing array may be empty or
  // still hold bytes of an earlier record. Check the length before trusting
  // byte 0, otherwise a blank ID line either throws an index error or passes
  // the '@' test on stale data.
  if (readName.getLength() == 0 || readName.getBytes()[0] != '@') {
    throw new RuntimeException("unexpected fastq record didn't start with '@' at " + makePositionMessage() + ". Line: " + readName + ". \n");
  }
  value.append(readName.getBytes(), 0, readName.getLength());
  // sequence
  appendLineInto(value, false);
  // separator line
  appendLineInto(value, false);
  // quality
  appendLineInto(value, false);
  return true;
}
示例3: writeString
import org.apache.hadoop.io.Text; //导入方法依赖的package包/类
/**
 * Writes a string in the Text wire layout: a VInt holding the encoded byte
 * count, followed by that many bytes.
 *
 * <p>A {@code null} string is written as the single VInt {@code -1} with no
 * payload bytes.
 *
 * @param out the sink to write to
 * @param s the string to write; may be {@code null}
 * @throws IOException if writing to {@code out} fails
 */
public static void writeString(DataOutput out, String s) throws IOException {
  if (s == null) {
    writeVInt(out, -1);
    return;
  }
  final Text encoded = new Text(s);
  final byte[] payload = encoded.getBytes();
  // Use getLength(), not payload.length: only the first getLength() bytes of
  // the backing array are valid content.
  final int byteCount = encoded.getLength();
  writeVInt(out, byteCount);
  out.write(payload, 0, byteCount);
}
示例4: setSafeValue
import org.apache.hadoop.io.Text; //导入方法依赖的package包/类
/**
 * Copies a Hive string field into slot {@code outputIndex} of the output
 * vector.
 *
 * @param oi inspector for the field; must be a {@code StringObjectInspector}
 * @param hiveFieldValue the Hive field value to read
 * @param outputVV destination vector; must be a {@code NullableVarCharVector}
 * @param outputIndex the slot in {@code outputVV} to write
 */
@Override
public void setSafeValue(ObjectInspector oi, Object hiveFieldValue, ValueVector outputVV, int outputIndex) {
  final StringObjectInspector inspector = (StringObjectInspector) oi;
  final Text text = inspector.getPrimitiveWritableObject(hiveFieldValue);
  final byte[] content = text.getBytes();
  // Only the first getLength() bytes of the backing array are valid.
  final int byteCount = text.getLength();
  final NullableVarCharVector target = (NullableVarCharVector) outputVV;
  target.getMutator().setSafe(outputIndex, content, 0, byteCount);
}
示例5: skipUtfByteOrderMark
import org.apache.hadoop.io.Text; //导入方法依赖的package包/类
/**
 * Reads one line into {@code value} and strips a leading UTF-8 byte order
 * mark (0xEF, 0xBB, 0xBF) if present.
 *
 * <p>Advances {@code pos} by the number of bytes the read reported, BOM
 * included.
 *
 * @param value receives the line, without the BOM
 * @return the size reported by the read, reduced by 3 when a BOM was stripped
 * @throws IOException if the underlying read fails
 */
private int skipUtfByteOrderMark(Text value) throws IOException {
  // Text only supports UTF-8, so only the UTF-8 BOM needs to be checked.
  // Allow three extra bytes on this read so a BOM does not count against the
  // configured line limit. Per the original authors this does not change
  // existing behaviour: callers discard the line and read the next one when
  // the returned size is not less than maxLineLength.
  final int limitWithBom = (int) Math.min(3L + (long) maxLineLength,
      Integer.MAX_VALUE);
  final int consumed = in.readLine(value, limitWithBom, maxBytesToConsume(pos));
  pos += consumed;

  final int lineLength = value.getLength();
  final byte[] lineBytes = value.getBytes();
  final boolean startsWithBom = lineLength >= 3
      && lineBytes[0] == (byte) 0xEF
      && lineBytes[1] == (byte) 0xBB
      && lineBytes[2] == (byte) 0xBF;
  if (!startsWithBom) {
    return consumed;
  }

  // found UTF-8 BOM, strip it.
  LOG.info("Found UTF-8 BOM and skipped it");
  final int remaining = lineLength - 3;
  if (remaining > 0) {
    // Work from a copy of the bytes; reusing the same buffer might also work
    // but has not been relied on here.
    final byte[] copy = value.copyBytes();
    value.set(copy, 3, remaining);
  } else {
    value.clear();
  }
  return consumed - 3;
}
示例6: buildTrie
import org.apache.hadoop.io.Text; //导入方法依赖的package包/类
/**
 * Recursively builds a trie over a sorted array of cut points so the right
 * partition can be located quickly.
 *
 * <p>Recursion stops with a leaf node once the prefix is {@code maxDepth}
 * bytes long or the range {@code [lower, upper)} is empty. Otherwise an inner
 * node with 256 children is built, one per possible next byte.
 *
 * @param splits the list of cut points
 * @param lower the lower bound of partitions 0..numPartitions-1
 * @param upper the upper bound of partitions 0..numPartitions-1
 * @param prefix the prefix that has already been checked against
 * @param maxDepth the maximum depth to build a trie for
 * @return the trie node that will divide the splits correctly
 */
private static TrieNode buildTrie(Text[] splits, int lower, int upper,
    Text prefix, int maxDepth) {
  final int depth = prefix.getLength();
  if (depth >= maxDepth || lower == upper) {
    return new LeafTrieNode(depth, splits, lower, upper);
  }
  final InnerTrieNode node = new InnerTrieNode(depth);
  // Extend the prefix by one placeholder byte; it is overwritten per child.
  final Text trial = new Text(prefix);
  trial.append(new byte[1], 0, 1);
  int boundary = lower;
  for (int ch = 0; ch < 255; ++ch) {
    final int childLower = boundary;
    // Advance past every split that sorts below prefix + (ch + 1); those
    // splits belong to child ch.
    trial.getBytes()[depth] = (byte) (ch + 1);
    while (boundary < upper && splits[boundary].compareTo(trial) < 0) {
      boundary += 1;
    }
    // Restore the byte so the child is built with prefix + ch.
    trial.getBytes()[depth] = (byte) ch;
    node.child[ch] = buildTrie(splits, childLower, boundary, trial,
        maxDepth);
  }
  // pick up the rest
  trial.getBytes()[depth] = (byte) 255;
  node.child[255] = buildTrie(splits, boundary, upper, trial,
      maxDepth);
  return node;
}
示例7: getPartition
import org.apache.hadoop.io.Text; //导入方法依赖的package包/类
/**
 * Picks a partition from the leading bytes of the key.
 *
 * <p>Up to {@code PREFIX_LENGTH} leading key bytes are packed, most
 * significant first, into an int that is then divided by
 * {@code prefixesPerReduce}. {@code value} and {@code numPartitions} are not
 * consulted.
 *
 * @param key the key whose prefix determines the partition
 * @param value unused
 * @param numPartitions unused
 * @return the packed key prefix divided by {@code prefixesPerReduce}
 */
@Override
public int getPartition(Text key, Text value, int numPartitions) {
  final byte[] keyBytes = key.getBytes();
  // Keys shorter than PREFIX_LENGTH contribute only their valid bytes.
  final int prefixBytes = Math.min(PREFIX_LENGTH, key.getLength());
  int packed = 0;
  int idx = 0;
  while (idx < prefixBytes) {
    // Mask to treat the byte as unsigned before merging it in.
    packed = (packed << 8) | (keyBytes[idx] & 0xff);
    idx++;
  }
  return packed / prefixesPerReduce;
}
示例8: map
import org.apache.hadoop.io.Text; //导入方法依赖的package包/类
/**
 * Converts one input line into an HBase {@code Put} after multiplying each
 * value by 3.
 *
 * <p>The line is split on the ESC character (0x1B). The first token becomes
 * the row key; each later token is expected to look like {@code VALUE<n>} and
 * is rewritten as {@code VALUE<3n>}. Token {@code i} is stored under family
 * "FAM" with qualifier "A" for i=1 and "B" for i=2; only two qualifiers are
 * defined, so a line with more than two value tokens is not supported.
 *
 * @param offset unused
 * @param value the raw input line
 * @param context the sink that receives the row key and the {@code Put}
 * @throws IOException if decoding the line or writing the output fails
 */
@Override
public void map(LongWritable offset, Text value, Context context)
    throws IOException {
  byte[] family = Bytes.toBytes("FAM");
  final byte[][] qualifiers = { Bytes.toBytes("A"), Bytes.toBytes("B") };
  // do some basic line parsing
  // Text.getBytes() exposes the whole backing array, which can be longer
  // than the current content; decode only the first getLength() bytes so
  // stale bytes from an earlier, longer line never leak into the last token.
  byte[] lineBytes = value.getBytes();
  String[] valueTokens =
      new String(lineBytes, 0, value.getLength(), "UTF-8").split("\u001b");
  // create the rowKey and Put
  ImmutableBytesWritable rowKey =
      new ImmutableBytesWritable(Bytes.toBytes(valueTokens[0]));
  Put put = new Put(rowKey.copyBytes());
  put.setDurability(Durability.SKIP_WAL);
  // The value should look like this: VALUE1 or VALUE2. Let's multiply
  // the integer by 3
  for (int i = 1; i < valueTokens.length; i++) {
    String prefix = valueTokens[i].substring(0, "VALUE".length());
    String suffix = valueTokens[i].substring("VALUE".length());
    String newValue = prefix + Integer.parseInt(suffix) * 3;
    KeyValue kv = new KeyValue(rowKey.copyBytes(), family,
        qualifiers[i - 1], Bytes.toBytes(newValue));
    put.add(kv);
  }
  try {
    context.write(rowKey, put);
  } catch (InterruptedException e) {
    e.printStackTrace();
    // Restore the interrupt flag so the calling framework can still see
    // that this thread was asked to stop.
    Thread.currentThread().interrupt();
  }
}
示例9: readLeadingInt
import org.apache.hadoop.io.Text; //导入方法依赖的package包/类
/**
 * Decodes the first four bytes of {@code txt} as an int, using
 * {@code DataInputStream.readInt()} (big-endian).
 *
 * @param txt the text whose leading bytes hold the int
 * @return the decoded int
 * @throws IOException if {@code txt} holds fewer than four valid bytes
 *         (surfaced as an end-of-stream error by {@code readInt})
 */
private static int readLeadingInt(Text txt) throws IOException {
  // Bound the stream by getLength(): the backing array returned by getBytes()
  // can be longer than the valid content, and reading past the valid region
  // would silently decode stale bytes instead of failing.
  DataInputStream in =
      new DataInputStream(
          new ByteArrayInputStream(txt.getBytes(), 0, txt.getLength()));
  return in.readInt();
}
示例10: reduce
import org.apache.hadoop.io.Text; //导入方法依赖的package包/类
/**
 * Verifying reducer: checks each key and its values against expectations,
 * then forwards every value unchanged.
 *
 * <p>Keys of length 25 are treated as tagged records whose first byte is 'A'
 * or 'B'; exactly one value is expected, and its first byte must equal the
 * running per-tag counter. All other keys are expected to carry one value per
 * map, with first bytes summing to {@code nMaps * (nMaps - 1) / 2}. The
 * fields {@code nRec}, {@code aKey}, {@code bKey}, {@code nKey},
 * {@code nMaps}, {@code keylen}, {@code fmt}, {@code tagfmt}, {@code kb} and
 * {@code vb} are declared outside this block.
 *
 * @param key the key being reduced
 * @param values the values grouped under {@code key}
 * @param out receives every (key, value) pair unchanged
 * @param reporter unused
 * @throws IOException if collecting output fails
 */
@Override
public void reduce(Text key, Iterator<Text> values,
    OutputCollector<Text,Text> out, Reporter reporter)
    throws IOException {
  int vc = 0;
  final int vlen;
  final int preRec = nRec;
  final int vcCheck, recCheck;
  // Truncate the formatter's buffer to keylen before formatting the key.
  ((StringBuilder)fmt.out()).setLength(keylen);
  if (25 == key.getLength()) {
    // tagged record
    recCheck = 1; // expect only 1 record
    switch ((char)key.getBytes()[0]) {
      case 'A':
        vlen = getValLen(++aKey, nMaps) - 128;
        vcCheck = aKey; // expect eq id
        break;
      case 'B':
        vlen = getValLen(++bKey, nMaps);
        vcCheck = bKey; // expect eq id
        break;
      default:
        vlen = vcCheck = -1;
        // Report the byte that was actually switched on (index 0); the
        // message previously printed byte 24, which is not the tag.
        fail("Unexpected tag on record: " + ((char)key.getBytes()[0]));
    }
    kb.set((char)key.getBytes()[0] + fmt.format(tagfmt,vcCheck).toString());
  } else {
    kb.set(fmt.format(tagfmt, ++nKey).toString());
    vlen = 1000;
    recCheck = nMaps; // expect 1 rec per map
    vcCheck = (nMaps * (nMaps - 1)) >>> 1; // expect eq sum(id)
  }
  assertEquals(kb, key);
  while (values.hasNext()) {
    final Text val = values.next();
    // increment vc by map ID assoc w/ val
    vc += val.getBytes()[0];
    // verify that all the fixed characters 'V' match
    assertEquals(0, WritableComparator.compareBytes(
        vb.getBytes(), 1, vlen - 1,
        val.getBytes(), 1, val.getLength() - 1));
    out.collect(key, val);
    ++nRec;
  }
  assertEquals("Bad rec count for " + key, recCheck, nRec - preRec);
  assertEquals("Bad rec group for " + key, vcCheck, vc);
}