本文整理汇总了Java中org.apache.hadoop.io.Text.clear方法的典型用法代码示例。如果您正苦于以下问题:Java Text.clear方法的具体用法?Java Text.clear怎么用?Java Text.clear使用的例子?那么,这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在类org.apache.hadoop.io.Text的用法示例。
在下文中一共展示了Text.clear方法的5个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Java代码示例。
示例1: lowLevelFastqRead
import org.apache.hadoop.io.Text; //导入方法依赖的package包/类
/**
 * Reads one four-line FASTQ record (ID, sequence, separator, quality).
 *
 * <p>{@code readName} is cleared and filled with the ID line, which is then appended to
 * {@code value}. The three remaining lines are appended to {@code value} through
 * {@code appendLineInto}. {@code value} is not cleared here; the caller decides whether it
 * starts out empty.
 *
 * @param readName receives the ID line of the record
 * @param value receives the ID line followed by the sequence, separator and quality lines
 * @return {@code false} if {@code appendLineInto} reported zero bytes for the ID line
 *     (treated as end of input), {@code true} after all four lines have been appended
 * @throws IOException propagated from {@code appendLineInto}
 * @throws RuntimeException if the ID line is empty or does not start with {@code '@'}
 */
protected boolean lowLevelFastqRead(Text readName, Text value) throws IOException {
  // ID line
  readName.clear();
  long skipped = appendLineInto(readName, true);
  if (skipped == 0) {
    return false; // EOF
  }
  // Text.getBytes() hands out the backing array, which is only valid up to getLength().
  // Without the length guard an empty ID line would either index past an empty array or
  // match a stale '@' left over from a previous record in a reused buffer.
  if (readName.getLength() == 0 || readName.getBytes()[0] != '@') {
    throw new RuntimeException("unexpected fastq record didn't start with '@' at " + makePositionMessage() + ". Line: " + readName + ". \n");
  }
  value.append(readName.getBytes(), 0, readName.getLength());
  // sequence
  appendLineInto(value, false);
  // separator line
  appendLineInto(value, false);
  // quality
  appendLineInto(value, false);
  return true;
}
示例2: next
import org.apache.hadoop.io.Text; //导入方法依赖的package包/类
/**
 * Reads the next FASTQ record from the input into {@code value}.
 *
 * <p>{@code value} is cleared before reading, so after a successful call it holds only the
 * record just read.
 *
 * @param value receives the text of the record
 * @return {@code false} if the current position is at or past the end of the slice, or if
 *     {@code lowLevelFastqRead} found no further data; {@code true} if a record was read
 * @throws IOException propagated from the underlying read
 * @throws RuntimeException if the input ends in the middle of a record; the original
 *     {@link EOFException} is attached as the cause
 */
public boolean next(Text value) throws IOException {
  if (pos >= end) {
    return false; // past end of slice
  }
  try {
    Text readName = new Text();
    value.clear();
    return lowLevelFastqRead(readName, value);
  } catch (EOFException e) {
    // Chain the EOFException so the stack trace still shows where the read ran dry.
    throw new RuntimeException(
        "unexpected end of file in fastq record at " + makePositionMessage(), e);
  }
}
示例3: skipUtfByteOrderMark
import org.apache.hadoop.io.Text; //导入方法依赖的package包/类
/**
 * Reads the first line into {@code value} and removes a leading UTF-8 byte order mark
 * (0xEF, 0xBB, 0xBF) if one is present.
 *
 * <p>The line is read with a length limit three bytes larger than {@code maxLineLength} so
 * that the BOM does not count against the line. {@code pos} is advanced by the full number
 * of bytes reported by the read, BOM included.
 *
 * @param value receives the first line, without the BOM
 * @return the number of bytes reported by the read, reduced by three when a BOM was stripped
 * @throws IOException propagated from the underlying line read
 */
private int skipUtfByteOrderMark(Text value) throws IOException {
  // Text is UTF-8 only, so the UTF-8 BOM is the only mark worth looking for.
  int lineLimit = (int) Math.min(3L + (long) maxLineLength, Integer.MAX_VALUE);
  int consumed = in.readLine(value, lineLimit, maxBytesToConsume(pos));
  // Original author's note: allowing three extra bytes does not change existing behavior,
  // because a line whose reported size is not below maxLineLength is discarded and the
  // next line is read instead.
  pos += consumed;

  int length = value.getLength();
  byte[] raw = value.getBytes();
  boolean startsWithBom = length >= 3
      && raw[0] == (byte) 0xEF
      && raw[1] == (byte) 0xBB
      && raw[2] == (byte) 0xBF;
  if (!startsWithBom) {
    return consumed;
  }

  LOG.info("Found UTF-8 BOM and skipped it");
  int remaining = length - 3;
  if (remaining > 0) {
    // Copy first: set() is given a private array rather than value's own backing buffer.
    byte[] snapshot = value.copyBytes();
    value.set(snapshot, 3, remaining);
  } else {
    value.clear();
  }
  return consumed - 3;
}
示例4: readDefaultLine
import org.apache.hadoop.io.Text; //导入方法依赖的package包/类
/**
 * Read a line terminated by one of CR, LF, or CRLF.
 *
 * @param str cleared on entry, then filled with the bytes of the line; the terminating
 *        newline bytes are not stored
 * @param maxLineLength upper bound on the number of bytes appended to str; bytes beyond
 *        it are still counted as consumed but are not stored
 * @param maxBytesToConsume once this many bytes have been consumed without finding a
 *        newline, no further buffer is read; the check happens only after each buffer
 *        pass, so the returned count can exceed this value
 * @return the number of bytes consumed from the input, including the newline bytes
 * @throws IOException if more than Integer.MAX_VALUE bytes were consumed, or
 *         (presumably) if fillBuffer fails to read from the stream
 */
private int readDefaultLine(Text str, int maxLineLength, int maxBytesToConsume)
throws IOException {
/* We're reading data from in, but the head of the stream may be
 * already buffered in buffer, so we have several cases:
 * 1. No newline characters are in the buffer, so we need to copy
 *    everything and read another buffer from the stream.
 * 2. An unambiguously terminated line is in buffer, so we just
 *    copy to str.
 * 3. Ambiguously terminated line is in buffer, i.e. buffer ends
 *    in CR. In this case we copy everything up to CR to str, but
 *    we also need to see what follows CR: if it's LF, then we
 *    need consume LF as well, so next call to readLine will read
 *    from after that.
 * We use a flag prevCharCR to signal if previous character was CR
 * and, if it happens to be at the end of the buffer, delay
 * consuming it until we have a chance to look at the char that
 * follows.
 */
str.clear();
int txtLength = 0; //tracks str.getLength(), as an optimization
int newlineLength = 0; //length of terminating newline
boolean prevCharCR = false; //true if prev char was CR
long bytesConsumed = 0;
do {
int startPosn = bufferPosn; //starting from where we left off the last time
// Buffer exhausted: rewind to its start and refill it from the stream.
if (bufferPosn >= bufferLength) {
startPosn = bufferPosn = 0;
if (prevCharCR) {
++bytesConsumed; //account for CR from previous read
}
bufferLength = fillBuffer(in, buffer, prevCharCR);
if (bufferLength <= 0) {
break; // EOF
}
}
for (; bufferPosn < bufferLength; ++bufferPosn) { //search for newline
if (buffer[bufferPosn] == LF) {
// LF ends the line; a CR immediately before it makes the terminator CRLF (2 bytes).
newlineLength = (prevCharCR) ? 2 : 1;
++bufferPosn; // at next invocation proceed from following byte
break;
}
if (prevCharCR) { //CR + notLF, we are at notLF
// The lone CR was the terminator; bufferPosn stays on the non-LF byte so it is
// read as part of the next line.
newlineLength = 1;
break;
}
prevCharCR = (buffer[bufferPosn] == CR);
}
// Bytes scanned in this pass, including any terminator bytes found in it.
int readLength = bufferPosn - startPosn;
if (prevCharCR && newlineLength == 0) {
--readLength; //CR at the end of the buffer
}
bytesConsumed += readLength;
// Strip the terminator from what gets stored; this can go negative when the
// terminator's CR was already accounted for in an earlier pass.
int appendLength = readLength - newlineLength;
// Never store more than maxLineLength bytes in total; the excess is dropped.
if (appendLength > maxLineLength - txtLength) {
appendLength = maxLineLength - txtLength;
}
if (appendLength > 0) {
str.append(buffer, startPosn, appendLength);
txtLength += appendLength;
}
} while (newlineLength == 0 && bytesConsumed < maxBytesToConsume);
// bytesConsumed is a long; refuse to narrow it to int if it would not fit.
if (bytesConsumed > Integer.MAX_VALUE) {
throw new IOException("Too many bytes before newline: " + bytesConsumed);
}
return (int)bytesConsumed;
}
示例5: readLine
import org.apache.hadoop.io.Text; //导入方法依赖的package包/类
/**
 * Reads one UTF-8 encoded line from the given reader into {@code out}.
 *
 * @param lineReader reader the line is taken from
 * @param out destination for the line; emptied before the read
 * @return the number of bytes read, as reported by {@code lineReader.readLine(out)}
 * @throws IOException if the underlying read fails
 */
public static int readLine(LineReader lineReader, Text out) throws IOException {
  // Start from an empty buffer so nothing from an earlier line is left behind.
  out.clear();
  final int bytesRead = lineReader.readLine(out);
  return bytesRead;
}