This article collects typical usage examples of the Java method org.apache.hadoop.io.Text.set. If you are wondering what exactly Text.set does, how to call it, or what real-world uses of it look like, the curated method examples here may help. You can also explore further usage examples of the containing class, org.apache.hadoop.io.Text.
The following shows 15 code examples of the Text.set method, sorted by popularity by default.
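Before looking at the examples, here is a minimal standalone sketch of the most common Text.set overloads (set from a String, from a byte range, and from another Text); the class and variable names are illustrative only:
import java.nio.charset.StandardCharsets;
import org.apache.hadoop.io.Text;

public class TextSetDemo {
  public static void main(String[] args) {
    Text t = new Text();
    t.set("hello");                                        // set from a String (stored as UTF-8)
    byte[] utf8 = "key\tvalue".getBytes(StandardCharsets.UTF_8);
    t.set(utf8, 0, 3);                                     // set from a byte range: t now holds "key"
    Text copy = new Text();
    copy.set(t);                                           // copy the contents of another Text
    System.out.println(t + " / " + copy);
  }
}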
Example 1: splitKeyVal
import org.apache.hadoop.io.Text; // import the package/class the method depends on
private void splitKeyVal(byte[] line, int length, Text key, Text val)
    throws IOException {
  // Need to find numKeyFields separators
  int pos = UTF8ByteArrayUtils.findBytes(line, 0, length, separator);
  for (int k = 1; k < numKeyFields && pos != -1; k++) {
    pos = UTF8ByteArrayUtils.findBytes(line, pos + separator.length,
        length, separator);
  }
  try {
    if (pos == -1) {
      key.set(line, 0, length);
      val.set("");
    } else {
      StreamKeyValUtil.splitKeyVal(line, 0, length, key, val, pos,
          separator.length);
    }
  } catch (CharacterCodingException e) {
    throw new IOException(StringUtils.stringifyException(e));
  }
}
Example 2: next
import org.apache.hadoop.io.Text; // import the package/class the method depends on
public synchronized boolean next(Text key, Text value) throws IOException {
  numNext++;
  if (pos_ >= end_) {
    return false;
  }
  DataOutputBuffer buf = new DataOutputBuffer();
  if (!readUntilMatchBegin()) {
    return false;
  }
  if (pos_ >= end_ || !readUntilMatchEnd(buf)) {
    return false;
  }
  // There is only one element; key/value splitting is not done here.
  byte[] record = new byte[buf.getLength()];
  System.arraycopy(buf.getData(), 0, record, 0, record.length);
  numRecStats(record, 0, record.length);
  key.set(record);
  value.set("");
  return true;
}
Example 3: createFakeCredentials
import org.apache.hadoop.io.Text; // import the package/class the method depends on
@SuppressWarnings({ "rawtypes", "unchecked" })
static DataInputBuffer createFakeCredentials(Random r, int nTok)
    throws IOException {
  Credentials creds = new Credentials();
  byte[] password = new byte[20];
  Text kind = new Text();
  Text service = new Text();
  Text alias = new Text();
  for (int i = 0; i < nTok; ++i) {
    byte[] identifier = ("idef" + i).getBytes();
    r.nextBytes(password);
    kind.set("kind" + i);
    service.set("service" + i);
    alias.set("token" + i);
    Token token = new Token(identifier, password, kind, service);
    creds.addToken(alias, token);
  }
  DataOutputBuffer buf = new DataOutputBuffer();
  creds.writeTokenStorageToStream(buf);
  DataInputBuffer ret = new DataInputBuffer();
  ret.reset(buf.getData(), 0, buf.getLength());
  return ret;
}
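To sanity-check a buffer produced this way, the credentials can be deserialized again with Credentials.readTokenStorageStream (DataInputBuffer extends DataInputStream). A minimal sketch; the helper name readBack is illustrative only:
static void readBack(DataInputBuffer in) throws IOException {
  Credentials creds = new Credentials();
  creds.readTokenStorageStream(in);                      // reads what writeTokenStorageToStream wrote
  System.out.println("tokens read: " + creds.numberOfTokens());
}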
Example 4: getRecordReader
import org.apache.hadoop.io.Text; // import the package/class the method depends on
public RecordReader<Text, Text> getRecordReader(InputSplit split,
    JobConf job, Reporter reporter) throws IOException {
  final IndirectInputFormat.IndirectSplit clSplit =
      (IndirectInputFormat.IndirectSplit) split;
  return new RecordReader<Text, Text>() {
    boolean once = true;
    public boolean next(Text key, Text value) {
      if (once) {
        key.set(clSplit.getPath().toString());
        once = false;
        return true;
      }
      return false;
    }
    public Text createKey() { return new Text(); }
    public Text createValue() { return new Text(); }
    public long getPos() { return 0; }
    public void close() { }
    public float getProgress() { return 0.0f; }
  };
}
Example 5: genRandom
import org.apache.hadoop.io.Text; // import the package/class the method depends on
private static void genRandom(Text t, int len, StringBuilder sb) {
  sb.setLength(0);
  for (int i = 0; i < len; ++i) {
    sb.append(Integer.toString(r.nextInt(26) + 10, 36));
  }
  t.set(sb.toString());
}
Example 6: writeOutput
import org.apache.hadoop.io.Text; // import the package/class the method depends on
private void writeOutput(RemoteIterator<? extends FileStatus> input) throws IOException {
  Path outPath = new Path(output);
  if (distribFs.exists(outPath)) {
    throw new IllegalArgumentException("Output file already exists, Not overwriting it:" + output);
  }
  Writer writer = SequenceFile.createWriter(distribFs.getConf(),
      Writer.file(outPath),
      Writer.keyClass(Text.class),
      Writer.valueClass(BytesWritable.class),
      Writer.compression(SequenceFile.CompressionType.RECORD));
  Text key = new Text();
  BytesWritable value = new BytesWritable();
  long skipped = 0;
  long copied = 0;
  while (input.hasNext()) {
    FileStatus next = input.next();
    if (filter(next)) {
      key.set(next.getPath().toString());
      FSDataInputStream stream = localFs.open(next.getPath());
      // CAUTION: this could cause memory overflow
      byte[] bytes = IOUtils.toByteArray(stream);
      value.set(bytes, 0, bytes.length);
      writer.append(key, value);
      copied++;
    } else {
      skipped++;
    }
  }
  writer.close();
  System.out.println("Files copied ::" + copied);
  System.out.println("Files skipped ::" + skipped);
}
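A SequenceFile written this way can be read back by reusing a single Text key and BytesWritable value across records; a minimal sketch, assuming the same outPath and configuration as above (the method name readOutput is illustrative only):
static void readOutput(Configuration conf, Path outPath) throws IOException {
  SequenceFile.Reader reader = new SequenceFile.Reader(conf, SequenceFile.Reader.file(outPath));
  try {
    Text key = new Text();
    BytesWritable value = new BytesWritable();
    while (reader.next(key, value)) {   // the reader sets key and value in place on every call
      System.out.println(key + " -> " + value.getLength() + " bytes");
    }
  } finally {
    reader.close();
  }
}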
Example 7: next
import org.apache.hadoop.io.Text; // import the package/class the method depends on
private boolean next(LongWritable key, Text value) throws IOException {
  /*
   * Use readUntilMatch to look for the tag that starts an XML block; only once it
   * has been found do we start writing the XML fragment into the buffer. If the
   * second argument of readUntilMatch is false, nothing is written to the buffer
   * while searching; if it is true, bytes are written to the buffer as the search
   * proceeds.
   */
  if (fsin.getPos() < end && readUntilMatch(startTag, false)) {
    // Reaching this block means the start tag was found. fsin now points at the
    // last byte of that start tag, so write the start tag into the buffer.
    buffer.write(startTag);
    try {
      /*
       * Search fsin for the end tag, recording bytes as we go, until it is found.
       */
      if (readUntilMatch(endTag, true)) {
        /*
         * Once the end tag is found, assign the offset of the start tag to the key
         * and the complete XML fragment recorded in the buffer to the value.
         */
        key.set(fsin.getPos() - buffer.getLength());
        value.set(buffer.getData(), 0, buffer.getLength());
        return true;
      }
    } finally {
      buffer.reset();
    }
  }
  return false;
}
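The readUntilMatch helper referenced in the comments is not shown in the snippet. A commonly seen sketch of it, assuming fsin is an FSDataInputStream, buffer a DataOutputBuffer, and end the split's end offset (all assumptions, since those fields are not part of the excerpt):
private boolean readUntilMatch(byte[] match, boolean withinBlock) throws IOException {
  int i = 0;
  while (true) {
    int b = fsin.read();
    if (b == -1) {
      return false;                    // end of file
    }
    if (withinBlock) {
      buffer.write(b);                 // record bytes while scanning for the end tag
    }
    if (b == match[i]) {
      i++;
      if (i >= match.length) {
        return true;                   // the whole tag matched
      }
    } else {
      i = 0;                           // mismatch, restart the match
    }
    // stop looking for a start tag once we are past the end of the split
    if (!withinBlock && i == 0 && fsin.getPos() >= end) {
      return false;
    }
  }
}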
Example 8: validateFinalListing
import org.apache.hadoop.io.Text; // import the package/class the method depends on
/**
 * Validate the final resulting path listing. Checks if there are duplicate entries. If preserving ACLs, checks that
 * the file system can support ACLs. If preserving XAttrs, checks that the file system can support XAttrs.
 *
 * @param pathToListFile path listing built by doBuildListing
 * @param options input options to S3MapReduceCp
 * @throws IOException if any issue arises while checking for duplicates
 * @throws DuplicateFileException if there are duplicates
 */
private void validateFinalListing(Path pathToListFile, S3MapReduceCpOptions options)
    throws DuplicateFileException, IOException {
  Configuration config = getConf();
  FileSystem fs = pathToListFile.getFileSystem(config);
  Path sortedList = sortListing(fs, config, pathToListFile);
  SequenceFile.Reader reader = new SequenceFile.Reader(config, SequenceFile.Reader.file(sortedList));
  try {
    Text lastKey = new Text("*"); // source relative path can never hold *
    CopyListingFileStatus lastFileStatus = new CopyListingFileStatus();
    Text currentKey = new Text();
    while (reader.next(currentKey)) {
      if (currentKey.equals(lastKey)) {
        CopyListingFileStatus currentFileStatus = new CopyListingFileStatus();
        reader.getCurrentValue(currentFileStatus);
        throw new DuplicateFileException("File "
            + lastFileStatus.getPath()
            + " and "
            + currentFileStatus.getPath()
            + " would cause duplicates. Aborting");
      }
      reader.getCurrentValue(lastFileStatus);
      lastKey.set(currentKey);
    }
  } finally {
    IOUtils.closeStream(reader);
  }
}
Example 9: next
import org.apache.hadoop.io.Text; // import the package/class the method depends on
/** Read a key/value pair in a line. */
public synchronized boolean next(Text key, Text value) throws IOException {
  Text tKey = key;
  Text tValue = value;
  if (!sequenceFileRecordReader.next(innerKey, innerValue)) {
    return false;
  }
  tKey.set(innerKey.toString());
  tValue.set(innerValue.toString());
  return true;
}
Example 10: skipUtfByteOrderMark
import org.apache.hadoop.io.Text; // import the package/class the method depends on
private int skipUtfByteOrderMark(Text value) throws IOException {
  // Strip the BOM (Byte Order Mark).
  // Text only supports UTF-8, so we only need to check for the UTF-8 BOM
  // (0xEF,0xBB,0xBF) at the start of the text stream.
  int newMaxLineLength = (int) Math.min(3L + (long) maxLineLength,
      Integer.MAX_VALUE);
  int newSize = in.readLine(value, newMaxLineLength, maxBytesToConsume(pos));
  // Even if we read 3 extra bytes for the first line,
  // we won't alter existing behavior (no backwards-incompatibility issue),
  // because newSize is less than maxLineLength and
  // the number of bytes copied to Text is never more than newSize.
  // If the size returned from readLine is not less than maxLineLength,
  // we will discard the current line and read the next line.
  pos += newSize;
  int textLength = value.getLength();
  byte[] textBytes = value.getBytes();
  if ((textLength >= 3) && (textBytes[0] == (byte) 0xEF) &&
      (textBytes[1] == (byte) 0xBB) && (textBytes[2] == (byte) 0xBF)) {
    // Found a UTF-8 BOM; strip it.
    LOG.info("Found UTF-8 BOM and skipped it");
    textLength -= 3;
    newSize -= 3;
    if (textLength > 0) {
      // It may work to use the same buffer and not do the copyBytes
      textBytes = value.copyBytes();
      value.set(textBytes, 3, textLength);
    } else {
      value.clear();
    }
  }
  return newSize;
}
Example 11: next
import org.apache.hadoop.io.Text; // import the package/class the method depends on
public boolean next(Text key, Text value) {
  if (name != null) {
    key.set(name.getName());
    name = null;
    return true;
  }
  return false;
}
Example 12: generateSentence
import org.apache.hadoop.io.Text; // import the package/class the method depends on
private int generateSentence(Text t, int noWords) {
  sentence.setLength(0);
  --noWords;
  for (int i = 0; i < noWords; ++i) {
    sentence.append(words[r.nextInt(words.length)]);
    sentence.append(" ");
  }
  if (noWords >= 0) sentence.append(words[r.nextInt(words.length)]);
  t.set(sentence.toString());
  return sentence.length();
}
Example 13: createMapFile
import org.apache.hadoop.io.Text; // import the package/class the method depends on
private static void createMapFile(Configuration conf, FileSystem fs, Path path,
    CompressionCodec codec, CompressionType type, int records) throws IOException {
  MapFile.Writer writer =
      new MapFile.Writer(conf, path,
          MapFile.Writer.keyClass(Text.class),
          MapFile.Writer.valueClass(Text.class),
          MapFile.Writer.compression(type, codec));
  Text key = new Text();
  for (int j = 0; j < records; j++) {
    key.set(String.format("%03d", j));
    writer.append(key, key);
  }
  writer.close();
}
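For reference, the keys written above can be looked up again with MapFile.Reader.get; a minimal sketch, assuming the same path and conf as above (the method name lookup is illustrative only):
static void lookup(Configuration conf, Path path) throws IOException {
  MapFile.Reader reader = new MapFile.Reader(path, conf);
  try {
    Text value = new Text();
    if (reader.get(new Text("000"), value) != null) {   // keys were written as String.format("%03d", j)
      System.out.println("found: " + value);
    }
  } finally {
    reader.close();
  }
}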
Example 14: nextKeyValue
import org.apache.hadoop.io.Text; // import the package/class the method depends on
public boolean nextKeyValue() {
  if (name != null) {
    key = new Text();
    key.set(name.getName());
    name = null;
    return true;
  }
  return false;
}
Example 15: writeSimpleSrc
import org.apache.hadoop.io.Text; // import the package/class the method depends on
private static Path[] writeSimpleSrc(Path testdir, JobConf conf,
    int srcs) throws IOException {
  SequenceFile.Writer[] out = null;
  Path[] src = new Path[srcs];
  try {
    out = createWriters(testdir, conf, srcs, src);
    final int capacity = srcs * 2 + 1;
    Text key = new Text();
    key.set("ignored");
    Text val = new Text();
    for (int k = 0; k < capacity; ++k) {
      for (int i = 0; i < srcs; ++i) {
        val.set(Integer.toString(k % srcs == 0 ? k * srcs : k * srcs + i) +
            "\t" + Integer.toString(10 * k + i));
        out[i].append(key, val);
        if (i == k) {
          // add duplicate key
          out[i].append(key, val);
        }
      }
    }
  } finally {
    if (out != null) {
      for (int i = 0; i < srcs; ++i) {
        if (out[i] != null)
          out[i].close();
      }
    }
  }
  return src;
}