本文整理汇总了Java中org.apache.hadoop.io.Text.getLength方法的典型用法代码示例。如果您正苦于以下问题:Java Text.getLength方法的具体用法?Java Text.getLength怎么用?Java Text.getLength使用的例子?那么,这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在类org.apache.hadoop.io.Text的用法示例。
在下文中一共展示了Text.getLength方法的11个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Java代码示例。
示例1: find
import org.apache.hadoop.io.Text; //导入方法依赖的package包/类
/**
 * Finds the first delimiter byte at or after {@code start} that is not enclosed
 * in double quotes.
 *
 * <p>Quote tracking begins in the "outside quotes" state at {@code start}; every
 * {@code "} byte encountered toggles that state. Only the first
 * {@code text.getLength()} bytes of the backing array are examined.
 *
 * @param text the text being searched
 * @param delimiter the delimiter byte to look for
 * @param start the byte index at which the search begins
 * @return the byte index of the first unquoted delimiter, or {@code -1} if none is found
 */
public int find(Text text, byte delimiter, int start) {
  final byte[] data = text.getBytes();
  final int end = text.getLength();
  boolean quoted = false;
  for (int i = start; i < end; i++) {
    final byte current = data[i];
    if (current == '\"') {
      quoted = !quoted;
    }
    if (current == delimiter && !quoted) {
      return i;
    }
  }
  return -1;
}
示例2: map
import org.apache.hadoop.io.Text; //导入方法依赖的package包/类
/**
 * Parses one input line as an RDF triple and emits it keyed by its predicate.
 *
 * <p>A successfully parsed triple is written as (predicate, (subject, object)) and
 * counted as valid. A line that cannot be parsed is printed to stdout and counted
 * as ignored when it is empty or starts with {@code "@"}, otherwise as invalid.
 * All counters live in the "RDF Dataset Properties" group.
 *
 * @param key the input key (unused)
 * @param value the raw input line
 * @param context the task context used for output and counters
 */
@Override
public void map(LongWritable key, Text value, Context context) throws IOException, InterruptedException {
  final String[] triple = rdfParser.parseTriple(value.toString());

  if (triple == null) {
    // Unparseable line: decide whether it is harmless (blank or "@" line) or malformed.
    final boolean ignorable = value.getLength() == 0 || value.toString().startsWith("@");
    if (ignorable) {
      System.out.println("IGNORING: " + value);
      context.getCounter("RDF Dataset Properties", IGNORED_LINES).increment(1);
    } else {
      System.out.println("DISCARDED: " + value);
      context.getCounter("RDF Dataset Properties", INVALID_TRIPLES).increment(1);
    }
    return;
  }

  // Convert literals to Pig types, if possible.
  triple[2] = Util.toPigTypes(triple[2]);
  // The predicate is the key, which yields vertical partitioning.
  final Text predicate = new Text(triple[1]);
  final TextPair subjectAndObject = new TextPair(triple[0], triple[2]);
  context.write(predicate, subjectAndObject);
  context.getCounter("RDF Dataset Properties", VALID_TRIPLES).increment(1);
}
示例3: map
import org.apache.hadoop.io.Text; //导入方法依赖的package包/类
/**
 * Parses one input line as an RDF triple and writes it to per-predicate outputs.
 *
 * <p>A successfully parsed triple is written twice through {@code multipleOutputs}:
 * once as "subject TAB object" to the file derived from the predicate, and once as
 * the full tab-separated triple to the file derived from "inputData". A line that
 * cannot be parsed is printed to stdout and counted as ignored when it is empty or
 * starts with {@code "@"}, otherwise as invalid.
 *
 * @param key the input key (unused)
 * @param value the raw input line
 * @param context the task context used for counters
 */
@Override
public void map(LongWritable key, Text value, Context context) throws IOException, InterruptedException {
  final String[] triple = rdfParser.parseTriple(value.toString());

  if (triple == null) {
    // Unparseable line: decide whether it is harmless (blank or "@" line) or malformed.
    final boolean ignorable = value.getLength() == 0 || value.toString().startsWith("@");
    if (ignorable) {
      System.out.println("IGNORING: " + value);
      context.getCounter("RDF Dataset Properties", IGNORED_LINES).increment(1);
    } else {
      System.out.println("DISCARDED: " + value);
      context.getCounter("RDF Dataset Properties", INVALID_TRIPLES).increment(1);
    }
    return;
  }

  // Convert literals to Pig types, if possible.
  triple[2] = Util.toPigTypes(triple[2]);

  // Vertical partitioning: one output file per predicate, holding subject and object.
  final Text subjectAndObject = new Text(triple[0] + "\t" + triple[2]);
  multipleOutputs.write(NullWritable.get(), subjectAndObject, Util.generateFileName(triple[1]));

  // Also keep the complete triple in "inputData" for queries where the predicate is not known.
  final Text wholeTriple = new Text(triple[0] + "\t" + triple[1] + "\t" + triple[2]);
  multipleOutputs.write(NullWritable.get(), wholeTriple, Util.generateFileName("inputData"));

  context.getCounter("RDF Dataset Properties", VALID_TRIPLES).increment(1);
}
示例4: writeString
import org.apache.hadoop.io.Text; //导入方法依赖的package包/类
/**
 * Writes a string as a VInt byte count {@code n} followed by the {@code n} bytes
 * of its {@link Text} encoding. A {@code null} string is written as the single
 * VInt {@code -1} with no payload.
 *
 * @param out the destination to write to
 * @param s the string to write, may be {@code null}
 * @throws IOException if writing to {@code out} fails
 */
public static void writeString(DataOutput out, String s) throws IOException {
  if (s == null) {
    // -1 is the marker for "no string".
    writeVInt(out, -1);
    return;
  }
  final Text encoded = new Text(s);
  final int byteCount = encoded.getLength();
  writeVInt(out, byteCount);
  // Only the first byteCount bytes of the backing array are written.
  out.write(encoded.getBytes(), 0, byteCount);
}
示例5: setSafeValue
import org.apache.hadoop.io.Text; //导入方法依赖的package包/类
/**
 * Copies a Hive string field into a slot of a nullable var-char vector.
 *
 * @param oi the inspector for the field; cast to {@code StringObjectInspector}
 * @param hiveFieldValue the Hive field value to read
 * @param outputVV the destination vector; cast to {@code NullableVarCharVector}
 * @param outputIndex the slot in the destination vector to write
 */
@Override
public void setSafeValue(ObjectInspector oi, Object hiveFieldValue, ValueVector outputVV, int outputIndex) {
  final StringObjectInspector stringInspector = (StringObjectInspector) oi;
  final Text text = stringInspector.getPrimitiveWritableObject(hiveFieldValue);
  // Pair the backing array with getLength() so only the valid bytes are copied.
  final byte[] utf8 = text.getBytes();
  final int utf8Length = text.getLength();
  final NullableVarCharVector target = (NullableVarCharVector) outputVV;
  target.getMutator().setSafe(outputIndex, utf8, 0, utf8Length);
}
示例6: map
import org.apache.hadoop.io.Text; //导入方法依赖的package包/类
/**
 * Parses one input line as a JSON array of probe records and emits every phone
 * record it contains, keyed by the phone's MAC address.
 *
 * <p>Empty lines are skipped, as are lines for which the JSON parser yields no
 * value. A probe whose time cannot be converted to milliseconds is skipped after
 * a warning is logged. For every other probe, each phone record has its time
 * overwritten with the probe's time in milliseconds before being written.
 *
 * @param key the input key (unused)
 * @param value the raw input line
 * @param context the task context used for output
 * @throws IOException if writing to the context fails
 * @throws InterruptedException if writing to the context is interrupted
 */
@Override
protected void map(Object key, Text value, Context context)
    throws IOException, InterruptedException {
  Logger.println("[map read line]: " + count++);
  if (value.getLength() == 0) {
    Logger.println("[map empty line]: empty line");
    return;
  }
  List<ProbeJson> probeJsons = gson.fromJson(
      value.toString(), new TypeToken<List<ProbeJson>>(){}.getType());
  if (probeJsons == null) {
    // fromJson yields null when the line holds no JSON value (for example a
    // whitespace-only line) or the literal null; without this guard the size()
    // call below would throw a NullPointerException and fail the task.
    Logger.println("[map empty line]: empty line");
    return;
  }
  Logger.println("[probes converted]: convert to java list, size = " + probeJsons.size());
  for (ProbeJson prob : probeJsons) {
    // -1 marks "time could not be parsed"; such probes are skipped below.
    long time = -1L;
    try {
      System.out.println(prob.getTime());
      time = DateFormatter.getMillis(prob.getTime());
    } catch (Exception e) {
      Logger.println("[warn]: time format error!!");
    }
    if (time >= 0) {
      for (PhoneJson phoneJson : prob.getData()) {
        Logger.println("[phone data]: one phone data with mac address: " + phoneJson.getMac());
        Logger.println("[phone data]: one phone data with time: " + phoneJson.getTime());
        // Replace the phone's own time with the enclosing probe's time in milliseconds.
        phoneJson.setTime(time);
        phoneAsKey.set(phoneJson.getMac());
        context.write(phoneAsKey, phoneJson);
        Logger.println("[map write to combiner]");
      }
    }
  }
}
示例7: map
import org.apache.hadoop.io.Text; //导入方法依赖的package包/类
/**
 * Emits randomly generated key/value sentences until the configured byte budget
 * ({@code numBytesToWrite}) is exhausted.
 *
 * <p>The incoming key and value are not read. After each record the byte and
 * record counters are updated, and the task status is refreshed every 200 records.
 *
 * @param key ignored
 * @param value ignored
 * @param context the task context used for output, counters and status
 */
public void map(Text key, Text value,
                Context context) throws IOException, InterruptedException {
  int recordsWritten = 0;
  while (numBytesToWrite > 0) {
    // Choose the number of words for the key and the value.
    int keyWordCount = minWordsInKey;
    if (wordsInKeyRange != 0) {
      keyWordCount += random.nextInt(wordsInKeyRange);
    }
    int valueWordCount = minWordsInValue;
    if (wordsInValueRange != 0) {
      valueWordCount += random.nextInt(wordsInValueRange);
    }

    final Text keyWords = generateSentence(keyWordCount);
    final Text valueWords = generateSentence(valueWordCount);

    // Emit the record and charge its size against the remaining budget.
    context.write(keyWords, valueWords);
    final int recordBytes = keyWords.getLength() + valueWords.getLength();
    numBytesToWrite -= recordBytes;

    // Bookkeeping: counters and periodic status.
    context.getCounter(Counters.BYTES_WRITTEN).increment(recordBytes);
    context.getCounter(Counters.RECORDS_WRITTEN).increment(1);
    recordsWritten++;
    if (recordsWritten % 200 == 0) {
      context.setStatus("wrote record " + recordsWritten + ". " +
                        numBytesToWrite + " bytes left.");
    }
  }
  context.setStatus("done with " + recordsWritten + " records.");
}
示例8: skipUtfByteOrderMark
import org.apache.hadoop.io.Text; //导入方法依赖的package包/类
/**
 * Reads the first line into {@code value} and removes a leading UTF-8 byte order
 * mark (0xEF, 0xBB, 0xBF) if one is present.
 *
 * <p>The read limit is raised by three bytes so that a BOM does not count against
 * {@code maxLineLength}. {@code pos} is advanced by the full number of bytes
 * consumed, BOM included.
 *
 * @param value receives the line contents, without the BOM
 * @return the number of bytes consumed by the read, minus 3 when a BOM was stripped
 * @throws IOException if reading the line fails
 */
private int skipUtfByteOrderMark(Text value) throws IOException {
  // Allow three extra bytes for a possible BOM, clamped to the int range.
  final int firstLineLimit = (int) Math.min(3L + (long) maxLineLength,
                                            Integer.MAX_VALUE);
  final int consumed = in.readLine(value, firstLineLimit, maxBytesToConsume(pos));
  pos += consumed;

  final int length = value.getLength();
  final byte[] raw = value.getBytes();
  final boolean startsWithBom = length >= 3
      && raw[0] == (byte) 0xEF
      && raw[1] == (byte) 0xBB
      && raw[2] == (byte) 0xBF;
  if (!startsWithBom) {
    return consumed;
  }

  LOG.info("Found UTF-8 BOM and skipped it");
  final int remaining = length - 3;
  if (remaining > 0) {
    // Work from a copy of the bytes rather than the Text's own backing buffer.
    value.set(value.copyBytes(), 3, remaining);
  } else {
    value.clear();
  }
  return consumed - 3;
}
示例9: findPartition
import org.apache.hadoop.io.Text; //导入方法依赖的package包/类
/**
 * Delegates the partition lookup to the child selected by the key byte at this
 * node's level. Keys with no byte at that level are routed to child 0.
 *
 * @param key the key to locate
 * @return the partition reported by the selected child
 */
int findPartition(Text key) {
  final int depth = getLevel();
  // Mask with 0xff so the signed byte becomes an index in 0..255.
  final int branch = (key.getLength() <= depth) ? 0 : (key.getBytes()[depth] & 0xff);
  return child[branch].findPartition(key);
}
示例10: buildTrie
import org.apache.hadoop.io.Text; //导入方法依赖的package包/类
/**
* Given a sorted set of cut points, build a trie that will find the correct
* partition quickly.
*
* The trie is built recursively one byte at a time: each inner node has one
* child per possible value (0..255) of the byte that follows the prefix, and
* each child covers the sub-range of splits that belongs to that byte value.
* Recursion stops with a leaf once the prefix is maxDepth bytes long or the
* split range is empty (lower == upper).
*
* @param splits the list of cut points
* @param lower the lower bound of partitions 0..numPartitions-1
* @param upper the upper bound of partitions 0..numPartitions-1
* @param prefix the prefix that we have already checked against
* @param maxDepth the maximum depth we will build a trie for
* @return the trie node that will divide the splits correctly
*/
private static TrieNode buildTrie(Text[] splits, int lower, int upper,
Text prefix, int maxDepth) {
// The current depth is the number of bytes in the prefix already consumed.
int depth = prefix.getLength();
if (depth >= maxDepth || lower == upper) {
return new LeafTrieNode(depth, splits, lower, upper);
}
InnerTrieNode result = new InnerTrieNode(depth);
// trial is a private copy of the prefix; its last byte is rewritten in place below.
Text trial = new Text(prefix);
// append an extra byte on to the prefix
trial.append(new byte[1], 0, 1);
int currentBound = lower;
// Children 0..254 are handled here; child 255 is handled after the loop.
for(int ch = 0; ch < 255; ++ch) {
// Temporarily set the last byte to ch + 1 so trial acts as the exclusive
// upper limit for splits belonging to byte value ch.
trial.getBytes()[depth] = (byte) (ch + 1);
// This child's range starts where the previous child's range ended.
lower = currentBound;
// Advance past every split that compares below the limit.
while (currentBound < upper) {
if (splits[currentBound].compareTo(trial) >= 0) {
break;
}
currentBound += 1;
}
// Restore the last byte to ch so the recursive call receives the real prefix.
trial.getBytes()[depth] = (byte) ch;
result.child[ch] = buildTrie(splits, lower, currentBound, trial,
maxDepth);
}
// pick up the rest
trial.getBytes()[depth] = (byte) 255;
result.child[255] = buildTrie(splits, currentBound, upper, trial,
maxDepth);
return result;
}
示例11: reduce
import org.apache.hadoop.io.Text; //导入方法依赖的package包/类
/**
 * Verifies one key group produced by the job and passes its values through.
 *
 * <p>A key of exactly 25 bytes is treated as a tagged record whose first byte is
 * the tag ('A' or 'B'); exactly one value is expected for it. Any other key is
 * expected to carry one value per map. For every group the method asserts:
 * <ul>
 *   <li>the key equals the key rebuilt locally in {@code kb},</li>
 *   <li>each value matches the reference bytes in {@code vb} from offset 1 onwards,</li>
 *   <li>the number of values equals the expected record count, and</li>
 *   <li>the sum of the first byte of every value equals the expected checksum.</li>
 * </ul>
 *
 * @param key the key of the group being reduced
 * @param values the values grouped under {@code key}
 * @param out collector that receives every (key, value) pair unchanged
 * @param reporter progress reporter (unused)
 * @throws IOException if collecting output fails
 */
@Override
public void reduce(Text key, Iterator<Text> values,
    OutputCollector<Text,Text> out, Reporter reporter)
    throws IOException {
  int vc = 0;
  final int vlen;
  final int preRec = nRec;
  final int vcCheck, recCheck;
  // Truncate the formatter's buffer back to the fixed key prefix length.
  ((StringBuilder)fmt.out()).setLength(keylen);
  if (25 == key.getLength()) {
    // tagged record
    recCheck = 1; // expect only 1 record
    switch ((char)key.getBytes()[0]) {
      case 'A':
        vlen = getValLen(++aKey, nMaps) - 128;
        vcCheck = aKey; // expect eq id
        break;
      case 'B':
        vlen = getValLen(++bKey, nMaps);
        vcCheck = bKey; // expect eq id
        break;
      default:
        vlen = vcCheck = -1;
        // Report the byte that was actually switched on (index 0), not an
        // unrelated byte of the key, so the failure message shows the bad tag.
        fail("Unexpected tag on record: " + ((char)key.getBytes()[0]));
    }
    kb.set((char)key.getBytes()[0] + fmt.format(tagfmt,vcCheck).toString());
  } else {
    kb.set(fmt.format(tagfmt, ++nKey).toString());
    vlen = 1000;
    recCheck = nMaps; // expect 1 rec per map
    vcCheck = (nMaps * (nMaps - 1)) >>> 1; // expect eq sum(id)
  }
  assertEquals(kb, key);
  while (values.hasNext()) {
    final Text val = values.next();
    // increment vc by map ID assoc w/ val
    vc += val.getBytes()[0];
    // verify that all the fixed characters 'V' match
    assertEquals(0, WritableComparator.compareBytes(
        vb.getBytes(), 1, vlen - 1,
        val.getBytes(), 1, val.getLength() - 1));
    out.collect(key, val);
    ++nRec;
  }
  assertEquals("Bad rec count for " + key, recCheck, nRec - preRec);
  assertEquals("Bad rec group for " + key, vcCheck, vc);
}