当前位置: 首页>>代码示例>>Java>>正文


Java Text.getLength方法代码示例

本文整理汇总了Java中org.apache.hadoop.io.Text.getLength方法的典型用法代码示例。如果您正苦于以下问题:Java Text.getLength方法的具体用法?Java Text.getLength怎么用?Java Text.getLength使用的例子?那么,这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在类org.apache.hadoop.io.Text的用法示例。


在下文中一共展示了Text.getLength方法的11个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Java代码示例。

示例1: find

import org.apache.hadoop.io.Text; //导入方法依赖的package包/类
/**
 * Locates the next occurrence of a delimiter byte that is not enclosed in double quotes.
 *
 * <p>The scan walks the backing byte array from {@code start} up to, but not including,
 * {@code text.getLength()}. Every double-quote byte toggles an "inside quotes" flag, and
 * delimiter bytes seen while that flag is set are skipped.
 *
 * @param  text  the text being searched
 * @param  delimiter the delimiter byte to look for
 * @param  start the index at which the scan begins
 * @return the index of the first unquoted delimiter at or after {@code start},
 *         or -1 when there is none
 */
public int find(Text text, byte delimiter, int start) {
  final int end = text.getLength();
  final byte[] data = text.getBytes();
  boolean quoted = false;
  for (int i = start; i < end; i++) {
    final byte current = data[i];
    // The quote check runs first, so a quote byte updates the flag before the delimiter test
    if (current == '\"') {
      quoted = !quoted;
    }
    if (current == delimiter && !quoted) {
      return i;
    }
  }
  return -1;
}
 
开发者ID:skhalifa,项目名称:QDrill,代码行数:25,代码来源:DrillTextRecordReader.java

示例2: map

import org.apache.hadoop.io.Text; //导入方法依赖的package包/类
/**
 * Parses one input line as a triple and emits it keyed by its second element.
 *
 * <p>When the parser returns a triple, its third element is passed through
 * {@code Util.toPigTypes} and the pair (first, third) is written under the second element.
 * When the parser returns {@code null}, the line is counted as ignored if it is empty or
 * starts with "@", and as invalid otherwise.
 *
 * @param key     input key (not read by this method)
 * @param value   the raw input line
 * @param context the context used for output and counters
 */
@Override
public void map(LongWritable key, Text value, Context context) throws IOException, InterruptedException {
	final String[] triple = rdfParser.parseTriple(value.toString());
	if (triple == null) {
		// Unparseable line: decide whether it is merely ignorable or actually invalid
		final boolean ignorable = value.getLength() == 0 || value.toString().startsWith("@");
		if (ignorable) {
			System.out.println("IGNORING: " + value);
			context.getCounter("RDF Dataset Properties", IGNORED_LINES).increment(1);
		} else {
			System.out.println("DISCARDED: " + value);
			context.getCounter("RDF Dataset Properties", INVALID_TRIPLES).increment(1);
		}
		return;
	}

	// Convert literals to Pig types where possible
	triple[2] = Util.toPigTypes(triple[2]);
	// The second element (predicate) is the output key for vertical partitioning
	final Text predicate = new Text(triple[1]);
	context.write(predicate, new TextPair(triple[0], triple[2]));
	context.getCounter("RDF Dataset Properties", VALID_TRIPLES).increment(1);
}
 
开发者ID:aschaetzle,项目名称:PigSPARQL,代码行数:20,代码来源:VPMapper.java

示例3: map

import org.apache.hadoop.io.Text; //导入方法依赖的package包/类
/**
 * Parses one input line as a triple and writes it to two named outputs.
 *
 * <p>A parsed triple is written once as "first TAB third" to the file name generated from
 * its second element, and once in full (all three elements, tab-separated) to the file name
 * generated from "inputData". When the parser returns {@code null}, the line is counted as
 * ignored if it is empty or starts with "@", and as invalid otherwise.
 *
 * @param key     input key (not read by this method)
 * @param value   the raw input line
 * @param context the context used for counters
 */
@Override
public void map(LongWritable key, Text value, Context context) throws IOException, InterruptedException {
	final String[] triple = rdfParser.parseTriple(value.toString());
	if (triple == null) {
		// Unparseable line: decide whether it is merely ignorable or actually invalid
		final boolean ignorable = value.getLength() == 0 || value.toString().startsWith("@");
		if (ignorable) {
			System.out.println("IGNORING: " + value);
			context.getCounter("RDF Dataset Properties", IGNORED_LINES).increment(1);
		} else {
			System.out.println("DISCARDED: " + value);
			context.getCounter("RDF Dataset Properties", INVALID_TRIPLES).increment(1);
		}
		return;
	}

	// Convert literals to Pig types where possible
	triple[2] = Util.toPigTypes(triple[2]);

	// Vertical partitioning: one output file per predicate
	multipleOutputs.write(
			NullWritable.get(),
			new Text(triple[0] + "\t" + triple[2]),
			Util.generateFileName(triple[1]));

	// The complete triple also goes to "inputData" for queries where the predicate is not known
	multipleOutputs.write(
			NullWritable.get(),
			new Text(triple[0] + "\t" + triple[1] + "\t" + triple[2]),
			Util.generateFileName("inputData"));

	context.getCounter("RDF Dataset Properties", VALID_TRIPLES).increment(1);
}
 
开发者ID:aschaetzle,项目名称:PigSPARQL,代码行数:24,代码来源:VPMapOnlyMapper.java

示例4: writeString

import org.apache.hadoop.io.Text; //导入方法依赖的package包/类
/**
 * Writes a String as a VInt byte count followed by that many bytes, as in Text format.
 * A {@code null} string is written as the single VInt -1 with no bytes after it.
 *
 * @param out the output to write to
 * @param s   the string to write; may be {@code null}
 * @throws IOException if writing to {@code out} fails
 */
public static void writeString(DataOutput out, String s) throws IOException {
  if (s == null) {
    // -1 is the marker written in place of a length for a null string
    writeVInt(out, -1);
    return;
  }
  final Text encoded = new Text(s);
  final byte[] payload = encoded.getBytes();
  // Only the first getLength() bytes of the backing array are written
  final int byteCount = encoded.getLength();
  writeVInt(out, byteCount);
  out.write(payload, 0, byteCount);
}
 
开发者ID:nucypher,项目名称:hadoop-oss,代码行数:19,代码来源:Utils.java

示例5: setSafeValue

import org.apache.hadoop.io.Text; //导入方法依赖的package包/类
/**
 * Copies a Hive string field into a nullable var-char vector at the given index.
 *
 * @param oi             inspector for the field; cast to {@code StringObjectInspector}
 * @param hiveFieldValue the field value handed to the inspector
 * @param outputVV       destination vector; cast to {@code NullableVarCharVector}
 * @param outputIndex    position in the destination vector to write
 */
@Override
public void setSafeValue(ObjectInspector oi, Object hiveFieldValue, ValueVector outputVV, int outputIndex) {
  final StringObjectInspector inspector = (StringObjectInspector) oi;
  final Text text = inspector.getPrimitiveWritableObject(hiveFieldValue);
  final byte[] payload = text.getBytes();
  // Pass getLength() explicitly so only that many bytes of the backing array are copied
  final int payloadLength = text.getLength();
  final NullableVarCharVector target = (NullableVarCharVector) outputVV;
  target.getMutator().setSafe(outputIndex, payload, 0, payloadLength);
}
 
开发者ID:skhalifa,项目名称:QDrill,代码行数:8,代码来源:HiveFieldConverter.java

示例6: map

import org.apache.hadoop.io.Text; //导入方法依赖的package包/类
/**
 * Parses one input line as a JSON array of probe records and emits every phone
 * record found in them, keyed by the phone's MAC address.
 *
 * <p>Zero-length lines are skipped. Lines for which Gson produces no list (blank
 * input or a JSON {@code null} literal) are skipped as well instead of failing
 * with a {@code NullPointerException}. A probe whose time cannot be converted is
 * logged and skipped; the remaining probes on the line are still processed.
 *
 * @param key     input key (not read by this method)
 * @param value   one line of input, expected to hold a JSON array of probes
 * @param context the context the (MAC, phone record) pairs are written to
 * @throws IOException          if writing to the context fails
 * @throws InterruptedException if writing to the context is interrupted
 */
@Override
protected void map(Object key, Text value, Context context)
        throws IOException, InterruptedException {

    Logger.println("[map read line]: " + count++);
    if (value.getLength()==0) {
        Logger.println("[map empty line]: empty line");
        return;
    }
    List<ProbeJson> probeJsons = gson.fromJson(
            value.toString(), new TypeToken<List<ProbeJson>>(){}.getType());
    if (probeJsons == null) {
        // Gson returns null for blank input or a JSON null literal; there is nothing to emit
        Logger.println("[map empty line]: no probe data parsed");
        return;
    }
    Logger.println("[probes converted]: convert to java list, size =  "+probeJsons.size());

    for (ProbeJson prob:probeJsons) {

        // -1 marks "no usable timestamp"; such probes are skipped below
        long time = -1L;
        try {
            System.out.println(prob.getTime());
            time = DateFormatter.getMillis(prob.getTime());
        }catch (Exception e) {
            // Deliberately best-effort: a bad timestamp only drops this probe
            Logger.println("[warn]: time format error!!");
        }

        if (time>=0) {
            for (PhoneJson phoneJson : prob.getData()) {
                Logger.println("[phone data]: one phone data with mac address: " +phoneJson.getMac() );
                Logger.println("[phone data]: one phone data with time: " +phoneJson.getTime() );
                // Every phone record takes the converted time of its enclosing probe
                phoneJson.setTime(time);
                phoneAsKey.set(phoneJson.getMac());
                context.write(phoneAsKey, phoneJson);
                Logger.println("[map write to combiner]");
            }
        }

    }
}
 
开发者ID:cuiods,项目名称:WIFIProbe,代码行数:37,代码来源:CustomerKeyMapper.java

示例7: map

import org.apache.hadoop.io.Text; //导入方法依赖的package包/类
/**
 * Writes randomly generated key/value sentences until the remaining byte budget
 * ({@code numBytesToWrite}) is no longer positive.
 *
 * <p>The incoming {@code key} and {@code value} are not read; each loop iteration
 * generates one record, writes it, and updates the byte and record counters. The
 * status is refreshed every 200 records and once more at the end.
 *
 * @param key     input key (unused)
 * @param value   input value (unused)
 * @param context the context that receives the records, counters and status
 */
public void map(Text key, Text value,
                Context context) throws IOException,InterruptedException {
  int recordsWritten = 0;
  while (numBytesToWrite > 0) {
    // Draw the word counts first (key, then value) so the random sequence is consumed in a fixed order
    int keyWordCount = minWordsInKey;
    if (wordsInKeyRange != 0) {
      keyWordCount += random.nextInt(wordsInKeyRange);
    }
    int valueWordCount = minWordsInValue;
    if (wordsInValueRange != 0) {
      valueWordCount += random.nextInt(wordsInValueRange);
    }
    final Text keyWords = generateSentence(keyWordCount);
    final Text valueWords = generateSentence(valueWordCount);

    // Emit the record
    context.write(keyWords, valueWords);

    // Charge the record's size against the budget and the counters
    final int recordBytes = keyWords.getLength() + valueWords.getLength();
    numBytesToWrite -= recordBytes;
    context.getCounter(Counters.BYTES_WRITTEN).increment(recordBytes);
    context.getCounter(Counters.RECORDS_WRITTEN).increment(1);

    recordsWritten++;
    if (recordsWritten % 200 == 0) {
      context.setStatus("wrote record " + recordsWritten + ". " +
                         numBytesToWrite + " bytes left.");
    }
  }
  context.setStatus("done with " + recordsWritten + " records.");
}
 
开发者ID:naver,项目名称:hadoop,代码行数:32,代码来源:RandomTextWriter.java

示例8: skipUtfByteOrderMark

import org.apache.hadoop.io.Text; //导入方法依赖的package包/类
/**
 * Reads the next line into {@code value} and strips a leading UTF-8 byte order mark
 * (0xEF, 0xBB, 0xBF) when one is present.
 *
 * <p>The read is allowed three bytes more than {@code maxLineLength} so that the BOM
 * itself does not count against the line-length limit. {@code pos} is advanced by the
 * full number of bytes read, BOM included.
 *
 * @param value receives the line, without the BOM
 * @return the size reported by the read, reduced by 3 when a BOM was stripped
 * @throws IOException if reading the line fails
 */
private int skipUtfByteOrderMark(Text value) throws IOException {
  // Widen in long arithmetic first so maxLineLength + 3 cannot overflow an int
  final long widenedLimit = 3L + (long) maxLineLength;
  final int readLimit = (int) Math.min(widenedLimit, Integer.MAX_VALUE);
  final int bytesRead = in.readLine(value, readLimit, maxBytesToConsume(pos));
  pos += bytesRead;

  final int length = value.getLength();
  final byte[] raw = value.getBytes();
  // Text only supports UTF-8, so only the UTF-8 BOM needs checking
  final boolean startsWithBom = length >= 3
      && raw[0] == (byte) 0xEF
      && raw[1] == (byte) 0xBB
      && raw[2] == (byte) 0xBF;
  if (!startsWithBom) {
    return bytesRead;
  }

  LOG.info("Found UTF-8 BOM and skipped it");
  final int remaining = length - 3;
  if (remaining > 0) {
    // Re-set the value from a copy of its bytes rather than from its own backing buffer
    final byte[] copy = value.copyBytes();
    value.set(copy, 3, remaining);
  } else {
    value.clear();
  }
  return bytesRead - 3;
}
 
开发者ID:naver,项目名称:hadoop,代码行数:33,代码来源:LineRecordReader.java

示例9: findPartition

import org.apache.hadoop.io.Text; //导入方法依赖的package包/类
/**
 * Delegates the lookup to the child selected by the key's byte at this node's level.
 * Keys that have no byte at that level (length not greater than the level) go to child 0.
 *
 * @param key the key to find a partition for
 * @return whatever the selected child's {@code findPartition} returns
 */
int findPartition(Text key) {
  final int level = getLevel();
  // Mask with 0xff so the signed byte is used as an unsigned index 0..255
  final int slot = (key.getLength() <= level) ? 0 : (key.getBytes()[level] & 0xff);
  return child[slot].findPartition(key);
}
 
开发者ID:naver,项目名称:hadoop,代码行数:8,代码来源:TeraSort.java

示例10: buildTrie

import org.apache.hadoop.io.Text; //导入方法依赖的package包/类
/**
 * Given a sorted set of cut points, build a trie that will find the correct
 * partition quickly.
 *
 * <p>The current depth is the length of {@code prefix}. Recursion stops with a
 * leaf node once the depth reaches {@code maxDepth} or the range
 * {@code lower..upper} is empty ({@code lower == upper}). Otherwise an inner
 * node with 256 children is built, one child per possible value of the next
 * byte, each covering the slice of {@code splits} that belongs to that byte.
 *
 * @param splits the list of cut points
 * @param lower the lower bound of partitions 0..numPartitions-1
 * @param upper the upper bound of partitions 0..numPartitions-1
 * @param prefix the prefix that we have already checked against
 * @param maxDepth the maximum depth we will build a trie for
 * @return the trie node that will divide the splits correctly
 */
private static TrieNode buildTrie(Text[] splits, int lower, int upper, 
                                  Text prefix, int maxDepth) {
  int depth = prefix.getLength();
  if (depth >= maxDepth || lower == upper) {
    return new LeafTrieNode(depth, splits, lower, upper);
  }
  InnerTrieNode result = new InnerTrieNode(depth);
  // work on a copy so the caller's prefix is not modified
  Text trial = new Text(prefix);
  // append an extra byte on to the prefix
  trial.append(new byte[1], 0, 1);
  // index of the first split not yet assigned to a child
  int currentBound = lower;
  // children 0..254; child 255 is handled after the loop
  for(int ch = 0; ch < 255; ++ch) {
    // temporarily set the last byte to ch + 1 to get the exclusive upper limit for child ch
    trial.getBytes()[depth] = (byte) (ch + 1);
    lower = currentBound;
    // advance past every split that compares below prefix + (ch + 1)
    while (currentBound < upper) {
      if (splits[currentBound].compareTo(trial) >= 0) {
        break;
      }
      currentBound += 1;
    }
    // restore the last byte to ch so trial is the prefix passed to the child
    trial.getBytes()[depth] = (byte) ch;
    result.child[ch] = buildTrie(splits, lower, currentBound, trial, 
                                 maxDepth);
  }
  // pick up the rest
  trial.getBytes()[depth] = (byte) 255;
  result.child[255] = buildTrie(splits, currentBound, upper, trial,
                                maxDepth);
  return result;
}
 
开发者ID:naver,项目名称:hadoop,代码行数:41,代码来源:TeraSort.java

示例11: reduce

import org.apache.hadoop.io.Text; //导入方法依赖的package包/类
/**
 * Checks one reduce group against the expected key, record count and value
 * contents, and forwards every value to the output unchanged.
 *
 * <p>Keys of length 25 are treated as tagged records: the first byte is the tag
 * ('A' or 'B'), exactly one record is expected, and the expected value length
 * comes from {@code getValLen}. Any other tag fails the test, and the failure
 * message reports the tag byte that was actually examined (index 0). Keys of any
 * other length are expected to carry one record per map with values of length 1000.
 *
 * @param key      the group key under test
 * @param values   the values of the group
 * @param out      collector that receives each (key, value) pair as-is
 * @param reporter progress reporter (not used by this method)
 * @throws IOException if collecting output fails
 */
@Override
public void reduce(Text key, Iterator<Text> values,
    OutputCollector<Text,Text> out, Reporter reporter)
    throws IOException {
  int vc = 0;
  final int vlen;
  final int preRec = nRec;
  final int vcCheck, recCheck;
  // truncate the formatter's buffer to keylen before formatting the expected key
  ((StringBuilder)fmt.out()).setLength(keylen);
  if (25 == key.getLength()) {
    // tagged record
    recCheck = 1;   // expect only 1 record
    final char tag = (char)key.getBytes()[0];
    switch (tag) {
      case 'A':
        vlen = getValLen(++aKey, nMaps) - 128;
        vcCheck = aKey; // expect eq id
        break;
      case 'B':
        vlen = getValLen(++bKey, nMaps);
        vcCheck = bKey; // expect eq id
        break;
      default:
        vlen = vcCheck = -1;
        // report the byte that was switched on, not an unrelated byte of the key
        fail("Unexpected tag on record: " + tag);
    }
    kb.set(tag + fmt.format(tagfmt,vcCheck).toString());
  } else {
    kb.set(fmt.format(tagfmt, ++nKey).toString());
    vlen = 1000;
    recCheck = nMaps;                      // expect 1 rec per map
    vcCheck = (nMaps * (nMaps - 1)) >>> 1; // expect eq sum(id)
  }
  assertEquals(kb, key);
  while (values.hasNext()) {
    final Text val = values.next();
    // increment vc by map ID assoc w/ val
    vc += val.getBytes()[0];
    // verify that all the fixed characters 'V' match
    assertEquals(0, WritableComparator.compareBytes(
          vb.getBytes(), 1, vlen - 1,
          val.getBytes(), 1, val.getLength() - 1));
    out.collect(key, val);
    ++nRec;
  }
  assertEquals("Bad rec count for " + key, recCheck, nRec - preRec);
  assertEquals("Bad rec group for " + key, vcCheck, vc);
}
 
开发者ID:naver,项目名称:hadoop,代码行数:48,代码来源:TestReduceFetchFromPartialMem.java


注:本文中的org.apache.hadoop.io.Text.getLength方法示例由纯净天空整理自Github/MSDocs等开源代码及文档管理平台,相关代码片段筛选自各路编程大神贡献的开源项目,源码版权归原作者所有,传播和使用请参考对应项目的License;未经允许,请勿转载。