Java Text.getBytes方法代码示例

本文整理汇总了Java中org.apache.hadoop.io.Text.getBytes方法的典型用法代码示例。如果您正苦于以下问题：Java Text.getBytes方法的具体用法？Java Text.getBytes怎么用？Java Text.getBytes使用的例子？那么，这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在类org.apache.hadoop.io.Text的用法示例。

在下文中一共展示了Text.getBytes方法的10个代码示例，这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞，您的评价将有助于系统推荐出更棒的Java代码示例。

示例1: find

import org.apache.hadoop.io.Text; //导入方法依赖的package包/类
/**
 * Locates the first occurrence of a delimiter byte that lies outside double quotes.
 *
 * <p>Every double-quote byte encountered toggles a "quoted" state, and a delimiter is only
 * reported while that state is off. Only bytes below {@code text.getLength()} are examined.
 *
 * @param  text  the text being searched
 * @param  delimiter the delimiter byte to look for
 * @param  start the index at which scanning begins
 * @return the index of the first unquoted delimiter at or after {@code start}, or -1 if none
 */
public int find(Text text, byte delimiter, int start) {
  final byte[] data = text.getBytes();
  final int end = text.getLength();
  boolean quoted = false;
  for (int i = start; i < end; i++) {
    final byte current = data[i];
    if (current == '\"') {
      quoted = !quoted;
    }
    if (!quoted && current == delimiter) {
      return i;
    }
  }
  return -1;
}

开发者ID:skhalifa，项目名称:QDrill，代码行数:25，代码来源:DrillTextRecordReader.java

示例2: lowLevelFastqRead

import org.apache.hadoop.io.Text; //导入方法依赖的package包/类
/**
 * Reads one FASTQ record: the ID line goes into {@code readName}, and the ID line plus the
 * three following lines (sequence, separator, quality) are appended to {@code value}.
 *
 * @param readName cleared on entry, then filled with the ID line
 * @param value    receives the ID line followed by the next three lines (appended, not reset)
 * @return {@code false} if reading the ID line reported 0 (treated as EOF), {@code true} otherwise
 * @throws IOException      declared for the line-reading helper
 * @throws RuntimeException if the ID line is empty or does not begin with '@'
 */
protected boolean lowLevelFastqRead(Text readName, Text value) throws IOException {
    // ID line
    readName.clear();
    long skipped = appendLineInto(readName, true);
    if (skipped == 0) {
        return false; // EOF
    }
    // Only bytes below getLength() are valid: clear() resets the length but may leave old
    // content in the backing array, so the first byte must not be inspected on an empty line.
    if (readName.getLength() == 0 || readName.getBytes()[0] != '@') {
        throw new RuntimeException("unexpected fastq record didn't start with '@' at " + makePositionMessage() + ". Line: " + readName + ". \n");
    }

    value.append(readName.getBytes(), 0, readName.getLength());

    // sequence
    appendLineInto(value, false);

    // separator line
    appendLineInto(value, false);

    // quality
    appendLineInto(value, false);

    return true;
}

开发者ID:PAA-NCIC，项目名称:SparkSeq，代码行数:23，代码来源:SingleFastqInputFormat.java

示例3: writeString

import org.apache.hadoop.io.Text; //导入方法依赖的package包/类
/**
 * Writes a String as a VInt length n followed by the n bytes that {@code Text} holds for it.
 * A {@code null} string is written as the single VInt -1 with no payload.
 *
 * @param out the destination to write to
 * @param s the string to write, may be {@code null}
 * @throws IOException if writing to {@code out} fails
 */
public static void writeString(DataOutput out, String s) throws IOException {
  if (s == null) {
    writeVInt(out, -1);
    return;
  }
  final Text encoded = new Text(s);
  final byte[] payload = encoded.getBytes();
  final int byteCount = encoded.getLength();
  writeVInt(out, byteCount);
  // The backing array may be longer than the content, so write only byteCount bytes.
  out.write(payload, 0, byteCount);
}

开发者ID:nucypher，项目名称:hadoop-oss，代码行数:19，代码来源:Utils.java

示例4: setSafeValue

import org.apache.hadoop.io.Text; //导入方法依赖的package包/类
/**
 * Copies the string held by a Hive field into slot {@code outputIndex} of a
 * {@code NullableVarCharVector}.
 *
 * @param oi             inspector for the field; cast to {@code StringObjectInspector}
 * @param hiveFieldValue the raw Hive field value
 * @param outputVV       destination vector; cast to {@code NullableVarCharVector}
 * @param outputIndex    slot in the destination vector to write
 */
@Override
public void setSafeValue(ObjectInspector oi, Object hiveFieldValue, ValueVector outputVV, int outputIndex) {
  final StringObjectInspector inspector = (StringObjectInspector) oi;
  final Text text = inspector.getPrimitiveWritableObject(hiveFieldValue);
  final byte[] payload = text.getBytes();
  final int byteCount = text.getLength();
  final NullableVarCharVector target = (NullableVarCharVector) outputVV;
  // Pass the explicit length: only the first byteCount bytes of the backing array are content.
  target.getMutator().setSafe(outputIndex, payload, 0, byteCount);
}

开发者ID:skhalifa，项目名称:QDrill，代码行数:8，代码来源:HiveFieldConverter.java

示例5: skipUtfByteOrderMark

import org.apache.hadoop.io.Text; //导入方法依赖的package包/类
/**
 * Reads one line into {@code value} and removes a leading UTF-8 byte order mark
 * (0xEF, 0xBB, 0xBF) if the line starts with one.
 *
 * <p>{@code pos} is advanced by the full number of bytes reported by the reader, BOM included;
 * the returned size excludes the three BOM bytes when one was stripped.
 *
 * @param value receives the line, without the BOM
 * @return the size reported by the reader, minus 3 if a BOM was removed
 * @throws IOException if reading the line fails
 */
private int skipUtfByteOrderMark(Text value) throws IOException {
  // Text only supports UTF-8, so only the UTF-8 BOM needs checking.
  // Allow 3 extra bytes on this line so the BOM does not eat into maxLineLength;
  // the sum is done in long and clamped to avoid int overflow.
  final long widenedLimit = 3L + (long) maxLineLength;
  final int lineLimit = (int) Math.min(widenedLimit, Integer.MAX_VALUE);
  final int consumed = in.readLine(value, lineLimit, maxBytesToConsume(pos));
  pos += consumed;

  final int length = value.getLength();
  final byte[] raw = value.getBytes();
  final boolean startsWithBom = length >= 3
      && raw[0] == (byte) 0xEF
      && raw[1] == (byte) 0xBB
      && raw[2] == (byte) 0xBF;
  if (!startsWithBom) {
    return consumed;
  }

  LOG.info("Found UTF-8 BOM and skipped it");
  final int remaining = length - 3;
  if (remaining > 0) {
    // Work from a copy of the content; set() then keeps everything after the BOM.
    value.set(value.copyBytes(), 3, remaining);
  } else {
    value.clear();
  }
  return consumed - 3;
}

开发者ID:naver，项目名称:hadoop，代码行数:33，代码来源:LineRecordReader.java

示例6: buildTrie

import org.apache.hadoop.io.Text; //导入方法依赖的package包/类
/**
 * Given a sorted set of cut points, build a trie that will find the correct
 * partition quickly.
 * <p>
 * Recursion ends with a leaf node once the prefix is {@code maxDepth} bytes
 * long or when {@code lower == upper}. Otherwise an inner node is created and
 * each of its 256 child slots (one per value of the next byte) is filled by a
 * recursive call over the sub-range of splits assigned to that byte value.
 * @param splits the list of cut points
 * @param lower the lower bound of partitions 0..numPartitions-1
 * @param upper the upper bound of partitions 0..numPartitions-1
 * @param prefix the prefix that we have already checked against
 * @param maxDepth the maximum depth we will build a trie for
 * @return the trie node that will divide the splits correctly
 */
private static TrieNode buildTrie(Text[] splits, int lower, int upper, 
                                  Text prefix, int maxDepth) {
  int depth = prefix.getLength();
  if (depth >= maxDepth || lower == upper) {
    return new LeafTrieNode(depth, splits, lower, upper);
  }
  InnerTrieNode result = new InnerTrieNode(depth);
  // Work on a copy so the caller's prefix is not modified.
  Text trial = new Text(prefix);
  // append an extra byte on to the prefix
  trial.append(new byte[1], 0, 1);
  // currentBound walks forward through splits and is never reset, so each
  // child receives the range that starts where the previous child's ended.
  int currentBound = lower;
  for(int ch = 0; ch < 255; ++ch) {
    // Probe with the next byte value (ch + 1), written straight into trial's
    // backing array; the scan below stops at the first split that does not
    // compare less than this probe.
    trial.getBytes()[depth] = (byte) (ch + 1);
    lower = currentBound;
    while (currentBound < upper) {
      if (splits[currentBound].compareTo(trial) >= 0) {
        break;
      }
      currentBound += 1;
    }
    // Put the byte back to ch so the recursive call sees prefix + ch.
    trial.getBytes()[depth] = (byte) ch;
    result.child[ch] = buildTrie(splits, lower, currentBound, trial, 
                                 maxDepth);
  }
  // pick up the rest
  // Byte value 255 has no "next" value to probe with, so it takes every
  // remaining split up to upper.
  trial.getBytes()[depth] = (byte) 255;
  result.child[255] = buildTrie(splits, currentBound, upper, trial,
                                maxDepth);
  return result;
}

开发者ID:naver，项目名称:hadoop，代码行数:41，代码来源:TeraSort.java

示例7: getPartition

import org.apache.hadoop.io.Text; //导入方法依赖的package包/类
/**
 * Chooses a partition from the leading bytes of the key.
 *
 * <p>Up to {@code PREFIX_LENGTH} leading key bytes are packed, most significant first and each
 * treated as unsigned, into an int that is then divided by {@code prefixesPerReduce}.
 *
 * @param key           the key whose leading bytes select the partition
 * @param value         not used
 * @param numPartitions not used
 * @return the packed prefix divided by {@code prefixesPerReduce}
 */
@Override
public int getPartition(Text key, Text value, int numPartitions) {
  final byte[] keyBytes = key.getBytes();
  final int limit = Math.min(PREFIX_LENGTH, key.getLength());
  int packed = 0;
  int idx = 0;
  while (idx < limit) {
    // Mask to 8 bits so a negative byte does not sign-extend into the higher bits.
    packed = (packed << 8) | (keyBytes[idx] & 0xff);
    idx++;
  }
  return packed / prefixesPerReduce;
}

开发者ID:naver，项目名称:hadoop，代码行数:11，代码来源:TeraSort.java

示例8: map

import org.apache.hadoop.io.Text; //导入方法依赖的package包/类
/**
 * Convert a line of TSV text into an HBase table row after transforming the
 * values by multiplying them by 3.
 * <p>
 * The line is split on the ESC character (U+001B). The first token becomes
 * the row key; each later token is expected to look like VALUE followed by an
 * integer, and is stored under family FAM with qualifier A, then B.
 *
 * @param offset  position of the line in the input; not used
 * @param value   the input line
 * @param context receives the row key and the resulting Put
 * @throws IOException if decoding the line or writing to the context fails
 */
@Override
public void map(LongWritable offset, Text value, Context context)
      throws IOException {
  byte[] family = Bytes.toBytes("FAM");
  final byte[][] qualifiers = { Bytes.toBytes("A"), Bytes.toBytes("B") };

  // do some basic line parsing
  // Decode only the first getLength() bytes: the array returned by getBytes()
  // can be longer than the content and may still hold bytes from an earlier,
  // longer line when the Text instance is reused.
  String line = new String(value.getBytes(), 0, value.getLength(), "UTF-8");
  String[] valueTokens = line.split("\u001b");

  // create the rowKey and Put
  ImmutableBytesWritable rowKey =
    new ImmutableBytesWritable(Bytes.toBytes(valueTokens[0]));
  Put put = new Put(rowKey.copyBytes());
  put.setDurability(Durability.SKIP_WAL);

  //The value should look like this: VALUE1 or VALUE2. Let's multiply
  //the integer by 3
  for(int i = 1; i < valueTokens.length; i++) {
    String prefix = valueTokens[i].substring(0, "VALUE".length());
    String suffix = valueTokens[i].substring("VALUE".length());
    String newValue = prefix + Integer.parseInt(suffix) * 3;

    KeyValue kv = new KeyValue(rowKey.copyBytes(), family,
        qualifiers[i-1], Bytes.toBytes(newValue));
    put.add(kv);
  }

  try {
    context.write(rowKey, put);
  } catch (InterruptedException e) {
    e.printStackTrace();
    // Restore the interrupt flag so code further up the stack can still see
    // that this thread was interrupted.
    Thread.currentThread().interrupt();
  }
}

开发者ID:fengchen8086，项目名称:ditb，代码行数:39，代码来源:TsvImporterCustomTestMapper.java

示例9: readLeadingInt

import org.apache.hadoop.io.Text; //导入方法依赖的package包/类
/**
 * Decodes the first four bytes of the text's backing array as a big-endian int,
 * using {@link DataInputStream#readInt()}.
 *
 * @param txt the text whose leading bytes are decoded
 * @return the int read from the start of the backing array
 * @throws IOException if fewer than four bytes are available
 */
private static int readLeadingInt(Text txt) throws IOException {
  // NOTE(review): the stream covers the whole backing array, not just the first
  // getLength() bytes - confirm callers always pass at least four valid bytes.
  final byte[] backing = txt.getBytes();
  final ByteArrayInputStream raw = new ByteArrayInputStream(backing);
  return new DataInputStream(raw).readInt();
}

开发者ID:nucypher，项目名称:hadoop-oss，代码行数:6，代码来源:TestCodec.java

示例10: reduce

import org.apache.hadoop.io.Text; //导入方法依赖的package包/类
/**
 * Checks one key group against the expected key text, record count and value
 * contents, and forwards every value unchanged to the collector.
 * <p>
 * Keys of length 25 are treated as tagged records ('A' or 'B' in the first
 * byte) with exactly one value expected; all other keys expect one value per
 * map. The counters aKey, bKey, nKey and nRec are advanced as a side effect.
 *
 * @param key      the key of the group being checked
 * @param values   the values of the group
 * @param out      receives each (key, value) pair as-is
 * @param reporter not used
 * @throws IOException declared by the overridden method
 */
@Override
public void reduce(Text key, Iterator<Text> values,
    OutputCollector<Text,Text> out, Reporter reporter)
    throws IOException {
  // Running sum of the first byte of each value in this group.
  int vc = 0;
  final int vlen;
  // Record count before this group, used to compute how many it contributed.
  final int preRec = nRec;
  final int vcCheck, recCheck;
  // Truncate the formatter's buffer to keylen characters before it is
  // appended to below.
  ((StringBuilder)fmt.out()).setLength(keylen);
  if (25 == key.getLength()) {
    // tagged record
    recCheck = 1;   // expect only 1 record
    switch ((char)key.getBytes()[0]) {
      case 'A':
        vlen = getValLen(++aKey, nMaps) - 128;
        vcCheck = aKey; // expect eq id
        break;
      case 'B':
        vlen = getValLen(++bKey, nMaps);
        vcCheck = bKey; // expect eq id
        break;
      default:
        // Assign the finals so the code after the switch compiles; fail() is
        // presumably the test framework's assertion failure - TODO confirm.
        vlen = vcCheck = -1;
        // NOTE(review): the switch inspects byte 0 but this message prints
        // byte 24 - confirm which byte is meant to be reported.
        fail("Unexpected tag on record: " + ((char)key.getBytes()[24]));
    }
    // Expected key: the tag character followed by the formatted id.
    kb.set((char)key.getBytes()[0] + fmt.format(tagfmt,vcCheck).toString());
  } else {
    kb.set(fmt.format(tagfmt, ++nKey).toString());
    vlen = 1000;
    recCheck = nMaps;                      // expect 1 rec per map
    vcCheck = (nMaps * (nMaps - 1)) >>> 1; // expect eq sum(id)
  }
  assertEquals(kb, key);
  while (values.hasNext()) {
    final Text val = values.next();
    // increment vc by map ID assoc w/ val
    vc += val.getBytes()[0];
    // verify that all the fixed characters 'V' match
    // The comparison skips byte 0 on both sides and covers vlen - 1 bytes of
    // vb against all remaining bytes of val, so a length mismatch also fails.
    assertEquals(0, WritableComparator.compareBytes(
          vb.getBytes(), 1, vlen - 1,
          val.getBytes(), 1, val.getLength() - 1));
    out.collect(key, val);
    ++nRec;
  }
  assertEquals("Bad rec count for " + key, recCheck, nRec - preRec);
  assertEquals("Bad rec group for " + key, vcCheck, vc);
}

开发者ID:naver，项目名称:hadoop，代码行数:48，代码来源:TestReduceFetchFromPartialMem.java

注：本文中的org.apache.hadoop.io.Text.getBytes方法示例由纯净天空整理自Github/MSDocs等开源代码及文档管理平台，相关代码片段筛选自各路编程大神贡献的开源项目，源码版权归原作者所有，传播和使用请参考对应项目的License；未经允许，请勿转载。