當前位置: 首頁>>代碼示例>>Java>>正文


Java Text.set方法代碼示例

本文整理匯總了Java中org.apache.hadoop.io.Text.set方法的典型用法代碼示例。如果您正苦於以下問題:Java Text.set方法的具體用法?Java Text.set怎麽用?Java Text.set使用的例子?那麽, 這裏精選的方法代碼示例或許可以為您提供幫助。您也可以進一步了解該方法所在org.apache.hadoop.io.Text的用法示例。


在下文中一共展示了Text.set方法的15個代碼示例,這些例子默認根據受歡迎程度排序。您可以為喜歡或者感覺有用的代碼點讚,您的評價將有助於係統推薦出更棒的Java代碼示例。

示例1: splitKeyVal

import org.apache.hadoop.io.Text; //導入方法依賴的package包/類
/**
 * Splits {@code line} into {@code key} and {@code val} at the
 * numKeyFields-th occurrence of the separator. If the line has fewer
 * separators than numKeyFields, the whole line becomes the key and the
 * value is set empty.
 */
private void splitKeyVal(byte[] line, int length, Text key, Text val)
  throws IOException {
  // Skip past the first (numKeyFields - 1) separators, stopping early
  // if the line runs out of separators.
  int sepPos = UTF8ByteArrayUtils.findBytes(line, 0, length, separator);
  int remaining = numKeyFields - 1;
  while (remaining-- > 0 && sepPos != -1) {
    sepPos = UTF8ByteArrayUtils.findBytes(line, sepPos + separator.length,
      length, separator);
  }
  try {
    if (sepPos != -1) {
      // Enough separators found: split around the located one.
      StreamKeyValUtil.splitKeyVal(line, 0, length, key, val, sepPos,
        separator.length);
    } else {
      // Not enough separators: the entire line is the key.
      key.set(line, 0, length);
      val.set("");
    }
  } catch (CharacterCodingException e) {
    throw new IOException(StringUtils.stringifyException(e));
  }
}
 
開發者ID:naver,項目名稱:hadoop,代碼行數:21,代碼來源:TextOutputReader.java

示例2: next

import org.apache.hadoop.io.Text; //導入方法依賴的package包/類
/**
 * Reads the next XML record into {@code key}; {@code value} is always set
 * empty (key/value splitting is not done here). Returns false at end of
 * the split or when no further record boundary can be found.
 */
public synchronized boolean next(Text key, Text value) throws IOException {
  numNext++;
  if (pos_ >= end_) {
    return false;
  }

  // Scan forward to the start of the next record.
  if (!readUntilMatchBegin()) {
    return false;
  }
  DataOutputBuffer buf = new DataOutputBuffer();
  if (pos_ >= end_ || !readUntilMatchEnd(buf)) {
    return false;
  }

  // There is only one elem..key/value splitting is not done here.
  byte[] recordBytes = new byte[buf.getLength()];
  System.arraycopy(buf.getData(), 0, recordBytes, 0, recordBytes.length);
  numRecStats(recordBytes, 0, recordBytes.length);

  key.set(recordBytes);
  value.set("");
  return true;
}
 
開發者ID:naver,項目名稱:hadoop,代碼行數:26,代碼來源:StreamXmlRecordReader.java

示例3: createFakeCredentials

import org.apache.hadoop.io.Text; //導入方法依賴的package包/類
@SuppressWarnings({ "rawtypes", "unchecked" })
/**
 * Builds a Credentials bag holding {@code nTok} synthetic tokens
 * (kind/service/alias "kindN"/"serviceN"/"tokenN" with random passwords),
 * serializes it, and returns a DataInputBuffer positioned at the start of
 * the serialized bytes.
 */
static DataInputBuffer createFakeCredentials(Random r, int nTok)
      throws IOException {
    Credentials credentials = new Credentials();
    byte[] secret = new byte[20];
    Text tokenKind = new Text();
    Text tokenService = new Text();
    Text tokenAlias = new Text();
    for (int i = 0; i < nTok; ++i) {
      r.nextBytes(secret);
      tokenKind.set("kind" + i);
      tokenService.set("service" + i);
      tokenAlias.set("token" + i);
      Token token =
          new Token(("idef" + i).getBytes(), secret, tokenKind, tokenService);
      credentials.addToken(tokenAlias, token);
    }
    // Round-trip through a DataOutputBuffer so the caller can read the
    // serialized token storage back in.
    DataOutputBuffer serialized = new DataOutputBuffer();
    credentials.writeTokenStorageToStream(serialized);
    DataInputBuffer result = new DataInputBuffer();
    result.reset(serialized.getData(), 0, serialized.getLength());
    return result;
  }
 
開發者ID:naver,項目名稱:hadoop,代碼行數:24,代碼來源:TestContainerLocalizer.java

示例4: getRecordReader

import org.apache.hadoop.io.Text; //導入方法依賴的package包/類
/**
 * Returns a reader that emits exactly one record per split: the split's
 * path string as the key and an untouched value.
 */
public RecordReader<Text,Text> getRecordReader(InputSplit split,
    JobConf job, Reporter reporter) throws IOException {
  final IndirectInputFormat.IndirectSplit indirect =
    (IndirectInputFormat.IndirectSplit) split;
  return new RecordReader<Text,Text>() {
    // Flips to false after the single record has been delivered.
    boolean hasNext = true;

    public boolean next(Text key, Text value) {
      if (!hasNext) {
        return false;
      }
      key.set(indirect.getPath().toString());
      hasNext = false;
      return true;
    }
    public Text createKey() { return new Text(); }
    public Text createValue() { return new Text(); }
    public long getPos() { return 0; }
    public void close() { }
    public float getProgress() { return 0.0f; }
  };
}
 
開發者ID:naver,項目名稱:hadoop,代碼行數:22,代碼來源:GenericMRLoadGenerator.java

示例5: genRandom

import org.apache.hadoop.io.Text; //導入方法依賴的package包/類
/**
 * Fills {@code t} with {@code len} random lowercase letters ('a'..'z'),
 * using {@code sb} as a reusable scratch buffer.
 */
private static void genRandom(Text t, int len, StringBuilder sb) {
  sb.setLength(0);
  for (int i = 0; i < len; ++i) {
    // Equivalent to Integer.toString(r.nextInt(26) + 10, 36): digit values
    // 10..35 render as 'a'..'z' in radix 36.
    sb.append((char) ('a' + r.nextInt(26)));
  }
  t.set(sb.toString());
}
 
開發者ID:nucypher,項目名稱:hadoop-oss,代碼行數:8,代碼來源:TestIndexedSort.java

示例6: writeOutput

import org.apache.hadoop.io.Text; //導入方法依賴的package包/類
/**
 * Copies every file accepted by {@link #filter} from the local file system
 * into a single record-compressed SequenceFile at {@code output}, keyed by
 * the source path string with the file bytes as the value.
 *
 * Fix over the original: the per-file input stream was never closed, and
 * the SequenceFile writer leaked if an exception was thrown mid-copy. Both
 * are now closed deterministically.
 *
 * @param input iterator over candidate files to copy
 * @throws IOException on any read/write failure
 * @throws IllegalArgumentException if the output file already exists
 */
private void writeOutput(RemoteIterator<? extends FileStatus> input) throws IOException {
    Path outPath = new Path(output);
    if (distribFs.exists(outPath)) {
        throw new IllegalArgumentException("Output file already exists, Not overwriting it:" + output);
    }

    Writer writer = SequenceFile.createWriter(distribFs.getConf(),
            Writer.file(outPath),
            Writer.keyClass(Text.class),
            Writer.valueClass(BytesWritable.class),
            Writer.compression(SequenceFile.CompressionType.RECORD));
    long skipped = 0;
    long copied = 0;
    try {
        Text key = new Text();
        BytesWritable value = new BytesWritable();
        while (input.hasNext()) {
            FileStatus next = input.next();
            if (filter(next)) {
                key.set(next.getPath().toString());
                // try-with-resources: close the input stream even if the
                // read fails part-way through.
                try (FSDataInputStream stream = localFs.open(next.getPath())) {
                    //CAUTION : this could cause memory overflow
                    byte[] bytes = IOUtils.toByteArray(stream);
                    value.set(bytes, 0, bytes.length);
                }
                writer.append(key, value);
                copied++;
            } else {
                skipped++;
            }
        }
    } finally {
        // Always release the writer, even on failure, so the partial output
        // file is not left with an open lease.
        writer.close();
    }
    System.out.println("Files copied ::" + copied);
    System.out.println("Files skipped ::" + skipped);
}
 
開發者ID:thammegowda,項目名稱:tika-dl4j-spark-imgrec,代碼行數:34,代碼來源:Local2SeqFile.java

示例7: next

import org.apache.hadoop.io.Text; //導入方法依賴的package包/類
private boolean next(LongWritable key, Text value) throws IOException {
    /**
     * Use readUntilMatch to look for the tag that opens an XML fragment.
     * Only once it is found do we start writing the fragment into the
     * buffer: when readUntilMatch's second argument is false, nothing is
     * written to the buffer during the search; when it is true, bytes are
     * written to the buffer as the search proceeds.
     */
    if( fsin.getPos() < end && readUntilMatch(startTag, false)) {
        // Entering this branch means the start tag was found. fsin now
        // points just past the last byte of the start tag, so write the
        // start tag into the buffer ourselves.
        buffer.write(startTag);
        try {
            /**
             * Search fsin for the end tag, recording bytes as the search
             * proceeds, until the end tag is found.
             */
            if(readUntilMatch(endTag, true)) {
                /**
                 * Found it: the key is the stream offset where the start
                 * tag begins, and the value is the complete XML fragment
                 * recorded in the buffer.
                 */
                key.set(fsin.getPos() - buffer.getLength());
                value.set(buffer.getData(), 0, buffer.getLength());
                return true;
            }
        } finally {
            buffer.reset();
        }
    }
    return false;
}
 
開發者ID:lzmhhh123,項目名稱:Wikipedia-Index,代碼行數:30,代碼來源:XmlInputFormat.java

示例8: validateFinalListing

import org.apache.hadoop.io.Text; //導入方法依賴的package包/類
/**
 * Validate the final resulting path listing. Checks if there are duplicate entries. If preserving ACLs, checks that
 * file system can support ACLs. If preserving XAttrs, checks that file system can support XAttrs.
 *
 * @param pathToListFile path listing build by doBuildListing
 * @param options Input options to S3MapReduceCp
 * @throws IOException Any issues while checking for duplicates and throws
 * @throws DuplicateFileException if there are duplicates
 */
private void validateFinalListing(Path pathToListFile, S3MapReduceCpOptions options)
  throws DuplicateFileException, IOException {

  Configuration conf = getConf();
  FileSystem fileSystem = pathToListFile.getFileSystem(conf);

  // Sort the listing so duplicate keys become adjacent.
  Path sortedListing = sortListing(fileSystem, conf, pathToListFile);

  SequenceFile.Reader listingReader =
      new SequenceFile.Reader(conf, SequenceFile.Reader.file(sortedListing));
  try {
    Text previousKey = new Text("*"); // source relative path can never hold *
    CopyListingFileStatus previousStatus = new CopyListingFileStatus();

    Text currentKey = new Text();
    while (listingReader.next(currentKey)) {
      if (!currentKey.equals(previousKey)) {
        // No duplicate: remember this entry and keep scanning.
        listingReader.getCurrentValue(previousStatus);
        previousKey.set(currentKey);
        continue;
      }
      // Adjacent equal keys in the sorted listing mean two sources map to
      // the same target path.
      CopyListingFileStatus conflictingStatus = new CopyListingFileStatus();
      listingReader.getCurrentValue(conflictingStatus);
      throw new DuplicateFileException("File "
          + previousStatus.getPath()
          + " and "
          + conflictingStatus.getPath()
          + " would cause duplicates. Aborting");
    }
  } finally {
    IOUtils.closeStream(listingReader);
  }
}
 
開發者ID:HotelsDotCom,項目名稱:circus-train,代碼行數:41,代碼來源:CopyListing.java

示例9: next

import org.apache.hadoop.io.Text; //導入方法依賴的package包/類
/** Read key/value pair in a line. */
public synchronized boolean next(Text key, Text value) throws IOException {
  // Advance the wrapped sequence-file reader; when it yields a record,
  // expose its key/value as their text representations.
  if (!sequenceFileRecordReader.next(innerKey, innerValue)) {
    return false;
  }
  key.set(innerKey.toString());
  value.set(innerValue.toString());
  return true;
}
 
開發者ID:naver,項目名稱:hadoop,代碼行數:12,代碼來源:SequenceFileAsTextRecordReader.java

示例10: skipUtfByteOrderMark

import org.apache.hadoop.io.Text; //導入方法依賴的package包/類
/**
 * Reads the next line into {@code value} and strips a leading UTF-8 byte
 * order mark (0xEF,0xBB,0xBF) if present at the start of the stream.
 *
 * @param value destination for the line's bytes (BOM removed in place)
 * @return the byte count reported by readLine, reduced by 3 if a BOM was
 *         stripped; the caller compares it against the line-length limit
 * @throws IOException if reading from the underlying stream fails
 */
private int skipUtfByteOrderMark(Text value) throws IOException {
  // Strip BOM(Byte Order Mark)
  // Text only support UTF-8, we only need to check UTF-8 BOM
  // (0xEF,0xBB,0xBF) at the start of the text stream.
  // Allow up to 3 extra bytes on this first read so a BOM-prefixed line of
  // exactly maxLineLength bytes is not truncated; clamp to avoid overflow.
  int newMaxLineLength = (int) Math.min(3L + (long) maxLineLength,
      Integer.MAX_VALUE);
  int newSize = in.readLine(value, newMaxLineLength, maxBytesToConsume(pos));
  // Even we read 3 extra bytes for the first line,
  // we won't alter existing behavior (no backwards incompat issue).
  // Because the newSize is less than maxLineLength and
  // the number of bytes copied to Text is always no more than newSize.
  // If the return size from readLine is not less than maxLineLength,
  // we will discard the current line and read the next line.
  pos += newSize;
  int textLength = value.getLength();
  byte[] textBytes = value.getBytes();
  if ((textLength >= 3) && (textBytes[0] == (byte)0xEF) &&
      (textBytes[1] == (byte)0xBB) && (textBytes[2] == (byte)0xBF)) {
    // find UTF-8 BOM, strip it.
    LOG.info("Found UTF-8 BOM and skipped it");
    textLength -= 3;
    newSize -= 3;
    if (textLength > 0) {
      // It may work to use the same buffer and not do the copyBytes
      textBytes = value.copyBytes();
      value.set(textBytes, 3, textLength);
    } else {
      // Line was only the BOM: report it as empty.
      value.clear();
    }
  }
  return newSize;
}
 
開發者ID:naver,項目名稱:hadoop,代碼行數:33,代碼來源:LineRecordReader.java

示例11: next

import org.apache.hadoop.io.Text; //導入方法依賴的package包/類
/** Emits a single record (the file name) as the key, then ends the input. */
public boolean next(Text key, Text value) {
  if (name == null) {
    return false;
  }
  key.set(name.getName());
  // Null out the name so subsequent calls report end of input.
  name = null;
  return true;
}
 
開發者ID:naver,項目名稱:hadoop,代碼行數:9,代碼來源:UtilsForTests.java

示例12: generateSentence

import org.apache.hadoop.io.Text; //導入方法依賴的package包/類
/**
 * Fills {@code t} with {@code noWords} random words separated by single
 * spaces (no trailing space) and returns the sentence length in chars.
 */
private int generateSentence(Text t, int noWords) {
  sentence.setLength(0);
  // All words except the last are followed by a space.
  int spacedWords = noWords - 1;
  for (int i = 0; i < spacedWords; ++i) {
    sentence.append(words[r.nextInt(words.length)]).append(" ");
  }
  if (spacedWords >= 0) {
    sentence.append(words[r.nextInt(words.length)]);
  }
  t.set(sentence.toString());
  return sentence.length();
}
 
開發者ID:naver,項目名稱:hadoop,代碼行數:12,代碼來源:GenericMRLoadGenerator.java

示例13: createMapFile

import org.apache.hadoop.io.Text; //導入方法依賴的package包/類
/**
 * Writes a MapFile at {@code path} with {@code records} Text entries whose
 * key and value are both the zero-padded record index ("000", "001", ...).
 * Zero-padding keeps lexicographic order equal to numeric order, which
 * MapFile requires of its keys.
 *
 * Fix over the original: the writer is now closed in a finally block so it
 * is not leaked if an append fails part-way through.
 *
 * @param conf configuration for the writer
 * @param fs unused; kept for signature compatibility with callers
 * @param path destination MapFile path
 * @param codec compression codec to apply
 * @param type compression type to apply
 * @param records number of entries to write
 * @throws IOException if creating or writing the MapFile fails
 */
private static void createMapFile(Configuration conf, FileSystem fs, Path path, 
    CompressionCodec codec, CompressionType type, int records) throws IOException {
  MapFile.Writer writer = 
      new MapFile.Writer(conf, path,
          MapFile.Writer.keyClass(Text.class),
          MapFile.Writer.valueClass(Text.class),
          MapFile.Writer.compression(type, codec));
  try {
    Text key = new Text();
    for (int j = 0; j < records; j++) {
      key.set(String.format("%03d", j));
      writer.append(key, key);
    }
  } finally {
    // Always release the writer, even if an append throws.
    writer.close();
  }
}
 
開發者ID:naver,項目名稱:hadoop,代碼行數:15,代碼來源:TestCodec.java

示例14: nextKeyValue

import org.apache.hadoop.io.Text; //導入方法依賴的package包/類
/** Produces a single key (the file name); later calls return false. */
public boolean nextKeyValue() {
  if (name == null) {
    return false;
  }
  key = new Text();
  key.set(name.getName());
  // Clear the name so the next call signals end of input.
  name = null;
  return true;
}
 
開發者ID:naver,項目名稱:hadoop,代碼行數:10,代碼來源:RandomWriter.java

示例15: writeSimpleSrc

import org.apache.hadoop.io.Text; //導入方法依賴的package包/類
/**
 * Writes {@code srcs} sequence files under {@code testdir}, all sharing a
 * single constant key ("ignored") with tab-separated numeric values; rows
 * where {@code i == k} are appended twice to create duplicate keys.
 *
 * @return the paths of the files written, as filled in by createWriters
 */
private static Path[] writeSimpleSrc(Path testdir, JobConf conf,
    int srcs) throws IOException {
  SequenceFile.Writer[] writers = null;
  Path[] src = new Path[srcs];
  try {
    writers = createWriters(testdir, conf, srcs, src);
    final int capacity = srcs * 2 + 1;
    Text key = new Text();
    key.set("ignored");
    Text val = new Text();
    for (int k = 0; k < capacity; ++k) {
      for (int i = 0; i < srcs; ++i) {
        // First column collapses to k*srcs on rows where k is a multiple
        // of srcs; second column is always 10*k + i.
        int firstCol = (k % srcs == 0) ? k * srcs : k * srcs + i;
        val.set(firstCol + "\t" + (10 * k + i));
        writers[i].append(key, val);
        if (i == k) {
          // add duplicate key
          writers[i].append(key, val);
        }
      }
    }
  } finally {
    if (writers != null) {
      for (int i = 0; i < srcs; ++i) {
        if (writers[i] != null) {
          writers[i].close();
        }
      }
    }
  }
  return src;
}
 
開發者ID:naver,項目名稱:hadoop,代碼行數:32,代碼來源:TestDataJoin.java


注:本文中的org.apache.hadoop.io.Text.set方法示例由純淨天空整理自Github/MSDocs等開源代碼及文檔管理平台,相關代碼片段篩選自各路編程大神貢獻的開源項目,源碼版權歸原作者所有,傳播和使用請參考對應項目的License;未經允許,請勿轉載。