

Java Text.set Method Code Examples

This article collects typical usage examples of the Java method org.apache.hadoop.io.Text.set. If you are wondering what Text.set does, how to call it, or where to find examples of it in use, the curated code samples below should help. You can also explore further usage examples of org.apache.hadoop.io.Text, the class this method belongs to.


The sections below present 15 code examples of the Text.set method, sorted by popularity by default. You can upvote the examples you like or find useful; your feedback helps the system recommend better Java code examples.
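
Before the examples, here is a minimal, self-contained sketch (written for this article, not taken from any of the projects below) of the three most common Text.set overloads: setting from a String, from a byte range, and from another Text.

import java.nio.charset.StandardCharsets;
import org.apache.hadoop.io.Text;

public class TextSetDemo {
  public static void main(String[] args) {
    Text t = new Text();

    // set(String): replaces the contents with the UTF-8 encoding of the string
    t.set("hello");

    // set(byte[], int, int): copies a sub-range of raw UTF-8 bytes
    byte[] utf8 = "hello world".getBytes(StandardCharsets.UTF_8);
    t.set(utf8, 6, 5); // t now holds "world"

    // set(Text): copies the bytes of another Text instance
    Text copy = new Text();
    copy.set(t);

    System.out.println(t + " / " + copy); // world / world
  }
}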

Example 1: splitKeyVal

import org.apache.hadoop.io.Text; // import the package/class this method depends on
private void splitKeyVal(byte[] line, int length, Text key, Text val)
  throws IOException {
  // Need to find numKeyFields separators
  int pos = UTF8ByteArrayUtils.findBytes(line, 0, length, separator);
  for(int k=1; k<numKeyFields && pos!=-1; k++) {
    pos = UTF8ByteArrayUtils.findBytes(line, pos + separator.length, 
      length, separator);
  }
  try {
    if (pos == -1) {
      key.set(line, 0, length);
      val.set("");
    } else {
      StreamKeyValUtil.splitKeyVal(line, 0, length, key, val, pos,
        separator.length);
    }
  } catch (CharacterCodingException e) {
    throw new IOException(StringUtils.stringifyException(e));
  }
}
 
Developer ID: naver, Project: hadoop, Lines of code: 21, Source: TextOutputReader.java

Example 2: next

import org.apache.hadoop.io.Text; // import the package/class this method depends on
public synchronized boolean next(Text key, Text value) throws IOException {
  numNext++;
  if (pos_ >= end_) {
    return false;
  }

  DataOutputBuffer buf = new DataOutputBuffer();
  if (!readUntilMatchBegin()) {
    return false;
  }
  if (pos_ >= end_ || !readUntilMatchEnd(buf)) {
    return false;
  }

  // There is only one element; key/value splitting is not done here.
  byte[] record = new byte[buf.getLength()];
  System.arraycopy(buf.getData(), 0, record, 0, record.length);

  numRecStats(record, 0, record.length);

  key.set(record);
  value.set("");

  return true;
}
 
Developer ID: naver, Project: hadoop, Lines of code: 26, Source: StreamXmlRecordReader.java

Example 3: createFakeCredentials

import org.apache.hadoop.io.Text; // import the package/class this method depends on
@SuppressWarnings({ "rawtypes", "unchecked" })
static DataInputBuffer createFakeCredentials(Random r, int nTok)
      throws IOException {
    Credentials creds = new Credentials();
    byte[] password = new byte[20];
    Text kind = new Text();
    Text service = new Text();
    Text alias = new Text();
    for (int i = 0; i < nTok; ++i) {
      byte[] identifier = ("idef" + i).getBytes();
      r.nextBytes(password);
      kind.set("kind" + i);
      service.set("service" + i);
      alias.set("token" + i);
      Token token = new Token(identifier, password, kind, service);
      creds.addToken(alias, token);
    }
    DataOutputBuffer buf = new DataOutputBuffer();
    creds.writeTokenStorageToStream(buf);
    DataInputBuffer ret = new DataInputBuffer();
    ret.reset(buf.getData(), 0, buf.getLength());
    return ret;
  }
 
Developer ID: naver, Project: hadoop, Lines of code: 24, Source: TestContainerLocalizer.java
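
As a hedged companion to the example above (written for this article, not taken from the original test), the serialized buffer can be deserialized back into a Credentials object, since DataInputBuffer is a DataInputStream:

import org.apache.hadoop.io.DataInputBuffer;
import org.apache.hadoop.io.DataOutputBuffer;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.security.Credentials;
import org.apache.hadoop.security.token.Token;

public class CredentialsRoundTrip {
  public static void main(String[] args) throws Exception {
    // Write a single token, mirroring one iteration of the loop above
    Credentials creds = new Credentials();
    Text alias = new Text();
    alias.set("token0");
    creds.addToken(alias, new Token<>("idef0".getBytes(), new byte[20],
        new Text("kind0"), new Text("service0")));
    DataOutputBuffer out = new DataOutputBuffer();
    creds.writeTokenStorageToStream(out);

    // Read it back through a DataInputBuffer
    DataInputBuffer in = new DataInputBuffer();
    in.reset(out.getData(), 0, out.getLength());
    Credentials roundTrip = new Credentials();
    roundTrip.readTokenStorageStream(in);
    System.out.println("tokens read: " + roundTrip.numberOfTokens());
  }
}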

Example 4: getRecordReader

import org.apache.hadoop.io.Text; // import the package/class this method depends on
public RecordReader<Text,Text> getRecordReader(InputSplit split,
    JobConf job, Reporter reporter) throws IOException {
  final IndirectInputFormat.IndirectSplit clSplit =
    (IndirectInputFormat.IndirectSplit)split;
  return new RecordReader<Text,Text>() {
    boolean once = true;
    public boolean next(Text key, Text value) {
      if (once) {
        key.set(clSplit.getPath().toString());
        once = false;
        return true;
      }
      return false;
    }
    public Text createKey() { return new Text(); }
    public Text createValue() { return new Text(); }
    public long getPos() { return 0; }
    public void close() { }
    public float getProgress() { return 0.0f; }
  };
}
 
Developer ID: naver, Project: hadoop, Lines of code: 22, Source: GenericMRLoadGenerator.java

Example 5: genRandom

import org.apache.hadoop.io.Text; // import the package/class this method depends on
private static void genRandom(Text t, int len, StringBuilder sb) {
  sb.setLength(0);
  for (int i = 0; i < len; ++i) {
    sb.append(Integer.toString(r.nextInt(26) + 10, 36));
  }
  t.set(sb.toString());
}
 
Developer ID: nucypher, Project: hadoop-oss, Lines of code: 8, Source: TestIndexedSort.java

Example 6: writeOutput

import org.apache.hadoop.io.Text; // import the package/class this method depends on
private void writeOutput(RemoteIterator<? extends FileStatus> input) throws IOException {
    Path outPath = new Path(output);
    if (distribFs.exists(outPath)) {
        throw new IllegalArgumentException("Output file already exists, Not overwriting it:" + output);
    }

    Writer writer = SequenceFile.createWriter(distribFs.getConf(),
            Writer.file(outPath),
            Writer.keyClass(Text.class),
            Writer.valueClass(BytesWritable.class),
            Writer.compression(SequenceFile.CompressionType.RECORD));
    Text key = new Text();
    BytesWritable value = new BytesWritable();
    long skipped = 0;
    long copied = 0;
    while (input.hasNext()) {
        FileStatus next = input.next();
        if (filter(next)) {
            key.set(next.getPath().toString());
            FSDataInputStream stream = localFs.open(next.getPath());
            // CAUTION: this reads the whole file into memory and could cause an OutOfMemoryError
            byte[] bytes = IOUtils.toByteArray(stream);
            value.set(bytes, 0, bytes.length);
            writer.append(key, value);
            copied++;
        } else {
            skipped++;
        }
    }
    writer.close();
    System.out.println("Files copied ::" + copied);
    System.out.println("Files skipped ::" + skipped);
}
 
Developer ID: thammegowda, Project: tika-dl4j-spark-imgrec, Lines of code: 34, Source: Local2SeqFile.java
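
For completeness, a minimal read-back sketch (not part of the original example; the output path is a placeholder) showing how the sequence file written above could be consumed:

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.BytesWritable;
import org.apache.hadoop.io.SequenceFile;
import org.apache.hadoop.io.Text;

public class SeqFileReadDemo {
  public static void main(String[] args) throws Exception {
    Configuration conf = new Configuration();
    // Hypothetical path; use whatever was passed as `output` above
    SequenceFile.Reader reader = new SequenceFile.Reader(conf,
        SequenceFile.Reader.file(new Path("/tmp/out.seq")));
    try {
      Text key = new Text();
      BytesWritable value = new BytesWritable();
      while (reader.next(key, value)) {
        System.out.println(key + " : " + value.getLength() + " bytes");
      }
    } finally {
      reader.close();
    }
  }
}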

Example 7: next

import org.apache.hadoop.io.Text; // import the package/class this method depends on
private boolean next(LongWritable key, Text value) throws IOException {
    /**
     * Use readUntilMatch to scan for the tag that opens an XML block; nothing
     * is written to the buffer until that tag is found. If readUntilMatch's
     * second argument is false, the bytes scanned are not written to the
     * buffer; if it is true, bytes are written to the buffer while scanning.
     */
    if( fsin.getPos() < end && readUntilMatch(startTag, false)) {
        // Reaching this branch means the start tag was found; fsin now points
        // just past the last byte of that start tag, so write the start tag
        // into the buffer ourselves.
        buffer.write(startTag);
        try {
            /**
             * Scan fsin for the end tag, recording bytes as we scan,
             * until the end tag is found.
             */
            if(readUntilMatch(endTag, true)) {
                /**
                 * The end tag was found: set key to the byte offset at which
                 * the start tag began, and set value to the complete XML
                 * fragment recorded in the buffer.
                 */
                key.set(fsin.getPos() - buffer.getLength());
                value.set(buffer.getData(), 0, buffer.getLength());
                return true;
            }
        } finally {
            buffer.reset();
        }
    }
    return false;
}
 
Developer ID: lzmhhh123, Project: Wikipedia-Index, Lines of code: 30, Source: XmlInputFormat.java

Example 8: validateFinalListing

import org.apache.hadoop.io.Text; // import the package/class this method depends on
/**
 * Validate the final resulting path listing. Checks if there are duplicate entries. If preserving ACLs, checks that
 * file system can support ACLs. If preserving XAttrs, checks that file system can support XAttrs.
 *
 * @param pathToListFile path listing built by doBuildListing
 * @param options Input options to S3MapReduceCp
 * @throws IOException if any issue arises while checking for duplicates
 * @throws DuplicateFileException if there are duplicates
 */
private void validateFinalListing(Path pathToListFile, S3MapReduceCpOptions options)
  throws DuplicateFileException, IOException {

  Configuration config = getConf();
  FileSystem fs = pathToListFile.getFileSystem(config);

  Path sortedList = sortListing(fs, config, pathToListFile);

  SequenceFile.Reader reader = new SequenceFile.Reader(config, SequenceFile.Reader.file(sortedList));
  try {
    Text lastKey = new Text("*"); // source relative path can never hold *
    CopyListingFileStatus lastFileStatus = new CopyListingFileStatus();

    Text currentKey = new Text();
    while (reader.next(currentKey)) {
      if (currentKey.equals(lastKey)) {
        CopyListingFileStatus currentFileStatus = new CopyListingFileStatus();
        reader.getCurrentValue(currentFileStatus);
        throw new DuplicateFileException("File "
            + lastFileStatus.getPath()
            + " and "
            + currentFileStatus.getPath()
            + " would cause duplicates. Aborting");
      }
      reader.getCurrentValue(lastFileStatus);
      lastKey.set(currentKey);
    }
  } finally {
    IOUtils.closeStream(reader);
  }
}
 
Developer ID: HotelsDotCom, Project: circus-train, Lines of code: 41, Source: CopyListing.java
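
One detail worth calling out in the example above: lastKey.set(currentKey) copies the bytes of currentKey into lastKey rather than aliasing it, which is what makes it safe to keep reusing both Text objects across loop iterations. A minimal illustration (written for this article):

import org.apache.hadoop.io.Text;

public class TextCopyVsAlias {
  public static void main(String[] args) {
    Text current = new Text("a");
    Text copied = new Text();
    copied.set(current);    // set(Text) copies bytes: copied stays "a"
    Text aliased = current; // plain assignment: aliased tracks current
    current.set("b");
    System.out.println(copied + " " + aliased); // prints: a b
  }
}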

Example 9: next

import org.apache.hadoop.io.Text; // import the package/class this method depends on
/** Read key/value pair in a line. */
public synchronized boolean next(Text key, Text value) throws IOException {
  Text tKey = key;
  Text tValue = value;
  if (!sequenceFileRecordReader.next(innerKey, innerValue)) {
    return false;
  }
  tKey.set(innerKey.toString());
  tValue.set(innerValue.toString());
  return true;
}
 
Developer ID: naver, Project: hadoop, Lines of code: 12, Source: SequenceFileAsTextRecordReader.java

Example 10: skipUtfByteOrderMark

import org.apache.hadoop.io.Text; // import the package/class this method depends on
private int skipUtfByteOrderMark(Text value) throws IOException {
  // Strip BOM (Byte Order Mark).
  // Text only supports UTF-8, so we only need to check for the UTF-8 BOM
  // (0xEF,0xBB,0xBF) at the start of the text stream.
  int newMaxLineLength = (int) Math.min(3L + (long) maxLineLength,
      Integer.MAX_VALUE);
  int newSize = in.readLine(value, newMaxLineLength, maxBytesToConsume(pos));
  // Even though we read 3 extra bytes for the first line,
  // we won't alter existing behavior (no backwards-incompatibility issue).
  // Because the newSize is less than maxLineLength and
  // the number of bytes copied to Text is always no more than newSize.
  // If the return size from readLine is not less than maxLineLength,
  // we will discard the current line and read the next line.
  pos += newSize;
  int textLength = value.getLength();
  byte[] textBytes = value.getBytes();
  if ((textLength >= 3) && (textBytes[0] == (byte)0xEF) &&
      (textBytes[1] == (byte)0xBB) && (textBytes[2] == (byte)0xBF)) {
    // find UTF-8 BOM, strip it.
    LOG.info("Found UTF-8 BOM and skipped it");
    textLength -= 3;
    newSize -= 3;
    if (textLength > 0) {
      // It may work to use the same buffer and not do the copyBytes
      textBytes = value.copyBytes();
      value.set(textBytes, 3, textLength);
    } else {
      value.clear();
    }
  }
  return newSize;
}
 
Developer ID: naver, Project: hadoop, Lines of code: 33, Source: LineRecordReader.java
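
To isolate the byte-range overload used at the end of that method, here is a small standalone sketch (written for this article) that strips a UTF-8 BOM from a Text value the same way:

import org.apache.hadoop.io.Text;

public class BomStripDemo {
  public static void main(String[] args) {
    // 0xEF,0xBB,0xBF is the UTF-8 byte order mark; "hi" follows it
    byte[] withBom = { (byte) 0xEF, (byte) 0xBB, (byte) 0xBF, 'h', 'i' };
    Text value = new Text();
    value.set(withBom, 0, withBom.length);

    byte[] bytes = value.copyBytes(); // copy, since set() may reuse the backing array
    if (bytes.length >= 3 && bytes[0] == (byte) 0xEF
        && bytes[1] == (byte) 0xBB && bytes[2] == (byte) 0xBF) {
      value.set(bytes, 3, bytes.length - 3); // keep everything after the BOM
    }
    System.out.println(value); // prints: hi
  }
}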

Example 11: next

import org.apache.hadoop.io.Text; // import the package/class this method depends on
public boolean next(Text key, Text value) {
  if (name != null) {
    key.set(name.getName());
    name = null;
    return true;
  }
  return false;
}
 
Developer ID: naver, Project: hadoop, Lines of code: 9, Source: UtilsForTests.java

Example 12: generateSentence

import org.apache.hadoop.io.Text; // import the package/class this method depends on
private int generateSentence(Text t, int noWords) {
  sentence.setLength(0);
  --noWords;
  for (int i = 0; i < noWords; ++i) {
    sentence.append(words[r.nextInt(words.length)]);
    sentence.append(" ");
  }
  if (noWords >= 0) sentence.append(words[r.nextInt(words.length)]);
  t.set(sentence.toString());
  return sentence.length();
}
 
Developer ID: naver, Project: hadoop, Lines of code: 12, Source: GenericMRLoadGenerator.java

Example 13: createMapFile

import org.apache.hadoop.io.Text; // import the package/class this method depends on
private static void createMapFile(Configuration conf, FileSystem fs, Path path, 
    CompressionCodec codec, CompressionType type, int records) throws IOException {
  MapFile.Writer writer = 
      new MapFile.Writer(conf, path,
          MapFile.Writer.keyClass(Text.class),
          MapFile.Writer.valueClass(Text.class),
          MapFile.Writer.compression(type, codec));
  Text key = new Text();
  for (int j = 0; j < records; j++) {
      key.set(String.format("%03d", j));
      writer.append(key, key);
  }
  writer.close();
}
 
Developer ID: naver, Project: hadoop, Lines of code: 15, Source: TestCodec.java
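
As a hedged complement (not part of the original test), a record written with the zero-padded key format above could be looked up again with MapFile.Reader; the path and configuration are placeholders:

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.MapFile;
import org.apache.hadoop.io.Text;

public class MapFileLookupDemo {
  public static void main(String[] args) throws Exception {
    Configuration conf = new Configuration();
    Path path = new Path("/tmp/test.mapfile"); // hypothetical path
    MapFile.Reader reader = new MapFile.Reader(path, conf);
    try {
      Text key = new Text();
      Text value = new Text();
      key.set(String.format("%03d", 7)); // same zero-padded format as the writer
      if (reader.get(key, value) != null) {
        System.out.println(key + " -> " + value);
      }
    } finally {
      reader.close();
    }
  }
}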

Example 14: nextKeyValue

import org.apache.hadoop.io.Text; // import the package/class this method depends on
public boolean nextKeyValue() {
  if (name != null) {
    key = new Text();
    key.set(name.getName());
    name = null;
    return true;
  }
  return false;
}
 
Developer ID: naver, Project: hadoop, Lines of code: 10, Source: RandomWriter.java

Example 15: writeSimpleSrc

import org.apache.hadoop.io.Text; // import the package/class this method depends on
private static Path[] writeSimpleSrc(Path testdir, JobConf conf,
    int srcs) throws IOException {
  SequenceFile.Writer out[] = null;
  Path[] src = new Path[srcs];
  try {
    out = createWriters(testdir, conf, srcs, src);
    final int capacity = srcs * 2 + 1;
    Text key = new Text();
    key.set("ignored");
    Text val = new Text();
    for (int k = 0; k < capacity; ++k) {
      for (int i = 0; i < srcs; ++i) {
        val.set(Integer.toString(k % srcs == 0 ? k * srcs : k * srcs + i) +
            "\t" + Integer.toString(10 * k + i));
        out[i].append(key, val);
        if (i == k) {
          // add duplicate key
          out[i].append(key, val);
        }
      }
    }
  } finally {
    if (out != null) {
      for (int i = 0; i < srcs; ++i) {
        if (out[i] != null)
          out[i].close();
      }
    }
  }
  return src;
}
 
Developer ID: naver, Project: hadoop, Lines of code: 32, Source: TestDataJoin.java


Note: The org.apache.hadoop.io.Text.set method examples in this article were compiled by 纯净天空 from open-source code and documentation platforms such as GitHub and MSDocs. The code snippets were selected from open-source projects contributed by their respective authors, and copyright remains with the original authors. Please consult the corresponding project's license before distributing or using them. Do not reproduce without permission.