

Java SequenceFile.Writer Code Examples

This article collects typical usage examples of org.apache.hadoop.io.SequenceFile.Writer in Java. If you have been wondering what SequenceFile.Writer is for, how to use it, or where to find examples of it, the curated code samples below may help. You can also explore further usage examples of org.apache.hadoop.io.SequenceFile, the class in which Writer is defined.


The sections below present 15 code examples of SequenceFile.Writer, sorted by popularity by default. You can upvote the examples you like or find useful; your feedback helps the system recommend better Java code samples.
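Before diving into the examples, here is a minimal, self-contained sketch of the typical write path. It assumes Hadoop 2.x or later (where the Writer.Option-based createWriter overload is available) and a hypothetical output path /tmp/example.seq:

import java.io.IOException;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.SequenceFile;
import org.apache.hadoop.io.Text;

public class SequenceFileWriterSketch {
  public static void main(String[] args) throws IOException {
    Configuration conf = new Configuration();
    Path file = new Path("/tmp/example.seq"); // hypothetical path

    // Create the writer through the non-deprecated Writer.Option factory method.
    SequenceFile.Writer writer = SequenceFile.createWriter(conf,
        SequenceFile.Writer.file(file),
        SequenceFile.Writer.keyClass(Text.class),
        SequenceFile.Writer.valueClass(IntWritable.class),
        SequenceFile.Writer.compression(SequenceFile.CompressionType.NONE));
    try {
      // Append key/value records; their types must match the declared key/value classes.
      for (int i = 0; i < 3; i++) {
        writer.append(new Text("key-" + i), new IntWritable(i));
      }
    } finally {
      writer.close(); // flushes buffered data and releases the underlying stream
    }
  }
}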

Example 1: doBuildListing

import org.apache.hadoop.io.SequenceFile; // import the dependent package/class
/**
 * Collects the list of <sourceRelativePath, sourceFileStatus> entries to be copied and writes them to the sequence
 * file. In essence, any file or directory that needs to be copied or sync-ed is written as an entry to the sequence
 * file, with the possible exception of the source root: when either the -update (sync) or -overwrite switch is
 * specified, and the source root is a directory, the source root entry is not written to the sequence file, because
 * only the contents of the source directory need to be copied in this case. See
 * {@link com.hotels.bdp.circustrain.s3mapreducecp.util.ConfigurationUtil#getRelativePath} for how the relative path
 * is computed, and see the computeSourceRootPath method for how the root path of the source is computed.
 *
 * @param fileListWriter the writer for the sequence file that receives the copy-listing entries
 * @param options options controlling the copy, including the source paths to list
 * @throws IOException on failure to list a source path or to write an entry
 */
@VisibleForTesting
public void doBuildListing(SequenceFile.Writer fileListWriter, S3MapReduceCpOptions options) throws IOException {
  List<Path> globbedPaths = new ArrayList<>(options.getSources().size());

  for (Path sourcePath : options.getSources()) {
    FileSystem fs = sourcePath.getFileSystem(getConf());
    FileStatus sourceFileStatus = fs.getFileStatus(sourcePath);
    if (sourceFileStatus.isFile()) {
      LOG.debug("Adding path {}", sourceFileStatus.getPath());
      globbedPaths.add(sourceFileStatus.getPath());
    } else {
      FileStatus[] inputs = fs.globStatus(sourcePath);
      if (inputs != null && inputs.length > 0) {
        for (FileStatus onePath : inputs) {
          LOG.debug("Adding path {}", onePath.getPath());
          globbedPaths.add(onePath.getPath());
        }
      } else {
        throw new InvalidInputException("Source path " + sourcePath + " doesn't exist");
      }
    }
  }
  doBuildListing(fileListWriter, options, globbedPaths);
}
 
Developer: HotelsDotCom, Project: circus-train, Lines: 39, Source: SimpleCopyListing.java
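A hedged sketch of how this method might be driven from inside SimpleCopyListing, assuming the getWriter helper shown in Example 10 below and a pre-built S3MapReduceCpOptions instance named options (both the path and options here are illustrative):

Path listFile = new Path("/tmp/file-listing.seq"); // hypothetical listing path
SequenceFile.Writer fileListWriter = getWriter(listFile);
try {
  doBuildListing(fileListWriter, options); // walks every source and appends one entry per path
} finally {
  fileListWriter.close();
}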

Example 2: traverseNonEmptyDirectory

import org.apache.hadoop.io.SequenceFile; // import the dependent package/class
private void traverseNonEmptyDirectory(
    SequenceFile.Writer fileListWriter,
    FileStatus sourceStatus,
    Path sourcePathRoot,
    S3MapReduceCpOptions options)
  throws IOException {
  FileSystem sourceFS = sourcePathRoot.getFileSystem(getConf());
  Stack<FileStatus> pathStack = new Stack<>();
  pathStack.push(sourceStatus);

  while (!pathStack.isEmpty()) {
    for (FileStatus child : getChildren(sourceFS, pathStack.pop())) {
      if (child.isFile()) {
        LOG.debug("Recording source-path: {} for copy.", sourceStatus.getPath());
        CopyListingFileStatus childCopyListingStatus = new CopyListingFileStatus(child);
        writeToFileListing(fileListWriter, childCopyListingStatus, sourcePathRoot, options);
      }
      if (isDirectoryAndNotEmpty(sourceFS, child)) {
        LOG.debug("Traversing non-empty source dir: {}", sourceStatus.getPath());
        pathStack.push(child);
      }
    }
  }
}
 
Developer: HotelsDotCom, Project: circus-train, Lines: 25, Source: SimpleCopyListing.java

Example 3: writeToFileListing

import org.apache.hadoop.io.SequenceFile; // import the dependent package/class
private void writeToFileListing(
    SequenceFile.Writer fileListWriter,
    CopyListingFileStatus fileStatus,
    Path sourcePathRoot,
    S3MapReduceCpOptions options)
  throws IOException {
  LOG.debug("REL PATH: {}, FULL PATH: {}", PathUtil.getRelativePath(sourcePathRoot, fileStatus.getPath()),
      fileStatus.getPath());

  FileStatus status = fileStatus;

  if (!shouldCopy(fileStatus.getPath(), options)) {
    return;
  }

  fileListWriter.append(new Text(PathUtil.getRelativePath(sourcePathRoot, fileStatus.getPath())), status);
  fileListWriter.sync();

  if (!fileStatus.isDirectory()) {
    totalBytesToCopy += fileStatus.getLen();
  }
  totalPaths++;
}
 
Developer: HotelsDotCom, Project: circus-train, Lines: 24, Source: SimpleCopyListing.java

Example 4: writeToFileListing

import org.apache.hadoop.io.SequenceFile; // import the dependent package/class
private void writeToFileListing(SequenceFile.Writer fileListWriter,
                                CopyListingFileStatus fileStatus,
                                Path sourcePathRoot,
                                DistCpOptions options) throws IOException {
  if (LOG.isDebugEnabled()) {
    LOG.debug("REL PATH: " + DistCpUtils.getRelativePath(sourcePathRoot,
      fileStatus.getPath()) + ", FULL PATH: " + fileStatus.getPath());
  }

  FileStatus status = fileStatus;

  if (!shouldCopy(fileStatus.getPath(), options)) {
    return;
  }

  fileListWriter.append(new Text(DistCpUtils.getRelativePath(sourcePathRoot,
      fileStatus.getPath())), status);
  fileListWriter.sync();

  if (!fileStatus.isDirectory()) {
    totalBytesToCopy += fileStatus.getLen();
  }
  totalPaths++;
}
 
Developer: naver, Project: hadoop, Lines: 25, Source: SimpleCopyListing.java

Example 5: createFiles

import org.apache.hadoop.io.SequenceFile; // import the dependent package/class
private static void createFiles(int length, int numFiles, Random random,
  Job job) throws IOException {
  Range[] ranges = createRanges(length, numFiles, random);

  for (int i = 0; i < numFiles; i++) {
    Path file = new Path(workDir, "test_" + i + ".seq");
    // create a file with length entries
    @SuppressWarnings("deprecation")
    SequenceFile.Writer writer =
      SequenceFile.createWriter(localFs, job.getConfiguration(), file,
                                IntWritable.class, BytesWritable.class);
    Range range = ranges[i];
    try {
      for (int j = range.start; j < range.end; j++) {
        IntWritable key = new IntWritable(j);
        byte[] data = new byte[random.nextInt(10)];
        random.nextBytes(data);
        BytesWritable value = new BytesWritable(data);
        writer.append(key, value);
      }
    } finally {
      writer.close();
    }
  }
}
 
Developer: naver, Project: hadoop, Lines: 26, Source: TestCombineSequenceFileInputFormat.java

Example 6: createWriters

import org.apache.hadoop.io.SequenceFile; // import the dependent package/class
private static SequenceFile.Writer[] createWriters(Path testdir,
    Configuration conf, int srcs, Path[] src) throws IOException {
  for (int i = 0; i < srcs; ++i) {
    src[i] = new Path(testdir, Integer.toString(i + 10, 36));
  }
  SequenceFile.Writer[] out = new SequenceFile.Writer[srcs];
  for (int i = 0; i < srcs; ++i) {
    out[i] = new SequenceFile.Writer(testdir.getFileSystem(conf), conf,
        src[i], IntWritable.class, IntWritable.class);
  }
  return out;
}
 
Developer: naver, Project: hadoop, Lines: 13, Source: TestDatamerge.java
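
Example 6 above calls the deprecated SequenceFile.Writer constructor that takes a FileSystem directly. A sketch of the equivalent creation through the non-deprecated Writer.Option factory method, assuming the same arguments, might look like this:

private static SequenceFile.Writer[] createWriters(Path testdir,
    Configuration conf, int srcs, Path[] src) throws IOException {
  for (int i = 0; i < srcs; ++i) {
    src[i] = new Path(testdir, Integer.toString(i + 10, 36));
  }
  SequenceFile.Writer[] out = new SequenceFile.Writer[srcs];
  for (int i = 0; i < srcs; ++i) {
    // Writer.file() resolves the FileSystem from the Configuration,
    // replacing the deprecated constructor's explicit FileSystem argument.
    out[i] = SequenceFile.createWriter(conf,
        SequenceFile.Writer.file(src[i]),
        SequenceFile.Writer.keyClass(IntWritable.class),
        SequenceFile.Writer.valueClass(IntWritable.class));
  }
  return out;
}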

Example 7: createControlFiles

import org.apache.hadoop.io.SequenceFile; // import the dependent package/class
/**
 * Create control files before a test run.
 * Number of files created is equal to the number of maps specified
 * 
 * @throws IOException on error
 */
private static void createControlFiles() throws IOException {
  FileSystem tempFS = FileSystem.get(config);
  LOG.info("Creating " + numberOfMaps + " control files");

  for (int i = 0; i < numberOfMaps; i++) {
    String strFileName = "NNBench_Controlfile_" + i;
    Path filePath = new Path(new Path(baseDir, CONTROL_DIR_NAME),
            strFileName);

    SequenceFile.Writer writer = null;
    try {
      writer = SequenceFile.createWriter(tempFS, config, filePath, Text.class, 
              LongWritable.class, CompressionType.NONE);
      writer.append(new Text(strFileName), new LongWritable(0L));
    } finally {
      if (writer != null) {
        writer.close();
      }
    }
  }
}
 
Developer: naver, Project: hadoop, Lines: 28, Source: NNBench.java

Example 8: createFiles

import org.apache.hadoop.io.SequenceFile; // import the dependent package/class
private static void createFiles(int length, int numFiles, Random random)
  throws IOException {
  Range[] ranges = createRanges(length, numFiles, random);

  for (int i = 0; i < numFiles; i++) {
    Path file = new Path(workDir, "test_" + i + ".seq");
    // create a file with length entries
    @SuppressWarnings("deprecation")
    SequenceFile.Writer writer =
      SequenceFile.createWriter(localFs, conf, file,
                                IntWritable.class, BytesWritable.class);
    Range range = ranges[i];
    try {
      for (int j = range.start; j < range.end; j++) {
        IntWritable key = new IntWritable(j);
        byte[] data = new byte[random.nextInt(10)];
        random.nextBytes(data);
        BytesWritable value = new BytesWritable(data);
        writer.append(key, value);
      }
    } finally {
      writer.close();
    }
  }
}
 
Developer: naver, Project: hadoop, Lines: 26, Source: TestCombineSequenceFileInputFormat.java

Example 9: writeSimpleSrc

import org.apache.hadoop.io.SequenceFile; // import the dependent package/class
private static Path[] writeSimpleSrc(Path testdir, JobConf conf,
    int srcs) throws IOException {
  SequenceFile.Writer[] out = null;
  Path[] src = new Path[srcs];
  try {
    out = createWriters(testdir, conf, srcs, src);
    final int capacity = srcs * 2 + 1;
    Text key = new Text();
    key.set("ignored");
    Text val = new Text();
    for (int k = 0; k < capacity; ++k) {
      for (int i = 0; i < srcs; ++i) {
        val.set(Integer.toString(k % srcs == 0 ? k * srcs : k * srcs + i) +
            "\t" + Integer.toString(10 * k + i));
        out[i].append(key, val);
        if (i == k) {
          // add duplicate key
          out[i].append(key, val);
        }
      }
    }
  } finally {
    if (out != null) {
      for (int i = 0; i < srcs; ++i) {
        if (out[i] != null) {
          out[i].close();
        }
      }
    }
  }
  return src;
}
 
Developer: naver, Project: hadoop, Lines: 32, Source: TestDataJoin.java

Example 10: getWriter

import org.apache.hadoop.io.SequenceFile; // import the dependent package/class
private SequenceFile.Writer getWriter(Path pathToListFile) throws IOException {
  FileSystem fs = pathToListFile.getFileSystem(getConf());
  if (fs.exists(pathToListFile)) {
    fs.delete(pathToListFile, false);
  }
  return SequenceFile.createWriter(getConf(), SequenceFile.Writer.file(pathToListFile),
      SequenceFile.Writer.keyClass(Text.class), SequenceFile.Writer.valueClass(CopyListingFileStatus.class),
      SequenceFile.Writer.compression(SequenceFile.CompressionType.NONE));
}
 
Developer: HotelsDotCom, Project: circus-train, Lines: 10, Source: SimpleCopyListing.java
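
Since SequenceFile.Writer implements java.io.Closeable, a caller would typically wrap the writer returned by getWriter above in try-with-resources so the listing file is closed even on failure. A hedged usage sketch (the paths and the appended entry are purely illustrative):

Path listFile = new Path("/tmp/copy-listing.seq"); // hypothetical listing path
FileSystem fs = listFile.getFileSystem(getConf());
try (SequenceFile.Writer writer = getWriter(listFile)) {
  // Keys are source-relative paths; values are the corresponding file statuses.
  FileStatus status = fs.getFileStatus(new Path("/source/file")); // illustrative source
  writer.append(new Text("/relative/path"), new CopyListingFileStatus(status));
  writer.sync(); // write a sync marker so readers can split the listing
}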

Example 11: writePartitions

import org.apache.hadoop.io.SequenceFile; // import the dependent package/class
/**
 * Write out a {@link SequenceFile} that can be read by
 * {@link TotalOrderPartitioner} that contains the split points in startKeys.
 */
@SuppressWarnings("deprecation")
private static void writePartitions(Configuration conf, Path partitionsPath,
    List<ImmutableBytesWritable> startKeys) throws IOException {
  LOG.info("Writing partition information to " + partitionsPath);
  if (startKeys.isEmpty()) {
    throw new IllegalArgumentException("No regions passed");
  }

  // We're generating a list of split points, and we don't ever
  // have keys < the first region (which has an empty start key)
  // so we need to remove it. Otherwise we would end up with an
  // empty reducer with index 0
  TreeSet<ImmutableBytesWritable> sorted =
    new TreeSet<ImmutableBytesWritable>(startKeys);

  ImmutableBytesWritable first = sorted.first();
  if (!first.equals(HConstants.EMPTY_BYTE_ARRAY)) {
    throw new IllegalArgumentException(
        "First region of table should have empty start key. Instead has: "
        + Bytes.toStringBinary(first.get()));
  }
  sorted.remove(first);

  // Write the actual file
  FileSystem fs = partitionsPath.getFileSystem(conf);
  SequenceFile.Writer writer = SequenceFile.createWriter(
    fs, conf, partitionsPath, ImmutableBytesWritable.class,
    NullWritable.class);

  try {
    for (ImmutableBytesWritable startKey : sorted) {
      writer.append(startKey, NullWritable.get());
    }
  } finally {
    writer.close();
  }
}
 
Developer: fengchen8086, Project: ditb, Lines: 42, Source: HFileOutputFormat2.java

Example 12: getWriter

import org.apache.hadoop.io.SequenceFile; // import the dependent package/class
private SequenceFile.Writer getWriter(Path pathToListFile) throws IOException {
  FileSystem fs = pathToListFile.getFileSystem(getConf());
  if (fs.exists(pathToListFile)) {
    fs.delete(pathToListFile, false);
  }
  return SequenceFile.createWriter(getConf(),
          SequenceFile.Writer.file(pathToListFile),
          SequenceFile.Writer.keyClass(Text.class),
          SequenceFile.Writer.valueClass(CopyListingFileStatus.class),
          SequenceFile.Writer.compression(SequenceFile.CompressionType.NONE));
}
 
Developer: naver, Project: hadoop, Lines: 12, Source: SimpleCopyListing.java

Example 13: createControlFile

import org.apache.hadoop.io.SequenceFile; // import the dependent package/class
private static void createControlFile(
                                      FileSystem fs,
                                      int fileSize, // in MB 
                                      int nrFiles
                                      ) throws IOException {
  LOG.info("creating control file: "+fileSize+" mega bytes, "+nrFiles+" files");

  fs.delete(CONTROL_DIR, true);

  for(int i=0; i < nrFiles; i++) {
    String name = getFileName(i);
    Path controlFile = new Path(CONTROL_DIR, "in_file_" + name);
    SequenceFile.Writer writer = null;
    try {
      writer = SequenceFile.createWriter(fs, fsConfig, controlFile,
                                         Text.class, LongWritable.class,
                                         CompressionType.NONE);
      writer.append(new Text(name), new LongWritable(fileSize));
    } catch (Exception e) {
      throw new IOException(e.getLocalizedMessage());
    } finally {
      if (writer != null) {
        writer.close();
      }
      writer = null;
    }
  }
  LOG.info("created control files for: "+nrFiles+" files");
}
 
Developer: naver, Project: hadoop, Lines: 29, Source: DFSCIOTest.java

Example 14: getRecordWriter

import org.apache.hadoop.io.SequenceFile; // import the dependent package/class
public RecordWriter<K, V> getRecordWriter(
                                        FileSystem ignored, JobConf job,
                                        String name, Progressable progress)
  throws IOException {
  // get the path of the temporary output file 
  Path file = FileOutputFormat.getTaskOutputPath(job, name);
  
  FileSystem fs = file.getFileSystem(job);
  CompressionCodec codec = null;
  CompressionType compressionType = CompressionType.NONE;
  if (getCompressOutput(job)) {
    // find the kind of compression to do
    compressionType = getOutputCompressionType(job);

    // find the right codec
    Class<? extends CompressionCodec> codecClass =
        getOutputCompressorClass(job, DefaultCodec.class);
    codec = ReflectionUtils.newInstance(codecClass, job);
  }
  final SequenceFile.Writer out = 
    SequenceFile.createWriter(fs, job, file,
                              job.getOutputKeyClass(),
                              job.getOutputValueClass(),
                              compressionType,
                              codec,
                              progress);

  return new RecordWriter<K, V>() {

      public void write(K key, V value)
        throws IOException {

        out.append(key, value);
      }

      public void close(Reporter reporter) throws IOException { out.close(); }
    };
}
 
Developer: naver, Project: hadoop, Lines: 39, Source: SequenceFileOutputFormat.java
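
To exercise the compression branch in getRecordWriter above, a job would enable output compression through the standard old-API (org.apache.hadoop.mapred) configuration helpers. A sketch, assuming BLOCK compression with the default codec:

JobConf job = new JobConf();
// Compress output; BLOCK compression batches many records per compressed block.
FileOutputFormat.setCompressOutput(job, true);
SequenceFileOutputFormat.setOutputCompressionType(job, SequenceFile.CompressionType.BLOCK);
FileOutputFormat.setOutputCompressorClass(job, DefaultCodec.class);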

Example 15: cleanup

import org.apache.hadoop.io.SequenceFile; // import the dependent package/class
/**
 * Reduce task done, write output to a file.
 */
@Override
public void cleanup(Context context) throws IOException {
  //write output to a file
  Configuration conf = context.getConfiguration();
  Path outDir = new Path(conf.get(FileOutputFormat.OUTDIR));
  Path outFile = new Path(outDir, "reduce-out");
  FileSystem fileSys = FileSystem.get(conf);
  SequenceFile.Writer writer = SequenceFile.createWriter(fileSys, conf,
      outFile, LongWritable.class, LongWritable.class, 
      CompressionType.NONE);
  writer.append(new LongWritable(numInside), new LongWritable(numOutside));
  writer.close();
}
 
Developer: naver, Project: hadoop, Lines: 17, Source: QuasiMonteCarlo.java


Note: The org.apache.hadoop.io.SequenceFile.Writer examples in this article were compiled by 纯净天空 from open-source code and documentation platforms such as GitHub and MSDocs. The code snippets were selected from open-source projects contributed by many developers; copyright of the source code belongs to the original authors, and distribution and use should follow the license of the corresponding project. Do not reproduce without permission.