

Java SequenceFile.createWriter Method Code Examples

This article collects typical usage examples of the org.apache.hadoop.io.SequenceFile.createWriter method in Java. If you are wondering how SequenceFile.createWriter is used in practice, the curated code examples below may help. You can also explore further usage examples of the enclosing class, org.apache.hadoop.io.SequenceFile.


The following presents 15 code examples of the SequenceFile.createWriter method, sorted by popularity.
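Before the project-level examples, here is a minimal, self-contained sketch of the method (assuming Hadoop 2.x or later; the output path and record contents are hypothetical). It uses the non-deprecated Writer.Option overload of SequenceFile.createWriter, whereas several examples below use the older (fs, conf, path, ...) overloads, which are deprecated in newer Hadoop releases.

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.SequenceFile;
import org.apache.hadoop.io.Text;

public class SequenceFileWriteSketch {
  public static void main(String[] args) throws Exception {
    Configuration conf = new Configuration();
    Path path = new Path("/tmp/demo.seq"); // hypothetical output path

    SequenceFile.Writer writer = null;
    try {
      // Option-based overload: pass the file, key/value classes and
      // compression type as Writer.Option values.
      writer = SequenceFile.createWriter(conf,
          SequenceFile.Writer.file(path),
          SequenceFile.Writer.keyClass(IntWritable.class),
          SequenceFile.Writer.valueClass(Text.class),
          SequenceFile.Writer.compression(SequenceFile.CompressionType.NONE));
      for (int i = 0; i < 3; i++) {
        writer.append(new IntWritable(i), new Text("record-" + i));
      }
    } finally {
      if (writer != null) {
        writer.close();
      }
    }
  }
}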

Example 1: open

import org.apache.hadoop.io.SequenceFile; // import the package/class this method depends on
protected void open(Path dstPath, CompressionCodec codeC,
    CompressionType compType, Configuration conf, FileSystem hdfs)
        throws IOException {
  if(useRawLocalFileSystem) {
    if(hdfs instanceof LocalFileSystem) {
      hdfs = ((LocalFileSystem)hdfs).getRaw();
    } else {
      logger.warn("useRawLocalFileSystem is set to true but file system " +
          "is not of type LocalFileSystem: " + hdfs.getClass().getName());
    }
  }
  if (conf.getBoolean("hdfs.append.support", false) && hdfs.isFile(dstPath)) {
    outStream = hdfs.append(dstPath);
  } else {
    outStream = hdfs.create(dstPath);
  }
  writer = SequenceFile.createWriter(conf, outStream,
      serializer.getKeyClass(), serializer.getValueClass(), compType, codeC);

  registerCurrentStream(outStream, hdfs, dstPath);
}
 
Developer: Transwarp-DE, Project: Transwarp-Sample-Code, Lines: 23, Source: HDFSSequenceFile.java

Example 2: createTempFile

import org.apache.hadoop.io.SequenceFile; // import the package/class this method depends on
@SuppressWarnings("deprecation")
void createTempFile(Path p, Configuration conf) throws IOException {
  SequenceFile.Writer writer = null;
  try {
    writer = SequenceFile.createWriter(fs, conf, p,
                                       Text.class, Text.class,
                                       CompressionType.NONE);
    writer.append(new Text("text"), new Text("moretext"));
  } catch(Exception e) {
    throw new IOException(e.getLocalizedMessage());
  } finally {
    if (writer != null) {
      writer.close();
    }
    writer = null;
  }
  LOG.info("created: " + p);
}
 
Developer: naver, Project: hadoop, Lines: 19, Source: TestClientDistributedCacheManager.java

Example 3: createControlFiles

import org.apache.hadoop.io.SequenceFile; // import the package/class this method depends on
/**
 * Create control files before a test run.
 * Number of files created is equal to the number of maps specified
 * 
 * @throws IOException on error
 */
private static void createControlFiles() throws IOException {
  FileSystem tempFS = FileSystem.get(config);
  LOG.info("Creating " + numberOfMaps + " control files");

  for (int i = 0; i < numberOfMaps; i++) {
    String strFileName = "NNBench_Controlfile_" + i;
    Path filePath = new Path(new Path(baseDir, CONTROL_DIR_NAME),
            strFileName);

    SequenceFile.Writer writer = null;
    try {
      writer = SequenceFile.createWriter(tempFS, config, filePath, Text.class, 
              LongWritable.class, CompressionType.NONE);
      writer.append(new Text(strFileName), new LongWritable(0L));
    } finally {
      if (writer != null) {
        writer.close();
      }
    }
  }
}
 
Developer: naver, Project: hadoop, Lines: 28, Source: NNBench.java

Example 4: createInputFile

import org.apache.hadoop.io.SequenceFile; // import the package/class this method depends on
private void createInputFile(String rootName) throws IOException {
  cleanup();  // clean up if previous run failed

  Path inputFile = new Path(MAP_INPUT_DIR, "in_file");
  SequenceFile.Writer writer =
    SequenceFile.createWriter(fs, fsConfig, inputFile, 
                              Text.class, LongWritable.class, CompressionType.NONE);
  
  try {
    nrFiles = 0;
    listSubtree(new Path(rootName), writer);
  } finally {
    writer.close();
  }
  LOG.info("Created map input files.");
}
 
Developer: naver, Project: hadoop, Lines: 17, Source: DistributedFSCheck.java

Example 5: createFiles

import org.apache.hadoop.io.SequenceFile; // import the package/class this method depends on
private static void createFiles(int length, int numFiles, Random random,
  Job job) throws IOException {
  Range[] ranges = createRanges(length, numFiles, random);

  for (int i = 0; i < numFiles; i++) {
    Path file = new Path(workDir, "test_" + i + ".seq");
    // create a file with length entries
    @SuppressWarnings("deprecation")
    SequenceFile.Writer writer =
      SequenceFile.createWriter(localFs, job.getConfiguration(), file,
                                IntWritable.class, BytesWritable.class);
    Range range = ranges[i];
    try {
      for (int j = range.start; j < range.end; j++) {
        IntWritable key = new IntWritable(j);
        byte[] data = new byte[random.nextInt(10)];
        random.nextBytes(data);
        BytesWritable value = new BytesWritable(data);
        writer.append(key, value);
      }
    } finally {
      writer.close();
    }
  }
}
 
Developer: naver, Project: hadoop, Lines: 26, Source: TestCombineSequenceFileInputFormat.java

Example 6: writePartitionFile

import org.apache.hadoop.io.SequenceFile; // import the package/class this method depends on
private static <T extends WritableComparable<?>> Path writePartitionFile(
    String testname, Configuration conf, T[] splits) throws IOException {
  final FileSystem fs = FileSystem.getLocal(conf);
  final Path testdir = new Path(System.getProperty("test.build.data", "/tmp")
                               ).makeQualified(fs);
  Path p = new Path(testdir, testname + "/_partition.lst");
  TotalOrderPartitioner.setPartitionFile(conf, p);
  conf.setInt(MRJobConfig.NUM_REDUCES, splits.length + 1);
  SequenceFile.Writer w = null;
  try {
    w = SequenceFile.createWriter(fs, conf, p,
        splits[0].getClass(), NullWritable.class,
        SequenceFile.CompressionType.NONE);
    for (int i = 0; i < splits.length; ++i) {
      w.append(splits[i], NullWritable.get());
    }
  } finally {
    if (null != w)
      w.close();
  }
  return p;
}
 
Developer: naver, Project: hadoop, Lines: 23, Source: TestTotalOrderPartitioner.java

Example 7: testSequenceFileSync

import org.apache.hadoop.io.SequenceFile; // import the package/class this method depends on
/** Test hsync via SequenceFiles */
@Test
public void testSequenceFileSync() throws Exception {
  Configuration conf = new HdfsConfiguration();
  MiniDFSCluster cluster = new MiniDFSCluster.Builder(conf).build();

  final FileSystem fs = cluster.getFileSystem();
  final Path p = new Path("/testSequenceFileSync/foo");
  final int len = 1 << 16;
  FSDataOutputStream out = fs.create(p, FsPermission.getDefault(),
      EnumSet.of(CreateFlag.CREATE, CreateFlag.OVERWRITE, CreateFlag.SYNC_BLOCK),
      4096, (short) 1, len, null);
  Writer w = SequenceFile.createWriter(new Configuration(),
      Writer.stream(out),
      Writer.keyClass(RandomDatum.class),
      Writer.valueClass(RandomDatum.class),
      Writer.compression(CompressionType.NONE, new DefaultCodec()));
  w.hflush();
  checkSyncMetric(cluster, 0);
  w.hsync();
  checkSyncMetric(cluster, 1);
  int seed = new Random().nextInt();
  RandomDatum.Generator generator = new RandomDatum.Generator(seed);
  generator.next();
  w.append(generator.getKey(), generator.getValue());
  w.hsync();
  checkSyncMetric(cluster, 2);
  w.close();
  checkSyncMetric(cluster, 2);
  out.close();
  checkSyncMetric(cluster, 3);
  cluster.shutdown();
}
 
Developer: naver, Project: hadoop, Lines: 34, Source: TestHSync.java

Example 8: createSequenceFile

import org.apache.hadoop.io.SequenceFile; // import the package/class this method depends on
/**
 * Create a data file in SequenceFile format that gets exported to the db.
 * @param fileNum the number of the file (for multi-file export).
 * @param numRecords how many records to write to the file.
 * @param className the table class name to instantiate and populate
 *          for each record.
 */
private void createSequenceFile(int fileNum, int numRecords, String className)
    throws IOException {

  try {
    // Instantiate the value record object via reflection.
    Class cls = Class.forName(className, true,
        Thread.currentThread().getContextClassLoader());
    SqoopRecord record = (SqoopRecord) ReflectionUtils.newInstance(
        cls, new Configuration());

    // Create the SequenceFile.
    Configuration conf = new Configuration();
    if (!BaseSqoopTestCase.isOnPhysicalCluster()) {
      conf.set(CommonArgs.FS_DEFAULT_NAME, CommonArgs.LOCAL_FS);
    }
    FileSystem fs = FileSystem.get(conf);
    Path tablePath = getTablePath();
    Path filePath = new Path(tablePath, "part" + fileNum);
    fs.mkdirs(tablePath);
    SequenceFile.Writer w = SequenceFile.createWriter(
        fs, conf, filePath, LongWritable.class, cls);

    // Now write the data.
    int startId = fileNum * numRecords;
    for (int i = 0; i < numRecords; i++) {
      record.parse(getRecordLine(startId + i));
      w.append(new LongWritable(startId + i), record);
    }

    w.close();
  } catch (ClassNotFoundException cnfe) {
    throw new IOException(cnfe);
  } catch (RecordParser.ParseError pe) {
    throw new IOException(pe);
  }
}
 
Developer: aliyun, Project: aliyun-maxcompute-data-collectors, Lines: 44, Source: TestExport.java

Example 9: createControlFile

import org.apache.hadoop.io.SequenceFile; // import the package/class this method depends on
public static void createControlFile(FileSystem fs,
                                     long megaBytes, int numFiles,
                                     long seed) throws Exception {

  LOG.info("creating control file: "+megaBytes+" bytes, "+numFiles+" files");

  Path controlFile = new Path(CONTROL_DIR, "files");
  fs.delete(controlFile, true);
  Random random = new Random(seed);

  SequenceFile.Writer writer =
    SequenceFile.createWriter(fs, conf, controlFile, 
                              Text.class, LongWritable.class, CompressionType.NONE);

  long totalSize = 0;
  long maxSize = ((megaBytes / numFiles) * 2) + 1;
  try {
    while (totalSize < megaBytes) {
      Text name = new Text(Long.toString(random.nextLong()));

      long size = random.nextLong();
      if (size < 0)
        size = -size;
      size = size % maxSize;

      //LOG.info(" adding: name="+name+" size="+size);

      writer.append(name, new LongWritable(size));

      totalSize += size;
    }
  } finally {
    writer.close();
  }
  LOG.info("created control file for: "+totalSize+" bytes");
}
 
Developer: naver, Project: hadoop, Lines: 37, Source: TestFileSystem.java

Example 10: createControlFile

import org.apache.hadoop.io.SequenceFile; // import the package/class this method depends on
@SuppressWarnings("deprecation")
private void createControlFile(FileSystem fs,
                                long nrBytes, // in bytes
                                int nrFiles
                              ) throws IOException {
  LOG.info("creating control file: "+nrBytes+" bytes, "+nrFiles+" files");

  Path controlDir = getControlDir(config);
  fs.delete(controlDir, true);

  for(int i=0; i < nrFiles; i++) {
    String name = getFileName(i);
    Path controlFile = new Path(controlDir, "in_file_" + name);
    SequenceFile.Writer writer = null;
    try {
      writer = SequenceFile.createWriter(fs, config, controlFile,
                                         Text.class, LongWritable.class,
                                         CompressionType.NONE);
      writer.append(new Text(name), new LongWritable(nrBytes));
    } catch(Exception e) {
      throw new IOException(e.getLocalizedMessage());
    } finally {
      if (writer != null)
        writer.close();
      writer = null;
    }
  }
  LOG.info("created control files for: "+nrFiles+" files");
}
 
Developer: naver, Project: hadoop, Lines: 30, Source: TestDFSIO.java

Example 11: writePartitionFile

import org.apache.hadoop.io.SequenceFile; // import the package/class this method depends on
void writePartitionFile(Configuration conf, Path path) throws IOException {
  FileSystem fs = path.getFileSystem(conf);
  @SuppressWarnings("deprecation")
  SequenceFile.Writer writer = SequenceFile.createWriter(
    fs, conf, path, ImmutableBytesWritable.class, NullWritable.class);
  
  for (int i = 0; i < partitions.size(); i++) {
    writer.append(partitions.get(i), NullWritable.get());
  }
  writer.close();
}
 
Developer: fengchen8086, Project: ditb, Lines: 12, Source: HashTable.java

Example 12: writePartitionFile

import org.apache.hadoop.io.SequenceFile; // import the package/class this method depends on
/**
 * Write a partition file for the given job, using the Sampler provided.
 * Queries the sampler for a sample keyset, sorts by the output key
 * comparator, selects the keys for each rank, and writes to the destination
 * returned from {@link TotalOrderPartitioner#getPartitionFile}.
 */
@SuppressWarnings("unchecked") // getInputFormat, getOutputKeyComparator
public static <K,V> void writePartitionFile(Job job, Sampler<K,V> sampler) 
    throws IOException, ClassNotFoundException, InterruptedException {
  Configuration conf = job.getConfiguration();
  final InputFormat inf = 
      ReflectionUtils.newInstance(job.getInputFormatClass(), conf);
  int numPartitions = job.getNumReduceTasks();
  K[] samples = (K[])sampler.getSample(inf, job);
  LOG.info("Using " + samples.length + " samples");
  RawComparator<K> comparator =
    (RawComparator<K>) job.getSortComparator();
  Arrays.sort(samples, comparator);
  Path dst = new Path(TotalOrderPartitioner.getPartitionFile(conf));
  FileSystem fs = dst.getFileSystem(conf);
  if (fs.exists(dst)) {
    fs.delete(dst, false);
  }
  SequenceFile.Writer writer = SequenceFile.createWriter(fs, 
    conf, dst, job.getMapOutputKeyClass(), NullWritable.class);
  NullWritable nullValue = NullWritable.get();
  float stepSize = samples.length / (float) numPartitions;
  int last = -1;
  for(int i = 1; i < numPartitions; ++i) {
    int k = Math.round(stepSize * i);
    while (last >= k && comparator.compare(samples[last], samples[k]) == 0) {
      ++k;
    }
    writer.append(samples[k], nullValue);
    last = k;
  }
  writer.close();
}
 
Developer: naver, Project: hadoop, Lines: 39, Source: InputSampler.java

Example 13: writeSkippedRec

import org.apache.hadoop.io.SequenceFile; // import the package/class this method depends on
@SuppressWarnings("unchecked")
private void writeSkippedRec(K key, V value) throws IOException{
  if(skipWriter==null) {
    Path skipDir = SkipBadRecords.getSkipOutputPath(conf);
    Path skipFile = new Path(skipDir, getTaskID().toString());
    skipWriter = 
      SequenceFile.createWriter(
          skipFile.getFileSystem(conf), conf, skipFile,
          (Class<K>) createKey().getClass(),
          (Class<V>) createValue().getClass(), 
          CompressionType.BLOCK, getTaskReporter());
  }
  skipWriter.append(key, value);
}
 
Developer: naver, Project: hadoop, Lines: 15, Source: MapTask.java

Example 14: configure

import org.apache.hadoop.io.SequenceFile; // import the package/class this method depends on
@Before
public void configure() throws Exception {
  Path testdir = new Path(TEST_DIR.getAbsolutePath());
  Path inDir = new Path(testdir, "in");
  Path outDir = new Path(testdir, "out");
  FileSystem fs = FileSystem.get(conf);
  fs.delete(testdir, true);
  conf.setInt(JobContext.IO_SORT_MB, 1);
  conf.setInputFormat(SequenceFileInputFormat.class);
  FileInputFormat.setInputPaths(conf, inDir);
  FileOutputFormat.setOutputPath(conf, outDir);
  conf.setMapperClass(TextGen.class);
  conf.setReducerClass(TextReduce.class);
  conf.setOutputKeyClass(Text.class);
  conf.setOutputValueClass(Text.class); 
  
  conf.set(MRConfig.FRAMEWORK_NAME, MRConfig.LOCAL_FRAMEWORK_NAME);   
 
  conf.setOutputFormat(SequenceFileOutputFormat.class);
  if (!fs.mkdirs(testdir)) {
    throw new IOException("Mkdirs failed to create " + testdir.toString());
  }
  if (!fs.mkdirs(inDir)) {
    throw new IOException("Mkdirs failed to create " + inDir.toString());
  }
  Path inFile = new Path(inDir, "part0");
  SequenceFile.Writer writer = SequenceFile.createWriter(fs, conf, inFile,
                                                         Text.class, Text.class);
  writer.append(new Text("rec: 1"), new Text("Hello"));
  writer.close();
  
  jc = new JobClient(conf);
}
 
Developer: naver, Project: hadoop, Lines: 34, Source: TestMapOutputType.java

Example 15: writeDistCacheFilesList

import org.apache.hadoop.io.SequenceFile; // import the package/class this method depends on
/**
 * Write the list of distributed cache files in the decreasing order of
 * file sizes into the sequence file. This file will be input to the job
 * {@link GenerateDistCacheData}.
 * Also validates if -generate option is missing and distributed cache files
 * are missing.
 * @return exit code
 * @throws IOException
 */
private int writeDistCacheFilesList()
    throws IOException {
  // Sort the distributed cache files in the decreasing order of file sizes.
  List dcFiles = new ArrayList(distCacheFiles.entrySet());
  Collections.sort(dcFiles, new Comparator() {
    public int compare(Object dc1, Object dc2) {
      return ((Comparable) ((Map.Entry) (dc2)).getValue())
          .compareTo(((Map.Entry) (dc1)).getValue());
    }
  });

  // write the sorted distributed cache files to the sequence file
  FileSystem fs = FileSystem.get(conf);
  Path distCacheFilesList = new Path(distCachePath, "_distCacheFiles.txt");
  conf.set(GenerateDistCacheData.GRIDMIX_DISTCACHE_FILE_LIST,
      distCacheFilesList.toString());
  SequenceFile.Writer src_writer = SequenceFile.createWriter(fs, conf,
      distCacheFilesList, LongWritable.class, BytesWritable.class,
      SequenceFile.CompressionType.NONE);

  // Total number of unique distributed cache files
  int fileCount = dcFiles.size();
  long byteCount = 0; // Total size of all distributed cache files
  long bytesSync = 0; // Bytes after previous sync; used to add sync marker

  for (Iterator it = dcFiles.iterator(); it.hasNext();) {
    Map.Entry entry = (Map.Entry)it.next();
    LongWritable fileSize =
        new LongWritable(Long.parseLong(entry.getValue().toString()));
    BytesWritable filePath =
        new BytesWritable(
        entry.getKey().toString().getBytes(charsetUTF8));

    byteCount += fileSize.get();
    bytesSync += fileSize.get();
    if (bytesSync > AVG_BYTES_PER_MAP) {
      src_writer.sync();
      bytesSync = fileSize.get();
    }
    src_writer.append(fileSize, filePath);
  }
  if (src_writer != null) {
    src_writer.close();
  }
  // Set delete on exit for 'dist cache files list' as it is not needed later.
  fs.deleteOnExit(distCacheFilesList);

  conf.setInt(GenerateDistCacheData.GRIDMIX_DISTCACHE_FILE_COUNT, fileCount);
  conf.setLong(GenerateDistCacheData.GRIDMIX_DISTCACHE_BYTE_COUNT, byteCount);
  LOG.info("Number of HDFS based distributed cache files to be generated is "
      + fileCount + ". Total size of HDFS based distributed cache files "
      + "to be generated is " + byteCount);

  if (!shouldGenerateDistCacheData() && fileCount > 0) {
    LOG.error("Missing " + fileCount + " distributed cache files under the "
        + " directory\n" + distCachePath + "\nthat are needed for gridmix"
        + " to emulate distributed cache load. Either use -generate\noption"
        + " to generate distributed cache data along with input data OR "
        + "disable\ndistributed cache emulation by configuring '"
        + DistributedCacheEmulator.GRIDMIX_EMULATE_DISTRIBUTEDCACHE
        + "' to false.");
    return Gridmix.MISSING_DIST_CACHE_FILES_ERROR;
  }
  return 0;
}
 
Developer: naver, Project: hadoop, Lines: 75, Source: DistributedCacheEmulator.java


Note: The org.apache.hadoop.io.SequenceFile.createWriter examples in this article were compiled from open-source code and documentation platforms such as GitHub and MSDocs. The snippets are taken from open-source projects contributed by their original authors; copyright remains with those authors, and any use or distribution must follow the corresponding project's license. Do not reproduce without permission.