

Java SequenceFile.Reader Method Code Examples

This article collects typical usage examples of the Java SequenceFile.Reader method from org.apache.hadoop.io.SequenceFile. If you are unsure what SequenceFile.Reader does, how to call it, or what it looks like in practice, the curated examples below should help. You can also explore further usage examples of the enclosing class, org.apache.hadoop.io.SequenceFile.


The sections below present 15 code examples of the SequenceFile.Reader method, ordered roughly by popularity.
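Before turning to the examples, here is a minimal, self-contained sketch of the usual write-then-read round trip. It is not taken from any of the projects below; the file name demo.seq and the class name SequenceFileReaderDemo are placeholders for illustration. It uses the options-based SequenceFile.Reader(Configuration, Reader.Option...) constructor; several examples further down use the older Reader(FileSystem, Path, Configuration) form, which recent Hadoop releases mark as deprecated.

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.SequenceFile;
import org.apache.hadoop.io.Text;

public class SequenceFileReaderDemo {
  public static void main(String[] args) throws Exception {
    Configuration conf = new Configuration();
    Path path = new Path("demo.seq"); // placeholder path, resolved against the default file system

    // Write a few key/value records so there is something to read back.
    try (SequenceFile.Writer writer = SequenceFile.createWriter(conf,
        SequenceFile.Writer.file(path),
        SequenceFile.Writer.keyClass(IntWritable.class),
        SequenceFile.Writer.valueClass(Text.class))) {
      for (int i = 0; i < 3; i++) {
        writer.append(new IntWritable(i), new Text("value-" + i));
      }
    }

    // Read the records back; try-with-resources closes the reader.
    try (SequenceFile.Reader reader = new SequenceFile.Reader(conf,
        SequenceFile.Reader.file(path))) {
      IntWritable key = new IntWritable();
      Text value = new Text();
      while (reader.next(key, value)) {
        System.out.println(key.get() + " => " + value);
      }
    }
  }
}

Closing both the writer and the reader via try-with-resources avoids the stream leaks visible in a few of the scraped examples below, which either rely on IOUtils.closeStream or never close the reader at all.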

Example 1: SequenceFileIterator

import org.apache.hadoop.io.SequenceFile; // import the package/class this method depends on
/**
 * @throws IOException if path can't be read, or its key or value class can't be instantiated
 */
@SuppressWarnings("unchecked") // for the key/value class casts below
public SequenceFileIterator(Path path, boolean reuseKeyValueInstances, Configuration conf) throws IOException {
  key = null;
  value = null;
  FileSystem fs = path.getFileSystem(conf);
  path = path.makeQualified(fs);
  reader = new SequenceFile.Reader(fs, path, conf);
  this.conf = conf;
  keyClass = (Class<K>) reader.getKeyClass();
  valueClass = (Class<V>) reader.getValueClass();
  noValue = NullWritable.class.equals(valueClass);
  this.reuseKeyValueInstances = reuseKeyValueInstances;
}
 
Author: huyang1, Project: LDA, Lines: 17, Source: SequenceFileIterator.java

Example 2: checkResult

import org.apache.hadoop.io.SequenceFile; // import the package/class this method depends on
private void checkResult(Path listFile, int count) throws IOException {
  if (count == 0) {
    return;
  }

  int recCount = 0;
  SequenceFile.Reader reader = new SequenceFile.Reader(config,
                                          SequenceFile.Reader.file(listFile));
  try {
    Text relPath = new Text();
    CopyListingFileStatus fileStatus = new CopyListingFileStatus();
    while (reader.next(relPath, fileStatus)) {
      if (fileStatus.isDirectory() && relPath.toString().equals("")) {
        // ignore root with empty relPath, which is an entry to be 
        // used for preserving root attributes etc.
        continue;
      }
      Assert.assertEquals(fileStatus.getPath().toUri().getPath(), map.get(relPath.toString()));
      recCount++;
    }
  } finally {
    IOUtils.closeStream(reader);
  }
  Assert.assertEquals(recCount, count);
}
 
Author: naver, Project: hadoop, Lines: 26, Source: TestFileBasedCopyListing.java

Example 3: getListingFileReader

import org.apache.hadoop.io.SequenceFile; // import the package/class this method depends on
private SequenceFile.Reader getListingFileReader(Configuration configuration) {

    final Path listingFilePath = getListingFilePath(configuration);
    try {
      final FileSystem fileSystem = listingFilePath.getFileSystem(configuration);
      if (!fileSystem.exists(listingFilePath))
        throw new IllegalArgumentException("Listing file doesn't exist at: "
                                           + listingFilePath);

      return new SequenceFile.Reader(configuration,
                                     SequenceFile.Reader.file(listingFilePath));
    }
    catch (IOException exception) {
      LOG.error("Couldn't find listing file at: " + listingFilePath, exception);
      throw new IllegalArgumentException("Couldn't find listing-file at: "
                                         + listingFilePath, exception);
    }
  }
 
Author: naver, Project: hadoop, Lines: 19, Source: UniformSizeInputFormat.java

Example 4: readPartitions

import org.apache.hadoop.io.SequenceFile; // import the package/class this method depends on
/**
 * Read the cut points from the given IFile.
 * @param fs The file system
 * @param p The path to read
 * @param keyClass The map output key class
 * @param conf The job config
 * @throws IOException
 */
                               // matching key types enforced by passing in
@SuppressWarnings("unchecked") // map output key class
private K[] readPartitions(FileSystem fs, Path p, Class<K> keyClass,
    Configuration conf) throws IOException {
  SequenceFile.Reader reader = new SequenceFile.Reader(fs, p, conf);
  ArrayList<K> parts = new ArrayList<K>();
  K key = ReflectionUtils.newInstance(keyClass, conf);
  NullWritable value = NullWritable.get();
  try {
    while (reader.next(key, value)) {
      parts.add(key);
      key = ReflectionUtils.newInstance(keyClass, conf);
    }
    reader.close();
    reader = null;
  } finally {
    IOUtils.cleanup(LOG, reader);
  }
  return parts.toArray((K[])Array.newInstance(keyClass, parts.size()));
}
 
Author: naver, Project: hadoop, Lines: 29, Source: TotalOrderPartitioner.java

Example 5: getSplits

import org.apache.hadoop.io.SequenceFile; // import the package/class this method depends on
public List<InputSplit> getSplits(JobContext job)
    throws IOException {

  Configuration conf = job.getConfiguration();
  Path src = new Path(conf.get(INDIRECT_INPUT_FILE, null));
  FileSystem fs = src.getFileSystem(conf);

  List<InputSplit> splits = new ArrayList<InputSplit>();
  LongWritable key = new LongWritable();
  Text value = new Text();
  for (SequenceFile.Reader sl = new SequenceFile.Reader(fs, src, conf);
       sl.next(key, value);) {
    splits.add(new IndirectSplit(new Path(value.toString()), key.get()));
  }

  return splits;
}
 
Author: naver, Project: hadoop, Lines: 18, Source: GenericMRLoadGenerator.java

Example 6: getSplits

import org.apache.hadoop.io.SequenceFile; // import the package/class this method depends on
public InputSplit[] getSplits(JobConf job, int numSplits)
    throws IOException {

  Path src = new Path(job.get(
    org.apache.hadoop.mapreduce.GenericMRLoadGenerator.INDIRECT_INPUT_FILE,
    null));
  FileSystem fs = src.getFileSystem(job);

  ArrayList<IndirectSplit> splits = new ArrayList<IndirectSplit>(numSplits);
  LongWritable key = new LongWritable();
  Text value = new Text();
  for (SequenceFile.Reader sl = new SequenceFile.Reader(fs, src, job);
       sl.next(key, value);) {
    splits.add(new IndirectSplit(new Path(value.toString()), key.get()));
  }

  return splits.toArray(new IndirectSplit[splits.size()]);
}
 
Author: naver, Project: hadoop, Lines: 19, Source: GenericMRLoadGenerator.java

Example 7: finalize

import org.apache.hadoop.io.SequenceFile; // import the package/class this method depends on
static private void finalize(Configuration conf, JobConf jobconf,
    final Path destPath, String presevedAttributes) throws IOException {
  if (presevedAttributes == null) {
    return;
  }
  EnumSet<FileAttribute> preseved = FileAttribute.parse(presevedAttributes);
  if (!preseved.contains(FileAttribute.USER)
      && !preseved.contains(FileAttribute.GROUP)
      && !preseved.contains(FileAttribute.PERMISSION)) {
    return;
  }

  FileSystem dstfs = destPath.getFileSystem(conf);
  Path dstdirlist = new Path(jobconf.get(DST_DIR_LIST_LABEL));
  try (SequenceFile.Reader in =
      new SequenceFile.Reader(jobconf, Reader.file(dstdirlist))) {
    Text dsttext = new Text();
    FilePair pair = new FilePair(); 
    for(; in.next(dsttext, pair); ) {
      Path absdst = new Path(destPath, pair.output);
      updateDestStatus(pair.input, dstfs.getFileStatus(absdst),
          preseved, dstfs);
    }
  }
}
 
Author: naver, Project: hadoop, Lines: 26, Source: DistCpV1.java

Example 8: preserveFileAttributesForDirectories

import org.apache.hadoop.io.SequenceFile; // import the package/class this method depends on
private void preserveFileAttributesForDirectories(Configuration conf) throws IOException {
  String attrSymbols = conf.get(DistCpConstants.CONF_LABEL_PRESERVE_STATUS);
  final boolean syncOrOverwrite = syncFolder || overwrite;

  LOG.info("About to preserve attributes: " + attrSymbols);

  EnumSet<FileAttribute> attributes = DistCpUtils.unpackAttributes(attrSymbols);
  final boolean preserveRawXattrs =
      conf.getBoolean(DistCpConstants.CONF_LABEL_PRESERVE_RAWXATTRS, false);

  Path sourceListing = new Path(conf.get(DistCpConstants.CONF_LABEL_LISTING_FILE_PATH));
  FileSystem clusterFS = sourceListing.getFileSystem(conf);
  SequenceFile.Reader sourceReader = new SequenceFile.Reader(conf,
                                    SequenceFile.Reader.file(sourceListing));
  long totalLen = clusterFS.getFileStatus(sourceListing).getLen();

  Path targetRoot = new Path(conf.get(DistCpConstants.CONF_LABEL_TARGET_WORK_PATH));

  long preservedEntries = 0;
  try {
    CopyListingFileStatus srcFileStatus = new CopyListingFileStatus();
    Text srcRelPath = new Text();

    // Iterate over every source path that was copied.
    while (sourceReader.next(srcRelPath, srcFileStatus)) {
      // File-attributes for files are set at the time of copy,
      // in the map-task.
      if (! srcFileStatus.isDirectory()) continue;

      Path targetFile = new Path(targetRoot.toString() + "/" + srcRelPath);
      //
      // Skip the root folder when syncOrOverwrite is true.
      //
      if (targetRoot.equals(targetFile) && syncOrOverwrite) continue;

      FileSystem targetFS = targetFile.getFileSystem(conf);
      DistCpUtils.preserve(targetFS, targetFile, srcFileStatus, attributes,
          preserveRawXattrs);
      preservedEntries++; // count this directory entry, so the final log line reports a real total

      taskAttemptContext.progress();
      taskAttemptContext.setStatus("Preserving status on directory entries. [" +
          sourceReader.getPosition() * 100 / totalLen + "%]");
    }
  } finally {
    IOUtils.closeStream(sourceReader);
  }
  LOG.info("Preserved status on " + preservedEntries + " dir entries on target");
}
 
Author: naver, Project: hadoop, Lines: 49, Source: CopyCommitter.java

Example 9: getSampleData

import org.apache.hadoop.io.SequenceFile; // import the package/class this method depends on
@Override
public SampleDataRecord getSampleData(Path path) throws IOException {
    SampleDataRecord dataRecord = null;
    if (!fs.exists(path))
        LOG.error("sequence file : " + path.toUri().getPath() + " does not exist on HDFS");
    else {
        LOG.info("sequencefileanalyzer start parsing sample data for file path : {}", path.toUri().getPath());
        // try-with-resources closes the reader even if parsing fails
        try (SequenceFile.Reader reader = new SequenceFile.Reader(fs.getConf(), SequenceFile.Reader.file(path))) {
            List<Object> sampleValues = new ArrayList<Object>();
            Writable key = (Writable) ReflectionUtils.newInstance(reader.getKeyClass(), fs.getConf());
            Writable value = (Writable) ReflectionUtils.newInstance(reader.getValueClass(), fs.getConf());
            int count = 0;
            String keyName = "Key";
            String valueName = "Value";
            // sample at most 12 records
            while (reader.next(key, value) && count < 12) {
                sampleValues.add("{\"" + keyName + "\": \"" + key + "\", \"" + valueName + "\": \"" + value + "\"}");
                count++;
            }
            dataRecord = new SampleDataRecord(path.toUri().getPath(), sampleValues);
            LOG.info("sequence file path : {}, sample data is {}", path.toUri().getPath(), sampleValues);
        } catch (Exception e) {
            LOG.error("path : {} content is not in SequenceFile format", path.toUri().getPath(), e);
        }
    }
    return dataRecord;
}
 
Author: thomas-young-2013, Project: wherehowsX, Lines: 30, Source: SequenceFileAnalyzer.java

Example 10: SeqFileReadable

import org.apache.hadoop.io.SequenceFile; // import the package/class this method depends on
public SeqFileReadable(FileSystem fs, Path path, int osBufferSize)
    throws IOException {
  Configuration conf = new Configuration();
  conf.setInt("io.file.buffer.size", osBufferSize);
  reader = new SequenceFile.Reader(fs, path, conf);
  key = new BytesWritable();
  value = new BytesWritable();
}
 
Author: nucypher, Project: hadoop-oss, Lines: 9, Source: TestTFileSeqFileComparison.java

Example 11: fetchOutputFromDisk

import org.apache.hadoop.io.SequenceFile; // import the package/class this method depends on
private DoubleWritable fetchOutputFromDisk() {
	DoubleWritable totalVal = new DoubleWritable();
	// try-with-resources closes the reader once the single total value is read
	try (SequenceFile.Reader companyReader = new SequenceFile.Reader(fs, new Path(
			totalOut.toString() + "/part-r-00000"), conf)) {
		companyReader.next(new Text(), totalVal);
	} catch (IOException e) {
		e.printStackTrace();
	}
	return totalVal;
}
 
Author: amritbhat786, Project: DocIT, Lines: 13, Source: Basics.java

Example 12: getListingFileReader

import org.apache.hadoop.io.SequenceFile; // import the package/class this method depends on
private SequenceFile.Reader getListingFileReader(Configuration configuration) {

    final Path listingFilePath = getListingFilePath(configuration);
    try {
      final FileSystem fileSystem = listingFilePath.getFileSystem(configuration);
      if (!fileSystem.exists(listingFilePath)) {
        throw new IllegalArgumentException("Listing file doesn't exist at: " + listingFilePath);
      }

      return new SequenceFile.Reader(configuration, SequenceFile.Reader.file(listingFilePath));
    } catch (IOException exception) {
      LOG.error("Couldn't find listing file at: " + listingFilePath, exception);
      throw new IllegalArgumentException("Couldn't find listing-file at: " + listingFilePath, exception);
    }
  }
 
Author: HotelsDotCom, Project: circus-train, Lines: 16, Source: UniformSizeInputFormat.java

Example 13: getReaders

import org.apache.hadoop.io.SequenceFile; // import the package/class this method depends on
/** Open the output generated by this format. */
public static SequenceFile.Reader[] getReaders(Configuration conf, Path dir)
  throws IOException {
  FileSystem fs = dir.getFileSystem(conf);
  Path[] names = FileUtil.stat2Paths(fs.listStatus(dir));
  
  // sort names, so that hash partitioning works
  Arrays.sort(names);
  
  SequenceFile.Reader[] parts = new SequenceFile.Reader[names.length];
  for (int i = 0; i < names.length; i++) {
    parts[i] = new SequenceFile.Reader(fs, names[i], conf);
  }
  return parts;
}
 
Author: naver, Project: hadoop, Lines: 16, Source: SequenceFileOutputFormat.java

Example 14: SequenceFileAsBinaryRecordReader

import org.apache.hadoop.io.SequenceFile; // import the package/class this method depends on
public SequenceFileAsBinaryRecordReader(Configuration conf, FileSplit split)
    throws IOException {
  Path path = split.getPath();
  FileSystem fs = path.getFileSystem(conf);
  this.in = new SequenceFile.Reader(fs, path, conf);
  this.end = split.getStart() + split.getLength();
  if (split.getStart() > in.getPosition())
    in.sync(split.getStart());                  // sync to start
  this.start = in.getPosition();
  vbytes = in.createValueBytes();
  done = start >= end;
}
 
Author: naver, Project: hadoop, Lines: 13, Source: SequenceFileAsBinaryInputFormat.java

Example 15: typical

import org.apache.hadoop.io.SequenceFile; // import the package/class this method depends on
@Test
public void typical() throws IOException {
  File input = temp.newFolder("input");
  File inputSub2 = new File(input, "sub1/sub2");
  inputSub2.mkdirs();
  Files.asCharSink(new File(inputSub2, "data"), UTF_8).write("test1");

  File listFile = temp.newFile("listFile");
  Path pathToListFile = new Path(listFile.toURI());

  List<Path> sourceDataLocations = new ArrayList<>();
  sourceDataLocations.add(new Path(inputSub2.toURI()));
  DistCpOptions options = new DistCpOptions(sourceDataLocations, new Path("dummy"));

  CircusTrainCopyListing.setRootPath(conf, new Path(input.toURI()));
  CircusTrainCopyListing copyListing = new CircusTrainCopyListing(conf, null);
  copyListing.doBuildListing(pathToListFile, options);

  try (Reader reader = new SequenceFile.Reader(conf, SequenceFile.Reader.file(pathToListFile))) {
    Text key = new Text();
    CopyListingFileStatus value = new CopyListingFileStatus();

    assertTrue(reader.next(key, value));
    assertThat(key.toString(), is("/sub1/sub2"));
    assertThat(value.getPath().toUri().toString(), endsWith("/input/sub1/sub2"));

    assertTrue(reader.next(key, value));
    assertThat(key.toString(), is("/sub1/sub2/data"));
    assertThat(value.getPath().toUri().toString(), endsWith("/input/sub1/sub2/data"));

    assertFalse(reader.next(key, value));

  }
}
 
Author: HotelsDotCom, Project: circus-train, Lines: 35, Source: CircusTrainCopyListingTest.java


Note: The org.apache.hadoop.io.SequenceFile.Reader method examples in this article were compiled by 纯净天空 from open-source code and documentation platforms such as GitHub and MSDocs. The snippets were selected from open-source projects contributed by many developers; copyright in the source code remains with the original authors. Before distributing or reusing the code, consult the license of the corresponding project; do not republish without permission.