This article collects typical usage examples of the Java class org.apache.hadoop.io.SequenceFile.Reader. If you are wondering what SequenceFile.Reader does, how to use it, or want working examples, the curated code samples below may help. You can also read further about the enclosing class, org.apache.hadoop.io.SequenceFile.
Below are 15 code examples of SequenceFile.Reader, sorted by popularity by default.
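Two constructor styles recur in the examples below: the older new SequenceFile.Reader(fs, path, conf), which is deprecated in Hadoop 2.x, and the option-based new SequenceFile.Reader(conf, SequenceFile.Reader.file(path)) that replaced it. As orientation before the examples, here is a minimal, self-contained read loop (the input path is a hypothetical placeholder; key and value classes are discovered from the file header):

import java.io.IOException;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IOUtils;
import org.apache.hadoop.io.SequenceFile;
import org.apache.hadoop.io.Writable;
import org.apache.hadoop.util.ReflectionUtils;

public class SequenceFileReadDemo {
  public static void main(String[] args) throws IOException {
    Configuration conf = new Configuration();
    Path path = new Path(args[0]);   // e.g. "/tmp/data.seq" (hypothetical)

    SequenceFile.Reader reader = null;
    try {
      // Option-based constructor; replaces the deprecated (fs, path, conf) form.
      reader = new SequenceFile.Reader(conf, SequenceFile.Reader.file(path));

      // The key/value classes are recorded in the file header.
      Writable key = (Writable) ReflectionUtils.newInstance(reader.getKeyClass(), conf);
      Writable value = (Writable) ReflectionUtils.newInstance(reader.getValueClass(), conf);

      while (reader.next(key, value)) {
        System.out.println(key + "\t" + value);
      }
    } finally {
      IOUtils.closeStream(reader);
    }
  }
}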
Example 1: SequenceFileIterator
import org.apache.hadoop.io.SequenceFile; // import the package/class this method depends on
/**
 * @throws IOException if path can't be read, or its key or value class can't be instantiated
 */
public SequenceFileIterator(Path path, boolean reuseKeyValueInstances, Configuration conf) throws IOException {
  key = null;
  value = null;
  FileSystem fs = path.getFileSystem(conf);
  path = path.makeQualified(fs);
  reader = new SequenceFile.Reader(fs, path, conf);
  this.conf = conf;
  keyClass = (Class<K>) reader.getKeyClass();
  valueClass = (Class<V>) reader.getValueClass();
  noValue = NullWritable.class.equals(valueClass);
  this.reuseKeyValueInstances = reuseKeyValueInstances;
}
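If this constructor is the one from Apache Mahout's SequenceFileIterator (an assumption based on the field names), the surrounding class iterates over Pair<K, V> records and must be closed when done. A usage sketch under that assumption, with illustrative types and path:

// Sketch only: assumes Mahout's SequenceFileIterator<K, V>, which yields
// org.apache.mahout.common.Pair<K, V> records and implements Closeable.
Configuration conf = new Configuration();
Path path = new Path("/tmp/data.seq");                      // illustrative path
SequenceFileIterator<Text, Text> it =
    new SequenceFileIterator<Text, Text>(path, true, conf); // reuse key/value instances
try {
  while (it.hasNext()) {
    Pair<Text, Text> record = it.next();
    System.out.println(record.getFirst() + "\t" + record.getSecond());
  }
} finally {
  it.close();
}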
Example 2: checkResult
import org.apache.hadoop.io.SequenceFile; // import the package/class this method depends on
private void checkResult(Path listFile, int count) throws IOException {
  if (count == 0) {
    return;
  }

  int recCount = 0;
  SequenceFile.Reader reader = new SequenceFile.Reader(config,
      SequenceFile.Reader.file(listFile));
  try {
    Text relPath = new Text();
    CopyListingFileStatus fileStatus = new CopyListingFileStatus();
    while (reader.next(relPath, fileStatus)) {
      if (fileStatus.isDirectory() && relPath.toString().equals("")) {
        // ignore root with empty relPath, which is an entry to be
        // used for preserving root attributes etc.
        continue;
      }
      Assert.assertEquals(fileStatus.getPath().toUri().getPath(), map.get(relPath.toString()));
      recCount++;
    }
  } finally {
    IOUtils.closeStream(reader);
  }
  Assert.assertEquals(recCount, count);
}
Example 3: getListingFileReader
import org.apache.hadoop.io.SequenceFile; // import the package/class this method depends on
private SequenceFile.Reader getListingFileReader(Configuration configuration) {
  final Path listingFilePath = getListingFilePath(configuration);
  try {
    final FileSystem fileSystem = listingFilePath.getFileSystem(configuration);
    if (!fileSystem.exists(listingFilePath))
      throw new IllegalArgumentException("Listing file doesn't exist at: "
          + listingFilePath);

    return new SequenceFile.Reader(configuration,
        SequenceFile.Reader.file(listingFilePath));
  }
  catch (IOException exception) {
    LOG.error("Couldn't find listing file at: " + listingFilePath, exception);
    throw new IllegalArgumentException("Couldn't find listing-file at: "
        + listingFilePath, exception);
  }
}
Example 4: readPartitions
import org.apache.hadoop.io.SequenceFile; // import the package/class this method depends on
/**
 * Read the cut points from the given sequence file.
 * @param fs The file system
 * @param p The path to read
 * @param keyClass The map output key class
 * @param conf The job config
 * @throws IOException
 */
// matching key types enforced by passing in
@SuppressWarnings("unchecked") // map output key class
private K[] readPartitions(FileSystem fs, Path p, Class<K> keyClass,
    Configuration conf) throws IOException {
  SequenceFile.Reader reader = new SequenceFile.Reader(fs, p, conf);
  ArrayList<K> parts = new ArrayList<K>();
  K key = ReflectionUtils.newInstance(keyClass, conf);
  NullWritable value = NullWritable.get();
  try {
    while (reader.next(key, value)) {
      parts.add(key);
      key = ReflectionUtils.newInstance(keyClass, conf);
    }
    reader.close();
    reader = null;
  } finally {
    IOUtils.cleanup(LOG, reader);
  }
  return parts.toArray((K[])Array.newInstance(keyClass, parts.size()));
}
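A file in the layout this method expects (one sorted key per record, NullWritable values) can be produced with SequenceFile.createWriter; this is how TotalOrderPartitioner-style partition files are typically built. A minimal sketch, with an illustrative path and Text keys:

// Sketch: write cut points in the layout readPartitions() expects
// (sorted keys, NullWritable values). Path and key type are illustrative.
Configuration conf = new Configuration();
Path partitionFile = new Path("/tmp/_partitions.seq");
SequenceFile.Writer writer = SequenceFile.createWriter(conf,
    SequenceFile.Writer.file(partitionFile),
    SequenceFile.Writer.keyClass(Text.class),
    SequenceFile.Writer.valueClass(NullWritable.class));
try {
  for (String cut : new String[] {"g", "n", "u"}) {
    writer.append(new Text(cut), NullWritable.get());
  }
} finally {
  writer.close();
}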
Example 5: getSplits
import org.apache.hadoop.io.SequenceFile; // import the package/class this method depends on
public List<InputSplit> getSplits(JobContext job)
    throws IOException {
  Configuration conf = job.getConfiguration();
  Path src = new Path(conf.get(INDIRECT_INPUT_FILE, null));
  FileSystem fs = src.getFileSystem(conf);

  List<InputSplit> splits = new ArrayList<InputSplit>();
  LongWritable key = new LongWritable();
  Text value = new Text();
  for (SequenceFile.Reader sl = new SequenceFile.Reader(fs, src, conf);
       sl.next(key, value);) {
    splits.add(new IndirectSplit(new Path(value.toString()), key.get()));
  }

  return splits;
}
Example 6: getSplits
import org.apache.hadoop.io.SequenceFile; // import the package/class this method depends on
public InputSplit[] getSplits(JobConf job, int numSplits)
    throws IOException {
  Path src = new Path(job.get(
      org.apache.hadoop.mapreduce.GenericMRLoadGenerator.INDIRECT_INPUT_FILE,
      null));
  FileSystem fs = src.getFileSystem(job);

  ArrayList<IndirectSplit> splits = new ArrayList<IndirectSplit>(numSplits);
  LongWritable key = new LongWritable();
  Text value = new Text();
  for (SequenceFile.Reader sl = new SequenceFile.Reader(fs, src, job);
       sl.next(key, value);) {
    splits.add(new IndirectSplit(new Path(value.toString()), key.get()));
  }

  return splits.toArray(new IndirectSplit[splits.size()]);
}
Example 7: finalize
import org.apache.hadoop.io.SequenceFile; // import the package/class this method depends on
static private void finalize(Configuration conf, JobConf jobconf,
    final Path destPath, String presevedAttributes) throws IOException {
  if (presevedAttributes == null) {
    return;
  }
  EnumSet<FileAttribute> preseved = FileAttribute.parse(presevedAttributes);
  if (!preseved.contains(FileAttribute.USER)
      && !preseved.contains(FileAttribute.GROUP)
      && !preseved.contains(FileAttribute.PERMISSION)) {
    return;
  }

  FileSystem dstfs = destPath.getFileSystem(conf);
  Path dstdirlist = new Path(jobconf.get(DST_DIR_LIST_LABEL));
  try (SequenceFile.Reader in =
      new SequenceFile.Reader(jobconf, Reader.file(dstdirlist))) {
    Text dsttext = new Text();
    FilePair pair = new FilePair();
    for(; in.next(dsttext, pair); ) {
      Path absdst = new Path(destPath, pair.output);
      updateDestStatus(pair.input, dstfs.getFileStatus(absdst),
          preseved, dstfs);
    }
  }
}
Example 8: preserveFileAttributesForDirectories
import org.apache.hadoop.io.SequenceFile; // import the package/class this method depends on
private void preserveFileAttributesForDirectories(Configuration conf) throws IOException {
  String attrSymbols = conf.get(DistCpConstants.CONF_LABEL_PRESERVE_STATUS);
  final boolean syncOrOverwrite = syncFolder || overwrite;

  LOG.info("About to preserve attributes: " + attrSymbols);

  EnumSet<FileAttribute> attributes = DistCpUtils.unpackAttributes(attrSymbols);
  final boolean preserveRawXattrs =
      conf.getBoolean(DistCpConstants.CONF_LABEL_PRESERVE_RAWXATTRS, false);

  Path sourceListing = new Path(conf.get(DistCpConstants.CONF_LABEL_LISTING_FILE_PATH));
  FileSystem clusterFS = sourceListing.getFileSystem(conf);
  SequenceFile.Reader sourceReader = new SequenceFile.Reader(conf,
      SequenceFile.Reader.file(sourceListing));
  long totalLen = clusterFS.getFileStatus(sourceListing).getLen();

  Path targetRoot = new Path(conf.get(DistCpConstants.CONF_LABEL_TARGET_WORK_PATH));

  long preservedEntries = 0;
  try {
    CopyListingFileStatus srcFileStatus = new CopyListingFileStatus();
    Text srcRelPath = new Text();

    // Iterate over every source path that was copied.
    while (sourceReader.next(srcRelPath, srcFileStatus)) {
      // File-attributes for files are set at the time of copy,
      // in the map-task.
      if (!srcFileStatus.isDirectory()) continue;

      Path targetFile = new Path(targetRoot.toString() + "/" + srcRelPath);

      // Skip the root folder when syncOrOverwrite is true.
      if (targetRoot.equals(targetFile) && syncOrOverwrite) continue;

      FileSystem targetFS = targetFile.getFileSystem(conf);
      DistCpUtils.preserve(targetFS, targetFile, srcFileStatus, attributes,
          preserveRawXattrs);

      // Count the entry so the summary below reports the real number of
      // directories whose attributes were preserved.
      preservedEntries++;

      taskAttemptContext.progress();
      taskAttemptContext.setStatus("Preserving status on directory entries. [" +
          sourceReader.getPosition() * 100 / totalLen + "%]");
    }
  } finally {
    IOUtils.closeStream(sourceReader);
  }
  LOG.info("Preserved status on " + preservedEntries + " dir entries on target");
}
Example 9: getSampleData
import org.apache.hadoop.io.SequenceFile; // import the package/class this method depends on
@Override
public SampleDataRecord getSampleData(Path path) throws IOException {
  SampleDataRecord dataRecord = null;
  if (!fs.exists(path)) {
    LOG.error("sequence file : " + path.toUri().getPath() + " does not exist on hdfs");
  } else {
    LOG.info("sequencefileanalyzer start parse sampledata for file path : {}", path.toUri().getPath());
    // try-with-resources closes the reader whether or not parsing succeeds
    try (SequenceFile.Reader reader =
        new SequenceFile.Reader(fs.getConf(), SequenceFile.Reader.file(path))) {
      List<Object> sampleValues = new ArrayList<Object>();
      Writable key = (Writable) ReflectionUtils.newInstance(reader.getKeyClass(), fs.getConf());
      Writable value = (Writable) ReflectionUtils.newInstance(reader.getValueClass(), fs.getConf());
      int count = 0;
      String keyName = "Key";
      String valueName = "Value";
      while (reader.next(key, value) && count < 12) {
        sampleValues.add("{\"" + keyName + "\": \"" + key + "\", \"" + valueName + "\": \"" + value + "\"}");
        count++;
      }
      dataRecord = new SampleDataRecord(path.toUri().getPath(), sampleValues);
      LOG.info("sequence file path : {}, sample data is {}", path.toUri().getPath(), sampleValues);
    } catch (Exception e) {
      LOG.error("path : {} is not in SequenceFile format", path.toUri().getPath());
      LOG.info("failed to parse sequence file", e);
    }
  }
  return dataRecord;
}
Example 10: SeqFileReadable
import org.apache.hadoop.io.SequenceFile; // import the package/class this method depends on
public SeqFileReadable(FileSystem fs, Path path, int osBufferSize)
    throws IOException {
  Configuration conf = new Configuration();
  conf.setInt("io.file.buffer.size", osBufferSize);
  reader = new SequenceFile.Reader(fs, path, conf);
  key = new BytesWritable();
  value = new BytesWritable();
}
Example 11: fetchOutputFromDisk
import org.apache.hadoop.io.SequenceFile; // import the package/class this method depends on
private DoubleWritable fetchOutputFromDisk() {
  DoubleWritable totalVal = new DoubleWritable();
  // try-with-resources closes the reader even if reading fails
  try (SequenceFile.Reader companyReader = new SequenceFile.Reader(fs, new Path(
      totalOut.toString() + "/part-r-00000"), conf)) {
    companyReader.next(new Text(), totalVal);
  } catch (IOException e) {
    e.printStackTrace();
  }
  return totalVal;
}
Example 12: getListingFileReader
import org.apache.hadoop.io.SequenceFile; // import the package/class this method depends on
private SequenceFile.Reader getListingFileReader(Configuration configuration) {
  final Path listingFilePath = getListingFilePath(configuration);
  try {
    final FileSystem fileSystem = listingFilePath.getFileSystem(configuration);
    if (!fileSystem.exists(listingFilePath)) {
      throw new IllegalArgumentException("Listing file doesn't exist at: " + listingFilePath);
    }
    return new SequenceFile.Reader(configuration, SequenceFile.Reader.file(listingFilePath));
  } catch (IOException exception) {
    LOG.error("Couldn't find listing file at: " + listingFilePath, exception);
    throw new IllegalArgumentException("Couldn't find listing-file at: " + listingFilePath, exception);
  }
}
Example 13: getReaders
import org.apache.hadoop.io.SequenceFile; // import the package/class this method depends on
/** Open the output generated by this format. */
public static SequenceFile.Reader[] getReaders(Configuration conf, Path dir)
    throws IOException {
  FileSystem fs = dir.getFileSystem(conf);
  Path[] names = FileUtil.stat2Paths(fs.listStatus(dir));

  // sort names, so that hash partitioning works
  Arrays.sort(names);

  SequenceFile.Reader[] parts = new SequenceFile.Reader[names.length];
  for (int i = 0; i < names.length; i++) {
    parts[i] = new SequenceFile.Reader(fs, names[i], conf);
  }
  return parts;
}
Example 14: SequenceFileAsBinaryRecordReader
import org.apache.hadoop.io.SequenceFile; // import the package/class this method depends on
public SequenceFileAsBinaryRecordReader(Configuration conf, FileSplit split)
    throws IOException {
  Path path = split.getPath();
  FileSystem fs = path.getFileSystem(conf);
  this.in = new SequenceFile.Reader(fs, path, conf);
  this.end = split.getStart() + split.getLength();
  if (split.getStart() > in.getPosition())
    in.sync(split.getStart());    // sync to start
  this.start = in.getPosition();
  vbytes = in.createValueBytes();
  done = start >= end;
}
Example 15: typical
import org.apache.hadoop.io.SequenceFile; // import the package/class this method depends on
@Test
public void typical() throws IOException {
  File input = temp.newFolder("input");
  File inputSub2 = new File(input, "sub1/sub2");
  inputSub2.mkdirs();
  Files.asCharSink(new File(inputSub2, "data"), UTF_8).write("test1");

  File listFile = temp.newFile("listFile");
  Path pathToListFile = new Path(listFile.toURI());

  List<Path> sourceDataLocations = new ArrayList<>();
  sourceDataLocations.add(new Path(inputSub2.toURI()));
  DistCpOptions options = new DistCpOptions(sourceDataLocations, new Path("dummy"));

  CircusTrainCopyListing.setRootPath(conf, new Path(input.toURI()));
  CircusTrainCopyListing copyListing = new CircusTrainCopyListing(conf, null);
  copyListing.doBuildListing(pathToListFile, options);

  try (Reader reader = new SequenceFile.Reader(conf, SequenceFile.Reader.file(pathToListFile))) {
    Text key = new Text();
    CopyListingFileStatus value = new CopyListingFileStatus();

    assertTrue(reader.next(key, value));
    assertThat(key.toString(), is("/sub1/sub2"));
    assertThat(value.getPath().toUri().toString(), endsWith("/input/sub1/sub2"));

    assertTrue(reader.next(key, value));
    assertThat(key.toString(), is("/sub1/sub2/data"));
    assertThat(value.getPath().toUri().toString(), endsWith("/input/sub1/sub2/data"));

    assertFalse(reader.next(key, value));
  }
}