This article collects typical usage examples of the Java method org.apache.hadoop.fs.LocatedFileStatus.getPath. If you are wondering what LocatedFileStatus.getPath does, how to call it, or what real-world usages look like, the curated examples below may help. You can also look further into the enclosing class, org.apache.hadoop.fs.LocatedFileStatus, for related usage.
Nine code examples of the LocatedFileStatus.getPath method are shown below, ordered by popularity by default.
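All of the examples share the same basic pattern: obtain a LocatedFileStatus, usually from FileSystem.listFiles or FileSystem.listLocatedStatus, and call getPath() on it to get the file's Path. As a quick orientation, here is a minimal, self-contained sketch of that pattern; the /tmp/data directory and the println output are illustrative assumptions, not taken from any of the examples below.

import java.io.IOException;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.LocatedFileStatus;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.fs.RemoteIterator;

public class ListPathsSketch {
  public static void main(String[] args) throws IOException {
    Configuration conf = new Configuration();
    FileSystem fs = FileSystem.get(conf);
    // Recursively list all files under a directory (hypothetical path).
    RemoteIterator<LocatedFileStatus> it = fs.listFiles(new Path("/tmp/data"), true);
    while (it.hasNext()) {
      LocatedFileStatus status = it.next();
      Path path = status.getPath(); // the method shown in all nine examples
      System.out.println(path + " (" + status.getLen() + " bytes)");
    }
  }
}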
Example 1: readFileToSearch
import org.apache.hadoop.fs.LocatedFileStatus; // import the dependent package/class for this method
private static SortedSet<byte[]> readFileToSearch(final Configuration conf,
    final FileSystem fs, final LocatedFileStatus keyFileStatus) throws IOException,
    InterruptedException {
  SortedSet<byte[]> result = new TreeSet<byte[]>(Bytes.BYTES_COMPARATOR);
  // Return entries that are flagged Counts.UNDEFINED in the value. Return the row. This is
  // what is missing.
  TaskAttemptContext context = new TaskAttemptContextImpl(conf, new TaskAttemptID());
  try (SequenceFileAsBinaryInputFormat.SequenceFileAsBinaryRecordReader rr =
      new SequenceFileAsBinaryInputFormat.SequenceFileAsBinaryRecordReader()) {
    InputSplit is =
        new FileSplit(keyFileStatus.getPath(), 0, keyFileStatus.getLen(), new String[] {});
    rr.initialize(is, context);
    while (rr.nextKeyValue()) {
      rr.getCurrentKey();
      BytesWritable bw = rr.getCurrentValue();
      if (Verify.VerifyReducer.whichType(bw.getBytes()) == Verify.Counts.UNDEFINED) {
        byte[] key = new byte[rr.getCurrentKey().getLength()];
        System.arraycopy(rr.getCurrentKey().getBytes(), 0, key, 0, rr.getCurrentKey()
            .getLength());
        result.add(key);
      }
    }
  }
  return result;
}
Example 2: literalAllSegments
import org.apache.hadoop.fs.LocatedFileStatus; // import the dependent package/class for this method
public static void literalAllSegments(FileSystem fileSystem, Path dir,
    Consumer<LocatedFileStatus> consumer) throws IOException {
  RemoteIterator<LocatedFileStatus> files = fileSystem.listFiles(dir, true);
  while (files.hasNext()) {
    LocatedFileStatus fileStatus = files.next();
    if (!fileStatus.isFile()) {
      continue;
    }
    if (fileStatus.getLen() == 0) {
      continue;
    }
    Path path = fileStatus.getPath();
    if (checkSegmentByPath(path)) {
      consumer.accept(fileStatus);
    }
  }
}
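Since Example 2 accepts a Consumer<LocatedFileStatus>, a caller can pass a lambda to collect or process the matching segment files. A minimal usage sketch, assuming an existing fileSystem handle and a hypothetical index directory, might look like this:

// Hypothetical caller: collect the paths of every non-empty segment file.
List<Path> segmentPaths = new ArrayList<>();
literalAllSegments(fileSystem, new Path("/indexes/table1"),
    fileStatus -> segmentPaths.add(fileStatus.getPath()));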
Example 3: verifyLocatedFileStatus
import org.apache.hadoop.fs.LocatedFileStatus; // import the dependent package/class for this method
private void verifyLocatedFileStatus(
    JobConf conf, List<LocatedFileStatus> stats)
    throws IOException {
  if (!conf.getBoolean("mapred.fileinputformat.verifysplits", true)) {
    return;
  }
  for (LocatedFileStatus stat : stats) {
    long fileLen = stat.getLen();
    long blockLenTotal = 0;
    for (BlockLocation loc : stat.getBlockLocations()) {
      blockLenTotal += loc.getLength();
    }
    if (blockLenTotal != fileLen) {
      throw new IOException("Error while getting located status, " +
          stat.getPath() + " has length " + fileLen + " but blocks total is " +
          blockLenTotal);
    }
  }
}
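Example 3 simply checks that the block lengths reported for each file add up to the file's total length. A sketch of how such a helper might be driven is shown below; the directory, the JobConf setup, and the assumption that the method is callable from the calling context are illustrative only.

// Hypothetical driver: gather located statuses for a directory and verify them.
JobConf conf = new JobConf();
FileSystem fs = FileSystem.get(conf);
List<LocatedFileStatus> stats = new ArrayList<>();
RemoteIterator<LocatedFileStatus> it = fs.listLocatedStatus(new Path("/user/data"));
while (it.hasNext()) {
  stats.add(it.next());
}
verifyLocatedFileStatus(conf, stats); // throws IOException if any block total disagrees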
Example 4: getNextNonemptyReader
import org.apache.hadoop.fs.LocatedFileStatus; // import the dependent package/class for this method
private DataFileReader<T> getNextNonemptyReader() throws IOException {
  while (fileIterator != null && fileIterator.hasNext()) {
    LocatedFileStatus currentFileStatus = fileIterator.next();
    if (isValidFile(currentFileStatus)) {
      FileSystemPath currPath = new FileSystemPath(
          path.getFileSystem(), currentFileStatus.getPath());
      DataFileReader<T> reader =
          getSingleFileReader(currPath, readerSchema);
      // Check if the file contains at least one record
      if (reader.hasNext()) {
        return reader;
      } else {
        reader.close();
      }
    }
  }
  // fallback
  return null;
}
Example 5: selectJobFiles
import org.apache.hadoop.fs.LocatedFileStatus; // import the dependent package/class for this method
private Collection<JobFiles> selectJobFiles(FileSystem fs,
    Path processingRoot, int i, int size) throws IOException {
  Map<String, JobFiles> jobs = new HashMap<>();
  RemoteIterator<LocatedFileStatus> it = fs.listFiles(processingRoot, true);
  while (it.hasNext()) {
    LocatedFileStatus status = it.next();
    Path path = status.getPath();
    String fileName = path.getName();
    Matcher m = JOB_ID_PARSER.matcher(fileName);
    if (!m.matches()) {
      continue;
    }
    String jobId = m.group(1);
    int lastId = Integer.parseInt(m.group(2));
    int mod = lastId % size;
    if (mod != i) {
      continue;
    }
    LOG.info("this mapper will process file " + fileName);
    // it's mine
    JobFiles jobFiles = jobs.get(jobId);
    if (jobFiles == null) {
      jobFiles = new JobFiles(jobId);
      jobs.put(jobId, jobFiles);
    }
    setFilePath(fileName, path, jobFiles);
  }
  return jobs.values();
}
Example 6: getSplits
import org.apache.hadoop.fs.LocatedFileStatus; // import the dependent package/class for this method
/**
 * Generate the list of files and make them into FileSplits.
 */
public List<InputSplit> getSplits(JobContext job) throws IOException {
  long minSize = Math.max(getFormatMinSplitSize(), getMinSplitSize(job));
  long maxSize = getMaxSplitSize(job);

  // generate splits
  List<InputSplit> splits = new ArrayList<InputSplit>();
  for (LocatedFileStatus file : listLocatedStatus(job)) {
    Path path = file.getPath();
    long length = file.getLen();
    BlockLocation[] blkLocations = file.getBlockLocations();
    if ((length != 0) && isSplitable(job, path)) {
      long blockSize = file.getBlockSize();
      long splitSize = computeSplitSize(blockSize, minSize, maxSize);

      long bytesRemaining = length;
      while (((double) bytesRemaining) / splitSize > SPLIT_SLOP) {
        int blkIndex = getBlockIndex(blkLocations, length - bytesRemaining);
        splits.add(new FileSplit(path, length - bytesRemaining, splitSize,
            blkLocations[blkIndex].getHosts()));
        bytesRemaining -= splitSize;
      }

      if (bytesRemaining != 0) {
        splits.add(new FileSplit(path, length - bytesRemaining, bytesRemaining,
            blkLocations[blkLocations.length - 1].getHosts()));
      }
    } else if (length != 0) {
      splits.add(new FileSplit(path, 0, length, blkLocations[0].getHosts()));
    } else {
      // Create empty hosts array for zero length files
      splits.add(new FileSplit(path, 0, length, new String[0]));
    }
  }
  LOG.debug("Total # of splits: " + splits.size());
  return splits;
}
Example 7: main
import org.apache.hadoop.fs.LocatedFileStatus; // import the dependent package/class for this method
public static void main(String[] args) throws IOException, InterruptedException, SQLException {
  if (args.length < 3 || args.length > 4) {
    System.err.println("Usage: DumpHDFSData namenode_url homedir tablename");
    System.err.println(" Dumps the RAW data for the table tablename into a CSV format for debugging purposes");
    System.exit(1);
  }
  String namenodeURL = args[0];
  String homeDir = args[1];
  String table = args[2];

  Configuration conf = new Configuration();
  conf.set("fs.default.name", namenodeURL);
  FileSystem fs = FileSystem.get(conf);
  PrintStream out = new PrintStream(table + ".csv");
  try {
    String fullTable = RowInputFormat.getFullyQualifiedTableName(table);
    String folder = HdfsRegionManager.getRegionFolder(Misc.getRegionPath(fullTable));
    RemoteIterator<LocatedFileStatus> fileItr = fs.listFiles(new Path(homeDir + "/" + folder), true);

    conf.set(RowInputFormat.HOME_DIR, homeDir);
    conf.set(RowInputFormat.INPUT_TABLE, table);
    boolean wroteHeader = false;
    TaskAttemptContextImpl context = new TaskAttemptContextImpl(conf, new TaskAttemptID());
    while (fileItr.hasNext()) {
      LocatedFileStatus file = fileItr.next();
      Path path = file.getPath();
      if (!path.getName().endsWith("hop")) {
        continue;
      }
      CombineFileSplit split = new CombineFileSplit(new Path[] { path }, new long[] { file.getLen() });
      RowRecordReader reader = new RowRecordReader();
      reader.initialize(split, context);
      while (reader.nextKeyValue()) {
        Row row = reader.getCurrentValue();
        ResultSet rs = row.getRowAsResultSet();
        Type op = row.getEventType();
        long ts = row.getTimestamp();
        int numColumns = rs.getMetaData().getColumnCount();
        if (!wroteHeader) {
          out.print("timestamp,operation,path");
          for (int i = 1; i <= numColumns; i++) {
            out.print(",");
            out.print(rs.getMetaData().getColumnName(i));
          }
          out.println();
          wroteHeader = true;
        }
        out.print(ts);
        out.print(",");
        out.print(op);
        out.print(",");
        out.print(path);
        for (int i = 1; i <= numColumns; i++) {
          out.print(",");
          String s = rs.getString(i);
          if (s != null) {
            s = s.replaceAll("([,\n])", "\\\\1");
          } else {
            s = "NULL";
          }
          out.print(s);
        }
        out.println();
      }
    }
  } finally {
    out.close();
  }
}
Example 8: run
import org.apache.hadoop.fs.LocatedFileStatus; // import the dependent package/class for this method
@Override
public void run() {
  while (true) {
    long currentTime = clock.getTime();
    try {
      LOG.info(Thread.currentThread().getId() + ":Trying to clean " + dirToClean);
      FileSystem fs = dirToClean.getFileSystem(conf);
      if (!fs.exists(dirToClean)) {
        LOG.info(dirToClean + " doesn't exist");
        return;
      }
      RemoteIterator<LocatedFileStatus> itor;
      for (itor = fs.listLocatedStatus(dirToClean); itor.hasNext();) {
        LocatedFileStatus dirStat = itor.next();
        // Check if this is a directory matching the pattern
        if (!dirStat.isDir()) {
          continue;
        }
        Path subDirPath = dirStat.getPath();
        String dirname = subDirPath.toUri().getPath();
        Matcher m = fileToCleanPattern.matcher(dirname);
        if (m.find()) {
          if (currentTime - dirStat.getModificationTime() > cleanThreshold) {
            // recursively delete all the files/dirs
            LOG.info("Delete " + subDirPath);
            fs.delete(subDirPath, true);
          }
        }
      }
    } catch (IOException ioe) {
      LOG.error("IOException when clearing dir ", ioe);
    }
    if (cleanInterval == 0) {
      return;
    }
    try {
      Thread.sleep(cleanInterval);
    } catch (InterruptedException e) {
    }
  }
}
Example 9: loadSplits
import org.apache.hadoop.fs.LocatedFileStatus; // import the dependent package/class for this method
private CompletableFuture<?> loadSplits()
    throws IOException
{
  HiveFileIterator files = fileIterators.poll();
  if (files == null) {
    HivePartitionMetadata partition = partitions.poll();
    if (partition == null) {
      return COMPLETED_FUTURE;
    }
    loadPartition(partition);
    return COMPLETED_FUTURE;
  }

  while (files.hasNext() && !stopped) {
    LocatedFileStatus file = files.next();
    if (isDirectory(file)) {
      if (recursiveDirWalkerEnabled) {
        HiveFileIterator fileIterator = new HiveFileIterator(
            file.getPath(),
            files.getFileSystem(),
            files.getDirectoryLister(),
            files.getNamenodeStats(),
            files.getPartitionName(),
            files.getInputFormat(),
            files.getSchema(),
            files.getPartitionKeys(),
            files.getEffectivePredicate());
        fileIterators.add(fileIterator);
      }
    }
    else {
      boolean splittable = isSplittable(files.getInputFormat(), hdfsEnvironment.getFileSystem(file.getPath()), file.getPath());

      CompletableFuture<?> future = hiveSplitSource.addToQueue(createHiveSplits(
          files.getPartitionName(),
          file.getPath().toString(),
          file.getBlockLocations(),
          0,
          file.getLen(),
          files.getSchema(),
          files.getPartitionKeys(),
          splittable,
          session,
          files.getEffectivePredicate()));
      if (!future.isDone()) {
        fileIterators.addFirst(files);
        return future;
      }
    }
  }

  // No need to put the iterator back, since it's either empty or we've stopped
  return COMPLETED_FUTURE;
}