

Java LocatedFileStatus.getPath Method Code Examples

This article collects typical usage examples of the Java method org.apache.hadoop.fs.LocatedFileStatus.getPath, drawn from open-source projects. If you are wondering what LocatedFileStatus.getPath does or how to call it, the curated examples below should help. You can also explore further usage examples of the enclosing class, org.apache.hadoop.fs.LocatedFileStatus.


Nine code examples of the LocatedFileStatus.getPath method are presented below, ordered by popularity.
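
Before diving into the collected examples, here is a minimal orientation sketch (not taken from any of the projects below) showing the method in its most common setting: recursively listing files and reading each one's Path. The directory /tmp/data is a placeholder.

import java.io.IOException;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.LocatedFileStatus;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.fs.RemoteIterator;

public class ListPathsSketch {
  public static void main(String[] args) throws IOException {
    FileSystem fs = FileSystem.get(new Configuration());
    // true = recurse into subdirectories
    RemoteIterator<LocatedFileStatus> it = fs.listFiles(new Path("/tmp/data"), true);
    while (it.hasNext()) {
      LocatedFileStatus status = it.next();
      Path path = status.getPath(); // the method this article covers
      System.out.println(path + " (" + status.getLen() + " bytes)");
    }
  }
}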

Example 1: readFileToSearch

import org.apache.hadoop.fs.LocatedFileStatus; // import required by the method
private static SortedSet<byte[]> readFileToSearch(final Configuration conf,
    final FileSystem fs, final LocatedFileStatus keyFileStatus) throws IOException,
    InterruptedException {
  SortedSet<byte[]> result = new TreeSet<byte[]>(Bytes.BYTES_COMPARATOR);
  // Return entries that are flagged Counts.UNDEFINED in the value. Return the row. This is
  // what is missing.
  TaskAttemptContext context = new TaskAttemptContextImpl(conf, new TaskAttemptID());
  try (SequenceFileAsBinaryInputFormat.SequenceFileAsBinaryRecordReader rr =
      new SequenceFileAsBinaryInputFormat.SequenceFileAsBinaryRecordReader()) {
    InputSplit is =
      new FileSplit(keyFileStatus.getPath(), 0, keyFileStatus.getLen(), new String[] {});
    rr.initialize(is, context);
    while (rr.nextKeyValue()) {
      rr.getCurrentKey();
      BytesWritable bw = rr.getCurrentValue();
      if (Verify.VerifyReducer.whichType(bw.getBytes()) == Verify.Counts.UNDEFINED) {
        byte[] key = new byte[rr.getCurrentKey().getLength()];
        System.arraycopy(rr.getCurrentKey().getBytes(), 0, key, 0, rr.getCurrentKey()
            .getLength());
        result.add(key);
      }
    }
  }
  return result;
}
 
Developer: fengchen8086, Project: ditb, Lines of code: 26, Source file: IntegrationTestBigLinkedList.java

Example 2: literalAllSegments

import org.apache.hadoop.fs.LocatedFileStatus; // import required by the method
public static void literalAllSegments(FileSystem fileSystem, Path dir, Consumer<LocatedFileStatus> consumer) throws IOException {
    RemoteIterator<LocatedFileStatus> files = fileSystem.listFiles(dir, true);
    while (files.hasNext()) {
        LocatedFileStatus fileStatus = files.next();
        if (!fileStatus.isFile()) {
            continue;
        }
        if (fileStatus.getLen() == 0) {
            continue;
        }

        Path path = fileStatus.getPath();
        if (checkSegmentByPath(path)) {
            consumer.accept(fileStatus);
        }
    }
}
 
Developer: shunfei, Project: indexr, Lines of code: 18, Source file: SegmentHelper.java
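
As a hedged usage sketch for Example 2: since literalAllSegments accepts a Consumer<LocatedFileStatus>, a caller can pass a lambda that consumes each qualifying segment file. The directory below is a placeholder, and the snippet assumes the indexr SegmentHelper class shown above is on the classpath.

import java.io.IOException;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;

public class SegmentWalkSketch {
  public static void main(String[] args) throws IOException {
    FileSystem fileSystem = FileSystem.get(new Configuration());
    // Print the path of every non-empty segment file under a placeholder directory
    SegmentHelper.literalAllSegments(fileSystem, new Path("/indexr/segments"),
        fileStatus -> System.out.println("segment: " + fileStatus.getPath()));
  }
}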

Example 3: verifyLocatedFileStatus

import org.apache.hadoop.fs.LocatedFileStatus; // import required by the method
private void verifyLocatedFileStatus(
    JobConf conf, List<LocatedFileStatus> stats)
    throws IOException {
  if (!conf.getBoolean("mapred.fileinputformat.verifysplits", true)) {
    return;
  }
  for (LocatedFileStatus stat: stats) {
    long fileLen = stat.getLen();
    long blockLenTotal = 0;
    for (BlockLocation loc: stat.getBlockLocations()) {
      blockLenTotal += loc.getLength();
    }
    if (blockLenTotal != fileLen) {
      throw new IOException("Error while getting located status, " +
        stat.getPath() + " has length " + fileLen + " but blocks total is " +
        blockLenTotal);
    }
  }
}
 
Developer: rhli, Project: hadoop-EAR, Lines of code: 20, Source file: FileInputFormat.java

Example 4: getNextNonemptyReader

import org.apache.hadoop.fs.LocatedFileStatus; // import required by the method
private DataFileReader<T> getNextNonemptyReader() throws IOException {
	while (fileIterator != null && fileIterator.hasNext()) {
		LocatedFileStatus currentFileStatus = fileIterator.next();
		if (isValidFile(currentFileStatus)) {
			FileSystemPath currPath = new FileSystemPath(
					path.getFileSystem(), currentFileStatus.getPath());
			DataFileReader<T> reader = 
					getSingleFileReader(currPath, readerSchema);
			/** Check if the file contains at least one record */
			if(reader.hasNext()){
				return reader;
			} else {
				reader.close();
			}
		}
	}
	/** fallback */
	return null;
}
 
Developer: openaire, Project: iis, Lines of code: 20, Source file: AvroDataStoreReader.java

Example 5: selectJobFiles

import org.apache.hadoop.fs.LocatedFileStatus; // import required by the method
private Collection<JobFiles> selectJobFiles(FileSystem fs,
    Path processingRoot, int i, int size) throws IOException {
  Map<String, JobFiles> jobs = new HashMap<>();
  RemoteIterator<LocatedFileStatus> it = fs.listFiles(processingRoot, true);
  while (it.hasNext()) {
    LocatedFileStatus status = it.next();
    Path path = status.getPath();
    String fileName = path.getName();
    Matcher m = JOB_ID_PARSER.matcher(fileName);
    if (!m.matches()) {
      continue;
    }
    String jobId = m.group(1);
    int lastId = Integer.parseInt(m.group(2));
    int mod = lastId % size;
    if (mod != i) {
      continue;
    }
    LOG.info("this mapper will process file " + fileName);
    // it's mine
    JobFiles jobFiles = jobs.get(jobId);
    if (jobFiles == null) {
      jobFiles = new JobFiles(jobId);
      jobs.put(jobId, jobFiles);
    }
    setFilePath(fileName, path, jobFiles);
  }
  return jobs.values();
}
 
Developer: aliyun-beta, Project: aliyun-oss-hadoop-fs, Lines of code: 30, Source file: JobHistoryFileReplayHelper.java

Example 6: getSplits

import org.apache.hadoop.fs.LocatedFileStatus; // import required by the method
/** 
 * Generate the list of files and make them into FileSplits.
 */ 
public List<InputSplit> getSplits(JobContext job
                                  ) throws IOException {
  long minSize = Math.max(getFormatMinSplitSize(), getMinSplitSize(job));
  long maxSize = getMaxSplitSize(job);

  // generate splits
  List<InputSplit> splits = new ArrayList<InputSplit>();
  for (LocatedFileStatus file: listLocatedStatus(job)) {
    Path path = file.getPath();
    long length = file.getLen();
    BlockLocation[] blkLocations = file.getBlockLocations();

    if ((length != 0) && isSplitable(job, path)) { 
      long blockSize = file.getBlockSize();
      long splitSize = computeSplitSize(blockSize, minSize, maxSize);

      long bytesRemaining = length;
      while (((double) bytesRemaining)/splitSize > SPLIT_SLOP) {
        int blkIndex = getBlockIndex(blkLocations, length-bytesRemaining);
        splits.add(new FileSplit(path, length-bytesRemaining, splitSize, 
                                 blkLocations[blkIndex].getHosts()));
        bytesRemaining -= splitSize;
      }
      
      if (bytesRemaining != 0) {
        splits.add(new FileSplit(path, length-bytesRemaining, bytesRemaining, 
                   blkLocations[blkLocations.length-1].getHosts()));
      }
    } else if (length != 0) {
      splits.add(new FileSplit(path, 0, length, blkLocations[0].getHosts()));
    } else { 
      //Create empty hosts array for zero length files
      splits.add(new FileSplit(path, 0, length, new String[0]));
    }
  }
  LOG.debug("Total # of splits: " + splits.size());
  return splits;
}
 
Developer: rhli, Project: hadoop-EAR, Lines of code: 42, Source file: FileInputFormat.java
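
The splitSize driving the loop above comes from minSize and maxSize, which are read from the job configuration. As a hedged sketch against the standard org.apache.hadoop.mapreduce.lib.input.FileInputFormat (the hadoop-EAR variant shown above may differ), a job can steer these bounds with the usual setters; the 64 MB/256 MB values are placeholders.

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;

public class SplitSizeDemo {
  public static void main(String[] args) throws Exception {
    Job job = Job.getInstance(new Configuration(), "split-size-demo");
    // Splits will span at least 64 MB and at most 256 MB (placeholder values),
    // subject to computeSplitSize(blockSize, minSize, maxSize) as shown above
    FileInputFormat.setMinInputSplitSize(job, 64L * 1024 * 1024);
    FileInputFormat.setMaxInputSplitSize(job, 256L * 1024 * 1024);
  }
}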

Example 7: main

import org.apache.hadoop.fs.LocatedFileStatus; // import required by the method
public static void main(String[] args) throws IOException, InterruptedException, SQLException {
  if(args.length < 3 || args.length > 4) {
    System.err.println("Usage: DumpHDFSData namenode_url homedir tablename");
    System.err.println("  Dumps the RAW data for the table tablename into a CSV format for debugging purposes");
    System.exit(1);
  }
  
  
  String namenodeURL = args[0];
  String homeDir = args[1];
  String table = args[2];
  
  
  Configuration conf = new Configuration();
  conf.set("fs.default.name", namenodeURL);
  FileSystem fs = FileSystem.get(conf);
  
  PrintStream out = new PrintStream(table + ".csv");
  try {
    String fullTable = RowInputFormat.getFullyQualifiedTableName(table);
    String folder = HdfsRegionManager.getRegionFolder(Misc.getRegionPath(fullTable));
    RemoteIterator<LocatedFileStatus> fileItr = fs.listFiles(new Path(homeDir + "/" + folder), true);
    
    conf.set(RowInputFormat.HOME_DIR, homeDir);
    conf.set(RowInputFormat.INPUT_TABLE, table);
    
    boolean wroteHeader = false;
    TaskAttemptContextImpl context = new TaskAttemptContextImpl(conf, new TaskAttemptID());
    
    while(fileItr.hasNext()) {
      LocatedFileStatus file = fileItr.next();
      Path path = file.getPath();
      if(!path.getName().endsWith("hop")) {
        continue;
      }
      CombineFileSplit split = new CombineFileSplit(new Path[] { path } , new long[] { file.getLen()});
      RowRecordReader reader = new RowRecordReader();
      reader.initialize(split, context);
      while(reader.nextKeyValue()) {
        Row row = reader.getCurrentValue();
        ResultSet rs = row.getRowAsResultSet();
        Type op = row.getEventType();
        long ts = row.getTimestamp();

        int numColumns = rs.getMetaData().getColumnCount();
        if(!wroteHeader) {
          out.print("timestamp,operation,path");
          for(int i = 1; i <= numColumns; i++) {
            out.print(",");
            out.print(rs.getMetaData().getColumnName(i));
          }
          out.println();
          wroteHeader = true;
        }

        out.print(ts);
        out.print(",");
        out.print(op);
        out.print(",");
        out.print(path);
        for(int i = 1; i <= numColumns; i++) {
          out.print(",");
          String s = rs.getString(i);
          if(s != null) {
            // escape embedded commas and newlines with a backslash
            s = s.replaceAll("([,\n])", "\\\\$1");
          } else {
            s = "NULL";
          }
          out.print(s);
        }
        out.println();
      }
    }
  
  } finally {
    out.close();
  }
}
 
Developer: gemxd, Project: gemfirexd-oss, Lines of code: 79, Source file: DumpHDFSData.java
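
A hedged invocation example, following the usage message in the code (hostname, port, and paths are placeholders): java DumpHDFSData hdfs://namenode:8020 /hdfs-store MYTABLE, which writes MYTABLE.csv to the working directory.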

Example 8: run

import org.apache.hadoop.fs.LocatedFileStatus; // import required by the method
@Override
public void run() {
  while (true) {
    long currentTime = clock.getTime();
    try {
      LOG.info(Thread.currentThread().getId() + ":Trying to clean " + dirToClean);
      FileSystem fs = dirToClean.getFileSystem(conf);
      if (!fs.exists(dirToClean)) {
        LOG.info(dirToClean + " doesn't exist");
        return;
      }

      RemoteIterator<LocatedFileStatus> itor;
      for(itor = fs.listLocatedStatus(dirToClean); itor.hasNext();) {
        LocatedFileStatus dirStat = itor.next();
        // Check if this is a directory matching the pattern
        if (!dirStat.isDir()) {
          continue;
        }
        Path subDirPath = dirStat.getPath();
        String dirname = subDirPath.toUri().getPath();
        Matcher m = fileToCleanPattern.matcher(dirname);
        if (m.find()) {
          if (currentTime - dirStat.getModificationTime() > cleanThreshold) {
            // recursively delete all the files/dirs
            LOG.info("Delete " + subDirPath);
            fs.delete(subDirPath, true);
          }
        }
      }
    } catch (IOException ioe) {
      LOG.error("IOException when clearing dir ", ioe);
    }
    if (cleanInterval == 0) {
      return;
    }
    try {
      Thread.sleep(cleanInterval);
    } catch (InterruptedException e) {
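      // interruption ignored; the cleaner simply retries on the next loop iteration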
    }
  }
}
 
Developer: rhli, Project: hadoop-EAR, Lines of code: 43, Source file: ExpireUnusedJobFiles.java

Example 9: loadSplits

import org.apache.hadoop.fs.LocatedFileStatus; // import required by the method
private CompletableFuture<?> loadSplits()
        throws IOException
{
    HiveFileIterator files = fileIterators.poll();
    if (files == null) {
        HivePartitionMetadata partition = partitions.poll();
        if (partition == null) {
            return COMPLETED_FUTURE;
        }
        loadPartition(partition);
        return COMPLETED_FUTURE;
    }

    while (files.hasNext() && !stopped) {
        LocatedFileStatus file = files.next();
        if (isDirectory(file)) {
            if (recursiveDirWalkerEnabled) {
                HiveFileIterator fileIterator = new HiveFileIterator(
                        file.getPath(),
                        files.getFileSystem(),
                        files.getDirectoryLister(),
                        files.getNamenodeStats(),
                        files.getPartitionName(),
                        files.getInputFormat(),
                        files.getSchema(),
                        files.getPartitionKeys(),
                        files.getEffectivePredicate());
                fileIterators.add(fileIterator);
            }
        }
        else {
            boolean splittable = isSplittable(files.getInputFormat(), hdfsEnvironment.getFileSystem(file.getPath()), file.getPath());

            CompletableFuture<?> future = hiveSplitSource.addToQueue(createHiveSplits(
                    files.getPartitionName(),
                    file.getPath().toString(),
                    file.getBlockLocations(),
                    0,
                    file.getLen(),
                    files.getSchema(),
                    files.getPartitionKeys(),
                    splittable,
                    session,
                    files.getEffectivePredicate()));
            if (!future.isDone()) {
                fileIterators.addFirst(files);
                return future;
            }
        }
    }

    // No need to put the iterator back, since it's either empty or we've stopped
    return COMPLETED_FUTURE;
}
 
Developer: y-lan, Project: presto, Lines of code: 55, Source file: BackgroundHiveSplitLoader.java


Note: The org.apache.hadoop.fs.LocatedFileStatus.getPath examples in this article were compiled by 纯净天空 from GitHub, MSDocs, and other open-source code and documentation platforms. The snippets were selected from open-source projects contributed by their respective authors; copyright remains with the original authors, and distribution or use must follow the license of the corresponding project. Do not reproduce without permission.