

Java FileSplit.getPath Method Code Examples

This article collects typical usage examples of the Java method org.apache.hadoop.mapreduce.lib.input.FileSplit.getPath. If you are wondering what exactly FileSplit.getPath does, how to call it, or simply want to see it used in practice, the curated code examples below should help. You can also explore further usage examples of the enclosing class, org.apache.hadoop.mapreduce.lib.input.FileSplit.


The following 15 code examples of the FileSplit.getPath method are shown, sorted by popularity by default.
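Before diving into the individual projects, here is a minimal, self-contained sketch of the most common pattern: inside a custom RecordReader, cast the InputSplit to FileSplit, call getPath() to obtain the file's Path, and open the file through that Path's FileSystem. The class name and field names below are made up for illustration and do not come from any of the projects listed afterwards.

import java.io.IOException;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FSDataInputStream;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.InputSplit;
import org.apache.hadoop.mapreduce.RecordReader;
import org.apache.hadoop.mapreduce.TaskAttemptContext;
import org.apache.hadoop.mapreduce.lib.input.FileSplit;

// Illustrative sketch only: shows where FileSplit.getPath() fits in a RecordReader.
public class PathAwareRecordReader extends RecordReader<LongWritable, Text> {
    private FSDataInputStream in;
    private long start;
    private long end;

    @Override
    public void initialize(InputSplit genericSplit, TaskAttemptContext context) throws IOException {
        FileSplit split = (FileSplit) genericSplit;   // file-based input formats hand out FileSplits
        Configuration conf = context.getConfiguration();
        Path file = split.getPath();                  // path of the file this split belongs to
        start = split.getStart();                     // byte offset of the split within the file
        end = start + split.getLength();
        FileSystem fs = file.getFileSystem(conf);     // resolve the FileSystem from the Path
        in = fs.open(file);                           // open the file
        in.seek(start);                               // position the stream at the split's start
    }

    @Override public boolean nextKeyValue() { return false; }        // record parsing omitted in this sketch
    @Override public LongWritable getCurrentKey() { return null; }
    @Override public Text getCurrentValue() { return null; }
    @Override public float getProgress() { return 0f; }
    @Override public void close() throws IOException { if (in != null) in.close(); }
}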

Example 1: map

import org.apache.hadoop.mapreduce.lib.input.FileSplit; // import the package/class this method depends on
/**
 * {@inheritDoc}
 */
protected void map(final Object key, final OrcStruct value, final Context context) throws IOException, InterruptedException {
    if (value != null && value.toString() != null && value.toString().isEmpty()) {
        return;
    }

    // Mapper sends data with parent directory path as keys to retain directory structure
    final FileSplit fileSplit = (FileSplit) context.getInputSplit();
    final Path filePath = fileSplit.getPath();
    final String parentFilePath = String.format("%s/", filePath.getParent().toString());
    log.debug("Parent file path {}", parentFilePath);

    if (!fileSizesMap.containsKey(filePath.toString())) {
        if (fileSystem == null){
            final URI uri = URI.create(filePath.toString());
            fileSystem = FileSystem.get(uri, configuration);
        }
        final FileStatus[] listStatuses = fileSystem.listStatus(filePath);
        for (FileStatus fileStatus : listStatuses) {
            if (!fileStatus.isDirectory()) {
                fileSizesMap.put(fileStatus.getPath().toString(), fileStatus.getLen());
                log.info("Entry added to fileSizes Map {} {}", fileStatus.getPath().toString(), fileStatus.getLen());
            }
        }
    }

    final Text parentFilePathKey = new Text(parentFilePath);
    final Text filePathKey = new Text(filePath.toString());
    final OrcValue orcValue = new OrcValue();
    orcValue.value = value;


    final Long fileSize = fileSizesMap.get(filePath.toString());

    if (fileSize < threshold) {
        context.write(parentFilePathKey, orcValue);
    } else {
        context.write(filePathKey, orcValue);
    }
}
 
Developer ID: ExpediaInceCommercePlatform, Project: dataSqueeze, Lines: 43, Source: OrcCompactionMapper.java

Example 2: XMLRecordReader

import org.apache.hadoop.mapreduce.lib.input.FileSplit; // import the package/class this method depends on
/**
 * Initializes the input resource and related parameters; this work could also be moved into the initialize() method
 * @param inputSplit
 * @param context
 * @throws IOException
 */
public XMLRecordReader(InputSplit inputSplit, Configuration context) throws IOException {
    /**
     * Obtain the start and end tags passed in via the configuration
     */
    startTag = context.get(START_TAG_KEY).getBytes("UTF-8");
    endTag = context.get(END_TAG_KEY).getBytes("UTF-8");
    FileSplit fileSplit = (FileSplit) inputSplit;
    /**
     * Get the start and end positions of the split
     */
    start = fileSplit.getStart();
    end = start + fileSplit.getLength();
    Path file = fileSplit.getPath();
    FileSystem fs = file.getFileSystem(context);
    /**
     * Open an HDFS input stream for the split's file
     */
    fsin = fs.open(fileSplit.getPath());
    /**
     * Seek to the beginning of the split
     */
    fsin.seek(start);
}
 
Developer ID: lzmhhh123, Project: Wikipedia-Index, Lines: 30, Source: XmlInputFormat.java

Example 3: setup

import org.apache.hadoop.mapreduce.lib.input.FileSplit; // import the package/class this method depends on
@Override
protected void setup(Context context)
    throws IOException, InterruptedException {
  Configuration conf = context.getConfiguration();
  keyColName = conf.get(MergeJob.MERGE_KEY_COL_KEY);

  InputSplit is = context.getInputSplit();
  FileSplit fs = (FileSplit) is;
  Path splitPath = fs.getPath();

  if (splitPath.toString().startsWith(
      conf.get(MergeJob.MERGE_NEW_PATH_KEY))) {
    this.isNew = true;
  } else if (splitPath.toString().startsWith(
      conf.get(MergeJob.MERGE_OLD_PATH_KEY))) {
    this.isNew = false;
  } else {
    throw new IOException("File " + splitPath + " is not under new path "
        + conf.get(MergeJob.MERGE_NEW_PATH_KEY) + " or old path "
        + conf.get(MergeJob.MERGE_OLD_PATH_KEY));
  }
}
 
Developer ID: aliyun, Project: aliyun-maxcompute-data-collectors, Lines: 23, Source: MergeMapperBase.java

Example 4: SingleFastqRecordReader

import org.apache.hadoop.mapreduce.lib.input.FileSplit; // import the package/class this method depends on
public SingleFastqRecordReader(Configuration conf, FileSplit split) throws IOException {
    file = split.getPath();
    start = split.getStart();
    end = start + split.getLength();

    FileSystem fs = file.getFileSystem(conf);
    FSDataInputStream fileIn = fs.open(file);

    CompressionCodecFactory codecFactory = new CompressionCodecFactory(conf);
    CompressionCodec codec        = codecFactory.getCodec(file);

    if (codec == null) { // no codec.  Uncompressed file.
        positionAtFirstRecord(fileIn);
        inputStream = fileIn;
    } else {
        // compressed file
        if (start != 0) {
            throw new RuntimeException("Start position for compressed file is not 0! (found " + start + ")");
        }

        inputStream = codec.createInputStream(fileIn);
        end = Long.MAX_VALUE; // read until the end of the file
    }

    lineReader = new LineReader(inputStream);
}
 
Developer ID: PAA-NCIC, Project: SparkSeq, Lines: 27, Source: SingleFastqInputFormat.java

Example 5: initialize

import org.apache.hadoop.mapreduce.lib.input.FileSplit; // import the package/class this method depends on
@Override
public void initialize(InputSplit inputSplit, TaskAttemptContext context)
  throws IOException, InterruptedException {

  // what follows is very questionable but a quick test
  // the file is read from HDFS and copied to a temporary location
  FileSplit split = (FileSplit)inputSplit;
  Configuration job = context.getConfiguration();
  Path file = split.getPath();
  FileSystem fs = file.getFileSystem(job);
  java.nio.file.Path tmpFile = Files.createTempFile("tmp", ".zip"); // consider using job and task IDs?
  FSDataInputStream fileIn = fs.open(file);
  FileOutputStream fileOut = new FileOutputStream(tmpFile.toFile());
  LOG.info("Copying from {} to {}", file, tmpFile);
  IOUtils.copyBytes(fileIn, fileOut, 100000, true);

  // having copied the file out of HDFS onto the local FS in a temp folder, we prepare it (sorts files)
  java.nio.file.Path tmpSpace = Files.createTempDirectory("tmp-" + context.getTaskAttemptID().getJobID().getId() +
                                                          ":" + context.getTaskAttemptID().getId());

  reader = new DwCAReader(tmpFile.toAbsolutePath().toString(), tmpSpace.toAbsolutePath().toString());
  nextKeyValue();
}
 
Developer ID: gbif, Project: pipelines, Lines: 24, Source: DwCAInputFormat.java

Example 6: initialize

import org.apache.hadoop.mapreduce.lib.input.FileSplit; // import the package/class this method depends on
public void initialize(InputSplit inputSplit, TaskAttemptContext context)
		throws IOException {
	super.initialize(inputSplit, context);

	FileSplit split = (FileSplit) inputSplit;
	final Path file = split.getPath();

	String chrName = context.getConfiguration().get(CHROMOSOME);
	String indexPath = context.getConfiguration().get("cram.index.path");

	if (chrName != null) {
		ChromosomeIndex chromosome = null;
		if (indexPath == null)
			chromosome = new ChromosomeIndex(file.toString());
		else
			chromosome = new ChromosomeIndex(file.toString(), indexPath
					+ "/" + file.getName() + ".crai");
		chromosome.setHeader(samFileHeader);
		start = chromosome.getStart(chrName);
		length = chromosome.getEnd(chrName) - start;
		
		sequenceId = samFileHeader.getSequenceIndex(chrName);
		seekableStream.seek(start);
	}
}
 
Developer ID: BGI-flexlab, Project: SOAPgaea, Lines: 26, Source: GaeaCramChromosomeRecordReader.java

Example 7: initialize

import org.apache.hadoop.mapreduce.lib.input.FileSplit; // import the package/class this method depends on
@Override
public void initialize(final InputSplit genericSplit, final TaskAttemptContext context) throws IOException {
    final FileSplit split = (FileSplit) genericSplit;
    final Configuration configuration = context.getConfiguration();
    if (configuration.get(Constants.GREMLIN_HADOOP_GRAPH_FILTER, null) != null)
        this.graphFilter = VertexProgramHelper.deserialize(ConfUtil.makeApacheConfiguration(configuration), Constants.GREMLIN_HADOOP_GRAPH_FILTER);
    KryoShimServiceLoader.applyConfiguration(ConfUtil.makeApacheConfiguration(configuration));
    this.gryoReader = HadoopPools.getGryoPool().takeReader();
    long start = split.getStart();
    final Path file = split.getPath();
    if (null != new CompressionCodecFactory(configuration).getCodec(file)) {
        throw new IllegalStateException("Compression is not supported for the (binary) Gryo format");
    }
    // open the file and seek to the start of the split
    this.inputStream = file.getFileSystem(configuration).open(split.getPath());
    this.splitLength = split.getLength();
    if (this.splitLength > 0) this.splitLength -= (seekToHeader(this.inputStream, start) - start);
}
 
Developer ID: PKUSilvester, Project: LiteGraph, Lines: 19, Source: GryoRecordReader.java

Example 8: initialize

import org.apache.hadoop.mapreduce.lib.input.FileSplit; // import the package/class this method depends on
@Override
public void initialize(InputSplit genericSplit, TaskAttemptContext context)
        throws IOException, InterruptedException {
    // TODO Auto-generated method stub
    /*
     PDFParser parser = new PDFParser(new FileInputStream(file));
        parser.parse();
        cosDoc = parser.getDocument();
        pdfStripper = new PDFTextStripper();
        pdDoc = new PDDocument(cosDoc);
        String parsedText = pdfStripper.getText(pdDoc);
        //System.out.println(parsedText);
     * */
    FileSplit split = (FileSplit) genericSplit;
    Configuration job = context.getConfiguration();
    final Path file = split.getPath();

    FileSystem fs = file.getFileSystem(job);
    FSDataInputStream fileIn = fs.open(split.getPath());
    EpubParser epubParser = new EpubParser();
    String parsedText = epubParser.epubParse(fileIn);
    this.lines = parsedText.split("\n");

}
 
Developer ID: arks-api, Project: arks-api, Lines: 25, Source: EpubRecordReader.java

Example 9: initialize

import org.apache.hadoop.mapreduce.lib.input.FileSplit; // import the package/class this method depends on
@Override
public void initialize( final InputSplit inputSplit, final TaskAttemptContext context ) throws IOException, InterruptedException {
  FileSplit fileSplit = (FileSplit)inputSplit;
  Configuration config = context.getConfiguration();
  Path path = fileSplit.getPath();
  FileSystem fs = path.getFileSystem( config );
  long fileLength = fs.getLength( path );
  long start = fileSplit.getStart();
  long length = fileSplit.getLength();
  InputStream in = fs.open( path );
}
 
Developer ID: yahoojapan, Project: multiple-dimension-spread, Lines: 12, Source: MDSSpreadReader.java

Example 10: initialize

import org.apache.hadoop.mapreduce.lib.input.FileSplit; // import the package/class this method depends on
@Override
public void initialize(InputSplit genericSplit, TaskAttemptContext context)
    throws IOException, InterruptedException {
  FileSplit split = (FileSplit) genericSplit;
  Configuration conf = context.getConfiguration();
  SeekableInput in = new FsInput(split.getPath(), conf);
  DatumReader<T> datumReader = new GenericDatumReader<T>();
  this.reader = DataFileReader.openReader(in, datumReader);
  reader.sync(split.getStart());                    // sync to start
  this.start = reader.tell();
  this.end = split.getStart() + split.getLength();
}
 
Developer ID: aliyun, Project: aliyun-maxcompute-data-collectors, Lines: 13, Source: AvroRecordReader.java

Example 11: setup

import org.apache.hadoop.mapreduce.lib.input.FileSplit; // import the package/class this method depends on
protected void setup(Context context) {
  // Get the thread num from the file number.
  FileSplit split = (FileSplit) context.getInputSplit();
  Path filePath = split.getPath();
  String name = filePath.getName();
  this.threadId = Integer.valueOf(name);

  LOG.info("Thread " + threadId + " : "
      + context.getInputSplit());
}
 
Developer ID: naver, Project: hadoop, Lines: 11, Source: TestLocalRunner.java

Example 12: initialize

import org.apache.hadoop.mapreduce.lib.input.FileSplit; // import the package/class this method depends on
@Override
public void initialize(InputSplit inputSplit, TaskAttemptContext context) throws IOException, InterruptedException {
    FileSplit fileSplit = (FileSplit)inputSplit;
    if(fileSplit != null && fileSplit.getPath() != null && fileSplit.getPath().toString().endsWith(TEMP_FILE_SUFFIX)){
        LOG.info("Not processing Avro tmp file {}", fileSplit.getPath());
    }else {
        super.initialize(inputSplit, context);
    }
}
 
Developer ID: Comcast, Project: spark-util, Lines: 10, Source: ErrorHandlingAvroKeyRecordReader.java

Example 13: initialize

import org.apache.hadoop.mapreduce.lib.input.FileSplit; // import the package/class this method depends on
public void initialize(InputSplit genericSplit, TaskAttemptContext context)  {
    try {
        FileSplit split = (FileSplit)genericSplit;
        Configuration job = context.getConfiguration();
        this.maxLineLength = job.getInt("mapreduce.input.linerecordreader.line.maxlength", 2147483647);
        this.start = split.getStart();
        this.end = this.start + split.getLength();
        Path file = split.getPath();
        FileSystem fs = file.getFileSystem(job);
        this.fileIn = fs.open(file);
        CompressionCodec codec = (new CompressionCodecFactory(job)).getCodec(file);
        if(null != codec) {
            this.isCompressedInput = true;
            this.decompressor = CodecPool.getDecompressor(codec);
            if(codec instanceof SplittableCompressionCodec) {
                SplitCompressionInputStream cIn = ((SplittableCompressionCodec)codec).createInputStream(this.fileIn, this.decompressor, this.start, this.end, SplittableCompressionCodec.READ_MODE.BYBLOCK);
                this.in = new CompressedSplitLineReader(cIn, job, this.recordDelimiterBytes);
                this.start = cIn.getAdjustedStart();
                this.end = cIn.getAdjustedEnd();
                this.filePosition = cIn;
            } else {
                this.in = new SplitLineReader(codec.createInputStream(this.fileIn, this.decompressor), job, this.recordDelimiterBytes);
                this.filePosition = this.fileIn;
            }
        } else {
            this.fileIn.seek(this.start);
            this.in = new SplitLineReader(this.fileIn, job, this.recordDelimiterBytes);
            this.filePosition = this.fileIn;
        }

        if(this.start != 0L) {
            this.start += (long)this.in.readLine(new Text(), 0, this.maxBytesToConsume(this.start));
        }

        this.pos = this.start;
    }catch(Exception ex){
        LOG.warn("Exception occurred during initialization {}", ex, ex);
    }

}
 
Developer ID: Comcast, Project: spark-util, Lines: 41, Source: ErrorHandlingLineRecordReader.java

Example 14: map

import org.apache.hadoop.mapreduce.lib.input.FileSplit; // import the package/class this method depends on
/**
 * {@inheritDoc}
 */
protected void map(final Object key, final Text value, final Context context) throws IOException, InterruptedException {
    if (value != null && value.toString() != null && value.toString().isEmpty()) {
        return;
    }

    // Mapper sends data with parent directory path as keys to retain directory structure
    final FileSplit fileSplit = (FileSplit) context.getInputSplit();
    final Path filePath = fileSplit.getPath();
    final String parentFilePath = String.format("%s/", filePath.getParent().toString());
    log.debug("Parent file path {}", parentFilePath);

    if (!fileSizesMap.containsKey(filePath.toString())) {
        if (fileSystem == null){
            final URI uri = URI.create(filePath.toString());
            fileSystem = FileSystem.get(uri, configuration);
        }
        final FileStatus[] listStatuses = fileSystem.listStatus(filePath);
        for (FileStatus fileStatus : listStatuses) {
            if (!fileStatus.isDirectory()) {
                fileSizesMap.put(fileStatus.getPath().toString(), fileStatus.getLen());
                log.info("Entry added to fileSizes Map {} {}", fileStatus.getPath().toString(), fileStatus.getLen());
            }
        }
    }

    final Text parentFilePathKey = new Text(parentFilePath);
    final Text filePathKey = new Text(filePath.toString());
    final Long fileSize = fileSizesMap.get(filePath.toString());
    if (fileSize < threshold) {
        context.write(parentFilePathKey, value);
    } else {
        context.write(filePathKey, value);
    }
}
 
Developer ID: ExpediaInceCommercePlatform, Project: dataSqueeze, Lines: 38, Source: TextCompactionMapper.java

Example 15: map

import org.apache.hadoop.mapreduce.lib.input.FileSplit; // import the package/class this method depends on
/**
 * {@inheritDoc}
 */
protected void map(final Object key, final BytesWritable value, final Context context) throws IOException, InterruptedException {
    if (value != null && value.toString() != null && value.toString().isEmpty()) {
        return;
    }

    // Mapper sends data with parent directory path as keys to retain directory structure
    final FileSplit fileSplit = (FileSplit) context.getInputSplit();
    final Path filePath = fileSplit.getPath();
    final String parentFilePath = String.format("%s/", filePath.getParent().toString());
    log.debug("Parent file path {}", parentFilePath);

    if (!fileSizesMap.containsKey(filePath.toString())) {
        if (fileSystem == null){
            final URI uri = URI.create(filePath.toString());
            fileSystem = FileSystem.get(uri, configuration);
        }
        final FileStatus[] listStatuses = fileSystem.listStatus(filePath);
        for (FileStatus fileStatus : listStatuses) {
            if (!fileStatus.isDirectory()) {
                fileSizesMap.put(fileStatus.getPath().toString(), fileStatus.getLen());
                log.info("Entry added to fileSizes Map {} {}", fileStatus.getPath().toString(), fileStatus.getLen());
            }
        }
    }

    final Text parentFilePathKey = new Text(parentFilePath);
    final Text filePathKey = new Text(filePath.toString());
    final Long fileSize = fileSizesMap.get(filePath.toString());
    if (fileSize < threshold) {
        context.write(parentFilePathKey, value);
    } else {
        context.write(filePathKey, value);
    }
}
 
Developer ID: ExpediaInceCommercePlatform, Project: dataSqueeze, Lines: 38, Source: BytesWritableCompactionMapper.java


Note: The org.apache.hadoop.mapreduce.lib.input.FileSplit.getPath examples in this article were compiled by 純淨天空 from open-source code and documentation platforms such as GitHub and MSDocs. The snippets were selected from open-source projects contributed by their respective developers, and copyright of the source code remains with the original authors. Please consult each project's license before distributing or using the code; do not reproduce without permission.