This article collects typical usage examples of the Java method org.apache.hadoop.mapreduce.lib.input.FileSplit.getPath. If you are wondering what FileSplit.getPath does, how to call it, or want to see it used in context, the curated code samples below should help. You can also explore further usage examples of its declaring class, org.apache.hadoop.mapreduce.lib.input.FileSplit.
The following shows 15 code examples of FileSplit.getPath, sorted by popularity.
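All of the examples share one pattern: cast the InputSplit supplied by the framework to FileSplit, then call getPath() to learn which file the current split belongs to. A minimal, illustrative sketch of that pattern follows; this record-reader fragment is not one of the 15 examples, and its names are assumptions.

import java.io.IOException;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FSDataInputStream;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.mapreduce.InputSplit;
import org.apache.hadoop.mapreduce.TaskAttemptContext;
import org.apache.hadoop.mapreduce.lib.input.FileSplit;

// Common pattern: FileSplit.getPath() identifies the file backing the current split.
public void initialize(InputSplit genericSplit, TaskAttemptContext context) throws IOException {
    FileSplit split = (FileSplit) genericSplit;
    Path file = split.getPath();                 // the file this split was carved from
    Configuration conf = context.getConfiguration();
    FileSystem fs = file.getFileSystem(conf);
    FSDataInputStream in = fs.open(file);
    in.seek(split.getStart());                   // position the stream at the split's start offset
}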
Example 1: map
import org.apache.hadoop.mapreduce.lib.input.FileSplit; // import the package/class the method depends on
/**
 * {@inheritDoc}
 */
protected void map(final Object key, final OrcStruct value, final Context context) throws IOException, InterruptedException {
    if (value != null && value.toString() != null && value.toString().isEmpty()) {
        return;
    }
    // Mapper sends data with parent directory path as keys to retain directory structure
    final FileSplit fileSplit = (FileSplit) context.getInputSplit();
    final Path filePath = fileSplit.getPath();
    final String parentFilePath = String.format("%s/", filePath.getParent().toString());
    log.debug("Parent file path {}", parentFilePath);
    if (!fileSizesMap.containsKey(filePath.toString())) {
        if (fileSystem == null) {
            final URI uri = URI.create(filePath.toString());
            fileSystem = FileSystem.get(uri, configuration);
        }
        final FileStatus[] listStatuses = fileSystem.listStatus(filePath);
        for (FileStatus fileStatus : listStatuses) {
            if (!fileStatus.isDirectory()) {
                fileSizesMap.put(fileStatus.getPath().toString(), fileStatus.getLen());
                log.info("Entry added to fileSizes Map {} {}", fileStatus.getPath().toString(), fileStatus.getLen());
            }
        }
    }
    final Text parentFilePathKey = new Text(parentFilePath);
    final Text filePathKey = new Text(filePath.toString());
    final OrcValue orcValue = new OrcValue();
    orcValue.value = value;
    final Long fileSize = fileSizesMap.get(filePath.toString());
    if (fileSize < threshold) {
        context.write(parentFilePathKey, orcValue);
    } else {
        context.write(filePathKey, orcValue);
    }
}
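Example 1 relies on fields (log, fileSizesMap, fileSystem, configuration, threshold) declared elsewhere in the original class. A minimal, hypothetical skeleton of that enclosing Mapper is sketched below; the class name, the setup() logic, the "merge.threshold.bytes" property, and the choice of the org.apache.orc.mapred ORC classes are assumptions, not part of the source.

import java.util.HashMap;
import java.util.Map;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.orc.mapred.OrcStruct;
import org.apache.orc.mapred.OrcValue;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

// Hypothetical enclosing class for Example 1; it only shows what the map() above appears to assume.
public class OrcFileMergeMapper extends Mapper<Object, OrcStruct, Text, OrcValue> {
    private static final Logger log = LoggerFactory.getLogger(OrcFileMergeMapper.class);
    private final Map<String, Long> fileSizesMap = new HashMap<>();
    private FileSystem fileSystem;
    private Configuration configuration;
    private long threshold;

    @Override
    protected void setup(final Context context) {
        configuration = context.getConfiguration();
        // "merge.threshold.bytes" is an assumed property name, used here only for illustration
        threshold = configuration.getLong("merge.threshold.bytes", 128L * 1024 * 1024);
    }

    // map(...) as shown in Example 1 above
}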
Example 2: XMLRecordReader
import org.apache.hadoop.mapreduce.lib.input.FileSplit; // import the package/class the method depends on
/**
 * Initializes the input resources and related parameters; this could also be done in the initialize() method.
 * @param inputSplit
 * @param context
 * @throws IOException
 */
public XMLRecordReader(InputSplit inputSplit, Configuration context) throws IOException {
    // Get the start and end tags passed in
    startTag = context.get(START_TAG_KEY).getBytes("UTF-8");
    endTag = context.get(END_TAG_KEY).getBytes("UTF-8");
    FileSplit fileSplit = (FileSplit) inputSplit;
    // Get the start and end positions of the split
    start = fileSplit.getStart();
    end = start + fileSplit.getLength();
    Path file = fileSplit.getPath();
    FileSystem fs = file.getFileSystem(context);
    // Open an HDFS input stream for the split's file
    fsin = fs.open(fileSplit.getPath());
    // Seek to the start of the split
    fsin.seek(start);
}
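A record reader like the one in Example 2 is usually created by an XML-oriented InputFormat whose driver sets the start and end tags before the job is submitted. A minimal, hypothetical driver fragment follows; the XMLInputFormat class, the tag values, and the input path are assumptions, and only the START_TAG_KEY and END_TAG_KEY constants come from the snippet above.

import java.io.IOException;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;

// Hypothetical driver wiring for Example 2.
public static Job buildXmlJob(Configuration conf) throws IOException {
    // the same constants XMLRecordReader reads back; their literal property names are not shown above
    conf.set(XMLInputFormat.START_TAG_KEY, "<record>");
    conf.set(XMLInputFormat.END_TAG_KEY, "</record>");
    Job job = Job.getInstance(conf, "xml-record-reader-demo");
    job.setInputFormatClass(XMLInputFormat.class);           // assumed InputFormat that constructs XMLRecordReader
    FileInputFormat.addInputPath(job, new Path("/data/input.xml"));
    return job;
}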
Example 3: setup
import org.apache.hadoop.mapreduce.lib.input.FileSplit; // import the package/class the method depends on
@Override
protected void setup(Context context)
        throws IOException, InterruptedException {
    Configuration conf = context.getConfiguration();
    keyColName = conf.get(MergeJob.MERGE_KEY_COL_KEY);
    InputSplit is = context.getInputSplit();
    FileSplit fs = (FileSplit) is;
    Path splitPath = fs.getPath();
    if (splitPath.toString().startsWith(
            conf.get(MergeJob.MERGE_NEW_PATH_KEY))) {
        this.isNew = true;
    } else if (splitPath.toString().startsWith(
            conf.get(MergeJob.MERGE_OLD_PATH_KEY))) {
        this.isNew = false;
    } else {
        throw new IOException("File " + splitPath + " is not under new path "
            + conf.get(MergeJob.MERGE_NEW_PATH_KEY) + " or old path "
            + conf.get(MergeJob.MERGE_OLD_PATH_KEY));
    }
}
Example 4: SingleFastqRecordReader
import org.apache.hadoop.mapreduce.lib.input.FileSplit; // import the package/class the method depends on
public SingleFastqRecordReader(Configuration conf, FileSplit split) throws IOException {
    file = split.getPath();
    start = split.getStart();
    end = start + split.getLength();
    FileSystem fs = file.getFileSystem(conf);
    FSDataInputStream fileIn = fs.open(file);
    CompressionCodecFactory codecFactory = new CompressionCodecFactory(conf);
    CompressionCodec codec = codecFactory.getCodec(file);
    if (codec == null) { // no codec. Uncompressed file.
        positionAtFirstRecord(fileIn);
        inputStream = fileIn;
    } else {
        // compressed file
        if (start != 0) {
            throw new RuntimeException("Start position for compressed file is not 0! (found " + start + ")");
        }
        inputStream = codec.createInputStream(fileIn);
        end = Long.MAX_VALUE; // read until the end of the file
    }
    lineReader = new LineReader(inputStream);
}
Example 5: initialize
import org.apache.hadoop.mapreduce.lib.input.FileSplit; // import the package/class the method depends on
@Override
public void initialize(InputSplit inputSplit, TaskAttemptContext context)
        throws IOException, InterruptedException {
    // what follows is very questionable but a quick test
    // the file is read from HDFS and copied to a temporary location
    FileSplit split = (FileSplit) inputSplit;
    Configuration job = context.getConfiguration();
    Path file = split.getPath();
    FileSystem fs = file.getFileSystem(job);
    java.nio.file.Path tmpFile = Files.createTempFile("tmp", ".zip"); // consider using job and task IDs?
    FSDataInputStream fileIn = fs.open(file);
    FileOutputStream fileOut = new FileOutputStream(tmpFile.toFile());
    LOG.info("Copying from {} to {}", file, tmpFile);
    IOUtils.copyBytes(fileIn, fileOut, 100000, true);
    // having copied the file out of HDFS onto the local FS in a temp folder, we prepare it (sorts files)
    java.nio.file.Path tmpSpace = Files.createTempDirectory("tmp-" + context.getTaskAttemptID().getJobID().getId() +
        ":" + context.getTaskAttemptID().getId());
    reader = new DwCAReader(tmpFile.toAbsolutePath().toString(), tmpSpace.toAbsolutePath().toString());
    nextKeyValue();
}
Example 6: initialize
import org.apache.hadoop.mapreduce.lib.input.FileSplit; // import the package/class the method depends on
public void initialize(InputSplit inputSplit, TaskAttemptContext context)
        throws IOException {
    super.initialize(inputSplit, context);
    FileSplit split = (FileSplit) inputSplit;
    final Path file = split.getPath();
    String chrName = context.getConfiguration().get(CHROMOSOME);
    String indexPath = context.getConfiguration().get("cram.index.path");
    if (chrName != null) {
        ChromosomeIndex chromosome = null;
        if (indexPath == null)
            chromosome = new ChromosomeIndex(file.toString());
        else
            chromosome = new ChromosomeIndex(file.toString(), indexPath
                + "/" + file.getName() + ".crai");
        chromosome.setHeader(samFileHeader);
        start = chromosome.getStart(chrName);
        length = chromosome.getEnd(chrName) - start;
        sequenceId = samFileHeader.getSequenceIndex(chrName);
        seekableStream.seek(start);
    }
}
Example 7: initialize
import org.apache.hadoop.mapreduce.lib.input.FileSplit; // import the package/class the method depends on
@Override
public void initialize(final InputSplit genericSplit, final TaskAttemptContext context) throws IOException {
    final FileSplit split = (FileSplit) genericSplit;
    final Configuration configuration = context.getConfiguration();
    if (configuration.get(Constants.GREMLIN_HADOOP_GRAPH_FILTER, null) != null)
        this.graphFilter = VertexProgramHelper.deserialize(ConfUtil.makeApacheConfiguration(configuration), Constants.GREMLIN_HADOOP_GRAPH_FILTER);
    KryoShimServiceLoader.applyConfiguration(ConfUtil.makeApacheConfiguration(configuration));
    this.gryoReader = HadoopPools.getGryoPool().takeReader();
    long start = split.getStart();
    final Path file = split.getPath();
    if (null != new CompressionCodecFactory(configuration).getCodec(file)) {
        throw new IllegalStateException("Compression is not supported for the (binary) Gryo format");
    }
    // open the file and seek to the start of the split
    this.inputStream = file.getFileSystem(configuration).open(split.getPath());
    this.splitLength = split.getLength();
    if (this.splitLength > 0) this.splitLength -= (seekToHeader(this.inputStream, start) - start);
}
Example 8: initialize
import org.apache.hadoop.mapreduce.lib.input.FileSplit; // import the package/class the method depends on
@Override
public void initialize(InputSplit genericSplit, TaskAttemptContext context)
        throws IOException, InterruptedException {
    // TODO Auto-generated method stub
    /*
    PDFParser parser = new PDFParser(new FileInputStream(file));
    parser.parse();
    cosDoc = parser.getDocument();
    pdfStripper = new PDFTextStripper();
    pdDoc = new PDDocument(cosDoc);
    String parsedText = pdfStripper.getText(pdDoc);
    //System.out.println(parsedText);
    */
    FileSplit split = (FileSplit) genericSplit;
    Configuration job = context.getConfiguration();
    final Path file = split.getPath();
    FileSystem fs = file.getFileSystem(job);
    FSDataInputStream fileIn = fs.open(split.getPath());
    EpubParser epubParser = new EpubParser();
    String parsedText = epubParser.epubParse(fileIn);
    this.lines = parsedText.split("\n");
}
Example 9: initialize
import org.apache.hadoop.mapreduce.lib.input.FileSplit; // import the package/class the method depends on
@Override
public void initialize( final InputSplit inputSplit, final TaskAttemptContext context ) throws IOException, InterruptedException {
    FileSplit fileSplit = (FileSplit) inputSplit;
    Configuration config = context.getConfiguration();
    Path path = fileSplit.getPath();
    FileSystem fs = path.getFileSystem( config );
    // FileSystem.getLength(Path) is deprecated; getFileStatus(path).getLen() is the usual replacement
    long fileLength = fs.getLength( path );
    long start = fileSplit.getStart();
    long length = fileSplit.getLength();
    // the snippet ends here: fileLength, start, length and the opened stream
    // are presumably consumed by code that is not shown
    InputStream in = fs.open( path );
}
Example 10: initialize
import org.apache.hadoop.mapreduce.lib.input.FileSplit; // import the package/class the method depends on
@Override
public void initialize(InputSplit genericSplit, TaskAttemptContext context)
        throws IOException, InterruptedException {
    FileSplit split = (FileSplit) genericSplit;
    Configuration conf = context.getConfiguration();
    SeekableInput in = new FsInput(split.getPath(), conf);
    DatumReader<T> datumReader = new GenericDatumReader<T>();
    this.reader = DataFileReader.openReader(in, datumReader);
    reader.sync(split.getStart()); // sync to start
    this.start = reader.tell();
    this.end = split.getStart() + split.getLength();
}
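Example 10 only shows initialization. The read loop that normally goes with it stops as soon as the DataFileReader has passed the first sync marker beyond the split's end. A minimal sketch, assuming a current field and generic type T that are not shown in the original class:

// Sketch of the companion read loop for Example 10; 'current' is an assumed field.
private T current;

public boolean nextKeyValue() throws IOException {
    if (!reader.hasNext() || reader.pastSync(end)) {
        return false;                       // no more records belonging to this split
    }
    current = reader.next(current);         // reuse the previous record object where possible
    return true;
}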
Example 11: setup
import org.apache.hadoop.mapreduce.lib.input.FileSplit; // import the package/class the method depends on
protected void setup(Context context) {
    // Get the thread num from the file number.
    FileSplit split = (FileSplit) context.getInputSplit();
    Path filePath = split.getPath();
    String name = filePath.getName();
    this.threadId = Integer.valueOf(name);
    LOG.info("Thread " + threadId + " : "
        + context.getInputSplit());
}
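The setup() in Example 11 parses the input file name as an integer, so the job's input directory has to contain one file per map task named "0", "1", and so on. A hypothetical helper that produces such control files is sketched below; the file contents and naming convention are assumptions.

import java.io.IOException;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FSDataOutputStream;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;

// Hypothetical helper: write one small file per map task so that
// Integer.valueOf(filePath.getName()) in Example 11 yields the thread id.
static void writeControlFiles(Configuration conf, Path inputDir, int numTasks) throws IOException {
    FileSystem fs = inputDir.getFileSystem(conf);
    for (int i = 0; i < numTasks; i++) {
        try (FSDataOutputStream out = fs.create(new Path(inputDir, Integer.toString(i)))) {
            out.writeBytes("thread " + i + "\n");
        }
    }
}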
Example 12: initialize
import org.apache.hadoop.mapreduce.lib.input.FileSplit; // import the package/class the method depends on
@Override
public void initialize(InputSplit inputSplit, TaskAttemptContext context) throws IOException, InterruptedException {
    FileSplit fileSplit = (FileSplit) inputSplit;
    if (fileSplit != null && fileSplit.getPath() != null && fileSplit.getPath().toString().endsWith(TEMP_FILE_SUFFIX)) {
        LOG.info("Not processing Avro tmp file {}", fileSplit.getPath());
    } else {
        super.initialize(inputSplit, context);
    }
}
Example 13: initialize
import org.apache.hadoop.mapreduce.lib.input.FileSplit; // import the package/class the method depends on
public void initialize(InputSplit genericSplit, TaskAttemptContext context) {
    try {
        FileSplit split = (FileSplit) genericSplit;
        Configuration job = context.getConfiguration();
        this.maxLineLength = job.getInt("mapreduce.input.linerecordreader.line.maxlength", 2147483647);
        this.start = split.getStart();
        this.end = this.start + split.getLength();
        Path file = split.getPath();
        FileSystem fs = file.getFileSystem(job);
        this.fileIn = fs.open(file);
        CompressionCodec codec = (new CompressionCodecFactory(job)).getCodec(file);
        if (null != codec) {
            this.isCompressedInput = true;
            this.decompressor = CodecPool.getDecompressor(codec);
            if (codec instanceof SplittableCompressionCodec) {
                SplitCompressionInputStream cIn = ((SplittableCompressionCodec) codec).createInputStream(this.fileIn, this.decompressor, this.start, this.end, SplittableCompressionCodec.READ_MODE.BYBLOCK);
                this.in = new CompressedSplitLineReader(cIn, job, this.recordDelimiterBytes);
                this.start = cIn.getAdjustedStart();
                this.end = cIn.getAdjustedEnd();
                this.filePosition = cIn;
            } else {
                this.in = new SplitLineReader(codec.createInputStream(this.fileIn, this.decompressor), job, this.recordDelimiterBytes);
                this.filePosition = this.fileIn;
            }
        } else {
            this.fileIn.seek(this.start);
            this.in = new SplitLineReader(this.fileIn, job, this.recordDelimiterBytes);
            this.filePosition = this.fileIn;
        }
        if (this.start != 0L) {
            // not the first split: discard the partial first line, which the previous split's reader consumes
            this.start += (long) this.in.readLine(new Text(), 0, this.maxBytesToConsume(this.start));
        }
        this.pos = this.start;
    } catch (Exception ex) {
        LOG.warn("Exception occurred during initialization {}", ex, ex);
    }
}
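Example 13 calls maxBytesToConsume() and assigns filePosition without showing those members. In Hadoop's own LineRecordReader, on which this code is clearly modeled, the corresponding helpers look roughly like the sketch below; the field names are assumed to match the snippet, and this is not the original source.

// Helpers referenced by Example 13, modeled on Hadoop's LineRecordReader.
private int maxBytesToConsume(long pos) {
    return isCompressedInput
            ? Integer.MAX_VALUE
            : (int) Math.max(Math.min(Integer.MAX_VALUE, end - pos), maxLineLength);
}

private long getFilePosition() throws IOException {
    // for splittable-compressed input, the adjusted stream position is authoritative
    return (isCompressedInput && null != filePosition) ? filePosition.getPos() : pos;
}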
Example 14: map
import org.apache.hadoop.mapreduce.lib.input.FileSplit; // import the package/class the method depends on
/**
 * {@inheritDoc}
 */
protected void map(final Object key, final Text value, final Context context) throws IOException, InterruptedException {
    if (value != null && value.toString() != null && value.toString().isEmpty()) {
        return;
    }
    // Mapper sends data with parent directory path as keys to retain directory structure
    final FileSplit fileSplit = (FileSplit) context.getInputSplit();
    final Path filePath = fileSplit.getPath();
    final String parentFilePath = String.format("%s/", filePath.getParent().toString());
    log.debug("Parent file path {}", parentFilePath);
    if (!fileSizesMap.containsKey(filePath.toString())) {
        if (fileSystem == null) {
            final URI uri = URI.create(filePath.toString());
            fileSystem = FileSystem.get(uri, configuration);
        }
        final FileStatus[] listStatuses = fileSystem.listStatus(filePath);
        for (FileStatus fileStatus : listStatuses) {
            if (!fileStatus.isDirectory()) {
                fileSizesMap.put(fileStatus.getPath().toString(), fileStatus.getLen());
                log.info("Entry added to fileSizes Map {} {}", fileStatus.getPath().toString(), fileStatus.getLen());
            }
        }
    }
    final Text parentFilePathKey = new Text(parentFilePath);
    final Text filePathKey = new Text(filePath.toString());
    final Long fileSize = fileSizesMap.get(filePath.toString());
    if (fileSize < threshold) {
        context.write(parentFilePathKey, value);
    } else {
        context.write(filePathKey, value);
    }
}
Example 15: map
import org.apache.hadoop.mapreduce.lib.input.FileSplit; // import the package/class the method depends on
/**
 * {@inheritDoc}
 */
protected void map(final Object key, final BytesWritable value, final Context context) throws IOException, InterruptedException {
    if (value != null && value.toString() != null && value.toString().isEmpty()) {
        return;
    }
    // Mapper sends data with parent directory path as keys to retain directory structure
    final FileSplit fileSplit = (FileSplit) context.getInputSplit();
    final Path filePath = fileSplit.getPath();
    final String parentFilePath = String.format("%s/", filePath.getParent().toString());
    log.debug("Parent file path {}", parentFilePath);
    if (!fileSizesMap.containsKey(filePath.toString())) {
        if (fileSystem == null) {
            final URI uri = URI.create(filePath.toString());
            fileSystem = FileSystem.get(uri, configuration);
        }
        final FileStatus[] listStatuses = fileSystem.listStatus(filePath);
        for (FileStatus fileStatus : listStatuses) {
            if (!fileStatus.isDirectory()) {
                fileSizesMap.put(fileStatus.getPath().toString(), fileStatus.getLen());
                log.info("Entry added to fileSizes Map {} {}", fileStatus.getPath().toString(), fileStatus.getLen());
            }
        }
    }
    final Text parentFilePathKey = new Text(parentFilePath);
    final Text filePathKey = new Text(filePath.toString());
    final Long fileSize = fileSizesMap.get(filePath.toString());
    if (fileSize < threshold) {
        context.write(parentFilePathKey, value);
    } else {
        context.write(filePathKey, value);
    }
}
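Examples 1, 14 and 15 are the same mapper specialized for three value types (OrcStruct, Text and BytesWritable). A minimal, hypothetical driver for the Text variant is sketched below; the TextFileMergeMapper class name, the paths and the output settings are placeholders, not part of the original source.

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;

// Hypothetical job wiring for the Text variant (Example 14).
public static void main(String[] args) throws Exception {
    Configuration conf = new Configuration();
    Job job = Job.getInstance(conf, "file-merge-by-parent-dir");
    job.setJarByClass(TextFileMergeMapper.class);            // placeholder mapper class
    job.setMapperClass(TextFileMergeMapper.class);
    job.setMapOutputKeyClass(Text.class);
    job.setMapOutputValueClass(Text.class);
    FileInputFormat.addInputPath(job, new Path(args[0]));
    FileOutputFormat.setOutputPath(job, new Path(args[1]));
    System.exit(job.waitForCompletion(true) ? 0 : 1);
}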