This article collects typical usage examples of the Java method org.apache.hadoop.mapred.FileSplit.getLength. If you are wondering what exactly FileSplit.getLength does, how to use it, or where to find examples of it, the curated code samples below may help. You can also explore further usage examples of the class this method belongs to, org.apache.hadoop.mapred.FileSplit.
A total of 15 code examples of the FileSplit.getLength method are shown below, sorted by popularity by default. You can upvote the examples you like or find useful; your votes help the system recommend better Java code examples.
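Before the examples, here is a minimal sketch of the pattern most of them share: getStart() and getLength() together define the byte range of the split, and a record reader typically opens the file and seeks to the start of that range. The class and method names in this sketch are illustrative only and are not taken from any of the quoted projects.

import java.io.IOException;
import org.apache.hadoop.fs.FSDataInputStream;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.mapred.FileSplit;
import org.apache.hadoop.mapred.JobConf;

public class SplitRangeSketch {
  /** Opens the split's file and positions the stream at the beginning of the split. */
  public static FSDataInputStream openAtSplitStart(FileSplit split, JobConf conf) throws IOException {
    long start = split.getStart();              // byte offset of this split within the file
    long end = start + split.getLength();       // exclusive end offset; getLength() is the split size in bytes
    System.out.println("split range: [" + start + ", " + end + ")");
    Path file = split.getPath();
    FileSystem fs = file.getFileSystem(conf);
    FSDataInputStream in = fs.open(file);
    in.seek(start);                             // records are then read until the position reaches 'end'
    return in;
  }
}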
Example 1: getRowGroupNumbersFromFileSplit
import org.apache.hadoop.mapred.FileSplit; // import the package/class this method depends on
/**
 * Get the list of row group numbers for the given file input split. The logic used here is the same as how Hive's
 * Parquet input format finds the row group numbers for an input split.
 */
private List<Integer> getRowGroupNumbersFromFileSplit(final FileSplit split,
    final ParquetMetadata footer) throws IOException {
  final List<BlockMetaData> blocks = footer.getBlocks();
  final long splitStart = split.getStart();
  final long splitLength = split.getLength();
  final List<Integer> rowGroupNums = Lists.newArrayList();
  int i = 0;
  for (final BlockMetaData block : blocks) {
    final long firstDataPage = block.getColumns().get(0).getFirstDataPageOffset();
    if (firstDataPage >= splitStart && firstDataPage < splitStart + splitLength) {
      rowGroupNums.add(i);
    }
    i++;
  }
  return rowGroupNums;
}
Example 2: getRecordReader
import org.apache.hadoop.mapred.FileSplit; // import the package/class this method depends on
@Override
public RecordReader<NullWritable,ColumnAndIndex> getRecordReader( final InputSplit split, final JobConf job, final Reporter reporter ) throws IOException {
  FileSplit fileSplit = (FileSplit)split;
  Path path = fileSplit.getPath();
  FileSystem fs = path.getFileSystem( job );
  long fileLength = fs.getLength( path );
  long start = fileSplit.getStart();
  long length = fileSplit.getLength();
  InputStream in = fs.open( path );
  IJobReporter jobReporter = new HadoopJobReporter( reporter );
  jobReporter.setStatus( String.format( "Read file : %s" , path.toString() ) );
  HiveReaderSetting hiveConfig = new HiveReaderSetting( fileSplit , job );
  if ( hiveConfig.isVectorMode() ){
    IVectorizedReaderSetting vectorizedSetting = new HiveVectorizedReaderSetting( fileSplit , job , hiveConfig );
    return (RecordReader)new MDSHiveDirectVectorizedReader( in , fileLength , start , length , vectorizedSetting , jobReporter );
  }
  else{
    return new MDSHiveLineReader( in , fileLength , start , length , hiveConfig , jobReporter , spreadCounter );
  }
}
Example 3: DelimitedAndFixedWidthRecordReader
import org.apache.hadoop.mapred.FileSplit; // import the package/class this method depends on
public DelimitedAndFixedWidthRecordReader(JobConf conf, FileSplit split)
    throws IOException {
  lengthsAndDelimiters = DelimitedAndFixedWidthHelper
      .modifyIdentifier(conf.get("lengthsAndDelimiters").split(Constants.LENGTHS_AND_DELIMITERS_SEPARATOR));
  lengthsAndDelimitersType = conf.get("lengthsAndDelimitersType").split(Constants.LENGTHS_AND_DELIMITERS_SEPARATOR);
  quote = conf.get("quote");
  charsetName = conf.get("charsetName");
  start = split.getStart();
  pos = start;
  end = start + split.getLength();
  file = split.getPath();
  fs = file.getFileSystem(conf);
  fileIn = fs.open(split.getPath());
  fileIn.seek(start);
  inputStreamReader = new InputStreamReader(fileIn, charsetName);
  singleChar = new char[1];
  stringBuilder = new StringBuilder();
  isQuotePresent = isQuotePresent(quote);
}
Example 4: getRowGroupNumbersFromFileSplit
import org.apache.hadoop.mapred.FileSplit; // import the package/class this method depends on
/**
 * Get the list of row group numbers for the given file input split. The logic used here is the same as how Hive's
 * Parquet input format finds the row group numbers for an input split.
 */
private static List<Integer> getRowGroupNumbersFromFileSplit(final FileSplit split,
    final ParquetMetadata footer) throws IOException {
  final List<BlockMetaData> blocks = footer.getBlocks();
  final long splitStart = split.getStart();
  final long splitLength = split.getLength();
  final List<Integer> rowGroupNums = Lists.newArrayList();
  int i = 0;
  for (final BlockMetaData block : blocks) {
    final long firstDataPage = block.getColumns().get(0).getFirstDataPageOffset();
    if (firstDataPage >= splitStart && firstDataPage < splitStart + splitLength) {
      rowGroupNums.add(i);
    }
    i++;
  }
  return rowGroupNums;
}
Example 5: LineDocRecordReader
import org.apache.hadoop.mapred.FileSplit; // import the package/class this method depends on
/**
 * Constructor
 * @param job
 * @param split
 * @throws IOException
 */
public LineDocRecordReader(Configuration job, FileSplit split)
    throws IOException {
  long start = split.getStart();
  long end = start + split.getLength();
  final Path file = split.getPath();
  // open the file and seek to the start of the split
  FileSystem fs = file.getFileSystem(job);
  FSDataInputStream fileIn = fs.open(split.getPath());
  InputStream in = fileIn;
  boolean skipFirstLine = false;
  if (start != 0) {
    skipFirstLine = true; // wait till BufferedInputStream to skip
    --start;
    fileIn.seek(start);
  }
  this.in = new BufferedInputStream(in);
  if (skipFirstLine) { // skip first line and re-establish "start".
    start += LineDocRecordReader.readData(this.in, null, EOL);
  }
  this.start = start;
  this.pos = start;
  this.end = end;
}
Example 6: XMLRecordReader
import org.apache.hadoop.mapred.FileSplit; // import the package/class this method depends on
public XMLRecordReader(FileSplit split, JobConf jobConf) throws IOException {
  log.info("Setting up XMLRecordReader for path: [" + split.getPath() + "]");
  log.info("startTag=" + jobConf.get(START_TAG_KEY) + ", endTag=" + jobConf.get(END_TAG_KEY));
  startTag = jobConf.get(START_TAG_KEY).getBytes("utf-8");
  endTag = jobConf.get(END_TAG_KEY).getBytes("utf-8");
  // open the file and seek to the start of the split
  start = split.getStart();
  end = start + split.getLength();
  Path file = split.getPath();
  FileSystem fs = file.getFileSystem(jobConf);
  path = split.getPath().getName();
  fsin = fs.open(split.getPath());
  fsin.seek(start);
}
Example 7: SplitVideoFileRecordReader
import org.apache.hadoop.mapred.FileSplit; // import the package/class this method depends on
SplitVideoFileRecordReader(InputSplit Insplit, JobConf job) throws IOException
{
  // initialization: locate the split within the file and allocate the read buffer
  FileSplit split = (FileSplit) Insplit;
  start = split.getStart();
  end = split.getLength() + start;
  final Path path = split.getPath();
  System.out.print("split path:" + path.toString());
  final FileSystem fs = path.getFileSystem(job);
  fileIn = fs.open(path);
  fileIn.seek(start);
  System.out.println("len=" + split.getLength() + "start=" + start + "end=" + end);
  buffer = new byte[PerReadLen];
  this.pos = start;
}
Example 8: RangePickRecordReader
import org.apache.hadoop.mapred.FileSplit; // import the package/class this method depends on
public RangePickRecordReader(JobConf job, FileSplit split)
    throws IOException
{
  parseSelectedRangeString(job.get(SELECTED_RANGES));
  // check if the current part file needs to be processed
  path = split.getPath();
  totLength = split.getLength();
  currentStream = IOUtilFunctions.getFileSystem(path, job).open(path);
  currPart = getIndexInTheArray(path.getName());
  if ( currPart < beginPart || currPart > endPart ) {
    noRecordsNeeded = true;
    return;
  }
  int part0 = job.getInt(PARTITION_OF_ZERO, -1);
  boolean contain0s = false;
  long numZeros = 0;
  if (part0 == currPart) {
    contain0s = true;
    numZeros = job.getLong(NUMBER_OF_ZERO, 0);
  }
  reader = new ReadWithZeros(currentStream, contain0s, numZeros);
}
Example 9: readFileBlock
import org.apache.hadoop.mapred.FileSplit; // import the package/class this method depends on
/**
 * @param clsName Input split class name.
 * @param in Input stream.
 * @param hosts Optional hosts.
 * @return File block or {@code null} if it is not a {@link FileSplit} instance.
 * @throws IgniteCheckedException If failed.
 */
@Nullable public static HadoopFileBlock readFileBlock(String clsName, FSDataInputStream in,
    @Nullable String[] hosts) throws IgniteCheckedException {
  if (!FileSplit.class.getName().equals(clsName))
    return null;
  FileSplit split = U.newInstance(FileSplit.class);
  try {
    split.readFields(in);
  }
  catch (IOException e) {
    throw new IgniteCheckedException(e);
  }
  if (hosts == null)
    hosts = EMPTY_HOSTS;
  return new HadoopFileBlock(hosts, split.getPath().toUri(), split.getStart(), split.getLength());
}
Example 10: ParsedRecordReader
import org.apache.hadoop.mapred.FileSplit; // import the package/class this method depends on
public ParsedRecordReader ( FileSplit split,
                            Configuration conf,
                            Class<? extends Parser> parser_class,
                            Trees args ) throws IOException {
  start = split.getStart();
  end = start + split.getLength();
  Path file = split.getPath();
  FileSystem fs = file.getFileSystem(conf);
  fsin = fs.open(split.getPath());
  try {
    parser = parser_class.newInstance();
  } catch (Exception ex) {
    throw new Error("Unrecognized parser:" + parser_class);
  }
  parser.initialize(args);
  parser.open(fsin, start, end);
  result = null;
}
Example 11: HiveParquetSplit
import org.apache.hadoop.mapred.FileSplit; // import the package/class this method depends on
HiveParquetSplit(DatasetSplit datasetSplit) {
  this.datasetSplit = datasetSplit;
  try {
    final HiveSplitXattr splitAttr = HiveSplitXattr.parseFrom(datasetSplit.getExtendedProperty().toByteArray());
    final FileSplit fullFileSplit = (FileSplit) HiveAbstractReader.deserializeInputSplit(splitAttr.getInputSplit());
    // make a copy of file split, we only need file path, start and length, throw away hosts
    this.fileSplit = new FileSplit(fullFileSplit.getPath(), fullFileSplit.getStart(), fullFileSplit.getLength(), (String[])null);
    this.partitionId = splitAttr.getPartitionId();
  } catch (IOException | ReflectiveOperationException e) {
    throw new RuntimeException("Failed to parse dataset split for " + datasetSplit.getSplitKey(), e);
  }
}
Example 12: ArcRecordReader
import org.apache.hadoop.mapred.FileSplit; // import the package/class this method depends on
/**
 * Constructor that sets the configuration and file split.
 *
 * @param conf
 *          The job configuration.
 * @param split
 *          The file split to read from.
 *
 * @throws IOException
 *           If an IO error occurs while initializing file split.
 */
public ArcRecordReader(Configuration conf, FileSplit split)
    throws IOException {
  Path path = split.getPath();
  FileSystem fs = path.getFileSystem(conf);
  fileLen = fs.getFileStatus(split.getPath()).getLen();
  this.conf = conf;
  this.in = fs.open(split.getPath());
  this.splitStart = split.getStart();
  this.splitEnd = splitStart + split.getLength();
  this.splitLen = split.getLength();
  in.seek(splitStart);
}
Example 13: RemoteParForColocatedFileSplit
import org.apache.hadoop.mapred.FileSplit; // import the package/class this method depends on
public RemoteParForColocatedFileSplit( FileSplit split, String fname, int blen )
    throws IOException
{
  super( split.getPath(), split.getStart(), split.getLength(), split.getLocations() );
  _fname = fname;
  _blen = blen;
}
Example 14: CompactInputRecordReader
import org.apache.hadoop.mapred.FileSplit; // import the package/class this method depends on
@SuppressWarnings("unchecked")
public CompactInputRecordReader(JobConf job, FileSplit split) throws IOException {
  path = split.getPath();
  fs = IOUtilFunctions.getFileSystem(path, job);
  totLength = split.getLength();
  currentStream = fs.open(path);
  keyClass = (Class<? extends WritableComparable>) job.getClass(KEY_CLASS, WritableComparable.class);
  valueClass = (Class<? extends Writable>) job.getClass(VALUE_CLASS, Writable.class);
}
Example 15: ArcRecordReader
import org.apache.hadoop.mapred.FileSplit; // import the package/class this method depends on
/**
 * Constructor that sets the configuration and file split.
 *
 * @param conf The job configuration.
 * @param split The file split to read from.
 *
 * @throws IOException If an IO error occurs while initializing file split.
 */
public ArcRecordReader(Configuration conf, FileSplit split)
    throws IOException {
  Path path = split.getPath();
  FileSystem fs = path.getFileSystem(conf);
  fileLen = fs.getFileStatus(split.getPath()).getLen();
  this.conf = conf;
  this.in = fs.open(split.getPath());
  this.splitStart = split.getStart();
  this.splitEnd = splitStart + split.getLength();
  this.splitLen = split.getLength();
  in.seek(splitStart);
}