當前位置: 首頁>>代碼示例>>Java>>正文


Java FileSplit.getPath方法代碼示例

本文整理匯總了Java中org.apache.hadoop.mapred.FileSplit.getPath方法的典型用法代碼示例。如果您正苦於以下問題:Java FileSplit.getPath方法的具體用法?Java FileSplit.getPath怎麼用?Java FileSplit.getPath使用的例子?那麼,這裡精選的方法代碼示例或許可以為您提供幫助。您也可以進一步了解該方法所在類org.apache.hadoop.mapred.FileSplit的用法示例。


在下文中一共展示了FileSplit.getPath方法的15個代碼示例,這些例子默認根據受歡迎程度排序。您可以為喜歡或者感覺有用的代碼點讚,您的評價將有助於系統推薦出更棒的Java代碼示例。

示例1: getRecordReader

import org.apache.hadoop.mapred.FileSplit; //導入方法依賴的package包/類
/**
 * Creates the record reader for one file split, choosing between the vectorized
 * reader and the line reader according to the Hive reader settings.
 *
 * @param split the split to read; it is cast to {@code FileSplit}
 * @param job the job configuration used to resolve the file system and the reader settings
 * @param reporter the Hadoop reporter, wrapped in a {@code HadoopJobReporter}
 * @return a vectorized reader when {@code hiveConfig.isVectorMode()} is true, otherwise a line reader
 * @throws IOException if the file cannot be inspected or opened, or if a reader cannot be created
 */
@Override
public RecordReader<NullWritable,ColumnAndIndex> getRecordReader( final InputSplit split, final JobConf job, final Reporter reporter ) throws IOException {
  FileSplit fileSplit = (FileSplit)split;
  Path path = fileSplit.getPath();
  FileSystem fs = path.getFileSystem( job );
  // FileSystem.getLength(Path) is deprecated; getFileStatus(path).getLen() is its documented replacement.
  long fileLength = fs.getFileStatus( path ).getLen();
  long start = fileSplit.getStart();
  long length = fileSplit.getLength();
  InputStream in = fs.open( path );
  try{
    IJobReporter jobReporter = new HadoopJobReporter( reporter );
    jobReporter.setStatus( String.format( "Read file : %s" , path.toString() ) );
    HiveReaderSetting hiveConfig = new HiveReaderSetting( fileSplit , job );
    if ( hiveConfig.isVectorMode() ){
      IVectorizedReaderSetting vectorizedSetting = new HiveVectorizedReaderSetting( fileSplit , job , hiveConfig );
      return (RecordReader)new MDSHiveDirectVectorizedReader( in , fileLength , start , length , vectorizedSetting , jobReporter );
    }
    return new MDSHiveLineReader( in , fileLength , start , length , hiveConfig , jobReporter , spreadCounter );
  }catch( IOException | RuntimeException e ){
    // No reader took ownership of the stream, so nobody else can close it: release it here.
    try{
      in.close();
    }catch( IOException closeError ){
      e.addSuppressed( closeError );
    }
    throw e;
  }
}
 
開發者ID:yahoojapan,項目名稱:multiple-dimension-spread,代碼行數:21,代碼來源:MDSHiveLineInputFormat.java

示例2: DelimitedAndFixedWidthRecordReader

import org.apache.hadoop.mapred.FileSplit; //導入方法依賴的package包/類
public DelimitedAndFixedWidthRecordReader(JobConf conf, FileSplit split)
		throws IOException {
	lengthsAndDelimiters = DelimitedAndFixedWidthHelper
			.modifyIdentifier(conf.get("lengthsAndDelimiters").split(Constants.LENGTHS_AND_DELIMITERS_SEPARATOR));
	lengthsAndDelimitersType = conf.get("lengthsAndDelimitersType").split(Constants.LENGTHS_AND_DELIMITERS_SEPARATOR);
	quote = conf.get("quote");
	charsetName = conf.get("charsetName");
	start = split.getStart();
	pos = start;
	end = start + split.getLength();
	file = split.getPath();
	fs = file.getFileSystem(conf);
	fileIn = fs.open(split.getPath());
	fileIn.seek(start);
	inputStreamReader = new InputStreamReader(fileIn, charsetName);
	singleChar = new char[1];
	stringBuilder = new StringBuilder();
	isQuotePresent = isQuotePresent(quote);
}
 
開發者ID:capitalone,項目名稱:Hydrograph,代碼行數:20,代碼來源:DelimitedAndFixedWidthRecordReader.java

示例3: IndexRRecordReader

import org.apache.hadoop.mapred.FileSplit; //導入方法依賴的package包/類
/**
 * Opens the segment stored at the split's path and prepares row iteration.
 * Paths rejected by {@code SegmentHelper.checkSegmentByPath} are logged and
 * skipped; nothing is set up either when no segment descriptor is created.
 *
 * @param inputSplit the split to read; it is cast to {@code FileSplit} and must start at offset 0
 * @param configuration the configuration used to resolve the file system and the included columns
 * @throws IOException if the file system or the segment cannot be accessed
 */
public IndexRRecordReader(InputSplit inputSplit, Configuration configuration) throws IOException {
    final FileSplit split = (FileSplit) inputSplit;
    Preconditions.checkState(split.getStart() == 0, "Segment should not splited");

    final Path segmentPath = split.getPath();
    // Hive may point at a file on the local file system, so the file system
    // is resolved from the path's own scheme rather than the default one.
    final Configuration fsConf = FileSystem.get(configuration).getConf();
    final FileSystem fileSystem = FileSystem.get(segmentPath.toUri(), fsConf);

    if (!SegmentHelper.checkSegmentByPath(segmentPath)) {
        LOG.warn("ignore " + segmentPath);
        return;
    }

    final ByteBufferReader.Opener opener = ByteBufferReader.Opener.create(fileSystem, segmentPath);
    final IntegratedSegment.Fd fd = IntegratedSegment.Fd.create(segmentPath.toString(), opener);
    if (fd == null) {
        return;
    }

    segment = fd.open();
    offset = 0L;
    rowIterator = segment.rowTraversal().iterator();
    getIncludeColumns(configuration, segment);
}
 
開發者ID:shunfei,項目名稱:indexr,代碼行數:23,代碼來源:IndexRRecordReader.java

示例4: LineDocRecordReader

import org.apache.hadoop.mapred.FileSplit; //導入方法依賴的package包/類
/**
 * Creates a reader over one file split and positions it for reading.
 *
 * @param job configuration used to resolve the split's file system
 * @param split the file split to read
 * @throws IOException if the file cannot be opened, positioned or read
 */
public LineDocRecordReader(Configuration job, FileSplit split)
    throws IOException {
  final Path file = split.getPath();
  long splitStart = split.getStart();
  final long splitEnd = splitStart + split.getLength();

  // Open the file backing the split.
  final FSDataInputStream stream = file.getFileSystem(job).open(file);

  // A split that does not begin at offset 0 backs up one byte here; the first
  // line is skipped further down, once the buffered stream exists.
  final boolean startsMidFile = splitStart != 0;
  if (startsMidFile) {
    --splitStart;
    stream.seek(splitStart);
  }

  this.in = new BufferedInputStream(stream);
  if (startsMidFile) {
    // Skip the first line and move the start offset forward by what readData reports.
    splitStart += LineDocRecordReader.readData(this.in, null, EOL);
  }

  this.start = splitStart;
  this.pos = splitStart;
  this.end = splitEnd;
}
 
開發者ID:Nextzero,項目名稱:hadoop-2.6.0-cdh5.4.3,代碼行數:32,代碼來源:LineDocRecordReader.java

示例5: XMLRecordReader

import org.apache.hadoop.mapred.FileSplit; //導入方法依賴的package包/類
/**
 * Prepares a reader for one split: reads the start and end tags from the job
 * configuration, opens the split's file and seeks to the split start.
 *
 * @param split the file split to read
 * @param jobConf job configuration holding the tags under {@code START_TAG_KEY} and {@code END_TAG_KEY}
 * @throws IOException if the file cannot be opened or positioned
 */
public XMLRecordReader(FileSplit split, JobConf jobConf) throws IOException {
  final Path file = split.getPath();
  log.info("Setting up XMLRecordReader for path: [" + file + "]");

  final String startTagText = jobConf.get(START_TAG_KEY);
  final String endTagText = jobConf.get(END_TAG_KEY);
  log.info("startTag=" + startTagText + ", endTag=" + endTagText);

  startTag = startTagText.getBytes("utf-8");
  endTag = endTagText.getBytes("utf-8");

  // Byte range covered by this split.
  start = split.getStart();
  end = start + split.getLength();

  final FileSystem fs = file.getFileSystem(jobConf);

  // Only the file name (last path component) is stored.
  path = file.getName();

  // Open the file and seek to the start of the split.
  fsin = fs.open(file);
  fsin.seek(start);
}
 
開發者ID:lucidworks,項目名稱:solr-hadoop-common,代碼行數:20,代碼來源:XMLInputFormat.java

示例6: SplitVideoFileRecordReader

import org.apache.hadoop.mapred.FileSplit; //導入方法依賴的package包/類
/**
 * Sets up a reader for one split of a video file: records the split's byte
 * range, opens the file at the split start and allocates the read buffer.
 *
 * @param Insplit the split to read; it is cast to {@code FileSplit}
 * @param job job configuration used to resolve the file system
 * @throws IOException if the file cannot be opened or positioned
 */
SplitVideoFileRecordReader(InputSplit Insplit,JobConf job) throws IOException
{
	// Initialization: locate the split within the file and allocate memory.
	final FileSplit videoSplit = (FileSplit)Insplit;
	final long splitLength = videoSplit.getLength();
	start = videoSplit.getStart();
	end = splitLength+start;

	final Path path = videoSplit.getPath();
	System.out.print("split path:"+path.toString());

	fileIn = path.getFileSystem(job).open(path);
	fileIn.seek(start);
	System.out.println("len="+splitLength+"start="+start+"end="+end);

	buffer = new byte[PerReadLen];
	this.pos = start;
}
 
開發者ID:SH-Allegion,項目名稱:StuJava,代碼行數:18,代碼來源:SplitVideoFileRecordReader.java

示例7: RangePickRecordReader

import org.apache.hadoop.mapred.FileSplit; //導入方法依賴的package包/類
/**
 * Prepares a reader for one part file. When the part index lies outside
 * [beginPart, endPart] the reader is flagged as needing no records and no
 * record reader is created.
 *
 * @param job job configuration holding the selected ranges and the zero-partition settings
 * @param split the file split (part file) to read
 * @throws IOException if the part file cannot be opened
 */
public RangePickRecordReader(JobConf job, FileSplit split) 
	throws IOException 
{
	parseSelectedRangeString(job.get(SELECTED_RANGES));

	// Open the part file and work out its index.
	path = split.getPath();
	totLength = split.getLength();
	currentStream = IOUtilFunctions.getFileSystem(path, job).open(path);
	currPart = getIndexInTheArray(path.getName());

	// Nothing to do for parts outside the selected range.
	final boolean outsideSelectedParts = currPart < beginPart || currPart > endPart;
	if( outsideSelectedParts ) {
		noRecordsNeeded = true;
		return;
	}

	// The zero count is only looked up when this part is the configured zero partition.
	final boolean holdsZeros = job.getInt(PARTITION_OF_ZERO, -1) == currPart;
	final long zeroCount = holdsZeros ? job.getLong(NUMBER_OF_ZERO, 0) : 0;
	reader = new ReadWithZeros(currentStream, holdsZeros, zeroCount);
}
 
開發者ID:apache,項目名稱:systemml,代碼行數:26,代碼來源:PickFromCompactInputFormat.java

示例8: ParsedRecordReader

import org.apache.hadoop.mapred.FileSplit; //導入方法依賴的package包/類
/**
 * Opens the split's file, instantiates the requested parser and hands it the
 * stream together with the split's byte range.
 *
 * @param split the file split to parse
 * @param conf configuration used to resolve the split's file system
 * @param parser_class parser implementation, created through its no-argument constructor
 * @param args arguments passed to {@code parser.initialize}
 * @throws IOException if the file system cannot be resolved or the file cannot be opened
 * @throws Error if the parser class cannot be instantiated; the underlying failure is attached as the cause
 */
public ParsedRecordReader ( FileSplit split,
    Configuration conf,
    Class<? extends Parser> parser_class,
    Trees args ) throws IOException {
    start = split.getStart();
    end = start + split.getLength();
    Path file = split.getPath();
    FileSystem fs = file.getFileSystem(conf);
    fsin = fs.open(split.getPath());
    try {
        parser = parser_class.newInstance();
    } catch (Exception ex) {
        // Construction is about to fail, so nobody will be able to close the
        // stream that was just opened: release it before propagating.
        try {
            fsin.close();
        } catch (IOException closeFailure) {
            ex.addSuppressed(closeFailure);
        }
        // Keep the original failure as the cause so the real reason is not lost.
        throw new Error("Unrecognized parser:"+parser_class, ex);
    }
    parser.initialize(args);
    parser.open(fsin,start,end);
    result = null;
}
 
開發者ID:apache,項目名稱:incubator-mrql,代碼行數:19,代碼來源:StormParsedInputFormat.java

示例9: HiveParquetSplit

import org.apache.hadoop.mapred.FileSplit; //導入方法依賴的package包/類
/**
 * Wraps a dataset split, extracting the Hive file split and partition id from
 * the split's extended property.
 *
 * @param datasetSplit the dataset split whose extended property carries the serialized Hive split
 * @throws RuntimeException if the extended property cannot be parsed or the input split cannot be deserialized
 */
HiveParquetSplit(DatasetSplit datasetSplit) {
  this.datasetSplit = datasetSplit;
  try {
    final HiveSplitXattr xattr =
        HiveSplitXattr.parseFrom(datasetSplit.getExtendedProperty().toByteArray());
    final FileSplit deserialized =
        (FileSplit) HiveAbstractReader.deserializeInputSplit(xattr.getInputSplit());

    // Keep only path, start and length in a fresh split; the host list is dropped.
    final long start = deserialized.getStart();
    final long length = deserialized.getLength();
    final String[] noHosts = null;
    this.fileSplit = new FileSplit(deserialized.getPath(), start, length, noHosts);
    this.partitionId = xattr.getPartitionId();
  } catch (IOException | ReflectiveOperationException e) {
    throw new RuntimeException("Failed to parse dataset split for " + datasetSplit.getSplitKey(), e);
  }
}
 
開發者ID:dremio,項目名稱:dremio-oss,代碼行數:13,代碼來源:HiveScanBatchCreator.java

示例10: ArcRecordReader

import org.apache.hadoop.mapred.FileSplit; //導入方法依賴的package包/類
/**
 * Creates a reader for one file split: records the file length and the
 * split's byte range, opens the file and seeks to the split start.
 *
 * @param conf
 *          The job configuration.
 * @param split
 *          The file split to read from.
 *
 * @throws IOException
 *           If the file status cannot be read or the file cannot be opened
 *           or positioned.
 */
public ArcRecordReader(Configuration conf, FileSplit split)
    throws IOException {

  final Path path = split.getPath();
  final FileSystem fs = path.getFileSystem(conf);
  final long length = split.getLength();

  fileLen = fs.getFileStatus(path).getLen();
  this.conf = conf;
  this.in = fs.open(path);

  // Byte range of the split within the file.
  this.splitStart = split.getStart();
  this.splitEnd = splitStart + length;
  this.splitLen = length;

  in.seek(splitStart);
}
 
開發者ID:jorcox,項目名稱:GeoCrawler,代碼行數:25,代碼來源:ArcRecordReader.java

示例11: getRecordReader

import org.apache.hadoop.mapred.FileSplit; //導入方法依賴的package包/類
/**
 * Returns a record reader for the file behind the given split.
 *
 * @param split the split to read; it is cast to {@code FileSplit}
 * @param config the job configuration passed on to {@code BaseInputFormat.createRecordReader}
 * @param reporter unused here
 * @return an {@code EmoRecordReader} wrapping the reader created for the split's path
 * @throws IOException if the underlying reader cannot be created
 */
@Override
public RecordReader<Text, Row> getRecordReader(InputSplit split, JobConf config, Reporter reporter)
        throws IOException {
    final Path splitPath = ((FileSplit) split).getPath();
    return new EmoRecordReader(BaseInputFormat.createRecordReader(config, splitPath));
}
 
開發者ID:bazaarvoice,項目名稱:emodb,代碼行數:8,代碼來源:EmoInputFormat.java

示例12: ZipFileRecordReader

import org.apache.hadoop.mapred.FileSplit; //導入方法依賴的package包/類
/**
 * Opens the split's file as a zip stream, going through
 * {@code CompressionHelper} when the path is reported as compressed.
 *
 * @param fileSplit the split whose file is read
 * @param conf configuration used to resolve the file system and open compressed files
 * @throws IOException if the file system cannot be resolved or the file cannot be opened
 */
public ZipFileRecordReader(FileSplit fileSplit, Configuration conf) throws IOException {
  this.fileSplit = fileSplit;

  final Path archive = fileSplit.getPath();
  final FileSystem fs = archive.getFileSystem(conf);

  // Plain file: read the zip entries straight from the file system stream.
  if (!CompressionHelper.isCompressed(archive)) {
    fsin = fs.open(archive);
    zip = new ZipInputStream(fsin);
    return;
  }

  // Compressed file: let the helper open it, then read the zip entries from that stream.
  compressedInputStream = CompressionHelper.openCompressedFile(archive, conf);
  zip = new ZipInputStream(compressedInputStream);
}
 
開發者ID:lucidworks,項目名稱:solr-hadoop-common,代碼行數:16,代碼來源:ZipFileRecordReader.java

示例13: RemoteParForColocatedFileSplit

import org.apache.hadoop.mapred.FileSplit; //導入方法依賴的package包/類
/**
 * Creates a split with the same path, start, length and locations as the
 * given split, and additionally stores a file name and a block length value.
 *
 * @param split the split whose path, start, length and locations are copied
 * @param fname the value stored in {@code _fname}
 * @param blen the value stored in {@code _blen}
 * @throws IOException if the locations of {@code split} cannot be obtained
 */
public RemoteParForColocatedFileSplit( FileSplit split, String fname, int blen ) 
	throws IOException 
{
	super( split.getPath(), split.getStart(), split.getLength(), split.getLocations() );

	this._fname = fname;
	this._blen = blen;
}
 
開發者ID:apache,項目名稱:systemml,代碼行數:9,代碼來源:RemoteParForColocatedFileSplit.java

示例14: CompactInputRecordReader

import org.apache.hadoop.mapred.FileSplit; //導入方法依賴的package包/類
/**
 * Opens the split's file and resolves the key and value classes from the job
 * configuration.
 *
 * @param job job configuration supplying the file system and the {@code KEY_CLASS} / {@code VALUE_CLASS} settings
 * @param split the file split to read
 * @throws IOException if the file system cannot be resolved or the file cannot be opened
 */
@SuppressWarnings("unchecked")
public CompactInputRecordReader(JobConf job, FileSplit split) throws IOException {
	// Locate and open the file behind the split.
	this.path = split.getPath();
	this.fs = IOUtilFunctions.getFileSystem(this.path, job);
	this.totLength = split.getLength();
	this.currentStream = this.fs.open(this.path);

	// Key/value types come from the job, falling back to the base Writable interfaces.
	this.keyClass = (Class<? extends WritableComparable>)
		job.getClass(KEY_CLASS, WritableComparable.class);
	this.valueClass = (Class<? extends Writable>)
		job.getClass(VALUE_CLASS, Writable.class);
}
 
開發者ID:apache,項目名稱:systemml,代碼行數:11,代碼來源:CompactInputFormat.java

示例15: PickRecordReader

import org.apache.hadoop.mapred.FileSplit; //導入方法依賴的package包/類
/**
 * Prepares a reader for one part file. The points to pick are read from the
 * job setting {@code SELECTED_POINTS_PREFIX + partIndex}, a comma-separated
 * list of {@code pos:index} pairs; when that setting is missing or empty the
 * reader is flagged as needing no records.
 *
 * @param job job configuration holding the selected points and the zero-partition settings
 * @param split the file split (part file) to read
 * @throws IOException if the file system cannot be resolved or the part file cannot be opened
 */
public PickRecordReader(JobConf job, FileSplit split)
	throws IOException
{
	path = split.getPath();
	fs = IOUtilFunctions.getFileSystem(path, job);
	currentStream = fs.open(path);

	final int partIndex = getIndexInTheArray(path.getName());
	final String selected = job.get(SELECTED_POINTS_PREFIX+partIndex);
	if( selected == null || selected.isEmpty() ) {
		noRecordsNeeded = true;
		return;
	}

	// Each entry is "pos:index"; fill the two parallel arrays in list order.
	final String[] entries = selected.split(",");
	pos = new int[entries.length];
	indexes = new int[entries.length];
	int slot = 0;
	for( String entry : entries ) {
		final String[] pair = entry.split(":");
		pos[slot] = Integer.parseInt(pair[0]);
		indexes[slot] = Integer.parseInt(pair[1]);
		slot++;
	}

	valueIsWeight = job.getBoolean(VALUE_IS_WEIGHT, true);

	// The zero count is only looked up when this part is the configured zero partition.
	final boolean holdsZeros = job.getInt(PARTITION_OF_ZERO, -1) == partIndex;
	final long zeroCount = holdsZeros ? job.getLong(NUMBER_OF_ZERO, 0) : 0;
	reader = new ReadWithZeros(currentStream, holdsZeros, zeroCount);
}
 
開發者ID:apache,項目名稱:systemml,代碼行數:34,代碼來源:PickFromCompactInputFormat.java


注:本文中的org.apache.hadoop.mapred.FileSplit.getPath方法示例由純淨天空整理自Github/MSDocs等開源代碼及文檔管理平台,相關代碼片段篩選自各路編程大神貢獻的開源項目,源碼版權歸原作者所有,傳播和使用請參考對應項目的License;未經允許,請勿轉載。