This article collects typical usage examples of the Java method org.apache.hadoop.mapred.TextInputFormat.getRecordReader. If you are wondering what exactly TextInputFormat.getRecordReader does, how to call it, or what real-world uses look like, the curated code examples below may help. You can also explore further usage examples of its enclosing class, org.apache.hadoop.mapred.TextInputFormat.
The following presents 6 code examples of TextInputFormat.getRecordReader, sorted by popularity by default.
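All six examples follow the same basic pattern: configure a TextInputFormat against a JobConf, obtain the InputSplits, open a RecordReader per split via getRecordReader, and iterate over (LongWritable, Text) pairs, where the key is the byte offset of a line and the value is its content. The minimal, self-contained sketch below illustrates that pattern; the countLines method and its input path are made up for illustration and are not part of the examples that follow.
import java.io.IOException;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapred.FileInputFormat;
import org.apache.hadoop.mapred.InputSplit;
import org.apache.hadoop.mapred.JobConf;
import org.apache.hadoop.mapred.RecordReader;
import org.apache.hadoop.mapred.Reporter;
import org.apache.hadoop.mapred.TextInputFormat;

public static long countLines( String inputPath ) throws IOException
{
    JobConf job = new JobConf();
    FileInputFormat.addInputPath(job, new Path(inputPath));
    TextInputFormat informat = new TextInputFormat();
    informat.configure(job); //required before getSplits/getRecordReader

    long count = 0;
    for( InputSplit split : informat.getSplits(job, 1) )
    {
        RecordReader<LongWritable,Text> reader = informat.getRecordReader(split, job, Reporter.NULL);
        LongWritable key = reader.createKey();   //byte offset of the current line
        Text value = reader.createValue();       //content of the current line
        try {
            while( reader.next(key, value) )
                count++;
        }
        finally {
            reader.close(); //always release the underlying stream
        }
    }
    return count;
}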
Example 1: computeCSVSize
import org.apache.hadoop.mapred.TextInputFormat; //import the package/class this method depends on
protected Pair<Integer,Integer> computeCSVSize( Path path, JobConf job, FileSystem fs)
    throws IOException
{
    TextInputFormat informat = new TextInputFormat();
    informat.configure(job);
    InputSplit[] splits = informat.getSplits(job, 1);
    splits = IOUtilFunctions.sortInputSplits(splits);

    //compute number of columns
    int ncol = IOUtilFunctions.countNumColumnsCSV(splits, informat, job, _props.getDelim());

    //compute number of rows
    int nrow = 0;
    for( int i=0; i<splits.length; i++ )
    {
        RecordReader<LongWritable, Text> reader = informat.getRecordReader(splits[i], job, Reporter.NULL);
        LongWritable key = new LongWritable();
        Text value = new Text();

        try
        {
            //ignore header of first split
            if( i==0 && _props.hasHeader() )
                reader.next(key, value);

            //count remaining number of rows, ignore meta data
            while ( reader.next(key, value) ) {
                String val = value.toString();
                nrow += ( val.startsWith(TfUtils.TXMTD_MVPREFIX)
                    || val.startsWith(TfUtils.TXMTD_NDPREFIX)) ? 0 : 1;
            }
        }
        finally {
            IOUtilFunctions.closeSilently(reader);
        }
    }
    return new Pair<>(nrow, ncol);
}
Example 2: mergeTextCellWithoutComp
import org.apache.hadoop.mapred.TextInputFormat; //import the package/class this method depends on
private static void mergeTextCellWithoutComp( String fnameNew, MatrixObject outMo, ArrayList<MatrixObject> inMO )
    throws DMLRuntimeException
{
    try
    {
        //delete target file if already exists
        MapReduceTool.deleteFileIfExistOnHDFS(fnameNew);

        if( ALLOW_COPY_CELLFILES )
        {
            copyAllFiles(fnameNew, inMO);
            return; //we're done
        }

        //actual merge
        JobConf job = new JobConf(ConfigurationManager.getCachedJobConf());
        Path path = new Path( fnameNew );
        FileSystem fs = IOUtilFunctions.getFileSystem(path, job);
        BufferedWriter out = new BufferedWriter(new OutputStreamWriter(fs.create(path,true)));

        String valueStr = null;

        try
        {
            for( MatrixObject in : inMO ) //read/write all inputs
            {
                if( LOG.isTraceEnabled() )
                    LOG.trace("ResultMerge (local, file): Merge input "+in.hashCode()+" (fname="
                        +in.getFileName()+") via stream merge");

                JobConf tmpJob = new JobConf(ConfigurationManager.getCachedJobConf());
                Path tmpPath = new Path(in.getFileName());
                FileInputFormat.addInputPath(tmpJob, tmpPath);
                TextInputFormat informat = new TextInputFormat();
                informat.configure(tmpJob);
                InputSplit[] splits = informat.getSplits(tmpJob, 1);

                LongWritable key = new LongWritable();
                Text value = new Text();

                for(InputSplit split: splits)
                {
                    RecordReader<LongWritable,Text> reader = informat.getRecordReader(split, tmpJob, Reporter.NULL);
                    try
                    {
                        while(reader.next(key, value))
                        {
                            valueStr = value.toString().trim();
                            out.write( valueStr+"\n" );
                        }
                    }
                    finally {
                        IOUtilFunctions.closeSilently(reader);
                    }
                }
            }
        }
        finally {
            IOUtilFunctions.closeSilently(out);
        }
    }
    catch(Exception ex)
    {
        throw new DMLRuntimeException("Unable to merge text cell results.", ex);
    }
}
Example 3: createTextCellStagingFile
import org.apache.hadoop.mapred.TextInputFormat; //import the package/class this method depends on
private static void createTextCellStagingFile( String fnameStaging, MatrixObject mo, long ID )
    throws IOException, DMLRuntimeException
{
    JobConf job = new JobConf(ConfigurationManager.getCachedJobConf());
    Path path = new Path(mo.getFileName());
    FileInputFormat.addInputPath(job, path);
    TextInputFormat informat = new TextInputFormat();
    informat.configure(job);
    InputSplit[] splits = informat.getSplits(job, 1);

    LinkedList<Cell> buffer = new LinkedList<>();
    LongWritable key = new LongWritable();
    Text value = new Text();

    MatrixCharacteristics mc = mo.getMatrixCharacteristics();
    int brlen = mc.getRowsPerBlock();
    int bclen = mc.getColsPerBlock();
    //long row = -1, col = -1; //FIXME needs reconsideration whenever textcell is used actively
    //NOTE MB: Originally, we used long row, col but this led reproducibly to JIT compilation
    //  errors during runtime; experienced under WINDOWS, Intel x86-64, IBM JDK 64bit/32bit.
    //  It works fine with int row, col but we require long for larger matrices.
    //  Since textcell is never used for result merge (hybrid/hadoop: binaryblock, singlenode: binarycell)
    //  we just propose to exclude it with -Xjit:exclude={package.method*}(count=0,optLevel=0)
    FastStringTokenizer st = new FastStringTokenizer(' ');

    for(InputSplit split : splits)
    {
        RecordReader<LongWritable,Text> reader = informat.getRecordReader(split, job, Reporter.NULL);
        try
        {
            while(reader.next(key, value))
            {
                st.reset( value.toString() ); //reset tokenizer
                long row = st.nextLong();
                long col = st.nextLong();
                double lvalue = Double.parseDouble( st.nextToken() );

                Cell tmp = new Cell( row, col, lvalue );

                buffer.addLast( tmp );
                if( buffer.size() > StagingFileUtils.CELL_BUFFER_SIZE ) //periodic flush
                {
                    appendCellBufferToStagingArea(fnameStaging, ID, buffer, brlen, bclen);
                    buffer.clear();
                }
            }

            //final flush
            if( !buffer.isEmpty() )
            {
                appendCellBufferToStagingArea(fnameStaging, ID, buffer, brlen, bclen);
                buffer.clear();
            }
        }
        finally {
            IOUtilFunctions.closeSilently(reader);
        }
    }
}
Example 4: createTextCellStagingFile
import org.apache.hadoop.mapred.TextInputFormat; //import the package/class this method depends on
public void createTextCellStagingFile( String fnameOld, String stagingDir )
    throws IOException, DMLRuntimeException
{
    //prepare input
    JobConf job = new JobConf(ConfigurationManager.getCachedJobConf());
    Path path = new Path(fnameOld);
    FileSystem fs = IOUtilFunctions.getFileSystem(path, job);
    if( !fs.exists(path) )
        throw new IOException("File "+fnameOld+" does not exist on HDFS.");
    FileInputFormat.addInputPath(job, path);
    TextInputFormat informat = new TextInputFormat();
    informat.configure(job);
    InputSplit[] splits = informat.getSplits(job, 1);

    LinkedList<Cell> buffer = new LinkedList<>();
    LongWritable key = new LongWritable();
    Text value = new Text();
    FastStringTokenizer st = new FastStringTokenizer(' ');

    for(InputSplit split: splits)
    {
        RecordReader<LongWritable,Text> reader = informat.getRecordReader(split, job, Reporter.NULL);
        try
        {
            while( reader.next(key, value) )
            {
                st.reset( value.toString() ); //reset tokenizer
                long row = st.nextLong();
                long col = st.nextLong();
                double lvalue = st.nextDouble();
                buffer.add(new Cell(row,col,lvalue));

                if( buffer.size() > StagingFileUtils.CELL_BUFFER_SIZE )
                {
                    appendCellBufferToStagingArea(stagingDir, buffer, ConfigurationManager.getBlocksize(), ConfigurationManager.getBlocksize());
                    buffer.clear();
                }
            }

            if( !buffer.isEmpty() )
            {
                appendCellBufferToStagingArea(stagingDir, buffer, ConfigurationManager.getBlocksize(), ConfigurationManager.getBlocksize());
                buffer.clear();
            }
        }
        finally {
            IOUtilFunctions.closeSilently(reader);
        }
    }
}
Example 5: computeCSVSizeAndCreateOutputMatrixBlock
import org.apache.hadoop.mapred.TextInputFormat; //import the package/class this method depends on
private MatrixBlock computeCSVSizeAndCreateOutputMatrixBlock(
        InputSplit[] splits, Path path, JobConf job, boolean hasHeader,
        String delim, long estnnz) throws IOException, DMLRuntimeException
{
    int nrow = 0;
    int ncol = 0;

    FileInputFormat.addInputPath(job, path);
    TextInputFormat informat = new TextInputFormat();
    informat.configure(job);

    // count the number of columns (entities) from the first row of the first split
    LongWritable key = new LongWritable();
    Text oneLine = new Text();
    RecordReader<LongWritable, Text> reader = informat
            .getRecordReader(splits[0], job, Reporter.NULL);
    try {
        if (reader.next(key, oneLine)) {
            String cellStr = oneLine.toString().trim();
            ncol = StringUtils.countMatches(cellStr, delim) + 1;
        }
    }
    finally {
        IOUtilFunctions.closeSilently(reader);
    }

    // count rows in parallel per split
    try
    {
        ExecutorService pool = Executors.newFixedThreadPool(_numThreads);
        ArrayList<CountRowsTask> tasks = new ArrayList<>();
        for (InputSplit split : splits) {
            tasks.add(new CountRowsTask(split, informat, job, hasHeader));
            hasHeader = false;
        }
        pool.invokeAll(tasks);
        pool.shutdown();

        // collect row counts for offset computation
        // early error notify in case not all tasks successful
        _offsets = new SplitOffsetInfos(tasks.size());
        for (CountRowsTask rt : tasks) {
            if (!rt.getReturnCode())
                throw new IOException("Count task for csv input failed: "+ rt.getErrMsg());
            _offsets.setOffsetPerSplit(tasks.indexOf(rt), nrow);
            _offsets.setLenghtPerSplit(tasks.indexOf(rt), rt.getRowCount());
            nrow = nrow + rt.getRowCount();
        }
    }
    catch (Exception e) {
        throw new IOException("Threadpool Error " + e.getMessage(), e);
    }

    // allocate target matrix block based on given size;
    // need to allocate sparse as well since lock-free insert into target
    long estnnz2 = (estnnz < 0) ? (long)nrow * ncol : estnnz;
    return createOutputMatrixBlock(nrow, ncol, nrow, ncol, estnnz2, true, true);
}
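Example 5 delegates the per-split row counting to a CountRowsTask, which is not included in the snippet above. The sketch below shows what such a task might look like, inferred from how it is constructed and queried (getReturnCode, getErrMsg, getRowCount); the actual class in the source project may differ.
private static class CountRowsTask implements Callable<Object>
{
    private final InputSplit _split;
    private final TextInputFormat _informat;
    private final JobConf _job;
    private final boolean _hasHeader;
    private boolean _rc = true;     //return code, false on failure
    private String _errMsg = null;
    private int _nrows = 0;

    public CountRowsTask(InputSplit split, TextInputFormat informat, JobConf job, boolean hasHeader) {
        _split = split;
        _informat = informat;
        _job = job;
        _hasHeader = hasHeader;
    }

    @Override
    public Object call() {
        try {
            RecordReader<LongWritable,Text> reader = _informat.getRecordReader(_split, _job, Reporter.NULL);
            LongWritable key = new LongWritable();
            Text value = new Text();
            try {
                //skip the header row (only the first split is created with hasHeader=true)
                if( _hasHeader )
                    reader.next(key, value);
                while( reader.next(key, value) )
                    _nrows++;
            }
            finally {
                IOUtilFunctions.closeSilently(reader);
            }
        }
        catch(Exception ex) {
            _rc = false;
            _errMsg = ex.getMessage();
        }
        return null;
    }

    public boolean getReturnCode() { return _rc; }
    public String getErrMsg() { return _errMsg; }
    public int getRowCount() { return _nrows; }
}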
Example 6: readTextCellFrameFromInputSplit
import org.apache.hadoop.mapred.TextInputFormat; //import the package/class this method depends on
protected static void readTextCellFrameFromInputSplit( InputSplit split, TextInputFormat informat, JobConf job, FrameBlock dest)
    throws IOException
{
    ValueType[] schema = dest.getSchema();
    int rlen = dest.getNumRows();
    int clen = dest.getNumColumns();

    //create record reader
    RecordReader<LongWritable,Text> reader = informat.getRecordReader(split, job, Reporter.NULL);

    LongWritable key = new LongWritable();
    Text value = new Text();
    FastStringTokenizer st = new FastStringTokenizer(' ');

    int row = -1;
    int col = -1;

    try
    {
        while( reader.next(key, value) ) {
            st.reset( value.toString() ); //reinit tokenizer
            row = st.nextInt()-1;
            col = st.nextInt()-1;
            if( row == -3 )
                dest.getColumnMetadata(col).setMvValue(st.nextToken());
            else if( row == -2 )
                dest.getColumnMetadata(col).setNumDistinct(st.nextLong());
            else
                dest.set(row, col, UtilFunctions.stringToObject(schema[col], st.nextToken()));
        }
    }
    catch(Exception ex)
    {
        //post-mortem error handling and bounds checking
        if( row < 0 || row + 1 > rlen || col < 0 || col + 1 > clen ) {
            throw new IOException("Frame cell ["+(row+1)+","+(col+1)+"] " +
                "out of overall frame range [1:"+rlen+",1:"+clen+"].");
        }
        else {
            throw new IOException( "Unable to read frame in text cell format.", ex );
        }
    }
    finally {
        IOUtilFunctions.closeSilently(reader);
    }
}
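Example 6 only processes a single split. A hypothetical caller would pre-allocate the target FrameBlock and invoke the method once per split, for instance as sketched below; the names fname, schema, and rlen are placeholders, and the FrameBlock allocation calls are assumptions based on how the reader is used here.
JobConf job = new JobConf(ConfigurationManager.getCachedJobConf());
FileInputFormat.addInputPath(job, new Path(fname)); //fname: text cell frame on HDFS
TextInputFormat informat = new TextInputFormat();
informat.configure(job);

FrameBlock dest = new FrameBlock(schema);  //target frame with the expected schema
dest.ensureAllocatedColumns(rlen);         //pre-allocate rlen rows before reading

for( InputSplit split : informat.getSplits(job, 1) )
    readTextCellFrameFromInputSplit(split, informat, job, dest);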