This article collects typical usage examples of the Java class org.apache.hadoop.mapreduce.lib.input.NLineInputFormat. If you are wondering what NLineInputFormat is for, how to use it, or what working code with it looks like, the curated examples below should help.
The NLineInputFormat class belongs to the org.apache.hadoop.mapreduce.lib.input package. A total of 15 code examples of the class are shown below, sorted by popularity by default.
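Before the individual examples, here is a minimal, self-contained sketch of the basic pattern most of them build on: set the job's input format to NLineInputFormat and control how many input lines each map task receives with setNumLinesPerSplit (equivalently, the mapreduce.input.lineinputformat.linespermap property). The NLineExample class, its LineMapper, and the value of 100 lines per split are illustrative placeholders, not taken from any of the projects below.

import java.io.IOException;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.lib.input.NLineInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;

public class NLineExample {

    // Placeholder mapper: keys are byte offsets, values are single input lines.
    public static class LineMapper extends Mapper<LongWritable, Text, Text, LongWritable> {
        @Override
        protected void map(LongWritable key, Text value, Context context)
                throws IOException, InterruptedException {
            context.write(value, new LongWritable(1));
        }
    }

    public static void main(String[] args) throws Exception {
        Configuration conf = new Configuration();
        Job job = Job.getInstance(conf, "nline-example");
        job.setJarByClass(NLineExample.class);
        job.setMapperClass(LineMapper.class);
        job.setOutputKeyClass(Text.class);
        job.setOutputValueClass(LongWritable.class);

        // Hand at most 100 input lines to each map task.
        job.setInputFormatClass(NLineInputFormat.class);
        NLineInputFormat.addInputPath(job, new Path(args[0]));
        NLineInputFormat.setNumLinesPerSplit(job, 100);

        FileOutputFormat.setOutputPath(job, new Path(args[1]));
        System.exit(job.waitForCompletion(true) ? 0 : 1);
    }
}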
Example 1: interleaveSplitFastq
import org.apache.hadoop.mapreduce.lib.input.NLineInputFormat; // import the required package/class
public static void interleaveSplitFastq(FileStatus fst, FileStatus fst2, String splitDir, int splitlen, JavaSparkContext sc) throws IOException {
    List<FileSplit> nlif = NLineInputFormat.getSplitsForFile(fst, sc.hadoopConfiguration(), splitlen);
    List<FileSplit> nlif2 = NLineInputFormat.getSplitsForFile(fst2, sc.hadoopConfiguration(), splitlen);
    JavaRDD<FileSplit> splitRDD = sc.parallelize(nlif);
    JavaRDD<FileSplit> splitRDD2 = sc.parallelize(nlif2);
    JavaPairRDD<FileSplit, FileSplit> zips = splitRDD.zip(splitRDD2);
    zips.foreach(splits -> {
        Path path = splits._1.getPath();
        FastqRecordReader fqreader = new FastqRecordReader(new Configuration(), splits._1);
        FastqRecordReader fqreader2 = new FastqRecordReader(new Configuration(), splits._2);
        writeInterleavedSplits(fqreader, fqreader2, new Configuration(), splitDir + "/" + path.getParent().getName() + "_" + splits._1.getStart() + ".fq");
    });
}
Example 2: splitFastq
import org.apache.hadoop.mapreduce.lib.input.NLineInputFormat; // import the required package/class
private static void splitFastq(FileStatus fst, String fqPath, String splitDir, int splitlen, JavaSparkContext sc) throws IOException {
    Path fqpath = new Path(fqPath);
    String fqname = fqpath.getName();
    String[] ns = fqname.split("\\.");
    // TODO: Handle also compressed files
    List<FileSplit> nlif = NLineInputFormat.getSplitsForFile(fst, sc.hadoopConfiguration(), splitlen);
    JavaRDD<FileSplit> splitRDD = sc.parallelize(nlif);
    splitRDD.foreach(split -> {
        FastqRecordReader fqreader = new FastqRecordReader(new Configuration(), split);
        writeFastqFile(fqreader, new Configuration(), splitDir + "/split_" + split.getStart() + "." + ns[1]);
    });
}
Example 3: interleaveSplitFastq
import org.apache.hadoop.mapreduce.lib.input.NLineInputFormat; // import the required package/class
public static void interleaveSplitFastq(FileStatus fst, FileStatus fst2, String splitDir, int splitlen, JavaSparkContext sc) throws IOException {
    List<FileSplit> nlif = NLineInputFormat.getSplitsForFile(fst, sc.hadoopConfiguration(), splitlen);
    List<FileSplit> nlif2 = NLineInputFormat.getSplitsForFile(fst2, sc.hadoopConfiguration(), splitlen);
    JavaRDD<FileSplit> splitRDD = sc.parallelize(nlif);
    JavaRDD<FileSplit> splitRDD2 = sc.parallelize(nlif2);
    JavaPairRDD<FileSplit, FileSplit> zips = splitRDD.zip(splitRDD2);
    zips.foreach(splits -> {
        Path path = splits._1.getPath();
        FastqRecordReader fqreader = new FastqRecordReader(new Configuration(), splits._1);
        FastqRecordReader fqreader2 = new FastqRecordReader(new Configuration(), splits._2);
        writeInterleavedSplits(fqreader, fqreader2, new Configuration(), splitDir + "/" + path.getParent().getName() + "_" + splits._1.getStart() + ".fq");
    });
}
Example 4: interleaveSplitFastq
import org.apache.hadoop.mapreduce.lib.input.NLineInputFormat; // import the required package/class
public static void interleaveSplitFastq(FileStatus fst, FileStatus fst2, String splitDir, int splitlen, JavaSparkContext sc) throws IOException {
    String[] ns = fst.getPath().getName().split("\\.");
    // TODO: Handle also compressed files
    List<FileSplit> nlif = NLineInputFormat.getSplitsForFile(fst, sc.hadoopConfiguration(), splitlen);
    List<FileSplit> nlif2 = NLineInputFormat.getSplitsForFile(fst2, sc.hadoopConfiguration(), splitlen);
    JavaRDD<FileSplit> splitRDD = sc.parallelize(nlif);
    JavaRDD<FileSplit> splitRDD2 = sc.parallelize(nlif2);
    JavaPairRDD<FileSplit, FileSplit> zips = splitRDD.zip(splitRDD2);
    zips.foreach(splits -> {
        Path path = splits._1.getPath();
        FastqRecordReader fqreader = new FastqRecordReader(new Configuration(), splits._1);
        FastqRecordReader fqreader2 = new FastqRecordReader(new Configuration(), splits._2);
        writeInterleavedSplits(fqreader, fqreader2, new Configuration(), splitDir, path.getParent().getName() + "_" + splits._1.getStart() + ".fq");
    });
}
Example 5: initializeMemberVariables
import org.apache.hadoop.mapreduce.lib.input.NLineInputFormat; // import the required package/class
@SuppressWarnings("deprecation")
@Override
public void initializeMemberVariables() {
    xmlFilename = new String("mapred-default.xml");
    configurationClasses = new Class[] { MRJobConfig.class, MRConfig.class,
        JHAdminConfig.class, ShuffleHandler.class, FileOutputFormat.class,
        FileInputFormat.class, Job.class, NLineInputFormat.class,
        JobConf.class, FileOutputCommitter.class };
    // Initialize used variables
    configurationPropsToSkipCompare = new HashSet<String>();
    // Set error modes
    errorIfMissingConfigProps = true;
    errorIfMissingXmlProps = false;
    // Ignore deprecated MR1 properties in JobConf
    configurationPropsToSkipCompare
        .add(JobConf.MAPRED_JOB_MAP_MEMORY_MB_PROPERTY);
    configurationPropsToSkipCompare
        .add(JobConf.MAPRED_JOB_REDUCE_MEMORY_MB_PROPERTY);
}
Example 6: setupJob
import org.apache.hadoop.mapreduce.lib.input.NLineInputFormat; // import the required package/class
public static void setupJob(Job job, int minFeaturesPerSplit, long featureCount)
{
    if (minFeaturesPerSplit > 0)
    {
        if (featureCount < 0)
        {
            throw new IllegalArgumentException("Expected a feature count");
        }
        int maxMapTasks = job.getConfiguration().getInt("mapred.tasktracker.map.tasks.maximum", -1);
        if (maxMapTasks > 0)
        {
            int featuresPerSplit = (int) (featureCount / maxMapTasks);
            if (featuresPerSplit < minFeaturesPerSplit)
            {
                featuresPerSplit = minFeaturesPerSplit;
            }
            job.getConfiguration().setBoolean(USE_NLINE_FORMAT, true);
            NLineInputFormat.setNumLinesPerSplit(job, featuresPerSplit);
        }
    }
}
Example 7: getNumLinesPerSplit
import org.apache.hadoop.mapreduce.lib.input.NLineInputFormat; // import the required package/class
/**
 * Java wrapper for {@link NLineInputFormat#getNumLinesPerSplit(org.apache.hadoop.mapreduce.JobContext)}.
 *
 * @param ctx the JavaScript context
 * @param thisObj the 'this' object
 * @param args the function arguments
 * @param func the function being called
 *
 * @return the number of lines per split
 */
@JSStaticFunction
public static Object getNumLinesPerSplit(final Context ctx, final Scriptable thisObj, final Object[] args,
                                         final Function func) {
    final Object arg0 = args.length >= 1 ? args[0] : Undefined.instance;
    if (args.length < 1) {
        throw Utils.makeError(ctx, thisObj, LembosMessages.ONE_ARG_EXPECTED);
    } else if (!JavaScriptUtils.isDefined(arg0)) {
        throw Utils.makeError(ctx, thisObj, LembosMessages.FIRST_ARG_REQUIRED);
    } else if (!(arg0 instanceof JobWrap)) {
        throw Utils.makeError(ctx, thisObj, LembosMessages.FIRST_ARG_MUST_BE_JOB);
    }
    return NLineInputFormat.getNumLinesPerSplit(((JobWrap) arg0).getJob());
}
Example 8: setNumLinesPerSplit
import org.apache.hadoop.mapreduce.lib.input.NLineInputFormat; // import the required package/class
/**
 * Java wrapper for {@link NLineInputFormat#setNumLinesPerSplit(org.apache.hadoop.mapreduce.Job, int)}.
 *
 * @param ctx the JavaScript context
 * @param thisObj the 'this' object
 * @param args the function arguments
 * @param func the function called (unused)
 */
@JSStaticFunction
public static void setNumLinesPerSplit(final Context ctx, final Scriptable thisObj, final Object[] args,
                                       final Function func) {
    final Object arg0 = args.length >= 1 ? args[0] : Undefined.instance;
    final Object arg1 = args.length >= 2 ? args[1] : Undefined.instance;
    if (args.length < 2) {
        throw Utils.makeError(ctx, thisObj, LembosMessages.TWO_ARGS_EXPECTED);
    } else if (!JavaScriptUtils.isDefined(arg0)) {
        throw Utils.makeError(ctx, thisObj, LembosMessages.FIRST_ARG_REQUIRED);
    } else if (!JavaScriptUtils.isDefined(arg1)) {
        throw Utils.makeError(ctx, thisObj, LembosMessages.SECOND_ARG_REQUIRED);
    } else if (!(arg0 instanceof JobWrap)) {
        throw Utils.makeError(ctx, thisObj, LembosMessages.FIRST_ARG_MUST_BE_JOB);
    } else if (!(arg1 instanceof Number)) {
        throw Utils.makeError(ctx, thisObj, LembosMessages.SECOND_ARG_ARG_MUST_BE_NUM);
    }
    NLineInputFormat.setNumLinesPerSplit(((JobWrap) arg0).getJob(), JavaScriptUtils.fromNumber(arg1).intValue());
}
Example 9: splitFastq
import org.apache.hadoop.mapreduce.lib.input.NLineInputFormat; // import the required package/class
private static void splitFastq(FileStatus fst, String fqPath, String splitDir, int splitlen, JavaSparkContext sc) throws IOException {
    Path fqpath = new Path(fqPath);
    String fqname = fqpath.getName();
    String[] ns = fqname.split("\\.");
    List<FileSplit> nlif = NLineInputFormat.getSplitsForFile(fst, sc.hadoopConfiguration(), splitlen);
    JavaRDD<FileSplit> splitRDD = sc.parallelize(nlif);
    splitRDD.foreach(split -> {
        FastqRecordReader fqreader = new FastqRecordReader(new Configuration(), split);
        writeFastqFile(fqreader, new Configuration(), splitDir + "/split_" + split.getStart() + "." + ns[1]);
    });
}
Example 10: run
import org.apache.hadoop.mapreduce.lib.input.NLineInputFormat; // import the required package/class
@Override
public int run(String[] args)
        throws Exception
{
    Job job = Job.getInstance(getConf());
    job.setJarByClass(Phase3Step4LocalDeDuplication.class);
    job.setJobName(Phase3Step4LocalDeDuplication.class.getName());
    // paths
    String inputPath = args[0];
    // text files of ids to be deleted
    String outputPath = args[1];
    // input: reading max N lines for each mapper
    job.setInputFormatClass(NLineInputFormat.class);
    NLineInputFormat.addInputPath(job, new Path(inputPath));
    job.getConfiguration().setInt("mapreduce.input.lineinputformat.linespermap", LINES);
    // mapper
    job.setMapperClass(LocalGreedyDeDuplicationMapper.class);
    LazyOutputFormat.setOutputFormatClass(job, TextOutputFormat.class);
    // reducer
    job.setReducerClass(IDCollectorReducer.class);
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(NullWritable.class);
    FileOutputFormat.setOutputPath(job, new Path(outputPath));
    return job.waitForCompletion(true) ? 0 : 1;
}
Example 11: run
import org.apache.hadoop.mapreduce.lib.input.NLineInputFormat; // import the required package/class
/**
 * Generates a Person Hadoop sequence file containing key-value pairs
 * where the key is the person id and the value is the person itself.
 *
 * @param outputFileName The name of the file to store the persons.
 * @throws Exception
 */
public void run(String outputFileName, String postKeySetterName) throws Exception {
    String hadoopDir = new String(conf.get("ldbc.snb.datagen.serializer.hadoopDir"));
    String tempFile = hadoopDir + "/mrInputFile";
    FileSystem dfs = FileSystem.get(conf);
    dfs.delete(new Path(tempFile), true);
    writeToOutputFile(tempFile, Integer.parseInt(conf.get("ldbc.snb.datagen.generator.numThreads")), conf);
    int numThreads = Integer.parseInt(conf.get("ldbc.snb.datagen.generator.numThreads"));
    conf.setInt("mapreduce.input.lineinputformat.linespermap", 1);
    conf.set("postKeySetterName", postKeySetterName);
    Job job = Job.getInstance(conf, "SIB Generate Users & 1st Dimension");
    job.setMapOutputKeyClass(TupleKey.class);
    job.setMapOutputValueClass(Person.class);
    job.setOutputKeyClass(TupleKey.class);
    job.setOutputValueClass(Person.class);
    job.setJarByClass(HadoopPersonGeneratorMapper.class);
    job.setMapperClass(HadoopPersonGeneratorMapper.class);
    job.setReducerClass(HadoopPersonGeneratorReducer.class);
    job.setNumReduceTasks(numThreads);
    job.setInputFormatClass(NLineInputFormat.class);
    job.setOutputFormatClass(SequenceFileOutputFormat.class);
    FileInputFormat.setInputPaths(job, new Path(tempFile));
    FileOutputFormat.setOutputPath(job, new Path(outputFileName));
    if (!job.waitForCompletion(true)) {
        throw new Exception();
    }
}
Example 12: doMapReduce
import org.apache.hadoop.mapreduce.lib.input.NLineInputFormat; // import the required package/class
private void doMapReduce(final Class<? extends Test> cmd, TestOptions opts) throws IOException,
        InterruptedException, ClassNotFoundException {
    Configuration conf = getConf();
    Path inputDir = writeInputFile(conf, opts);
    conf.set(EvaluationMapTask.CMD_KEY, cmd.getName());
    conf.set(EvaluationMapTask.PE_KEY, getClass().getName());
    Job job = new Job(conf);
    job.setJarByClass(PerformanceEvaluation.class);
    job.setJobName("HBase Performance Evaluation");
    job.setInputFormatClass(NLineInputFormat.class);
    NLineInputFormat.setInputPaths(job, inputDir);
    // this is default, but be explicit about it just in case.
    NLineInputFormat.setNumLinesPerSplit(job, 1);
    job.setOutputKeyClass(LongWritable.class);
    job.setOutputValueClass(LongWritable.class);
    job.setMapperClass(EvaluationMapTask.class);
    job.setReducerClass(LongSumReducer.class);
    job.setNumReduceTasks(1);
    job.setOutputFormatClass(TextOutputFormat.class);
    TextOutputFormat.setOutputPath(job, new Path(inputDir.getParent(), "outputs"));
    TableMapReduceUtil.addDependencyJars(job);
    TableMapReduceUtil.addDependencyJars(job.getConfiguration(),
        DescriptiveStatistics.class, // commons-math
        ObjectMapper.class); // jackson-mapper-asl
    TableMapReduceUtil.initCredentials(job);
    job.waitForCompletion(true);
}
Example 13: run
import org.apache.hadoop.mapreduce.lib.input.NLineInputFormat; // import the required package/class
/**
 * Run method called for starting a MapReduce Job
 */
public int run(String[] args) throws IllegalArgumentException, IOException, ClassNotFoundException, InterruptedException {
    checkRequiredPaths();
    long startTime = 0;
    if (measureTime)
        startTime = System.nanoTime();
    Configuration conf = getConf();
    Job job = Job.getInstance(conf, "ImageSearcher");
    job.setJarByClass(ImageSearcher.class);
    job.setMapperClass(ImageSearchMapper.class);
    job.setMapOutputKeyClass(NullWritable.class);
    job.setMapOutputValueClass(ImageDistanceMap.class);
    job.setReducerClass(ImageSearchReducer.class);
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(Text.class);
    job.setInputFormatClass(NLineInputFormat.class);
    job.setNumReduceTasks(1);
    FileInputFormat.addInputPath(job, new Path(conf.get("ImageFeatures")));
    FileOutputFormat.setOutputPath(job, new Path(conf.get("Output")));
    boolean res = job.waitForCompletion(true);
    if (measureTime) {
        long elapsedTime = System.nanoTime() - startTime;
        System.out.println("== MapReduce Execution Time: " + (double) elapsedTime / 1000000000.0 + "s ==");
    }
    return res ? 0 : 1;
}
Example 14: initializeMemberVariables
import org.apache.hadoop.mapreduce.lib.input.NLineInputFormat; // import the required package/class
@Override
public void initializeMemberVariables() {
    xmlFilename = new String("mapred-default.xml");
    configurationClasses = new Class[] { MRJobConfig.class, MRConfig.class,
        JHAdminConfig.class, ShuffleHandler.class, FileOutputFormat.class,
        FileInputFormat.class, Job.class, NLineInputFormat.class,
        JobConf.class, FileOutputCommitter.class };
    // Initialize used variables
    configurationPropsToSkipCompare = new HashSet<String>();
    xmlPropsToSkipCompare = new HashSet<String>();
    // Set error modes
    errorIfMissingConfigProps = true;
    errorIfMissingXmlProps = false;
    // Ignore deprecated MR1 properties in JobConf
    configurationPropsToSkipCompare
        .add(JobConf.MAPRED_JOB_MAP_MEMORY_MB_PROPERTY);
    configurationPropsToSkipCompare
        .add(JobConf.MAPRED_JOB_REDUCE_MEMORY_MB_PROPERTY);
    // Obsolete entries listed in MAPREDUCE-6057 were removed from trunk
    // but not removed from branch-2.
    xmlPropsToSkipCompare.add("map.sort.class");
    xmlPropsToSkipCompare.add("mapreduce.local.clientfactory.class.name");
    xmlPropsToSkipCompare.add("mapreduce.jobtracker.system.dir");
    xmlPropsToSkipCompare.add("mapreduce.jobtracker.staging.root.dir");
}
Example 15: doMapReduce
import org.apache.hadoop.mapreduce.lib.input.NLineInputFormat; // import the required package/class
private void doMapReduce(final Class<? extends Test> cmd, TestOptions opts) throws IOException,
        InterruptedException, ClassNotFoundException {
    Configuration conf = getConf();
    Path inputDir = writeInputFile(conf, opts);
    conf.set(EvaluationMapTask.CMD_KEY, cmd.getName());
    conf.set(EvaluationMapTask.PE_KEY, getClass().getName());
    Job job = new Job(conf);
    job.setJarByClass(PerformanceEvaluation.class);
    job.setJobName("HBase Performance Evaluation");
    job.setInputFormatClass(NLineInputFormat.class);
    NLineInputFormat.setInputPaths(job, inputDir);
    // this is default, but be explicit about it just in case.
    NLineInputFormat.setNumLinesPerSplit(job, 1);
    job.setOutputKeyClass(LongWritable.class);
    job.setOutputValueClass(LongWritable.class);
    job.setMapperClass(EvaluationMapTask.class);
    job.setReducerClass(LongSumReducer.class);
    job.setNumReduceTasks(1);
    job.setOutputFormatClass(TextOutputFormat.class);
    TextOutputFormat.setOutputPath(job, new Path(inputDir.getParent(), "outputs"));
    TableMapReduceUtil.addDependencyJars(job);
    TableMapReduceUtil.addDependencyJars(job.getConfiguration(),
        Histogram.class, // yammer metrics
        ObjectMapper.class); // jackson-mapper-asl
    TableMapReduceUtil.initCredentials(job);
    job.waitForCompletion(true);
}
}