

Java NLineInputFormat Class Code Examples

This article collects typical usage examples of the Java class org.apache.hadoop.mapreduce.lib.input.NLineInputFormat. If you are wondering what exactly the NLineInputFormat class does, how to use it, or where to find examples of it in practice, the curated code examples below may help.


The NLineInputFormat class belongs to the org.apache.hadoop.mapreduce.lib.input package. A total of 15 code examples of the class are shown below, sorted by popularity by default.
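
Before working through the examples, here is a minimal, self-contained driver sketch showing the typical way NLineInputFormat is wired into a MapReduce job: the format splits the input so that each mapper receives at most N lines. The NLineExample and LineMapper class names are invented for illustration and do not come from the projects listed in this article; only the NLineInputFormat calls mirror the usage shown in the examples below.

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.lib.input.NLineInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
import org.apache.hadoop.mapreduce.lib.output.TextOutputFormat;

public class NLineExample {

  // Hypothetical pass-through mapper: NLineInputFormat delivers the byte offset
  // as the key and the line text as the value, just like TextInputFormat.
  public static class LineMapper extends Mapper<LongWritable, Text, LongWritable, Text> {
    @Override
    protected void map(LongWritable key, Text value, Context context)
        throws java.io.IOException, InterruptedException {
      context.write(key, value);
    }
  }

  public static void main(String[] args) throws Exception {
    Job job = Job.getInstance(new Configuration(), "nline-example");
    job.setJarByClass(NLineExample.class);
    job.setMapperClass(LineMapper.class);
    job.setNumReduceTasks(0); // map-only job

    // Use NLineInputFormat and hand every mapper 100 lines per split.
    job.setInputFormatClass(NLineInputFormat.class);
    NLineInputFormat.addInputPath(job, new Path(args[0]));
    NLineInputFormat.setNumLinesPerSplit(job, 100);

    job.setOutputKeyClass(LongWritable.class);
    job.setOutputValueClass(Text.class);
    job.setOutputFormatClass(TextOutputFormat.class);
    FileOutputFormat.setOutputPath(job, new Path(args[1]));

    System.exit(job.waitForCompletion(true) ? 0 : 1);
  }
}

The same split size can also be set through the configuration property mapreduce.input.lineinputformat.linespermap, as Examples 10 and 11 below do.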

Example 1: interleaveSplitFastq

import org.apache.hadoop.mapreduce.lib.input.NLineInputFormat; // import the required package/class
public static void interleaveSplitFastq(FileStatus fst, FileStatus fst2, String splitDir, int splitlen, JavaSparkContext sc) throws IOException {

    List<FileSplit> nlif = NLineInputFormat.getSplitsForFile(fst, sc.hadoopConfiguration(), splitlen);
    List<FileSplit> nlif2 = NLineInputFormat.getSplitsForFile(fst2, sc.hadoopConfiguration(), splitlen);

    JavaRDD<FileSplit> splitRDD = sc.parallelize(nlif);
    JavaRDD<FileSplit> splitRDD2 = sc.parallelize(nlif2);
    JavaPairRDD<FileSplit, FileSplit> zips = splitRDD.zip(splitRDD2);

    zips.foreach( splits ->  {
      Path path = splits._1.getPath();
      FastqRecordReader fqreader = new FastqRecordReader(new Configuration(), splits._1);
      FastqRecordReader fqreader2 = new FastqRecordReader(new Configuration(), splits._2);
      writeInterleavedSplits(fqreader, fqreader2, new Configuration(), splitDir+"/"+path.getParent().getName()+"_"+splits._1.getStart()+".fq");
    });
}
 
Developer: NGSeq, Project: ViraPipe, Lines: 17, Source: InterleaveMulti.java

Example 2: splitFastq

import org.apache.hadoop.mapreduce.lib.input.NLineInputFormat; // import the required package/class
private static void splitFastq(FileStatus fst, String fqPath, String splitDir, int splitlen, JavaSparkContext sc) throws IOException {
  Path fqpath = new Path(fqPath);
  String fqname = fqpath.getName();
  String[] ns = fqname.split("\\.");
  //TODO: Handle also compressed files
  List<FileSplit> nlif = NLineInputFormat.getSplitsForFile(fst, sc.hadoopConfiguration(), splitlen);

  JavaRDD<FileSplit> splitRDD = sc.parallelize(nlif);

  splitRDD.foreach( split ->  {

    FastqRecordReader fqreader = new FastqRecordReader(new Configuration(), split);
    writeFastqFile(fqreader, new Configuration(), splitDir + "/split_" + split.getStart() + "." + ns[1]);

  });
}
 
Developer: NGSeq, Project: ViraPipe, Lines: 17, Source: InterleaveMulti.java

Example 3: interleaveSplitFastq

import org.apache.hadoop.mapreduce.lib.input.NLineInputFormat; // import the required package/class
public static void interleaveSplitFastq(FileStatus fst, FileStatus fst2, String splitDir, int splitlen, JavaSparkContext sc) throws IOException {

    List<FileSplit> nlif = NLineInputFormat.getSplitsForFile(fst, sc.hadoopConfiguration(), splitlen);
    List<FileSplit> nlif2 = NLineInputFormat.getSplitsForFile(fst2, sc.hadoopConfiguration(), splitlen);

    JavaRDD<FileSplit> splitRDD = sc.parallelize(nlif);
    JavaRDD<FileSplit> splitRDD2 = sc.parallelize(nlif2);
    JavaPairRDD<FileSplit, FileSplit> zips = splitRDD.zip(splitRDD2);

    zips.foreach( splits ->  {
      Path path = splits._1.getPath();
      FastqRecordReader fqreader = new FastqRecordReader(new Configuration(), splits._1);
      FastqRecordReader fqreader2 = new FastqRecordReader(new Configuration(), splits._2);

      writeInterleavedSplits(fqreader, fqreader2, new Configuration(), splitDir+"/"+path.getParent().getName()+"_"+splits._1.getStart()+".fq");
    });
}
 
Developer: NGSeq, Project: ViraPipe, Lines: 18, Source: Decompress.java

Example 4: interleaveSplitFastq

import org.apache.hadoop.mapreduce.lib.input.NLineInputFormat; // import the required package/class
public static void interleaveSplitFastq(FileStatus fst, FileStatus fst2, String splitDir, int splitlen, JavaSparkContext sc) throws IOException {

    String[] ns = fst.getPath().getName().split("\\.");
    //TODO: Handle also compressed files
    List<FileSplit> nlif = NLineInputFormat.getSplitsForFile(fst, sc.hadoopConfiguration(), splitlen);
    List<FileSplit> nlif2 = NLineInputFormat.getSplitsForFile(fst2, sc.hadoopConfiguration(), splitlen);

    JavaRDD<FileSplit> splitRDD = sc.parallelize(nlif);
    JavaRDD<FileSplit> splitRDD2 = sc.parallelize(nlif2);
    JavaPairRDD<FileSplit, FileSplit> zips = splitRDD.zip(splitRDD2);

    zips.foreach( splits ->  {
      Path path = splits._1.getPath();
      FastqRecordReader fqreader = new FastqRecordReader(new Configuration(), splits._1);
      FastqRecordReader fqreader2 = new FastqRecordReader(new Configuration(), splits._2);
      writeInterleavedSplits(fqreader, fqreader2, new Configuration(), splitDir, path.getParent().getName()+"_"+splits._1.getStart()+".fq");
    });
}
 
Developer: NGSeq, Project: ViraPipe, Lines: 19, Source: DecompressInterleave.java

Example 5: initializeMemberVariables

import org.apache.hadoop.mapreduce.lib.input.NLineInputFormat; // import the required package/class
@SuppressWarnings("deprecation")
@Override
public void initializeMemberVariables() {
  xmlFilename = new String("mapred-default.xml");
  configurationClasses = new Class[] { MRJobConfig.class, MRConfig.class,
      JHAdminConfig.class, ShuffleHandler.class, FileOutputFormat.class,
      FileInputFormat.class, Job.class, NLineInputFormat.class,
      JobConf.class, FileOutputCommitter.class };

  // Initialize used variables
  configurationPropsToSkipCompare = new HashSet<String>();

  // Set error modes
  errorIfMissingConfigProps = true;
  errorIfMissingXmlProps = false;

  // Ignore deprecated MR1 properties in JobConf
  configurationPropsToSkipCompare
      .add(JobConf.MAPRED_JOB_MAP_MEMORY_MB_PROPERTY);
  configurationPropsToSkipCompare
      .add(JobConf.MAPRED_JOB_REDUCE_MEMORY_MB_PROPERTY);
}
 
Developer: aliyun-beta, Project: aliyun-oss-hadoop-fs, Lines: 23, Source: TestMapreduceConfigFields.java

Example 6: setupJob

import org.apache.hadoop.mapreduce.lib.input.NLineInputFormat; // import the required package/class
public static void setupJob(Job job, int minFeaturesPerSplit, long featureCount)
{
  if (minFeaturesPerSplit > 0)
  {
    if (featureCount < 0)
    {
      throw new IllegalArgumentException("Expected a feature count");
    }
    int maxMapTasks = job.getConfiguration().getInt("mapred.tasktracker.map.tasks.maximum", -1);
    if (maxMapTasks > 0)
    {
      int featuresPerSplit = (int) (featureCount / maxMapTasks);
      if (featuresPerSplit < minFeaturesPerSplit)
      {
        featuresPerSplit = minFeaturesPerSplit;
      }
      job.getConfiguration().setBoolean(USE_NLINE_FORMAT, true);
      NLineInputFormat.setNumLinesPerSplit(job, featuresPerSplit);
    }
  }
}
 
Developer: ngageoint, Project: mrgeo, Lines: 22, Source: DelimitedVectorInputFormat.java

Example 7: getNumLinesPerSplit

import org.apache.hadoop.mapreduce.lib.input.NLineInputFormat; // import the required package/class
/**
 * Java wrapper for {@link NLineInputFormat#getNumLinesPerSplit(org.apache.hadoop.mapreduce.JobContext)}.
 *
 * @param ctx the JavaScript context
 * @param thisObj the 'this' object
 * @param args the function arguments
 * @param func the function being called
 *
 * @return the number of lines per split
 */
@JSStaticFunction
public static Object getNumLinesPerSplit(final Context ctx, final Scriptable thisObj, final Object[] args,
                                        final Function func) {
    final Object arg0 = args.length >= 1 ? args[0] : Undefined.instance;

    if (args.length < 1) {
        throw Utils.makeError(ctx, thisObj, LembosMessages.ONE_ARG_EXPECTED);
    } else if (!JavaScriptUtils.isDefined(arg0)) {
        throw Utils.makeError(ctx, thisObj, LembosMessages.FIRST_ARG_REQUIRED);
    } else if (!(arg0 instanceof JobWrap)) {
        throw Utils.makeError(ctx, thisObj, LembosMessages.FIRST_ARG_MUST_BE_JOB);
    }

    return NLineInputFormat.getNumLinesPerSplit(((JobWrap)arg0).getJob());
}
 
Developer: apigee, Project: lembos, Lines: 26, Source: NLineInputFormatWrap.java

Example 8: setNumLinesPerSplit

import org.apache.hadoop.mapreduce.lib.input.NLineInputFormat; // import the required package/class
/**
 * Java wrapper for {@link NLineInputFormat#setNumLinesPerSplit(org.apache.hadoop.mapreduce.Job, int)}.
 *
 * @param ctx the JavaScript context
 * @param thisObj the 'this' object
 * @param args the function arguments
 * @param func the function called (unused)
 */
@JSStaticFunction
public static void setNumLinesPerSplit(final Context ctx, final Scriptable thisObj, final Object[] args,
                                       final Function func) {
    final Object arg0 = args.length >= 1 ? args[0] : Undefined.instance;
    final Object arg1 = args.length >= 2 ? args[1] : Undefined.instance;

    if (args.length < 2) {
        throw Utils.makeError(ctx, thisObj, LembosMessages.TWO_ARGS_EXPECTED);
    } else if (!JavaScriptUtils.isDefined(arg0)) {
        throw Utils.makeError(ctx, thisObj, LembosMessages.FIRST_ARG_REQUIRED);
    } else if (!JavaScriptUtils.isDefined(arg1)) {
        throw Utils.makeError(ctx, thisObj, LembosMessages.SECOND_ARG_REQUIRED);
    } else if (!(arg0 instanceof JobWrap)) {
        throw Utils.makeError(ctx, thisObj, LembosMessages.FIRST_ARG_MUST_BE_JOB);
    } else if (!(arg1 instanceof Number)) {
        throw Utils.makeError(ctx, thisObj, LembosMessages.SECOND_ARG_ARG_MUST_BE_NUM);
    }

    NLineInputFormat.setNumLinesPerSplit(((JobWrap)arg0).getJob(), JavaScriptUtils.fromNumber(arg1).intValue());
}
 
Developer: apigee, Project: lembos, Lines: 29, Source: NLineInputFormatWrap.java

Example 9: splitFastq

import org.apache.hadoop.mapreduce.lib.input.NLineInputFormat; // import the required package/class
private static void splitFastq(FileStatus fst, String fqPath, String splitDir, int splitlen, JavaSparkContext sc) throws IOException {
  Path fqpath = new Path(fqPath);
  String fqname = fqpath.getName();
  String[] ns = fqname.split("\\.");
  List<FileSplit> nlif = NLineInputFormat.getSplitsForFile(fst, sc.hadoopConfiguration(), splitlen);

  JavaRDD<FileSplit> splitRDD = sc.parallelize(nlif);

  splitRDD.foreach( split ->  {

    FastqRecordReader fqreader = new FastqRecordReader(new Configuration(), split);
    writeFastqFile(fqreader, new Configuration(), splitDir + "/split_" + split.getStart() + "." + ns[1]);

  });
}
 
Developer: NGSeq, Project: ViraPipe, Lines: 16, Source: Decompress.java

Example 10: run

import org.apache.hadoop.mapreduce.lib.input.NLineInputFormat; // import the required package/class
@Override
public int run(String[] args)
        throws Exception
{
    Job job = Job.getInstance(getConf());

    job.setJarByClass(Phase3Step4LocalDeDuplication.class);
    job.setJobName(Phase3Step4LocalDeDuplication.class.getName());

    // paths
    String inputPath = args[0];
    // text files of ids to be deleted
    String outputPath = args[1];

    // input: reading max N lines for each mapper
    job.setInputFormatClass(NLineInputFormat.class);
    NLineInputFormat.addInputPath(job, new Path(inputPath));
    job.getConfiguration().setInt("mapreduce.input.lineinputformat.linespermap", LINES);

    // mapper
    job.setMapperClass(LocalGreedyDeDuplicationMapper.class);

    LazyOutputFormat.setOutputFormatClass(job, TextOutputFormat.class);

    // reducer
    job.setReducerClass(IDCollectorReducer.class);

    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(NullWritable.class);

    FileOutputFormat.setOutputPath(job, new Path(outputPath));

    return job.waitForCompletion(true) ? 0 : 1;
}
 
Developer: dkpro, Project: dkpro-c4corpus, Lines: 35, Source: Phase3Step4LocalDeDuplication.java

Example 11: run

import org.apache.hadoop.mapreduce.lib.input.NLineInputFormat; // import the required package/class
/**
 * Generates a Person Hadoop sequence file containing key-value pairs
 * where the key is the person id and the value is the person itself.
 *
 * @param outputFileName The name of the file to store the persons.
 * @throws Exception
 */
public void run(String outputFileName, String postKeySetterName) throws Exception {

    String hadoopDir = new String(conf.get("ldbc.snb.datagen.serializer.hadoopDir"));
    String tempFile = hadoopDir + "/mrInputFile";

    FileSystem dfs = FileSystem.get(conf);
    dfs.delete(new Path(tempFile), true);
    writeToOutputFile(tempFile, Integer.parseInt(conf.get("ldbc.snb.datagen.generator.numThreads")), conf);

    int numThreads = Integer.parseInt(conf.get("ldbc.snb.datagen.generator.numThreads"));
    conf.setInt("mapreduce.input.lineinputformat.linespermap", 1);
    conf.set("postKeySetterName", postKeySetterName);
    Job job = Job.getInstance(conf, "SIB Generate Users & 1st Dimension");
    job.setMapOutputKeyClass(TupleKey.class);
    job.setMapOutputValueClass(Person.class);
    job.setOutputKeyClass(TupleKey.class);
    job.setOutputValueClass(Person.class);
    job.setJarByClass(HadoopPersonGeneratorMapper.class);
    job.setMapperClass(HadoopPersonGeneratorMapper.class);
    job.setReducerClass(HadoopPersonGeneratorReducer.class);
    job.setNumReduceTasks(numThreads);
    job.setInputFormatClass(NLineInputFormat.class);
    job.setOutputFormatClass(SequenceFileOutputFormat.class);
    FileInputFormat.setInputPaths(job, new Path(tempFile));
    FileOutputFormat.setOutputPath(job, new Path(outputFileName));
    if (!job.waitForCompletion(true)) {
        throw new Exception();
    }
}
 
Developer: ldbc, Project: ldbc_snb_datagen, Lines: 37, Source: HadoopPersonGenerator.java

Example 12: doMapReduce

import org.apache.hadoop.mapreduce.lib.input.NLineInputFormat; // import the required package/class
private void doMapReduce(final Class<? extends Test> cmd, TestOptions opts) throws IOException,
      InterruptedException, ClassNotFoundException {
  Configuration conf = getConf();
  Path inputDir = writeInputFile(conf, opts);
  conf.set(EvaluationMapTask.CMD_KEY, cmd.getName());
  conf.set(EvaluationMapTask.PE_KEY, getClass().getName());
  Job job = new Job(conf);
  job.setJarByClass(PerformanceEvaluation.class);
  job.setJobName("HBase Performance Evaluation");

  job.setInputFormatClass(NLineInputFormat.class);
  NLineInputFormat.setInputPaths(job, inputDir);
  // this is default, but be explicit about it just in case.
  NLineInputFormat.setNumLinesPerSplit(job, 1);

  job.setOutputKeyClass(LongWritable.class);
  job.setOutputValueClass(LongWritable.class);

  job.setMapperClass(EvaluationMapTask.class);
  job.setReducerClass(LongSumReducer.class);

  job.setNumReduceTasks(1);

  job.setOutputFormatClass(TextOutputFormat.class);
  TextOutputFormat.setOutputPath(job, new Path(inputDir.getParent(), "outputs"));

  TableMapReduceUtil.addDependencyJars(job);
  TableMapReduceUtil.addDependencyJars(job.getConfiguration(),
    DescriptiveStatistics.class, // commons-math
    ObjectMapper.class);         // jackson-mapper-asl

  TableMapReduceUtil.initCredentials(job);

  job.waitForCompletion(true);
}
 
Developer: tenggyut, Project: HIndex, Lines: 36, Source: PerformanceEvaluation.java

Example 13: run

import org.apache.hadoop.mapreduce.lib.input.NLineInputFormat; // import the required package/class
/**
 * Run method called for starting a MapReduce Job
 */
public int run(String[] args) throws IllegalArgumentException, IOException, ClassNotFoundException, InterruptedException {
    checkRequiredPaths();

    long startTime = 0;
    if (measureTime)
        startTime = System.nanoTime();

    Configuration conf = getConf();
    Job job = Job.getInstance(conf, "ImageSearcher");
    job.setJarByClass(ImageSearcher.class);

    job.setMapperClass(ImageSearchMapper.class);
    job.setMapOutputKeyClass(NullWritable.class);
    job.setMapOutputValueClass(ImageDistanceMap.class);

    job.setReducerClass(ImageSearchReducer.class);
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(Text.class);

    job.setInputFormatClass(NLineInputFormat.class);

    job.setNumReduceTasks(1);

    FileInputFormat.addInputPath(job, new Path(conf.get("ImageFeatures")));
    FileOutputFormat.setOutputPath(job, new Path(conf.get("Output")));

    boolean res = job.waitForCompletion(true);

    if (measureTime) {
        long elapsedTime = System.nanoTime() - startTime;
        System.out.println("== MapReduce Execution Time: " + (double) elapsedTime / 1000000000.0 + "s ==");
    }

    return res ? 0 : 1;
}
 
Developer: hiiamok, Project: DISH, Lines: 39, Source: ImageSearcher.java

Example 14: initializeMemberVariables

import org.apache.hadoop.mapreduce.lib.input.NLineInputFormat; // import the required package/class
@Override
public void initializeMemberVariables() {
  xmlFilename = new String("mapred-default.xml");
  configurationClasses = new Class[] { MRJobConfig.class, MRConfig.class,
      JHAdminConfig.class, ShuffleHandler.class, FileOutputFormat.class,
      FileInputFormat.class, Job.class, NLineInputFormat.class,
      JobConf.class, FileOutputCommitter.class };

  // Initialize used variables
  configurationPropsToSkipCompare = new HashSet<String>();
  xmlPropsToSkipCompare = new HashSet<String>();

  // Set error modes
  errorIfMissingConfigProps = true;
  errorIfMissingXmlProps = false;

  // Ignore deprecated MR1 properties in JobConf
  configurationPropsToSkipCompare
      .add(JobConf.MAPRED_JOB_MAP_MEMORY_MB_PROPERTY);
  configurationPropsToSkipCompare
      .add(JobConf.MAPRED_JOB_REDUCE_MEMORY_MB_PROPERTY);

  // Obsolete entries listed in MAPREDUCE-6057 were removed from trunk
  // but not removed from branch-2.
  xmlPropsToSkipCompare.add("map.sort.class");
  xmlPropsToSkipCompare.add("mapreduce.local.clientfactory.class.name");
  xmlPropsToSkipCompare.add("mapreduce.jobtracker.system.dir");
  xmlPropsToSkipCompare.add("mapreduce.jobtracker.staging.root.dir");
}
 
Developer: hopshadoop, Project: hops, Lines: 30, Source: TestMapreduceConfigFields.java

Example 15: doMapReduce

import org.apache.hadoop.mapreduce.lib.input.NLineInputFormat; // import the required package/class
private void doMapReduce(final Class<? extends Test> cmd, TestOptions opts) throws IOException,
      InterruptedException, ClassNotFoundException {
  Configuration conf = getConf();
  Path inputDir = writeInputFile(conf, opts);
  conf.set(EvaluationMapTask.CMD_KEY, cmd.getName());
  conf.set(EvaluationMapTask.PE_KEY, getClass().getName());
  Job job = new Job(conf);
  job.setJarByClass(PerformanceEvaluation.class);
  job.setJobName("HBase Performance Evaluation");

  job.setInputFormatClass(NLineInputFormat.class);
  NLineInputFormat.setInputPaths(job, inputDir);
  // this is default, but be explicit about it just in case.
  NLineInputFormat.setNumLinesPerSplit(job, 1);

  job.setOutputKeyClass(LongWritable.class);
  job.setOutputValueClass(LongWritable.class);

  job.setMapperClass(EvaluationMapTask.class);
  job.setReducerClass(LongSumReducer.class);

  job.setNumReduceTasks(1);

  job.setOutputFormatClass(TextOutputFormat.class);
  TextOutputFormat.setOutputPath(job, new Path(inputDir.getParent(), "outputs"));

  TableMapReduceUtil.addDependencyJars(job);
  TableMapReduceUtil.addDependencyJars(job.getConfiguration(),
    Histogram.class,     // yammer metrics   
    ObjectMapper.class); // jackson-mapper-asl

  TableMapReduceUtil.initCredentials(job);

  job.waitForCompletion(true);
}
 
Developer: shenli-uiuc, Project: PyroDB, Lines: 36, Source: PerformanceEvaluation.java


Note: The org.apache.hadoop.mapreduce.lib.input.NLineInputFormat class examples in this article were compiled by 纯净天空 from open-source code and documentation platforms such as GitHub and MSDocs. The code snippets were selected from open-source projects contributed by many developers; copyright of the source code remains with the original authors. For redistribution and use, please refer to the license of the corresponding project. Do not republish without permission.