

Java TextInputFormat Class Code Examples

This article collects typical usage examples of the Java class org.apache.hadoop.mapred.TextInputFormat. If you are wondering what TextInputFormat is for, how to use it, or what working code with it looks like, the curated examples below should help.


The TextInputFormat class belongs to the org.apache.hadoop.mapred package. The sections below present 15 code examples of the class, sorted by popularity by default.
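Before the examples, here is a minimal sketch of how TextInputFormat is typically wired into a job with the old org.apache.hadoop.mapred API. This sketch is for orientation only and is not one of the collected examples: the class name LineCountSketch, the job name, and the line-counting logic are hypothetical, and the input/output paths are assumed to come from the command line.

import java.io.IOException;

import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapred.FileInputFormat;
import org.apache.hadoop.mapred.FileOutputFormat;
import org.apache.hadoop.mapred.JobClient;
import org.apache.hadoop.mapred.JobConf;
import org.apache.hadoop.mapred.MapReduceBase;
import org.apache.hadoop.mapred.Mapper;
import org.apache.hadoop.mapred.OutputCollector;
import org.apache.hadoop.mapred.Reporter;
import org.apache.hadoop.mapred.TextInputFormat;
import org.apache.hadoop.mapred.TextOutputFormat;
import org.apache.hadoop.mapred.lib.LongSumReducer;

// Hypothetical example: count the total number of input lines.
public class LineCountSketch {

  // TextInputFormat hands each mapper a (byte offset, line text) pair.
  public static class LineMapper extends MapReduceBase
      implements Mapper<LongWritable, Text, Text, LongWritable> {
    private static final Text KEY = new Text("lines");
    private static final LongWritable ONE = new LongWritable(1);

    public void map(LongWritable offset, Text line,
        OutputCollector<Text, LongWritable> out, Reporter reporter) throws IOException {
      // Emit one count per line; the offset key is ignored here.
      out.collect(KEY, ONE);
    }
  }

  public static void main(String[] args) throws IOException {
    JobConf conf = new JobConf(LineCountSketch.class);
    conf.setJobName("line-count-sketch");

    // Plain text input: records are lines, keys are byte offsets.
    conf.setInputFormat(TextInputFormat.class);
    conf.setOutputFormat(TextOutputFormat.class);

    conf.setMapperClass(LineMapper.class);
    conf.setCombinerClass(LongSumReducer.class);
    conf.setReducerClass(LongSumReducer.class);
    conf.setOutputKeyClass(Text.class);
    conf.setOutputValueClass(LongWritable.class);

    FileInputFormat.setInputPaths(conf, new Path(args[0]));
    FileOutputFormat.setOutputPath(conf, new Path(args[1]));

    JobClient.runJob(conf);
  }
}

Run against a text input directory and a not-yet-existing output directory, this would produce a single "lines<TAB>N" record, where N is the number of input lines.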

Example 1: testAddInputPathWithMapper

import org.apache.hadoop.mapred.TextInputFormat; // import the required package/class
public void testAddInputPathWithMapper() {
  final JobConf conf = new JobConf();
  MultipleInputs.addInputPath(conf, new Path("/foo"), TextInputFormat.class,
     MapClass.class);
  MultipleInputs.addInputPath(conf, new Path("/bar"),
     KeyValueTextInputFormat.class, MapClass2.class);
  final Map<Path, InputFormat> inputs = MultipleInputs
     .getInputFormatMap(conf);
  final Map<Path, Class<? extends Mapper>> maps = MultipleInputs
     .getMapperTypeMap(conf);

  assertEquals(TextInputFormat.class, inputs.get(new Path("/foo")).getClass());
  assertEquals(KeyValueTextInputFormat.class, inputs.get(new Path("/bar"))
     .getClass());
  assertEquals(MapClass.class, maps.get(new Path("/foo")));
  assertEquals(MapClass2.class, maps.get(new Path("/bar")));
}
 
Developer ID: naver, Project: hadoop, Lines: 18, Source: TestMultipleInputs.java

Example 2: configure

import org.apache.hadoop.mapred.TextInputFormat; // import the required package/class
public void configure(String keySpec, int expect) throws Exception {
  Path testdir = new Path(TEST_DIR.getAbsolutePath());
  Path inDir = new Path(testdir, "in");
  Path outDir = new Path(testdir, "out");
  FileSystem fs = getFileSystem();
  fs.delete(testdir, true);
  conf.setInputFormat(TextInputFormat.class);
  FileInputFormat.setInputPaths(conf, inDir);
  FileOutputFormat.setOutputPath(conf, outDir);
  conf.setOutputKeyClass(Text.class);
  conf.setOutputValueClass(LongWritable.class);

  conf.setNumMapTasks(1);
  conf.setNumReduceTasks(1);

  conf.setOutputFormat(TextOutputFormat.class);
  conf.setOutputKeyComparatorClass(KeyFieldBasedComparator.class);
  conf.setKeyFieldComparatorOptions(keySpec);
  conf.setKeyFieldPartitionerOptions("-k1.1,1.1");
  conf.set(JobContext.MAP_OUTPUT_KEY_FIELD_SEPERATOR, " ");
  conf.setMapperClass(InverseMapper.class);
  conf.setReducerClass(IdentityReducer.class);
  if (!fs.mkdirs(testdir)) {
    throw new IOException("Mkdirs failed to create " + testdir.toString());
  }
  if (!fs.mkdirs(inDir)) {
    throw new IOException("Mkdirs failed to create " + inDir.toString());
  }
  // set up input data: two lines written to a single file
  Path inFile = new Path(inDir, "part0");
  FileOutputStream fos = new FileOutputStream(inFile.toString());
  fos.write((line1 + "\n").getBytes());
  fos.write((line2 + "\n").getBytes());
  fos.close();
  JobClient jc = new JobClient(conf);
  RunningJob r_job = jc.submitJob(conf);
  while (!r_job.isComplete()) {
    Thread.sleep(1000);
  }
  
  if (!r_job.isSuccessful()) {
    fail("Oops! The job broke due to an unexpected error");
  }
  Path[] outputFiles = FileUtil.stat2Paths(
      getFileSystem().listStatus(outDir,
      new Utils.OutputFileUtils.OutputFilesFilter()));
  if (outputFiles.length > 0) {
    InputStream is = getFileSystem().open(outputFiles[0]);
    BufferedReader reader = new BufferedReader(new InputStreamReader(is));
    String line = reader.readLine();
    //make sure we get what we expect as the first line, and also
    //that we have two lines
    if (expect == 1) {
      assertTrue(line.startsWith(line1));
    } else if (expect == 2) {
      assertTrue(line.startsWith(line2));
    }
    line = reader.readLine();
    if (expect == 1) {
      assertTrue(line.startsWith(line2));
    } else if (expect == 2) {
      assertTrue(line.startsWith(line1));
    }
    reader.close();
  }
}
 
Developer ID: naver, Project: hadoop, Lines: 67, Source: TestKeyFieldBasedComparator.java

Example 3: configure

import org.apache.hadoop.mapred.TextInputFormat; // import the required package/class
public void configure(JobConf job) {
  super.configure(job);
  //disable the auto increment of the counter. For streaming, no of 
  //processed records could be different(equal or less) than the no of 
  //records input.
  SkipBadRecords.setAutoIncrMapperProcCount(job, false);
  skipping = job.getBoolean(MRJobConfig.SKIP_RECORDS, false);
  if (mapInputWriterClass_.getCanonicalName().equals(TextInputWriter.class.getCanonicalName())) {
    String inputFormatClassName = job.getClass("mapred.input.format.class", TextInputFormat.class).getCanonicalName();
    ignoreKey = job.getBoolean("stream.map.input.ignoreKey", 
      inputFormatClassName.equals(TextInputFormat.class.getCanonicalName()));
  }
  
  try {
    mapOutputFieldSeparator = job.get("stream.map.output.field.separator", "\t").getBytes("UTF-8");
    mapInputFieldSeparator = job.get("stream.map.input.field.separator", "\t").getBytes("UTF-8");
    numOfMapOutputKeyFields = job.getInt("stream.num.map.output.key.fields", 1);
  } catch (UnsupportedEncodingException e) {
    throw new RuntimeException("The current system does not support UTF-8 encoding!", e);
  }
}
 
Developer ID: naver, Project: hadoop, Lines: 22, Source: PipeMapper.java

Example 4: fillMap

import org.apache.hadoop.mapred.TextInputFormat; // import the required package/class
private static void fillMap(JetInstance client, String name, String inputPath, int parallelism) throws Exception {
    DAG dag = new DAG();
    JobConf conf = new JobConf();
    conf.setInputFormat(TextInputFormat.class);
    TextInputFormat.addInputPath(conf, new Path(inputPath));


    Vertex reader = dag.newVertex("reader", readHdfsP(conf, Util::entry));
    Vertex mapper = dag.newVertex("mapper",
            mapP((Map.Entry<LongWritable, Text> e) -> entry(e.getKey().get(), e.getValue().toString())));
    Vertex writer = dag.newVertex("writer", writeMapP(name));

    reader.localParallelism(parallelism);
    mapper.localParallelism(parallelism);
    writer.localParallelism(parallelism);

    dag.edge(between(reader, mapper));
    dag.edge(between(mapper, writer));


    JobConfig jobConfig = new JobConfig();
    jobConfig.addClass(HdfsToMap.class);

    client.newJob(dag, jobConfig).join();
}
 
Developer ID: hazelcast, Project: big-data-benchmark, Lines: 26, Source: HdfsToMap.java

Example 5: addDependencyJars

import org.apache.hadoop.mapred.TextInputFormat; // import the required package/class
/**
 * @see org.apache.hadoop.hbase.mapreduce.TableMapReduceUtil#addDependencyJars(org.apache.hadoop.mapreduce.Job)
 */
public static void addDependencyJars(JobConf job) throws IOException {
  org.apache.hadoop.hbase.mapreduce.TableMapReduceUtil.addHBaseDependencyJars(job);
  org.apache.hadoop.hbase.mapreduce.TableMapReduceUtil.addDependencyJars(
    job,
    // when making changes here, consider also mapreduce.TableMapReduceUtil
    // pull job classes
    job.getMapOutputKeyClass(),
    job.getMapOutputValueClass(),
    job.getOutputKeyClass(),
    job.getOutputValueClass(),
    job.getPartitionerClass(),
    job.getClass("mapred.input.format.class", TextInputFormat.class, InputFormat.class),
    job.getClass("mapred.output.format.class", TextOutputFormat.class, OutputFormat.class),
    job.getCombinerClass());
}
 
Developer ID: fengchen8086, Project: ditb, Lines: 19, Source: TableMapReduceUtil.java

Example 6: main

import org.apache.hadoop.mapred.TextInputFormat; // import the required package/class
public static void main(String[] args) throws Exception {

		JobConf conf = new JobConf(WeatherData.class);
		conf.setJobName("temp");

		// Note: the Mapper's output types are not the defaults, so the
		// following properties must be set explicitly.
		conf.setMapOutputKeyClass(Text.class);
		conf.setMapOutputValueClass(Text.class);

		conf.setMapperClass(MaxTemperatureMapper.class);
		conf.setReducerClass(MaxTemperatureReducer.class);

		conf.setInputFormat(TextInputFormat.class);
		conf.setOutputFormat(TextOutputFormat.class);

		FileInputFormat.setInputPaths(conf, new Path(args[0]));
		FileOutputFormat.setOutputPath(conf, new Path(args[1]));

		JobClient.runJob(conf);

	}
 
Developer ID: gauravdangi, Project: Hadoop-CaseStudies, Lines: 24, Source: WeatherData.java

Example 7: run

import org.apache.hadoop.mapred.TextInputFormat; // import the required package/class
@Override
public void run(String[] args) throws Exception {
  Flags flags = new Flags();
  flags.addWithDefaultValue(
      "tag_subject_data", "/media/work/datasets(secret)/douban/raw/tag_subject.dat", "");
  flags.addWithDefaultValue(
      "subject_data", "/media/work/datasets(secret)/douban/raw/subject.dat", "");
  flags.add("output");
  flags.parseAndCheck(args);
  
  JobConf job = new JobConf(this.getClass());
  job.setJobName("convert-douban-raw-to-posts");
  MapReduceHelper.setAllOutputTypes(job, Text.class);
  MapReduceHelper.setMR(
      job, DoubanRawMapper.class, DoubanToPostReducer.class);
  job.setInputFormat(TextInputFormat.class);
  TextInputFormat.addInputPath(
      job, new Path(flags.getString("tag_subject_data")));
  TextInputFormat.addInputPath(
      job, new Path(flags.getString("subject_data")));
  job.setOutputFormat(SequenceFileOutputFormat.class);
  SequenceFileOutputFormat.setOutputPath(
      job, new Path(flags.getString("output")));
  JobClient.runJob(job);
}
 
Developer ID: thunlp, Project: THUTag, Lines: 26, Source: ImportDouban.java

Example 8: configure

import org.apache.hadoop.mapred.TextInputFormat; // import the required package/class
public void configure(JobConf job) {
    // Set the mapper and reducers
    job.setMapperClass(ReadDataJob.TestMapper.class);

    // Make sure this jar is included
    job.setJarByClass(ReadDataJob.TestMapper.class);

    // Specify the input and output data formats
    job.setInputFormat(TextInputFormat.class);
    job.setOutputFormat(NullOutputFormat.class);

    // Turn off speculative execution
    job.setMapSpeculativeExecution(false);
    job.setReduceSpeculativeExecution(false);

    // Add the job input path
    FileInputFormat.addInputPath(job, new Path(this.input_path));
}
 
Developer ID: brownsys, Project: tracing-framework, Lines: 19, Source: ReadExistingDataJob.java

Example 9: configure

import org.apache.hadoop.mapred.TextInputFormat; // import the required package/class
public void configure(JobConf job) {
    // Set the mapper and reducers
    job.setMapperClass(TestMapper.class);
    // job.setReducerClass(TestReducer.class);

    // Set the output types of the mapper and reducer
    // job.setMapOutputKeyClass(IntWritable.class);
    // job.setMapOutputValueClass(NullWritable.class);
    // job.setOutputKeyClass(NullWritable.class);
    // job.setOutputValueClass(NullWritable.class);

    // Make sure this jar is included
    job.setJarByClass(TestMapper.class);

    // Specify the input and output data formats
    job.setInputFormat(TextInputFormat.class);
    job.setOutputFormat(NullOutputFormat.class);

    // Turn off speculative execution
    job.setMapSpeculativeExecution(false);
    job.setReduceSpeculativeExecution(false);

    // Add the job input path
    FileInputFormat.addInputPath(job, new Path(this.input_filename));
}
 
Developer ID: brownsys, Project: tracing-framework, Lines: 26, Source: ReadDataJob.java

Example 10: main

import org.apache.hadoop.mapred.TextInputFormat; // import the required package/class
public static void main(String[] args) throws Exception {
	JobConf conf = new JobConf(WordCountOldAPI.class);
	conf.setJobName("old wordcount");

	conf.setOutputKeyClass(Text.class);
	conf.setOutputValueClass(IntWritable.class);

	conf.setMapperClass(Map.class);
	conf.setCombinerClass(Reduce.class);
	conf.setReducerClass(Reduce.class);

	conf.setInputFormat(TextInputFormat.class);
	conf.setOutputFormat(TextOutputFormat.class);

	FileInputFormat.setInputPaths(conf, new Path(args[0]));
	FileOutputFormat.setOutputPath(conf, new Path(args[1]));

	JobClient.runJob(conf);
}
 
Developer ID: zirpins, Project: bdelab, Lines: 20, Source: WordCountOldAPI.java

Example 11: configure

import org.apache.hadoop.mapred.TextInputFormat; // import the required package/class
public void configure(JobConf job) {
  super.configure(job);
  //disable the auto increment of the counter. For streaming, no of 
  //processed records could be different(equal or less) than the no of 
  //records input.
  SkipBadRecords.setAutoIncrMapperProcCount(job, false);
  skipping = job.getBoolean("mapred.skip.on", false);
  if (mapInputWriterClass_.getCanonicalName().equals(TextInputWriter.class.getCanonicalName())) {
    String inputFormatClassName = job.getClass("mapred.input.format.class", TextInputFormat.class).getCanonicalName();
    ignoreKey = job.getBoolean("stream.map.input.ignoreKey", 
      inputFormatClassName.equals(TextInputFormat.class.getCanonicalName()));
  }
  
  try {
    mapOutputFieldSeparator = job.get("stream.map.output.field.separator", "\t").getBytes("UTF-8");
    mapInputFieldSeparator = job.get("stream.map.input.field.separator", "\t").getBytes("UTF-8");
    numOfMapOutputKeyFields = job.getInt("stream.num.map.output.key.fields", 1);
  } catch (UnsupportedEncodingException e) {
    throw new RuntimeException("The current system does not support UTF-8 encoding!", e);
  }
}
 
Developer ID: Nextzero, Project: hadoop-2.6.0-cdh5.4.3, Lines: 22, Source: PipeMapper.java

Example 12: createJobConf

import org.apache.hadoop.mapred.TextInputFormat; // import the required package/class
private static JobConf createJobConf(Configuration conf) throws IOException {
  JobConf jobConf = new JobConf(conf);
  String jobName = "transaction_generator";
  jobConf.setJobName(jobName);
  
  String splitDir = workplace + "split/";
  
  jobConf.set(TEST_DIR_LABEL, workplace);
  
  jobConf.setMapSpeculativeExecution(false);
  jobConf.setJarByClass(TxnGenerator.class);
  jobConf.setMapperClass(GeneratorMapper.class);
  jobConf.setInputFormat(TextInputFormat.class);
  
  FileInputFormat.addInputPath(jobConf, new Path(splitDir));
  Random random = new Random();
  FileOutputFormat.setOutputPath(jobConf, new Path(workplace, "output" + random.nextLong()));
  
  jobConf.setNumReduceTasks(0);
  jobConf.setNumMapTasks(numMappers);
  
  createSplitFiles(conf, new Path(splitDir));
  
  return jobConf;
}
 
Developer ID: rhli, Project: hadoop-EAR, Lines: 26, Source: TxnGenerator.java

Example 13: configure

import org.apache.hadoop.mapred.TextInputFormat; // import the required package/class
@SuppressWarnings("unchecked")
public void configure(JobConf job) {
  super.configure(job);
  //disable the auto increment of the counter. For streaming, no of 
  //processed records could be different(equal or less) than the no of 
  //records input.
  SkipBadRecords.setAutoIncrMapperProcCount(job, false);
  skipping = job.getBoolean("mapred.skip.on", false);
  String inputFormatClassName = job.getClass("mapred.input.format.class",
      TextInputFormat.class).getCanonicalName();
  ignoreKey = ignoreKey || inputFormatClassName.equals(TextInputFormat.class.getCanonicalName());

  try {
    mapOutputFieldSeparator = job.get("stream.map.output.field.separator", "\t").getBytes("UTF-8");
    mapInputFieldSeparator = job.get("stream.map.input.field.separator", "\t").getBytes("UTF-8");
    numOfMapOutputKeyFields = job.getInt("stream.num.map.output.key.fields", 1);
  } catch (UnsupportedEncodingException e) {
    throw new RuntimeException("The current system does not support UTF-8 encoding!", e);
  }
}
 
Developer ID: rhli, Project: hadoop-EAR, Lines: 21, Source: PipeMapper.java

Example 14: run

import org.apache.hadoop.mapred.TextInputFormat; // import the required package/class
public void run(String[] args) throws Exception
{

  JobConf conf = new JobConf(this.getClass());
  conf.setJobName("wordcount");

  conf.setOutputKeyClass(Text.class);
  conf.setOutputValueClass(IntWritable.class);

  conf.setMapperClass(Map.class);
  conf.setCombinerClass(Reduce.class);
  conf.setReducerClass(Reduce.class);

  conf.setInputFormat(TextInputFormat.class);
  conf.setOutputFormat(TextOutputFormat.class);

  FileInputFormat.setInputPaths(conf, new Path(args[0]));
  FileOutputFormat.setOutputPath(conf, new Path(args[1]));

  JobClient.runJob(conf);
}
 
Developer ID: apache, Project: apex-malhar, Lines: 22, Source: WordCount.java

Example 15: CSVReadTask

import org.apache.hadoop.mapred.TextInputFormat; // import the required package/class
public CSVReadTask(InputSplit split, SplitOffsetInfos offsets,
		TextInputFormat informat, JobConf job, MatrixBlock dest,
		long rlen, long clen, boolean hasHeader, String delim,
		boolean fill, double fillValue, int splitCount) 
{
	_split = split;
	_splitoffsets = offsets; // new SplitOffsetInfos(offsets);
	_sparse = dest.isInSparseFormat();
	_informat = informat;
	_job = job;
	_dest = dest;
	_rlen = rlen;
	_clen = clen;
	_isFirstSplit = (splitCount == 0);
	_hasHeader = hasHeader;
	_fill = fill;
	_fillValue = fillValue;
	_delim = delim;
	_rc = true;
	_splitCount = splitCount;
}
 
Developer ID: apache, Project: systemml, Lines: 22, Source: ReaderTextCSVParallel.java


Note: The org.apache.hadoop.mapred.TextInputFormat examples in this article were compiled by 純淨天空 from open-source code and documentation platforms such as GitHub and MSDocs. The code snippets are drawn from open-source projects contributed by their respective developers, and copyright of the source code remains with the original authors. Please consult the corresponding project's license before redistributing or using the code, and do not reproduce this article without permission.