This article collects typical usage examples of the Java class org.apache.hadoop.mapred.FileInputFormat. If you are wondering what FileInputFormat does, how to use it, or where to find working examples, the curated code samples below should help.
The FileInputFormat class belongs to the org.apache.hadoop.mapred package. Fifteen code examples of the class are shown below, ordered by popularity.
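Before the individual examples, here is a minimal sketch of the three FileInputFormat calls they exercise most often: setInputPaths (replace the configured input paths), addInputPath (append a single path), and getInputPaths (read them back). The job name and paths are made up purely for illustration.

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.mapred.FileInputFormat;
import org.apache.hadoop.mapred.JobConf;

// Sketch only: a JobConf with hypothetical input paths.
JobConf job = new JobConf(new Configuration());
job.setJobName("file-input-format-demo");
// Replace whatever input paths are currently configured with a single path.
FileInputFormat.setInputPaths(job, new Path("/data/in"));
// Append one more input path without touching the existing ones.
FileInputFormat.addInputPath(job, new Path("/data/more"));
// Read back everything configured so far.
Path[] inputs = FileInputFormat.getInputPaths(job);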
Example 1: runTests
import org.apache.hadoop.mapred.FileInputFormat; // import the required package/class
/**
 * Run the test
 *
 * @throws IOException on error
 */
public static void runTests() throws IOException {
  config.setLong("io.bytes.per.checksum", bytesPerChecksum);
  JobConf job = new JobConf(config, NNBench.class);
  job.setJobName("NNBench-" + operation);
  FileInputFormat.setInputPaths(job, new Path(baseDir, CONTROL_DIR_NAME));
  job.setInputFormat(SequenceFileInputFormat.class);
  // Explicitly set number of max map attempts to 1.
  job.setMaxMapAttempts(1);
  // Explicitly turn off speculative execution
  job.setSpeculativeExecution(false);
  job.setMapperClass(NNBenchMapper.class);
  job.setReducerClass(NNBenchReducer.class);
  FileOutputFormat.setOutputPath(job, new Path(baseDir, OUTPUT_DIR_NAME));
  job.setOutputKeyClass(Text.class);
  job.setOutputValueClass(Text.class);
  job.setNumReduceTasks((int) numberOfReduces);
  JobClient.runJob(job);
}
Example 2: runIOTest
import org.apache.hadoop.mapred.FileInputFormat; // import the required package/class
private void runIOTest(
    Class<? extends Mapper<Text, LongWritable, Text, Text>> mapperClass,
    Path outputDir) throws IOException {
  JobConf job = new JobConf(config, TestDFSIO.class);
  FileInputFormat.setInputPaths(job, getControlDir(config));
  job.setInputFormat(SequenceFileInputFormat.class);
  job.setMapperClass(mapperClass);
  job.setReducerClass(AccumulatingReducer.class);
  FileOutputFormat.setOutputPath(job, outputDir);
  job.setOutputKeyClass(Text.class);
  job.setOutputValueClass(Text.class);
  job.setNumReduceTasks(1);
  JobClient.runJob(job);
}
Example 3: configure
import org.apache.hadoop.mapred.FileInputFormat; // import the required package/class
public void configure(String keySpec, int expect) throws Exception {
  Path testdir = new Path(TEST_DIR.getAbsolutePath());
  Path inDir = new Path(testdir, "in");
  Path outDir = new Path(testdir, "out");
  FileSystem fs = getFileSystem();
  fs.delete(testdir, true);
  conf.setInputFormat(TextInputFormat.class);
  FileInputFormat.setInputPaths(conf, inDir);
  FileOutputFormat.setOutputPath(conf, outDir);
  conf.setOutputKeyClass(Text.class);
  conf.setOutputValueClass(LongWritable.class);
  conf.setNumMapTasks(1);
  conf.setNumReduceTasks(1);
  conf.setOutputFormat(TextOutputFormat.class);
  conf.setOutputKeyComparatorClass(KeyFieldBasedComparator.class);
  conf.setKeyFieldComparatorOptions(keySpec);
  conf.setKeyFieldPartitionerOptions("-k1.1,1.1");
  conf.set(JobContext.MAP_OUTPUT_KEY_FIELD_SEPERATOR, " ");
  conf.setMapperClass(InverseMapper.class);
  conf.setReducerClass(IdentityReducer.class);
  if (!fs.mkdirs(testdir)) {
    throw new IOException("Mkdirs failed to create " + testdir.toString());
  }
  if (!fs.mkdirs(inDir)) {
    throw new IOException("Mkdirs failed to create " + inDir.toString());
  }
  // set up the input data: a single file containing two lines
  Path inFile = new Path(inDir, "part0");
  FileOutputStream fos = new FileOutputStream(inFile.toString());
  fos.write((line1 + "\n").getBytes());
  fos.write((line2 + "\n").getBytes());
  fos.close();
  JobClient jc = new JobClient(conf);
  RunningJob r_job = jc.submitJob(conf);
  while (!r_job.isComplete()) {
    Thread.sleep(1000);
  }
  if (!r_job.isSuccessful()) {
    fail("Oops! The job broke due to an unexpected error");
  }
  Path[] outputFiles = FileUtil.stat2Paths(
      getFileSystem().listStatus(outDir,
          new Utils.OutputFileUtils.OutputFilesFilter()));
  if (outputFiles.length > 0) {
    InputStream is = getFileSystem().open(outputFiles[0]);
    BufferedReader reader = new BufferedReader(new InputStreamReader(is));
    String line = reader.readLine();
    // make sure we get what we expect as the first line, and also
    // that we have two lines
    if (expect == 1) {
      assertTrue(line.startsWith(line1));
    } else if (expect == 2) {
      assertTrue(line.startsWith(line2));
    }
    line = reader.readLine();
    if (expect == 1) {
      assertTrue(line.startsWith(line2));
    } else if (expect == 2) {
      assertTrue(line.startsWith(line1));
    }
    reader.close();
  }
}
Example 4: createCopyJob
import org.apache.hadoop.mapred.FileInputFormat; // import the required package/class
/**
 * Creates a simple copy job.
 *
 * @param indirs List of input directories.
 * @param outdir Output directory.
 * @return JobConf initialised for a simple copy job.
 * @throws Exception If an error occurs creating job configuration.
 */
static JobConf createCopyJob(List<Path> indirs, Path outdir) throws Exception {
  Configuration defaults = new Configuration();
  JobConf theJob = new JobConf(defaults, TestJobControl.class);
  theJob.setJobName("DataMoveJob");
  FileInputFormat.setInputPaths(theJob, indirs.toArray(new Path[0]));
  theJob.setMapperClass(DataCopy.class);
  FileOutputFormat.setOutputPath(theJob, outdir);
  theJob.setOutputKeyClass(Text.class);
  theJob.setOutputValueClass(Text.class);
  theJob.setReducerClass(DataCopy.class);
  theJob.setNumMapTasks(12);
  theJob.setNumReduceTasks(4);
  return theJob;
}
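For illustration only, a caller might build and launch this copy job as sketched below; the input and output paths are hypothetical, and the snippet assumes the same imports as the example above plus java.util.Arrays and java.util.List.

// Hypothetical usage of createCopyJob: build the JobConf and run it synchronously.
List<Path> inputs = Arrays.asList(new Path("/data/in1"), new Path("/data/in2"));
Path output = new Path("/data/copy-out");
JobConf copyJob = createCopyJob(inputs, output);
JobClient.runJob(copyJob);  // blocks until the MapReduce job completes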
Example 5: validateInput
import org.apache.hadoop.mapred.FileInputFormat; // import the required package/class
public void validateInput(JobConf job) throws IOException {
  // expecting exactly one path
  Path[] tableNames = FileInputFormat.getInputPaths(job);
  if (tableNames == null || tableNames.length > 1) {
    throw new IOException("expecting one table name");
  }
  // connected to table?
  if (getHTable() == null) {
    throw new IOException("could not connect to table '" +
        tableNames[0].getName() + "'");
  }
  // expecting at least one column
  String colArg = job.get(COLUMN_LIST);
  if (colArg == null || colArg.length() == 0) {
    throw new IOException("expecting at least one column");
  }
}
Example 6: getOldAPIJobconf
import org.apache.hadoop.mapred.FileInputFormat; // import the required package/class
private static JobConf getOldAPIJobconf(Configuration configuration, String name,
    String input, String output) throws Exception {
  final JobConf jobConf = new JobConf(configuration);
  final FileSystem fs = FileSystem.get(configuration);
  if (fs.exists(new Path(output))) {
    fs.delete(new Path(output), true);
  }
  fs.close();
  jobConf.setJobName(name);
  jobConf.setOutputKeyClass(Text.class);
  jobConf.setOutputValueClass(IntWritable.class);
  jobConf.setMapperClass(WordCountWithOldAPI.TokenizerMapperWithOldAPI.class);
  jobConf.setCombinerClass(WordCountWithOldAPI.IntSumReducerWithOldAPI.class);
  jobConf.setReducerClass(WordCountWithOldAPI.IntSumReducerWithOldAPI.class);
  jobConf.setInputFormat(SequenceFileInputFormat.class);
  jobConf.setOutputFormat(TextOutputFormat.class);
  FileInputFormat.setInputPaths(jobConf, new Path(input));
  FileOutputFormat.setOutputPath(jobConf, new Path(output));
  return jobConf;
}
Example 7: configure
import org.apache.hadoop.mapred.FileInputFormat; // import the required package/class
public void configure(JobConf job) {
  // Set the mapper and reducers
  job.setMapperClass(TestMapper.class);
  // job.setReducerClass(TestReducer.class);
  // Set the output types of the mapper and reducer
  // job.setMapOutputKeyClass(IntWritable.class);
  // job.setMapOutputValueClass(NullWritable.class);
  // job.setOutputKeyClass(NullWritable.class);
  // job.setOutputValueClass(NullWritable.class);
  // Make sure this jar is included
  job.setJarByClass(TestMapper.class);
  // Specify the input and output data formats
  job.setInputFormat(TextInputFormat.class);
  job.setOutputFormat(NullOutputFormat.class);
  // Turn off speculative execution
  job.setMapSpeculativeExecution(false);
  job.setReduceSpeculativeExecution(false);
  // Add the job input path
  FileInputFormat.addInputPath(job, new Path(this.input_filename));
}
Example 8: readEthereumBlockInputFormatGenesisBlock
import org.apache.hadoop.mapred.FileInputFormat; // import the required package/class
@Test
public void readEthereumBlockInputFormatGenesisBlock() throws IOException, EthereumBlockReadException, ParseException, InterruptedException {
  JobConf job = new JobConf(defaultConf);
  ClassLoader classLoader = getClass().getClassLoader();
  String fileName = "ethgenesis.bin";
  String fileNameBlock = classLoader.getResource("testdata/" + fileName).getFile();
  Path file = new Path(fileNameBlock);
  FileInputFormat.setInputPaths(job, file);
  EthereumBlockFileInputFormat format = new EthereumBlockFileInputFormat();
  format.configure(job);
  InputSplit[] inputSplits = format.getSplits(job, 1);
  assertEquals(1, inputSplits.length, "Only one split generated for genesis block");
  RecordReader<BytesWritable, EthereumBlock> reader = format.getRecordReader(inputSplits[0], job, reporter);
  assertNotNull(reader, "Format returned null RecordReader");
  BytesWritable key = new BytesWritable();
  EthereumBlock block = new EthereumBlock();
  assertTrue(reader.next(key, block), "Input Split for genesis block contains at least one block");
  assertEquals(0, block.getEthereumTransactions().size(), "Genesis Block must have 0 transactions");
  assertFalse(reader.next(key, block), "No further blocks in genesis Block");
  reader.close();
}
Example 9: main
import org.apache.hadoop.mapred.FileInputFormat; // import the required package/class
public static void main(String[] args) throws Exception {
  JobConf conf = new JobConf(WeatherData.class);
  conf.setJobName("temp");
  // Note: the mapper's output types are not the defaults, so they
  // must be declared explicitly.
  conf.setMapOutputKeyClass(Text.class);
  conf.setMapOutputValueClass(Text.class);
  conf.setMapperClass(MaxTemperatureMapper.class);
  conf.setReducerClass(MaxTemperatureReducer.class);
  conf.setInputFormat(TextInputFormat.class);
  conf.setOutputFormat(TextOutputFormat.class);
  FileInputFormat.setInputPaths(conf, new Path(args[0]));
  FileOutputFormat.setOutputPath(conf, new Path(args[1]));
  JobClient.runJob(conf);
}
Example 10: readEthereumBlockInputFormatBlock3346406
import org.apache.hadoop.mapred.FileInputFormat; // import the required package/class
@Test
public void readEthereumBlockInputFormatBlock3346406() throws IOException, EthereumBlockReadException, ParseException, InterruptedException {
  JobConf job = new JobConf(defaultConf);
  ClassLoader classLoader = getClass().getClassLoader();
  String fileName = "eth3346406.bin";
  String fileNameBlock = classLoader.getResource("testdata/" + fileName).getFile();
  Path file = new Path(fileNameBlock);
  FileInputFormat.setInputPaths(job, file);
  EthereumBlockFileInputFormat format = new EthereumBlockFileInputFormat();
  format.configure(job);
  InputSplit[] inputSplits = format.getSplits(job, 1);
  assertEquals(1, inputSplits.length, "Only one split generated for block 3346406");
  RecordReader<BytesWritable, EthereumBlock> reader = format.getRecordReader(inputSplits[0], job, reporter);
  assertNotNull(reader, "Format returned null RecordReader");
  BytesWritable key = new BytesWritable();
  EthereumBlock block = new EthereumBlock();
  assertTrue(reader.next(key, block), "Input Split for block 3346406 contains at least one block");
  assertEquals(7, block.getEthereumTransactions().size(), "Block 3346406 must have 7 transactions");
  assertFalse(reader.next(key, block), "No further blocks in block 3346406");
  reader.close();
}
Example 11: merge
import org.apache.hadoop.mapred.FileInputFormat; // import the required package/class
public void merge(Path output, Path[] dbs, boolean normalize, boolean filter)
    throws Exception {
  SimpleDateFormat sdf = new SimpleDateFormat("yyyy-MM-dd HH:mm:ss");
  long start = System.currentTimeMillis();
  LOG.info("LinkDb merge: starting at " + sdf.format(start));
  JobConf job = createMergeJob(getConf(), output, normalize, filter);
  for (int i = 0; i < dbs.length; i++) {
    FileInputFormat.addInputPath(job, new Path(dbs[i], LinkDb.CURRENT_NAME));
  }
  JobClient.runJob(job);
  FileSystem fs = FileSystem.get(getConf());
  fs.mkdirs(output);
  fs.rename(FileOutputFormat.getOutputPath(job), new Path(output,
      LinkDb.CURRENT_NAME));
  long end = System.currentTimeMillis();
  LOG.info("LinkDb merge: finished at " + sdf.format(end) + ", elapsed: "
      + TimingUtil.elapsedTime(start, end));
}
Example 12: readEthereumBlockInputFormatBlock1346406GzipCompressed
import org.apache.hadoop.mapred.FileInputFormat; // import the required package/class
@Test
public void readEthereumBlockInputFormatBlock1346406GzipCompressed() throws IOException, EthereumBlockReadException, ParseException, InterruptedException {
  JobConf job = new JobConf(defaultConf);
  ClassLoader classLoader = getClass().getClassLoader();
  String fileName = "eth1346406.bin.gz";
  String fileNameBlock = classLoader.getResource("testdata/" + fileName).getFile();
  Path file = new Path(fileNameBlock);
  FileInputFormat.setInputPaths(job, file);
  EthereumBlockFileInputFormat format = new EthereumBlockFileInputFormat();
  format.configure(job);
  InputSplit[] inputSplits = format.getSplits(job, 1);
  assertEquals(1, inputSplits.length, "Only one split generated for block 1346406");
  RecordReader<BytesWritable, EthereumBlock> reader = format.getRecordReader(inputSplits[0], job, reporter);
  assertNotNull(reader, "Format returned null RecordReader");
  BytesWritable key = new BytesWritable();
  EthereumBlock block = new EthereumBlock();
  assertTrue(reader.next(key, block), "Input Split for block 1346406 contains at least one block");
  assertEquals(6, block.getEthereumTransactions().size(), "Block 1346406 must have 6 transactions");
  assertFalse(reader.next(key, block), "No further blocks in block 1346406");
  reader.close();
}
Example 13: createJobConf
import org.apache.hadoop.mapred.FileInputFormat; // import the required package/class
private JobConf createJobConf() throws IOException {
  JobConf conf = HdpBootstrap.hadoopConfig();
  conf.setInputFormat(EsInputFormat.class);
  conf.setOutputFormat(PrintStreamOutputFormat.class);
  conf.setOutputKeyClass(Text.class);
  boolean type = random.nextBoolean();
  Class<?> mapType = (type ? MapWritable.class : LinkedMapWritable.class);
  conf.setOutputValueClass(mapType);
  HadoopCfgUtils.setGenericOptions(conf);
  conf.set(ConfigurationOptions.ES_QUERY, query);
  conf.setNumReduceTasks(0);
  conf.set(ConfigurationOptions.ES_READ_METADATA, String.valueOf(readMetadata));
  conf.set(ConfigurationOptions.ES_READ_METADATA_VERSION, String.valueOf(true));
  conf.set(ConfigurationOptions.ES_OUTPUT_JSON, String.valueOf(readAsJson));
  QueryTestParams.provisionQueries(conf);
  FileInputFormat.setInputPaths(conf, new Path(TestUtils.sampleArtistsDat()));
  HdpBootstrap.addProperties(conf, TestSettings.TESTING_PROPS, false);
  return conf;
}
Example 14: readEthereumBlockInputFormatBlock1346406
import org.apache.hadoop.mapred.FileInputFormat; // import the required package/class
@Test
public void readEthereumBlockInputFormatBlock1346406() throws IOException, EthereumBlockReadException, ParseException, InterruptedException {
  JobConf job = new JobConf(defaultConf);
  ClassLoader classLoader = getClass().getClassLoader();
  String fileName = "eth1346406.bin";
  String fileNameBlock = classLoader.getResource("testdata/" + fileName).getFile();
  Path file = new Path(fileNameBlock);
  FileInputFormat.setInputPaths(job, file);
  EthereumBlockFileInputFormat format = new EthereumBlockFileInputFormat();
  format.configure(job);
  InputSplit[] inputSplits = format.getSplits(job, 1);
  assertEquals(1, inputSplits.length, "Only one split generated for block 1346406");
  RecordReader<BytesWritable, EthereumBlock> reader = format.getRecordReader(inputSplits[0], job, reporter);
  assertNotNull(reader, "Format returned null RecordReader");
  BytesWritable key = new BytesWritable();
  EthereumBlock block = new EthereumBlock();
  assertTrue(reader.next(key, block), "Input Split for block 1346406 contains at least one block");
  assertEquals(6, block.getEthereumTransactions().size(), "Block 1346406 must have 6 transactions");
  assertFalse(reader.next(key, block), "No further blocks in block 1346406");
  reader.close();
}
Example 15: setInputPaths
import org.apache.hadoop.mapred.FileInputFormat; // import the required package/class
/**
 * setInputPaths adds all the paths in the provided list to the JobConf object
 * as input paths for the job.
 *
 * @param job the job configuration to update
 * @param pathsToAdd the input paths to append
 */
public static void setInputPaths(JobConf job, List<Path> pathsToAdd) {
  Path[] addedPaths = FileInputFormat.getInputPaths(job);
  if (addedPaths == null) {
    addedPaths = new Path[0];
  }
  Path[] combined = new Path[addedPaths.length + pathsToAdd.size()];
  System.arraycopy(addedPaths, 0, combined, 0, addedPaths.length);
  int i = 0;
  for (Path p : pathsToAdd) {
    combined[addedPaths.length + (i++)] = p;
  }
  FileInputFormat.setInputPaths(job, combined);
}