This article collects typical usage examples of the Java class org.apache.hadoop.mapred.InputFormat. If you are wondering what the InputFormat class is for, how to use it, or where to find examples of it in practice, the curated class code examples below may help.
The InputFormat class belongs to the org.apache.hadoop.mapred package. Fifteen code examples of the class are shown below, sorted by popularity by default. You can upvote the examples you like or find useful; your feedback helps the system recommend better Java code examples.
Example 1: testInputFormat
import org.apache.hadoop.mapred.InputFormat; // import the required package/class
void testInputFormat(Class<? extends InputFormat> clazz) throws IOException {
  final JobConf job = MapreduceTestingShim.getJobConf(mrCluster);
  job.setInputFormat(clazz);
  job.setOutputFormat(NullOutputFormat.class);
  job.setMapperClass(ExampleVerifier.class);
  job.setNumReduceTasks(0);
  LOG.debug("submitting job.");
  final RunningJob run = JobClient.runJob(job);
  assertTrue("job failed!", run.isSuccessful());
  assertEquals("Saw the wrong number of instances of the filtered-for row.", 2, run.getCounters()
      .findCounter(TestTableInputFormat.class.getName() + ":row", "aaa").getCounter());
  assertEquals("Saw any instances of the filtered out row.", 0, run.getCounters()
      .findCounter(TestTableInputFormat.class.getName() + ":row", "bbb").getCounter());
  assertEquals("Saw the wrong number of instances of columnA.", 1, run.getCounters()
      .findCounter(TestTableInputFormat.class.getName() + ":family", "columnA").getCounter());
  assertEquals("Saw the wrong number of instances of columnB.", 1, run.getCounters()
      .findCounter(TestTableInputFormat.class.getName() + ":family", "columnB").getCounter());
  assertEquals("Saw the wrong count of values for the filtered-for row.", 2, run.getCounters()
      .findCounter(TestTableInputFormat.class.getName() + ":value", "value aaa").getCounter());
  assertEquals("Saw the wrong count of values for the filtered-out row.", 0, run.getCounters()
      .findCounter(TestTableInputFormat.class.getName() + ":value", "value bbb").getCounter());
}
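The assertions above read job counters that the test's mapper (ExampleVerifier, not shown here) is expected to increment while scanning the table. As a rough, hypothetical illustration of that pattern in the old mapred API — a stand-in, not HBase's actual ExampleVerifier — a mapper can bump arbitrarily named counters through Reporter.incrCounter, and the driver reads them back exactly as testInputFormat does:

import java.io.IOException;

import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.NullWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapred.MapReduceBase;
import org.apache.hadoop.mapred.Mapper;
import org.apache.hadoop.mapred.OutputCollector;
import org.apache.hadoop.mapred.Reporter;

// Hypothetical stand-in for a verifying mapper: it emits nothing and only
// increments a counter per record, which the driver later reads via
// run.getCounters().findCounter(group, name).getCounter().
public class CountingVerifier extends MapReduceBase
    implements Mapper<LongWritable, Text, NullWritable, NullWritable> {

  @Override
  public void map(LongWritable offset, Text line,
      OutputCollector<NullWritable, NullWritable> output, Reporter reporter)
      throws IOException {
    // Group and counter names are free-form strings; the test above uses
    // TestTableInputFormat.class.getName() + ":row" as the group name.
    reporter.incrCounter(CountingVerifier.class.getName() + ":row", "seen", 1);
  }
}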
Example 2: testAddInputPathWithMapper
import org.apache.hadoop.mapred.InputFormat; // import the required package/class
public void testAddInputPathWithMapper() {
  final JobConf conf = new JobConf();
  MultipleInputs.addInputPath(conf, new Path("/foo"), TextInputFormat.class,
      MapClass.class);
  MultipleInputs.addInputPath(conf, new Path("/bar"),
      KeyValueTextInputFormat.class, MapClass2.class);
  final Map<Path, InputFormat> inputs = MultipleInputs
      .getInputFormatMap(conf);
  final Map<Path, Class<? extends Mapper>> maps = MultipleInputs
      .getMapperTypeMap(conf);
  assertEquals(TextInputFormat.class, inputs.get(new Path("/foo")).getClass());
  assertEquals(KeyValueTextInputFormat.class, inputs.get(new Path("/bar"))
      .getClass());
  assertEquals(MapClass.class, maps.get(new Path("/foo")));
  assertEquals(MapClass2.class, maps.get(new Path("/bar")));
}
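MapClass and MapClass2 are the test's own mappers; the point of MultipleInputs is that each input path carries its own InputFormat and Mapper. For context, a minimal hypothetical driver showing how such a configuration typically feeds a real job (paths and mapper bodies are made up for illustration):

import java.io.IOException;

import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapred.FileOutputFormat;
import org.apache.hadoop.mapred.JobClient;
import org.apache.hadoop.mapred.JobConf;
import org.apache.hadoop.mapred.KeyValueTextInputFormat;
import org.apache.hadoop.mapred.MapReduceBase;
import org.apache.hadoop.mapred.Mapper;
import org.apache.hadoop.mapred.OutputCollector;
import org.apache.hadoop.mapred.Reporter;
import org.apache.hadoop.mapred.TextInputFormat;
import org.apache.hadoop.mapred.lib.MultipleInputs;

// Hypothetical driver: each input directory gets its own InputFormat and Mapper;
// MultipleInputs wires them together behind a delegating mapper/input format.
public class MultipleInputsDriver {

  // Mapper for plain text files read by TextInputFormat (offset/line pairs).
  public static class PlainMapper extends MapReduceBase
      implements Mapper<LongWritable, Text, Text, Text> {
    public void map(LongWritable offset, Text line,
        OutputCollector<Text, Text> out, Reporter reporter) throws IOException {
      out.collect(new Text("plain"), line);
    }
  }

  // Mapper for tab-separated files read by KeyValueTextInputFormat (key/value pairs).
  public static class KvMapper extends MapReduceBase
      implements Mapper<Text, Text, Text, Text> {
    public void map(Text key, Text value,
        OutputCollector<Text, Text> out, Reporter reporter) throws IOException {
      out.collect(key, value);
    }
  }

  public static void main(String[] args) throws IOException {
    JobConf conf = new JobConf(MultipleInputsDriver.class);
    conf.setJobName("multiple-inputs-demo");
    conf.setOutputKeyClass(Text.class);
    conf.setOutputValueClass(Text.class);
    conf.setNumReduceTasks(0); // map-only, for brevity

    MultipleInputs.addInputPath(conf, new Path("/data/plain"), TextInputFormat.class, PlainMapper.class);
    MultipleInputs.addInputPath(conf, new Path("/data/kv"), KeyValueTextInputFormat.class, KvMapper.class);
    FileOutputFormat.setOutputPath(conf, new Path("/data/out"));

    JobClient.runJob(conf);
  }
}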
Example 3: getSample
import org.apache.hadoop.mapred.InputFormat; // import the required package/class
/**
 * From each split sampled, take the first numSamples / numSplits records.
 */
@SuppressWarnings("unchecked") // ArrayList::toArray doesn't preserve type
public K[] getSample(InputFormat<K,V> inf, JobConf job) throws IOException {
  InputSplit[] splits = inf.getSplits(job, job.getNumMapTasks());
  ArrayList<K> samples = new ArrayList<K>(numSamples);
  int splitsToSample = Math.min(maxSplitsSampled, splits.length);
  int splitStep = splits.length / splitsToSample;
  int samplesPerSplit = numSamples / splitsToSample;
  long records = 0;
  for (int i = 0; i < splitsToSample; ++i) {
    RecordReader<K,V> reader = inf.getRecordReader(splits[i * splitStep],
        job, Reporter.NULL);
    K key = reader.createKey();
    V value = reader.createValue();
    while (reader.next(key, value)) {
      samples.add(key);
      key = reader.createKey();
      ++records;
      if ((i+1) * samplesPerSplit <= records) {
        break;
      }
    }
    reader.close();
  }
  return (K[])samples.toArray();
}
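This is the SplitSampler strategy of the old-API InputSampler: take the first numSamples / splitsToSample keys from a subset of splits. Such a sampler is usually consumed by total-order sorting, where the sampled keys become the cut points for TotalOrderPartitioner. A hedged sketch of that wiring, with made-up paths and sample counts:

import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapred.FileInputFormat;
import org.apache.hadoop.mapred.JobConf;
import org.apache.hadoop.mapred.KeyValueTextInputFormat;
import org.apache.hadoop.mapred.lib.InputSampler;
import org.apache.hadoop.mapred.lib.TotalOrderPartitioner;

// Hypothetical wiring: sample keys with a SplitSampler, write the partition file,
// and let TotalOrderPartitioner route contiguous key ranges to each reducer.
public class SamplerSetup {
  public static void main(String[] args) throws Exception {
    JobConf job = new JobConf(SamplerSetup.class);
    job.setInputFormat(KeyValueTextInputFormat.class);
    job.setMapOutputKeyClass(Text.class);
    job.setMapOutputValueClass(Text.class);
    job.setNumReduceTasks(4);
    job.setPartitionerClass(TotalOrderPartitioner.class);
    FileInputFormat.setInputPaths(job, new Path("/data/in"));   // made-up path

    Path partitionFile = new Path("/tmp/_partitions");          // made-up path
    TotalOrderPartitioner.setPartitionFile(job, partitionFile);

    // 1000 samples drawn from at most 10 splits, matching the getSample() logic above.
    InputSampler.Sampler<Text, Text> sampler =
        new InputSampler.SplitSampler<Text, Text>(1000, 10);
    InputSampler.writePartitionFile(job, sampler);
    // ...then configure mapper/reducer classes and submit with JobClient.runJob(job).
  }
}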
Example 4: addDependencyJars
import org.apache.hadoop.mapred.InputFormat; // import the required package/class
/**
 * @see org.apache.hadoop.hbase.mapreduce.TableMapReduceUtil#addDependencyJars(org.apache.hadoop.mapreduce.Job)
 */
public static void addDependencyJars(JobConf job) throws IOException {
  org.apache.hadoop.hbase.mapreduce.TableMapReduceUtil.addHBaseDependencyJars(job);
  org.apache.hadoop.hbase.mapreduce.TableMapReduceUtil.addDependencyJars(
      job,
      // when making changes here, consider also mapreduce.TableMapReduceUtil
      // pull job classes
      job.getMapOutputKeyClass(),
      job.getMapOutputValueClass(),
      job.getOutputKeyClass(),
      job.getOutputValueClass(),
      job.getPartitionerClass(),
      job.getClass("mapred.input.format.class", TextInputFormat.class, InputFormat.class),
      job.getClass("mapred.output.format.class", TextOutputFormat.class, OutputFormat.class),
      job.getCombinerClass());
}
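The @see tag suggests this is the mapred-API counterpart of HBase's TableMapReduceUtil.addDependencyJars. It has to be called on a fully configured JobConf, because it reads the key/value, format, partitioner and combiner classes back out of the job. A heavily hedged sketch of a call site (the identity-mapper job is invented; only the addDependencyJars call mirrors the snippet above, which is assumed to live in org.apache.hadoop.hbase.mapred.TableMapReduceUtil):

import java.io.IOException;

import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapred.FileInputFormat;
import org.apache.hadoop.mapred.FileOutputFormat;
import org.apache.hadoop.mapred.JobClient;
import org.apache.hadoop.mapred.JobConf;
import org.apache.hadoop.mapred.KeyValueTextInputFormat;
import org.apache.hadoop.mapred.TextOutputFormat;
import org.apache.hadoop.mapred.lib.IdentityMapper;

// Hypothetical driver around the helper above; the job itself is a trivial
// identity pass-through whose only purpose is to show where the call goes.
public class DependencyJarsDriver {
  public static void main(String[] args) throws IOException {
    JobConf job = new JobConf(DependencyJarsDriver.class);
    job.setInputFormat(KeyValueTextInputFormat.class);
    job.setOutputFormat(TextOutputFormat.class);
    job.setMapperClass(IdentityMapper.class);
    job.setMapOutputKeyClass(Text.class);
    job.setMapOutputValueClass(Text.class);
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(Text.class);
    job.setNumReduceTasks(0);
    FileInputFormat.setInputPaths(job, new Path("/data/in"));
    FileOutputFormat.setOutputPath(job, new Path("/data/out"));

    // Must run after the classes above are set, since it reads them back from the job.
    org.apache.hadoop.hbase.mapred.TableMapReduceUtil.addDependencyJars(job);
    JobClient.runJob(job);
  }
}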
Example 5: getInputFormatFromCache
import org.apache.hadoop.mapred.InputFormat; // import the required package/class
public static InputFormat<WritableComparable, Writable> getInputFormatFromCache(
    Class inputFormatClass, JobConf job) throws IOException {
  InputFormat<WritableComparable, Writable> instance = inputFormats.get(inputFormatClass);
  if (instance == null) {
    try {
      instance = (InputFormat<WritableComparable, Writable>) ReflectionUtil
          .newInstance(inputFormatClass, job);
      // HBase input formats are not thread safe today. See HIVE-8808.
      String inputFormatName = inputFormatClass.getName().toLowerCase();
      if (!inputFormatName.contains("hbase")) {
        inputFormats.put(inputFormatClass, instance);
      }
    } catch (Exception e) {
      throw new IOException("Cannot create an instance of InputFormat class "
          + inputFormatClass.getName() + " as specified in mapredWork!", e);
    }
  }
  return instance;
}
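The snippet relies on an inputFormats cache field and a ReflectionUtil helper that are not shown. A minimal sketch of what that surrounding state could look like, substituting Hadoop's standard ReflectionUtils for Hive's ReflectionUtil (an assumption, not Hive's exact code):

import java.io.IOException;
import java.util.Map;
import java.util.concurrent.ConcurrentHashMap;

import org.apache.hadoop.io.Writable;
import org.apache.hadoop.io.WritableComparable;
import org.apache.hadoop.mapred.InputFormat;
import org.apache.hadoop.mapred.JobConf;
import org.apache.hadoop.util.ReflectionUtils;

// Hypothetical re-sketch of the caching pattern above: one InputFormat instance
// per class, except for HBase formats, which are instantiated fresh each time.
public final class InputFormatCache {
  private static final Map<Class<?>, InputFormat<WritableComparable, Writable>> inputFormats =
      new ConcurrentHashMap<>();

  @SuppressWarnings("unchecked")
  public static InputFormat<WritableComparable, Writable> get(Class<?> clazz, JobConf job)
      throws IOException {
    InputFormat<WritableComparable, Writable> instance = inputFormats.get(clazz);
    if (instance == null) {
      // ReflectionUtils also configures the instance against the JobConf.
      instance = (InputFormat<WritableComparable, Writable>) ReflectionUtils.newInstance(clazz, job);
      if (!clazz.getName().toLowerCase().contains("hbase")) {
        inputFormats.put(clazz, instance);
      }
    }
    return instance;
  }
}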
Example 6: getRecordReader
import org.apache.hadoop.mapred.InputFormat; // import the required package/class
private org.apache.hadoop.mapred.RecordReader<BytesWritable, BytesWritable> getRecordReader(
    final InputFormat<BytesWritable, BytesWritable> inputFormat,
    final JobConf jobConf) throws ExecutionSetupException {
  try {
    final UserGroupInformation ugi = ImpersonationUtil.createProxyUgi(this.opUserName, this.queryUserName);
    return ugi.doAs(new PrivilegedExceptionAction<org.apache.hadoop.mapred.RecordReader<BytesWritable, BytesWritable>>() {
      @Override
      public org.apache.hadoop.mapred.RecordReader<BytesWritable, BytesWritable> run() throws Exception {
        return inputFormat.getRecordReader(split, jobConf, Reporter.NULL);
      }
    });
  } catch (IOException | InterruptedException e) {
    throw new ExecutionSetupException(
        String.format("Error in creating sequencefile reader for file: %s, start: %d, length: %d",
            split.getPath(), split.getStart(), split.getLength()), e);
  }
}
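Once the reader is obtained, it is consumed with the usual old-API pattern: create reusable key/value holders with createKey()/createValue() and iterate with next(). A small sketch of that consumption loop for the BytesWritable reader returned above:

import java.io.IOException;

import org.apache.hadoop.io.BytesWritable;
import org.apache.hadoop.mapred.RecordReader;

// Generic consumption loop for an old-API RecordReader, e.g. the one returned
// by getRecordReader() above. The key/value objects are reused across next() calls.
static long countRecords(RecordReader<BytesWritable, BytesWritable> reader) throws IOException {
  long count = 0;
  try {
    BytesWritable key = reader.createKey();
    BytesWritable value = reader.createValue();
    while (reader.next(key, value)) {
      count++; // inspect key/value here; their contents are overwritten on the next call
    }
  } finally {
    reader.close();
  }
  return count;
}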
Example 7: getInputFormatFromSD
import org.apache.hadoop.mapred.InputFormat; // import the required package/class
/**
 * Get the input format from the given {@link StorageDescriptor}.
 * @param properties
 * @param hiveReadEntry
 * @param sd
 * @return {@link InputFormat} class, or null if a failure has occurred. Failures are logged as warnings.
 */
private Class<? extends InputFormat<?, ?>> getInputFormatFromSD(final Properties properties,
    final HiveReadEntry hiveReadEntry, final StorageDescriptor sd, final HiveConf hiveConf) {
  final Table hiveTable = hiveReadEntry.getTable();
  try {
    final String inputFormatName = sd.getInputFormat();
    if (!Strings.isNullOrEmpty(inputFormatName)) {
      return (Class<? extends InputFormat<?, ?>>) Class.forName(inputFormatName);
    }
    final JobConf job = new JobConf(hiveConf);
    HiveUtilities.addConfToJob(job, properties);
    return HiveUtilities.getInputFormatClass(job, sd, hiveTable);
  } catch (final Exception e) {
    logger.warn("Failed to get InputFormat class from Hive table '{}.{}'. StorageDescriptor [{}]",
        hiveTable.getDbName(), hiveTable.getTableName(), sd.toString(), e);
    return null;
  }
}
Example 8: getInputFormatClass
import org.apache.hadoop.mapred.InputFormat; // import the required package/class
/**
 * Utility method which gets the table or partition {@link InputFormat} class. It first
 * tries to get the class name from the given StorageDescriptor object. If that is not set, it tries to get it from
 * the StorageHandler class configured in the table properties. If neither is found, an exception is thrown.
 * @param job {@link JobConf} instance, needed in case the table is a StorageHandler-based table.
 * @param sd {@link StorageDescriptor} instance of the partition currently being read, or of the table (for non-partitioned tables).
 * @param table Table object
 * @throws Exception
 */
public static Class<? extends InputFormat<?, ?>> getInputFormatClass(final JobConf job, final StorageDescriptor sd,
    final Table table) throws Exception {
  final String inputFormatName = sd.getInputFormat();
  if (Strings.isNullOrEmpty(inputFormatName)) {
    final String storageHandlerClass = table.getParameters().get(META_TABLE_STORAGE);
    if (Strings.isNullOrEmpty(storageHandlerClass)) {
      throw new ExecutionSetupException("Unable to get Hive table InputFormat class. There is neither " +
          "InputFormat class explicitly specified nor StorageHandler class");
    }
    final HiveStorageHandler storageHandler = HiveUtils.getStorageHandler(job, storageHandlerClass);
    return (Class<? extends InputFormat<?, ?>>) storageHandler.getInputFormatClass();
  } else {
    return (Class<? extends InputFormat<?, ?>>) Class.forName(inputFormatName);
  }
}
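A caller typically instantiates the returned class through Hadoop's ReflectionUtils and then asks it for splits. A hedged sketch of that follow-up step (the helper name and input path are illustrative only):

import org.apache.hadoop.fs.Path;
import org.apache.hadoop.mapred.FileInputFormat;
import org.apache.hadoop.mapred.InputFormat;
import org.apache.hadoop.mapred.InputSplit;
import org.apache.hadoop.mapred.JobConf;
import org.apache.hadoop.util.ReflectionUtils;

// Hypothetical follow-up: instantiate the resolved class and enumerate its splits.
static InputSplit[] listSplits(Class<? extends InputFormat<?, ?>> formatClass,
    JobConf job, String inputDir) throws Exception {
  FileInputFormat.setInputPaths(job, new Path(inputDir));
  InputFormat<?, ?> format = ReflectionUtils.newInstance(formatClass, job);
  // The second argument is only a hint; most formats derive splits from block layout.
  return format.getSplits(job, 1);
}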
Example 9: runImportRCFile
import org.apache.hadoop.mapred.InputFormat; // import the required package/class
private void runImportRCFile(ExaIterator ctx, List<HCatTableColumn> columns, List<HCatTableColumn> partitionColumns, List<OutputColumnSpec> outputColumns, String file) throws Exception {
  List<HCatSerDeParameter> serDeParameters = new ArrayList<>();
  serDeParameters.add(new HCatSerDeParameter("serialization.format", "1"));
  String inputFormatClassName = "org.apache.hadoop.hive.ql.io.RCFileInputFormat";
  String serDeClassName = "org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe";
  String hdfsUser = "hdfs";
  boolean useKerberos = false;
  List<String> hdfsServers = new ArrayList<>();
  hdfsServers.add("file:///");
  final Configuration conf = new Configuration();
  FileSystem fs = HdfsService.getFileSystem(hdfsServers, conf);
  InputFormat<?, ?> inputFormat = (InputFormat<?, ?>) UdfUtils.getInstanceByName(inputFormatClassName);
  SerDe serDe = (SerDe) UdfUtils.getInstanceByName(serDeClassName);
  HdfsSerDeImportService.importFile(fs, file, partitionColumns, inputFormat, serDe, serDeParameters, hdfsServers, hdfsUser, columns, outputColumns, useKerberos, ctx);
}
Example 10: HiveFileIterator
import org.apache.hadoop.mapred.InputFormat; // import the required package/class
public HiveFileIterator(
        Path path,
        FileSystem fileSystem,
        DirectoryLister directoryLister,
        NamenodeStats namenodeStats,
        String partitionName,
        InputFormat<?, ?> inputFormat,
        Properties schema,
        List<HivePartitionKey> partitionKeys,
        TupleDomain<HiveColumnHandle> effectivePredicate)
{
    this.partitionName = requireNonNull(partitionName, "partitionName is null");
    this.inputFormat = requireNonNull(inputFormat, "inputFormat is null");
    this.schema = requireNonNull(schema, "schema is null");
    this.partitionKeys = requireNonNull(partitionKeys, "partitionKeys is null");
    this.effectivePredicate = requireNonNull(effectivePredicate, "effectivePredicate is null");
    this.path = requireNonNull(path, "path is null");
    this.fileSystem = requireNonNull(fileSystem, "fileSystem is null");
    this.directoryLister = requireNonNull(directoryLister, "directoryLister is null");
    this.namenodeStats = requireNonNull(namenodeStats, "namenodeStats is null");
}
Example 11: getSample
import org.apache.hadoop.mapred.InputFormat; // import the required package/class
/**
 * For each split sampled, emit when the ratio of the number of records
 * retained to the total record count is less than the specified
 * frequency.
 */
@SuppressWarnings("unchecked") // ArrayList::toArray doesn't preserve type
public K[] getSample(InputFormat<K,V> inf, JobConf job) throws IOException {
  InputSplit[] splits = inf.getSplits(job, job.getNumMapTasks());
  ArrayList<K> samples = new ArrayList<K>();
  int splitsToSample = Math.min(maxSplitsSampled, splits.length);
  int splitStep = splits.length / splitsToSample;
  long records = 0;
  long kept = 0;
  for (int i = 0; i < splitsToSample; ++i) {
    RecordReader<K,V> reader = inf.getRecordReader(splits[i * splitStep],
        job, Reporter.NULL);
    K key = reader.createKey();
    V value = reader.createValue();
    while (reader.next(key, value)) {
      ++records;
      if ((double) kept / records < freq) {
        ++kept;
        samples.add(key);
        key = reader.createKey();
      }
    }
    reader.close();
  }
  return (K[])samples.toArray();
}
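Unlike the SplitSampler above, this IntervalSampler keeps a record whenever the running ratio kept/records would otherwise fall below freq, so it emits roughly freq of all records (freq = 0.01 keeps about 1 in 100 keys). Reusing the job setup from the SplitSampler sketch after Example 3, the only change is the sampler construction (an illustrative fragment, not standalone code):

// Hypothetical: keep roughly 1% of keys, sampling from at most 20 splits.
InputSampler.Sampler<Text, Text> sampler =
    new InputSampler.IntervalSampler<Text, Text>(0.01, 20);
InputSampler.writePartitionFile(job, sampler); // same job wiring as in the earlier sketch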
Example 12: testAddInputPathWithMapper
import org.apache.hadoop.mapred.InputFormat; // import the required package/class
@Test
public void testAddInputPathWithMapper() {
  final JobConf conf = new JobConf();
  MultipleInputs.addInputPath(conf, new Path("/foo"), TextInputFormat.class,
      MapClass.class);
  MultipleInputs.addInputPath(conf, new Path("/bar"),
      KeyValueTextInputFormat.class, MapClass2.class);
  final Map<Path, InputFormat> inputs = MultipleInputs
      .getInputFormatMap(conf);
  final Map<Path, Class<? extends Mapper>> maps = MultipleInputs
      .getMapperTypeMap(conf);
  assertEquals(TextInputFormat.class, inputs.get(new Path("/foo")).getClass());
  assertEquals(KeyValueTextInputFormat.class, inputs.get(new Path("/bar"))
      .getClass());
  assertEquals(MapClass.class, maps.get(new Path("/foo")));
  assertEquals(MapClass2.class, maps.get(new Path("/bar")));
}
Example 13: testParquet
import org.apache.hadoop.mapred.InputFormat; // import the required package/class
@Test
public void testParquet()
        throws Exception
{
    List<TestColumn> testColumns = getTestColumnsSupportedByParquet();
    HiveOutputFormat<?, ?> outputFormat = new MapredParquetOutputFormat();
    InputFormat<?, ?> inputFormat = new MapredParquetInputFormat();
    @SuppressWarnings("deprecation")
    SerDe serde = new ParquetHiveSerDe();
    File file = File.createTempFile("presto_test", "parquet");
    file.delete();
    try {
        FileSplit split = createTestFile(file.getAbsolutePath(), outputFormat, serde, null, testColumns, NUM_ROWS);
        HiveRecordCursorProvider cursorProvider = new ParquetRecordCursorProvider(false);
        testCursorProvider(cursorProvider, split, inputFormat, serde, testColumns, NUM_ROWS);
    }
    finally {
        //noinspection ResultOfMethodCallIgnored
        file.delete();
    }
}
Example 14: createJobConf
import org.apache.hadoop.mapred.InputFormat; // import the required package/class
private static JobConf createJobConf(Configuration conf, boolean useFastCopy) {
  Class<? extends InputFormat> inputFormat =
      (useFastCopy) ? FastCopyInputFormat.class : CopyInputFormat.class;
  JobConf jobconf = new JobConf(conf, DistCp.class);
  jobconf.setJobName(NAME);
  // turn off speculative execution, because DFS doesn't handle
  // multiple writers to the same file.
  jobconf.setReduceSpeculativeExecution(false);
  jobconf.setMapOutputKeyClass(FilePairComparable.class);
  jobconf.setMapOutputValueClass(Text.class);
  jobconf.setOutputKeyClass(FilePairComparable.class);
  jobconf.setOutputValueClass(Text.class);
  jobconf.setInputFormat(inputFormat);
  jobconf.setMapperClass(CopyFilesTask.class);
  jobconf.setReducerClass(CopyFilesTask.class);
  // Prevent the reducer from starting until all maps are done.
  jobconf.setInt("mapred.job.rushreduce.reduce.threshold", 0);
  jobconf.setFloat("mapred.reduce.slowstart.completed.maps", 1.0f);
  return jobconf;
}
Example 15: testRCBinary
import org.apache.hadoop.mapred.InputFormat; // import the required package/class
@Test
public void testRCBinary()
        throws Exception
{
    List<TestColumn> testColumns = ImmutableList.copyOf(filter(TEST_COLUMNS, testColumn -> {
        // RC file does not support complex type as key of a map
        return !testColumn.getName().equals("t_map_null_key_complex_key_value");
    }));
    HiveOutputFormat<?, ?> outputFormat = new RCFileOutputFormat();
    InputFormat<?, ?> inputFormat = new RCFileInputFormat<>();
    @SuppressWarnings("deprecation")
    SerDe serde = new LazyBinaryColumnarSerDe();
    File file = File.createTempFile("presto_test", "rc-binary");
    try {
        FileSplit split = createTestFile(file.getAbsolutePath(), outputFormat, serde, null, testColumns, NUM_ROWS);
        testCursorProvider(new ColumnarBinaryHiveRecordCursorProvider(), split, inputFormat, serde, testColumns, NUM_ROWS);
        testCursorProvider(new GenericHiveRecordCursorProvider(), split, inputFormat, serde, testColumns, NUM_ROWS);
    }
    finally {
        //noinspection ResultOfMethodCallIgnored
        file.delete();
    }
}