This article collects typical usage examples of the Java method org.apache.hadoop.mapred.InputFormat.getSplits. If you have been asking yourself what InputFormat.getSplits does, how to call it, or what real-world usage looks like, the curated code examples below should help. You can also read further about the enclosing class, org.apache.hadoop.mapred.InputFormat.
The sections below show 5 code examples of InputFormat.getSplits, ordered by popularity.
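Before the examples, here is a minimal, self-contained sketch of the basic call pattern. It is not taken from any of the examples below; the input path /tmp/input and the split-count hint of 4 are placeholder assumptions.

import org.apache.hadoop.fs.Path;
import org.apache.hadoop.mapred.FileInputFormat;
import org.apache.hadoop.mapred.InputSplit;
import org.apache.hadoop.mapred.JobConf;
import org.apache.hadoop.mapred.TextInputFormat;

public class GetSplitsSketch {
  public static void main(String[] args) throws Exception {
    JobConf job = new JobConf();
    // Placeholder input path; point this at real files before running.
    FileInputFormat.setInputPaths(job, new Path("/tmp/input"));
    TextInputFormat inputFormat = new TextInputFormat();
    inputFormat.configure(job); // TextInputFormat is JobConfigurable
    // The second argument is only a hint; the actual number of splits
    // depends on file sizes, block size, and min/max split settings.
    InputSplit[] splits = inputFormat.getSplits(job, 4);
    for (InputSplit split : splits) {
      System.out.println(split + " length=" + split.getLength());
    }
  }
}

Note that getSplits only computes split metadata (file paths, offsets, lengths); no records are read until getRecordReader is called for a particular split, as Example 1 below does.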
Example 1: getSample
import org.apache.hadoop.mapred.InputFormat; // import the package/class this method depends on
/**
 * From each split sampled, take the first numSamples / numSplits records.
 */
@SuppressWarnings("unchecked") // ArrayList::toArray doesn't preserve type
public K[] getSample(InputFormat<K,V> inf, JobConf job) throws IOException {
  InputSplit[] splits = inf.getSplits(job, job.getNumMapTasks());
  ArrayList<K> samples = new ArrayList<K>(numSamples);
  int splitsToSample = Math.min(maxSplitsSampled, splits.length);
  int splitStep = splits.length / splitsToSample;
  int samplesPerSplit = numSamples / splitsToSample;
  long records = 0;
  for (int i = 0; i < splitsToSample; ++i) {
    RecordReader<K,V> reader = inf.getRecordReader(splits[i * splitStep],
        job, Reporter.NULL);
    K key = reader.createKey();
    V value = reader.createValue();
    while (reader.next(key, value)) {
      samples.add(key);
      key = reader.createKey();
      ++records;
      // Stop once this split has contributed its share of the samples.
      if ((i+1) * samplesPerSplit <= records) {
        break;
      }
    }
    reader.close();
  }
  return (K[])samples.toArray();
}
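Example 1 has the shape of Hadoop's InputSampler.SplitSampler. As a hedged sketch of how such a sampler is typically driven (the paths and sample counts here are placeholder assumptions, not part of the excerpt above):

import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapred.FileInputFormat;
import org.apache.hadoop.mapred.JobConf;
import org.apache.hadoop.mapred.TextInputFormat;
import org.apache.hadoop.mapred.lib.InputSampler;
import org.apache.hadoop.mapred.lib.TotalOrderPartitioner;

public class SamplerSketch {
  public static void main(String[] args) throws Exception {
    JobConf job = new JobConf();
    job.setInputFormat(TextInputFormat.class);
    FileInputFormat.setInputPaths(job, new Path("/tmp/input"));                // placeholder
    TotalOrderPartitioner.setPartitionFile(job, new Path("/tmp/_partitions")); // placeholder
    // Sample at most 1000 keys from at most 10 splits, then write the
    // partition boundaries consumed by TotalOrderPartitioner.
    InputSampler.Sampler<LongWritable, Text> sampler =
        new InputSampler.SplitSampler<LongWritable, Text>(1000, 10);
    InputSampler.writePartitionFile(job, sampler);
  }
}

writePartitionFile sorts the sampled keys and writes evenly spaced cut points to the partition file, which TotalOrderPartitioner then uses to produce globally sorted job output.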
Example 2: init
import org.apache.hadoop.mapred.InputFormat; // import the package/class this method depends on
@Override
public void init(@Nonnull Context context) {
  logger = context.jetInstance().getHazelcastInstance().getLoggingService().getLogger(ReadHdfsP.class);
  try {
    int totalParallelism = context.totalParallelism();
    InputFormat inputFormat = jobConf.getInputFormat();
    // Ask for as many splits as there are parallel processors in the cluster.
    InputSplit[] splits = inputFormat.getSplits(jobConf, totalParallelism);
    IndexedInputSplit[] indexedInputSplits = new IndexedInputSplit[splits.length];
    Arrays.setAll(indexedInputSplits, i -> new IndexedInputSplit(i, splits[i]));
    Address[] addrs = context.jetInstance().getCluster().getMembers()
                             .stream().map(Member::getAddress).toArray(Address[]::new);
    assigned = assignSplitsToMembers(indexedInputSplits, addrs);
    printAssignments(assigned);
  } catch (IOException e) {
    throw rethrow(e);
  }
}
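In this example (a Hazelcast Jet HDFS source processor), the split-count hint is the cluster's total parallelism, and the resulting splits are then assigned to concrete member addresses so that each member reads only its own share of the input.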
Example 3: splitInput
import org.apache.hadoop.mapred.InputFormat; // import the package/class this method depends on
private void splitInput(final Properties properties, final StorageDescriptor sd, final Partition partition)
    throws ReflectiveOperationException, IOException {
  final JobConf job = new JobConf();
  for (final Object obj : properties.keySet()) {
    job.set((String) obj, (String) properties.get(obj));
  }
  for (final Map.Entry<String, String> entry : hiveReadEntry.hiveConfigOverride.entrySet()) {
    job.set(entry.getKey(), entry.getValue());
  }
  InputFormat<?, ?> format = (InputFormat<?, ?>)
      Class.forName(sd.getInputFormat()).getConstructor().newInstance();
  job.setInputFormat(format.getClass());
  final Path path = new Path(sd.getLocation());
  final FileSystem fs = path.getFileSystem(job);
  if (fs.exists(path)) {
    FileInputFormat.addInputPath(job, path);
    format = job.getInputFormat();
    for (final InputSplit split : format.getSplits(job, 1)) {
      inputSplits.add(split);
      partitionMap.put(split, partition);
    }
  }
  final String numRowsProp = properties.getProperty("numRows");
  logger.trace("HiveScan num rows property = {}", numRowsProp);
  if (numRowsProp != null) {
    final long numRows = Long.valueOf(numRowsProp);
    // Starting from hive-0.13, when no statistics are available this property is set to -1.
    // Note that the value returned by Hive may not be up to date.
    if (numRows > 0) {
      rowCount += numRows;
    }
  }
}
Example 4: getSplits
import org.apache.hadoop.mapred.InputFormat; // import the package/class this method depends on
private InputSplit[] getSplits() throws IOException {
  InputFormat<Key, Row> inputFormat = this.gfxdManager.getInputFormat();
  try {
    return inputFormat.getSplits(this.jobConf, 1);
  } catch (FileNotFoundException fnfe) {
    throw new FileNotFoundException(
        "Table "
            + this.gfxdManager.getTable()
            + " not found. "
            + "The LOCATION string may contain an incorrect value for one or more of the following: "
            + "1. Path to HDFSSTORE (homeDir), 2. Schema name or 3. Table name. "
            + GemFireXDManager.LOCATION_FORMAT);
  }
}
Example 5: addSplitsForGroup
import org.apache.hadoop.mapred.InputFormat; // import the package/class this method depends on
private void addSplitsForGroup(List<Path> dirs, TableScanOperator tableScan, JobConf conf,
    InputFormat inputFormat, Class<? extends InputFormat> inputFormatClass, int splits,
    TableDesc table, List<InputSplit> result) throws IOException {
  Utilities.copyTablePropertiesToConf(table, conf);
  if (tableScan != null) {
    pushFilters(conf, tableScan);
  }
  FileInputFormat.setInputPaths(conf, dirs.toArray(new Path[dirs.size()]));
  conf.setInputFormat(inputFormat.getClass());
  int headerCount = 0;
  int footerCount = 0;
  if (table != null) {
    headerCount = Utilities.getHeaderCount(table);
    footerCount = Utilities.getFooterCount(table, conf);
    if (headerCount != 0 || footerCount != 0) {
      // The input file has a header or footer, so it cannot be split.
      conf.setLong(
          ShimLoader.getHadoopShims().getHadoopConfNames().get("MAPREDMINSPLITSIZE"),
          Long.MAX_VALUE);
    }
  }
  InputSplit[] iss = inputFormat.getSplits(conf, splits);
  for (InputSplit is : iss) {
    result.add(new HiveInputSplit(is, inputFormatClass.getName()));
  }
}
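Raising the minimum split size to Long.MAX_VALUE here effectively forces one split per file: a FileInputFormat will not divide a file below the configured minimum, and header/footer skipping only works correctly when a single reader sees the whole file from start to end.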