Java InputFormat.getSplits方法代碼示例

本文整理匯總了Java中org.apache.hadoop.mapred.InputFormat.getSplits方法的典型用法代碼示例。如果您正苦於以下問題：Java InputFormat.getSplits方法的具體用法？Java InputFormat.getSplits怎麼用？Java InputFormat.getSplits使用的例子？那麼，這裡精選的方法代碼示例或許可以為您提供幫助。您也可以進一步了解該方法所在類org.apache.hadoop.mapred.InputFormat的用法示例。

在下文中一共展示了InputFormat.getSplits方法的5個代碼示例，這些例子默認根據受歡迎程度排序。您可以為喜歡或者感覺有用的代碼點讚，您的評價將有助於系統推薦出更棒的Java代碼示例。

示例1: getSample

import org.apache.hadoop.mapred.InputFormat; //導入方法依賴的package包/類
/**
 * Samples keys by reading the leading records of a subset of the input splits.
 *
 * <p>At most {@code maxSplitsSampled} splits are visited, spaced evenly across the
 * array returned by {@code inf.getSplits}. Reading from the i-th visited split stops
 * once the running record total reaches {@code (i + 1) * samplesPerSplit}, so a split
 * that runs short is compensated for by the splits visited after it.
 *
 * @param inf input format used to compute the splits and to open a reader on each one
 * @param job job configuration; its map-task count is passed as the split-count hint
 * @return the sampled keys; an empty array when there is no split to sample
 * @throws IOException if computing the splits or reading a record fails
 */
@SuppressWarnings("unchecked") // ArrayList::toArray doesn't preserve type
public K[] getSample(InputFormat<K,V> inf, JobConf job) throws IOException {
  InputSplit[] splits = inf.getSplits(job, job.getNumMapTasks());
  ArrayList<K> samples = new ArrayList<K>(numSamples);
  int splitsToSample = Math.min(maxSplitsSampled, splits.length);
  if (splitsToSample <= 0) {
    // Nothing to sample; also avoids dividing by zero in the step computations below.
    return (K[])samples.toArray();
  }
  int splitStep = splits.length / splitsToSample;
  int samplesPerSplit = numSamples / splitsToSample;
  long records = 0;
  for (int i = 0; i < splitsToSample; ++i) {
    RecordReader<K,V> reader = inf.getRecordReader(splits[i * splitStep],
        job, Reporter.NULL);
    try {
      K key = reader.createKey();
      V value = reader.createValue();
      while (reader.next(key, value)) {
        samples.add(key);
        // The stored key must not be overwritten by the next read, so use a fresh one.
        key = reader.createKey();
        ++records;
        if ((i+1) * samplesPerSplit <= records) {
          break;
        }
      }
    } finally {
      // Release the reader even when next() fails part-way through a split.
      reader.close();
    }
  }
  return (K[])samples.toArray();
}

開發者ID:naver，項目名稱:hadoop，代碼行數:29，代碼來源:InputSampler.java

示例2: init

import org.apache.hadoop.mapred.InputFormat; //導入方法依賴的package包/類
/**
 * Computes the input splits of the configured job and assigns them to the current
 * cluster members, storing the result in {@code assigned}.
 *
 * @param context context supplying the Jet instance and the total parallelism, which
 *     is passed to the input format as the requested number of splits
 */
@Override
public void init(@Nonnull Context context) {
    logger = context.jetInstance().getHazelcastInstance().getLoggingService().getLogger(ReadHdfsP.class);
    try {
        int parallelismHint = context.totalParallelism();
        InputFormat format = jobConf.getInputFormat();
        InputSplit[] rawSplits = format.getSplits(jobConf, parallelismHint);

        // Pair every split with its position in the array returned by getSplits.
        IndexedInputSplit[] indexed = new IndexedInputSplit[rawSplits.length];
        for (int idx = 0; idx < rawSplits.length; idx++) {
            indexed[idx] = new IndexedInputSplit(idx, rawSplits[idx]);
        }

        Address[] memberAddresses = context.jetInstance().getCluster().getMembers()
                .stream()
                .map(Member::getAddress)
                .toArray(Address[]::new);
        assigned = assignSplitsToMembers(indexed, memberAddresses);
        printAssignments(assigned);
    } catch (IOException ioe) {
        throw rethrow(ioe);
    }
}

開發者ID:hazelcast，項目名稱:hazelcast-jet，代碼行數:19，代碼來源:ReadHdfsP.java

示例3: splitInput

import org.apache.hadoop.mapred.InputFormat; //導入方法依賴的package包/類
/**
 * Computes the input splits for one storage location, records them together with the
 * given partition, and accumulates the {@code numRows} statistic into {@code rowCount}.
 *
 * @param properties properties copied into the job configuration; a positive
 *     {@code numRows} entry is added to {@code rowCount}
 * @param sd storage descriptor naming the input format class and the data location
 * @param partition value stored in {@code partitionMap} for every split that is found
 * @throws ReflectiveOperationException if the input format class cannot be loaded or
 *     instantiated
 * @throws IOException if accessing the file system or computing the splits fails
 */
private void splitInput(final Properties properties, final StorageDescriptor sd, final Partition partition)
    throws ReflectiveOperationException, IOException {
  final JobConf job = new JobConf();
  for (final Object key : properties.keySet()) {
    job.set((String) key, (String) properties.get(key));
  }
  // Overrides are written after the properties, so they replace values set above.
  for (final Map.Entry<String, String> override : hiveReadEntry.hiveConfigOverride.entrySet()) {
    job.set(override.getKey(), override.getValue());
  }

  final Class<?> formatClass = Class.forName(sd.getInputFormat());
  InputFormat<?, ?> format = (InputFormat<?, ?>) formatClass.getConstructor().newInstance();
  job.setInputFormat(format.getClass());

  final Path location = new Path(sd.getLocation());
  final FileSystem fileSystem = location.getFileSystem(job);
  if (fileSystem.exists(location)) {
    FileInputFormat.addInputPath(job, location);
    format = job.getInputFormat();
    final InputSplit[] found = format.getSplits(job, 1);
    for (int i = 0; i < found.length; i++) {
      inputSplits.add(found[i]);
      partitionMap.put(found[i], partition);
    }
  }

  final String numRowsProp = properties.getProperty("numRows");
  logger.trace("HiveScan num rows property = {}", numRowsProp);
  if (numRowsProp == null) {
    return;
  }
  // Starting from hive-0.13 this property is set to -1 when no statistics are available.
  // Note that the value reported by hive may not be up to date.
  final long numRows = Long.parseLong(numRowsProp);
  if (numRows > 0) {
    rowCount += numRows;
  }
}

開發者ID:skhalifa，項目名稱:QDrill，代碼行數:35，代碼來源:HiveScan.java

示例4: getSplits

import org.apache.hadoop.mapred.InputFormat; //導入方法依賴的package包/類
/**
 * Requests the input splits for the table from the manager's input format.
 *
 * @return the splits computed by the input format for {@code jobConf}
 * @throws FileNotFoundException if the input format reports a missing file; the
 *     exception carries a hint about the LOCATION string and has the original
 *     exception attached as its cause
 * @throws IOException if the split computation fails for any other I/O reason
 */
private InputSplit[] getSplits() throws IOException {
  InputFormat<Key, Row> inputFormat = this.gfxdManager.getInputFormat();
  try {
    return inputFormat.getSplits(this.jobConf, 1);
  } catch (FileNotFoundException fnfe) {
    FileNotFoundException detailed = new FileNotFoundException(
        "Table "
            + this.gfxdManager.getTable()
            + " not found. "
            + "The LOCATION string may contain incorrect value for one or more of the following:"
            + "1. Path to HDFSSTORE (homeDir), 2. Schema name or 3. Table name. "
            + GemFireXDManager.LOCATION_FORMAT);
    // FileNotFoundException has no cause-accepting constructor, so chain explicitly
    // to keep the original path and stack trace available to whoever handles this.
    detailed.initCause(fnfe);
    throw detailed;
  }
}

開發者ID:gemxd，項目名稱:gemfirexd-oss，代碼行數:15，代碼來源:GemFireXDFragmenter.java

示例5: addSplitsForGroup

import org.apache.hadoop.mapred.InputFormat; //導入方法依賴的package包/類
/**
 * Computes the splits for a group of directories that share one input format and
 * appends them to {@code result}, each wrapped in a {@code HiveInputSplit}.
 *
 * @param dirs directories set as the input paths of {@code conf}
 * @param tableScan table scan whose filters are pushed into {@code conf}; may be null
 * @param conf job configuration, modified in place
 * @param inputFormat input format instance asked for the splits
 * @param inputFormatClass class whose name is recorded in every wrapped split
 * @param splits requested number of splits, passed through to the input format
 * @param table table descriptor used for properties and header/footer counts; the
 *     header/footer handling is skipped when it is null
 * @param result list receiving the wrapped splits
 * @throws IOException if reading the footer count or computing the splits fails
 */
private void addSplitsForGroup(List<Path> dirs, TableScanOperator tableScan, JobConf conf,
    InputFormat inputFormat, Class<? extends InputFormat> inputFormatClass, int splits,
    TableDesc table, List<InputSplit> result) throws IOException {

  Utilities.copyTablePropertiesToConf(table, conf);
  if (tableScan != null) {
    pushFilters(conf, tableScan);
  }

  Path[] inputPaths = dirs.toArray(new Path[dirs.size()]);
  FileInputFormat.setInputPaths(conf, inputPaths);
  conf.setInputFormat(inputFormat.getClass());

  if (table != null) {
    int headers = Utilities.getHeaderCount(table);
    int footers = Utilities.getFooterCount(table, conf);
    boolean hasHeaderOrFooter = headers != 0 || footers != 0;
    if (hasHeaderOrFooter) {
      // An input file with a header or footer cannot be split, so raise the minimum
      // split size to the largest possible value.
      String minSplitSizeKey =
          ShimLoader.getHadoopShims().getHadoopConfNames().get("MAPREDMINSPLITSIZE");
      conf.setLong(minSplitSizeKey, Long.MAX_VALUE);
    }
  }

  InputSplit[] computed = inputFormat.getSplits(conf, splits);
  for (int i = 0; i < computed.length; i++) {
    result.add(new HiveInputSplit(computed[i], inputFormatClass.getName()));
  }
}

開發者ID:mini666，項目名稱:hive-phoenix-handler，代碼行數:32，代碼來源:HiveInputFormat.java

注：本文中的org.apache.hadoop.mapred.InputFormat.getSplits方法示例由純淨天空整理自Github/MSDocs等開源代碼及文檔管理平台，相關代碼片段篩選自各路編程大神貢獻的開源項目，源碼版權歸原作者所有，傳播和使用請參考對應項目的License；未經允許，請勿轉載。