

Java FileInputFormat.getInputPaths Method Code Examples

This article collects typical usage examples of the Java method org.apache.hadoop.mapred.FileInputFormat.getInputPaths. If you are wondering what FileInputFormat.getInputPaths does, how to call it, or what real-world usage looks like, the curated examples below should help. You can also explore further usage examples of the enclosing class, org.apache.hadoop.mapred.FileInputFormat.


Below are 10 code examples of the FileInputFormat.getInputPaths method, sorted by popularity by default.
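Before the project-specific examples, here is a minimal, self-contained sketch of the basic round trip between setInputPaths and getInputPaths; the /data/in1 and /data/in2 directories are hypothetical placeholders.

import org.apache.hadoop.fs.Path;
import org.apache.hadoop.mapred.FileInputFormat;
import org.apache.hadoop.mapred.JobConf;

public class GetInputPathsDemo {
  public static void main(String[] args) {
    JobConf job = new JobConf();
    // Register two (hypothetical) input directories on the job configuration.
    FileInputFormat.setInputPaths(job, new Path("/data/in1"), new Path("/data/in2"));

    // getInputPaths reads the registered paths back from the configuration;
    // relative paths are resolved against the job's working directory.
    Path[] paths = FileInputFormat.getInputPaths(job);
    for (Path p : paths) {
      System.out.println(p);
    }
  }
}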

Example 1: validateInput

import org.apache.hadoop.mapred.FileInputFormat; // import the package/class the method depends on
public void validateInput(JobConf job) throws IOException {
  // expecting exactly one path
  Path [] tableNames = FileInputFormat.getInputPaths(job);
  if (tableNames == null || tableNames.length != 1) {
    throw new IOException("expecting one table name");
  }

  // connected to table?
  if (getHTable() == null) {
    throw new IOException("could not connect to table '" +
      tableNames[0].getName() + "'");
  }

  // expecting at least one column
  String colArg = job.get(COLUMN_LIST);
  if (colArg == null || colArg.length() == 0) {
    throw new IOException("expecting at least one column");
  }
}
 
Developer: fengchen8086, Project: ditb, Lines: 20, Source: TableInputFormat.java

Example 2: setInputPaths

import org.apache.hadoop.mapred.FileInputFormat; // import the package/class the method depends on
/**
 * setInputPaths adds all the paths in the provided list to the JobConf object
 * as input paths for the job.
 *
 * @param job the job configuration to update
 * @param pathsToAdd the paths to add as inputs
 */
public static void setInputPaths(JobConf job, List<Path> pathsToAdd) {

  Path[] addedPaths = FileInputFormat.getInputPaths(job);
  if (addedPaths == null) {
    addedPaths = new Path[0];
  }

  Path[] combined = new Path[addedPaths.length + pathsToAdd.size()];
  System.arraycopy(addedPaths, 0, combined, 0, addedPaths.length);

  int i = 0;
  for(Path p: pathsToAdd) {
    combined[addedPaths.length + (i++)] = p;
  }
  FileInputFormat.setInputPaths(job, combined);
}
 
Developer: mini666, Project: hive-phoenix-handler, Lines: 24, Source: Utilities.java
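A short usage sketch for the helper above, with hypothetical placeholder paths; note that it appends to, rather than replaces, any input paths already registered on the job.

import java.util.Arrays;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.mapred.FileInputFormat;
import org.apache.hadoop.mapred.JobConf;

JobConf job = new JobConf();
// Register one (hypothetical) path directly.
FileInputFormat.setInputPaths(job, new Path("/data/a"));
// Append two more through the helper.
Utilities.setInputPaths(job, Arrays.asList(new Path("/data/b"), new Path("/data/c")));
// FileInputFormat.getInputPaths(job) now returns [/data/a, /data/b, /data/c].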

Example 3: getInputPaths

import org.apache.hadoop.mapred.FileInputFormat; // import the package/class the method depends on
Path[] getInputPaths(JobConf job) throws IOException {
  Path[] dirs = FileInputFormat.getInputPaths(job);
  if (dirs.length == 0) {
    // on Tez we avoid duplicating the file info in FileInputFormat.
    if (HiveConf.getVar(job, HiveConf.ConfVars.HIVE_EXECUTION_ENGINE).equals("tez")) {
      try {
        List<Path> paths = Utilities.getInputPathsTez(job, mrwork);
        dirs = paths.toArray(new Path[paths.size()]);
      } catch (Exception e) {
        throw new IOException("Could not create input files", e);
      }
    } else {
      throw new IOException("No input paths specified in job");
    }
  }
  return dirs;
}
 
Developer: mini666, Project: hive-phoenix-handler, Lines: 18, Source: HiveInputFormat.java

Example 4: getSplits

import org.apache.hadoop.mapred.FileInputFormat; // import the package/class the method depends on
/**
 * Provide the required splits from the specified configuration. By default this
 *   method queries (via function execution) the region with the `_meta' suffix,
 *   so make sure the region name is passed accordingly.
 *
 * @param conf the job configuration
 * @param numSplits the required number of splits
 * @return the required splits to read/write the data
 * @throws IOException if the table does not exist.
 */
public static InputSplit[] getSplits(final JobConf conf, final int numSplits) throws IOException {
  final Path[] tablePaths = FileInputFormat.getInputPaths(conf);
  // initialize the cache if not done yet
  final AmpoolClient aClient = MonarchUtils.getConnectionFromConf(conf);
  String tableName = conf.get(MonarchUtils.REGION);
  boolean isFTable = MonarchUtils.isFTable(conf);
  Table table = null;
  if (isFTable) {
    table = aClient.getFTable(tableName);
  } else {
    table = aClient.getMTable(tableName);
  }
  if (table == null) {
    throw new IOException("Table " + tableName + "does not exist.");
  }
  int totalnumberOfSplits = table.getTableDescriptor().getTotalNumOfSplits();
  Map<Integer, Set<ServerLocation>> bucketMap = new HashMap<>(numSplits);
  final AtomicLong start = new AtomicLong(0L);
  MonarchSplit[] splits = MTableUtils
    .getSplitsWithSize(tableName, numSplits, totalnumberOfSplits, bucketMap)
    .stream().map(e -> {
      MonarchSplit ms = convertToSplit(tablePaths, start.get(), e, bucketMap);
      start.addAndGet(e.getSize());
      return ms;
    }).toArray(MonarchSplit[]::new);
  logger.info("numSplits= {}; MonarchSplits= {}", numSplits, Arrays.toString(splits));
  return splits;
}
 
Developer: ampool, Project: monarch, Lines: 39, Source: MonarchSplit.java

Example 5: initialize

import org.apache.hadoop.mapred.FileInputFormat; // import the package/class the method depends on
@Override
protected void initialize(JobConf job) throws IOException {
  Path[] tableNames = FileInputFormat.getInputPaths(job);
  String colArg = job.get(COLUMN_LIST);
  String[] colNames = colArg.split(" ");
  byte [][] m_cols = new byte[colNames.length][];
  for (int i = 0; i < m_cols.length; i++) {
    m_cols[i] = Bytes.toBytes(colNames[i]);
  }
  setInputColumns(m_cols);
  Connection connection = ConnectionFactory.createConnection(job);
  initializeTable(connection, TableName.valueOf(tableNames[0].getName()));
}
 
Developer: fengchen8086, Project: ditb, Lines: 14, Source: TableInputFormat.java

Example 6: getSplits

import org.apache.hadoop.mapred.FileInputFormat; // import the package/class the method depends on
@Override
public InputSplit[] getSplits(JobConf job, int numSplits)
        throws IOException {
    Path[] paths = FileInputFormat.getInputPaths(job);

    return FluentIterable.from(BaseInputFormat.getSplits(job, paths))
            .transform(_fromSplit)
            .toArray(InputSplit.class);
}
 
Developer: bazaarvoice, Project: emodb, Lines: 10, Source: EmoInputFormat.java

Example 7: getInputPath

import org.apache.hadoop.mapred.FileInputFormat; // import the package/class the method depends on
@Override
protected Path getInputPath(JobConf conf) {
  Path path = null;

  Path[] paths = FileInputFormat.getInputPaths(conf);
  if ((paths != null) && (paths.length > 0)) {
    path = paths[0];
  }

  return path;
}
 
Developer: awslabs, Project: emr-dynamodb-connector, Lines: 12, Source: HiveDynamoDBSplitGenerator.java

Example 8: getStatistics

import org.apache.hadoop.mapred.FileInputFormat; // import the package/class the method depends on
@Override
public BaseStatistics getStatistics(BaseStatistics cachedStats) throws IOException {
	// only gather base statistics for FileInputFormats
	if (!(mapredInputFormat instanceof FileInputFormat)) {
		return null;
	}

	final FileBaseStatistics cachedFileStats = (cachedStats instanceof FileBaseStatistics) ?
			(FileBaseStatistics) cachedStats : null;

	try {
		final org.apache.hadoop.fs.Path[] paths = FileInputFormat.getInputPaths(this.jobConf);

		return getFileStats(cachedFileStats, paths, new ArrayList<FileStatus>(1));
	} catch (IOException ioex) {
		if (LOG.isWarnEnabled()) {
			LOG.warn("Could not determine statistics due to an io error: "
					+ ioex.getMessage());
		}
	} catch (Throwable t) {
		if (LOG.isErrorEnabled()) {
			LOG.error("Unexpected problem while getting the file statistics: "
					+ t.getMessage(), t);
		}
	}

	// no statistics available
	return null;
}
 
Developer: axbaretto, Project: flink, Lines: 30, Source: HadoopInputFormatBase.java

Example 9: getStatistics

import org.apache.hadoop.mapred.FileInputFormat; // import the package/class the method depends on
@Override
public BaseStatistics getStatistics(BaseStatistics cachedStats) throws IOException {
	// only gather base statistics for FileInputFormats
	if (!(mapredInputFormat instanceof FileInputFormat)) {
		return null;
	}

	final FileBaseStatistics cachedFileStats = (cachedStats != null && cachedStats instanceof FileBaseStatistics) ?
			(FileBaseStatistics) cachedStats : null;

	try {
		final org.apache.hadoop.fs.Path[] paths = FileInputFormat.getInputPaths(this.jobConf);

		return getFileStats(cachedFileStats, paths, new ArrayList<FileStatus>(1));
	} catch (IOException ioex) {
		if (LOG.isWarnEnabled()) {
			LOG.warn("Could not determine statistics due to an io error: "
					+ ioex.getMessage());
		}
	} catch (Throwable t) {
		if (LOG.isErrorEnabled()) {
			LOG.error("Unexpected problem while getting the file statistics: "
					+ t.getMessage(), t);
		}
	}

	// no statistics available
	return null;
}
 
Developer: axbaretto, Project: flink, Lines: 30, Source: HadoopInputFormatBase.java

Example 10: testInputPath

import org.apache.hadoop.mapred.FileInputFormat; // import the package/class the method depends on
public void testInputPath() throws Exception {
  JobConf jobConf = new JobConf();
  Path workingDir = jobConf.getWorkingDirectory();
  
  Path path = new Path(workingDir, 
      "xx{y"+StringUtils.COMMA_STR+"z}");
  FileInputFormat.setInputPaths(jobConf, path);
  Path[] paths = FileInputFormat.getInputPaths(jobConf);
  assertEquals(1, paths.length);
  assertEquals(path.toString(), paths[0].toString());
   
  StringBuilder pathStr = new StringBuilder();
  pathStr.append(StringUtils.ESCAPE_CHAR);
  pathStr.append(StringUtils.ESCAPE_CHAR);
  pathStr.append(StringUtils.COMMA);
  pathStr.append(StringUtils.COMMA);
  pathStr.append('a');
  path = new Path(workingDir, pathStr.toString());
  FileInputFormat.setInputPaths(jobConf, path);
  paths = FileInputFormat.getInputPaths(jobConf);
  assertEquals(1, paths.length);
  assertEquals(path.toString(), paths[0].toString());
    
  pathStr.setLength(0);
  pathStr.append(StringUtils.ESCAPE_CHAR);
  pathStr.append("xx");
  pathStr.append(StringUtils.ESCAPE_CHAR);
  path = new Path(workingDir, pathStr.toString());
  Path path1 = new Path(workingDir,
      "yy"+StringUtils.COMMA_STR+"zz");
  FileInputFormat.setInputPaths(jobConf, path);
  FileInputFormat.addInputPath(jobConf, path1);
  paths = FileInputFormat.getInputPaths(jobConf);
  assertEquals(2, paths.length);
  assertEquals(path.toString(), paths[0].toString());
  assertEquals(path1.toString(), paths[1].toString());

  FileInputFormat.setInputPaths(jobConf, path, path1);
  paths = FileInputFormat.getInputPaths(jobConf);
  assertEquals(2, paths.length);
  assertEquals(path.toString(), paths[0].toString());
  assertEquals(path1.toString(), paths[1].toString());

  Path[] input = new Path[] {path, path1};
  FileInputFormat.setInputPaths(jobConf, input);
  paths = FileInputFormat.getInputPaths(jobConf);
  assertEquals(2, paths.length);
  assertEquals(path.toString(), paths[0].toString());
  assertEquals(path1.toString(), paths[1].toString());
  
  pathStr.setLength(0);
  String str1 = "{a{b,c},de}";
  String str2 = "xyz";
  String str3 = "x{y,z}";
  pathStr.append(str1);
  pathStr.append(StringUtils.COMMA);
  pathStr.append(str2);
  pathStr.append(StringUtils.COMMA);
  pathStr.append(str3);
  FileInputFormat.setInputPaths(jobConf, pathStr.toString());
  paths = FileInputFormat.getInputPaths(jobConf);
  assertEquals(3, paths.length);
  assertEquals(new Path(workingDir, str1).toString(), paths[0].toString());
  assertEquals(new Path(workingDir, str2).toString(), paths[1].toString());
  assertEquals(new Path(workingDir, str3).toString(), paths[2].toString());

  pathStr.setLength(0);
  String str4 = "abc";
  String str5 = "pq{r,s}";
  pathStr.append(str4);
  pathStr.append(StringUtils.COMMA);
  pathStr.append(str5);
  FileInputFormat.addInputPaths(jobConf, pathStr.toString());
  paths = FileInputFormat.getInputPaths(jobConf);
  assertEquals(5, paths.length);
  assertEquals(new Path(workingDir, str1).toString(), paths[0].toString());
  assertEquals(new Path(workingDir, str2).toString(), paths[1].toString());
  assertEquals(new Path(workingDir, str3).toString(), paths[2].toString());
  assertEquals(new Path(workingDir, str4).toString(), paths[3].toString());
  assertEquals(new Path(workingDir, str5).toString(), paths[4].toString());
}
 
Developer: aliyun-beta, Project: aliyun-oss-hadoop-fs, Lines: 82, Source: TestInputPath.java
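As the test above illustrates, commas act as path separators in the string-based setInputPaths/addInputPaths overloads, so a path whose name itself contains a comma must be escaped. A minimal sketch using Hadoop's StringUtils; the directory names are hypothetical placeholders.

import org.apache.hadoop.mapred.FileInputFormat;
import org.apache.hadoop.mapred.JobConf;
import org.apache.hadoop.util.StringUtils;

JobConf job = new JobConf();
// escapeString backslash-escapes commas so the whole name is kept as a
// single path instead of being split into three.
String tricky = StringUtils.escapeString("dir,with,commas");
FileInputFormat.setInputPaths(job, tricky + ",plainDir");
// FileInputFormat.getInputPaths(job) returns two paths:
// "dir,with,commas" and "plainDir".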


Note: The org.apache.hadoop.mapred.FileInputFormat.getInputPaths method examples in this article were compiled by 纯净天空 from open-source code and documentation platforms such as GitHub and MSDocs. The code snippets were selected from open-source projects contributed by their respective developers; copyright of the source code belongs to the original authors. Refer to each project's license before distributing or using the code, and do not reproduce this article without permission.