This article collects typical usages of the Java method org.apache.hadoop.mapred.FileInputFormat.getInputPaths. If you are wondering what FileInputFormat.getInputPaths does, how to use it, or what it looks like in practice, the curated code examples below should help. You can also read further about its enclosing class, org.apache.hadoop.mapred.FileInputFormat.
The following presents 10 code examples of the FileInputFormat.getInputPaths method, ordered by popularity by default.
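Before the collected examples, here is a minimal, self-contained sketch of the basic round trip: register input paths on a JobConf with setInputPaths, then read them back with getInputPaths. The directory names are placeholders.

import org.apache.hadoop.fs.Path;
import org.apache.hadoop.mapred.FileInputFormat;
import org.apache.hadoop.mapred.JobConf;

public class InputPathsRoundTrip {
  public static void main(String[] args) {
    JobConf job = new JobConf();
    // Register two input directories on the job configuration.
    FileInputFormat.setInputPaths(job, new Path("/data/in1"), new Path("/data/in2"));
    // Read them back; relative paths are resolved against the job's working directory.
    for (Path p : FileInputFormat.getInputPaths(job)) {
      System.out.println(p);
    }
  }
}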
Example 1: validateInput
import org.apache.hadoop.mapred.FileInputFormat; // import the package/class the method depends on
public void validateInput(JobConf job) throws IOException {
  // expecting exactly one path
  Path[] tableNames = FileInputFormat.getInputPaths(job);
  if (tableNames == null || tableNames.length != 1) {
    throw new IOException("expecting one table name");
  }

  // connected to table?
  if (getHTable() == null) {
    throw new IOException("could not connect to table '" +
        tableNames[0].getName() + "'");
  }

  // expecting at least one column
  String colArg = job.get(COLUMN_LIST);
  if (colArg == null || colArg.length() == 0) {
    throw new IOException("expecting at least one column");
  }
}
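As a hypothetical illustration, the sketch below builds a configuration that satisfies the path and column checks above. COLUMN_LIST is assumed to be the configuration key used by the enclosing class, and the table and column names are placeholders.

// Hypothetical setup; assumes COLUMN_LIST from the enclosing class is in scope.
static void configureForValidation(JobConf job) {
  FileInputFormat.setInputPaths(job, new Path("myTable")); // exactly one "path": the table name
  job.set(COLUMN_LIST, "cf:a cf:b");                       // at least one column
}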
Example 2: setInputPaths
import org.apache.hadoop.mapred.FileInputFormat; // import the package/class the method depends on
/**
 * Adds all the paths in the provided list to the JobConf object
 * as input paths for the job.
 *
 * @param job the job configuration
 * @param pathsToAdd the input paths to add
 */
public static void setInputPaths(JobConf job, List<Path> pathsToAdd) {
  Path[] addedPaths = FileInputFormat.getInputPaths(job);
  if (addedPaths == null) {
    addedPaths = new Path[0];
  }

  Path[] combined = new Path[addedPaths.length + pathsToAdd.size()];
  System.arraycopy(addedPaths, 0, combined, 0, addedPaths.length);

  int i = 0;
  for (Path p : pathsToAdd) {
    combined[addedPaths.length + (i++)] = p;
  }
  FileInputFormat.setInputPaths(job, combined);
}
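A hedged usage sketch of the helper above: it appends to whatever input paths the job already has rather than replacing them. The directory names are placeholders, and the static helper is assumed to be in scope.

static void demo() {
  JobConf job = new JobConf();
  FileInputFormat.setInputPaths(job, new Path("/warehouse/t1"));
  // Appends two more paths without clobbering the one already set.
  setInputPaths(job, Arrays.asList(new Path("/warehouse/t2"), new Path("/warehouse/t3")));
  // FileInputFormat.getInputPaths(job) now returns all three paths in order.
}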
Example 3: getInputPaths
import org.apache.hadoop.mapred.FileInputFormat; // import the package/class the method depends on
Path[] getInputPaths(JobConf job) throws IOException {
  Path[] dirs = FileInputFormat.getInputPaths(job);
  if (dirs.length == 0) {
    // On Tez we avoid duplicating the file info in FileInputFormat.
    if (HiveConf.getVar(job, HiveConf.ConfVars.HIVE_EXECUTION_ENGINE).equals("tez")) {
      try {
        List<Path> paths = Utilities.getInputPathsTez(job, mrwork);
        dirs = paths.toArray(new Path[paths.size()]);
      } catch (Exception e) {
        throw new IOException("Could not create input files", e);
      }
    } else {
      throw new IOException("No input paths specified in job");
    }
  }
  return dirs;
}
Example 4: getSplits
import org.apache.hadoop.mapred.FileInputFormat; // import the package/class the method depends on
/**
 * Provides the required splits for the specified configuration. By default this
 * method runs a query (function execution) against the region with the `_meta'
 * suffix, so make sure the region name is passed accordingly.
 *
 * @param conf the job configuration
 * @param numSplits the required number of splits
 * @return the required splits to read/write the data
 * @throws IOException if the table does not exist
 */
public static InputSplit[] getSplits(final JobConf conf, final int numSplits) throws IOException {
  final Path[] tablePaths = FileInputFormat.getInputPaths(conf);
  // initialize the cache if not done yet
  final AmpoolClient aClient = MonarchUtils.getConnectionFromConf(conf);
  String tableName = conf.get(MonarchUtils.REGION);
  boolean isFTable = MonarchUtils.isFTable(conf);
  Table table = null;
  if (isFTable) {
    table = aClient.getFTable(tableName);
  } else {
    table = aClient.getMTable(tableName);
  }
  if (table == null) {
    throw new IOException("Table " + tableName + " does not exist.");
  }

  int totalNumberOfSplits = table.getTableDescriptor().getTotalNumOfSplits();
  Map<Integer, Set<ServerLocation>> bucketMap = new HashMap<>(numSplits);
  final AtomicLong start = new AtomicLong(0L);
  MonarchSplit[] splits = MTableUtils
      .getSplitsWithSize(tableName, numSplits, totalNumberOfSplits, bucketMap)
      .stream().map(e -> {
        MonarchSplit ms = convertToSplit(tablePaths, start.get(), e, bucketMap);
        start.addAndGet(e.getSize());
        return ms;
      }).toArray(MonarchSplit[]::new);
  logger.info("numSplits= {}; MonarchSplits= {}", numSplits, Arrays.toString(splits));
  return splits;
}
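A hypothetical driver sketch for the method above. Only identifiers visible in the snippet (MonarchUtils.REGION and getSplits itself) are assumed to exist; the table name is a placeholder, and passing the `_meta'-suffixed name as the input path is a guess based on the javadoc.

static void demo() throws IOException {
  JobConf conf = new JobConf();
  conf.set(MonarchUtils.REGION, "orders");                      // table/region name (placeholder)
  FileInputFormat.setInputPaths(conf, new Path("orders_meta")); // region with the _meta suffix
  InputSplit[] splits = getSplits(conf, 4);                     // ask for 4 splits
}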
Example 5: initialize
import org.apache.hadoop.mapred.FileInputFormat; // import the package/class the method depends on
@Override
protected void initialize(JobConf job) throws IOException {
  Path[] tableNames = FileInputFormat.getInputPaths(job);

  // Parse the space-separated column list into per-column byte arrays.
  String colArg = job.get(COLUMN_LIST);
  String[] colNames = colArg.split(" ");
  byte[][] m_cols = new byte[colNames.length][];
  for (int i = 0; i < m_cols.length; i++) {
    m_cols[i] = Bytes.toBytes(colNames[i]);
  }
  setInputColumns(m_cols);

  // The single input "path" carries the table name.
  Connection connection = ConnectionFactory.createConnection(job);
  initializeTable(connection, TableName.valueOf(tableNames[0].getName()));
}
Example 6: getSplits
import org.apache.hadoop.mapred.FileInputFormat; // import the package/class the method depends on
@Override
public InputSplit[] getSplits(JobConf job, int numSplits)
    throws IOException {
  Path[] paths = FileInputFormat.getInputPaths(job);
  return FluentIterable.from(BaseInputFormat.getSplits(job, paths))
      .transform(_fromSplit)
      .toArray(InputSplit.class);
}
Example 7: getInputPath
import org.apache.hadoop.mapred.FileInputFormat; // import the package/class the method depends on
@Override
protected Path getInputPath(JobConf conf) {
  Path path = null;
  Path[] paths = FileInputFormat.getInputPaths(conf);
  if ((paths != null) && (paths.length > 0)) {
    path = paths[0];
  }
  return path;
}
Example 8: getStatistics
import org.apache.hadoop.mapred.FileInputFormat; // import the package/class the method depends on
@Override
public BaseStatistics getStatistics(BaseStatistics cachedStats) throws IOException {
  // only gather base statistics for FileInputFormats
  if (!(mapredInputFormat instanceof FileInputFormat)) {
    return null;
  }

  final FileBaseStatistics cachedFileStats = (cachedStats instanceof FileBaseStatistics) ?
      (FileBaseStatistics) cachedStats : null;

  try {
    final org.apache.hadoop.fs.Path[] paths = FileInputFormat.getInputPaths(this.jobConf);
    return getFileStats(cachedFileStats, paths, new ArrayList<FileStatus>(1));
  } catch (IOException ioex) {
    if (LOG.isWarnEnabled()) {
      LOG.warn("Could not determine statistics due to an I/O error: "
          + ioex.getMessage());
    }
  } catch (Throwable t) {
    if (LOG.isErrorEnabled()) {
      LOG.error("Unexpected problem while getting the file statistics: "
          + t.getMessage(), t);
    }
  }
  // no statistics available
  return null;
}
Example 9: getStatistics
import org.apache.hadoop.mapred.FileInputFormat; // import the package/class the method depends on
@Override
public BaseStatistics getStatistics(BaseStatistics cachedStats) throws IOException {
  // only gather base statistics for FileInputFormats
  if (!(mapredInputFormat instanceof FileInputFormat)) {
    return null;
  }

  final FileBaseStatistics cachedFileStats = (cachedStats != null && cachedStats instanceof FileBaseStatistics) ?
      (FileBaseStatistics) cachedStats : null;

  try {
    final org.apache.hadoop.fs.Path[] paths = FileInputFormat.getInputPaths(this.jobConf);
    return getFileStats(cachedFileStats, paths, new ArrayList<FileStatus>(1));
  } catch (IOException ioex) {
    if (LOG.isWarnEnabled()) {
      LOG.warn("Could not determine statistics due to an I/O error: "
          + ioex.getMessage());
    }
  } catch (Throwable t) {
    if (LOG.isErrorEnabled()) {
      LOG.error("Unexpected problem while getting the file statistics: "
          + t.getMessage(), t);
    }
  }
  // no statistics available
  return null;
}
Example 10: testInputPath
import org.apache.hadoop.mapred.FileInputFormat; // import the package/class the method depends on
public void testInputPath() throws Exception {
  JobConf jobConf = new JobConf();
  Path workingDir = jobConf.getWorkingDirectory();

  // A Path whose name contains a comma (inside a brace glob) stays a single path.
  Path path = new Path(workingDir,
      "xx{y" + StringUtils.COMMA_STR + "z}");
  FileInputFormat.setInputPaths(jobConf, path);
  Path[] paths = FileInputFormat.getInputPaths(jobConf);
  assertEquals(1, paths.length);
  assertEquals(path.toString(), paths[0].toString());

  // Escape characters and commas in a Path name survive the config round trip.
  StringBuilder pathStr = new StringBuilder();
  pathStr.append(StringUtils.ESCAPE_CHAR);
  pathStr.append(StringUtils.ESCAPE_CHAR);
  pathStr.append(StringUtils.COMMA);
  pathStr.append(StringUtils.COMMA);
  pathStr.append('a');
  path = new Path(workingDir, pathStr.toString());
  FileInputFormat.setInputPaths(jobConf, path);
  paths = FileInputFormat.getInputPaths(jobConf);
  assertEquals(1, paths.length);
  assertEquals(path.toString(), paths[0].toString());

  // addInputPath appends to the existing input paths.
  pathStr.setLength(0);
  pathStr.append(StringUtils.ESCAPE_CHAR);
  pathStr.append("xx");
  pathStr.append(StringUtils.ESCAPE_CHAR);
  path = new Path(workingDir, pathStr.toString());
  Path path1 = new Path(workingDir,
      "yy" + StringUtils.COMMA_STR + "zz");
  FileInputFormat.setInputPaths(jobConf, path);
  FileInputFormat.addInputPath(jobConf, path1);
  paths = FileInputFormat.getInputPaths(jobConf);
  assertEquals(2, paths.length);
  assertEquals(path.toString(), paths[0].toString());
  assertEquals(path1.toString(), paths[1].toString());

  // setInputPaths with varargs replaces the input paths...
  FileInputFormat.setInputPaths(jobConf, path, path1);
  paths = FileInputFormat.getInputPaths(jobConf);
  assertEquals(2, paths.length);
  assertEquals(path.toString(), paths[0].toString());
  assertEquals(path1.toString(), paths[1].toString());

  // ...and so does the Path[] overload.
  Path[] input = new Path[] { path, path1 };
  FileInputFormat.setInputPaths(jobConf, input);
  paths = FileInputFormat.getInputPaths(jobConf);
  assertEquals(2, paths.length);
  assertEquals(path.toString(), paths[0].toString());
  assertEquals(path1.toString(), paths[1].toString());

  // The String overload splits on unescaped commas.
  pathStr.setLength(0);
  String str1 = "{a{b,c},de}";
  String str2 = "xyz";
  String str3 = "x{y,z}";
  pathStr.append(str1);
  pathStr.append(StringUtils.COMMA);
  pathStr.append(str2);
  pathStr.append(StringUtils.COMMA);
  pathStr.append(str3);
  FileInputFormat.setInputPaths(jobConf, pathStr.toString());
  paths = FileInputFormat.getInputPaths(jobConf);
  assertEquals(3, paths.length);
  assertEquals(new Path(workingDir, str1).toString(), paths[0].toString());
  assertEquals(new Path(workingDir, str2).toString(), paths[1].toString());
  assertEquals(new Path(workingDir, str3).toString(), paths[2].toString());

  // addInputPaths appends each comma-separated path.
  pathStr.setLength(0);
  String str4 = "abc";
  String str5 = "pq{r,s}";
  pathStr.append(str4);
  pathStr.append(StringUtils.COMMA);
  pathStr.append(str5);
  FileInputFormat.addInputPaths(jobConf, pathStr.toString());
  paths = FileInputFormat.getInputPaths(jobConf);
  assertEquals(5, paths.length);
  assertEquals(new Path(workingDir, str1).toString(), paths[0].toString());
  assertEquals(new Path(workingDir, str2).toString(), paths[1].toString());
  assertEquals(new Path(workingDir, str3).toString(), paths[2].toString());
  assertEquals(new Path(workingDir, str4).toString(), paths[3].toString());
  assertEquals(new Path(workingDir, str5).toString(), paths[4].toString());
}
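The escaping this test exercises is available directly: in the String overloads, unescaped commas separate paths, so a literal comma in a single directory name must be escaped. A small sketch, with the directory name as a placeholder:

static void demo(JobConf jobConf) {
  String dir = "report,2024"; // directory whose name contains a comma
  // StringUtils.escapeString escapes the comma so the name stays one path.
  FileInputFormat.setInputPaths(jobConf, StringUtils.escapeString(dir));
  assert FileInputFormat.getInputPaths(jobConf).length == 1;
}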