This article collects typical usage examples of the FileInputFormat.getInputPaths method from the Java class org.apache.hadoop.mapreduce.lib.input.FileInputFormat. If you are unsure what FileInputFormat.getInputPaths does, how to call it, or what real-world usage looks like, the curated code samples below should help. You can also read further into the enclosing class org.apache.hadoop.mapreduce.lib.input.FileInputFormat
for more context.
The following shows 7 code examples of the FileInputFormat.getInputPaths method, sorted by popularity by default.
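Before the examples, here is a minimal, self-contained sketch of the basic call pattern. The job name and the /data/in1, /data/in2 directories are placeholders invented for illustration, not taken from any example below: FileInputFormat.setInputPaths registers one or more input directories on a Job, and FileInputFormat.getInputPaths reads them back from the job context as a Path array.

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;

public class GetInputPathsSketch {
  public static void main(String[] args) throws Exception {
    // Hypothetical job and input directories, used only to illustrate the API.
    Job job = Job.getInstance(new Configuration(), "getInputPaths-sketch");
    FileInputFormat.setInputPaths(job, new Path("/data/in1"), new Path("/data/in2"));

    // getInputPaths returns every input path previously registered on the job.
    Path[] inputs = FileInputFormat.getInputPaths(job);
    for (Path p : inputs) {
      System.out.println(p);
    }
  }
}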
Example 1: cut
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat; //import the package/class the method depends on
public static void cut(String name, String in) throws Exception {
  Job job = createJob(name, in);
  job.setNumReduceTasks(0);
  boolean success = job.waitForCompletion(true);
  if (success) {
    // MapReduce reads an input and writes the result to a new location.
    // Hence it cannot modify data in place, and we have to replace the input
    // with the output.
    Path output = FileOutputFormat.getOutputPath(job);
    FileSystem fs = output.getFileSystem(job.getConfiguration());
    Path[] inputs = FileInputFormat.getInputPaths(job);
    for (int i = 0; i < inputs.length; i++) {
      fs.delete(inputs[i], true);
    }
    fs.rename(output, inputs[0]);
  }
}
Example 2: submit
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat; //import the package/class the method depends on
/**
 * Submit this job to mapred. The state becomes RUNNING if submission
 * is successful, FAILED otherwise.
 */
protected synchronized void submit() {
  try {
    Configuration conf = job.getConfiguration();
    if (conf.getBoolean(CREATE_DIR, false)) {
      FileSystem fs = FileSystem.get(conf);
      Path[] inputPaths = FileInputFormat.getInputPaths(job);
      for (int i = 0; i < inputPaths.length; i++) {
        if (!fs.exists(inputPaths[i])) {
          try {
            fs.mkdirs(inputPaths[i]);
          } catch (IOException e) {
            // ignored: failure to pre-create an input directory is not fatal here
          }
        }
      }
    }
    job.submit();
    this.state = State.RUNNING;
  } catch (Exception ioe) {
    LOG.info(getJobName() + " got an error while submitting ", ioe);
    this.state = State.FAILED;
    this.message = StringUtils.stringifyException(ioe);
  }
}
Example 3: getSplits
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat; //import the package/class the method depends on
@Override
public List<InputSplit> getSplits(JobContext context)
    throws IOException {
  Path[] paths = FileInputFormat.getInputPaths(context);
  return FluentIterable.from(BaseInputFormat.getSplits(context.getConfiguration(), paths))
      .transform(_fromSplit)
      .toList();
}
Example 4: getStatistics
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat; //import the package/class the method depends on
@Override
public BaseStatistics getStatistics(BaseStatistics cachedStats) throws IOException {
  // only gather base statistics for FileInputFormats
  if (!(mapreduceInputFormat instanceof FileInputFormat)) {
    return null;
  }
  JobContext jobContext = new JobContextImpl(configuration, null);
  final FileBaseStatistics cachedFileStats = (cachedStats instanceof FileBaseStatistics) ?
      (FileBaseStatistics) cachedStats : null;
  try {
    final org.apache.hadoop.fs.Path[] paths = FileInputFormat.getInputPaths(jobContext);
    return getFileStats(cachedFileStats, paths, new ArrayList<FileStatus>(1));
  } catch (IOException ioex) {
    if (LOG.isWarnEnabled()) {
      LOG.warn("Could not determine statistics due to an io error: "
          + ioex.getMessage());
    }
  } catch (Throwable t) {
    if (LOG.isErrorEnabled()) {
      LOG.error("Unexpected problem while getting the file statistics: "
          + t.getMessage(), t);
    }
  }
  // no statistics available
  return null;
}
Example 5: getStatistics
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat; //import the package/class the method depends on
@Override
public BaseStatistics getStatistics(BaseStatistics cachedStats) throws IOException {
  // only gather base statistics for FileInputFormats
  if (!(mapreduceInputFormat instanceof FileInputFormat)) {
    return null;
  }
  JobContext jobContext = new JobContextImpl(configuration, null);
  final FileBaseStatistics cachedFileStats = (cachedStats != null && cachedStats instanceof FileBaseStatistics) ?
      (FileBaseStatistics) cachedStats : null;
  try {
    final org.apache.hadoop.fs.Path[] paths = FileInputFormat.getInputPaths(jobContext);
    return getFileStats(cachedFileStats, paths, new ArrayList<FileStatus>(1));
  } catch (IOException ioex) {
    if (LOG.isWarnEnabled()) {
      LOG.warn("Could not determine statistics due to an io error: "
          + ioex.getMessage());
    }
  } catch (Throwable t) {
    if (LOG.isErrorEnabled()) {
      LOG.error("Unexpected problem while getting the file statistics: "
          + t.getMessage(), t);
    }
  }
  // no statistics available
  return null;
}
Example 6: run
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat; //import the package/class the method depends on
public int run(String[] argv) throws Exception {
  Job job = Job.getInstance(getConf());
  job.setJarByClass(GenericMRLoadGenerator.class);
  job.setMapperClass(SampleMapper.class);
  job.setReducerClass(SampleReducer.class);
  if (!parseArgs(argv, job)) {
    return -1;
  }
  Configuration conf = job.getConfiguration();
  if (null == FileOutputFormat.getOutputPath(job)) {
    // No output dir? No writes
    job.setOutputFormatClass(NullOutputFormat.class);
  }
  if (0 == FileInputFormat.getInputPaths(job).length) {
    // No input dir? Generate random data
    System.err.println("No input path; ignoring InputFormat");
    confRandom(job);
  } else if (null != conf.getClass(INDIRECT_INPUT_FORMAT, null)) {
    // specified IndirectInputFormat? Build src list
    JobClient jClient = new JobClient(conf);
    Path tmpDir = new Path("/tmp");
    Random r = new Random();
    Path indirInputFile = new Path(tmpDir,
        Integer.toString(r.nextInt(Integer.MAX_VALUE), 36) + "_files");
    conf.set(INDIRECT_INPUT_FILE, indirInputFile.toString());
    SequenceFile.Writer writer = SequenceFile.createWriter(
        tmpDir.getFileSystem(conf), conf, indirInputFile,
        LongWritable.class, Text.class,
        SequenceFile.CompressionType.NONE);
    try {
      for (Path p : FileInputFormat.getInputPaths(job)) {
        FileSystem fs = p.getFileSystem(conf);
        Stack<Path> pathstack = new Stack<Path>();
        pathstack.push(p);
        while (!pathstack.empty()) {
          for (FileStatus stat : fs.listStatus(pathstack.pop())) {
            if (stat.isDirectory()) {
              if (!stat.getPath().getName().startsWith("_")) {
                pathstack.push(stat.getPath());
              }
            } else {
              writer.sync();
              writer.append(new LongWritable(stat.getLen()),
                  new Text(stat.getPath().toUri().toString()));
            }
          }
        }
      }
    } finally {
      writer.close();
    }
  }
  Date startTime = new Date();
  System.out.println("Job started: " + startTime);
  int ret = job.waitForCompletion(true) ? 0 : 1;
  Date endTime = new Date();
  System.out.println("Job ended: " + endTime);
  System.out.println("The job took " +
      (endTime.getTime() - startTime.getTime()) / 1000 +
      " seconds.");
  return ret;
}
Example 7: run
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat; //import the package/class the method depends on
@Override
public int run(String[] strings) throws Exception {
  Configuration conf = getConf();
  String dir = conf.get(LindenJobConfig.INPUT_DIR, null);
  logger.info("input dir:" + dir);
  Path inputPath = new Path(StringUtils.unEscapeString(dir));
  Path outputPath = new Path(conf.get(LindenJobConfig.OUTPUT_DIR));
  String indexPath = conf.get(LindenJobConfig.INDEX_PATH);
  FileSystem fs = FileSystem.get(conf);
  if (fs.exists(outputPath)) {
    fs.delete(outputPath, true);
  }
  if (fs.exists(new Path(indexPath))) {
    fs.delete(new Path(indexPath), true);
  }
  int numShards = conf.getInt(LindenJobConfig.NUM_SHARDS, 1);
  Shard[] shards = createShards(indexPath, numShards);
  Shard.setIndexShards(conf, shards);
  // empty trash
  (new Trash(conf)).expunge();
  Job job = Job.getInstance(conf, "linden-hadoop-indexing");
  job.setJarByClass(LindenJob.class);
  job.setMapperClass(LindenMapper.class);
  job.setCombinerClass(LindenCombiner.class);
  job.setReducerClass(LindenReducer.class);
  job.setMapOutputKeyClass(Shard.class);
  job.setMapOutputValueClass(IntermediateForm.class);
  job.setOutputKeyClass(Shard.class);
  job.setOutputValueClass(Text.class);
  job.setInputFormatClass(TextInputFormat.class);
  job.setOutputFormatClass(IndexUpdateOutputFormat.class);
  job.setReduceSpeculativeExecution(false);
  job.setNumReduceTasks(numShards);
  String lindenSchemaFile = conf.get(LindenJobConfig.SCHEMA_FILE_URL);
  if (lindenSchemaFile == null) {
    throw new IOException("no schema file is found");
  }
  logger.info("Adding schema file: " + lindenSchemaFile);
  job.addCacheFile(new URI(lindenSchemaFile + "#lindenSchema"));
  String lindenPropertiesFile = conf.get(LindenJobConfig.LINDEN_PROPERTIES_FILE_URL);
  if (lindenPropertiesFile == null) {
    throw new IOException("no linden properties file is found");
  }
  logger.info("Adding linden properties file: " + lindenPropertiesFile);
  job.addCacheFile(new URI(lindenPropertiesFile + "#lindenProperties"));
  FileInputFormat.setInputPaths(job, inputPath);
  FileOutputFormat.setOutputPath(job, outputPath);
  Path[] inputs = FileInputFormat.getInputPaths(job);
  StringBuilder buffer = new StringBuilder(inputs[0].toString());
  for (int i = 1; i < inputs.length; i++) {
    buffer.append(",");
    buffer.append(inputs[i].toString());
  }
  logger.info("mapreduce.input.dir = " + buffer.toString());
  logger.info("mapreduce.output.dir = " + FileOutputFormat.getOutputPath(job).toString());
  logger.info("mapreduce.job.num.reduce.tasks = " + job.getNumReduceTasks());
  logger.info(shards.length + " shards = " + conf.get(LindenJobConfig.INDEX_SHARDS));
  logger.info("mapreduce.input.format.class = " + job.getInputFormatClass());
  logger.info("mapreduce.output.format.class = " + job.getOutputFormatClass());
  logger.info("mapreduce.cluster.temp.dir = " + conf.get(MRJobConfig.TEMP_DIR));
  job.waitForCompletion(true);
  if (!job.isSuccessful()) {
    throw new RuntimeException("Job failed");
  }
  return 0;
}