This article collects typical usage examples of the Java method org.apache.hadoop.mapreduce.lib.input.SequenceFileInputFormat.setInputPaths. If you are wondering how SequenceFileInputFormat.setInputPaths is used in Java, or are looking for concrete examples of it, the curated code samples here may help. You can also explore further usage examples of its enclosing class, org.apache.hadoop.mapreduce.lib.input.SequenceFileInputFormat.
Six code examples of the SequenceFileInputFormat.setInputPaths method are shown below, sorted by popularity by default.
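Before the examples, here is a minimal, self-contained sketch of how SequenceFileInputFormat.setInputPaths is typically wired into a Job using the Hadoop 2.x MapReduce API. The class name, input/output paths, and key/value types below are illustrative assumptions, not taken from the examples that follow.
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.BytesWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.lib.input.SequenceFileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.SequenceFileOutputFormat;

public class SequenceFileCopySketch {

    public static void main(String[] args) throws Exception {
        Configuration conf = new Configuration();
        Job job = Job.getInstance(conf, "sequence-file-copy"); // job name is arbitrary
        job.setJarByClass(SequenceFileCopySketch.class);

        // Read existing SequenceFiles; no Mapper class is set, so the default
        // (identity) Mapper passes each key/value pair straight through.
        job.setInputFormatClass(SequenceFileInputFormat.class);

        // setInputPaths accepts one or more Path arguments (or a
        // comma-separated String); these paths are placeholders.
        SequenceFileInputFormat.setInputPaths(job,
                new Path("/data/in/part-r-00000"),
                new Path("/data/in/part-r-00001"));

        // Assumes the input files store Text keys and BytesWritable values.
        job.setOutputKeyClass(Text.class);
        job.setOutputValueClass(BytesWritable.class);

        // Map-only pass-through: write the records back out as a SequenceFile.
        job.setNumReduceTasks(0);
        job.setOutputFormatClass(SequenceFileOutputFormat.class);
        SequenceFileOutputFormat.setOutputPath(job, new Path("/data/out"));

        System.exit(job.waitForCompletion(true) ? 0 : 1);
    }
}
As the examples below show, the same call also accepts an array of paths (examples 1, 2 and 6) or a single path resolved from configuration (examples 4 and 5).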
Example 1: createJob
import org.apache.hadoop.mapreduce.lib.input.SequenceFileInputFormat; // import the package/class this method depends on
public static Job createJob(Path[] inputPaths, Path outputPath, Map<String, String> metadata, Configuration config)
        throws IOException
{
    final Job job = new Job(config);

    job.setInputFormatClass(SequenceFileInputFormat.class);
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(BytesWritable.class);
    job.setOutputFormatClass(MetadataSequenceFileOutputFormat.class);

    SequenceFileInputFormat.setInputPaths(job, inputPaths);
    SequenceFileOutputFormat.setOutputPath(job, outputPath);
    SequenceFileOutputFormat.setCompressOutput(job, true);
    SequenceFileOutputFormat.setOutputCompressorClass(job, DefaultCodec.class);
    SequenceFileOutputFormat.setOutputCompressionType(job, CompressionType.BLOCK);

    if (metadata != null)
        MetadataConfiguration.setMetadata(metadata, job.getConfiguration());

    return job;
}
Example 2: run
import org.apache.hadoop.mapreduce.lib.input.SequenceFileInputFormat; // import the package/class this method depends on
@Override
public int run(String[] args) throws Exception {
    // Create the output path
    Path outpath = new Path(SequenceFileUtility.convertToURI(this.outputFilePath).toString());
    System.out.println("It is all going to: " + outpath);

    Path[] sequenceFiles = SequenceFileUtility.getFilePaths(inputFilePath, "part");

    Job job = new Job(this.getConf(), "featureselect");
    job.setNumReduceTasks(1);
    job.setJarByClass(SequenceFileByteImageFeatureSelector.class);
    job.setOutputKeyClass(IntWritable.class);
    job.setOutputValueClass(BytesWritable.class);

    job.setMapperClass(FeatureSelect.Map.class);
    job.setReducerClass(FeatureSelect.Reduce.class);
    job.setInputFormatClass(SequenceFileInputFormat.class);
    job.setOutputFormatClass(SequenceFileOutputFormat.class);

    job.getConfiguration().setStrings(FeatureSelect.FILETYPE_KEY, new String[]{options.fileType});
    job.getConfiguration().setStrings(FeatureSelect.NFEATURE_KEY, new String[]{"" + this.nRandomRows});

    ((JobConf) job.getConfiguration()).setNumTasksToExecutePerJvm(-1);

    SequenceFileInputFormat.setInputPaths(job, sequenceFiles);
    SequenceFileOutputFormat.setOutputPath(job, outpath);
    SequenceFileOutputFormat.setCompressOutput(job, false);

    job.waitForCompletion(true);
    return 0;
}
Example 3: getRandomRows
import org.apache.hadoop.mapreduce.lib.input.SequenceFileInputFormat; // import the package/class this method depends on
public Path getRandomRows(int k) throws IOException, InterruptedException, ClassNotFoundException {
    // Create the output path
    File tmpFile = File.createTempFile("feature", ".select");
    tmpFile.delete();
    Path outpath = new Path(SequenceFileUtility.convertToURI(tmpFile.getAbsolutePath()).toString());
    System.out.println("It is all going to: " + outpath);

    Path[] sequenceFiles = SequenceFileUtility.getFilePaths(sequenceFilePath, "part");

    Configuration conf = new Configuration();
    Job job = new Job(conf, "featureselect");
    job.setNumReduceTasks(1);
    job.setJarByClass(SequenceFileByteDataSelector.class);
    job.setOutputKeyClass(IntWritable.class);
    job.setOutputValueClass(BytesWritable.class);

    job.setMapperClass(FeatureSelect.Map.class);
    // job.setCombinerClass(FeatureSelect.Reduce.class);
    job.setReducerClass(FeatureSelect.Reduce.class);
    job.setInputFormatClass(SequenceFileInputFormat.class);
    job.setOutputFormatClass(SequenceFileOutputFormat.class);
    // job.setOutputFormatClass(TextOutputFormat.class);

    job.getConfiguration().setStrings(FeatureSelect.FILETYPE_KEY, new String[]{fileType});
    job.getConfiguration().setStrings(FeatureSelect.NFEATURE_KEY, new String[]{"" + k});

    SequenceFileInputFormat.setInputPaths(job, sequenceFiles);
    SequenceFileOutputFormat.setOutputPath(job, outpath);
    SequenceFileOutputFormat.setCompressOutput(job, false);
    // FileOutputFormat.setOutputPath(job, outpath);

    job.waitForCompletion(true);
    return outpath;
}
Example 4: runIngestJob
import org.apache.hadoop.mapreduce.lib.input.SequenceFileInputFormat; // import the package/class this method depends on
private int runIngestJob() throws Exception
{
    Job ingestJob = new Job(getConf(), "Ingest Partitioned Wikipedia");
    Configuration ingestConf = ingestJob.getConfiguration();
    ingestConf.set("mapred.map.tasks.speculative.execution", "false");

    configureIngestJob(ingestJob);

    String tablename = WikipediaConfiguration.getTableName(ingestConf);
    Connector connector = WikipediaConfiguration.getConnector(ingestConf);
    TableOperations tops = connector.tableOperations();
    createTables(tops, tablename);

    ingestJob.setMapperClass(WikipediaPartitionedMapper.class);
    ingestJob.setNumReduceTasks(0);

    // setup input format
    ingestJob.setInputFormatClass(SequenceFileInputFormat.class);
    SequenceFileInputFormat.setInputPaths(ingestJob, WikipediaConfiguration.getPartitionedArticlesPath(ingestConf));
    // TODO make split size configurable
    SequenceFileInputFormat.setMinInputSplitSize(ingestJob, WikipediaConfiguration.getMinInputSplitSize(ingestConf));

    // setup output format
    ingestJob.setMapOutputKeyClass(Text.class);
    ingestJob.setMapOutputValueClass(Mutation.class);

    if (WikipediaConfiguration.bulkIngest(ingestConf))
    {
        ingestJob.setOutputFormatClass(SortingRFileOutputFormat.class);
        SortingRFileOutputFormat.setMaxBufferSize(ingestConf, WikipediaConfiguration.bulkIngestBufferSize(ingestConf));
        String bulkIngestDir = WikipediaConfiguration.bulkIngestDir(ingestConf);
        if (bulkIngestDir == null)
        {
            log.error("Bulk ingest dir not set");
            return 1;
        }
        SortingRFileOutputFormat.setPathName(ingestConf, WikipediaConfiguration.bulkIngestDir(ingestConf));
    }
    else
    {
        ingestJob.setOutputFormatClass(AccumuloOutputFormat.class);

        ClientConfiguration clientConfig = new ClientConfiguration();
        clientConfig.setProperty(ClientProperty.INSTANCE_NAME, WikipediaConfiguration.getInstanceName(ingestConf));
        clientConfig.setProperty(ClientProperty.INSTANCE_ZK_HOST, WikipediaConfiguration.getZookeepers(ingestConf));

        String user = WikipediaConfiguration.getUser(ingestConf);
        byte[] password = WikipediaConfiguration.getPassword(ingestConf);
        AccumuloOutputFormat.setConnectorInfo(ingestJob, user, new PasswordToken(password));
        AccumuloOutputFormat.setZooKeeperInstance(ingestJob, clientConfig);
    }

    return ingestJob.waitForCompletion(true) ? 0 : 1;
}
Example 5: run
import org.apache.hadoop.mapreduce.lib.input.SequenceFileInputFormat; // import the package/class this method depends on
public int run(String[] args) throws Exception {
    SortConfig config = new SortConfig();
    config.fromArray(args);

    Job job = Job.getInstance(getConf());
    job.setJobName("sort");
    job.setJarByClass(SortDriver.class);

    // define the paths
    Path inputPath = new Path(config.getInput());
    Path partitionFilePath = new Path(config.getPartition());
    Path outputPath = new Path(config.getOutput());
    Path metaPath = new Path(config.getMeta());
    LOGGER.info("use " + inputPath.toString() + " as sort input");
    LOGGER.info("use " + partitionFilePath.toString() + " as partition");
    LOGGER.info("use " + outputPath.toString() + " as sort output");
    LOGGER.info("use " + metaPath.toString() + " as meta output");

    // define the mapper
    // use the identity mapper, which is the default implementation
    job.setMapOutputKeyClass(IntWritable.class);
    job.setMapOutputValueClass(Text.class);
    job.setInputFormatClass(SequenceFileInputFormat.class);
    SequenceFileInputFormat.setInputPaths(job, inputPath);

    // define the reducer
    job.getConfiguration().set(SortReducer.META_BASE_CONFIG_NAME, metaPath.toString());
    job.setReducerClass(SortReducer.class);
    job.setNumReduceTasks(NUM_REDUCER);

    // use text output for easier debugging; sequence-file output would likely be faster
    job.setOutputFormatClass(TextOutputFormat.class);
    job.setOutputKeyClass(IntWritable.class);
    job.setOutputValueClass(Text.class);
    TextOutputFormat.setOutputPath(job, outputPath);

    // set the partitioner
    job.setPartitionerClass(TotalOrderPartitioner.class);
    TotalOrderPartitioner.setPartitionFile(job.getConfiguration(), partitionFilePath);

    // set the sampler
    InputSampler.writePartitionFile(job, new InputSampler.RandomSampler(1, 10000));

    // set multiple outputs
    MultipleOutputs.addNamedOutput(job, "meta", TextOutputFormat.class, IntWritable.class, Text.class);

    // clean up the old output paths
    outputPath.getFileSystem(job.getConfiguration()).delete(outputPath, true);
    metaPath.getFileSystem(job.getConfiguration()).delete(metaPath, true);

    // run the job and wait until it completes
    return job.waitForCompletion(true) ? 0 : 1;
}
Example 6: run
import org.apache.hadoop.mapreduce.lib.input.SequenceFileInputFormat; // import the package/class this method depends on
@Override
public int run(String[] args) throws Exception {
    // Create the output path
    Path outpath = new Path(this.outputFilePath);
    System.out.println("It is all going to: " + outpath);

    List<Path> sequenceFiles = new ArrayList<Path>();
    for (String inputFilePath : this.inputFilePaths) {
        Path[] foundPaths = SequenceFileUtility.getFilePaths(inputFilePath, "part");
        for (Path p : foundPaths) {
            sequenceFiles.add(p);
        }
    }

    Job job = new Job(this.getConf(), "featureselect");
    job.setJarByClass(SequenceFileByteImageFeatureSelector.class);
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(BytesWritable.class);
    job.setMapperClass(ImageFeatureSelect.Map.class);
    if (this.nRandomRows == -1) {
        job.setNumReduceTasks(0);
    } else {
        job.setNumReduceTasks(1);
        job.setReducerClass(ImageFeatureSelect.Reduce.class);
    }

    job.setInputFormatClass(SequenceFileInputFormat.class);
    job.setOutputFormatClass(SequenceFileOutputFormat.class);
    job.getConfiguration().setStrings(ImageFeatureSelect.FILETYPE_KEY, new String[]{options.fileType});
    job.getConfiguration().setStrings(ImageFeatureSelect.NFEATURE_KEY, new String[]{"" + nRandomRows});

    SequenceFileInputFormat.setInputPaths(job, sequenceFiles.toArray(new Path[sequenceFiles.size()]));
    SequenceFileOutputFormat.setOutputPath(job, outpath);
    SequenceFileOutputFormat.setCompressOutput(job, false);

    job.waitForCompletion(true);
    return 0;
}