This article collects typical usage examples of the Java class org.apache.accumulo.core.client.mapreduce.AccumuloFileOutputFormat. If you have been wondering what AccumuloFileOutputFormat is for and how to use it, the class examples curated below may help.
The AccumuloFileOutputFormat class belongs to the org.apache.accumulo.core.client.mapreduce package. Nine code examples of the class are shown below, ordered by popularity by default.
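Before the project examples, here is a minimal end-to-end sketch of the pattern all of them follow: a MapReduce job writes Key/Value pairs as RFiles via AccumuloFileOutputFormat, and the resulting directory is bulk-imported into a table. The driver class, mapper class, paths, and table name below are placeholders, not code from any of the projects.

import org.apache.accumulo.core.client.Connector;
import org.apache.accumulo.core.client.mapreduce.AccumuloFileOutputFormat;
import org.apache.accumulo.core.data.Key;
import org.apache.accumulo.core.data.Value;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.Reducer;
import org.apache.hadoop.mapreduce.lib.input.TextInputFormat;

public class BulkWriteSketch {

    /** Writes RFiles with AccumuloFileOutputFormat, then bulk-imports them into "my_table". */
    public static void run(Connector connector, Class<? extends Mapper> mapperClass) throws Exception {
        Job job = Job.getInstance(new Configuration(), "write rfiles");
        job.setJarByClass(BulkWriteSketch.class);
        job.setInputFormatClass(TextInputFormat.class);
        TextInputFormat.setInputPaths(job, new Path("/tmp/bulk/input")); // placeholder input
        job.setMapperClass(mapperClass); // mapper emits Accumulo Key/Value pairs
        job.setMapOutputKeyClass(Key.class);
        job.setMapOutputValueClass(Value.class);
        job.setReducerClass(Reducer.class); // identity reducer; the shuffle sorts keys before RFiles are written
        job.setNumReduceTasks(1);
        job.setOutputKeyClass(Key.class);
        job.setOutputValueClass(Value.class);
        job.setOutputFormatClass(AccumuloFileOutputFormat.class);
        AccumuloFileOutputFormat.setOutputPath(job, new Path("/tmp/bulk/files"));

        if (job.waitForCompletion(true)) {
            // the failures directory must exist and be empty before the import
            connector.tableOperations().importDirectory(
                    "my_table", "/tmp/bulk/files", "/tmp/bulk/failures", false);
        }
    }
}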
Example 1: getHadoopOF
import org.apache.accumulo.core.client.mapreduce.AccumuloFileOutputFormat; // import of the required package/class
/**
 * Creates an output format for writing data from a Flink DataSet to Accumulo.
 *
 * @return a Hadoop output format wrapping {@link AccumuloOutputFormat}
 * @throws AccumuloSecurityException if the connector credentials cannot be set
 * @throws IOException if the Hadoop job cannot be created
 */
public HadoopOutputFormat getHadoopOF() throws AccumuloSecurityException, IOException {
    if (job == null) {
        job = Job.getInstance(new Configuration(), jobName);
    }
    AccumuloOutputFormat.setConnectorInfo(job, accumuloUser, new PasswordToken(accumuloPassword));
    ClientConfiguration clientConfig = new ClientConfiguration();
    clientConfig.withInstance(accumuloInstanceName);
    clientConfig.withZkHosts(accumuloZookeeper);
    AccumuloOutputFormat.setZooKeeperInstance(job, clientConfig);
    AccumuloOutputFormat.setDefaultTableName(job, outTable);
    // sets mapreduce.output.fileoutputformat.outputdir on the job configuration
    AccumuloFileOutputFormat.setOutputPath(job, new Path("/tmp"));
    HadoopOutputFormat<Text, Mutation> hadoopOF =
            new HadoopOutputFormat<>(new AccumuloOutputFormat(), job);
    return hadoopOF;
}
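A caller-side sketch may help show where the returned format is consumed: the wrapper built above is a Flink OutputFormat for Tuple2<Text, Mutation> records, where the Text key names the table each Mutation is written to. The class AccumuloSinkConfig and the method below are placeholders, not part of the project the example comes from.

import org.apache.accumulo.core.data.Mutation;
import org.apache.flink.api.java.DataSet;
import org.apache.flink.api.java.ExecutionEnvironment;
import org.apache.flink.api.java.hadoop.mapreduce.HadoopOutputFormat;
import org.apache.flink.api.java.tuple.Tuple2;
import org.apache.hadoop.io.Text;

public class AccumuloSinkUsage {

    /** Writes a DataSet of (table name, Mutation) pairs through the wrapped AccumuloOutputFormat. */
    static void writeMutations(ExecutionEnvironment env,
                               DataSet<Tuple2<Text, Mutation>> mutations,
                               AccumuloSinkConfig sink) throws Exception { // AccumuloSinkConfig: placeholder for the class declaring getHadoopOF()
        HadoopOutputFormat<Text, Mutation> outputFormat = sink.getHadoopOF();
        mutations.output(outputFormat); // each Tuple2 is handed to AccumuloOutputFormat's record writer
        env.execute("write mutations to Accumulo");
    }
}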
Example 2: run
import org.apache.accumulo.core.client.mapreduce.AccumuloFileOutputFormat; // import of the required package/class
@Override
public int run(String[] args) {
    Opts opts = new Opts();
    opts.parseArgs(BulkIngestExample.class.getName(), args);

    Configuration conf = getConf();
    PrintStream out = null;
    try {
        Job job = Job.getInstance(conf);
        job.setJobName("bulk ingest example");
        job.setJarByClass(this.getClass());

        job.setInputFormatClass(TextInputFormat.class);

        job.setMapperClass(MapClass.class);
        job.setMapOutputKeyClass(Text.class);
        job.setMapOutputValueClass(Text.class);

        job.setReducerClass(ReduceClass.class);
        job.setOutputFormatClass(AccumuloFileOutputFormat.class);

        opts.setAccumuloConfigs(job);
        Connector connector = opts.getConnector();

        TextInputFormat.setInputPaths(job, new Path(opts.inputDir));
        AccumuloFileOutputFormat.setOutputPath(job, new Path(opts.workDir + "/files"));

        FileSystem fs = FileSystem.get(conf);
        out = new PrintStream(new BufferedOutputStream(fs.create(new Path(opts.workDir + "/splits.txt"))));

        Collection<Text> splits = connector.tableOperations().listSplits(opts.getTableName(), 100);
        for (Text split : splits)
            out.println(Base64.getEncoder().encodeToString(TextUtil.getBytes(split)));

        job.setNumReduceTasks(splits.size() + 1);
        out.close();

        job.setPartitionerClass(RangePartitioner.class);
        RangePartitioner.setSplitFile(job, opts.workDir + "/splits.txt");

        job.waitForCompletion(true);
        Path failures = new Path(opts.workDir, "failures");
        fs.delete(failures, true);
        fs.mkdirs(new Path(opts.workDir, "failures"));
        // With HDFS permissions on, we need to make sure the Accumulo user can read/move the rfiles
        FsShell fsShell = new FsShell(conf);
        fsShell.run(new String[] {"-chmod", "-R", "777", opts.workDir});
        connector.tableOperations().importDirectory(opts.getTableName(), opts.workDir + "/files",
                opts.workDir + "/failures", false);
    } catch (Exception e) {
        throw new RuntimeException(e);
    } finally {
        if (out != null)
            out.close();
    }
    return 0;
}
Example 3: bulkImportKvToAccumulo
import org.apache.accumulo.core.client.mapreduce.AccumuloFileOutputFormat; // import of the required package/class
/**
 * Bulk import Key/Value data into specified Accumulo table. This method does not repartition
 * data. One RFile will be created for each partition in the passed in RDD. Ensure the RDD is
 * reasonably partitioned before calling this method.
 *
 * @param data Key/value data to import
 * @param accumuloTable Accumulo table used for import
 * @param opts Bulk import options
 */
public void bulkImportKvToAccumulo(JavaPairRDD<Key, Value> data, String accumuloTable,
        BulkImportOptions opts) {

    Path tempDir = getTempDir(opts);
    Connector conn = chooseConnector(opts);

    try {
        if (hdfs.exists(tempDir)) {
            throw new IllegalArgumentException("HDFS temp dir already exists: " + tempDir.toString());
        }
        hdfs.mkdirs(tempDir);
        Path dataDir = new Path(tempDir.toString() + "/data");
        Path failDir = new Path(tempDir.toString() + "/fail");
        hdfs.mkdirs(failDir);

        // save data to HDFS
        Job job = Job.getInstance(hadoopConfig);
        AccumuloFileOutputFormat.setOutputPath(job, dataDir);

        // must use new API here as saveAsHadoopFile throws exception
        data.saveAsNewAPIHadoopFile(dataDir.toString(), Key.class, Value.class,
                AccumuloFileOutputFormat.class, job.getConfiguration());

        // bulk import data to Accumulo
        log.info("Wrote data for bulk import to HDFS temp directory: {}", dataDir);
        conn.tableOperations().importDirectory(accumuloTable, dataDir.toString(), failDir.toString(),
                false);

        // throw exception if failures directory contains files
        if (hdfs.listFiles(failDir, true).hasNext()) {
            throw new IllegalStateException("Bulk import failed! Found files that failed to import "
                    + "in failures directory: " + failDir);
        }
        log.info("Successfully bulk imported data in {} to '{}' Accumulo table", dataDir,
                accumuloTable);

        // delete data directory
        hdfs.delete(tempDir, true);
        log.info("Deleted HDFS temp directory created for bulk import: {}", tempDir);
        // @formatter:off
    } catch (IOException | TableNotFoundException | AccumuloException
            | AccumuloSecurityException e) {
        // @formatter:on
        throw new IllegalStateException(e);
    }
}
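A hypothetical caller-side sketch of the method above: because AccumuloFileOutputFormat requires the keys inside each RFile to be in sorted order and the method neither repartitions nor sorts, the RDD is usually sorted within partitions first. The class BulkHelper, the partition count, the table name, and the opts variable are assumptions for illustration, not part of the project the example comes from.

import org.apache.accumulo.core.data.Key;
import org.apache.accumulo.core.data.Value;
import org.apache.spark.HashPartitioner;
import org.apache.spark.api.java.JavaPairRDD;

// `BulkHelper` is a placeholder for whatever class declares bulkImportKvToAccumulo;
// `opts` is a BulkImportOptions instance constructed elsewhere.
static void sortAndImport(BulkHelper helper, JavaPairRDD<Key, Value> kvData, BulkImportOptions opts) {
    // Key is Comparable, so natural ordering is used when sorting within each partition
    JavaPairRDD<Key, Value> sorted =
            kvData.repartitionAndSortWithinPartitions(new HashPartitioner(16));

    // one RFile will be written per partition; keys in each partition are now in
    // the order AccumuloFileOutputFormat expects
    helper.bulkImportKvToAccumulo(sorted, "my_table", opts);
}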
Example 4: setupOutput
import org.apache.accumulo.core.client.mapreduce.AccumuloFileOutputFormat; // import of the required package/class
protected void setupOutput(final Job job, final AddElementsFromHdfs operation) {
    job.setOutputFormatClass(AccumuloFileOutputFormat.class);
    FileOutputFormat.setOutputPath(job, new Path(operation.getOutputPath()));
}
Example 5: shouldSetupJob
import org.apache.accumulo.core.client.mapreduce.AccumuloFileOutputFormat; // import of the required package/class
@Test
public void shouldSetupJob() throws IOException {
    // Given
    final JobConf localConf = createLocalConf();
    final FileSystem fs = FileSystem.getLocal(localConf);
    fs.mkdirs(new Path(outputDir));
    fs.mkdirs(new Path(splitsDir));
    try (final BufferedWriter writer = new BufferedWriter(new OutputStreamWriter(fs.create(new Path(splitsFile), true)))) {
        writer.write("1");
    }

    final AccumuloAddElementsFromHdfsJobFactory factory = new AccumuloAddElementsFromHdfsJobFactory();
    final Job job = mock(Job.class);
    final AddElementsFromHdfs operation = new AddElementsFromHdfs.Builder()
            .outputPath(outputDir)
            .addInputMapperPair(inputDir, TextMapperGeneratorImpl.class.getName())
            .useProvidedSplits(true)
            .splitsFilePath(splitsFile)
            .build();
    final AccumuloStore store = mock(AccumuloStore.class);

    given(job.getConfiguration()).willReturn(localConf);

    // When
    factory.setupJob(job, operation, TextMapperGeneratorImpl.class.getName(), store);

    // Then
    verify(job).setJarByClass(factory.getClass());
    verify(job).setJobName(String.format(AccumuloAddElementsFromHdfsJobFactory.INGEST_HDFS_DATA_GENERATOR_S_OUTPUT_S, TextMapperGeneratorImpl.class.getName(), outputDir));

    verify(job).setMapperClass(AddElementsFromHdfsMapper.class);
    verify(job).setMapOutputKeyClass(Key.class);
    verify(job).setMapOutputValueClass(Value.class);

    verify(job).setCombinerClass(AccumuloKeyValueReducer.class);

    verify(job).setReducerClass(AccumuloKeyValueReducer.class);
    verify(job).setOutputKeyClass(Key.class);
    verify(job).setOutputValueClass(Value.class);
    verify(job).setOutputFormatClass(AccumuloFileOutputFormat.class);
    assertEquals(fs.makeQualified(new Path(outputDir)).toString(), job.getConfiguration().get("mapreduce.output.fileoutputformat.outputdir"));

    verify(job).setNumReduceTasks(2);
    verify(job).setPartitionerClass(GafferKeyRangePartitioner.class);
    assertEquals(splitsFile, job.getConfiguration().get(GafferRangePartitioner.class.getName() + ".cutFile"));
}
Example 6: prepareKeyValues
import org.apache.accumulo.core.client.mapreduce.AccumuloFileOutputFormat; // import of the required package/class
@Override
protected void prepareKeyValues(final ImportKeyValuePairRDDToAccumulo operation, final AccumuloKeyRangePartitioner partitioner) throws OperationException {
    final OrderedRDDFunctions orderedRDDFunctions = new OrderedRDDFunctions(operation.getInput(), ORDERING_CLASS_TAG, KEY_CLASS_TAG, VALUE_CLASS_TAG, scala.reflect.ClassTag$.MODULE$.apply(Tuple2.class));
    final PairRDDFunctions pairRDDFunctions = new PairRDDFunctions(orderedRDDFunctions.repartitionAndSortWithinPartitions(partitioner), KEY_CLASS_TAG, VALUE_CLASS_TAG, ORDERING_CLASS_TAG);
    pairRDDFunctions.saveAsNewAPIHadoopFile(operation.getOutputPath(), Key.class, Value.class, AccumuloFileOutputFormat.class, getConfiguration(operation));
}
Example 7: prepareKeyValues
import org.apache.accumulo.core.client.mapreduce.AccumuloFileOutputFormat; // import of the required package/class
@Override
protected void prepareKeyValues(final ImportKeyValueJavaPairRDDToAccumulo operation, final AccumuloKeyRangePartitioner partitioner) throws OperationException {
    final JavaPairRDD<Key, Value> rdd = operation.getInput().repartitionAndSortWithinPartitions(partitioner);
    rdd.saveAsNewAPIHadoopFile(operation.getOutputPath(), Key.class, Value.class, AccumuloFileOutputFormat.class, getConfiguration(operation));
}
Example 8: run
import org.apache.accumulo.core.client.mapreduce.AccumuloFileOutputFormat; // import of the required package/class
@Override
public int run(final String[] args) throws Exception {
    final Configuration conf = getConf();
    conf.set("fs.defaultFS", "file:///");

    final Job job = Job.getInstance(conf, JOB_NAME);
    job.setJarByClass(getClass());

    FileInputFormat.setInputPaths(job, new Path(TEST_DATA_LOCATION));
    FileOutputFormat.setOutputPath(job, cleanPathForReuse(conf, OUTPUT_PATH));

    job.setMapperClass(SimpleFeatureToAccumuloKeyValueMapper.class);
    job.setReducerClass(Reducer.class); // (Identity Reducer)
    job.setInputFormatClass(GeonamesDataFileInputFormat.class);
    job.setOutputFormatClass(AccumuloFileOutputFormat.class);
    job.setMapOutputKeyClass(Key.class);
    job.setMapOutputValueClass(Value.class);
    job.setOutputKeyClass(Key.class);
    job.setOutputValueClass(Value.class);
    job.setNumReduceTasks(1);
    job.setSpeculativeExecution(false);

    final boolean result = job.waitForCompletion(true);

    mapInputRecords = job.getCounters().findCounter(TASK_COUNTER_GROUP_NAME, MAP_INPUT_RECORDS).getValue();
    mapOutputRecords = job.getCounters().findCounter(TASK_COUNTER_GROUP_NAME, MAP_OUTPUT_RECORDS).getValue();

    return result ? 0 : 1;
}
Example 9: testImportFile
import org.apache.accumulo.core.client.mapreduce.AccumuloFileOutputFormat; // import of the required package/class
@Test
public void testImportFile() throws Exception {
    File inDir = new File(tempFolder.getRoot(), "in");
    inDir.mkdir();
    File outDir = new File(tempFolder.getRoot(), "out");
    File failDir = new File(tempFolder.getRoot(), "fail");
    failDir.mkdir();

    // generate some data for map reduce to read
    PrintWriter writer =
            new PrintWriter(new File(inDir, "file1.txt"), StandardCharsets.UTF_8.name());
    writer.println("a,b,c,1");
    writer.println("d,b,c,2");
    writer.println("foo,moo,moo,90");
    writer.close();

    // run map reduce job to generate rfiles
    JobConf jconf = new JobConf();
    jconf.set("mapred.job.tracker", "true");
    jconf.set("fs.defaultFS", "file:///");
    @SuppressWarnings("deprecation")
    Job job = new Job(jconf);
    job.setInputFormatClass(TextInputFormat.class);
    FileInputFormat.setInputPaths(job, inDir.toURI().toString());
    job.setOutputFormatClass(AccumuloFileOutputFormat.class);
    AccumuloFileOutputFormat.setOutputPath(job, new Path(outDir.toURI()));
    job.setMapperClass(TestMapper.class);
    job.setNumReduceTasks(0);
    Assert.assertTrue(job.waitForCompletion(false));

    // bulk import rfiles
    conn.tableOperations().importDirectory(table, outDir.toString(), failDir.toString(), false);

    // read and update data using transactions
    TestTransaction tx1 = new TestTransaction(env);
    TestTransaction tx2 = new TestTransaction(env);

    Assert.assertEquals("1", tx1.gets("a", new Column("b", "c")));
    Assert.assertEquals("2", tx1.gets("d", new Column("b", "c")));
    Assert.assertEquals("90", tx1.gets("foo", new Column("moo", "moo")));

    tx1.set("a", new Column("b", "c"), "3");
    tx1.delete("d", new Column("b", "c"));
    tx1.done();

    // should not see changes from tx1
    Assert.assertEquals("1", tx2.gets("a", new Column("b", "c")));
    Assert.assertEquals("2", tx2.gets("d", new Column("b", "c")));
    Assert.assertEquals("90", tx2.gets("foo", new Column("moo", "moo")));

    TestTransaction tx3 = new TestTransaction(env);

    // should see changes from tx1
    Assert.assertEquals("3", tx3.gets("a", new Column("b", "c")));
    Assert.assertNull(tx3.gets("d", new Column("b", "c")));
    Assert.assertEquals("90", tx3.gets("foo", new Column("moo", "moo")));
}