This article compiles typical usage examples of the Java class org.elasticsearch.hadoop.mr.EsOutputFormat. If you are wondering what EsOutputFormat is for and how to use it, the curated examples below should help.
EsOutputFormat belongs to the org.elasticsearch.hadoop.mr package. The following 15 code examples demonstrate the class, sorted by popularity by default.
Example 1: Run
import org.elasticsearch.hadoop.mr.EsOutputFormat; //import the required package/class
public static void Run(String input, Configuration conf)
        throws IOException, ClassNotFoundException, InterruptedException {
    Job job = Job.getInstance(conf);
    // job.setJobName(Hdfs2es.class.getName());
    job.setJarByClass(Hdfs2es.class);

    job.setMapOutputKeyClass(Text.class);
    job.setMapOutputValueClass(Text.class);
    job.setMapperClass(MapTask.class);

    job.setInputFormatClass(SequenceFileInputFormat.class);
    job.setOutputFormatClass(EsOutputFormat.class);

    job.setNumReduceTasks(0);
    job.setOutputKeyClass(NullWritable.class);
    job.setOutputValueClass(Text.class);

    FileInputFormat.addInputPath(job, new Path(input));
    job.setSpeculativeExecution(false);
    job.waitForCompletion(true);
}
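Example 1 references a MapTask mapper whose source is not shown, and it relies on the caller passing in a Configuration that already carries the Elasticsearch settings (es.nodes, es.resource, and es.input.json if the values are JSON documents). Below is a minimal sketch of what such a mapper might look like, assuming the SequenceFile values are Text holding ready-made JSON; the class body and field types are assumptions, not part of the original project:

import java.io.IOException;
import org.apache.hadoop.io.NullWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Mapper;

// Hypothetical mapper for Example 1: forwards each SequenceFile value unchanged.
// With es.input.json=true, EsOutputFormat indexes the Text payload as a JSON document.
public class MapTask extends Mapper<Text, Text, NullWritable, Text> {
    @Override
    protected void map(Text key, Text value, Context context)
            throws IOException, InterruptedException {
        context.write(NullWritable.get(), value);
    }
}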
Example 2: sinkConfInit
import org.elasticsearch.hadoop.mr.EsOutputFormat; //import the required package/class
@Override
public void sinkConfInit(FlowProcess<JobConf> flowProcess, Tap<JobConf, RecordReader, OutputCollector> tap, JobConf conf) {
    conf.setOutputFormat(EsOutputFormat.class);

    // define an output dir to prevent Cascading from setting up a TempHfs and overriding the OutputFormat
    Settings set = loadSettings(conf, false);
    Log log = LogFactory.getLog(EsTap.class);
    InitializationUtils.setValueWriterIfNotSet(set, CascadingValueWriter.class, log);
    InitializationUtils.setValueReaderIfNotSet(set, JdkValueReader.class, log);
    InitializationUtils.setBytesConverterIfNeeded(set, CascadingLocalBytesConverter.class, log);
    InitializationUtils.setFieldExtractorIfNotSet(set, CascadingFieldExtractor.class, log);

    // NB: this property must be set even though it is not used - and since a URI causes problems, use only the resource/file
    //conf.set("mapred.output.dir", set.getTargetUri() + "/" + set.getTargetResource());
    HadoopCfgUtils.setFileOutputFormatDir(conf, set.getResourceWrite());
    HadoopCfgUtils.setOutputCommitterClass(conf, EsOutputFormat.EsOldAPIOutputCommitter.class.getName());

    if (log.isTraceEnabled()) {
        log.trace("Initialized (sink) configuration " + HadoopCfgUtils.asProperties(conf));
    }
}
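For context, sinkConfInit is a Cascading Scheme callback: it runs when an EsTap is used as the sink of a flow, which is also where the EsOldAPIOutputCommitter above gets wired in. A minimal sketch of such a flow, assuming Cascading 2.x and the es-hadoop Cascading integration; the host, paths, and index name are placeholders:

import java.util.Properties;
import cascading.flow.hadoop.HadoopFlowConnector;
import cascading.pipe.Pipe;
import cascading.scheme.hadoop.TextLine;
import cascading.tap.Tap;
import cascading.tap.hadoop.Hfs;
import org.elasticsearch.hadoop.cascading.EsTap;

// Hypothetical driver: copies text lines from HDFS into Elasticsearch.
Properties props = new Properties();
props.setProperty("es.nodes", "localhost:9200");      // placeholder address
Tap source = new Hfs(new TextLine(), "/data/input");  // placeholder input path
Tap sink = new EsTap("my-index/doc");                 // placeholder index/type
Pipe pipe = new Pipe("copy-to-es");
new HadoopFlowConnector(props).connect(source, sink, pipe).complete();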
Example 3: init
import org.elasticsearch.hadoop.mr.EsOutputFormat; //import the required package/class
private void init(TableDesc tableDesc, boolean read) {
    Configuration cfg = getConf();
    // NB: we can't just merge the table properties in; we need to save them per input/output, otherwise clashes occur which confuse Hive
    Settings settings = HadoopSettingsManager.loadFrom(cfg);
    //settings.setProperty((read ? HiveConstants.INPUT_TBL_PROPERTIES : HiveConstants.OUTPUT_TBL_PROPERTIES), IOUtils.propsToString(tableDesc.getProperties()));

    if (read) {
        // no generic setting
    }
    else {
        // replace the default committer when using the old API
        HadoopCfgUtils.setOutputCommitterClass(cfg, EsOutputFormat.EsOutputCommitter.class.getName());
    }

    Assert.hasText(tableDesc.getProperties().getProperty(TABLE_LOCATION), String.format(
            "no table location [%s] declared by Hive resulting in abnormal execution;", TABLE_LOCATION));
}
Example 4: testBasicMultiSave
import org.elasticsearch.hadoop.mr.EsOutputFormat; //import the required package/class
@Test
public void testBasicMultiSave() throws Exception {
    JobConf conf = createJobConf();
    conf.set(ConfigurationOptions.ES_RESOURCE, "oldapi/multi-save");

    MultiOutputFormat.addOutputFormat(conf, EsOutputFormat.class);
    MultiOutputFormat.addOutputFormat(conf, PrintStreamOutputFormat.class);
    //MultiOutputFormat.addOutputFormat(conf, TextOutputFormat.class);

    PrintStreamOutputFormat.stream(conf, Stream.OUT);
    //conf.set("mapred.output.dir", "foo/bar");
    //FileOutputFormat.setOutputPath(conf, new Path("foo/bar"));

    conf.setClass("mapred.output.format.class", MultiOutputFormat.class, OutputFormat.class);
    runJob(conf);
}
Example 5: run
import org.elasticsearch.hadoop.mr.EsOutputFormat; //import the required package/class
@Override
public int run(String[] args) throws Exception {
    Configuration conf = super.getConf();
    optParser(args);

    conf.set("es.nodes", this.servers);
    conf.set("prefix", this.prefix);
    conf.set("es.resource", this.index + "/{" + this.prefix + "SiteName}");
    conf.set("es.mapping.id", this.prefix + "Id");

    Job job = Job.getInstance(conf, "Description");
    job.setJarByClass(EsFeeder.class);
    job.setMapperClass(datacentermr.EsFeederMapper.class);
    job.setSpeculativeExecution(false);
    job.setOutputFormatClass(EsOutputFormat.class);
    job.setOutputKeyClass(NullWritable.class);
    job.setMapOutputValueClass(MapWritable.class);
    job.setNumReduceTasks(0);

    FileInputFormat.addInputPath(job, new Path(this.input));

    System.exit(job.waitForCompletion(true) ? 0 : 1);
    return 0;
}
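Example 5 leaves out the referenced datacentermr.EsFeederMapper. Since the driver routes documents with es.resource = index/{prefixSiteName} and picks ids via es.mapping.id = prefixId, the mapper must emit MapWritable documents containing those two fields. A minimal sketch under that assumption; the tab-separated input layout and the class body are guesses, not the original code:

import java.io.IOException;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.MapWritable;
import org.apache.hadoop.io.NullWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Mapper;

// Hypothetical mapper for Example 5: builds a MapWritable per input line.
// The {prefix}Id and {prefix}SiteName fields feed es.mapping.id and the
// multi-resource pattern in es.resource set by the driver.
public class EsFeederMapper extends Mapper<LongWritable, Text, NullWritable, MapWritable> {
    @Override
    protected void map(LongWritable key, Text value, Context context)
            throws IOException, InterruptedException {
        String prefix = context.getConfiguration().get("prefix");
        String[] fields = value.toString().split("\t"); // assumed TSV layout: id, siteName, ...
        MapWritable doc = new MapWritable();
        doc.put(new Text(prefix + "Id"), new Text(fields[0]));
        doc.put(new Text(prefix + "SiteName"), new Text(fields[1]));
        context.write(NullWritable.get(), doc);
    }
}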
Example 6: testBasicMultiSave
import org.elasticsearch.hadoop.mr.EsOutputFormat; //import the required package/class
@Test
public void testBasicMultiSave() throws Exception {
    JobConf conf = createJobConf();
    conf.set(ConfigurationOptions.ES_RESOURCE, "oldapi-multi-save/data");

    MultiOutputFormat.addOutputFormat(conf, EsOutputFormat.class);
    MultiOutputFormat.addOutputFormat(conf, PrintStreamOutputFormat.class);
    //MultiOutputFormat.addOutputFormat(conf, TextOutputFormat.class);

    PrintStreamOutputFormat.stream(conf, Stream.OUT);
    //conf.set("mapred.output.dir", "foo/bar");
    //FileOutputFormat.setOutputPath(conf, new Path("foo/bar"));

    conf.setClass("mapred.output.format.class", MultiOutputFormat.class, OutputFormat.class);
    runJob(conf);
}
Example 7: configs
import org.elasticsearch.hadoop.mr.EsOutputFormat; //import the required package/class
@Parameters
public static Collection<Object[]> configs() throws IOException {
    JobConf conf = HdpBootstrap.hadoopConfig();
    conf.setInputFormat(SplittableTextInputFormat.class);
    conf.setOutputFormat(EsOutputFormat.class);
    conf.setReducerClass(IdentityReducer.class);
    HadoopCfgUtils.setGenericOptions(conf);
    conf.setNumMapTasks(2);
    conf.setInt("actual.splits", 2);
    conf.setNumReduceTasks(0);

    JobConf standard = new JobConf(conf);
    standard.setMapperClass(TabMapper.class);
    standard.setMapOutputValueClass(LinkedMapWritable.class);
    standard.set(ConfigurationOptions.ES_INPUT_JSON, "false");
    FileInputFormat.setInputPaths(standard, new Path(TestUtils.gibberishDat(conf)));

    JobConf json = new JobConf(conf);
    json.setMapperClass(IdentityMapper.class);
    json.setMapOutputValueClass(Text.class);
    json.set(ConfigurationOptions.ES_INPUT_JSON, "true");
    FileInputFormat.setInputPaths(json, new Path(TestUtils.gibberishJson(conf)));

    return Arrays.asList(new Object[][] { { standard, "" }, { json, "json-" } });
}
Example 8: configs
import org.elasticsearch.hadoop.mr.EsOutputFormat; //import the required package/class
@Parameters
public static Collection<Object[]> configs() throws IOException {
    Configuration conf = HdpBootstrap.hadoopConfig();
    HadoopCfgUtils.setGenericOptions(conf);

    Job job = new Job(conf);
    job.setInputFormatClass(TextInputFormat.class);
    job.setOutputFormatClass(EsOutputFormat.class);
    job.setMapOutputValueClass(LinkedMapWritable.class);
    job.setMapperClass(TabMapper.class);
    job.setNumReduceTasks(0);

    Job standard = new Job(job.getConfiguration());
    File fl = new File(TestUtils.sampleArtistsDat());
    long splitSize = fl.length() / 3;
    TextInputFormat.setMaxInputSplitSize(standard, splitSize);
    TextInputFormat.setMinInputSplitSize(standard, 50);
    standard.setMapperClass(TabMapper.class);
    standard.setMapOutputValueClass(LinkedMapWritable.class);
    TextInputFormat.addInputPath(standard, new Path(TestUtils.sampleArtistsDat(conf)));

    Job json = new Job(job.getConfiguration());
    json.setMapperClass(Mapper.class);
    json.setMapOutputValueClass(Text.class);
    json.getConfiguration().set(ConfigurationOptions.ES_INPUT_JSON, "true");
    TextInputFormat.addInputPath(json, new Path(TestUtils.sampleArtistsJson(conf)));

    return Arrays.asList(new Object[][] {
            { standard, "" },
            { json, "json-" } });
}
Example 9: testBasicMultiSave
import org.elasticsearch.hadoop.mr.EsOutputFormat; //import the required package/class
@Test
public void testBasicMultiSave() throws Exception {
    Configuration conf = createConf();
    conf.set(ConfigurationOptions.ES_RESOURCE, "mrnewapi/multi-save");

    MultiOutputFormat.addOutputFormat(conf, EsOutputFormat.class);
    MultiOutputFormat.addOutputFormat(conf, PrintStreamOutputFormat.class);
    //MultiOutputFormat.addOutputFormat(conf, TextOutputFormat.class);

    PrintStreamOutputFormat.stream(conf, Stream.OUT);
    //conf.set("mapred.output.dir", "foo/bar");

    conf.setClass("mapreduce.outputformat.class", MultiOutputFormat.class, OutputFormat.class);
    runJob(conf);
}
Example 10: configs
import org.elasticsearch.hadoop.mr.EsOutputFormat; //import the required package/class
@Parameters
public static Collection<Object[]> configs() {
    JobConf conf = HdpBootstrap.hadoopConfig();
    conf.setInputFormat(SplittableTextInputFormat.class);
    conf.setOutputFormat(EsOutputFormat.class);
    conf.setReducerClass(IdentityReducer.class);
    HadoopCfgUtils.setGenericOptions(conf);
    conf.setNumMapTasks(2);
    conf.setInt("actual.splits", 2);
    conf.setNumReduceTasks(0);

    JobConf standard = new JobConf(conf);
    standard.setMapperClass(TabMapper.class);
    standard.setMapOutputValueClass(LinkedMapWritable.class);
    standard.set(ConfigurationOptions.ES_INPUT_JSON, "false");
    FileInputFormat.setInputPaths(standard, new Path(TestUtils.sampleArtistsDat(conf)));

    JobConf json = new JobConf(conf);
    json.setMapperClass(IdentityMapper.class);
    json.setMapOutputValueClass(Text.class);
    json.set(ConfigurationOptions.ES_INPUT_JSON, "true");
    FileInputFormat.setInputPaths(json, new Path(TestUtils.sampleArtistsJson(conf)));

    return Arrays.asList(new Object[][] {
            { standard, "" },
            { json, "json-" }
    });
}
Example 11: runMrJob
import org.elasticsearch.hadoop.mr.EsOutputFormat; //import the required package/class
/**
 * Runs the MapReduce job; Elasticsearch indexes the documents automatically.
 * @throws IOException
 */
public static void runMrJob() throws IOException {
    JobConf conf = new JobConf();
    conf.set("es.nodes", "192.168.13.134:9200");    // Elasticsearch address
    conf.set("es.resource", "docindex/attachment"); // target index/type
    conf.set("es.mapping.id", "file");              // field to use as the document id
    conf.set("es.input.json", "yes");               // input records are already JSON

    conf.setOutputFormat(EsOutputFormat.class);     // output format
    conf.setMapOutputValueClass(Text.class);        // map output value type
    conf.setMapperClass(EsMapper.class);            // mapper class

    // run the job: the JSON documents are written to Elasticsearch, which indexes them automatically
    JobClient.runJob(conf);
}
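Example 11 references an EsMapper that is not shown. Because es.input.json is set to yes, the mapper only needs to hand each JSON line through untouched. A minimal old-API sketch under that assumption (the class body is a guess, not the original code):

import java.io.IOException;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.NullWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapred.MapReduceBase;
import org.apache.hadoop.mapred.Mapper;
import org.apache.hadoop.mapred.OutputCollector;
import org.apache.hadoop.mapred.Reporter;

// Hypothetical old-API mapper for Example 11: each input line is assumed to be
// a complete JSON document and is forwarded as-is for EsOutputFormat to index.
public class EsMapper extends MapReduceBase
        implements Mapper<LongWritable, Text, NullWritable, Text> {
    @Override
    public void map(LongWritable key, Text value,
            OutputCollector<NullWritable, Text> output, Reporter reporter)
            throws IOException {
        output.collect(NullWritable.get(), value);
    }
}

Note that the driver above never sets an input path; in practice something like FileInputFormat.setInputPaths(conf, new Path("/path/to/json")) would also be required before JobClient.runJob(conf).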
Example 12: ESEntityExtractor
import org.elasticsearch.hadoop.mr.EsOutputFormat; //import the required package/class
public ESEntityExtractor(Class<T> t) {
    super();
    this.deepJobConfig = new ESDeepJobConfig(t);
    this.inputFormat = new EsInputFormat<>();
    this.outputFormat = new EsOutputFormat();
}
Example 13: testBasicMultiSave
import org.elasticsearch.hadoop.mr.EsOutputFormat; //import the required package/class
@Test
public void testBasicMultiSave() throws Exception {
    Configuration conf = createConf();
    conf.set(ConfigurationOptions.ES_RESOURCE, "mrnewapi-multi-save/data");

    MultiOutputFormat.addOutputFormat(conf, EsOutputFormat.class);
    MultiOutputFormat.addOutputFormat(conf, PrintStreamOutputFormat.class);
    //MultiOutputFormat.addOutputFormat(conf, TextOutputFormat.class);

    PrintStreamOutputFormat.stream(conf, Stream.OUT);
    //conf.set("mapred.output.dir", "foo/bar");

    conf.setClass("mapreduce.outputformat.class", MultiOutputFormat.class, OutputFormat.class);
    runJob(conf);
}
Example 14: getOutputCommitter
import org.elasticsearch.hadoop.mr.EsOutputFormat; //import the required package/class
@Override
public OutputCommitter getOutputCommitter(TaskAttemptContext context) throws IOException, InterruptedException {
    return new EsOutputFormat.EsOutputCommitter();
}
Example 15: getOutputFormat
import org.elasticsearch.hadoop.mr.EsOutputFormat; //import the required package/class
@SuppressWarnings("unchecked")
@Override
public OutputFormat<Object, Map<Writable, Writable>> getOutputFormat() throws IOException {
    return new EsOutputFormat();
}