This article collects typical usage examples of the Java class cascading.flow.FlowDef. If you are wondering what FlowDef does, how to use it, or where to find it in real code, the curated snippets below should help.
The FlowDef class belongs to the cascading.flow package. 15 code examples are shown below, sorted by popularity.
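Before the examples, here is the shape they all share: create a FlowDef, bind source and sink Taps to Pipes, then hand the FlowDef to a platform-specific FlowConnector and run the resulting Flow. A minimal, self-contained sketch using the local platform (the class name and file paths are illustrative, not taken from any example below):

import cascading.flow.FlowDef;
import cascading.flow.local.LocalFlowConnector;
import cascading.pipe.Pipe;
import cascading.scheme.local.TextLine;
import cascading.tap.Tap;
import cascading.tap.local.FileTap;
import cascading.tuple.Fields;

public class FlowDefSkeleton {
    public static void main(String[] args) {
        // illustrative local taps; swap in Hfs taps for a Hadoop run
        Tap in = new FileTap(new TextLine(new Fields("line")), "input.txt");
        Tap out = new FileTap(new TextLine(new Fields("line")), "output.txt");

        // a pass-through pipe connecting source to sink
        Pipe copy = new Pipe("copy");

        // declare the flow: name it, bind the taps to the pipe
        FlowDef flowDef = FlowDef.flowDef()
                .setName("copy")
                .addSource(copy, in)
                .addTailSink(copy, out);

        // pick a connector for the target platform and run to completion
        new LocalFlowConnector().connect(flowDef).complete();
    }
}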
Example 1: testCreateCommonCrawlFlowDef

import cascading.flow.FlowDef; // import the FlowDef dependency

@Test
public void testCreateCommonCrawlFlowDef() throws Exception {
    Properties properties = new ConfigReader().renderProperties(CommonCrawlIndexTest.class);
    String sourcePath = properties.getProperty("inPath");
    String sinkPath = properties.getProperty("testCreateCommonCrawlFlowDefOutput");
    String sinkValidationPath = properties.getProperty("testCreateCommonCrawlFlowDefOutputValidation");
    // create the Cascading "source" (input) tap to read the Common Crawl WAT file(s)
    Tap source = new FileTap(new TextLine(new Fields("line")), sourcePath);
    // create the Cascading "sink" (output) tap to dump the results
    Tap sink = new FileTap(new TextLine(new Fields("line")), sinkPath);
    // build the Cascading flow definition
    FlowDef flowDef = CommonCrawlIndex.createCommonCrawlFlowDef(source, sink);
    new LocalFlowConnector(properties).connect(flowDef).complete();
    Assert.sameContent(sinkPath, sinkValidationPath);
}
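The FlowDef here is assembled by the class under test (createCommonCrawlFlowDef), so the test only supplies local file taps, runs the flow through a LocalFlowConnector, and diffs the sink output against a validation file.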
Example 2: main

import cascading.flow.FlowDef; // import the FlowDef dependency

public static void main(String... args) {
    String inPath = args[0];
    String outPath = args[1];
    Properties properties = new Properties();
    AppProps.setApplicationJarClass(properties, Main.class);
    HadoopFlowConnector flowConnector = new HadoopFlowConnector(properties);
    // create the source tap
    Tap inTap = new Hfs(new TextDelimited(true, "\t"), inPath);
    // create the sink tap
    Tap outTap = new Hfs(new TextDelimited(true, "\t"), outPath);
    // specify a pipe to connect the taps
    Pipe copyPipe = new Pipe("copy");
    // connect the taps, pipes, etc., into a flow
    FlowDef flowDef = FlowDef.flowDef().addSource(copyPipe, inTap).addTailSink(copyPipe, outTap);
    // run the flow
    flowConnector.connect(flowDef).complete();
}
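This is the canonical copy flow: one pass-through Pipe, one addSource and one addTailSink call, executed on Hadoop. Because a FlowDef is a declarative description, the same definition would run unchanged on another platform by swapping the connector.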
Example 3: testCascadeConnector

import cascading.flow.FlowDef; // import the FlowDef dependency

@Test
public void testCascadeConnector() {
    Pipe copy = new Pipe("copy");
    Properties cfg = new TestSettings().getProperties();
    FlowDef flow = new FlowDef().addSource(copy, sourceTap())
            .addTailSink(copy, new EsTap("cascading-local/cascade-connector"));
    FlowConnector connector = new LocalFlowConnector(cfg);
    Flow[] flows = new Flow[] { connector.connect(flow) };
    CascadeConnector cascadeConnector = new CascadeConnector(cfg);
    cascadeConnector.connect(flows).complete();
}
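Instead of completing the flow directly, the test wraps it in a Cascade via CascadeConnector, the mechanism Cascading uses to schedule multiple dependent flows; here the cascade contains a single flow writing to Elasticsearch through an EsTap.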
Example 4: testWriteToES

import cascading.flow.FlowDef; // import the FlowDef dependency

@Test
public void testWriteToES() throws Exception {
    Tap in = sourceTap();
    Tap out = new EsTap("cascading-hadoop/artists", new Fields("name", "url", "picture"));
    Pipe pipe = new Pipe("copy");
    FlowDef flowDef = FlowDef.flowDef().addSource(pipe, in).addTailSink(pipe, out);
    StatsUtils.proxy(
            new HadoopFlowConnector(HdpBootstrap.asProperties(CascadingHadoopSuite.configuration))
                    .connect(flowDef)).complete();
}
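The sink fields (name, url, picture) are declared on the EsTap itself; StatsUtils.proxy appears to be a test-suite helper that wraps the flow so its runtime statistics can be inspected after completion.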
Example 5: testCascadeConnector

import cascading.flow.FlowDef; // import the FlowDef dependency

@Test
public void testCascadeConnector() {
    Pipe copy = new Pipe("copy");
    Properties cfg = HdpBootstrap.asProperties(CascadingHadoopSuite.configuration);
    FlowDef flow = new FlowDef().addSource(copy, sourceTap())
            .addTailSink(copy, new EsTap("cascading-hadoop/cascade-connector"));
    FlowConnector connector = new HadoopFlowConnector(cfg);
    Flow[] flows = new Flow[] { connector.connect(flow) };
    CascadeConnector cascadeConnector = new CascadeConnector(cfg);
    cascadeConnector.connect(flows).complete();
}
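The Hadoop counterpart of example 3: the same single-flow cascade, but configured from the suite's Hadoop configuration and executed through a HadoopFlowConnector.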
Example 6: main

import cascading.flow.FlowDef; // import the FlowDef dependency

public static void main(String[] args) {
    if (args.length < 2) {
        throw new IllegalArgumentException("Please specify input and output paths as arguments.");
    }
    Fields token = new Fields("token", String.class);
    Fields text = new Fields("text");
    RegexSplitGenerator splitter = new RegexSplitGenerator(token, "\\s+");
    // only returns "token"
    Pipe docPipe = new Each("token", text, splitter, Fields.RESULTS);
    Pipe wcPipe = new Pipe("wc", docPipe);
    wcPipe = new AggregateBy(wcPipe, token, new CountBy(new Fields("count")));
    Tap inTap = new Hfs(new TextDelimited(text, "\n"), args[0]);
    Tap outTap = new Hfs(new TextDelimited(false, "\n"), args[1], SinkMode.REPLACE);
    FlowDef flowDef = FlowDef.flowDef().setName("wc")
            .addSource(docPipe, inTap)
            .addTailSink(wcPipe, outTap);
    FlowConnector flowConnector = new FlinkConnector();
    Flow wcFlow = flowConnector.connect(flowDef);
    wcFlow.complete();
}
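A word count running on the Cascading-Flink connector. RegexSplitGenerator emits one tuple per token, and composing CountBy inside AggregateBy performs map-side partial counting before the final aggregation, which is cheaper than a plain GroupBy followed by a Count aggregator.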
Example 7: readWriteInFlowMR1

import cascading.flow.FlowDef; // import the FlowDef dependency

@Test
public void readWriteInFlowMR1() throws IOException {
    try (OrcWriter writer = new OrcWriter.Builder(conf, new Path(path, "part-00000"))
            .addField("a", TypeInfoFactory.stringTypeInfo)
            .addField("b", TypeInfoFactory.stringTypeInfo)
            .build()) {
        writer.addRow("A1", "B1");
        writer.addRow("A2", "B2");
    }
    String output = new File(temporaryFolder.getRoot(), "output").getCanonicalPath();
    Pipe pipe = new Pipe(UUID.randomUUID().toString());
    FlowDef flowDef = FlowDef
            .flowDef()
            .setName(UUID.randomUUID().toString())
            .addSource(pipe, new Hfs(OrcFile.source().declaredFields(FIELDS_AB).schemaFromFile().build(), path))
            .addTailSink(pipe, new Hfs(OrcFile.sink().schema(FIELDS_AB).build(), output));
    Flow<?> flow = new Hadoop2MR1FlowConnector(HadoopUtil.createProperties(conf)).connect(flowDef);
    flow.complete();
    flow.cleanup();
    try (OrcReader reader = new OrcReader(conf, new Path(output, "part-00000"))) {
        assertThat(reader.hasNext(), is(true));
        List<Object> list = reader.next();
        assertThat(list.size(), is(2));
        assertThat(list.get(0), is((Object) "A1"));
        assertThat(list.get(1), is((Object) "B1"));
        assertThat(reader.hasNext(), is(true));
        list = reader.next();
        assertThat(list.size(), is(2));
        assertThat(list.get(0), is((Object) "A2"));
        assertThat(list.get(1), is((Object) "B2"));
        assertThat(reader.hasNext(), is(false));
    }
}
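The test writes two rows with OrcWriter, pushes them through an identity flow on the MR1 planner, and then asserts the ORC output row by row. Note how the source scheme is built with schemaFromFile(), letting the input files supply the ORC schema, while the sink declares its schema explicitly.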
Example 8: readWriteInFlowTez

import cascading.flow.FlowDef; // import the FlowDef dependency

@Test
public void readWriteInFlowTez() throws IOException {
    try (OrcWriter writer = new OrcWriter.Builder(conf, new Path(path, "part-00000"))
            .addField("a", TypeInfoFactory.stringTypeInfo)
            .addField("b", TypeInfoFactory.stringTypeInfo)
            .build()) {
        writer.addRow("A1", "B1");
        writer.addRow("A2", "B2");
    }
    String output = new File(temporaryFolder.getRoot(), "output").getCanonicalPath();
    Pipe pipe = new Pipe(UUID.randomUUID().toString());
    FlowDef flowDef = FlowDef
            .flowDef()
            .setName(UUID.randomUUID().toString())
            .addSource(pipe, new Hfs(OrcFile.source().declaredFields(FIELDS_AB).schemaFromFile().build(), path))
            .addTailSink(pipe, new Hfs(OrcFile.sink().schema(FIELDS_AB).build(), output));
    Flow<?> flow = new Hadoop2TezFlowConnector(HadoopUtil.createProperties(conf)).connect(flowDef);
    flow.complete();
    flow.cleanup();
    try (OrcReader reader = new OrcReader(conf, new Path(output, "part-v000-o000-00000"))) {
        assertThat(reader.hasNext(), is(true));
        List<Object> list = reader.next();
        assertThat(list.size(), is(2));
        assertThat(list.get(0), is((Object) "A1"));
        assertThat(list.get(1), is((Object) "B1"));
        assertThat(reader.hasNext(), is(true));
        list = reader.next();
        assertThat(list.size(), is(2));
        assertThat(list.get(0), is((Object) "A2"));
        assertThat(list.get(1), is((Object) "B2"));
        assertThat(reader.hasNext(), is(false));
    }
}
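Identical to example 7 apart from the planner: under Hadoop2TezFlowConnector the output part file is named part-v000-o000-00000, reflecting Tez's vertex/output naming rather than MapReduce's part-00000.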
Example 9: main

import cascading.flow.FlowDef; // import the FlowDef dependency

public static void main(String[] args) {
    Properties properties = null;
    try {
        properties = new ConfigReader().renderProperties(Main.class);
        // guard against a missing argument before reading args[0]
        if (args.length > 0 && args[0] != null && args[0].length() > 0) {
            properties.put("inPath", args[0]);
        }
    } catch (IOException e) {
        System.out.println("Could not read your config.properties file");
        e.printStackTrace();
    }
    FlowDef flowDef = buildFlowDef(properties);
    new HadoopFlowConnector(properties).connect(flowDef).complete();
}
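config.properties supplies the defaults, and the first command-line argument, when present, overrides inPath; building the FlowDef itself is delegated to buildFlowDef(properties).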
Example 10: testMain

import cascading.flow.FlowDef; // import the FlowDef dependency

@Test
public void testMain() throws IOException {
    Properties properties = new ConfigReader().renderProperties(CommonCrawlIndexTest.class);
    FlowDef flowDef = CommonCrawlIndex.buildFlowDef(properties);
    if ("LOCAL".equals(properties.getProperty("platform"))) {
        // use the Cascading local connector to exclude Hadoop and just test the logic
        new LocalFlowConnector(properties).connect(flowDef).complete();
    } else {
        new HadoopFlowConnector(properties).connect(flowDef).complete();
    }
}
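A single platform property selects the connector: LOCAL runs the same FlowDef in-memory through LocalFlowConnector, which tests the pipe logic without a Hadoop cluster; anything else goes through HadoopFlowConnector. The FlowDef itself is connector-agnostic, which is what makes this switch a one-liner.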
Example 11: testComplete

import cascading.flow.FlowDef; // import the FlowDef dependency

@Test
public void testComplete() throws Exception {
    Bucket sink = new Bucket();
    Fields inFields = Fields.join(FIELD_S, FIELD_X, FIELD_Y);
    TupleListTap source = new DataBuilder(inFields)
            .addTuple("A", "a", "za")
            .addTuple("B", "b", "zb")
            .addTuple("AA", "aa", "zaa")
            .addTuple("BB", "bb", "zbb")
            .toTap();
    FlowDef flowDef = defineFlow(source, sink);
    new LocalFlowConnector().connect(flowDef).complete();
    List<TupleEntry> tupleEntries = sink.result().asTupleEntryList();
    assertThat(tupleEntries.get(0).getString(FIELD_S), is("A"));
    assertThat(tupleEntries.get(0).getString(FIELD_Y), is("za"));
    assertThat(tupleEntries.get(0).getString(FIELD_V), is("a"));
    assertThat(tupleEntries.get(1).getString(FIELD_S), is("AA"));
    assertThat(tupleEntries.get(1).getString(FIELD_Y), is("zaa"));
    assertThat(tupleEntries.get(1).getString(FIELD_V), is("aa"));
    assertThat(tupleEntries.get(2).getString(FIELD_S), is("B"));
    assertThat(tupleEntries.get(3).getString(FIELD_S), is("BB"));
    assertThat(tupleEntries.get(3).getString(FIELD_Y), is("zbb"));
    assertThat(tupleEntries.get(3).getString(FIELD_V), is("bb"));
}
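defineFlow (defined elsewhere in this test class) produces the FlowDef under test; DataBuilder and TupleListTap feed it in-memory tuples, and the Bucket sink collects the output for assertions, so the whole flow runs without touching the filesystem.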
Example 12: main

import cascading.flow.FlowDef; // import the FlowDef dependency

public static void main(String[] args) {
    String salesPath = args[0];
    String storePath = args[1];
    String outPath = args[2];
    String date = "2452229";
    Properties properties = new Properties();
    AppProps.setApplicationJarClass(properties, Main.class);
    HadoopFlowConnector flowConnector = new HadoopFlowConnector(properties);
    Tap salesTap = new Hfs(new ORCFile(null, "0,7"), salesPath);
    Tap storeTap = new Hfs(new AvroScheme(), storePath);
    Tap outTap = new Hfs(new TextDelimited(true, "\t"), outPath);
    Pipe salesPipe = new Each("sales", new Fields("solddatesk"), new DateFilter(Integer.valueOf(date)));
    Pipe storePipe = new Pipe("store");
    Pipe joinPipe = new HashJoin(salesPipe, new Fields("storesk"), storePipe, new Fields("storeSk"));
    // _col24 is state_name
    Pipe countPipe = new CountBy(joinPipe, new Fields("state"), new Fields("item_count"));
    FlowDef flowDef = FlowDef.flowDef().setName("count")
            .addSource(salesPipe, salesTap)
            .addSource(storePipe, storeTap)
            .addTailSink(countPipe, outTap);
            //.addTailSink(joinPipe, outTap);
    Flow countFlow = flowConnector.connect(flowDef);
    countFlow.complete();
}
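A small star join: the ORC sales fact table is filtered by date, hash-joined against the Avro store dimension, and the joined tuples are counted per state with CountBy. HashJoin holds the non-streamed side in memory on each task, so it suits a small dimension table like store.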
Example 13: testCascadeConnector

import cascading.flow.FlowDef; // import the FlowDef dependency

@Test
public void testCascadeConnector() {
    Pipe copy = new Pipe("copy");
    Properties cfg = new TestSettings().getProperties();
    FlowDef flow = new FlowDef().addSource(copy, sourceTap())
            .addTailSink(copy, new EsTap("cascading-local-cascade-connector/data"));
    FlowConnector connector = new LocalFlowConnector(cfg);
    Flow[] flows = new Flow[] { connector.connect(flow) };
    CascadeConnector cascadeConnector = new CascadeConnector(cfg);
    cascadeConnector.connect(flows).complete();
}
Example 14: testWriteToES

import cascading.flow.FlowDef; // import the FlowDef dependency

@Test
public void testWriteToES() throws Exception {
    Tap in = sourceTap();
    Tap out = new EsTap("cascading-hadoop-artists/data", new Fields("name", "url", "picture"));
    Pipe pipe = new Pipe("copy");
    FlowDef flowDef = FlowDef.flowDef().addSource(pipe, in).addTailSink(pipe, out);
    StatsUtils.proxy(
            new HadoopFlowConnector(HdpBootstrap.asProperties(CascadingHadoopSuite.configuration))
                    .connect(flowDef)).complete();
}
Example 15: testCascadeConnector

import cascading.flow.FlowDef; // import the FlowDef dependency

@Test
public void testCascadeConnector() {
    Pipe copy = new Pipe("copy");
    Properties cfg = HdpBootstrap.asProperties(CascadingHadoopSuite.configuration);
    FlowDef flow = new FlowDef().addSource(copy, sourceTap())
            .addTailSink(copy, new EsTap("cascading-hadoop-cascade-connector/data"));
    FlowConnector connector = new HadoopFlowConnector(cfg);
    Flow[] flows = new Flow[] { connector.connect(flow) };
    CascadeConnector cascadeConnector = new CascadeConnector(cfg);
    cascadeConnector.connect(flows).complete();
}