本文整理汇总了Java中cascading.scheme.hadoop.TextDelimited类的典型用法代码示例。如果您正苦于以下问题:Java TextDelimited类的具体用法?Java TextDelimited怎么用?Java TextDelimited使用的例子?那么,这里精选的类代码示例或许可以为您提供帮助。
TextDelimited类属于cascading.scheme.hadoop包,在下文中一共展示了TextDelimited类的12个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Java代码示例。
示例1: main
import cascading.scheme.hadoop.TextDelimited; //导入依赖的package包/类
/**
 * Copies a tab-delimited file from one Hfs location to another through a single
 * pass-through pipe named "copy".
 *
 * @param args {@code args[0]} is the input path, {@code args[1]} is the output path
 * @throws IllegalArgumentException if fewer than two arguments are supplied
 */
public static void main(String... args) {
    // Fail fast with a readable message rather than an ArrayIndexOutOfBoundsException.
    if (args.length < 2) {
        throw new IllegalArgumentException(
            "Expected <input path> <output path> but got " + args.length + " argument(s)");
    }
    String inPath = args[0];
    String outPath = args[1];

    Properties properties = new Properties();
    AppProps.setApplicationJarClass(properties, Main.class);
    HadoopFlowConnector flowConnector = new HadoopFlowConnector(properties);

    // Source and sink share the same scheme: tab delimiter, header flag enabled.
    Tap inTap = new Hfs(new TextDelimited(true, "\t"), inPath);
    Tap outTap = new Hfs(new TextDelimited(true, "\t"), outPath);

    // The pipe applies no operations; it only connects the source to the sink.
    Pipe copyPipe = new Pipe("copy");

    // Bind the pipe's head to the source tap and its tail to the sink tap.
    FlowDef flowDef = FlowDef.flowDef().addSource(copyPipe, inTap).addTailSink(copyPipe, outTap);

    // Plan the flow and run it; complete() is called on the connected flow.
    flowConnector.connect(flowDef).complete();
}
示例2: testWhenExtraColumnsNotStrict
import cascading.scheme.hadoop.TextDelimited; //导入依赖的package包/类
/**
 * Runs the "extra-columns-not-strict" flow over the with-extra-columns fixture and
 * compares both the sink output and the trap output against their expected files.
 */
@Test
public void testWhenExtraColumnsNotStrict() throws Exception {
    // Fixture locations: the input file, the two produced outputs, and what they should match.
    String inputFile = "src/test/resources/input/with-extra-columns.txt";
    String outputDir = "src/test/resources/input/sink-with-headers";
    String trapDir = "src/test/resources/input/trap-sink-with-headers";
    String expectedOutput = "src/test/resources/expected/with-extra-columns-no-strict.txt";
    String expectedTrap = "src/test/resources/expected/trap-with-extra-columns-no-strict.txt";

    // Tab-separated, double-quoted input with five declared header names.
    CSVFormat readFormat = CSVFormat.newFormat('\t')
        .withQuote('"')
        .withHeader("id", "first name", "last name", "city", "zip")
        .withEscape('\\')
        .withRecordSeparator('\n');
    // Tab-separated output that skips the header record.
    CSVFormat writeFormat = CSVFormat.newFormat('\t')
        .withSkipHeaderRecord()
        .withEscape('\\')
        .withRecordSeparator('\n');

    // The second CsvScheme argument is false here (presumably the strict flag — confirm).
    Tap source = new Hfs(new CsvScheme(readFormat, false), inputFile);
    Tap sink = new Hfs(new CsvScheme(writeFormat), outputDir, SinkMode.REPLACE);
    Tap trap = new Hfs(new TextDelimited(true, "\t"), trapDir, SinkMode.REPLACE);

    Pipe pipe = new Pipe("pipe");
    FlowConnector connector = new Hadoop2MR1FlowConnector();
    connector.connect("extra-columns-not-strict", source, sink, trap, pipe).complete();

    testPaths(outputDir, expectedOutput);
    testPaths(trapDir, expectedTrap);
}
示例3: testWhenExtraColumnsNotStrictNoHeaders
import cascading.scheme.hadoop.TextDelimited; //导入依赖的package包/类
/**
 * Runs the "test-extra-columns-no-header" flow over the headerless with-extra-columns
 * fixture and compares the sink and trap outputs against their expected files.
 */
@Test
public void testWhenExtraColumnsNotStrictNoHeaders() throws Exception {
    // Fixture locations: the input file, the two produced outputs, and what they should match.
    String inputFile = "src/test/resources/input/with-extra-columns-no-header.txt";
    String outputDir = "src/test/resources/input/sink-no-headers";
    String trapDir = "src/test/resources/input/trap-no-headers";
    String expectedOutput = "src/test/resources/expected/with-extra-columns-no-strict-no-header.txt";
    String expectedTrap = "src/test/resources/expected/trap-with-extra-columns-no-strict-no-header.txt";

    // Tab-separated, double-quoted input; no header names are declared.
    CSVFormat readFormat = CSVFormat.newFormat('\t')
        .withQuote('"')
        .withEscape('\\')
        .withRecordSeparator('\n');
    // Tab-separated output with the same escape and record separator.
    CSVFormat writeFormat = CSVFormat.newFormat('\t')
        .withEscape('\\')
        .withRecordSeparator('\n');

    // The second CsvScheme argument is false here (presumably the strict flag — confirm).
    Tap source = new Hfs(new CsvScheme(readFormat, false), inputFile);
    Tap sink = new Hfs(new CsvScheme(writeFormat), outputDir, SinkMode.REPLACE);
    // The trap scheme's header flag is false, unlike the with-headers variant of this test.
    Tap trap = new Hfs(new TextDelimited(false, "\t"), trapDir, SinkMode.REPLACE);

    Pipe pipe = new Pipe("pipe");
    FlowConnector connector = new Hadoop2MR1FlowConnector();
    connector.connect("test-extra-columns-no-header", source, sink, trap, pipe).complete();

    testPaths(outputDir, expectedOutput);
    testPaths(trapDir, expectedTrap);
}
示例4: main
import cascading.scheme.hadoop.TextDelimited; //导入依赖的package包/类
/**
 * Word-count job: splits each input record on whitespace into tokens, counts the
 * occurrences of every token, and writes the result through a {@code FlinkConnector} flow.
 *
 * @param args {@code args[0]} is the input path, {@code args[1]} is the output path
 * @throws IllegalArgumentException if fewer than two arguments are supplied
 */
public static void main(String[] args) {
    if (args.length < 2) {
        throw new IllegalArgumentException("Please specify input and ouput paths as arguments.");
    }

    Fields text = new Fields("text");
    Fields token = new Fields("token", String.class);

    // Using "\n" as the delimiter, so the source declares the single "text" field.
    Tap inTap = new Hfs(new TextDelimited(text, "\n"), args[0]);
    Tap outTap = new Hfs(new TextDelimited(false, "\n"), args[1], SinkMode.REPLACE);

    // Split "text" on runs of whitespace; Fields.RESULTS keeps only the emitted "token" field.
    RegexSplitGenerator splitter = new RegexSplitGenerator(token, "\\s+");
    Pipe docPipe = new Each("token", text, splitter, Fields.RESULTS);

    // Branch "wc" off the tokenizer and count per token into a "count" field.
    Pipe wcPipe = new AggregateBy(new Pipe("wc", docPipe), token, new CountBy(new Fields("count")));

    FlowDef flowDef = FlowDef.flowDef()
        .setName("wc")
        .addSource(docPipe, inTap)
        .addTailSink(wcPipe, outTap);

    FlowConnector flowConnector = new FlinkConnector();
    Flow wcFlow = flowConnector.connect(flowDef);
    wcFlow.complete();
}
示例5: main
import cascading.scheme.hadoop.TextDelimited; //导入依赖的package包/类
/**
 * Filters sales records with a {@code DateFilter}, hash-joins them against the store
 * data, counts the joined rows per "state", and writes the counts as tab-delimited text.
 *
 * @param args {@code args[0]} is the sales input path, {@code args[1]} is the store input
 *     path, {@code args[2]} is the output path
 * @throws IllegalArgumentException if fewer than three arguments are supplied
 */
public static void main(String[] args) {
    // Fail fast with a readable message rather than an ArrayIndexOutOfBoundsException.
    if (args.length < 3) {
        throw new IllegalArgumentException(
            "Expected <sales path> <store path> <output path> but got " + args.length + " argument(s)");
    }
    String salesPath = args[0];
    String storePath = args[1];
    String outPath = args[2];
    // Value handed to DateFilter for the "solddatesk" field; its encoding is not visible here.
    String date = "2452229";

    Properties properties = new Properties();
    AppProps.setApplicationJarClass(properties, Main.class);
    HadoopFlowConnector flowConnector = new HadoopFlowConnector(properties);

    // NOTE(review): "0,7" presumably selects ORC columns 0 and 7 — confirm against ORCFile.
    Tap salesTap = new Hfs(new ORCFile(null, "0,7"), salesPath);
    Tap storeTap = new Hfs(new AvroScheme(), storePath);
    Tap outTap = new Hfs(new TextDelimited(true, "\t"), outPath);

    Pipe salesPipe = new Each("sales", new Fields("solddatesk"), new DateFilter(Integer.valueOf(date)));
    Pipe storePipe = new Pipe("store");
    // NOTE(review): the join keys differ only by case ("storesk" vs "storeSk") — confirm
    // that each matches the field name of its own source.
    Pipe joinPipe = new HashJoin(salesPipe, new Fields("storesk"), storePipe, new Fields("storeSk"));

    // Original author's note: _col24 is state_name
    Pipe countPipe = new CountBy(joinPipe, new Fields("state"),
        new Fields("item_count"));

    FlowDef flowDef = FlowDef.flowDef().setName("count")
        .addSource(salesPipe, salesTap)
        .addSource(storePipe, storeTap)
        .addTailSink(countPipe, outTap);

    Flow countFlow = flowConnector.connect(flowDef);
    countFlow.complete();
}
示例6: main
import cascading.scheme.hadoop.TextDelimited; //导入依赖的package包/类
/**
 * Filters the "foo" input, hash-joins it with the "bar" input on
 * {@code foobarId == barId}, filters the joined rows, and writes the result as
 * pipe-delimited text. A DOT description of the flow is written to "dot/wc.dot"
 * before the flow runs.
 *
 * @param args six positional arguments: foo input path, bar input path, output path,
 *     fooValMax, joinValMax, number of reducers
 * @throws IllegalArgumentException if fewer than six arguments are supplied
 * @throws NumberFormatException if any of the three numeric arguments is not an integer
 */
public static void main(String[] args) {
    // Fail fast with a readable message rather than an ArrayIndexOutOfBoundsException.
    if (args.length < 6) {
        throw new IllegalArgumentException(
            "Expected <foo path> <bar path> <output path> <fooValMax> <joinValMax> <reducers>"
                + " but got " + args.length + " argument(s)");
    }
    String fooInputPath = args[0];
    String barInputPath = args[1];
    String outputPath = args[2];
    int fooValMax = Integer.parseInt(args[3]);
    int joinValMax = Integer.parseInt(args[4]);
    int numberOfReducers = Integer.parseInt(args[5]);

    Properties properties = new Properties();
    AppProps.setApplicationJarClass(properties,
        JoinFilterExampleCascading.class);
    // The reducer count is set under two property names (legacy and current Hadoop keys).
    properties.setProperty("mapred.reduce.tasks", Integer.toString(numberOfReducers));
    properties.setProperty("mapreduce.job.reduces", Integer.toString(numberOfReducers));

    // Spill settings; applied to the join pipe's ConfigDef further down.
    SpillableProps props = SpillableProps.spillableProps()
        .setCompressSpill( true )
        .setMapSpillThreshold( 50 * 1000 );

    HadoopFlowConnector flowConnector = new HadoopFlowConnector(properties);

    // create source and sink taps
    Fields fooFields = new Fields("fooId", "fooVal", "foobarId");
    Tap fooTap = new Hfs(new TextDelimited(fooFields, "|"), fooInputPath);
    Fields barFields = new Fields("barId", "barVal");
    Tap barTap = new Hfs(new TextDelimited(barFields, "|"), barInputPath);
    Tap outputTap = new Hfs(new TextDelimited(false, "|"), outputPath);

    // Join keys: foo.foobarId against bar.barId.
    Fields joinFooFields = new Fields("foobarId");
    Fields joinBarFields = new Fields("barId");

    Pipe fooPipe = new Pipe("fooPipe");
    Pipe barPipe = new Pipe("barPipe");

    // Filter foo rows before the join so fewer rows reach it.
    Pipe fooFiltered = new Each(fooPipe, fooFields, new FooFilter(fooValMax));
    Pipe joinedPipe = new HashJoin(fooFiltered, joinFooFields, barPipe,
        joinBarFields);
    props.setProperties( joinedPipe.getConfigDef(), Mode.REPLACE );

    // Second filter runs over the joined row's four listed fields.
    Fields joinFields = new Fields("fooId", "fooVal", "foobarId", "barVal");
    Pipe joinedFilteredPipe = new Each(joinedPipe, joinFields,
        new JoinedFilter(joinValMax));

    FlowDef flowDef = FlowDef.flowDef().setName("wc")
        .addSource(fooPipe, fooTap).addSource(barPipe, barTap)
        .addTailSink(joinedFilteredPipe, outputTap);

    Flow wcFlow = flowConnector.connect(flowDef);
    wcFlow.writeDOT("dot/wc.dot");
    wcFlow.complete();
}
示例7: sourceTap
import cascading.scheme.hadoop.TextDelimited; //导入依赖的package包/类
/**
 * Builds the source tap: an {@code Hfs} over {@code INPUT} read with a
 * {@code TextDelimited} scheme that declares a single "line" field.
 *
 * @return the source tap
 */
private Tap sourceTap() {
    Fields lineField = new Fields("line");
    TextDelimited scheme = new TextDelimited(lineField);
    return new Hfs(scheme, INPUT);
}
示例8: sourceTap
import cascading.scheme.hadoop.TextDelimited; //导入依赖的package包/类
/**
 * Builds the source tap: an {@code Hfs} over {@code INPUT} read with a
 * {@code TextDelimited} scheme declaring the fields id, name, url, picture and ts.
 *
 * @return the source tap
 */
private Tap sourceTap() {
    Fields columns = new Fields("id", "name", "url", "picture", "ts");
    TextDelimited scheme = new TextDelimited(columns);
    return new Hfs(scheme, INPUT);
}
示例9: getDelimitedFile
import cascading.scheme.hadoop.TextDelimited; //导入依赖的package包/类
/**
 * Creates an {@code Hfs} tap over {@code filename} backed by a {@code TextDelimited}
 * scheme configured from the given arguments.
 *
 * @param fields fields passed to the scheme
 * @param hasHeader header flag passed to the scheme
 * @param delimiter field delimiter passed to the scheme
 * @param quote quote string passed to the scheme
 * @param types field types passed to the scheme
 * @param filename path the tap points at
 * @param mode sink mode passed to the tap
 * @return the configured tap
 */
@Override
public Tap getDelimitedFile(Fields fields, boolean hasHeader, String delimiter, String quote,
        Class[] types, String filename, SinkMode mode) {
    TextDelimited scheme = new TextDelimited(fields, hasHeader, delimiter, quote, types);
    return new Hfs(scheme, filename, mode);
}
示例10: sourceTap
import cascading.scheme.hadoop.TextDelimited; //导入依赖的package包/类
/**
 * Builds the source tap: an {@code Hfs} over {@code INPUT} read with a
 * {@code TextDelimited} scheme declaring the fields id, name, url, picture, ts and tag.
 *
 * @return the source tap
 */
private Tap sourceTap() {
    Fields columns = new Fields("id", "name", "url", "picture", "ts", "tag");
    TextDelimited scheme = new TextDelimited(columns);
    return new Hfs(scheme, INPUT);
}
示例11: tsv
import cascading.scheme.hadoop.TextDelimited; //导入依赖的package包/类
/**
 * Creates a tab-delimited {@code TextDelimited} scheme for the given fields, with both
 * boolean constructor flags set to {@code true} and the sink part count set to 10.
 *
 * @param fields fields the scheme declares
 * @return the configured scheme
 */
static protected Scheme tsv(Fields fields) {
    final TextDelimited tabSeparated = new TextDelimited(fields, true, true, "\t");
    // setNumSinkParts is invoked as a separate statement on the built scheme.
    tabSeparated.setNumSinkParts(10);
    return tabSeparated;
}
示例12: main
import cascading.scheme.hadoop.TextDelimited; //导入依赖的package包/类
/**
 * Builds an evaluator from a PMML file and runs a flow that reads comma-delimited input,
 * applies the assembly contributed by a {@code PMMLPlanner}, and writes comma-delimited output.
 *
 * <p>Prints a usage line to standard error and exits with status -1 unless exactly three
 * arguments are given.
 *
 * @param args {@code args[0]} is the PMML file, {@code args[1]} the Hfs source,
 *     {@code args[2]} the Hfs sink
 */
static public void main(String... args) throws Exception {
    if (args.length != 3) {
        System.err.println("Usage: hadoop jar job.jar <PMML file> <HFS source> <HFS sink>");
        System.exit(-1);
    }

    // Load the model first; nothing else is set up if this throws.
    Evaluator evaluator = PMMLPlannerUtil.createEvaluator(new File(args[0]));

    Properties properties = new Properties();
    AppProps.setApplicationJarClass(properties, Main.class);
    FlowConnector connector = new HadoopFlowConnector(properties);

    Tap source = new Hfs(new TextDelimited(true, ","), args[1]);
    Tap sink = new Hfs(new TextDelimited(true, ","), args[2]);

    // The planner's head and tail names match the names the taps are registered under below.
    PMMLPlanner pmmlPlanner = new PMMLPlanner(evaluator);
    pmmlPlanner.setRetainOnlyActiveFields();
    pmmlPlanner.setHeadName("input");
    pmmlPlanner.setTailName("output");

    FlowDef flowDef = FlowDef.flowDef()
        .addSource("input", source)
        .addSink("output", sink)
        .addAssemblyPlanner(pmmlPlanner);

    connector.connect(flowDef).complete();
}