This article collects typical usage examples of the Java class cascading.flow.hadoop2.Hadoop2MR1FlowConnector. If you are unsure what the Hadoop2MR1FlowConnector class is for or how to use it, the curated code examples below may help.
The Hadoop2MR1FlowConnector class belongs to the cascading.flow.hadoop2 package. Eleven code examples of the class are shown below.
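Before the individual examples, here is a minimal end-to-end sketch of the pattern they all share: build a source Tap, a sink Tap, and a Pipe, then let Hadoop2MR1FlowConnector plan and run the flow. The class name, paths, and the TextLine scheme are placeholders chosen for illustration, not part of the examples below.

import cascading.flow.Flow;
import cascading.flow.FlowConnector;
import cascading.flow.hadoop2.Hadoop2MR1FlowConnector;
import cascading.pipe.Pipe;
import cascading.scheme.hadoop.TextLine;
import cascading.tap.Tap;
import cascading.tap.hadoop.Hfs;

public class MinimalHadoop2MR1Example {  // hypothetical class name
  public static void main(String[] args) {
    // Placeholder paths; point these at real local or HDFS locations.
    Tap source = new Hfs(new TextLine(), "input/path");
    Tap sink = new Hfs(new TextLine(), "output/path");
    Pipe pipe = new Pipe("copy");  // pass-through pipe: copies source to sink

    // Plans the assembly as Hadoop 2 MR1 jobs and runs it to completion.
    FlowConnector connector = new Hadoop2MR1FlowConnector();
    Flow<?> flow = connector.connect(source, sink, pipe);
    flow.complete();
  }
}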
Example 1: fieldsCountGreaterThanColumnsTest
import cascading.flow.hadoop2.Hadoop2MR1FlowConnector; // import the required package/class
/**
 * Tests that a RuntimeException is thrown when more fields are declared than the input contains.
 */
@Test(expected = RuntimeException.class)
public void fieldsCountGreaterThanColumnsTest() {
  String sourcePath = "src/test/resources/input/with-headers.txt";
  String sinkPath = "src/test/resources/output/sink-with-headers";
  FlowConnector connector = new Hadoop2MR1FlowConnector();
  CSVFormat sourceFormat = CSVFormat.newFormat(',')
      .withQuote('"')
      .withEscape('\\')
      .withRecordSeparator('\n');
  CSVFormat sinkFormat = CSVFormat.newFormat('\t')
      .withSkipHeaderRecord()
      .withEscape('\\')
      .withRecordSeparator('\n');
  Fields sourceFields = new Fields("id", "last name", "first name", "phone");
  Tap source = new Hfs(new CsvScheme(sourceFields, sourceFormat), sourcePath);
  Tap sink = new Hfs(new CsvScheme(sinkFormat), sinkPath);
  Pipe pipe = new Pipe("pipe");
  connector.connect(source, sink, pipe).complete();
}
Example 2: testWhenExtraColumnsStrict
import cascading.flow.hadoop2.Hadoop2MR1FlowConnector; // import the required package/class
@Test(expected = FlowException.class)
public void testWhenExtraColumnsStrict() throws Exception {
  String sourcePath = "src/test/resources/input/with-extra-columns.txt";
  String sinkPath = "src/test/resources/input/sink-with-headers";
  FlowConnector connector = new Hadoop2MR1FlowConnector();
  CSVFormat sourceFormat = CSVFormat.newFormat('\t')
      .withHeader("id", "first name", "last name", "city", "zip")
      .withQuote('"')
      .withEscape('\\')
      .withRecordSeparator('\n');
  CSVFormat sinkFormat = CSVFormat.newFormat('\t')
      .withEscape('\\')
      .withRecordSeparator('\n');
  Tap source = new Hfs(new CsvScheme(sourceFormat, true), sourcePath);
  Tap sink = new Hfs(new CsvScheme(sinkFormat), sinkPath, SinkMode.REPLACE);
  Pipe pipe = new Pipe("pipe");
  connector.connect(source, sink, pipe).complete();
}
Example 3: testWhenExtraColumnsStrictNoHeaders
import cascading.flow.hadoop2.Hadoop2MR1FlowConnector; // import the required package/class
@Test(expected = FlowException.class)
public void testWhenExtraColumnsStrictNoHeaders() throws Exception {
  String sourcePath = "src/test/resources/input/with-extra-columns-no-header.txt";
  String sinkPath = "src/test/resources/input/sink-no-headers";
  FlowConnector connector = new Hadoop2MR1FlowConnector();
  CSVFormat sourceFormat = CSVFormat.newFormat('\t')
      .withQuote('"')
      .withEscape('\\')
      .withRecordSeparator('\n');
  CSVFormat sinkFormat = CSVFormat.newFormat('\t')
      .withEscape('\\')
      .withRecordSeparator('\n');
  Tap source = new Hfs(new CsvScheme(sourceFormat, true), sourcePath);
  Tap sink = new Hfs(new CsvScheme(sinkFormat), sinkPath, SinkMode.REPLACE);
  Pipe pipe = new Pipe("pipe");
  connector.connect(source, sink, pipe).complete();
}
Example 4: testSchemeFields
import cascading.flow.hadoop2.Hadoop2MR1FlowConnector; // import the required package/class
/**
 * Helper method for asserting the fields generated by CsvScheme.
 */
@SuppressWarnings("unchecked")
private void testSchemeFields(String sourcePath, CsvScheme sourceSchema, String sinkPath, CsvScheme sinkScheme, Set<String> expected) {
  Tap source = new Hfs(sourceSchema, sourcePath);
  Tap sink = new Hfs(sinkScheme, sinkPath);
  Pipe pipe = new Pipe("pipe");
  FlowConnector connector = new Hadoop2MR1FlowConnector();
  connector.connect(source, sink, pipe).complete();
  Fields sinkFields = sink.getSinkFields();
  for (int i = 0; i < sinkFields.size(); i++) {
    assertTrue("Unexpected column " + sinkFields.get(i), expected.contains(sinkFields.get(i)));
    expected.remove(sinkFields.get(i));
  }
  assertTrue("Not all expected values are found", expected.isEmpty());
}
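For context, a call to this helper might look like the following sketch. The paths, formats, and expected header set are hypothetical and only illustrate the method's contract: run the flow, then check that the sink's fields are exactly the expected set. (Assumes java.util.Arrays and java.util.HashSet are imported.)

// Hypothetical invocation: verify the sink ends up with exactly these columns.
Set<String> expected = new HashSet<>(Arrays.asList("id", "first name", "last name"));
testSchemeFields(
    "src/test/resources/input/with-headers.txt",             // hypothetical source file
    new CsvScheme(CSVFormat.DEFAULT.withHeader()),           // infer headers from the file
    "src/test/resources/output/sink-with-headers",           // hypothetical sink path
    new CsvScheme(CSVFormat.DEFAULT.withSkipHeaderRecord()),
    expected);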
Example 5: headerCountMismatchColumnsTest
import cascading.flow.hadoop2.Hadoop2MR1FlowConnector; // import the required package/class
/**
 * Tests that a RuntimeException is thrown when the declared header count does not match the input columns.
 */
@Test(expected = RuntimeException.class)
public void headerCountMismatchColumnsTest() {
  String sourcePath = "src/test/resources/input/with-headers.txt";
  String sinkPath = "src/test/resources/output/sink-with-headers";
  FlowConnector connector = new Hadoop2MR1FlowConnector();
  CSVFormat sourceFormat = CSVFormat.newFormat(',')
      .withQuote('"')
      .withHeader("id", "first name", "last name", "phone")
      .withEscape('\\')
      .withRecordSeparator('\n');
  CSVFormat sinkFormat = CSVFormat.newFormat('\t')
      .withSkipHeaderRecord()
      .withEscape('\\')
      .withRecordSeparator('\n');
  Tap source = new Hfs(new CsvScheme(sourceFormat), sourcePath);
  Tap sink = new Hfs(new CsvScheme(sinkFormat), sinkPath);
  Pipe pipe = new Pipe("pipe");
  connector.connect(source, sink, pipe).complete();
}
Example 6: fieldsIncludedButNotMatchLengthTest
import cascading.flow.hadoop2.Hadoop2MR1FlowConnector; // import the required package/class
/**
 * Tests that when only a subset of the input fields is declared, exactly that subset is output.
 */
@Test
public void fieldsIncludedButNotMatchLengthTest() throws Exception {
  String sourcePath = "src/test/resources/input/with-headers.txt";
  String sinkPath = "src/test/resources/output/sink-with-headers";
  String expectedPath = "src/test/resources/expected/sink-with-headers-id-only.txt";
  FlowConnector connector = new Hadoop2MR1FlowConnector();
  CSVFormat sourceFormat = CSVFormat.newFormat(',')
      .withHeader("id", "first name", "last name")
      .withQuote('"')
      .withEscape('\\')
      .withRecordSeparator('\n');
  CSVFormat sinkFormat = CSVFormat.newFormat('\t')
      .withSkipHeaderRecord()
      .withEscape('\\')
      .withRecordSeparator('\n');
  Fields sourceFields = new Fields("id");
  Tap source = new Hfs(new CsvScheme(sourceFields, sourceFormat), sourcePath);
  Tap sink = new Hfs(new CsvScheme(sinkFormat), sinkPath, SinkMode.REPLACE);
  Pipe pipe = new Pipe("pipe");
  connector.connect(source, sink, pipe).complete();
  testPaths(sinkPath, expectedPath);
}
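Several tests in this listing call a testPaths helper that is never shown. Judging from its usage, it compares the flow's sink output with an expected file; below is a minimal reconstruction under that assumption. The part-00000 file name and the line-by-line comparison are guesses, not the project's actual code.

import static org.junit.Assert.assertEquals;

import java.nio.file.Files;
import java.nio.file.Paths;
import java.util.List;

// Assumed shape of the helper: compare actual sink output to the expected file.
private void testPaths(String actualPath, String expectedPath) throws Exception {
  // Hadoop writes part files under the sink directory; part-00000 is an assumption.
  List<String> actual = Files.readAllLines(Paths.get(actualPath, "part-00000"));
  List<String> expected = Files.readAllLines(Paths.get(expectedPath));
  assertEquals(expected, actual);
}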
Example 7: testWhenFieldsAndHeadersAreinDifferentOrder
import cascading.flow.hadoop2.Hadoop2MR1FlowConnector; // import the required package/class
@Test
public void testWhenFieldsAndHeadersAreinDifferentOrder() throws Exception {
  String sourcePath = "src/test/resources/input/with-headers.txt";
  String sinkPath = "src/test/resources/output/sink-with-headers";
  String expectedPath = "src/test/resources/expected/with-headers-difforder.txt";
  FlowConnector connector = new Hadoop2MR1FlowConnector();
  CSVFormat sourceFormat = CSVFormat.newFormat(',')
      .withQuote('"')
      .withHeader("id", "first name", "last name")
      .withEscape('\\')
      .withRecordSeparator('\n');
  CSVFormat sinkFormat = CSVFormat.newFormat('\t')
      .withSkipHeaderRecord()
      .withEscape('\\')
      .withRecordSeparator('\n');
  Fields sourceFields = new Fields("id", "last name", "first name");
  Tap source = new Hfs(new CsvScheme(sourceFields, sourceFormat), sourcePath);
  Tap sink = new Hfs(new CsvScheme(sinkFormat), sinkPath);
  Pipe pipe = new Pipe("pipe");
  connector.connect(source, sink, pipe).complete();
  testPaths(sinkPath, expectedPath);
}
Example 8: testWhenExtraColumnsNotStrict
import cascading.flow.hadoop2.Hadoop2MR1FlowConnector; // import the required package/class
@Test
public void testWhenExtraColumnsNotStrict() throws Exception {
  String sourcePath = "src/test/resources/input/with-extra-columns.txt";
  String sinkPath = "src/test/resources/input/sink-with-headers";
  String expectedPath = "src/test/resources/expected/with-extra-columns-no-strict.txt";
  String trapPath = "src/test/resources/input/trap-sink-with-headers";
  String expectedTrapPath = "src/test/resources/expected/trap-with-extra-columns-no-strict.txt";
  FlowConnector connector = new Hadoop2MR1FlowConnector();
  CSVFormat sourceFormat = CSVFormat.newFormat('\t')
      .withQuote('"')
      .withHeader("id", "first name", "last name", "city", "zip")
      .withEscape('\\')
      .withRecordSeparator('\n');
  CSVFormat sinkFormat = CSVFormat.newFormat('\t')
      .withSkipHeaderRecord()
      .withEscape('\\')
      .withRecordSeparator('\n');
  Tap source = new Hfs(new CsvScheme(sourceFormat, false), sourcePath);
  Tap sink = new Hfs(new CsvScheme(sinkFormat), sinkPath, SinkMode.REPLACE);
  Tap trap = new Hfs(new TextDelimited(true, "\t"), trapPath, SinkMode.REPLACE);
  Pipe pipe = new Pipe("pipe");
  connector.connect("extra-columns-not-strict", source, sink, trap, pipe).complete();
  testPaths(sinkPath, expectedPath);
  testPaths(trapPath, expectedTrapPath);
}
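Note the five-argument connect overload used here: the third Hfs tap is a Cascading failure trap. With strict mode off, rows whose column count does not match the declared header are diverted to the trap instead of failing the whole flow, which is why the test asserts on both the sink output and the trap output.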
Example 9: testWhenExtraColumnsNotStrictNoHeaders
import cascading.flow.hadoop2.Hadoop2MR1FlowConnector; // import the required package/class
@Test
public void testWhenExtraColumnsNotStrictNoHeaders() throws Exception {
  String sourcePath = "src/test/resources/input/with-extra-columns-no-header.txt";
  String sinkPath = "src/test/resources/input/sink-no-headers";
  String trapPath = "src/test/resources/input/trap-no-headers";
  String expectedPath = "src/test/resources/expected/with-extra-columns-no-strict-no-header.txt";
  String expectedTrapPath = "src/test/resources/expected/trap-with-extra-columns-no-strict-no-header.txt";
  FlowConnector connector = new Hadoop2MR1FlowConnector();
  CSVFormat sourceFormat = CSVFormat.newFormat('\t')
      .withQuote('"')
      .withEscape('\\')
      .withRecordSeparator('\n');
  CSVFormat sinkFormat = CSVFormat.newFormat('\t')
      .withEscape('\\')
      .withRecordSeparator('\n');
  Tap source = new Hfs(new CsvScheme(sourceFormat, false), sourcePath);
  Tap sink = new Hfs(new CsvScheme(sinkFormat), sinkPath, SinkMode.REPLACE);
  Tap trap = new Hfs(new TextDelimited(false, "\t"), trapPath, SinkMode.REPLACE);
  Pipe pipe = new Pipe("pipe");
  connector.connect("test-extra-columns-no-header", source, sink, trap, pipe).complete();
  testPaths(sinkPath, expectedPath);
  testPaths(trapPath, expectedTrapPath);
}
Example 10: readWriteInFlowMR1
import cascading.flow.hadoop2.Hadoop2MR1FlowConnector; // import the required package/class
@Test
public void readWriteInFlowMR1() throws IOException {
  try (OrcWriter writer = new OrcWriter.Builder(conf, new Path(path, "part-00000"))
      .addField("a", TypeInfoFactory.stringTypeInfo)
      .addField("b", TypeInfoFactory.stringTypeInfo)
      .build()) {
    writer.addRow("A1", "B1");
    writer.addRow("A2", "B2");
  }
  String output = new File(temporaryFolder.getRoot(), "output").getCanonicalPath();
  Pipe pipe = new Pipe(UUID.randomUUID().toString());
  FlowDef flowDef = FlowDef
      .flowDef()
      .setName(UUID.randomUUID().toString())
      .addSource(pipe, new Hfs(OrcFile.source().declaredFields(FIELDS_AB).schemaFromFile().build(), path))
      .addTailSink(pipe, new Hfs(OrcFile.sink().schema(FIELDS_AB).build(), output));
  Flow<?> flow = new Hadoop2MR1FlowConnector(HadoopUtil.createProperties(conf)).connect(flowDef);
  flow.complete();
  flow.cleanup();
  try (OrcReader reader = new OrcReader(conf, new Path(output, "part-00000"))) {
    assertThat(reader.hasNext(), is(true));
    List<Object> list = reader.next();
    assertThat(list.size(), is(2));
    assertThat(list.get(0), is((Object) "A1"));
    assertThat(list.get(1), is((Object) "B1"));
    assertThat(reader.hasNext(), is(true));
    list = reader.next();
    assertThat(list.size(), is(2));
    assertThat(list.get(0), is((Object) "A2"));
    assertThat(list.get(1), is((Object) "B2"));
    assertThat(reader.hasNext(), is(false));
  }
}
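This is the only example that hands a Properties object to the connector, so the planner inherits the Hadoop configuration. A related, commonly seen variant registers the application JAR through Cascading's AppProps; the sketch below assumes a placeholder Main class living in your job JAR.

import java.util.Properties;

import cascading.flow.FlowConnector;
import cascading.flow.hadoop2.Hadoop2MR1FlowConnector;
import cascading.property.AppProps;

Properties properties = new Properties();
// Ship the JAR containing Main to the cluster; Main is a placeholder class name.
AppProps.setApplicationJarClass(properties, Main.class);
FlowConnector connector = new Hadoop2MR1FlowConnector(properties);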
Example 11: testScheme
import cascading.flow.hadoop2.Hadoop2MR1FlowConnector; // import the required package/class
/**
 * Tests a source and sink scheme together.
 */
private void testScheme(String sourcePath, CSVFormat sourceFormat, String sinkPath, CSVFormat sinkFormat, String expectedPath, boolean strict) throws Exception {
  FlowConnector connector = new Hadoop2MR1FlowConnector();
  Tap source = new Hfs(new CsvScheme(sourceFormat, strict), sourcePath);
  Tap sink = new Hfs(new CsvScheme(sinkFormat), sinkPath);
  Pipe pipe = new Pipe("pipe");
  connector.connect(source, sink, pipe).complete();
  testPaths(sinkPath, expectedPath);
}