当前位置: 首页>>代码示例>>Java>>正文


Java Hadoop2MR1FlowConnector类代码示例

本文整理汇总了Java中cascading.flow.hadoop2.Hadoop2MR1FlowConnector的典型用法代码示例。如果您正苦于以下问题:Java Hadoop2MR1FlowConnector类的具体用法?Java Hadoop2MR1FlowConnector怎么用?Java Hadoop2MR1FlowConnector使用的例子?那么, 这里精选的类代码示例或许可以为您提供帮助。


Hadoop2MR1FlowConnector类属于cascading.flow.hadoop2包,在下文中一共展示了Hadoop2MR1FlowConnector类的11个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Java代码示例。

示例1: fieldsCountGreaterThanColumnsTest

import cascading.flow.hadoop2.Hadoop2MR1FlowConnector; //导入依赖的package包/类
/**
 * Declares four source fields against the headered input file and expects the
 * flow to fail with a RuntimeException because the field count does not match
 * the file's columns.
 */
@Test(expected = RuntimeException.class)
public void fieldsCountGreaterThanColumnsTest() {

  String inputPath = "src/test/resources/input/with-headers.txt";
  String outputPath = "src/test/resources/output/sink-with-headers";

  // Comma-separated source with quoting and escaping enabled.
  CSVFormat inputFormat = CSVFormat.newFormat(',')
    .withQuote('"')
    .withEscape('\\')
    .withRecordSeparator('\n');

  // Tab-separated sink; header record is skipped on write.
  CSVFormat outputFormat = CSVFormat.newFormat('\t')
    .withSkipHeaderRecord()
    .withEscape('\\')
    .withRecordSeparator('\n');

  Fields declaredFields = new Fields("id", "last name", "first name", "phone");
  Tap sourceTap = new Hfs(new CsvScheme(declaredFields, inputFormat), inputPath);
  Tap sinkTap = new Hfs(new CsvScheme(outputFormat), outputPath);
  Pipe copyPipe = new Pipe("pipe");

  FlowConnector flowConnector = new Hadoop2MR1FlowConnector();
  flowConnector.connect(sourceTap, sinkTap, copyPipe).complete();

}
 
开发者ID:datascienceinc,项目名称:cascading.csv,代码行数:29,代码来源:CsvSchemeTest.java

示例2: testWhenExtraColumnsStrict

import cascading.flow.hadoop2.Hadoop2MR1FlowConnector; //导入依赖的package包/类
/**
 * Reads a file whose rows carry extra columns with a strict CsvScheme and a
 * declared header; expects the flow to abort with a FlowException.
 */
@Test(expected = FlowException.class)
public void testWhenExtraColumnsStrict() throws Exception {
  String inputPath = "src/test/resources/input/with-extra-columns.txt";
  String outputPath = "src/test/resources/input/sink-with-headers";

  // Tab-separated source with an explicit five-column header.
  CSVFormat inputFormat = CSVFormat.newFormat('\t')
    .withHeader("id", "first name", "last name", "city", "zip")
    .withQuote('"')
    .withEscape('\\')
    .withRecordSeparator('\n');

  CSVFormat outputFormat = CSVFormat.newFormat('\t')
    .withEscape('\\')
    .withRecordSeparator('\n');

  // Second CsvScheme argument `true` enables strict column checking.
  Tap sourceTap = new Hfs(new CsvScheme(inputFormat, true), inputPath);
  Tap sinkTap = new Hfs(new CsvScheme(outputFormat), outputPath, SinkMode.REPLACE);
  Pipe copyPipe = new Pipe("pipe");

  FlowConnector flowConnector = new Hadoop2MR1FlowConnector();
  flowConnector.connect(sourceTap, sinkTap, copyPipe).complete();
}
 
开发者ID:datascienceinc,项目名称:cascading.csv,代码行数:24,代码来源:CsvSchemeTest.java

示例3: testWhenExtraColumnsStrictNoHeaders

import cascading.flow.hadoop2.Hadoop2MR1FlowConnector; //导入依赖的package包/类
/**
 * Same strict extra-columns scenario as above but with a headerless input
 * file; the strict scheme should still abort the flow with a FlowException.
 */
@Test(expected = FlowException.class)
public void testWhenExtraColumnsStrictNoHeaders() throws Exception {
  String inputPath = "src/test/resources/input/with-extra-columns-no-header.txt";
  String outputPath = "src/test/resources/input/sink-no-headers";

  CSVFormat inputFormat = CSVFormat.newFormat('\t')
    .withQuote('"')
    .withEscape('\\')
    .withRecordSeparator('\n');

  CSVFormat outputFormat = CSVFormat.newFormat('\t')
    .withEscape('\\')
    .withRecordSeparator('\n');

  // Strict mode on: rows with unexpected column counts fail the flow.
  Tap sourceTap = new Hfs(new CsvScheme(inputFormat, true), inputPath);
  Tap sinkTap = new Hfs(new CsvScheme(outputFormat), outputPath, SinkMode.REPLACE);
  Pipe copyPipe = new Pipe("pipe");

  FlowConnector flowConnector = new Hadoop2MR1FlowConnector();
  flowConnector.connect(sourceTap, sinkTap, copyPipe).complete();
}
 
开发者ID:datascienceinc,项目名称:cascading.csv,代码行数:23,代码来源:CsvSchemeTest.java

示例4: testSchemeFields

import cascading.flow.hadoop2.Hadoop2MR1FlowConnector; //导入依赖的package包/类
/**
 * Runs a copy flow from {@code sourcePath} to {@code sinkPath} with the given
 * schemes, then asserts that the sink's fields are exactly the {@code expected}
 * set (no extras, none missing). The expected set is consumed during the check.
 */
@SuppressWarnings("unchecked")
private void testSchemeFields(String sourcePath, CsvScheme sourceSchema, String sinkPath, CsvScheme sinkScheme, Set<String> expected) {

  Pipe copyPipe = new Pipe("pipe");
  Tap inputTap = new Hfs(sourceSchema, sourcePath);
  Tap outputTap = new Hfs(sinkScheme, sinkPath);

  new Hadoop2MR1FlowConnector().connect(inputTap, outputTap, copyPipe).complete();

  Fields producedFields = outputTap.getSinkFields();
  for (int idx = 0; idx < producedFields.size(); idx++) {
    Object column = producedFields.get(idx);
    // Every produced column must have been expected exactly once.
    assertTrue("Unexpected column " + column, expected.contains(column));
    expected.remove(column);
  }

  // Anything left over was expected but never produced.
  assertTrue("Not all expected values are found", expected.isEmpty());

}
 
开发者ID:datascienceinc,项目名称:cascading.csv,代码行数:23,代码来源:CsvSchemeTest.java

示例5: headerCountMismatchColumnsTest

import cascading.flow.hadoop2.Hadoop2MR1FlowConnector; //导入依赖的package包/类
/**
 * Declares a four-name header against the headered input file and expects the
 * flow to fail with a RuntimeException because the header count does not match
 * the file's columns.
 */
@Test(expected = RuntimeException.class)
public void headerCountMismatchColumnsTest() {

  String inputPath = "src/test/resources/input/with-headers.txt";
  String outputPath = "src/test/resources/output/sink-with-headers";

  // Comma-separated source with an explicit (mismatched) header.
  CSVFormat inputFormat = CSVFormat.newFormat(',')
    .withQuote('"')
    .withHeader("id", "first name", "last name", "phone")
    .withEscape('\\')
    .withRecordSeparator('\n');

  CSVFormat outputFormat = CSVFormat.newFormat('\t')
    .withSkipHeaderRecord()
    .withEscape('\\')
    .withRecordSeparator('\n');

  Tap sourceTap = new Hfs(new CsvScheme(inputFormat), inputPath);
  Tap sinkTap = new Hfs(new CsvScheme(outputFormat), outputPath);
  Pipe copyPipe = new Pipe("pipe");

  FlowConnector flowConnector = new Hadoop2MR1FlowConnector();
  flowConnector.connect(sourceTap, sinkTap, copyPipe).complete();

}
 
开发者ID:datascienceinc,项目名称:cascading.csv,代码行数:30,代码来源:CsvSchemeTest.java

示例6: fieldsIncludedButNotMatchLengthTest

import cascading.flow.hadoop2.Hadoop2MR1FlowConnector; //导入依赖的package包/类
/**
 * Declares only a subset of the header's fields ("id") and verifies the sink
 * receives just that subset by comparing against a golden output file.
 */
@Test
public void fieldsIncludedButNotMatchLengthTest() throws Exception {

  String inputPath = "src/test/resources/input/with-headers.txt";
  String outputPath = "src/test/resources/output/sink-with-headers";
  String goldenPath = "src/test/resources/expected/sink-with-headers-id-only.txt";

  CSVFormat inputFormat = CSVFormat.newFormat(',')
    .withHeader("id", "first name", "last name")
    .withQuote('"')
    .withEscape('\\')
    .withRecordSeparator('\n');

  CSVFormat outputFormat = CSVFormat.newFormat('\t')
    .withSkipHeaderRecord()
    .withEscape('\\')
    .withRecordSeparator('\n');

  // Only "id" is requested, a strict subset of the declared header.
  Fields requestedFields = new Fields("id");
  Tap sourceTap = new Hfs(new CsvScheme(requestedFields, inputFormat), inputPath);
  Tap sinkTap = new Hfs(new CsvScheme(outputFormat), outputPath, SinkMode.REPLACE);
  Pipe copyPipe = new Pipe("pipe");

  FlowConnector flowConnector = new Hadoop2MR1FlowConnector();
  flowConnector.connect(sourceTap, sinkTap, copyPipe).complete();

  // Compare produced output against the golden file.
  testPaths(outputPath, goldenPath);

}
 
开发者ID:datascienceinc,项目名称:cascading.csv,代码行数:33,代码来源:CsvSchemeTest.java

示例7: testWhenFieldsAndHeadersAreinDifferentOrder

import cascading.flow.hadoop2.Hadoop2MR1FlowConnector; //导入依赖的package包/类
/**
 * Declares source fields in a different order than the header and verifies the
 * output matches the golden file for that reordering.
 */
@Test
public void testWhenFieldsAndHeadersAreinDifferentOrder() throws Exception {

  String inputPath = "src/test/resources/input/with-headers.txt";
  String outputPath = "src/test/resources/output/sink-with-headers";
  String goldenPath = "src/test/resources/expected/with-headers-difforder.txt";

  CSVFormat inputFormat = CSVFormat.newFormat(',')
    .withQuote('"')
    .withHeader("id", "first name", "last name")
    .withEscape('\\')
    .withRecordSeparator('\n');

  CSVFormat outputFormat = CSVFormat.newFormat('\t')
    .withSkipHeaderRecord()
    .withEscape('\\')
    .withRecordSeparator('\n');

  // Same names as the header, but "last name" / "first name" swapped.
  Fields reorderedFields = new Fields("id", "last name", "first name");

  Tap sourceTap = new Hfs(new CsvScheme(reorderedFields, inputFormat), inputPath);
  Tap sinkTap = new Hfs(new CsvScheme(outputFormat), outputPath);
  Pipe copyPipe = new Pipe("pipe");

  FlowConnector flowConnector = new Hadoop2MR1FlowConnector();
  flowConnector.connect(sourceTap, sinkTap, copyPipe).complete();

  testPaths(outputPath, goldenPath);

}
 
开发者ID:datascienceinc,项目名称:cascading.csv,代码行数:32,代码来源:CsvSchemeTest.java

示例8: testWhenExtraColumnsNotStrict

import cascading.flow.hadoop2.Hadoop2MR1FlowConnector; //导入依赖的package包/类
/**
 * Non-strict variant of the extra-columns scenario: rows with surplus columns
 * are diverted to a trap tap instead of failing the flow. Both the sink and
 * the trap are compared against golden files.
 */
@Test
public void testWhenExtraColumnsNotStrict() throws Exception {
  String inputPath = "src/test/resources/input/with-extra-columns.txt";
  String outputPath = "src/test/resources/input/sink-with-headers";
  String goldenPath = "src/test/resources/expected/with-extra-columns-no-strict.txt";
  String trapOutputPath = "src/test/resources/input/trap-sink-with-headers";
  String goldenTrapPath = "src/test/resources/expected/trap-with-extra-columns-no-strict.txt";

  CSVFormat inputFormat = CSVFormat.newFormat('\t')
    .withQuote('"')
    .withHeader("id", "first name", "last name", "city", "zip")
    .withEscape('\\')
    .withRecordSeparator('\n');

  CSVFormat outputFormat = CSVFormat.newFormat('\t')
    .withSkipHeaderRecord()
    .withEscape('\\')
    .withRecordSeparator('\n');

  // Second CsvScheme argument `false` disables strict column checking.
  Tap sourceTap = new Hfs(new CsvScheme(inputFormat, false), inputPath);
  Tap sinkTap = new Hfs(new CsvScheme(outputFormat), outputPath, SinkMode.REPLACE);
  Tap trapTap = new Hfs(new TextDelimited(true, "\t"), trapOutputPath, SinkMode.REPLACE);

  Pipe copyPipe = new Pipe("pipe");

  FlowConnector flowConnector = new Hadoop2MR1FlowConnector();
  flowConnector.connect("extra-columns-not-strict", sourceTap, sinkTap, trapTap, copyPipe).complete();

  testPaths(outputPath, goldenPath);
  testPaths(trapOutputPath, goldenTrapPath);
}
 
开发者ID:datascienceinc,项目名称:cascading.csv,代码行数:32,代码来源:CsvSchemeTest.java

示例9: testWhenExtraColumnsNotStrictNoHeaders

import cascading.flow.hadoop2.Hadoop2MR1FlowConnector; //导入依赖的package包/类
/**
 * Non-strict extra-columns scenario without headers: surplus-column rows go to
 * a trap tap and both sink and trap outputs are checked against golden files.
 */
@Test
public void testWhenExtraColumnsNotStrictNoHeaders() throws Exception {
  String inputPath = "src/test/resources/input/with-extra-columns-no-header.txt";
  String outputPath = "src/test/resources/input/sink-no-headers";
  String trapOutputPath = "src/test/resources/input/trap-no-headers";
  String goldenPath = "src/test/resources/expected/with-extra-columns-no-strict-no-header.txt";
  String goldenTrapPath = "src/test/resources/expected/trap-with-extra-columns-no-strict-no-header.txt";

  CSVFormat inputFormat = CSVFormat.newFormat('\t')
    .withQuote('"')
    .withEscape('\\')
    .withRecordSeparator('\n');

  CSVFormat outputFormat = CSVFormat.newFormat('\t')
    .withEscape('\\')
    .withRecordSeparator('\n');

  // Non-strict scheme: malformed rows are trapped, not fatal.
  Tap sourceTap = new Hfs(new CsvScheme(inputFormat, false), inputPath);
  Tap sinkTap = new Hfs(new CsvScheme(outputFormat), outputPath, SinkMode.REPLACE);
  Tap trapTap = new Hfs(new TextDelimited(false, "\t"), trapOutputPath, SinkMode.REPLACE);

  Pipe copyPipe = new Pipe("pipe");

  FlowConnector flowConnector = new Hadoop2MR1FlowConnector();
  flowConnector.connect("test-extra-columns-no-header", sourceTap, sinkTap, trapTap, copyPipe).complete();
  testPaths(outputPath, goldenPath);
  testPaths(trapOutputPath, goldenTrapPath);
}
 
开发者ID:datascienceinc,项目名称:cascading.csv,代码行数:29,代码来源:CsvSchemeTest.java

示例10: readWriteInFlowMR1

import cascading.flow.hadoop2.Hadoop2MR1FlowConnector; //导入依赖的package包/类
/**
 * Writes two rows to an ORC file, copies them through an MR1 flow from source
 * to sink, then reads the sink file back and verifies both rows round-tripped
 * in order.
 */
@Test
public void readWriteInFlowMR1() throws IOException {
  // Seed the input ORC file with two (a, b) string rows.
  try (OrcWriter orcWriter = new OrcWriter.Builder(conf, new Path(path, "part-00000"))
      .addField("a", TypeInfoFactory.stringTypeInfo)
      .addField("b", TypeInfoFactory.stringTypeInfo)
      .build()) {
    orcWriter.addRow("A1", "B1");
    orcWriter.addRow("A2", "B2");
  }

  String outputDir = new File(temporaryFolder.getRoot(), "output").getCanonicalPath();

  Pipe copyPipe = new Pipe(UUID.randomUUID().toString());
  FlowDef definition = FlowDef
      .flowDef()
      .setName(UUID.randomUUID().toString())
      .addSource(copyPipe, new Hfs(OrcFile.source().declaredFields(FIELDS_AB).schemaFromFile().build(), path))
      .addTailSink(copyPipe, new Hfs(OrcFile.sink().schema(FIELDS_AB).build(), outputDir));

  Flow<?> mrFlow = new Hadoop2MR1FlowConnector(HadoopUtil.createProperties(conf)).connect(definition);
  mrFlow.complete();
  mrFlow.cleanup();

  // Read the copied file back and check both rows survived intact.
  try (OrcReader orcReader = new OrcReader(conf, new Path(outputDir, "part-00000"))) {
    assertThat(orcReader.hasNext(), is(true));
    List<Object> row = orcReader.next();
    assertThat(row.size(), is(2));
    assertThat(row.get(0), is((Object) "A1"));
    assertThat(row.get(1), is((Object) "B1"));

    assertThat(orcReader.hasNext(), is(true));
    row = orcReader.next();
    assertThat(row.size(), is(2));
    assertThat(row.get(0), is((Object) "A2"));
    assertThat(row.get(1), is((Object) "B2"));

    assertThat(orcReader.hasNext(), is(false));
  }
}
 
开发者ID:HotelsDotCom,项目名称:corc,代码行数:40,代码来源:OrcFileTest.java

示例11: testScheme

import cascading.flow.hadoop2.Hadoop2MR1FlowConnector; //导入依赖的package包/类
/**
 * Runs a copy flow from {@code sourcePath} (parsed with {@code sourceFormat}
 * under the given strictness) to {@code sinkPath} (written with
 * {@code sinkFormat}), then compares the sink against {@code expectedPath}.
 */
private void testScheme(String sourcePath, CSVFormat sourceFormat, String sinkPath, CSVFormat sinkFormat, String expectedPath, boolean strict) throws Exception {

  Pipe copyPipe = new Pipe("pipe");

  Tap inputTap = new Hfs(new CsvScheme(sourceFormat, strict), sourcePath);
  Tap outputTap = new Hfs(new CsvScheme(sinkFormat), sinkPath);

  FlowConnector flowConnector = new Hadoop2MR1FlowConnector();
  flowConnector.connect(inputTap, outputTap, copyPipe).complete();

  testPaths(sinkPath, expectedPath);

}
 
开发者ID:datascienceinc,项目名称:cascading.csv,代码行数:18,代码来源:CsvSchemeTest.java


注:本文中的cascading.flow.hadoop2.Hadoop2MR1FlowConnector类示例由纯净天空整理自Github/MSDocs等开源代码及文档管理平台,相关代码片段筛选自各路编程大神贡献的开源项目,源码版权归原作者所有,传播和使用请参考对应项目的License;未经允许,请勿转载。