

Java Hfs Class Code Examples

This article collects typical usage examples of the Java class cascading.tap.hadoop.Hfs. If you are wondering what the Hfs class does, how to use it, or what real-world usage looks like, the curated code samples below may help.


The Hfs class belongs to the cascading.tap.hadoop package. Fifteen code examples of the class are presented below, sorted by popularity by default.
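Before the examples, here is a minimal sketch of the pattern they all share: an Hfs tap binds a Scheme (which defines how records are read and written) to a Hadoop filesystem path, and a FlowConnector wires a source tap and a sink tap together through a Pipe. This sketch is illustrative only and is not taken from any of the projects below; the class name and both paths are placeholders, and it assumes the Cascading Hadoop dependencies are on the classpath.

import java.util.Properties;

import cascading.flow.Flow;
import cascading.flow.FlowConnector;
import cascading.flow.hadoop.HadoopFlowConnector;
import cascading.pipe.Pipe;
import cascading.scheme.hadoop.TextLine;
import cascading.tap.SinkMode;
import cascading.tap.Tap;
import cascading.tap.hadoop.Hfs;

public class HfsCopyExample {

  public static void main(String[] args) {
    // Placeholder paths; point these at real local or HDFS locations.
    String inputPath = "input/data.txt";
    String outputPath = "output/copy";

    // An Hfs tap pairs a Scheme (here TextLine) with a filesystem path.
    Tap source = new Hfs(new TextLine(), inputPath);
    // SinkMode.REPLACE overwrites the output path if it already exists.
    Tap sink = new Hfs(new TextLine(), outputPath, SinkMode.REPLACE);

    // A bare pipe passes tuples through unchanged, copying source to sink.
    Pipe pipe = new Pipe("copy");

    FlowConnector connector = new HadoopFlowConnector(new Properties());
    Flow flow = connector.connect(source, sink, pipe);
    flow.complete();
  }
}

The same three-step shape (build taps, build a pipe assembly, connect and complete a flow) recurs in every example that follows; only the schemes, paths, and pipe assemblies change.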

Example 1: fieldsCountGreaterThanColumnsTest

import cascading.tap.hadoop.Hfs; // import the required package/class
/**
 * Tests that a RuntimeException is thrown when the number of declared fields
 * exceeds the number of columns in the input.
 */
@Test(expected = RuntimeException.class)
public void fieldsCountGreaterThanColumnsTest() {

  String sourcePath = "src/test/resources/input/with-headers.txt";
  String sinkPath = "src/test/resources/output/sink-with-headers";

  FlowConnector connector = new Hadoop2MR1FlowConnector();
  CSVFormat sourceFormat = CSVFormat.newFormat(',')
    .withQuote('"')
    .withEscape('\\')
    .withRecordSeparator('\n');

  CSVFormat sinkFormat = CSVFormat.newFormat('\t')
    .withSkipHeaderRecord()
    .withEscape('\\')
    .withRecordSeparator('\n');

  Fields sourceFields = new Fields("id", "last name", "first name", "phone");
  Tap source = new Hfs(new CsvScheme(sourceFields, sourceFormat), sourcePath);
  Tap sink = new Hfs(new CsvScheme(sinkFormat), sinkPath);
  Pipe pipe = new Pipe("pipe");

  connector.connect(source, sink, pipe).complete();

}
 
Developer ID: datascienceinc, Project: cascading.csv, Lines: 29, Source: CsvSchemeTest.java

Example 2: testWhenExtraColumnsStrict

import cascading.tap.hadoop.Hfs; // import the required package/class
@Test(expected = FlowException.class)
public void testWhenExtraColumnsStrict() throws Exception {
  String sourcePath = "src/test/resources/input/with-extra-columns.txt";
  String sinkPath = "src/test/resources/input/sink-with-headers";

  FlowConnector connector = new Hadoop2MR1FlowConnector();
  CSVFormat sourceFormat = CSVFormat.newFormat('\t')
    .withHeader("id", "first name", "last name", "city", "zip")
    .withQuote('"')
    .withEscape('\\')
    .withRecordSeparator('\n');

  CSVFormat sinkFormat = CSVFormat.newFormat('\t')
    .withEscape('\\')
    .withRecordSeparator('\n');

  Tap source = new Hfs(new CsvScheme(sourceFormat, true), sourcePath);
  Tap sink = new Hfs(new CsvScheme(sinkFormat), sinkPath, SinkMode.REPLACE);

  Pipe pipe = new Pipe("pipe");

  connector.connect(source, sink, pipe).complete();
}
 
Developer ID: datascienceinc, Project: cascading.csv, Lines: 24, Source: CsvSchemeTest.java

Example 3: testWhenExtraColumnsStrictNoHeaders

import cascading.tap.hadoop.Hfs; // import the required package/class
@Test(expected = FlowException.class)
public void testWhenExtraColumnsStrictNoHeaders() throws Exception {
  String sourcePath = "src/test/resources/input/with-extra-columns-no-header.txt";
  String sinkPath = "src/test/resources/input/sink-no-headers";

  FlowConnector connector = new Hadoop2MR1FlowConnector();
  CSVFormat sourceFormat = CSVFormat.newFormat('\t')
    .withQuote('"')
    .withEscape('\\')
    .withRecordSeparator('\n');

  CSVFormat sinkFormat = CSVFormat.newFormat('\t')
    .withEscape('\\')
    .withRecordSeparator('\n');

  Tap source = new Hfs(new CsvScheme(sourceFormat, true), sourcePath);
  Tap sink = new Hfs(new CsvScheme(sinkFormat), sinkPath, SinkMode.REPLACE);

  Pipe pipe = new Pipe("pipe");

  connector.connect(source, sink, pipe).complete();
}
 
Developer ID: datascienceinc, Project: cascading.csv, Lines: 23, Source: CsvSchemeTest.java

Example 4: testSchemeFields

import cascading.tap.hadoop.Hfs; // import the required package/class
/**
 * Helper method for asserting the fields generated by a CsvScheme.
 */
@SuppressWarnings("unchecked")
private void testSchemeFields(String sourcePath, CsvScheme sourceSchema, String sinkPath, CsvScheme sinkScheme, Set<String> expected) {

  Tap source = new Hfs(sourceSchema, sourcePath);
  Tap sink = new Hfs(sinkScheme, sinkPath);
  Pipe pipe = new Pipe("pipe");

  FlowConnector connector = new Hadoop2MR1FlowConnector();
  connector.connect(source, sink, pipe).complete();

  Fields sinkFields = sink.getSinkFields();
  for (int i = 0; i < sinkFields.size(); i++) {
    assertTrue("Unexpected column " + sinkFields.get(i), expected.contains(sinkFields.get(i)));
    expected.remove(sinkFields.get(i));
  }

  assertTrue("Not all expected values are found", expected.isEmpty());

}
 
Developer ID: datascienceinc, Project: cascading.csv, Lines: 23, Source: CsvSchemeTest.java

Example 5: testPaths

import cascading.tap.hadoop.Hfs; // import the required package/class
/**
 * Tests the content of an output path against the given expected path.
 */
@SuppressWarnings("unchecked")
private void testPaths(String actual, String expected) throws Exception {

  Tap outputTest = new Hfs(new TextLine(), actual);
  Tap expectedTest = new Hfs(new TextLine(), expected);

  FlowProcess outputProcess = new HadoopFlowProcess(new JobConf(new Configuration()));
  FlowProcess expectedProcess = new HadoopFlowProcess(new JobConf(new Configuration()));

  TupleEntryIterator outputIterator = outputTest.openForRead(outputProcess);
  TupleEntryIterator expectedIterator = expectedTest.openForRead(expectedProcess);

  List<String> outputList = new ArrayList<>();
  while (outputIterator.hasNext()) {
    outputList.add(outputIterator.next().getTuple().getString(1));
  }

  List<String> expectedList = new ArrayList<>();
  while (expectedIterator.hasNext()) {
    expectedList.add(expectedIterator.next().getTuple().getString(1));
  }

  assertEquals(expectedList, outputList);

}
 
Developer ID: datascienceinc, Project: cascading.csv, Lines: 29, Source: CsvSchemeTest.java

Example 6: openTrapForWrite

import cascading.tap.hadoop.Hfs; // import the required package/class
@Override
public TupleEntryCollector openTrapForWrite(Tap trap) throws IOException {

	if (trap instanceof Hfs) {

		JobConf jobConf = new JobConf(this.getConfigCopy());

		int stepNum = jobConf.getInt( "cascading.flow.step.num", 0 );
		int nodeNum = jobConf.getInt( "cascading.flow.node.num", 0 );

		String partname = String.format( "-%05d-%05d-%05d", stepNum, nodeNum, this.getCurrentSliceNum() );
		jobConf.set( "cascading.tapcollector.partname", "%s%spart" + partname );

		String value = String.format( "attempt_%012d_0000_m_%06d_0", (int) Math.rint( System.currentTimeMillis() ), this.getCurrentSliceNum() );
		jobConf.set( "mapred.task.id", value );
		jobConf.set( "mapreduce.task.id", value );

		return trap.openForWrite( new FlinkFlowProcess( jobConf ), null);
	}
	else {
		throw new UnsupportedOperationException("Only Hfs taps are supported as traps");
	}
}
 
Developer ID: dataArtisans, Project: cascading-flink, Lines: 24, Source: FlinkFlowProcess.java

Example 7: writeTypical

import cascading.tap.hadoop.Hfs; // import the required package/class
@Test
public void writeTypical() throws IOException {
  Data data = new DataBuilder(FIELDS_AB).addTuple("A1", "B1").addTuple("A2", "B2").build();
  Tap<?, ?, ?> tap = new Hfs(OrcFile.source().declaredFields(FIELDS_AB).schemaFromFile().build(), path);

  Plunger.writeData(data).toTap(tap);

  try (OrcReader reader = getOrcReader()) {
    assertThat(reader.hasNext(), is(true));
    List<Object> list = reader.next();
    assertThat(list.size(), is(2));
    assertThat(list.get(0), is((Object) "A1"));
    assertThat(list.get(1), is((Object) "B1"));

    assertThat(reader.hasNext(), is(true));
    list = reader.next();
    assertThat(list.size(), is(2));
    assertThat(list.get(0), is((Object) "A2"));
    assertThat(list.get(1), is((Object) "B2"));

    assertThat(reader.hasNext(), is(false));
  }
}
 
Developer ID: HotelsDotCom, Project: corc, Lines: 24, Source: OrcFileTest.java

Example 8: readTypical

import cascading.tap.hadoop.Hfs; // import the required package/class
@Test
public void readTypical() throws IOException {
  try (OrcWriter writer = new OrcWriter.Builder(conf, new Path(path, "part-00000"))
      .addField("a", TypeInfoFactory.stringTypeInfo)
      .addField("b", TypeInfoFactory.stringTypeInfo)
      .build()) {
    writer.addRow("A1", "B1");
    writer.addRow("A2", "B2");
  }

  List<TupleEntry> actual = Plunger.readDataFromTap(
      new Hfs(OrcFile.source().declaredFields(FIELDS_AB).schemaFromFile().build(), path)).asTupleEntryList();
  List<TupleEntry> expected = new DataBuilder(FIELDS_AB)
      .addTuple("A1", "B1")
      .addTuple("A2", "B2")
      .build()
      .asTupleEntryList();

  assertThat(actual, is(tupleEntryList(expected)));
}
 
Developer ID: HotelsDotCom, Project: corc, Lines: 21, Source: OrcFileTest.java

Example 9: readMissing

import cascading.tap.hadoop.Hfs; // import the required package/class
@Test
public void readMissing() throws IOException {
  try (OrcWriter writer = new OrcWriter.Builder(conf, new Path(path, "part-00000")).addField("a",
      TypeInfoFactory.stringTypeInfo).build()) {
    writer.addRow("A1");
    writer.addRow("A2");
  }

  List<TupleEntry> actual = Plunger.readDataFromTap(
      new Hfs(OrcFile.source().declaredFields(FIELDS_AB).schemaFromFile().build(), path)).asTupleEntryList();
  List<TupleEntry> expected = new DataBuilder(FIELDS_AB)
      .addTuple("A1", null)
      .addTuple("A2", null)
      .build()
      .asTupleEntryList();
  assertThat(actual, is(tupleEntryList(expected)));
}
 
Developer ID: HotelsDotCom, Project: corc, Lines: 18, Source: OrcFileTest.java

Example 10: readStringPredicatePushdown

import cascading.tap.hadoop.Hfs; // import the required package/class
@Test
public void readStringPredicatePushdown() throws IOException {
  TypeInfo typeInfo = TypeInfoFactory.stringTypeInfo;

  try (OrcWriter writer = getOrcWriter(typeInfo)) {
    writer.addRow("hello");
    writer.addRow("world");
  }

  StructTypeInfo structTypeInfo = new StructTypeInfoBuilder().add("a", TypeInfoFactory.stringTypeInfo).build();

  SearchArgument searchArgument = SearchArgumentFactory.newBuilder().startAnd().equals("a", "hello").end().build();

  OrcFile orcFile = OrcFile.source().columns(structTypeInfo).schemaFromFile().searchArgument(searchArgument).build();
  Tap<?, ?, ?> tap = new Hfs(orcFile, path);

  List<Tuple> list = Plunger.readDataFromTap(tap).asTupleList();

  assertThat(list.size(), is(1));
  assertThat(list.get(0).getObject(0), is((Object) "hello"));
}
 
Developer ID: HotelsDotCom, Project: corc, Lines: 22, Source: OrcFileTest.java

Example 11: readDecimalPredicatePushdown

import cascading.tap.hadoop.Hfs; // import the required package/class
@Test
public void readDecimalPredicatePushdown() throws IOException {
  TypeInfo typeInfo = TypeInfoFactory.getDecimalTypeInfo(2, 1);

  try (OrcWriter writer = getOrcWriter(typeInfo)) {
    writer.addRow(HiveDecimal.create("0.0"));
    writer.addRow(HiveDecimal.create("0.1"));
  }

  StructTypeInfo structTypeInfo = new StructTypeInfoBuilder().add("a", typeInfo).build();

  SearchArgument searchArgument = SearchArgumentFactory
      .newBuilder()
      .startAnd()
      .equals("a", new BigDecimal("0.1"))
      .end()
      .build();

  OrcFile orcFile = OrcFile.source().columns(structTypeInfo).schemaFromFile().searchArgument(searchArgument).build();
  Tap<?, ?, ?> tap = new Hfs(orcFile, path);

  List<Tuple> list = Plunger.readDataFromTap(tap).asTupleList();

  assertThat(list.size(), is(1));
  assertThat(list.get(0).getObject(0), is((Object) new BigDecimal("0.1")));
}
 
Developer ID: HotelsDotCom, Project: corc, Lines: 27, Source: OrcFileTest.java

Example 12: readCharPredicatePushdown

import cascading.tap.hadoop.Hfs; // import the required package/class
@Test
public void readCharPredicatePushdown() throws IOException {
  TypeInfo typeInfo = TypeInfoFactory.getCharTypeInfo(3);

  try (OrcWriter writer = getOrcWriter(typeInfo)) {
    writer.addRow(new HiveChar("foo", 3));
    writer.addRow(new HiveChar("bar", 3));
  }

  StructTypeInfo structTypeInfo = new StructTypeInfoBuilder().add("a", typeInfo).build();

  SearchArgument searchArgument = SearchArgumentFactory
      .newBuilder()
      .startAnd()
      .equals("a", new HiveChar("foo", 5))
      .end()
      .build();

  OrcFile orcFile = OrcFile.source().columns(structTypeInfo).schemaFromFile().searchArgument(searchArgument).build();
  Tap<?, ?, ?> tap = new Hfs(orcFile, path);

  List<Tuple> list = Plunger.readDataFromTap(tap).asTupleList();

  assertThat(list.size(), is(1));
  assertThat(list.get(0).getObject(0), is((Object) "foo"));
}
 
Developer ID: HotelsDotCom, Project: corc, Lines: 27, Source: OrcFileTest.java

Example 13: testWithHeader

import cascading.tap.hadoop.Hfs; // import the required package/class
@Test
public void testWithHeader() throws Exception {
    final String inputFile = "quoted_header.csv";
    final String outputDir = "quoted_header";
    final String compareFile = "quoted_header.csv";
    final Properties props = new Properties();
    final Configuration conf = new Configuration();

    final Tap source = new Hfs(new OpenCsvScheme(), DATA_DIR + "/" + inputFile, SinkMode.KEEP);
    final Tap sink = new Hfs(new OpenCsvScheme(), TMP_DIR + "/" + outputDir, SinkMode.REPLACE);
    final Pipe pipe = new Each(new Pipe("test"), new Debug());
    new HadoopFlowConnector(props).connect(source, sink, pipe).complete();
    
    final Tap compare = new Hfs(new OpenCsvScheme(), COMPARE_DIR + "/" + compareFile, SinkMode.KEEP);
    assertTrue(compareTaps(sink, compare, conf));
}
 
Developer ID: tresata, Project: cascading-opencsv, Lines: 17, Source: OpenCsvSchemeTest.java

Example 14: testHeaderless

import cascading.tap.hadoop.Hfs; // import the required package/class
@Test
public void testHeaderless() throws Exception {
    final String inputFile = "quoted_headerless.csv";
    final String outputDir = "quoted_headerless";
    final String compareFile = "quoted_headerless.csv";
    final Properties props = new Properties();
    final Configuration conf = new Configuration();

    final Tap source = new Hfs(new OpenCsvScheme(new Fields("id", "product", "descr")), DATA_DIR + "/" + inputFile, SinkMode.KEEP);
    final Tap sink = new Hfs(new OpenCsvScheme(new Fields("id", "product", "descr")), TMP_DIR + "/" + outputDir, SinkMode.REPLACE);
    final Pipe pipe = new Each(new Pipe("test"), new Debug());
    new HadoopFlowConnector(props).connect(source, sink, pipe).complete();

    final Tap compare = new Hfs(new OpenCsvScheme(new Fields("id", "product", "descr")), COMPARE_DIR + "/" + compareFile, SinkMode.KEEP);
    assertTrue(compareTaps(sink, compare, conf));
}
 
Developer ID: tresata, Project: cascading-opencsv, Lines: 17, Source: OpenCsvSchemeTest.java

Example 15: main

import cascading.tap.hadoop.Hfs; // import the required package/class
public static void main(String[] args) {

  Properties properties = new Properties();

  properties.put(SplunkConf.SPLUNK_USERNAME, "admin");
  properties.put(SplunkConf.SPLUNK_PASSWORD, "changeIt");
  properties.put(SplunkConf.SPLUNK_HOST, "localhost");
  properties.put(SplunkConf.SPLUNK_PORT, "9050");

  SplunkDataQuery splunkSearch = new SplunkDataQuery();
  SplunkScheme inputScheme = new SplunkScheme(splunkSearch);
  SplunkTap input = new SplunkTap(properties, inputScheme);

  TextLine outputScheme = new TextLine();
  Hfs output = new Hfs(outputScheme, PATH_TO_OUTPUT, SinkMode.REPLACE);

  Pipe pipe = new Pipe("test");
  Flow flow = new HadoopFlowConnector().connect(input, output, pipe);

  flow.complete();
}
 
Developer ID: yolodata, Project: tbana, Lines: 22, Source: SplunkSchemeExample.java


注:本文中的cascading.tap.hadoop.Hfs类示例由纯净天空整理自Github/MSDocs等开源代码及文档管理平台,相关代码片段筛选自各路编程大神贡献的开源项目,源码版权归原作者所有,传播和使用请参考对应项目的License;未经允许,请勿转载。