This article collects typical usage examples of the Java class cascading.tap.hadoop.Hfs. If you are wondering what the Hfs class is for and how to use it, the curated examples below may help.
The Hfs class belongs to the cascading.tap.hadoop package. Fifteen code examples are shown, sorted by popularity by default.
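Every example follows the same basic pattern: wrap a Scheme (TextLine, CsvScheme, OrcFile, OpenCsvScheme, ...) in an Hfs tap that points at an HDFS or local path, then connect a source tap and a sink tap with a FlowConnector. The minimal sketch below illustrates that pattern; the class name and the input/output paths are placeholders chosen for illustration and are not taken from the examples that follow.

import java.util.Properties;

import cascading.flow.Flow;
import cascading.flow.hadoop.HadoopFlowConnector;
import cascading.pipe.Pipe;
import cascading.scheme.hadoop.TextLine;
import cascading.tap.SinkMode;
import cascading.tap.Tap;
import cascading.tap.hadoop.Hfs;

public class HfsCopyExample { // hypothetical class name, for illustration only
  public static void main(String[] args) {
    // An Hfs tap binds a Scheme (here TextLine) to a Hadoop-accessible path.
    Tap source = new Hfs(new TextLine(), "input/lines.txt");              // placeholder input path
    Tap sink = new Hfs(new TextLine(), "output/lines", SinkMode.REPLACE); // placeholder output path
    // A pass-through pipe: the flow simply copies the source lines to the sink.
    Pipe pipe = new Pipe("copy");
    Flow flow = new HadoopFlowConnector(new Properties()).connect(source, sink, pipe);
    flow.complete();
  }
}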
Example 1: fieldsCountGreaterThanColumnsTest
import cascading.tap.hadoop.Hfs; // import the required package/class
/**
 * Tests that the correct number of input fields is provided.
 */
@Test(expected = RuntimeException.class)
public void fieldsCountGreaterThanColumnsTest() {
  String sourcePath = "src/test/resources/input/with-headers.txt";
  String sinkPath = "src/test/resources/output/sink-with-headers";
  FlowConnector connector = new Hadoop2MR1FlowConnector();
  CSVFormat sourceFormat = CSVFormat.newFormat(',')
      .withQuote('"')
      .withEscape('\\')
      .withRecordSeparator('\n');
  CSVFormat sinkFormat = CSVFormat.newFormat('\t')
      .withSkipHeaderRecord()
      .withEscape('\\')
      .withRecordSeparator('\n');
  Fields sourceFields = new Fields("id", "last name", "first name", "phone");
  Tap source = new Hfs(new CsvScheme(sourceFields, sourceFormat), sourcePath);
  Tap sink = new Hfs(new CsvScheme(sinkFormat), sinkPath);
  Pipe pipe = new Pipe("pipe");
  connector.connect(source, sink, pipe).complete();
}
Example 2: testWhenExtraColumnsStrict
import cascading.tap.hadoop.Hfs; // import the required package/class
@Test(expected = FlowException.class)
public void testWhenExtraColumnsStrict() throws Exception {
  String sourcePath = "src/test/resources/input/with-extra-columns.txt";
  String sinkPath = "src/test/resources/input/sink-with-headers";
  FlowConnector connector = new Hadoop2MR1FlowConnector();
  CSVFormat sourceFormat = CSVFormat.newFormat('\t')
      .withHeader("id", "first name", "last name", "city", "zip")
      .withQuote('"')
      .withEscape('\\')
      .withRecordSeparator('\n');
  CSVFormat sinkFormat = CSVFormat.newFormat('\t')
      .withEscape('\\')
      .withRecordSeparator('\n');
  Tap source = new Hfs(new CsvScheme(sourceFormat, true), sourcePath);
  Tap sink = new Hfs(new CsvScheme(sinkFormat), sinkPath, SinkMode.REPLACE);
  Pipe pipe = new Pipe("pipe");
  connector.connect(source, sink, pipe).complete();
}
Example 3: testWhenExtraColumnsStrictNoHeaders
import cascading.tap.hadoop.Hfs; // import the required package/class
@Test(expected = FlowException.class)
public void testWhenExtraColumnsStrictNoHeaders() throws Exception {
  String sourcePath = "src/test/resources/input/with-extra-columns-no-header.txt";
  String sinkPath = "src/test/resources/input/sink-no-headers";
  FlowConnector connector = new Hadoop2MR1FlowConnector();
  CSVFormat sourceFormat = CSVFormat.newFormat('\t')
      .withQuote('"')
      .withEscape('\\')
      .withRecordSeparator('\n');
  CSVFormat sinkFormat = CSVFormat.newFormat('\t')
      .withEscape('\\')
      .withRecordSeparator('\n');
  Tap source = new Hfs(new CsvScheme(sourceFormat, true), sourcePath);
  Tap sink = new Hfs(new CsvScheme(sinkFormat), sinkPath, SinkMode.REPLACE);
  Pipe pipe = new Pipe("pipe");
  connector.connect(source, sink, pipe).complete();
}
Example 4: testSchemeFields
import cascading.tap.hadoop.Hfs; // import the required package/class
/**
 * Helper method that asserts the fields generated by CsvScheme.
 */
@SuppressWarnings("unchecked")
private void testSchemeFields(String sourcePath, CsvScheme sourceSchema, String sinkPath, CsvScheme sinkScheme, Set<String> expected) {
  Tap source = new Hfs(sourceSchema, sourcePath);
  Tap sink = new Hfs(sinkScheme, sinkPath);
  Pipe pipe = new Pipe("pipe");
  FlowConnector connector = new Hadoop2MR1FlowConnector();
  connector.connect(source, sink, pipe).complete();
  Fields sinkFields = sink.getSinkFields();
  for (int i = 0; i < sinkFields.size(); i++) {
    assertTrue("Unexpected column " + sinkFields.get(i), expected.contains(sinkFields.get(i)));
    expected.remove(sinkFields.get(i));
  }
  assertTrue("Not all expected values are found", expected.isEmpty());
}
Example 5: testPaths
import cascading.tap.hadoop.Hfs; // import the required package/class
/**
 * Tests the content of an output path against the given expected path.
 */
@SuppressWarnings("unchecked")
private void testPaths(String actual, String expected) throws Exception {
  Tap outputTest = new Hfs(new TextLine(), actual);
  Tap expectedTest = new Hfs(new TextLine(), expected);
  FlowProcess outputProcess = new HadoopFlowProcess(new JobConf(new Configuration()));
  FlowProcess expectedProcess = new HadoopFlowProcess(new JobConf(new Configuration()));
  TupleEntryIterator outputIterator = outputTest.openForRead(outputProcess);
  TupleEntryIterator expectedIterator = expectedTest.openForRead(expectedProcess);
  List<String> outputList = new ArrayList<>();
  while (outputIterator.hasNext()) {
    outputList.add(outputIterator.next().getTuple().getString(1));
  }
  List<String> expectedList = new ArrayList<>();
  while (expectedIterator.hasNext()) {
    expectedList.add(expectedIterator.next().getTuple().getString(1));
  }
  assertTrue(outputList.equals(expectedList));
}
Example 6: openTrapForWrite
import cascading.tap.hadoop.Hfs; // import the required package/class
@Override
public TupleEntryCollector openTrapForWrite(Tap trap) throws IOException {
  if (trap instanceof Hfs) {
    JobConf jobConf = new JobConf(this.getConfigCopy());
    // Build a unique part-file name from the step, node, and slice numbers.
    int stepNum = jobConf.getInt("cascading.flow.step.num", 0);
    int nodeNum = jobConf.getInt("cascading.flow.node.num", 0);
    String partname = String.format("-%05d-%05d-%05d", stepNum, nodeNum, this.getCurrentSliceNum());
    jobConf.set("cascading.tapcollector.partname", "%s%spart" + partname);
    // Provide a synthetic MapReduce task id so Hadoop-based output can be opened outside a real MR task.
    String value = String.format("attempt_%012d_0000_m_%06d_0", (int) Math.rint(System.currentTimeMillis()), this.getCurrentSliceNum());
    jobConf.set("mapred.task.id", value);
    jobConf.set("mapreduce.task.id", value);
    return trap.openForWrite(new FlinkFlowProcess(jobConf), null);
  } else {
    throw new UnsupportedOperationException("Only Hfs taps are supported as traps");
  }
}
Example 7: writeTypical
import cascading.tap.hadoop.Hfs; // import the required package/class
@Test
public void writeTypical() throws IOException {
  Data data = new DataBuilder(FIELDS_AB).addTuple("A1", "B1").addTuple("A2", "B2").build();
  Tap<?, ?, ?> tap = new Hfs(OrcFile.source().declaredFields(FIELDS_AB).schemaFromFile().build(), path);
  Plunger.writeData(data).toTap(tap);
  try (OrcReader reader = getOrcReader()) {
    assertThat(reader.hasNext(), is(true));
    List<Object> list = reader.next();
    assertThat(list.size(), is(2));
    assertThat(list.get(0), is((Object) "A1"));
    assertThat(list.get(1), is((Object) "B1"));
    assertThat(reader.hasNext(), is(true));
    list = reader.next();
    assertThat(list.size(), is(2));
    assertThat(list.get(0), is((Object) "A2"));
    assertThat(list.get(1), is((Object) "B2"));
    assertThat(reader.hasNext(), is(false));
  }
}
Example 8: readTypical
import cascading.tap.hadoop.Hfs; // import the required package/class
@Test
public void readTypical() throws IOException {
  try (OrcWriter writer = new OrcWriter.Builder(conf, new Path(path, "part-00000"))
      .addField("a", TypeInfoFactory.stringTypeInfo)
      .addField("b", TypeInfoFactory.stringTypeInfo)
      .build()) {
    writer.addRow("A1", "B1");
    writer.addRow("A2", "B2");
  }
  List<TupleEntry> actual = Plunger.readDataFromTap(
      new Hfs(OrcFile.source().declaredFields(FIELDS_AB).schemaFromFile().build(), path)).asTupleEntryList();
  List<TupleEntry> expected = new DataBuilder(FIELDS_AB)
      .addTuple("A1", "B1")
      .addTuple("A2", "B2")
      .build()
      .asTupleEntryList();
  assertThat(actual, is(tupleEntryList(expected)));
}
Example 9: readMissing
import cascading.tap.hadoop.Hfs; // import the required package/class
@Test
public void readMissing() throws IOException {
  try (OrcWriter writer = new OrcWriter.Builder(conf, new Path(path, "part-00000"))
      .addField("a", TypeInfoFactory.stringTypeInfo)
      .build()) {
    writer.addRow("A1");
    writer.addRow("A2");
  }
  List<TupleEntry> actual = Plunger.readDataFromTap(
      new Hfs(OrcFile.source().declaredFields(FIELDS_AB).schemaFromFile().build(), path)).asTupleEntryList();
  List<TupleEntry> expected = new DataBuilder(FIELDS_AB)
      .addTuple("A1", null)
      .addTuple("A2", null)
      .build()
      .asTupleEntryList();
  assertThat(actual, is(tupleEntryList(expected)));
}
Example 10: readStringPredicatePushdown
import cascading.tap.hadoop.Hfs; // import the required package/class
@Test
public void readStringPredicatePushdown() throws IOException {
  TypeInfo typeInfo = TypeInfoFactory.stringTypeInfo;
  try (OrcWriter writer = getOrcWriter(typeInfo)) {
    writer.addRow("hello");
    writer.addRow("world");
  }
  StructTypeInfo structTypeInfo = new StructTypeInfoBuilder().add("a", TypeInfoFactory.stringTypeInfo).build();
  SearchArgument searchArgument = SearchArgumentFactory.newBuilder().startAnd().equals("a", "hello").end().build();
  OrcFile orcFile = OrcFile.source().columns(structTypeInfo).schemaFromFile().searchArgument(searchArgument).build();
  Tap<?, ?, ?> tap = new Hfs(orcFile, path);
  List<Tuple> list = Plunger.readDataFromTap(tap).asTupleList();
  assertThat(list.size(), is(1));
  assertThat(list.get(0).getObject(0), is((Object) "hello"));
}
Example 11: readDecimalPredicatePushdown
import cascading.tap.hadoop.Hfs; // import the required package/class
@Test
public void readDecimalPredicatePushdown() throws IOException {
  TypeInfo typeInfo = TypeInfoFactory.getDecimalTypeInfo(2, 1);
  try (OrcWriter writer = getOrcWriter(typeInfo)) {
    writer.addRow(HiveDecimal.create("0.0"));
    writer.addRow(HiveDecimal.create("0.1"));
  }
  StructTypeInfo structTypeInfo = new StructTypeInfoBuilder().add("a", typeInfo).build();
  SearchArgument searchArgument = SearchArgumentFactory
      .newBuilder()
      .startAnd()
      .equals("a", new BigDecimal("0.1"))
      .end()
      .build();
  OrcFile orcFile = OrcFile.source().columns(structTypeInfo).schemaFromFile().searchArgument(searchArgument).build();
  Tap<?, ?, ?> tap = new Hfs(orcFile, path);
  List<Tuple> list = Plunger.readDataFromTap(tap).asTupleList();
  assertThat(list.size(), is(1));
  assertThat(list.get(0).getObject(0), is((Object) new BigDecimal("0.1")));
}
Example 12: readCharPredicatePushdown
import cascading.tap.hadoop.Hfs; // import the required package/class
@Test
public void readCharPredicatePushdown() throws IOException {
  TypeInfo typeInfo = TypeInfoFactory.getCharTypeInfo(3);
  try (OrcWriter writer = getOrcWriter(typeInfo)) {
    writer.addRow(new HiveChar("foo", 3));
    writer.addRow(new HiveChar("bar", 3));
  }
  StructTypeInfo structTypeInfo = new StructTypeInfoBuilder().add("a", typeInfo).build();
  SearchArgument searchArgument = SearchArgumentFactory
      .newBuilder()
      .startAnd()
      .equals("a", new HiveChar("foo", 5))
      .end()
      .build();
  OrcFile orcFile = OrcFile.source().columns(structTypeInfo).schemaFromFile().searchArgument(searchArgument).build();
  Tap<?, ?, ?> tap = new Hfs(orcFile, path);
  List<Tuple> list = Plunger.readDataFromTap(tap).asTupleList();
  assertThat(list.size(), is(1));
  assertThat(list.get(0).getObject(0), is((Object) "foo"));
}
Example 13: testWithHeader
import cascading.tap.hadoop.Hfs; // import the required package/class
@Test
public void testWithHeader() throws Exception {
  final String inputFile = "quoted_header.csv";
  final String outputDir = "quoted_header";
  final String compareFile = "quoted_header.csv";
  final Properties props = new Properties();
  final Configuration conf = new Configuration();
  final Tap source = new Hfs(new OpenCsvScheme(), DATA_DIR + "/" + inputFile, SinkMode.KEEP);
  final Tap sink = new Hfs(new OpenCsvScheme(), TMP_DIR + "/" + outputDir, SinkMode.REPLACE);
  final Pipe pipe = new Each(new Pipe("test"), new Debug());
  new HadoopFlowConnector(props).connect(source, sink, pipe).complete();
  final Tap compare = new Hfs(new OpenCsvScheme(), COMPARE_DIR + "/" + compareFile, SinkMode.KEEP);
  assertTrue(compareTaps(sink, compare, conf));
}
Example 14: testHeaderless
import cascading.tap.hadoop.Hfs; // import the required package/class
@Test
public void testHeaderless() throws Exception {
  final String inputFile = "quoted_headerless.csv";
  final String outputDir = "quoted_headerless";
  final String compareFile = "quoted_headerless.csv";
  final Properties props = new Properties();
  final Configuration conf = new Configuration();
  final Tap source = new Hfs(new OpenCsvScheme(new Fields("id", "product", "descr")), DATA_DIR + "/" + inputFile, SinkMode.KEEP);
  final Tap sink = new Hfs(new OpenCsvScheme(new Fields("id", "product", "descr")), TMP_DIR + "/" + outputDir, SinkMode.REPLACE);
  final Pipe pipe = new Each(new Pipe("test"), new Debug());
  new HadoopFlowConnector(props).connect(source, sink, pipe).complete();
  final Tap compare = new Hfs(new OpenCsvScheme(new Fields("id", "product", "descr")), COMPARE_DIR + "/" + compareFile, SinkMode.KEEP);
  assertTrue(compareTaps(sink, compare, conf));
}
Example 15: main
import cascading.tap.hadoop.Hfs; // import the required package/class
public static void main(String[] args) {
  Properties properties = new Properties();
  properties.put(SplunkConf.SPLUNK_USERNAME, "admin");
  properties.put(SplunkConf.SPLUNK_PASSWORD, "changeIt");
  properties.put(SplunkConf.SPLUNK_HOST, "localhost");
  properties.put(SplunkConf.SPLUNK_PORT, "9050");
  SplunkDataQuery splunkSearch = new SplunkDataQuery();
  SplunkScheme inputScheme = new SplunkScheme(splunkSearch);
  SplunkTap input = new SplunkTap(properties, inputScheme);
  TextLine outputScheme = new TextLine();
  Hfs output = new Hfs(outputScheme, PATH_TO_OUTPUT, SinkMode.REPLACE);
  Pipe pipe = new Pipe("test");
  Flow flow = new HadoopFlowConnector().connect(input, output, pipe);
  flow.complete();
}