本文整理汇总了 Java 中 org.apache.crunch.Pipeline.run 方法的典型用法代码示例。如果您正苦于以下问题:Java Pipeline.run 方法的具体用法?Java Pipeline.run 怎么用?Java Pipeline.run 使用的例子?那么恭喜您,这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在类 org.apache.crunch.Pipeline 的用法示例。
在下文中一共展示了 Pipeline.run 方法的 5 个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的 Java 代码示例。
示例1: testGeneric
import org.apache.crunch.Pipeline; //导入方法依赖的package包/类
/**
 * Creates an "in" and an "out" dataset with the user schema, fills the input
 * through two {@code writeTestUsers} calls, copies it to the output with a
 * Crunch {@link MRPipeline} in append mode, and finally hands the output to
 * {@code checkTestUsers} with an expected count of 10.
 *
 * @throws IOException declared for the dataset and helper calls
 */
@Test
public void testGeneric() throws IOException {
  DatasetDescriptor sourceDescriptor =
      new DatasetDescriptor.Builder().schema(USER_SCHEMA).build();
  Dataset<Record> source = repo.create("in", sourceDescriptor);

  DatasetDescriptor sinkDescriptor =
      new DatasetDescriptor.Builder().schema(USER_SCHEMA).build();
  Dataset<Record> sink = repo.create("out", sinkDescriptor);

  // Two separate writes of 5 records each (the second starts at offset 5).
  writeTestUsers(source, 5, 0);
  writeTestUsers(source, 5, 5);

  Pipeline copyJob = new MRPipeline(TestCrunchDatasets.class);
  PCollection<GenericData.Record> users =
      copyJob.read(CrunchDatasets.asSource(source, GenericData.Record.class));
  copyJob.write(users, CrunchDatasets.asTarget(sink), Target.WriteMode.APPEND);
  copyJob.run();

  checkTestUsers(sink, 10);
}
示例2: testGenericParquet
import org.apache.crunch.Pipeline; //导入方法依赖的package包/类
/**
 * Same copy scenario as the generic test, but both datasets are created with
 * {@code Formats.PARQUET}: the input is filled by two {@code writeTestUsers}
 * calls, read and appended to the output by an {@link MRPipeline}, and the
 * output is passed to {@code checkTestUsers} with an expected count of 10.
 *
 * @throws IOException declared for the dataset and helper calls
 */
@Test
public void testGenericParquet() throws IOException {
  Dataset<Record> parquetIn = repo.create(
      "in",
      new DatasetDescriptor.Builder()
          .schema(USER_SCHEMA)
          .format(Formats.PARQUET)
          .build());
  Dataset<Record> parquetOut = repo.create(
      "out",
      new DatasetDescriptor.Builder()
          .schema(USER_SCHEMA)
          .format(Formats.PARQUET)
          .build());

  // Two separate writes of 5 records each (the second starts at offset 5).
  writeTestUsers(parquetIn, 5, 0);
  writeTestUsers(parquetIn, 5, 5);

  Pipeline job = new MRPipeline(TestCrunchDatasets.class);
  // Read and write are wired in a single expression; arguments are still
  // evaluated left to right, so the source is read before the target is built.
  job.write(
      job.read(CrunchDatasets.asSource(parquetIn, GenericData.Record.class)),
      CrunchDatasets.asTarget(parquetOut),
      Target.WriteMode.APPEND);
  job.run();

  checkTestUsers(parquetOut, 10);
}
示例3: testPartitionedSourceAndTarget
import org.apache.crunch.Pipeline; //导入方法依赖的package包/类
/**
 * Creates two datasets partitioned by a 2-bucket hash of "username", writes
 * 10 test users to the input, then copies only the partition for key 0 from
 * input to output with an {@link MRPipeline} and asserts that the output
 * partition holds 5 records.
 *
 * @throws IOException declared for the dataset and helper calls
 */
@Test
@SuppressWarnings("deprecation")
public void testPartitionedSourceAndTarget() throws IOException {
  PartitionStrategy byUsernameHash =
      new PartitionStrategy.Builder().hash("username", 2).build();

  DatasetDescriptor inDescriptor = new DatasetDescriptor.Builder()
      .schema(USER_SCHEMA)
      .partitionStrategy(byUsernameHash)
      .build();
  Dataset<Record> partitionedIn = repo.create("in", inDescriptor);

  DatasetDescriptor outDescriptor = new DatasetDescriptor.Builder()
      .schema(USER_SCHEMA)
      .partitionStrategy(byUsernameHash)
      .build();
  Dataset<Record> partitionedOut = repo.create("out", outDescriptor);

  writeTestUsers(partitionedIn, 10);

  PartitionKey bucketZero = byUsernameHash.partitionKey(0);
  // The second getPartition argument differs on purpose: false for the input
  // partition, true for the output partition.
  Dataset<Record> sourcePartition = partitionedIn.getPartition(bucketZero, false);
  Dataset<Record> targetPartition = partitionedOut.getPartition(bucketZero, true);

  Pipeline job = new MRPipeline(TestCrunchDatasets.class);
  PCollection<GenericData.Record> partitionRecords =
      job.read(CrunchDatasets.asSource(sourcePartition, GenericData.Record.class));
  job.write(
      partitionRecords,
      CrunchDatasets.asTarget(targetPartition),
      Target.WriteMode.APPEND);
  job.run();

  Assert.assertEquals(5, datasetSize(targetPartition));
}
示例4: run
import org.apache.crunch.Pipeline; //导入方法依赖的package包/类
/**
 * Loads the example-event view and the report dataset, narrows the report
 * dataset to the year/month/day/hour/minute of this job's nominal time (UTC),
 * counts events per key, converts the counts with {@code ToUserReport}, and
 * writes the result into the narrowed view through the job's pipeline.
 */
public void run() {
  // TODO: Switch to parameterized views.
  View<ExampleEvent> eventView =
      Datasets.load(ScheduledReportApp.EXAMPLE_DS_URI, ExampleEvent.class);
  RefinableView<GenericRecord> reportDataset =
      Datasets.load(ScheduledReportApp.REPORT_DS_URI, GenericRecord.class);

  // Select the slice of the report dataset that this run writes to.
  DateTime nominalUtc = getNominalTime().toDateTime(DateTimeZone.UTC);
  int year = nominalUtc.getYear();
  int month = nominalUtc.getMonthOfYear();
  int day = nominalUtc.getDayOfMonth();
  int hour = nominalUtc.getHourOfDay();
  int minute = nominalUtc.getMinuteOfHour();
  View<GenericRecord> reportSlice = reportDataset
      .with("year", year)
      .with("month", month)
      .with("day", day)
      .with("hour", hour)
      .with("minute", minute);

  Pipeline job = getPipeline();
  PCollection<ExampleEvent> allEvents = job.read(CrunchDatasets.asSource(eventView));

  // Key every event by the long produced by GetEventId, then count per key.
  // NOTE(review): the original comment called this a count "by user ID" while
  // the key extractor is named GetEventId — confirm which one is intended.
  PTable<Long, ExampleEvent> keyedEvents = allEvents.by(new GetEventId(), Avros.longs());
  PCollection<Long> keysOnly = keyedEvents.keys();
  PTable<Long, Long> countsPerKey = keysOnly.count();

  PCollection<GenericData.Record> reportRows =
      countsPerKey.parallelDo(new ToUserReport(), Avros.generics(SCHEMA));

  job.write(reportRows, CrunchDatasets.asTarget(reportSlice));
  job.run();
}
示例5: run
import org.apache.crunch.Pipeline; //导入方法依赖的package包/类
/**
 * Reads the events from {@code input}, keeps those accepted by
 * {@code KeepOddUsers}, and writes them to {@code output} through the job's
 * pipeline.
 *
 * @param input view bound to the "example_events" data input
 * @param output view bound to the "odd_users" data output
 */
public void run(@DataIn(name="example_events", type=ExampleEvent.class) View<ExampleEvent> input,
    @DataOut(name="odd_users", type=ExampleEvent.class) View<ExampleEvent> output) {
  Pipeline job = getPipeline();
  PCollection<ExampleEvent> allEvents = job.read(CrunchDatasets.asSource(input));

  // Filter and write in one expression; the filter is still applied before
  // the target is created because arguments evaluate left to right.
  job.write(allEvents.filter(new KeepOddUsers()), CrunchDatasets.asTarget(output));
  job.run();
}