本文整理汇总了 Java 中 org.kitesdk.data.PartitionStrategy 类的典型用法代码示例。如果您正苦于以下问题：Java PartitionStrategy 类的具体用法？Java PartitionStrategy 怎么用？Java PartitionStrategy 使用的例子？那么恭喜您，这里精选的类代码示例或许可以为您提供帮助。
PartitionStrategy 类属于 org.kitesdk.data 包，在下文中一共展示了 PartitionStrategy 类的 11 个代码示例，这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞，您的评价将有助于系统推荐出更棒的 Java 代码示例。
示例1: testPartitionedData
import org.kitesdk.data.PartitionStrategy; //导入依赖的package包/类
/**
 * Runs a {@code DatasetSink} against a dataset whose descriptor carries an
 * identity partition on {@code id}, then checks that every expected record
 * can be read back and that nothing is left on the input channel.
 *
 * <p>The dataset is created inside the {@code try} block and deleted in the
 * {@code finally} block if it exists, whether or not the assertions pass.
 *
 * @throws EventDeliveryException if thrown while the sink is running
 */
@Test
public void testPartitionedData() throws EventDeliveryException {
  URI datasetUri = URI.create("dataset:file:target/test_repo/partitioned");
  try {
    // Partition by id.
    PartitionStrategy byId = new PartitionStrategy.Builder()
        .identity("id", 10)
        .build();
    DatasetDescriptor partitionedDescriptor = new DatasetDescriptor.Builder(DESCRIPTOR)
        .partitionStrategy(byId)
        .build();
    Datasets.create(datasetUri, partitionedDescriptor);

    // Point the sink at the partitioned dataset.
    config.put(DatasetSinkConstants.CONFIG_KITE_DATASET_URI, datasetUri.toString());
    DatasetSink datasetSink = sink(in, config);

    // One full start / process / stop cycle.
    datasetSink.start();
    datasetSink.process();
    datasetSink.stop();

    Assert.assertEquals(Sets.newHashSet(expected), read(Datasets.load(datasetUri)));
    Assert.assertEquals("Should have committed", 0, remaining(in));
  } finally {
    // Clean up so the dataset does not outlive this test.
    if (Datasets.exists(datasetUri)) {
      Datasets.delete(datasetUri);
    }
  }
}
示例2: parsePartitionExpression
import org.kitesdk.data.PartitionStrategy; //导入依赖的package包/类
/**
 * Builds a {@link PartitionStrategy} from a {@code '/'}-separated list of
 * SpEL expressions.
 *
 * <p>Every segment is parsed and evaluated with one shared
 * {@link PartitionStrategy.Builder} as the root object of the evaluation
 * context. Each segment must evaluate to a {@code PartitionStrategy.Builder};
 * the builder produced by the last segment is the one that gets built.
 *
 * @param expression the partitioning expression, segments separated by {@code '/'}
 * @return the partition strategy built from the evaluated segments
 * @throws StoreException if a segment cannot be parsed, fails evaluation,
 *         or evaluates to {@code null}
 */
private static PartitionStrategy parsePartitionExpression(String expression) {
  ExpressionParser parser = new SpelExpressionParser();
  PartitionStrategy.Builder psb = new PartitionStrategy.Builder();
  StandardEvaluationContext ctx = new StandardEvaluationContext(psb);
  for (String expr : expression.split("/")) {
    PartitionStrategy.Builder evaluated;
    try {
      Expression e = parser.parseExpression(expr);
      evaluated = e.getValue(ctx, PartitionStrategy.Builder.class);
    }
    catch (SpelParseException spe) {
      if (!expr.trim().endsWith(")")) {
        throw new StoreException("Invalid partitioning expression '" + expr
            + "' - did you forget the closing parenthesis?", spe);
      }
      else {
        throw new StoreException("Invalid partitioning expression '" + expr + "'!", spe);
      }
    }
    catch (SpelEvaluationException see) {
      throw new StoreException("Invalid partitioning expression '" + expr + "' - failed evaluation!", see);
    }
    catch (NullPointerException npe) {
      throw new StoreException("Invalid partitioning expression '" + expr + "' - was evaluated to null!", npe);
    }
    // A null result does not throw inside the try block, so it has to be
    // checked explicitly. Otherwise a null from a middle segment would be
    // silently replaced by the next segment's result, and a null from the
    // last segment would escape as a raw NullPointerException from build().
    if (evaluated == null) {
      throw new StoreException("Invalid partitioning expression '" + expr + "' - was evaluated to null!",
          new NullPointerException("expression result was null"));
    }
    psb = evaluated;
  }
  return psb.build();
}
示例3: setup
import org.kitesdk.data.PartitionStrategy; //导入依赖的package包/类
/**
 * Registers the input and output datasets and schedules
 * {@code SimpleSparkJob}.
 *
 * <p>Both datasets share one descriptor: the
 * {@code DatasetTestUtilities.USER_SCHEMA} schema combined with provided
 * {@code year}, {@code month}, {@code day} and {@code hour} partition fields.
 *
 * @param context the application context; not read by this method
 */
@Override
public void setup(AppContext context) {
  PartitionStrategy hourPartitions = new PartitionStrategy.Builder()
      .provided("year", "int")
      .provided("month", "int")
      .provided("day", "int")
      .provided("hour", "int")
      .build();

  DatasetDescriptor userDescriptor = new DatasetDescriptor.Builder()
      .schema(DatasetTestUtilities.USER_SCHEMA)
      .partitionStrategy(hourPartitions)
      .build();

  // Input and output use the same descriptor.
  dataset(INPUT_DATASET, userDescriptor);
  dataset(OUTPUT_DATASET, userDescriptor);

  // Frequency "0 * * * *": presumably cron syntax, i.e. once per hour at
  // minute 0 -- confirm against the scheduler's documentation.
  schedule(new Schedule.Builder()
      .jobName("simple-spark-job")
      .jobClass(SimpleSparkJob.class)
      .frequency("0 * * * *")
      .withInput("source.users", INPUT_URI_PATTERN, "* * * * *")
      .withOutput("target.users", OUTPUT_URI_PATTERN)
      .build());
}
示例4: setup
import org.kitesdk.data.PartitionStrategy; //导入依赖的package包/类
/**
 * Registers the input and output datasets and schedules
 * {@code ScheduledInputOutputJob} under the job name {@code "test-job"}.
 *
 * @param context the application context; not read by this method
 */
public void setup(AppContext context) {
  // One descriptor for both datasets: user schema plus provided
  // year/month/day/hour partition fields.
  DatasetDescriptor sharedDescriptor = new DatasetDescriptor.Builder()
      .schema(DatasetTestUtilities.USER_SCHEMA)
      .partitionStrategy(new PartitionStrategy.Builder()
          .provided("year", "int")
          .provided("month", "int")
          .provided("day", "int")
          .provided("hour", "int")
          .build())
      .build();

  dataset(INPUT_DATASET, sharedDescriptor);
  dataset(OUTPUT_DATASET, sharedDescriptor);

  // Frequency "0 * * * *": presumably cron syntax, i.e. once per hour at
  // minute 0 -- confirm against the scheduler's documentation.
  Schedule inputOutputSchedule = new Schedule.Builder()
      .jobName("test-job")
      .jobClass(ScheduledInputOutputJob.class)
      .frequency("0 * * * *")
      .withInput("source_users", INPUT_URI_PATTERN, "* * * * *")
      .withOutput("target_users", OUTPUT_URI_PATTERN)
      .build();
  schedule(inputOutputSchedule);
}
示例5: setup
import org.kitesdk.data.PartitionStrategy; //导入依赖的package包/类
/**
 * Registers two input datasets and one output dataset, all with the same
 * descriptor, and schedules {@code DynamicInputOutputJob} with
 * {@code GenericRecord} inputs and output.
 *
 * @param context the application context; not read by this method
 */
@Override
public void setup(AppContext context) {
  PartitionStrategy hourPartitions = new PartitionStrategy.Builder()
      .provided("year", "int")
      .provided("month", "int")
      .provided("day", "int")
      .provided("hour", "int")
      .build();

  DatasetDescriptor userDescriptor = new DatasetDescriptor.Builder()
      .schema(DatasetTestUtilities.USER_SCHEMA)
      .partitionStrategy(hourPartitions)
      .build();

  // All three datasets share the descriptor above.
  dataset(INPUT1_DATASET, userDescriptor);
  dataset(INPUT2_DATASET, userDescriptor);
  dataset(OUTPUT_DATASET, userDescriptor);

  // The job and both inputs use the same "*/5 * * * *" expression.
  Schedule dynamicSchedule = new Schedule.Builder()
      .jobName("test_job")
      .jobClass(DynamicInputOutputJob.class)
      .frequency("*/5 * * * *")
      .withInput("input1", INPUT1_URI_PATTERN, "*/5 * * * *", GenericRecord.class)
      .withInput("input2", INPUT2_URI_PATTERN, "*/5 * * * *", GenericRecord.class)
      .withOutput("output", OUTPUT_URI_PATTERN, GenericRecord.class)
      .build();
  schedule(dynamicSchedule);
}
示例6: setup
import org.kitesdk.data.PartitionStrategy; //导入依赖的package包/类
/**
 * Registers the dataset at {@code ODD_USER_DS_URI} and schedules
 * {@code SparkJob} to read {@code example_events} and write
 * {@code odd_users}.
 *
 * @param context the application context; not read by this method
 */
public void setup(AppContext context) {
  // Provided partition fields from year down to minute.
  PartitionStrategy minutePartitions = new PartitionStrategy.Builder()
      .provided("year", "int")
      .provided("month", "int")
      .provided("day", "int")
      .provided("hour", "int")
      .provided("minute", "int")
      .build();

  dataset(ODD_USER_DS_URI, new DatasetDescriptor.Builder()
      .schema(ExampleEvent.getClassSchema())
      .partitionStrategy(minutePartitions)
      .build());

  // Frequency "* * * * *": presumably cron syntax, i.e. every minute --
  // confirm against the scheduler's documentation.
  Schedule sparkSchedule = new Schedule.Builder()
      .jobName("example-spark")
      .jobClass(SparkJob.class)
      .frequency("* * * * *")
      .withInput("example_events", EVENT_URI_PATTERN, "* * * * *")
      .withOutput("odd_users", ODD_USER_URI_PATTERN)
      .build();
  schedule(sparkSchedule);
}
示例7: setup
import org.kitesdk.data.PartitionStrategy; //导入依赖的package包/类
/**
 * Registers the dataset at {@code ODD_USER_DS_URI} and schedules
 * {@code TriggeredJob} to read {@code example_events} and write
 * {@code odd_users}.
 *
 * @param context the application context; not read by this method
 */
@Override
public void setup(AppContext context) {
  // Event schema with provided partition fields from year down to minute.
  DatasetDescriptor oddUserDescriptor = new DatasetDescriptor.Builder()
      .schema(ExampleEvent.getClassSchema())
      .partitionStrategy(new PartitionStrategy.Builder()
          .provided("year", "int")
          .provided("month", "int")
          .provided("day", "int")
          .provided("hour", "int")
          .provided("minute", "int")
          .build())
      .build();
  dataset(ODD_USER_DS_URI, oddUserDescriptor);

  // Frequency "* * * * *": presumably cron syntax, i.e. every minute --
  // confirm against the scheduler's documentation.
  schedule(new Schedule.Builder()
      .jobName("example-triggered")
      .jobClass(TriggeredJob.class)
      .frequency("* * * * *")
      .withInput("example_events", EVENT_URI_PATTERN, "* * * * *")
      .withOutput("odd_users", ODD_USER_URI_PATTERN)
      .build());
}
示例8: setup
import org.kitesdk.data.PartitionStrategy; //导入依赖的package包/类
/**
 * Registers the report dataset at {@code REPORT_DS_URI} and schedules
 * {@code ScheduledReportJob}. The schedule declares no inputs or outputs.
 *
 * @param context the application context; not read by this method
 */
public void setup(AppContext context) {
  PartitionStrategy minutePartitions = new PartitionStrategy.Builder()
      .provided("year", "int")
      .provided("month", "int")
      .provided("day", "int")
      .provided("hour", "int")
      .provided("minute", "int")
      .build();

  // The report dataset uses SCHEMA with the partition fields above.
  DatasetDescriptor reportDescriptor = new DatasetDescriptor.Builder()
      .schema(SCHEMA)
      .partitionStrategy(minutePartitions)
      .build();
  dataset(REPORT_DS_URI, reportDescriptor);

  // Frequency "*/5 * * * *": presumably cron syntax for every five minutes.
  schedule(new Schedule.Builder()
      .jobName("example-scheduled-report")
      .jobClass(ScheduledReportJob.class)
      .frequency("*/5 * * * *")
      .build());
}
示例9: setup
import org.kitesdk.data.PartitionStrategy; //导入依赖的package包/类
/**
 * Registers the event dataset at {@code EVENT_DS_URI} and schedules
 * {@code DataGeneratorJob} to write to it.
 *
 * @param context the application context; not read by this method
 */
@Override
public void setup(AppContext context) {
  // Partition on the "timestamp" field at every granularity from year
  // down to minute.
  DatasetDescriptor eventDescriptor = new DatasetDescriptor.Builder()
      .schema(ExampleEvent.getClassSchema())
      .partitionStrategy(new PartitionStrategy.Builder()
          .year("timestamp")
          .month("timestamp")
          .day("timestamp")
          .hour("timestamp")
          .minute("timestamp")
          .build())
      .build();
  dataset(EVENT_DS_URI, eventDescriptor);

  // The generator's only declared output, "example_events", is bound to
  // EVENT_DS_PATTERN. Frequency "* * * * *" is presumably cron syntax for
  // every minute -- confirm.
  Schedule generatorSchedule = new Schedule.Builder()
      .jobName("example-data-generator")
      .jobClass(DataGeneratorJob.class)
      .frequency("* * * * *")
      .withOutput("example_events", EVENT_DS_PATTERN)
      .build();
  schedule(generatorSchedule);
}
示例10: run
import org.kitesdk.data.PartitionStrategy; //导入依赖的package包/类
/**
 * Creates a users dataset at {@code dataset:hdfs:/tmp/data/users} and writes
 * 100 generated user records to it.
 *
 * @param args command-line arguments; not read by this method
 * @return always {@code 0}
 * @throws Exception if any of the dataset or writer calls fails
 */
@Override
public int run(String[] args) throws Exception {
  // Identity partition on the "favoriteColor" field. The second argument
  // presumably names the partition -- confirm against the Kite API.
  PartitionStrategy byColor = new PartitionStrategy.Builder()
      .identity("favoriteColor", "favorite_color")
      .build();

  // The schema is loaded from the user.avsc resource.
  DatasetDescriptor userDescriptor = new DatasetDescriptor.Builder()
      .schemaUri("resource:user.avsc")
      .partitionStrategy(byColor)
      .build();

  Dataset<Record> userDataset = Datasets.create(
      "dataset:hdfs:/tmp/data/users", userDescriptor, Record.class);

  // Open the writer first, then make sure it is closed however the loop ends.
  DatasetWriter<Record> out = userDataset.newWriter();
  try {
    Random random = new Random();
    GenericRecordBuilder recordBuilder = new GenericRecordBuilder(userDescriptor.getSchema());
    for (int n = 0; n < 100; n++) {
      // Each user gets a sequential name, the current time, and a random color.
      out.write(recordBuilder
          .set("username", "user-" + n)
          .set("creationDate", System.currentTimeMillis())
          .set("favoriteColor", colors[random.nextInt(colors.length)])
          .build());
    }
  } finally {
    out.close();
  }
  return 0;
}
示例11: run
import org.kitesdk.data.PartitionStrategy; //导入依赖的package包/类
/**
 * Creates two log datasets from the same schema: a Parquet dataset at
 * {@code dataset:file:/tmp/data/logs} and an Avro staging dataset at
 * {@code dataset:file:/tmp/data/logs_staging}.
 *
 * @param args command-line arguments; not read by this method
 * @return always {@code 0}
 * @throws Exception if creating either dataset fails
 */
@Override
public int run(String[] args) throws Exception {
  // Both datasets read their schema from this resource.
  URI logSchema = URI.create("resource:simple-log.avsc");

  // Long-term storage: Parquet, partitioned by year, month and day of "timestamp".
  DatasetDescriptor parquetLogs = new DatasetDescriptor.Builder()
      .format(Formats.PARQUET)
      .schemaUri(logSchema)
      .partitionStrategy(new PartitionStrategy.Builder()
          .year("timestamp", "year")
          .month("timestamp", "month")
          .day("timestamp", "day")
          .build())
      .build();
  Datasets.create("dataset:file:/tmp/data/logs", parquetLogs, Record.class);

  // Temporary staging area: Avro, partitioned by day of "timestamp" only.
  DatasetDescriptor avroStaging = new DatasetDescriptor.Builder()
      .format(Formats.AVRO)
      .schemaUri(logSchema)
      .partitionStrategy(new PartitionStrategy.Builder()
          .day("timestamp", "day")
          .build())
      .build();
  Datasets.create("dataset:file:/tmp/data/logs_staging", avroStaging, Record.class);

  return 0;
}