本文整理汇总了Java中org.kitesdk.data.View类的典型用法代码示例。如果您正苦于以下问题:Java View类的具体用法?Java View怎么用?Java View使用的例子?那么恭喜您, 这里精选的类代码示例或许可以为您提供帮助。
View类属于org.kitesdk.data包,在下文中一共展示了View类的15个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Java代码示例。
示例1: run
import org.kitesdk.data.View; //导入依赖的package包/类
/**
 * Copies every standard event from the source view to the target view,
 * quietly closing both the reader and the writer when done.
 */
public void run(@DataIn(name="source.events", type=StandardEvent.class) View<StandardEvent> input,
@DataOut(name="target.events", type=StandardEvent.class) View<StandardEvent> output) {
  DatasetReader<StandardEvent> source = input.newReader();
  DatasetWriter<StandardEvent> sink = output.newWriter();
  try {
    // Stream records one at a time from source to sink.
    while (source.hasNext()) {
      sink.write(source.next());
    }
  } finally {
    Closeables.closeQuietly(source);
    Closeables.closeQuietly(sink);
  }
}
示例2: run
import org.kitesdk.data.View; //导入依赖的package包/类
/**
 * Copies every generic record from the source users view to the target
 * users view, quietly closing both ends when the copy completes.
 */
public void run(@DataIn(name="source_users") View<GenericRecord> input,
@DataOut(name="target_users") View<GenericRecord> output) {
  DatasetReader<GenericRecord> source = input.newReader();
  DatasetWriter<GenericRecord> sink = output.newWriter();
  try {
    // Drain the reader into the writer record by record.
    while (source.hasNext()) {
      sink.write(source.next());
    }
  } finally {
    Closeables.closeQuietly(source);
    Closeables.closeQuietly(sink);
  }
}
示例3: run
import org.kitesdk.data.View; //导入依赖的package包/类
/**
 * Reads the example_events view as a Spark RDD, keeps only the events
 * accepted by {@link KeepOddUsers}, and writes the result to odd_users.
 */
public void run(@DataIn(name="example_events", type=ExampleEvent.class) View<ExampleEvent> input,
@DataOut(name="odd_users", type=ExampleEvent.class) View<ExampleEvent> output) throws IOException {
  Job job = Job.getInstance(getJobContext().getHadoopConf());
  // Bind the Kite views to the Hadoop input/output formats on this job.
  DatasetKeyInputFormat.configure(job).readFrom(input);
  DatasetKeyOutputFormat.configure(job).writeTo(output);
  JavaPairRDD<ExampleEvent, Void> allEvents = getJobContext()
      .getSparkContext()
      .newAPIHadoopRDD(job.getConfiguration(), DatasetKeyInputFormat.class,
          ExampleEvent.class, Void.class);
  JavaPairRDD<ExampleEvent, Void> oddUserEvents = allEvents.filter(new KeepOddUsers());
  oddUserEvents.saveAsNewAPIHadoopDataset(job.getConfiguration());
}
示例4: run
import org.kitesdk.data.View; //导入依赖的package包/类
/**
 * Creates the events dataset at {@code uri} and writes random events for
 * 36000 ms past the base timestamp, then reports how many were generated.
 *
 * @return 0 on success
 */
@Override
public int run(List<String> args) throws Exception {
  Preconditions.checkState(!Datasets.exists(uri),
      "events dataset already exists");
  DatasetDescriptor descriptor = new DatasetDescriptor.Builder()
      .schema(StandardEvent.class).build();
  View<StandardEvent> events = Datasets.create(uri, descriptor, StandardEvent.class);
  DatasetWriter<StandardEvent> eventWriter = events.newWriter();
  try {
    // Keep writing until 36 seconds have elapsed since baseTimestamp.
    while (System.currentTimeMillis() - baseTimestamp < 36000) {
      eventWriter.write(generateRandomEvent());
    }
  } finally {
    eventWriter.close();
  }
  // counter is presumably advanced by generateRandomEvent() — it is not
  // visible in this snippet.
  System.out.println("Generated " + counter + " events");
  return 0;
}
示例5: read
import org.kitesdk.data.View; //导入依赖的package包/类
/**
 * Materializes every record in the view into a HashSet.
 *
 * @param view the view to read in full
 * @return a set containing all records of the view
 */
public static <T> HashSet<T> read(View<T> view) {
  // If newReader() throws, there is nothing to close yet and the
  // exception simply propagates, matching the original null-guard.
  DatasetReader<T> reader = view.newReader();
  try {
    return Sets.newHashSet(reader.iterator());
  } finally {
    reader.close();
  }
}
示例6: save
import org.kitesdk.data.View; //导入依赖的package包/类
/**
 * Saves every RDD produced by the given DStream to the given view.
 *
 * @param dstream the stream whose RDDs are persisted as they arrive
 * @param view the destination view for each RDD's records
 */
public static <T> void save(JavaDStream<T> dstream, final View<T> view) {
// Capture only the URI string so the closure shipped to Spark workers
// does not have to serialize the View object itself.
final String uri = view.getUri().toString();
dstream.foreachRDD(new Function2<JavaRDD<T>, Time, Void>() {
@Override
public Void call(JavaRDD<T> rdd, Time time) throws Exception {
// Delegates to an overload that saves one RDD to the dataset URI
// (defined elsewhere in this class — not visible in this snippet).
save(rdd, uri);
return null;
}
});
}
示例7: run
import org.kitesdk.data.View; //导入依赖的package包/类
/**
 * Copies the source.users view to target.users by reading it as a Spark
 * pair RDD and saving it straight back through the Hadoop output format.
 */
public void run(@DataIn(name="source.users") View<GenericRecord> input,
@DataOut(name="target.users") View<GenericRecord> output) throws IOException {
  Job job = Job.getInstance();
  // Bind the Kite views to the Hadoop input/output formats on this job.
  DatasetKeyInputFormat.configure(job).readFrom(input);
  DatasetKeyOutputFormat.configure(job).writeTo(output);
  @SuppressWarnings("unchecked")
  JavaPairRDD<GenericData.Record, Void> records = getJobContext()
      .getSparkContext()
      .newAPIHadoopRDD(job.getConfiguration(), DatasetKeyInputFormat.class,
          GenericData.Record.class, Void.class);
  // No transformation — write the records back out unchanged.
  records.saveAsNewAPIHadoopDataset(job.getConfiguration());
}
示例8: loadWhenAvailable
import org.kitesdk.data.View; //导入依赖的package包/类
/**
 * Loads the expected number of items when they are available in the dataset.
 * Polls the view up to 20 times, sleeping one second between attempts, and
 * fails the test if the expected count is never reached.
 *
 * @param view the view to poll
 * @param expected the number of items to wait for
 * @return exactly {@code expected} items read from the view
 */
private <T> List<T> loadWhenAvailable(View<T> view, int expected) {
  for (int attempt = 0; attempt < 20; ++attempt) {
    List<T> items = Lists.newArrayList();
    DatasetReader<T> reader = view.newReader();
    int count = 0;
    try {
      for (; count < expected; ++count) {
        // BUG FIX: the original used "continue" here, which still ran the
        // ++count update on an exhausted reader, so count could reach
        // "expected" and the method returned an incomplete list. Break out
        // instead so an exhausted reader forces a retry.
        if (!reader.hasNext()) {
          break;
        }
        items.add(reader.next());
      }
    } finally {
      reader.close();
    }
    if (count == expected) {
      return items;
    }
    try {
      Thread.sleep(1000);
    } catch (InterruptedException e) {
      // Restore the interrupt status before converting to unchecked.
      Thread.currentThread().interrupt();
      throw new RuntimeException(e);
    }
  }
  Assert.fail("Unable to load the expected items.");
  return null;
}
示例9: testJob
import org.kitesdk.data.View; //导入依赖的package包/类
/**
 * End-to-end test: writes ten SmallEvents to the streaming app's Kafka
 * topic and asserts they all land in the events dataset.
 */
@Test
public void testJob() throws IOException {
  List<SmallEvent> events = Lists.newArrayList();
  for (int i = 0; i < 10; ++i) {
    SmallEvent event = SmallEvent.newBuilder()
        .setSessionId("1234")
        .setUserId(i)
        .build();
    events.add(event);
  }
  harness.writeMessages(StreamingSparkApp.TOPIC_NAME, events);
  // Give the streaming job time to consume and persist the messages.
  try {
    Thread.sleep(10000);
  } catch (InterruptedException e) {
    // FIX: restore the interrupt status instead of swallowing the
    // exception with printStackTrace(), so interruption stays observable.
    Thread.currentThread().interrupt();
  }
  View<SmallEvent> view = Datasets.load(StreamingSparkApp.EVENTS_DS_URI, SmallEvent.class);
  List<SmallEvent> results = loadWhenAvailable(view, 10);
  Assert.assertEquals(events, results);
}
示例10: checkMessages
import org.kitesdk.data.View; //导入依赖的package包/类
/**
 * Checks if the given view contains the expected messages within a timeout period.
 *
 * @return true when all expected messages were loaded and match exactly
 */
static boolean checkMessages(View view, List<ExampleEvent> expected, int timeoutSeconds) throws InterruptedException {
  List<ExampleEvent> loaded = loadWhenAvailable(view, expected.size(), timeoutSeconds);
  // null means the expected count never showed up before the timeout.
  return loaded != null && expected.equals(loaded);
}
示例11: newWriter
import org.kitesdk.data.View; //导入依赖的package包/类
/**
 * Opens a writer for the dataset at {@code uri}, loading it under the
 * supplied Kerberos login. As a side effect, updates {@code targetSchema},
 * {@code reuseDatum} and {@code datasetName}, and invalidates the cached
 * readers when the target schema has changed.
 */
private DatasetWriter<GenericRecord> newWriter(
final UserGroupInformation login, final URI uri) {
// Load the dataset inside a privileged action so the credentials of
// "login" are used for the metadata lookup.
View<GenericRecord> view = KerberosUtil.runPrivileged(login,
new PrivilegedExceptionAction<Dataset<GenericRecord>>() {
@Override
public Dataset<GenericRecord> run() {
return Datasets.load(uri);
}
});
DatasetDescriptor descriptor = view.getDataset().getDescriptor();
String formatName = descriptor.getFormat().getName();
// Reject storage formats this sink does not support.
Preconditions.checkArgument(allowedFormats().contains(formatName),
"Unsupported format: " + formatName);
Schema newSchema = descriptor.getSchema();
if (targetSchema == null || !newSchema.equals(targetSchema)) {
this.targetSchema = descriptor.getSchema();
// target dataset schema has changed, invalidate all readers based on it
readers.invalidateAll();
}
// NOTE(review): datum reuse is disabled for parquet — presumably because
// the parquet writer buffers records; confirm against the writer impl.
this.reuseDatum = !("parquet".equals(formatName));
this.datasetName = view.getDataset().getName();
return view.newWriter();
}
示例12: run
import org.kitesdk.data.View; //导入依赖的package包/类
/**
 * Moves fully-staged log partitions (anything before the start of today)
 * from the staging dataset into the persistent dataset via a Crunch
 * pipeline, deleting the staged data only after the pipeline succeeds.
 *
 * @return 0 when the pipeline succeeded, 1 otherwise
 */
@Override
public int run(String[] args) throws Exception {
final long startOfToday = startOfDay();
// the destination dataset
Dataset<Record> persistent = Datasets.load(
"dataset:file:/tmp/data/logs", Record.class);
// the source: anything before today in the staging area
Dataset<Record> staging = Datasets.load(
"dataset:file:/tmp/data/logs_staging", Record.class);
View<Record> ready = staging.toBefore("timestamp", startOfToday);
ReadableSource<Record> source = CrunchDatasets.asSource(ready);
PCollection<Record> stagedLogs = read(source);
// Append the staged records to the persistent dataset.
getPipeline().write(stagedLogs,
CrunchDatasets.asTarget(persistent), Target.WriteMode.APPEND);
PipelineResult result = run();
if (result.succeeded()) {
// remove the source data partition from staging
ready.deleteAll();
return 0;
} else {
// Leave staging intact so the run can be retried.
return 1;
}
}
示例13: signalOutputViews
import org.kitesdk.data.View; //导入依赖的package包/类
/**
 * Signal the produced views as ready for downstream processing.
 *
 * @param views the produced views, keyed by output name
 */
protected void signalOutputViews(Map<String,View> views) {
  // If the job specified output parameters, signal them when we complete.
  JobParameters params = getJobParameters();
  if (params != null) {
    // FIX: reuse the already-fetched parameters instead of calling
    // getJobParameters() a second time.
    Set<String> outputNames = params.getOutputNames();
    for (String outputName : outputNames) {
      View view = views.get(outputName);
      // Only views that support signaling can be marked ready.
      if (view instanceof Signalable) {
        ((Signalable) view).signalReady();
      }
    }
  }
}
示例14: runScheduledJobs
import org.kitesdk.data.View; //导入依赖的package包/类
/**
 * Runs all scheduled jobs in the application using the given nominal time.
 *
 * @param nominalTime the nominal time used to resolve each view's URI template
 */
public void runScheduledJobs(Instant nominalTime) {
  Map<String,View> loadedViews = Maps.newHashMap();
  for (Schedule schedule : app.getSchedules()) {
    for (Schedule.ViewTemplate template : schedule.getViewTemplates().values()) {
      // Expand the URI template against the nominal time, then load it.
      String resolvedUri = resolveTemplate(template.getUriTemplate(), nominalTime);
      loadedViews.put(template.getName(), Datasets.load(resolvedUri, template.getInputType()));
    }
  }
  // Delegate to the overload that takes the resolved views.
  runScheduledJobs(nominalTime, loadedViews);
}
示例15: run
import org.kitesdk.data.View; //导入依赖的package包/类
/**
 * Writes a single KeyValues record to the kv-output view, capturing both
 * the job-wide settings and the settings scoped to the kv-output target.
 */
public void run(@DataOut(name="kv-output", type= KeyValues.class) View<KeyValues> output) {
  DatasetWriter<KeyValues> writer = output.newWriter();
  try {
    JobContext ctx = getJobContext();
    KeyValues record = KeyValues.newBuilder()
        .setJobsettings(ctx.getSettings())
        .setOutputsettings(ctx.getOutputSettings("kv-output"))
        .build();
    writer.write(record);
  } finally {
    Closeables.closeQuietly(writer);
  }
}