This article collects and summarizes typical usage examples of the Datasets.load method from the Java class org.kitesdk.data.Datasets. If you are asking what Datasets.load does, how to call it, or where to find examples of it, the curated code samples below should help. You can also explore further usage examples of the containing class, org.kitesdk.data.Datasets.
The following presents 15 code examples of Datasets.load, sorted by popularity by default. You can upvote the examples you like or find useful; your feedback helps the system recommend better Java code samples.
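Before the examples, here is a minimal, self-contained sketch of the typical load-and-read pattern (not taken from any project below). The URI is a placeholder, and the try-with-resources form assumes a Kite SDK release in which DatasetReader implements Closeable.

import org.apache.avro.generic.GenericRecord;
import org.kitesdk.data.Dataset;
import org.kitesdk.data.DatasetReader;
import org.kitesdk.data.Datasets;

public class LoadAndRead {
  public static void main(String[] args) {
    // Placeholder URI; Kite also accepts dataset:hive:... and
    // dataset:file:... URIs, as the examples below show.
    Dataset<GenericRecord> users = Datasets.load(
        "dataset:hdfs:/tmp/data/users", GenericRecord.class);

    // The reader is closed automatically, even if iteration throws.
    try (DatasetReader<GenericRecord> reader = users.newReader()) {
      for (GenericRecord user : reader) {
        System.out.println(user);
      }
    }
  }
}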
Example 1: SavePolicy
import org.kitesdk.data.Datasets; // import the package/class this method depends on
private SavePolicy(Context context) {
  String uri = context.getString(CONFIG_KITE_ERROR_DATASET_URI);
  Preconditions.checkArgument(uri != null, "Must set "
      + CONFIG_KITE_ERROR_DATASET_URI + " when " + CONFIG_FAILURE_POLICY
      + "=save");

  if (Datasets.exists(uri)) {
    dataset = Datasets.load(uri, AvroFlumeEvent.class);
  } else {
    DatasetDescriptor descriptor = new DatasetDescriptor.Builder()
        .schema(AvroFlumeEvent.class)
        .build();
    dataset = Datasets.create(uri, descriptor, AvroFlumeEvent.class);
  }

  nEventsHandled = 0;
}
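Example 1 only loads (or creates) the error dataset; the write side lives elsewhere in the Flume sink. As a rough sketch of what appending a failed event might look like with the standard Kite writer API (the URI and event contents are placeholders, not Flume's actual code):

import java.nio.ByteBuffer;
import java.util.Collections;
import org.apache.flume.source.avro.AvroFlumeEvent;
import org.kitesdk.data.Dataset;
import org.kitesdk.data.DatasetWriter;
import org.kitesdk.data.Datasets;

public class SaveFailedEvent {
  public static void main(String[] args) {
    // Placeholder URI; the real one comes from the Flume context property.
    Dataset<AvroFlumeEvent> errors = Datasets.load(
        "dataset:hdfs:/tmp/errors", AvroFlumeEvent.class);

    // Invented event contents, for illustration only.
    AvroFlumeEvent event = AvroFlumeEvent.newBuilder()
        .setHeaders(Collections.<CharSequence, CharSequence>emptyMap())
        .setBody(ByteBuffer.wrap("failed event body".getBytes()))
        .build();

    // DatasetWriter is Closeable; try-with-resources flushes and closes it.
    try (DatasetWriter<AvroFlumeEvent> writer = errors.newWriter()) {
      writer.write(event);
    }
  }
}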
Example 2: testTimedFileRolling
import org.kitesdk.data.Datasets; // import the package/class this method depends on
@Test
public void testTimedFileRolling()
    throws EventDeliveryException, InterruptedException {
  // use a new roll interval
  config.put("kite.rollInterval", "1"); // in seconds
  DatasetSink sink = sink(in, config);

  Dataset<GenericRecord> records = Datasets.load(FILE_DATASET_URI);

  // run the sink
  sink.start();
  sink.process();

  Assert.assertEquals("Should have committed", 0, remaining(in));

  Thread.sleep(1100); // sleep longer than the roll interval
  sink.process(); // rolling happens in the process method
  Assert.assertEquals(Sets.newHashSet(expected), read(records));

  // wait until the end to stop because it would close the files
  sink.stop();
}
Example 3: dataset
import org.kitesdk.data.Datasets; // import the package/class this method depends on
/**
 * Ensures the given dataset exists, creating it if it doesn't
 * and updating the schema if necessary.
 */
protected void dataset(String uri, DatasetDescriptor descriptor) {
  try {
    Datasets.create(uri, descriptor);
  } catch (DatasetExistsException e) {
    Dataset existingDataset = Datasets.load(uri);

    DatasetDescriptor updated;
    // The given descriptor might not have a location,
    // so use the current one.
    if (descriptor.getLocation() == null) {
      updated = new DatasetDescriptor.Builder(descriptor)
          .location(existingDataset.getDescriptor().getLocation())
          .build();
    } else {
      updated = descriptor;
    }

    Datasets.update(uri, updated);
  }
}
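A hypothetical call site for this helper, assuming a subclass with an Avro-generated User class; the Hive URI and format choice are placeholders, not part of the original project:

import org.kitesdk.data.DatasetDescriptor;
import org.kitesdk.data.Formats;

// Hypothetical usage of the dataset() helper above.
DatasetDescriptor descriptor = new DatasetDescriptor.Builder()
    .schema(User.class)   // schema derived from an Avro-generated class
    .format(Formats.AVRO) // explicit here, though Avro is Kite's default
    .build();
dataset("dataset:hive:default/users", descriptor); // creates or updates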
Example 4: run
import org.kitesdk.data.Datasets; // import the package/class this method depends on
@Override
public int run(String[] args) throws Exception {
  // Load the users dataset
  Dataset<Record> users = Datasets.load(
      "dataset:hdfs:/tmp/data/users", Record.class);

  // Get a reader for the dataset and read all the users
  DatasetReader<Record> reader = null;
  try {
    reader = users.newReader();
    for (GenericRecord user : reader) {
      System.out.println(user);
    }
  } finally {
    if (reader != null) {
      reader.close();
    }
  }

  return 0;
}
Example 5: run
import org.kitesdk.data.Datasets; // import the package/class this method depends on
@Override
public int run(String[] args) throws Exception {
  // Load the users dataset
  Dataset<Record> users = Datasets.load(
      "dataset:hive?dataset=users", Record.class);

  // Get a reader for the dataset and read all the users
  DatasetReader<Record> reader = null;
  try {
    reader = users.newReader();
    for (GenericRecord user : reader) {
      System.out.println(user);
    }
  } finally {
    if (reader != null) {
      reader.close();
    }
  }

  return 0;
}
Example 6: run
import org.kitesdk.data.Datasets; // import the package/class this method depends on
@Override
public int run(String[] args) throws Exception {
  // Load the users dataset
  Dataset<Record> users = Datasets.load(
      "dataset:hdfs:/tmp/data/users", Record.class);

  // Get a reader for the view of users whose favorite color is green
  DatasetReader<Record> reader = null;
  try {
    reader = users.with("favoriteColor", "green").newReader();
    for (GenericRecord user : reader) {
      System.out.println(user);
    }
  } finally {
    if (reader != null) {
      reader.close();
    }
  }

  return 0;
}
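The with("favoriteColor", "green") call above does not read anything by itself; it returns a refined view that the reader then scans. A short sketch of other refinements Kite's RefinableView interface offers, with placeholder field names and bounds:

import org.apache.avro.generic.GenericData.Record;
import org.kitesdk.data.Dataset;
import org.kitesdk.data.Datasets;
import org.kitesdk.data.RefinableView;

// Sketch only; "favoriteColor" and "id" are placeholder field names.
Dataset<Record> users = Datasets.load(
    "dataset:hdfs:/tmp/data/users", Record.class);

// Equality constraint, as in the example above:
RefinableView<Record> green = users.with("favoriteColor", "green");

// Range constraints, typically on a partition field:
RefinableView<Record> idRange = users.from("id", 100).toBefore("id", 200);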
Example 7: run
import org.kitesdk.data.Datasets; // import the package/class this method depends on
@Override
public int run(String[] args) throws Exception {
  // Load the products dataset
  Dataset<Product> products = Datasets.load(
      "dataset:hdfs:/tmp/data/products", Product.class);

  // Get a reader for the dataset and read all the products
  DatasetReader<Product> reader = null;
  try {
    reader = products.newReader();
    for (Product product : reader) {
      System.out.println(product);
    }
  } finally {
    if (reader != null) {
      reader.close();
    }
  }

  return 0;
}
Example 8: run
import org.kitesdk.data.Datasets; // import the package/class this method depends on
@Override
public int run(String[] args) throws Exception {
  // Load the events dataset
  Dataset<GenericRecord> events = Datasets.load("dataset:hive:/tmp/data/default/events");

  // Get a reader for the dataset and read all the events
  DatasetReader<GenericRecord> reader = events.newReader();
  try {
    for (GenericRecord event : reader) {
      System.out.println(event);
    }
  } finally {
    reader.close();
  }

  return 0;
}
Example 9: run
import org.kitesdk.data.Datasets; // import the package/class this method depends on
@Override
public int run(String[] args) throws Exception {
  Dataset<Record> movies = Datasets.load(
      "dataset:hdfs:/tmp/data/movies", Record.class);

  DatasetReader<Record> reader = null;
  try {
    reader = movies.newReader();
    for (Record rec : reader) {
      System.err.println("Movie: " + rec);
    }
  } finally {
    if (reader != null) {
      reader.close();
    }
  }

  return 0;
}
Example 10: verifyHiveDataset
import org.kitesdk.data.Datasets; // import the package/class this method depends on
private void verifyHiveDataset(String tableName, Object[][] valsArray) {
  String datasetUri = String.format("dataset:hive:default/%s",
      tableName.toLowerCase());
  assertTrue(Datasets.exists(datasetUri));

  Dataset dataset = Datasets.load(datasetUri);
  assertFalse(dataset.isEmpty());

  DatasetReader<GenericRecord> reader = dataset.newReader();
  try {
    List<String> expectations = new ArrayList<String>();
    if (valsArray != null) {
      for (Object[] vals : valsArray) {
        expectations.add(Arrays.toString(vals));
      }
    }

    while (reader.hasNext() && expectations.size() > 0) {
      String actual = Arrays.toString(
          convertGenericRecordToArray(reader.next()));
      assertTrue("Expect record: " + actual, expectations.remove(actual));
    }
    assertFalse(reader.hasNext());
    assertEquals(0, expectations.size());
  } finally {
    reader.close();
  }
}
Example 11: testMultiTableImportAsParquetFormat
import org.kitesdk.data.Datasets; // import the package/class this method depends on
public void testMultiTableImportAsParquetFormat() throws IOException {
  String[] argv = getArgv(new String[]{"--as-parquetfile"}, null);
  runImport(new ImportAllTablesTool(), argv);

  Path warehousePath = new Path(this.getWarehouseDir());
  int i = 0;
  for (String tableName : this.tableNames) {
    Path tablePath = new Path(warehousePath, tableName);
    Dataset dataset = Datasets.load("dataset:file:" + tablePath);

    // dequeue the expected value for this table. This
    // list has the same order as the tableNames list.
    String expectedVal = Integer.toString(i++) + ","
        + this.expectedStrings.get(0);
    this.expectedStrings.remove(0);

    DatasetReader<GenericRecord> reader = dataset.newReader();
    try {
      GenericRecord record = reader.next();
      String line = record.get(0) + "," + record.get(1);
      assertEquals("Table " + tableName + " expected a different string",
          expectedVal, line);
      assertFalse(reader.hasNext());
    } finally {
      reader.close();
    }
  }
}
Example 12: getSchema
import org.kitesdk.data.Datasets; // import the package/class this method depends on
@Override
public Schema getSchema(InitializerContext context,
    LinkConfiguration linkConfig, FromJobConfiguration fromJobConfig) {
  String uri = ConfigUtil.buildDatasetUri(
      linkConfig.linkConfig, fromJobConfig.fromJobConfig.uri);
  Dataset dataset = Datasets.load(uri);
  org.apache.avro.Schema avroSchema = dataset.getDescriptor().getSchema();
  return AvroDataTypeUtil.createSqoopSchema(avroSchema);
}
Example 13: mergeDataset
import org.kitesdk.data.Datasets; // import the package/class this method depends on
/**
 * Merges a dataset into this one.
 */
public void mergeDataset(String uri) {
  FileSystemDataset<GenericRecord> update = Datasets.load(uri);
  if (dataset instanceof FileSystemDataset) {
    ((FileSystemDataset<GenericRecord>) dataset).merge(update);
    // And let's completely drop the temporary dataset
    Datasets.delete(uri);
  } else {
    throw new SqoopException(
        KiteConnectorError.GENERIC_KITE_CONNECTOR_0000, uri);
  }
}
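For context, a hedged sketch of the flow this method supports in Sqoop's Kite connector: records are first written to a temporary dataset, which is then merged into the target and deleted. The URIs below are placeholders; the real connector derives them from the job configuration.

import org.apache.avro.generic.GenericRecord;
import org.kitesdk.data.Datasets;
import org.kitesdk.data.spi.filesystem.FileSystemDataset;

// Sketch only, with placeholder URIs.
String targetUri = "dataset:hdfs:/tmp/data/target";
String tempUri = "dataset:hdfs:/tmp/data/target-tmp";

FileSystemDataset<GenericRecord> target = Datasets.load(targetUri);
FileSystemDataset<GenericRecord> temp = Datasets.load(tempUri);

target.merge(temp);       // moves the temporary dataset's data files in
Datasets.delete(tempUri); // then drop the now-merged temporary dataset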
Example 14: testJob
import org.kitesdk.data.Datasets; // import the package/class this method depends on
@Test
public void testJob() throws IOException {
  List<SmallEvent> events = Lists.newArrayList();
  for (int i = 0; i < 10; ++i) {
    SmallEvent event = SmallEvent.newBuilder()
        .setSessionId("1234")
        .setUserId(i)
        .build();
    events.add(event);
  }

  harness.writeMessages(StreamingSparkApp.TOPIC_NAME, events);

  try {
    Thread.sleep(10000);
  } catch (InterruptedException e) {
    e.printStackTrace();
  }

  View<SmallEvent> view = Datasets.load(StreamingSparkApp.EVENTS_DS_URI, SmallEvent.class);
  List<SmallEvent> results = loadWhenAvailable(view, 10);
  Assert.assertEquals(events, results);
}
Example 15: testJobConfiguration
import org.kitesdk.data.Datasets; // import the package/class this method depends on
@Test
public void testJobConfiguration() {
  Map<String, String> settings = ImmutableMap.of("test.app.level.setting", "appvalue",
      "kite.job.write-config-job.test.job.level.setting", "jobvalue",
      "kite.job.write-config-job.output.kv-output.test.output.level.setting", "outputvalue");

  AppContext context = new AppContext(settings, getConfiguration());
  TestScheduler scheduler = TestScheduler.load(WriteConfigOutputApp.class, context);

  Instant nominalTime = new DateTime(2015, 5, 15, 12, 0, 0, 0, DateTimeZone.UTC).toInstant();
  scheduler.runScheduledJobs(nominalTime);

  Dataset<KeyValues> ds = Datasets.load(WriteConfigOutputApp.OUTPUT_DATASET, KeyValues.class);
  DatasetReader<KeyValues> reader = ds.newReader();
  try {
    KeyValues kv = reader.next();

    Assert.assertEquals(ImmutableMap.of(
        "test.app.level.setting", "appvalue",
        "test.job.level.setting", "jobvalue",
        "output.kv-output.test.output.level.setting", "outputvalue"),
        kv.getJobsettings());

    Assert.assertEquals(ImmutableMap.of(
        "test.app.level.setting", "appvalue",
        "test.job.level.setting", "jobvalue",
        "test.output.level.setting", "outputvalue"),
        kv.getOutputsettings());
  } finally {
    Closeables.closeQuietly(reader);
  }
}