This article collects typical usage examples of the Java class org.kitesdk.data.Dataset. If you are wondering what the Dataset class does or how to use it, the curated code examples below may help.
The Dataset class belongs to the org.kitesdk.data package. Fifteen code examples are shown below, sorted by popularity by default.
Example 1: testTimedFileRolling
import org.kitesdk.data.Dataset; // import the required package/class
@Test
public void testTimedFileRolling()
throws EventDeliveryException, InterruptedException {
// use a new roll interval
config.put("kite.rollInterval", "1"); // in seconds
DatasetSink sink = sink(in, config);
Dataset<GenericRecord> records = Datasets.load(FILE_DATASET_URI);
// run the sink
sink.start();
sink.process();
Assert.assertEquals("Should have committed", 0, remaining(in));
Thread.sleep(1100); // sleep longer than the roll interval
sink.process(); // rolling happens in the process method
Assert.assertEquals(Sets.newHashSet(expected), read(records));
// wait until the end to stop because it would close the files
sink.stop();
}
Example 2: dataset
import org.kitesdk.data.Dataset; // import the required package/class
/**
* Ensures the given dataset exists, creating it if it doesn't
* and updating the schema if necessary.
*/
protected void dataset(String uri, DatasetDescriptor descriptor) {
try {
Datasets.create(uri, descriptor);
} catch (DatasetExistsException e) {
Dataset existingDataset = Datasets.load(uri);
DatasetDescriptor updated;
// The given descriptor might not have a location,
// so use the current one.
if (descriptor.getLocation() == null) {
updated = new DatasetDescriptor.Builder(descriptor)
.location(existingDataset.getDescriptor().getLocation())
.build();
} else {
updated = descriptor;
}
Datasets.update(uri, updated);
}
}
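A minimal sketch of how the helper above might be invoked; the schema fields and the HDFS URI are illustrative assumptions, not taken from the original code.
import org.apache.avro.Schema;
import org.apache.avro.SchemaBuilder;
import org.kitesdk.data.DatasetDescriptor;
// Hypothetical call site: build a descriptor from an ad-hoc Avro schema
// and let the helper create or update the dataset at the given URI.
Schema schema = SchemaBuilder.record("User").fields()
    .requiredString("username")
    .requiredString("favoriteColor")
    .endRecord();
DatasetDescriptor descriptor = new DatasetDescriptor.Builder()
    .schema(schema)
    .build();
dataset("dataset:hdfs:/tmp/data/users", descriptor);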
Example 3: run
import org.kitesdk.data.Dataset; // import the required package/class
@Override
public int run(String[] args) throws Exception {
// Load the users dataset
Dataset<Record> users = Datasets.load(
"dataset:hdfs:/tmp/data/users", Record.class);
// Get a reader for the dataset and read all the users
DatasetReader<Record> reader = null;
try {
reader = users.newReader();
for (GenericRecord user : reader) {
System.out.println(user);
}
} finally {
if (reader != null) {
reader.close();
}
}
return 0;
}
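For context, a dataset like the one loaded above is typically populated with a DatasetWriter before it can be read. The sketch below assumes the dataset already exists, that Record is Avro's GenericData.Record as in the Kite examples, and that the field names are placeholders.
import org.apache.avro.generic.GenericData.Record;
import org.apache.avro.generic.GenericRecordBuilder;
import org.kitesdk.data.Dataset;
import org.kitesdk.data.DatasetWriter;
import org.kitesdk.data.Datasets;
// Hypothetical writer counterpart to the reader example above.
Dataset<Record> users = Datasets.load("dataset:hdfs:/tmp/data/users", Record.class);
DatasetWriter<Record> writer = null;
try {
  writer = users.newWriter();
  Record user = new GenericRecordBuilder(users.getDescriptor().getSchema())
      .set("username", "alice")        // assumed field name
      .set("favoriteColor", "green")   // assumed field name
      .build();
  writer.write(user);
} finally {
  if (writer != null) {
    writer.close();
  }
}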
Example 4: run
import org.kitesdk.data.Dataset; // import the required package/class
@Override
public int run(String[] args) throws Exception {
// Load the users dataset
Dataset<Record> users = Datasets.load(
"dataset:hive?dataset=users", Record.class);
// Get a reader for the dataset and read all the users
DatasetReader<Record> reader = null;
try {
reader = users.newReader();
for (GenericRecord user : reader) {
System.out.println(user);
}
} finally {
if (reader != null) {
reader.close();
}
}
return 0;
}
Example 5: run
import org.kitesdk.data.Dataset; // import the required package/class
@Override
public int run(String[] args) throws Exception {
// Load the users dataset
Dataset<Record> users = Datasets.load(
"dataset:hdfs:/tmp/data/users", Record.class);
// Get a reader for the dataset and read all the users
DatasetReader<Record> reader = null;
try {
reader = users.with("favoriteColor", "green").newReader();
for (GenericRecord user : reader) {
System.out.println(user);
}
} finally {
if (reader != null) {
reader.close();
}
}
return 0;
}
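Beyond the equality constraint applied via with() above, a Kite view can be refined further before opening a reader. The sketch below is an assumption-laden illustration: the range constraint and the field names are hypothetical, and whether such a refinement is efficient depends on how the dataset is partitioned.
import org.apache.avro.generic.GenericData.Record;
import org.kitesdk.data.Dataset;
import org.kitesdk.data.DatasetReader;
import org.kitesdk.data.Datasets;
import org.kitesdk.data.RefinableView;
// Hypothetical refinement: combine an equality constraint with a range
// constraint before opening the reader. Field names are assumptions.
Dataset<Record> users = Datasets.load("dataset:hdfs:/tmp/data/users", Record.class);
RefinableView<Record> view = users
    .with("favoriteColor", "green")
    .from("username", "a")
    .to("username", "m");
DatasetReader<Record> reader = null;
try {
  reader = view.newReader();
  for (Record user : reader) {
    System.out.println(user);
  }
} finally {
  if (reader != null) {
    reader.close();
  }
}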
Example 6: run
import org.kitesdk.data.Dataset; // import the required package/class
@Override
public int run(String[] args) throws Exception {
// Load the products dataset
Dataset<Product> products = Datasets.load(
"dataset:hdfs:/tmp/data/products", Product.class);
// Get a reader for the dataset and read all the products
DatasetReader<Product> reader = null;
try {
reader = products.newReader();
for (Product product : reader) {
System.out.println(product);
}
} finally {
if (reader != null) {
reader.close();
}
}
return 0;
}
Example 7: run
import org.kitesdk.data.Dataset; // import the required package/class
@Override
public int run(String[] args) throws Exception {
// Load the events dataset
Dataset<GenericRecord> events = Datasets.load("dataset:hive:/tmp/data/default/events");
// Get a reader for the dataset and read all the events
DatasetReader<GenericRecord> reader = events.newReader();
try {
for (GenericRecord event : reader) {
System.out.println(event);
}
} finally {
reader.close();
}
return 0;
}
Example 8: run
import org.kitesdk.data.Dataset; // import the required package/class
@Override
public int run(String[] args) throws Exception {
Dataset<Record> movies = Datasets.load(
"dataset:hdfs:/tmp/data/movies", Record.class);
DatasetReader<Record> reader = null;
try {
reader = movies.newReader();
for (Record rec : reader) {
System.err.println("Movie: " + rec);
}
} finally {
if (reader != null) {
reader.close();
}
}
return 0;
}
Example 9: testParquetDataset
import org.kitesdk.data.Dataset; // import the required package/class
@Test
public void testParquetDataset() throws EventDeliveryException {
Datasets.delete(FILE_DATASET_URI);
Dataset<GenericRecord> created = Datasets.create(FILE_DATASET_URI,
new DatasetDescriptor.Builder(DESCRIPTOR)
.format("parquet")
.build());
DatasetSink sink = sink(in, config);
// run the sink
sink.start();
sink.process();
// the transaction should not commit during the call to process
assertThrows("Transaction should still be open", IllegalStateException.class,
new Callable() {
@Override
public Object call() throws EventDeliveryException {
in.getTransaction().begin();
return null;
}
});
// The records won't commit until the call to stop()
Assert.assertEquals("Should not have committed", 0, read(created).size());
sink.stop();
Assert.assertEquals(Sets.newHashSet(expected), read(created));
Assert.assertEquals("Should have committed", 0, remaining(in));
}
Example 10: createDataset
import org.kitesdk.data.Dataset; // import the required package/class
private static Dataset createDataset(Schema schema,
CompressionType compressionType, String uri) {
DatasetDescriptor descriptor = new DatasetDescriptor.Builder()
.schema(schema)
.format(Formats.PARQUET)
.compressionType(compressionType)
.build();
return Datasets.create(uri, descriptor, GenericRecord.class);
}
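A hedged usage sketch of the createDataset helper above; the schema literal, the Snappy compression choice, and the target URI are assumptions for illustration.
import org.apache.avro.Schema;
import org.kitesdk.data.CompressionType;
import org.kitesdk.data.Dataset;
// Hypothetical call: parse a schema literal and create a Snappy-compressed
// Parquet dataset at an assumed HDFS location.
Schema schema = new Schema.Parser().parse(
    "{\"type\":\"record\",\"name\":\"Product\",\"fields\":["
    + "{\"name\":\"id\",\"type\":\"long\"},"
    + "{\"name\":\"name\",\"type\":\"string\"}]}");
Dataset products = createDataset(schema, CompressionType.Snappy,
    "dataset:hdfs:/tmp/data/products");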
Example 11: configureInputFormat
import org.kitesdk.data.Dataset; // import the required package/class
@Override
protected void configureInputFormat(Job job, String tableName, String tableClassName,
String splitByCol) throws ClassNotFoundException, IOException {
fileType = getInputFileType();
super.configureInputFormat(job, tableName, tableClassName, splitByCol);
if (isHCatJob) {
SqoopHCatUtilities.configureExportInputFormat(options, job, context.getConnManager(),
tableName, job.getConfiguration());
return;
} else if (fileType == FileType.AVRO_DATA_FILE) {
LOG.debug("Configuring for Avro export");
configureGenericRecordExportInputFormat(job, tableName);
} else if (fileType == FileType.PARQUET_FILE) {
LOG.debug("Configuring for Parquet export");
configureGenericRecordExportInputFormat(job, tableName);
FileSystem fs = FileSystem.get(job.getConfiguration());
String uri = "dataset:" + fs.makeQualified(getInputPath());
Exception caughtException = null;
try {
DatasetKeyInputFormat.configure(job).readFrom(uri);
} catch (DatasetNotFoundException e) {
LOG.warn(e.getMessage(), e);
LOG.warn("Trying to get data schema from parquet file directly");
caughtException = e;
}
if (caughtException != null && caughtException instanceof DatasetNotFoundException) {
DatasetDescriptor descriptor = getDatasetDescriptorFromParquetFile(job, fs, uri);
Dataset dataset = Datasets.create(uri, descriptor, GenericRecord.class);
DatasetKeyInputFormat.configure(job).readFrom(dataset);
}
}
FileInputFormat.addInputPath(job, getInputPath());
}
Example 12: verifyHiveDataset
import org.kitesdk.data.Dataset; // import the required package/class
private void verifyHiveDataset(String tableName, Object[][] valsArray) {
String datasetUri = String.format("dataset:hive:default/%s",
tableName.toLowerCase());
assertTrue(Datasets.exists(datasetUri));
Dataset dataset = Datasets.load(datasetUri);
assertFalse(dataset.isEmpty());
DatasetReader<GenericRecord> reader = dataset.newReader();
try {
List<String> expectations = new ArrayList<String>();
if (valsArray != null) {
for (Object[] vals : valsArray) {
expectations.add(Arrays.toString(vals));
}
}
while (reader.hasNext() && expectations.size() > 0) {
String actual = Arrays.toString(
convertGenericRecordToArray(reader.next()));
assertTrue("Expect record: " + actual, expectations.remove(actual));
}
assertFalse(reader.hasNext());
assertEquals(0, expectations.size());
} finally {
reader.close();
}
}
Example 13: testMultiTableImportAsParquetFormat
import org.kitesdk.data.Dataset; // import the required package/class
public void testMultiTableImportAsParquetFormat() throws IOException {
String [] argv = getArgv(new String[]{"--as-parquetfile"}, null);
runImport(new ImportAllTablesTool(), argv);
Path warehousePath = new Path(this.getWarehouseDir());
int i = 0;
for (String tableName : this.tableNames) {
Path tablePath = new Path(warehousePath, tableName);
Dataset dataset = Datasets.load("dataset:file:" + tablePath);
// dequeue the expected value for this table. This
// list has the same order as the tableNames list.
String expectedVal = Integer.toString(i++) + ","
+ this.expectedStrings.get(0);
this.expectedStrings.remove(0);
DatasetReader<GenericRecord> reader = dataset.newReader();
try {
GenericRecord record = reader.next();
String line = record.get(0) + "," + record.get(1);
assertEquals("Table " + tableName + " expected a different string",
expectedVal, line);
assertFalse(reader.hasNext());
} finally {
reader.close();
}
}
}
Example 14: getExecutor
import org.kitesdk.data.Dataset; // import the required package/class
@VisibleForTesting
KiteDatasetExecutor getExecutor(LinkConfiguration linkConfiguration, String uri, Schema schema,
FileFormat format) {
// Note that instead of creating a dataset at destination, we create a
// temporary dataset by every KiteLoader instance. They will be merged when
// all data portions are written successfully. Unfortunately, KiteLoader is
// not able to pass the temporary dataset uri to KiteToDestroyer. So we
// delegate KiteDatasetExecutor to manage name convention for datasets.
uri = KiteDatasetExecutor.suggestTemporaryDatasetUri(linkConfiguration.linkConfig, uri);
LOG.info("Constructed temporary dataset URI: " + uri);
Dataset<GenericRecord> dataset =
KiteDatasetExecutor.createDataset(uri, schema, format);
return new KiteDatasetExecutor(dataset);
}
Example 15: getSchema
import org.kitesdk.data.Dataset; // import the required package/class
@Override
public Schema getSchema(InitializerContext context,
LinkConfiguration linkConfig, FromJobConfiguration fromJobConfig) {
String uri = ConfigUtil.buildDatasetUri(
linkConfig.linkConfig, fromJobConfig.fromJobConfig.uri);
Dataset dataset = Datasets.load(uri);
org.apache.avro.Schema avroSchema = dataset.getDescriptor().getSchema();
return AvroDataTypeUtil.createSqoopSchema(avroSchema);
}
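Building on the same pattern, a loaded Dataset's descriptor also exposes its storage format and location alongside the Avro schema. This is a minimal sketch; the dataset URI is an assumption.
import org.apache.avro.Schema;
import org.kitesdk.data.Dataset;
import org.kitesdk.data.DatasetDescriptor;
import org.kitesdk.data.Datasets;
// Hypothetical inspection of a dataset's metadata; the URI is an assumption.
Dataset dataset = Datasets.load("dataset:hive:default/users");
DatasetDescriptor descriptor = dataset.getDescriptor();
Schema avroSchema = descriptor.getSchema();  // Avro schema backing the dataset
System.out.println("format:   " + descriptor.getFormat());
System.out.println("location: " + descriptor.getLocation());
System.out.println("schema:   " + avroSchema.toString(true));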