This article collects typical usage examples of the Java method org.kitesdk.data.Datasets.create. If you are wondering how to call Datasets.create, what it is used for, or what working examples look like, the hand-picked code samples below should help. You can also explore further usage examples of the enclosing class, org.kitesdk.data.Datasets.
The sections below show 15 code examples of the Datasets.create method, sorted by popularity by default.
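Before walking through the examples, here is a minimal, self-contained sketch of the create-or-load pattern that most of them follow. The dataset URI dataset:file:/tmp/data/events and the inline Avro schema are placeholders chosen for illustration only; they are not taken from any of the examples below.

import org.apache.avro.generic.GenericRecord;
import org.kitesdk.data.Dataset;
import org.kitesdk.data.DatasetDescriptor;
import org.kitesdk.data.Datasets;

public class CreateDatasetSketch {
  public static void main(String[] args) {
    // Inline Avro schema; a real project would typically load it from an
    // .avsc resource or use a generated specific record class instead.
    String schemaJson = "{\"type\":\"record\",\"name\":\"Event\",\"fields\":["
        + "{\"name\":\"id\",\"type\":\"string\"},"
        + "{\"name\":\"msg\",\"type\":\"string\"}]}";

    DatasetDescriptor descriptor = new DatasetDescriptor.Builder()
        .schemaLiteral(schemaJson)
        .build();

    // Placeholder URI: a local file-system dataset under /tmp/data/events.
    String uri = "dataset:file:/tmp/data/events";

    // Create the dataset if it does not exist yet, otherwise load it.
    Dataset<GenericRecord> events = Datasets.exists(uri)
        ? Datasets.load(uri, GenericRecord.class)
        : Datasets.create(uri, descriptor, GenericRecord.class);

    System.out.println("Dataset ready at " + events.getDescriptor().getLocation());
  }
}

Examples 1 and 2 below show the same create-or-load and create-or-update patterns in context.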
Example 1: SavePolicy
import org.kitesdk.data.Datasets; // import the package/class this method depends on
private SavePolicy(Context context) {
  String uri = context.getString(CONFIG_KITE_ERROR_DATASET_URI);
  Preconditions.checkArgument(uri != null, "Must set "
      + CONFIG_KITE_ERROR_DATASET_URI + " when " + CONFIG_FAILURE_POLICY
      + "=save");

  if (Datasets.exists(uri)) {
    dataset = Datasets.load(uri, AvroFlumeEvent.class);
  } else {
    DatasetDescriptor descriptor = new DatasetDescriptor.Builder()
        .schema(AvroFlumeEvent.class)
        .build();
    dataset = Datasets.create(uri, descriptor, AvroFlumeEvent.class);
  }

  nEventsHandled = 0;
}
Example 2: dataset
import org.kitesdk.data.Datasets; // import the package/class this method depends on
/**
 * Ensures the given dataset exists, creating it if it doesn't
 * and updating the schema if necessary.
 */
protected void dataset(String uri, DatasetDescriptor descriptor) {
  try {
    Datasets.create(uri, descriptor);
  } catch (DatasetExistsException e) {
    Dataset existingDataset = Datasets.load(uri);

    DatasetDescriptor updated;
    // The given descriptor might not have a location,
    // so use the current one.
    if (descriptor.getLocation() == null) {
      updated = new DatasetDescriptor.Builder(descriptor)
          .location(existingDataset.getDescriptor().getLocation())
          .build();
    } else {
      updated = descriptor;
    }

    Datasets.update(uri, updated);
  }
}
Example 3: run
import org.kitesdk.data.Datasets; // import the package/class this method depends on
@Override
public int run(List<String> args) throws Exception {
  String inputUri = uri;
  String outputUri = "dataset:hive?dataset=correlated_events";

  if (args.size() == 1) {
    outputUri = args.get(0);
  }

  Preconditions.checkState(Datasets.exists(inputUri),
      "input dataset doesn't exist");

  if (!Datasets.exists(outputUri)) {
    Datasets.create(outputUri, new DatasetDescriptor.Builder()
        .format("avro")
        .schema(CorrelatedEvents.class)
        .build());
  }

  CorrelateEventsTask task = new CorrelateEventsTask(inputUri, outputUri);
  task.run();

  return 0;
}
Example 4: run
import org.kitesdk.data.Datasets; // import the package/class this method depends on
@Override
public int run(List<String> args) throws Exception {
  Preconditions.checkState(!Datasets.exists(uri),
      "events dataset already exists");

  DatasetDescriptor descriptor = new DatasetDescriptor.Builder()
      .schema(StandardEvent.class).build();
  View<StandardEvent> events = Datasets.create(uri, descriptor, StandardEvent.class);

  DatasetWriter<StandardEvent> writer = events.newWriter();
  try {
    while (System.currentTimeMillis() - baseTimestamp < 36000) {
      writer.write(generateRandomEvent());
    }
  } finally {
    writer.close();
  }

  System.out.println("Generated " + counter + " events");

  return 0;
}
Example 5: setup
import org.kitesdk.data.Datasets; // import the package/class this method depends on
@Before
public void setup() throws EventDeliveryException {
  Datasets.delete(FILE_DATASET_URI);
  Datasets.create(FILE_DATASET_URI, DESCRIPTOR);

  this.config = new Context();
  config.put("keep-alive", "0");

  this.in = new MemoryChannel();
  Configurables.configure(in, config);

  config.put(DatasetSinkConstants.CONFIG_KITE_DATASET_URI, FILE_DATASET_URI);

  GenericRecordBuilder builder = new GenericRecordBuilder(RECORD_SCHEMA);
  expected = Lists.<GenericRecord>newArrayList(
      builder.set("id", "1").set("msg", "msg1").build(),
      builder.set("id", "2").set("msg", "msg2").build(),
      builder.set("id", "3").set("msg", "msg3").build());

  putToChannel(in, Iterables.transform(expected,
      new Function<GenericRecord, Event>() {
        private int i = 0;

        @Override
        public Event apply(@Nullable GenericRecord rec) {
          this.i += 1;
          boolean useURI = (i % 2) == 0;
          return event(rec, RECORD_SCHEMA, SCHEMA_FILE, useURI);
        }
      }));
}
Example 6: testParquetDataset
import org.kitesdk.data.Datasets; // import the package/class this method depends on
@Test
public void testParquetDataset() throws EventDeliveryException {
  Datasets.delete(FILE_DATASET_URI);
  Dataset<GenericRecord> created = Datasets.create(FILE_DATASET_URI,
      new DatasetDescriptor.Builder(DESCRIPTOR)
          .format("parquet")
          .build());

  DatasetSink sink = sink(in, config);

  // run the sink
  sink.start();
  sink.process();

  // the transaction should not commit during the call to process
  assertThrows("Transaction should still be open", IllegalStateException.class,
      new Callable() {
        @Override
        public Object call() throws EventDeliveryException {
          in.getTransaction().begin();
          return null;
        }
      });

  // The records won't commit until the call to stop()
  Assert.assertEquals("Should not have committed", 0, read(created).size());

  sink.stop();

  Assert.assertEquals(Sets.newHashSet(expected), read(created));
  Assert.assertEquals("Should have committed", 0, remaining(in));
}
Example 7: testPartitionedData
import org.kitesdk.data.Datasets; // import the package/class this method depends on
@Test
public void testPartitionedData() throws EventDeliveryException {
  URI partitionedUri = URI.create("dataset:file:target/test_repo/partitioned");
  try {
    Datasets.create(partitionedUri, new DatasetDescriptor.Builder(DESCRIPTOR)
        .partitionStrategy(new PartitionStrategy.Builder()
            .identity("id", 10) // partition by id
            .build())
        .build());

    config.put(DatasetSinkConstants.CONFIG_KITE_DATASET_URI,
        partitionedUri.toString());
    DatasetSink sink = sink(in, config);

    // run the sink
    sink.start();
    sink.process();
    sink.stop();

    Assert.assertEquals(
        Sets.newHashSet(expected),
        read(Datasets.load(partitionedUri)));
    Assert.assertEquals("Should have committed", 0, remaining(in));
  } finally {
    if (Datasets.exists(partitionedUri)) {
      Datasets.delete(partitionedUri);
    }
  }
}
Example 8: testStartBeforeDatasetCreated
import org.kitesdk.data.Datasets; // import the package/class this method depends on
@Test
public void testStartBeforeDatasetCreated() throws EventDeliveryException {
  // delete the dataset created by setup
  Datasets.delete(FILE_DATASET_URI);

  DatasetSink sink = sink(in, config);

  // start the sink
  sink.start();

  // run the sink without a target dataset
  try {
    sink.process();
    Assert.fail("Should have thrown an exception: no such dataset");
  } catch (EventDeliveryException e) {
    // expected
  }

  // create the target dataset
  Datasets.create(FILE_DATASET_URI, DESCRIPTOR);

  // run the sink
  sink.process();
  sink.stop();

  Assert.assertEquals(Sets.newHashSet(expected), read(Datasets.load(FILE_DATASET_URI)));
  Assert.assertEquals("Should have committed", 0, remaining(in));
}
Example 9: testMiniClusterStore
import org.kitesdk.data.Datasets; // import the package/class this method depends on
@Test
public void testMiniClusterStore() throws EventDeliveryException, IOException {
  // setup a minicluster
  MiniDFSCluster cluster = new MiniDFSCluster
      .Builder(new Configuration())
      .build();

  FileSystem dfs = cluster.getFileSystem();
  Configuration conf = dfs.getConf();

  URI hdfsUri = URI.create(
      "dataset:" + conf.get("fs.defaultFS") + "/tmp/repo" + DATASET_NAME);
  try {
    // create a repository and dataset in HDFS
    Datasets.create(hdfsUri, DESCRIPTOR);

    // update the config to use the HDFS repository
    config.put(DatasetSinkConstants.CONFIG_KITE_DATASET_URI, hdfsUri.toString());

    DatasetSink sink = sink(in, config);

    // run the sink
    sink.start();
    sink.process();
    sink.stop();

    Assert.assertEquals(
        Sets.newHashSet(expected),
        read(Datasets.load(hdfsUri)));
    Assert.assertEquals("Should have committed", 0, remaining(in));
  } finally {
    if (Datasets.exists(hdfsUri)) {
      Datasets.delete(hdfsUri);
    }
    cluster.shutdown();
  }
}
Example 10: createDataset
import org.kitesdk.data.Datasets; // import the package/class this method depends on
private static Dataset createDataset(Schema schema,
    CompressionType compressionType, String uri) {
  DatasetDescriptor descriptor = new DatasetDescriptor.Builder()
      .schema(schema)
      .format(Formats.PARQUET)
      .compressionType(compressionType)
      .build();
  return Datasets.create(uri, descriptor, GenericRecord.class);
}
Example 11: configureInputFormat
import org.kitesdk.data.Datasets; // import the package/class this method depends on
@Override
protected void configureInputFormat(Job job, String tableName, String tableClassName,
    String splitByCol) throws ClassNotFoundException, IOException {
  fileType = getInputFileType();

  super.configureInputFormat(job, tableName, tableClassName, splitByCol);

  if (isHCatJob) {
    SqoopHCatUtilities.configureExportInputFormat(options, job, context.getConnManager(),
        tableName, job.getConfiguration());
    return;
  } else if (fileType == FileType.AVRO_DATA_FILE) {
    LOG.debug("Configuring for Avro export");
    configureGenericRecordExportInputFormat(job, tableName);
  } else if (fileType == FileType.PARQUET_FILE) {
    LOG.debug("Configuring for Parquet export");
    configureGenericRecordExportInputFormat(job, tableName);
    FileSystem fs = FileSystem.get(job.getConfiguration());
    String uri = "dataset:" + fs.makeQualified(getInputPath());
    Exception caughtException = null;
    try {
      DatasetKeyInputFormat.configure(job).readFrom(uri);
    } catch (DatasetNotFoundException e) {
      LOG.warn(e.getMessage(), e);
      LOG.warn("Trying to get data schema from parquet file directly");
      caughtException = e;
    }
    if (caughtException != null && caughtException instanceof DatasetNotFoundException) {
      DatasetDescriptor descriptor = getDatasetDescriptorFromParquetFile(job, fs, uri);
      Dataset dataset = Datasets.create(uri, descriptor, GenericRecord.class);
      DatasetKeyInputFormat.configure(job).readFrom(dataset);
    }
  }

  FileInputFormat.addInputPath(job, getInputPath());
}
Example 12: createDataset
import org.kitesdk.data.Datasets; // import the package/class this method depends on
/**
 * Creates a new dataset.
 */
public static Dataset<GenericRecord> createDataset(String uri, org.apache.sqoop.schema.Schema schema,
    FileFormat format) {
  Schema datasetSchema = KiteDataTypeUtil.createAvroSchema(schema);
  Format datasetFormat = KiteDataTypeUtil.toFormat(format);
  DatasetDescriptor descriptor = new DatasetDescriptor.Builder()
      .property("kite.allow.csv", "true")
      .schema(datasetSchema)
      .format(datasetFormat)
      .build();
  return Datasets.create(uri, descriptor);
}
Example 13: run
import org.kitesdk.data.Datasets; // import the package/class this method depends on
@Override
public int run(String[] args) throws Exception {
  // Create a partition strategy that partitions on the favoriteColor field
  // using an identity partitioner
  PartitionStrategy partitionStrategy = new PartitionStrategy.Builder()
      .identity("favoriteColor", "favorite_color")
      .build();

  // Create a dataset of users with the Avro schema
  DatasetDescriptor descriptor = new DatasetDescriptor.Builder()
      .schemaUri("resource:user.avsc")
      .partitionStrategy(partitionStrategy)
      .build();
  Dataset<Record> users = Datasets.create(
      "dataset:hdfs:/tmp/data/users", descriptor, Record.class);

  // Get a writer for the dataset and write some users to it
  DatasetWriter<Record> writer = null;
  try {
    writer = users.newWriter();
    Random rand = new Random();
    GenericRecordBuilder builder = new GenericRecordBuilder(descriptor.getSchema());
    for (int i = 0; i < 100; i++) {
      Record record = builder.set("username", "user-" + i)
          .set("creationDate", System.currentTimeMillis())
          .set("favoriteColor", colors[rand.nextInt(colors.length)]).build();
      writer.write(record);
    }
  } finally {
    if (writer != null) {
      writer.close();
    }
  }

  return 0;
}
Example 14: run
import org.kitesdk.data.Datasets; // import the package/class this method depends on
@Override
public int run(String[] args) throws Exception {
  // Create a dataset of users with the Avro schema
  DatasetDescriptor descriptor = new DatasetDescriptor.Builder()
      .schemaUri("resource:user.avsc")
      .build();
  Dataset<Record> users = Datasets.create(
      "dataset:hdfs:/tmp/data/users", descriptor, Record.class);

  // Get a writer for the dataset and write some users to it
  DatasetWriter<Record> writer = null;
  try {
    writer = users.newWriter();
    Random rand = new Random();
    GenericRecordBuilder builder = new GenericRecordBuilder(descriptor.getSchema());
    for (int i = 0; i < 100; i++) {
      Record record = builder.set("username", "user-" + i)
          .set("creationDate", System.currentTimeMillis())
          .set("favoriteColor", colors[rand.nextInt(colors.length)]).build();
      writer.write(record);
    }
  } finally {
    if (writer != null) {
      writer.close();
    }
  }

  return 0;
}
Example 15: run
import org.kitesdk.data.Datasets; // import the package/class this method depends on
@Override
public int run(String[] args) throws Exception {
  DatasetDescriptor descriptor = new DatasetDescriptor.Builder()
      .schemaUri("resource:user.avsc")
      .format(Formats.PARQUET)
      .build();
  Dataset<Record> users = Datasets.create(
      "dataset:hdfs:/tmp/data/users", descriptor, Record.class);

  // Get a writer for the dataset and write some users to it
  DatasetWriter<Record> writer = null;
  try {
    writer = users.newWriter();
    Random rand = new Random();
    GenericRecordBuilder builder = new GenericRecordBuilder(descriptor.getSchema());
    for (int i = 0; i < 100; i++) {
      Record record = builder.set("username", "user-" + i)
          .set("creationDate", System.currentTimeMillis())
          .set("favoriteColor", colors[rand.nextInt(colors.length)]).build();
      writer.write(record);
    }
  } finally {
    if (writer != null) {
      writer.close();
    }
  }

  return 0;
}