This article collects typical usage examples of the Datasets.exists method from the Java class org.kitesdk.data.Datasets. If you are wondering what Datasets.exists does, how to call it, or where to find concrete examples, the curated code samples below should help. You can also explore further usage examples of the containing class, org.kitesdk.data.Datasets.
The 12 code examples below show Datasets.exists in use, sorted by popularity by default.
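Before the examples, here is a minimal sketch of the pattern most of them follow: probe a dataset URI with Datasets.exists, then either load the existing dataset or create it from a descriptor. The URI and the inline Avro schema are placeholders assumed for illustration; they are not taken from any example below.

import org.apache.avro.Schema;
import org.apache.avro.generic.GenericRecord;
import org.kitesdk.data.Dataset;
import org.kitesdk.data.DatasetDescriptor;
import org.kitesdk.data.Datasets;

public class ExistsSketch {
  // Hypothetical dataset URI and record schema, used only for illustration.
  private static final String URI = "dataset:file:/tmp/example_repo/events";
  private static final Schema SCHEMA = new Schema.Parser().parse(
      "{\"type\":\"record\",\"name\":\"Event\",\"fields\":"
          + "[{\"name\":\"id\",\"type\":\"long\"}]}");

  public static Dataset<GenericRecord> loadOrCreate() {
    if (Datasets.exists(URI)) {
      // The dataset already exists, so just load it.
      return Datasets.load(URI);
    }
    // Otherwise build a descriptor from the schema and create the dataset.
    DatasetDescriptor descriptor = new DatasetDescriptor.Builder()
        .schema(SCHEMA)
        .build();
    return Datasets.create(URI, descriptor);
  }
}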
Example 1: SavePolicy
import org.kitesdk.data.Datasets; // import the package/class the method depends on
private SavePolicy(Context context) {
  String uri = context.getString(CONFIG_KITE_ERROR_DATASET_URI);
  Preconditions.checkArgument(uri != null, "Must set "
      + CONFIG_KITE_ERROR_DATASET_URI + " when " + CONFIG_FAILURE_POLICY
      + "=save");

  if (Datasets.exists(uri)) {
    dataset = Datasets.load(uri, AvroFlumeEvent.class);
  } else {
    DatasetDescriptor descriptor = new DatasetDescriptor.Builder()
        .schema(AvroFlumeEvent.class)
        .build();
    dataset = Datasets.create(uri, descriptor, AvroFlumeEvent.class);
  }

  nEventsHandled = 0;
}
Example 2: testFileStoreWithSavePolicy
import org.kitesdk.data.Datasets; // import the package/class the method depends on
@Test
public void testFileStoreWithSavePolicy() throws EventDeliveryException {
  if (Datasets.exists(ERROR_DATASET_URI)) {
    Datasets.delete(ERROR_DATASET_URI);
  }
  config.put(DatasetSinkConstants.CONFIG_FAILURE_POLICY,
      DatasetSinkConstants.SAVE_FAILURE_POLICY);
  config.put(DatasetSinkConstants.CONFIG_KITE_ERROR_DATASET_URI,
      ERROR_DATASET_URI);
  DatasetSink sink = sink(in, config);

  // run the sink
  sink.start();
  sink.process();
  sink.stop();

  Assert.assertEquals(
      Sets.newHashSet(expected),
      read(Datasets.load(FILE_DATASET_URI)));
  Assert.assertEquals("Should have committed", 0, remaining(in));
}
Example 3: run
import org.kitesdk.data.Datasets; // import the package/class the method depends on
@Override
public int run(List<String> args) throws Exception {
  String inputUri = uri;
  String outputUri = "dataset:hive?dataset=correlated_events";
  if (args.size() == 1) {
    outputUri = args.get(0);
  }

  Preconditions.checkState(Datasets.exists(inputUri),
      "input dataset doesn't exist");

  if (!Datasets.exists(outputUri)) {
    Datasets.create(outputUri, new DatasetDescriptor.Builder()
        .format("avro")
        .schema(CorrelatedEvents.class)
        .build());
  }

  CorrelateEventsTask task = new CorrelateEventsTask(inputUri, outputUri);
  task.run();
  return 0;
}
Example 4: testPartitionedData
import org.kitesdk.data.Datasets; // import the package/class the method depends on
@Test
public void testPartitionedData() throws EventDeliveryException {
  URI partitionedUri = URI.create("dataset:file:target/test_repo/partitioned");
  try {
    Datasets.create(partitionedUri, new DatasetDescriptor.Builder(DESCRIPTOR)
        .partitionStrategy(new PartitionStrategy.Builder()
            .identity("id", 10) // partition by id
            .build())
        .build());

    config.put(DatasetSinkConstants.CONFIG_KITE_DATASET_URI,
        partitionedUri.toString());
    DatasetSink sink = sink(in, config);

    // run the sink
    sink.start();
    sink.process();
    sink.stop();

    Assert.assertEquals(
        Sets.newHashSet(expected),
        read(Datasets.load(partitionedUri)));
    Assert.assertEquals("Should have committed", 0, remaining(in));
  } finally {
    if (Datasets.exists(partitionedUri)) {
      Datasets.delete(partitionedUri);
    }
  }
}
Example 5: testMiniClusterStore
import org.kitesdk.data.Datasets; // import the package/class the method depends on
@Test
public void testMiniClusterStore() throws EventDeliveryException, IOException {
  // set up a mini cluster
  MiniDFSCluster cluster = new MiniDFSCluster
      .Builder(new Configuration())
      .build();
  FileSystem dfs = cluster.getFileSystem();
  Configuration conf = dfs.getConf();

  URI hdfsUri = URI.create(
      "dataset:" + conf.get("fs.defaultFS") + "/tmp/repo" + DATASET_NAME);
  try {
    // create a repository and dataset in HDFS
    Datasets.create(hdfsUri, DESCRIPTOR);

    // update the config to use the HDFS repository
    config.put(DatasetSinkConstants.CONFIG_KITE_DATASET_URI, hdfsUri.toString());

    DatasetSink sink = sink(in, config);

    // run the sink
    sink.start();
    sink.process();
    sink.stop();

    Assert.assertEquals(
        Sets.newHashSet(expected),
        read(Datasets.load(hdfsUri)));
    Assert.assertEquals("Should have committed", 0, remaining(in));
  } finally {
    if (Datasets.exists(hdfsUri)) {
      Datasets.delete(hdfsUri);
    }
    cluster.shutdown();
  }
}
Example 6: testMissingSchemaWithSavePolicy
import org.kitesdk.data.Datasets; // import the package/class the method depends on
@Test
public void testMissingSchemaWithSavePolicy() throws EventDeliveryException {
  if (Datasets.exists(ERROR_DATASET_URI)) {
    Datasets.delete(ERROR_DATASET_URI);
  }
  config.put(DatasetSinkConstants.CONFIG_FAILURE_POLICY,
      DatasetSinkConstants.SAVE_FAILURE_POLICY);
  config.put(DatasetSinkConstants.CONFIG_KITE_ERROR_DATASET_URI,
      ERROR_DATASET_URI);
  final DatasetSink sink = sink(in, config);

  Event badEvent = new SimpleEvent();
  badEvent.setHeaders(Maps.<String, String>newHashMap());
  badEvent.setBody(serialize(expected.get(0), RECORD_SCHEMA));
  putToChannel(in, badEvent);

  // run the sink
  sink.start();
  sink.process();
  sink.stop();

  Assert.assertEquals("Good records should have been written",
      Sets.newHashSet(expected),
      read(Datasets.load(FILE_DATASET_URI)));
  Assert.assertEquals("Should not have rolled back", 0, remaining(in));
  Assert.assertEquals("Should have saved the bad event",
      Sets.newHashSet(AvroFlumeEvent.newBuilder()
          .setBody(ByteBuffer.wrap(badEvent.getBody()))
          .setHeaders(toUtf8Map(badEvent.getHeaders()))
          .build()),
      read(Datasets.load(ERROR_DATASET_URI, AvroFlumeEvent.class)));
}
Example 7: testSerializedWithIncompatibleSchemasWithSavePolicy
import org.kitesdk.data.Datasets; // import the package/class the method depends on
@Test
public void testSerializedWithIncompatibleSchemasWithSavePolicy()
    throws EventDeliveryException {
  if (Datasets.exists(ERROR_DATASET_URI)) {
    Datasets.delete(ERROR_DATASET_URI);
  }
  config.put(DatasetSinkConstants.CONFIG_FAILURE_POLICY,
      DatasetSinkConstants.SAVE_FAILURE_POLICY);
  config.put(DatasetSinkConstants.CONFIG_KITE_ERROR_DATASET_URI,
      ERROR_DATASET_URI);
  final DatasetSink sink = sink(in, config);

  GenericRecordBuilder builder = new GenericRecordBuilder(
      INCOMPATIBLE_SCHEMA);
  GenericData.Record rec = builder.set("username", "koala").build();

  // We pass a valid schema in the header, but an incompatible schema
  // was used to serialize the record
  Event badEvent = event(rec, INCOMPATIBLE_SCHEMA, SCHEMA_FILE, true);
  putToChannel(in, badEvent);

  // run the sink
  sink.start();
  sink.process();
  sink.stop();

  Assert.assertEquals("Good records should have been written",
      Sets.newHashSet(expected),
      read(Datasets.load(FILE_DATASET_URI)));
  Assert.assertEquals("Should not have rolled back", 0, remaining(in));
  Assert.assertEquals("Should have saved the bad event",
      Sets.newHashSet(AvroFlumeEvent.newBuilder()
          .setBody(ByteBuffer.wrap(badEvent.getBody()))
          .setHeaders(toUtf8Map(badEvent.getHeaders()))
          .build()),
      read(Datasets.load(ERROR_DATASET_URI, AvroFlumeEvent.class)));
}
Example 8: tearDown
import org.kitesdk.data.Datasets; // import the package/class the method depends on
@Override
public void tearDown() {
  super.tearDown();
  String uri = "dataset:file:" + getTablePath();
  if (Datasets.exists(uri)) {
    Datasets.delete(uri);
  }
}
Example 9: initialize
import org.kitesdk.data.Datasets; // import the package/class the method depends on
@Override
public void initialize(InitializerContext context,
    LinkConfiguration linkConfig, FromJobConfiguration fromJobConfig) {
  String uri = ConfigUtil.buildDatasetUri(
      linkConfig.linkConfig, fromJobConfig.fromJobConfig.uri);
  LOG.debug("Constructed dataset URI: " + uri);

  if (!Datasets.exists(uri)) {
    LOG.error("Dataset does not exist");
    throw new SqoopException(KiteConnectorError.GENERIC_KITE_CONNECTOR_0002);
  }
}
Example 10: initialize
import org.kitesdk.data.Datasets; // import the package/class the method depends on
@Override
public void initialize(InitializerContext context,
    LinkConfiguration linkConfig, ToJobConfiguration toJobConfig) {
  String uri = ConfigUtil.buildDatasetUri(
      linkConfig.linkConfig, toJobConfig.toJobConfig);
  LOG.debug("Constructed dataset URI: " + uri);

  if (Datasets.exists(uri)) {
    LOG.error("Overwriting an existing dataset is not expected in create mode.");
    throw new SqoopException(KiteConnectorError.GENERIC_KITE_CONNECTOR_0001);
  }
}
Example 11: createOrUpdateDatasets
import org.kitesdk.data.Datasets; // import the package/class the method depends on
/**
 * Create or update the HBase datasets named in the given map of dataset
 * names to Avro schema files.
 *
 * @param zkHost
 *          HBase ZooKeeper client hostname
 * @param zkPort
 *          HBase ZooKeeper client port
 * @param datasetNameSchemaMap
 *          A map of dataset names to the Avro schema files to create. Each
 *          schema file is given as a location, which may be on the
 *          classpath when prefixed with "classpath:/".
 * @return The list of created datasets.
 * @throws URISyntaxException
 * @throws IOException
 */
public static List<RandomAccessDataset<?>> createOrUpdateDatasets(
    String zkHost, String zkPort, Map<String, String> datasetNameSchemaMap)
    throws URISyntaxException, IOException {
  createManagedSchemasTable(zkHost, zkPort);

  List<RandomAccessDataset<?>> datasets = new ArrayList<RandomAccessDataset<?>>();
  for (Entry<String, String> entry : datasetNameSchemaMap.entrySet()) {
    String datasetName = entry.getKey();
    String schemaLocation = entry.getValue();
    File schemaFile;
    if (schemaLocation.startsWith(CLASSPATH_PREFIX)) {
      schemaLocation = schemaLocation.substring(CLASSPATH_PREFIX.length());
      schemaFile = new File(DevHelper.class.getClassLoader()
          .getResource(schemaLocation).toURI());
    } else {
      schemaFile = new File(schemaLocation);
    }
    DatasetDescriptor desc = new DatasetDescriptor.Builder().schema(
        schemaFile).build();
    String datasetURI = "dataset:hbase:" + zkHost + ":" + zkPort + "/"
        + datasetName;
    if (!Datasets.exists(datasetURI)) {
      datasets
          .add((RandomAccessDataset<?>) Datasets.create(datasetURI, desc));
    } else {
      datasets
          .add((RandomAccessDataset<?>) Datasets.update(datasetURI, desc));
    }
  }
  return datasets;
}
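A short usage sketch for the helper above. The ZooKeeper endpoint, dataset names, and schema locations are assumptions made up for illustration, and the method is assumed to live in the DevHelper class referenced inside its loop.

import java.util.HashMap;
import java.util.List;
import java.util.Map;

import org.kitesdk.data.RandomAccessDataset;

public class CreateDatasetsExample {
  public static void main(String[] args) throws Exception {
    // Hypothetical dataset-name-to-schema-location map; "classpath:/" entries
    // are resolved on the classpath, everything else as a file system path.
    Map<String, String> nameToSchema = new HashMap<String, String>();
    nameToSchema.put("users", "classpath:/avro/user.avsc");
    nameToSchema.put("events", "/etc/schemas/event.avsc");

    // Creates the datasets that are missing and updates those that already exist.
    List<RandomAccessDataset<?>> datasets =
        DevHelper.createOrUpdateDatasets("zk-host.example.com", "2181", nameToSchema);
    System.out.println("Created or updated " + datasets.size() + " datasets");
  }
}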
Example 12: configureImportJob
import org.kitesdk.data.Datasets; // import the package/class the method depends on
/**
 * Configure the import job. The import process will use a Kite dataset to
 * write data records into Parquet format internally. The input key class is
 * {@link org.apache.sqoop.lib.SqoopRecord}. The output key is
 * {@link org.apache.avro.generic.GenericRecord}.
 */
public static void configureImportJob(JobConf conf, Schema schema,
    String uri, WriteMode writeMode) throws IOException {
  Dataset dataset;

  // Add the Hive delegation token only if we don't already have one.
  if (uri.startsWith("dataset:hive")) {
    Configuration hiveConf = HiveConfig.getHiveConf(conf);
    if (isSecureMetastore(hiveConf)) {
      // Copy Hive configs to the job config
      HiveConfig.addHiveConfigs(hiveConf, conf);
      if (conf.getCredentials().getToken(new Text(HIVE_METASTORE_TOKEN_ALIAS)) == null) {
        addHiveDelegationToken(conf);
      }
    }
  }

  if (Datasets.exists(uri)) {
    if (WriteMode.DEFAULT.equals(writeMode)) {
      throw new IOException("Destination exists! " + uri);
    }

    dataset = Datasets.load(uri);
    Schema writtenWith = dataset.getDescriptor().getSchema();
    if (!SchemaValidationUtil.canRead(writtenWith, schema)) {
      throw new IOException(
          String.format("Expected schema: %s%nActual schema: %s",
              writtenWith, schema));
    }
  } else {
    dataset = createDataset(schema, getCompressionType(conf), uri);
  }
  conf.set(CONF_AVRO_SCHEMA, schema.toString());

  DatasetKeyOutputFormat.ConfigBuilder builder =
      DatasetKeyOutputFormat.configure(conf);
  if (WriteMode.OVERWRITE.equals(writeMode)) {
    builder.overwrite(dataset);
  } else if (WriteMode.APPEND.equals(writeMode)) {
    builder.appendTo(dataset);
  } else {
    builder.writeTo(dataset);
  }
}
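A hedged usage sketch for configureImportJob. The containing class name (ParquetImportHelper), the record schema, and the Hive dataset URI are assumptions for illustration only; WriteMode is the enum referenced in the method above and is assumed here to be nested in that same class.

import org.apache.avro.Schema;
import org.apache.hadoop.mapred.JobConf;

public class ImportJobSetup {
  public static void main(String[] args) throws Exception {
    JobConf conf = new JobConf();

    // Hypothetical Avro record schema for the imported rows.
    Schema schema = new Schema.Parser().parse(
        "{\"type\":\"record\",\"name\":\"Customer\",\"fields\":"
            + "[{\"name\":\"id\",\"type\":\"long\"},"
            + "{\"name\":\"name\",\"type\":\"string\"}]}");

    // Hypothetical Hive dataset URI; APPEND reuses an existing dataset after the
    // schema compatibility check, while DEFAULT fails if the destination exists.
    String uri = "dataset:hive?dataset=customers";
    ParquetImportHelper.configureImportJob(
        conf, schema, uri, ParquetImportHelper.WriteMode.APPEND);
  }
}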