

Java Datasets.exists Method Code Examples

This article collects typical usage examples of the Java method org.kitesdk.data.Datasets.exists. If you are wondering what Datasets.exists does, how to call it, or how it is used in practice, the curated examples below should help. You can also explore other usage examples from the containing class, org.kitesdk.data.Datasets.


The sections below present 12 code examples of the Datasets.exists method, sorted by popularity by default.
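Nearly all of the examples follow the same idiom: probe a dataset URI with Datasets.exists before loading, creating, or deleting it. As orientation, here is a minimal, self-contained sketch of that check-then-load-or-create pattern; the URI and the inline Avro schema are hypothetical placeholders, not taken from any project below.

import org.apache.avro.generic.GenericRecord;
import org.kitesdk.data.Dataset;
import org.kitesdk.data.DatasetDescriptor;
import org.kitesdk.data.Datasets;

public class ExistsThenCreate {
  public static void main(String[] args) {
    // Hypothetical local dataset URI; any Kite dataset URI works the same way.
    String uri = "dataset:file:/tmp/example_repo/events";

    Dataset<GenericRecord> dataset;
    if (Datasets.exists(uri)) {
      // Reuse the existing dataset and the schema stored with it.
      dataset = Datasets.load(uri);
    } else {
      // Create the dataset from a descriptor; the schema here is inlined.
      DatasetDescriptor descriptor = new DatasetDescriptor.Builder()
          .schemaLiteral("{\"type\":\"record\",\"name\":\"Event\","
              + "\"fields\":[{\"name\":\"id\",\"type\":\"long\"}]}")
          .build();
      dataset = Datasets.create(uri, descriptor);
    }
    System.out.println("Dataset ready: " + dataset.getName());
  }
}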

Example 1: SavePolicy

import org.kitesdk.data.Datasets; // import the package/class this method depends on
private SavePolicy(Context context) {
  String uri = context.getString(CONFIG_KITE_ERROR_DATASET_URI);
  Preconditions.checkArgument(uri != null, "Must set "
      + CONFIG_KITE_ERROR_DATASET_URI + " when " + CONFIG_FAILURE_POLICY
      + "=save");
  if (Datasets.exists(uri)) {
    dataset = Datasets.load(uri, AvroFlumeEvent.class);
  } else {
    DatasetDescriptor descriptor = new DatasetDescriptor.Builder()
        .schema(AvroFlumeEvent.class)
        .build();
    dataset = Datasets.create(uri, descriptor, AvroFlumeEvent.class);
  }

  nEventsHandled = 0;
}
 
Developer: moueimei, Project: flume-release-1.7.0, Lines: 17, Source: SavePolicy.java

Example 2: testFileStoreWithSavePolicy

import org.kitesdk.data.Datasets; // import the package/class this method depends on
@Test
public void testFileStoreWithSavePolicy() throws EventDeliveryException {
  if (Datasets.exists(ERROR_DATASET_URI)) {
    Datasets.delete(ERROR_DATASET_URI);
  }
  config.put(DatasetSinkConstants.CONFIG_FAILURE_POLICY,
      DatasetSinkConstants.SAVE_FAILURE_POLICY);
  config.put(DatasetSinkConstants.CONFIG_KITE_ERROR_DATASET_URI,
      ERROR_DATASET_URI);
  DatasetSink sink = sink(in, config);

  // run the sink
  sink.start();
  sink.process();
  sink.stop();

  Assert.assertEquals(
      Sets.newHashSet(expected),
      read(Datasets.load(FILE_DATASET_URI)));
  Assert.assertEquals("Should have committed", 0, remaining(in));
}
 
Developer: moueimei, Project: flume-release-1.7.0, Lines: 22, Source: TestDatasetSink.java

Example 3: run

import org.kitesdk.data.Datasets; // import the package/class this method depends on
@Override
public int run(List<String> args) throws Exception {

  String inputUri = uri;
  String outputUri = "dataset:hive?dataset=correlated_events";

  if (args.size() == 1) {
    outputUri = args.get(0);
  }

  Preconditions.checkState(Datasets.exists(inputUri),
      "input dataset doesn't exist");

  if (!Datasets.exists(outputUri)) {
    Datasets.create(outputUri, new DatasetDescriptor.Builder()
        .format("avro")
        .schema(CorrelatedEvents.class)
        .build());
  }
  CorrelateEventsTask task = new CorrelateEventsTask(inputUri, outputUri);
  task.run();

  return 0;
}
 
Developer: kite-sdk, Project: kite-examples, Lines: 25, Source: CorrelateEvents.java

Example 4: testPartitionedData

import org.kitesdk.data.Datasets; // import the package/class this method depends on
@Test
public void testPartitionedData() throws EventDeliveryException {
  URI partitionedUri = URI.create("dataset:file:target/test_repo/partitioned");
  try {
    Datasets.create(partitionedUri, new DatasetDescriptor.Builder(DESCRIPTOR)
        .partitionStrategy(new PartitionStrategy.Builder()
            .identity("id", 10) // partition by id
            .build())
        .build());

    config.put(DatasetSinkConstants.CONFIG_KITE_DATASET_URI,
        partitionedUri.toString());
    DatasetSink sink = sink(in, config);

    // run the sink
    sink.start();
    sink.process();
    sink.stop();

    Assert.assertEquals(
        Sets.newHashSet(expected),
        read(Datasets.load(partitionedUri)));
    Assert.assertEquals("Should have committed", 0, remaining(in));
  } finally {
    if (Datasets.exists(partitionedUri)) {
      Datasets.delete(partitionedUri);
    }
  }
}
 
Developer: moueimei, Project: flume-release-1.7.0, Lines: 30, Source: TestDatasetSink.java

Example 5: testMiniClusterStore

import org.kitesdk.data.Datasets; // import the package/class this method depends on
@Test
public void testMiniClusterStore() throws EventDeliveryException, IOException {
  // setup a minicluster
  MiniDFSCluster cluster = new MiniDFSCluster
      .Builder(new Configuration())
      .build();

  FileSystem dfs = cluster.getFileSystem();
  Configuration conf = dfs.getConf();

  URI hdfsUri = URI.create(
      "dataset:" + conf.get("fs.defaultFS") + "/tmp/repo" + DATASET_NAME);
  try {
    // create a repository and dataset in HDFS
    Datasets.create(hdfsUri, DESCRIPTOR);

    // update the config to use the HDFS repository
    config.put(DatasetSinkConstants.CONFIG_KITE_DATASET_URI, hdfsUri.toString());

    DatasetSink sink = sink(in, config);

    // run the sink
    sink.start();
    sink.process();
    sink.stop();

    Assert.assertEquals(
        Sets.newHashSet(expected),
        read(Datasets.load(hdfsUri)));
    Assert.assertEquals("Should have committed", 0, remaining(in));

  } finally {
    if (Datasets.exists(hdfsUri)) {
      Datasets.delete(hdfsUri);
    }
    cluster.shutdown();
  }
}
 
Developer: moueimei, Project: flume-release-1.7.0, Lines: 39, Source: TestDatasetSink.java

Example 6: testMissingSchemaWithSavePolicy

import org.kitesdk.data.Datasets; // import the package/class this method depends on
@Test
public void testMissingSchemaWithSavePolicy() throws EventDeliveryException {
  if (Datasets.exists(ERROR_DATASET_URI)) {
    Datasets.delete(ERROR_DATASET_URI);
  }
  config.put(DatasetSinkConstants.CONFIG_FAILURE_POLICY,
      DatasetSinkConstants.SAVE_FAILURE_POLICY);
  config.put(DatasetSinkConstants.CONFIG_KITE_ERROR_DATASET_URI,
      ERROR_DATASET_URI);
  final DatasetSink sink = sink(in, config);

  Event badEvent = new SimpleEvent();
  badEvent.setHeaders(Maps.<String, String>newHashMap());
  badEvent.setBody(serialize(expected.get(0), RECORD_SCHEMA));
  putToChannel(in, badEvent);

  // run the sink
  sink.start();
  sink.process();
  sink.stop();

  Assert.assertEquals("Good records should have been written",
      Sets.newHashSet(expected),
      read(Datasets.load(FILE_DATASET_URI)));
  Assert.assertEquals("Should not have rolled back", 0, remaining(in));
  Assert.assertEquals("Should have saved the bad event",
      Sets.newHashSet(AvroFlumeEvent.newBuilder()
        .setBody(ByteBuffer.wrap(badEvent.getBody()))
        .setHeaders(toUtf8Map(badEvent.getHeaders()))
        .build()),
      read(Datasets.load(ERROR_DATASET_URI, AvroFlumeEvent.class)));
}
 
Developer: moueimei, Project: flume-release-1.7.0, Lines: 33, Source: TestDatasetSink.java

Example 7: testSerializedWithIncompatibleSchemasWithSavePolicy

import org.kitesdk.data.Datasets; // import the package/class this method depends on
@Test
public void testSerializedWithIncompatibleSchemasWithSavePolicy()
    throws EventDeliveryException {
  if (Datasets.exists(ERROR_DATASET_URI)) {
    Datasets.delete(ERROR_DATASET_URI);
  }
  config.put(DatasetSinkConstants.CONFIG_FAILURE_POLICY,
      DatasetSinkConstants.SAVE_FAILURE_POLICY);
  config.put(DatasetSinkConstants.CONFIG_KITE_ERROR_DATASET_URI,
      ERROR_DATASET_URI);
  final DatasetSink sink = sink(in, config);

  GenericRecordBuilder builder = new GenericRecordBuilder(
      INCOMPATIBLE_SCHEMA);
  GenericData.Record rec = builder.set("username", "koala").build();

  // We pass in a valid schema in the header, but an incompatible schema
  // was used to serialize the record
  Event badEvent = event(rec, INCOMPATIBLE_SCHEMA, SCHEMA_FILE, true);
  putToChannel(in, badEvent);

  // run the sink
  sink.start();
  sink.process();
  sink.stop();

  Assert.assertEquals("Good records should have been written",
      Sets.newHashSet(expected),
      read(Datasets.load(FILE_DATASET_URI)));
  Assert.assertEquals("Should not have rolled back", 0, remaining(in));
  Assert.assertEquals("Should have saved the bad event",
      Sets.newHashSet(AvroFlumeEvent.newBuilder()
        .setBody(ByteBuffer.wrap(badEvent.getBody()))
        .setHeaders(toUtf8Map(badEvent.getHeaders()))
        .build()),
      read(Datasets.load(ERROR_DATASET_URI, AvroFlumeEvent.class)));
}
 
Developer: moueimei, Project: flume-release-1.7.0, Lines: 38, Source: TestDatasetSink.java

Example 8: tearDown

import org.kitesdk.data.Datasets; // import the package/class this method depends on
@Override
public void tearDown() {
  super.tearDown();
  String uri = "dataset:file:" + getTablePath();
  if (Datasets.exists(uri)) {
    Datasets.delete(uri);
  }
}
 
Developer: aliyun, Project: aliyun-maxcompute-data-collectors, Lines: 9, Source: TestParquetImport.java

Example 9: initialize

import org.kitesdk.data.Datasets; // import the package/class this method depends on
@Override
public void initialize(InitializerContext context,
    LinkConfiguration linkConfig, FromJobConfiguration fromJobConfig) {
  String uri = ConfigUtil.buildDatasetUri(
      linkConfig.linkConfig, fromJobConfig.fromJobConfig.uri);
  LOG.debug("Constructed dataset URI: " + uri);
  if (!Datasets.exists(uri)) {
    LOG.error("Dataset does not exist");
    throw new SqoopException(KiteConnectorError.GENERIC_KITE_CONNECTOR_0002);
  }
}
 
Developer: vybs, Project: sqoop-on-spark, Lines: 12, Source: KiteFromInitializer.java

Example 10: initialize

import org.kitesdk.data.Datasets; // import the package/class this method depends on
@Override
public void initialize(InitializerContext context,
    LinkConfiguration linkConfig, ToJobConfiguration toJobConfig) {
  String uri = ConfigUtil.buildDatasetUri(
      linkConfig.linkConfig, toJobConfig.toJobConfig);
  LOG.debug("Constructed dataset URI: " + uri);
  if (Datasets.exists(uri)) {
    LOG.error("Overwrite an existing dataset is not expected in new create mode.");
    throw new SqoopException(KiteConnectorError.GENERIC_KITE_CONNECTOR_0001);
  }
}
 
Developer: vybs, Project: sqoop-on-spark, Lines: 12, Source: KiteToInitializer.java

Example 11: createOrUpdateDatasets

import org.kitesdk.data.Datasets; // import the package/class this method depends on
/**
 * Create or update the HBase datasets described by the map of dataset names
 * to schema files.
 * 
 * @param zkHost
 *          HBase zookeeper client hostname
 * @param zkPort
 *          HBase zookeeper client port
 * @param datasetNameSchemaMap
 *          A map of dataset names to the Avro schema files that we want to
 *          create. Each schema file is given as a location, which may be on
 *          the classpath, indicated by a "classpath:/" prefix.
 * @return The list of created or updated datasets.
 * @throws URISyntaxException
 * @throws IOException
 */
public static List<RandomAccessDataset<?>> createOrUpdateDatasets(
    String zkHost, String zkPort, Map<String, String> datasetNameSchemaMap)
    throws URISyntaxException, IOException {

  createManagedSchemasTable(zkHost, zkPort);

  List<RandomAccessDataset<?>> datasets = new ArrayList<RandomAccessDataset<?>>();
  for (Entry<String, String> entry : datasetNameSchemaMap.entrySet()) {
    String datasetName = entry.getKey();
    String schemaLocation = entry.getValue();
    File schemaFile;
    if (schemaLocation.startsWith(CLASSPATH_PREFIX)) {
      schemaLocation = schemaLocation.substring(CLASSPATH_PREFIX.length());
      schemaFile = new File(DevHelper.class.getClassLoader()
          .getResource(schemaLocation).toURI());
    } else {
      schemaFile = new File(schemaLocation);
    }
    DatasetDescriptor desc = new DatasetDescriptor.Builder().schema(
        schemaFile).build();

    String datasetURI = "dataset:hbase:" + zkHost + ":" + zkPort + "/"
        + datasetName;
    if (!Datasets.exists(datasetURI)) {
      datasets
          .add((RandomAccessDataset<?>) Datasets.create(datasetURI, desc));
    } else {
      datasets
          .add((RandomAccessDataset<?>) Datasets.update(datasetURI, desc));
    }
  }
  return datasets;
}
 
Developer: awarring, Project: kite-spring-hbase-example, Lines: 49, Source: DevHelper.java
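A hypothetical call to this helper might look as follows; the ZooKeeper endpoint and schema locations are made-up values for illustration, and DevHelper refers to the class shown above.

import java.util.HashMap;
import java.util.List;
import java.util.Map;
import org.kitesdk.data.RandomAccessDataset;

public class DevHelperUsage {
  public static void main(String[] args) throws Exception {
    // Hypothetical ZooKeeper endpoint and schema locations.
    Map<String, String> schemas = new HashMap<String, String>();
    schemas.put("users", "classpath:/avro/user.avsc");  // bundled on the classpath
    schemas.put("events", "/etc/schemas/event.avsc");   // plain filesystem path

    // DevHelper is the class from the example above; it creates missing
    // datasets and updates existing ones.
    List<RandomAccessDataset<?>> datasets =
        DevHelper.createOrUpdateDatasets("zk.example.com", "2181", schemas);
    System.out.println("Prepared " + datasets.size() + " datasets");
  }
}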

Example 12: configureImportJob

import org.kitesdk.data.Datasets; // import the package/class this method depends on
/**
 * Configure the import job. The import process will use a Kite dataset to
 * write data records into Parquet format internally. The input key class is
 * {@link org.apache.sqoop.lib.SqoopRecord}. The output key is
 * {@link org.apache.avro.generic.GenericRecord}.
 */
public static void configureImportJob(JobConf conf, Schema schema,
    String uri, WriteMode writeMode) throws IOException {
  Dataset dataset;

  // Add hive delegation token only if we don't already have one.
  if (uri.startsWith("dataset:hive")) {
    Configuration hiveConf = HiveConfig.getHiveConf(conf);
    if (isSecureMetastore(hiveConf)) {
      // Copy hive configs to job config
      HiveConfig.addHiveConfigs(hiveConf, conf);

      if (conf.getCredentials().getToken(new Text(HIVE_METASTORE_TOKEN_ALIAS)) == null) {
        addHiveDelegationToken(conf);
      }
    }
  }

  if (Datasets.exists(uri)) {
    if (WriteMode.DEFAULT.equals(writeMode)) {
      throw new IOException("Destination exists! " + uri);
    }

    dataset = Datasets.load(uri);
    Schema writtenWith = dataset.getDescriptor().getSchema();
    if (!SchemaValidationUtil.canRead(writtenWith, schema)) {
      throw new IOException(
          String.format("Expected schema: %s%nActual schema: %s",
              writtenWith, schema));
    }
  } else {
    dataset = createDataset(schema, getCompressionType(conf), uri);
  }
  conf.set(CONF_AVRO_SCHEMA, schema.toString());

  DatasetKeyOutputFormat.ConfigBuilder builder =
      DatasetKeyOutputFormat.configure(conf);
  if (WriteMode.OVERWRITE.equals(writeMode)) {
    builder.overwrite(dataset);
  } else if (WriteMode.APPEND.equals(writeMode)) {
    builder.appendTo(dataset);
  } else {
    builder.writeTo(dataset);
  }
}
 
Developer: aliyun, Project: aliyun-maxcompute-data-collectors, Lines: 51, Source: ParquetJob.java
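To make the write-mode semantics concrete: DEFAULT fails fast when Datasets.exists reports an existing destination, while APPEND and OVERWRITE reuse it after the schema-compatibility check. A hypothetical invocation is sketched below, assuming WriteMode is an enum nested in ParquetJob as the snippet suggests; the JobConf, schema, and URI are placeholder values.

import org.apache.avro.Schema;
import org.apache.hadoop.mapred.JobConf;

public class ParquetJobUsage {
  public static void main(String[] args) throws Exception {
    JobConf conf = new JobConf();
    // Minimal placeholder Avro schema for illustration.
    Schema schema = new Schema.Parser().parse(
        "{\"type\":\"record\",\"name\":\"Row\","
        + "\"fields\":[{\"name\":\"id\",\"type\":\"long\"}]}");
    String uri = "dataset:hive?dataset=target_table"; // hypothetical target

    // APPEND reuses an existing compatible dataset; DEFAULT would throw
    // "Destination exists!" if the dataset is already there. WriteMode is
    // assumed here to be the enum referenced by configureImportJob above.
    ParquetJob.configureImportJob(conf, schema, uri, ParquetJob.WriteMode.APPEND);
  }
}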


Note: The org.kitesdk.data.Datasets.exists method examples in this article were compiled by 纯净天空 from open-source code and documentation platforms such as GitHub and MSDocs; the snippets were selected from open-source projects contributed by many developers. Copyright of the source code belongs to the original authors; consult each project's license before distributing or using it. Do not reproduce without permission.