

Java DatasetDescriptor Class Code Examples

This article collects typical usage examples of the Java class org.kitesdk.data.DatasetDescriptor. If you have been wondering what the DatasetDescriptor class is for, how to use it, or what real-world usage looks like, the selected code examples below should help.


The DatasetDescriptor class belongs to the org.kitesdk.data package. A total of 15 code examples of the DatasetDescriptor class are shown below, sorted by popularity.
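
Before diving into the examples, here is a minimal, self-contained sketch of the pattern most of them share: describe a dataset with a DatasetDescriptor built from an Avro schema, then create it (or load it if it already exists) through Datasets. The inline schema, the dataset URI, and the DescriptorSketch class name are illustrative placeholders rather than code from any of the projects below.

import org.apache.avro.Schema;
import org.apache.avro.generic.GenericRecord;
import org.kitesdk.data.Dataset;
import org.kitesdk.data.DatasetDescriptor;
import org.kitesdk.data.Datasets;
import org.kitesdk.data.Formats;

public class DescriptorSketch {
  public static void main(String[] args) {
    // Hypothetical Avro schema; the examples below usually pass a generated Avro class
    // (e.g. .schema(StandardEvent.class)) or its getClassSchema() instead.
    Schema schema = new Schema.Parser().parse(
        "{\"type\":\"record\",\"name\":\"Event\",\"fields\":["
        + "{\"name\":\"id\",\"type\":\"long\"},"
        + "{\"name\":\"message\",\"type\":\"string\"}]}");

    // A descriptor bundles the schema with optional settings such as format,
    // compression type, and partition strategy.
    DatasetDescriptor descriptor = new DatasetDescriptor.Builder()
        .schema(schema)
        .format(Formats.AVRO) // Formats.PARQUET is the other common choice
        .build();

    // Placeholder URI; "dataset:hive:..." and "dataset:hdfs:..." URIs work the same way.
    String uri = "dataset:file:/tmp/example/events";

    Dataset<GenericRecord> events;
    if (Datasets.exists(uri)) {
      events = Datasets.load(uri);
    } else {
      events = Datasets.create(uri, descriptor);
    }

    System.out.println("Created/loaded dataset with schema: "
        + events.getDescriptor().getSchema());
  }
}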

Example 1: SavePolicy

import org.kitesdk.data.DatasetDescriptor; // import the required package/class
private SavePolicy(Context context) {
  String uri = context.getString(CONFIG_KITE_ERROR_DATASET_URI);
  Preconditions.checkArgument(uri != null, "Must set "
      + CONFIG_KITE_ERROR_DATASET_URI + " when " + CONFIG_FAILURE_POLICY
      + "=save");
  if (Datasets.exists(uri)) {
    dataset = Datasets.load(uri, AvroFlumeEvent.class);
  } else {
    DatasetDescriptor descriptor = new DatasetDescriptor.Builder()
        .schema(AvroFlumeEvent.class)
        .build();
    dataset = Datasets.create(uri, descriptor, AvroFlumeEvent.class);
  }

  nEventsHandled = 0;
}
 
Developer ID: moueimei, Project: flume-release-1.7.0, Lines of code: 17, Source file: SavePolicy.java

Example 2: setup

import org.kitesdk.data.DatasetDescriptor; // import the required package/class
@Override
public void setup(AppContext context) {

  DatasetDescriptor descriptor = new DatasetDescriptor.Builder()
      .schema(SmallEvent.class)
      .build();

  dataset(EVENTS_DS_URI, descriptor);

  Topics.createTopic(context, TOPIC_NAME, 1, 1, SmallEvent.getClassSchema());

  StreamDescription streamDescription = new StreamDescription.Builder()
      .jobName("simple-spark-streaming")
      .jobClass(StreamingSparkJob.class)
      .withStream("event_stream", Topics.topic(TOPIC_NAME))
      .withView("event_output", EVENTS_DS_URI)
      .build();

  stream(streamDescription);
}
 
Developer ID: rbrush, Project: kite-apps, Lines of code: 21, Source file: StreamingSparkApp.java

Example 3: dataset

import org.kitesdk.data.DatasetDescriptor; // import the required package/class
/**
 * Ensures the given dataset exists, creating it if it doesn't
 * and updating the schema if necessary.
 */
protected void dataset(String uri, DatasetDescriptor descriptor) {

  try {

    Datasets.create(uri, descriptor);
  } catch (DatasetExistsException e) {

    Dataset existingDataset = Datasets.load(uri);

    DatasetDescriptor updated;

    // The given descriptor might not have a location,
    // so use the current one.
    if (descriptor.getLocation() == null) {
      updated = new DatasetDescriptor.Builder(descriptor)
          .location(existingDataset.getDescriptor().getLocation())
          .build();
    } else {

      updated = descriptor;
    }

    Datasets.update(uri, updated);
  }
}
 
Developer ID: rbrush, Project: kite-apps, Lines of code: 30, Source file: AbstractApplication.java

Example 4: setup

import org.kitesdk.data.DatasetDescriptor; // import the required package/class
public void setup(AppContext context) {

    DatasetDescriptor descriptor = new DatasetDescriptor.Builder()
        .schema(KeyValues.getClassSchema())
        .build();

    dataset(OUTPUT_DATASET, descriptor);

    // Schedule our job to run hourly (at minute 0 of each hour).
    Schedule schedule = new Schedule.Builder()
        .jobName("write-config-job")
        .jobClass(WriteConfigOutputJob.class)
        .frequency("0 * * * *")
        .withOutput("kv-output", OUTPUT_URI_PATTERN)
        .build();

    schedule(schedule);
  }
 
Developer ID: rbrush, Project: kite-apps, Lines of code: 19, Source file: WriteConfigOutputApp.java

Example 5: setup

import org.kitesdk.data.DatasetDescriptor; // import the required package/class
@Override
public void setup(AppContext context) {

  DatasetDescriptor descriptor = new DatasetDescriptor.Builder()
      .schema(ExampleEvent.getClassSchema())
      .build();

  dataset(EVENTS_DS_URI, descriptor);

  Topics.createTopic(context, TOPIC_NAME, 1, 1, ExampleEvent.getClassSchema());

  StreamDescription streamDescription = new StreamDescription.Builder()
      .jobName("test-event-stream")
      .jobClass(TopicToDatasetJob.class)
      .withStream("event_stream", Topics.topic(TOPIC_NAME))
      .withView("event_output", EVENTS_DS_URI)
      .build();

  stream(streamDescription);
}
 
Developer ID: rbrush, Project: kite-apps, Lines of code: 21, Source file: TopicToDatasetApp.java

Example 6: run

import org.kitesdk.data.DatasetDescriptor; // import the required package/class
@Override
public int run(List<String> args) throws Exception {

  String inputUri = uri;
  String outputUri = "dataset:hive?dataset=correlated_events";

  if (args.size() == 1) {
    outputUri = args.get(0);
  }

  Preconditions.checkState(Datasets.exists(inputUri),
      "input dataset doesn't exists");

  if (!Datasets.exists(outputUri)) {
    Datasets.create(outputUri, new DatasetDescriptor.Builder()
        .format("avro")
        .schema(CorrelatedEvents.class)
        .build());
  }
  CorrelateEventsTask task = new CorrelateEventsTask(inputUri, outputUri);
  task.run();

  return 0;
}
 
Developer ID: kite-sdk, Project: kite-examples, Lines of code: 25, Source file: CorrelateEvents.java

Example 7: run

import org.kitesdk.data.DatasetDescriptor; // import the required package/class
@Override
public int run(List<String> args) throws Exception {

  Preconditions.checkState(!Datasets.exists(uri),
      "events dataset already exists");

  DatasetDescriptor descriptor = new DatasetDescriptor.Builder()
      .schema(StandardEvent.class).build();

  View<StandardEvent> events = Datasets.create(uri, descriptor, StandardEvent.class);
  DatasetWriter<StandardEvent> writer = events.newWriter();
  try {
    while (System.currentTimeMillis() - baseTimestamp < 36000) {
      writer.write(generateRandomEvent());
    }
  } finally {
    writer.close();
  }

  System.out.println("Generated " + counter + " events");

  return 0;
}
 
Developer ID: kite-sdk, Project: kite-examples, Lines of code: 24, Source file: CreateEvents.java

Example 8: testParquetDataset

import org.kitesdk.data.DatasetDescriptor; // import the required package/class
@Test
public void testParquetDataset() throws EventDeliveryException {
  Datasets.delete(FILE_DATASET_URI);
  Dataset<GenericRecord> created = Datasets.create(FILE_DATASET_URI,
      new DatasetDescriptor.Builder(DESCRIPTOR)
          .format("parquet")
          .build());

  DatasetSink sink = sink(in, config);

  // run the sink
  sink.start();
  sink.process();

  // the transaction should not commit during the call to process
  assertThrows("Transaction should still be open", IllegalStateException.class,
      new Callable() {
        @Override
        public Object call() throws EventDeliveryException {
          in.getTransaction().begin();
          return null;
        }
      });
  // The records won't commit until the call to stop()
  Assert.assertEquals("Should not have committed", 0, read(created).size());

  sink.stop();

  Assert.assertEquals(Sets.newHashSet(expected), read(created));
  Assert.assertEquals("Should have committed", 0, remaining(in));
}
 
Developer ID: moueimei, Project: flume-release-1.7.0, Lines of code: 32, Source file: TestDatasetSink.java

Example 9: testPartitionedData

import org.kitesdk.data.DatasetDescriptor; // import the required package/class
@Test
public void testPartitionedData() throws EventDeliveryException {
  URI partitionedUri = URI.create("dataset:file:target/test_repo/partitioned");
  try {
    Datasets.create(partitionedUri, new DatasetDescriptor.Builder(DESCRIPTOR)
        .partitionStrategy(new PartitionStrategy.Builder()
            .identity("id", 10) // partition by id
            .build())
        .build());

    config.put(DatasetSinkConstants.CONFIG_KITE_DATASET_URI,
        partitionedUri.toString());
    DatasetSink sink = sink(in, config);

    // run the sink
    sink.start();
    sink.process();
    sink.stop();

    Assert.assertEquals(
        Sets.newHashSet(expected),
        read(Datasets.load(partitionedUri)));
    Assert.assertEquals("Should have committed", 0, remaining(in));
  } finally {
    if (Datasets.exists(partitionedUri)) {
      Datasets.delete(partitionedUri);
    }
  }
}
 
Developer ID: moueimei, Project: flume-release-1.7.0, Lines of code: 30, Source file: TestDatasetSink.java

Example 10: createDataset

import org.kitesdk.data.DatasetDescriptor; // import the required package/class
private static Dataset createDataset(Schema schema,
    CompressionType compressionType, String uri) {
  DatasetDescriptor descriptor = new DatasetDescriptor.Builder()
      .schema(schema)
      .format(Formats.PARQUET)
      .compressionType(compressionType)
      .build();
  return Datasets.create(uri, descriptor, GenericRecord.class);
}
 
Developer ID: aliyun, Project: aliyun-maxcompute-data-collectors, Lines of code: 10, Source file: ParquetJob.java

Example 11: configureInputFormat

import org.kitesdk.data.DatasetDescriptor; // import the required package/class
@Override
protected void configureInputFormat(Job job, String tableName, String tableClassName,
    String splitByCol) throws ClassNotFoundException, IOException {
  fileType = getInputFileType();

  super.configureInputFormat(job, tableName, tableClassName, splitByCol);

  if (isHCatJob) {
    SqoopHCatUtilities.configureExportInputFormat(options, job, context.getConnManager(),
        tableName, job.getConfiguration());
    return;
  } else if (fileType == FileType.AVRO_DATA_FILE) {
    LOG.debug("Configuring for Avro export");
    configureGenericRecordExportInputFormat(job, tableName);
  } else if (fileType == FileType.PARQUET_FILE) {
    LOG.debug("Configuring for Parquet export");
    configureGenericRecordExportInputFormat(job, tableName);
    FileSystem fs = FileSystem.get(job.getConfiguration());
    String uri = "dataset:" + fs.makeQualified(getInputPath());
    Exception caughtException = null;
    try {
      DatasetKeyInputFormat.configure(job).readFrom(uri);
    } catch (DatasetNotFoundException e) {
      LOG.warn(e.getMessage(), e);
      LOG.warn("Trying to get data schema from parquet file directly");
      caughtException = e;
    }
    if (caughtException != null && caughtException instanceof DatasetNotFoundException) {
      DatasetDescriptor descriptor = getDatasetDescriptorFromParquetFile(job, fs, uri);
      Dataset dataset = Datasets.create(uri, descriptor, GenericRecord.class);
      DatasetKeyInputFormat.configure(job).readFrom(dataset);
    }
  }

  FileInputFormat.addInputPath(job, getInputPath());
}
 
Developer ID: aliyun, Project: aliyun-maxcompute-data-collectors, Lines of code: 37, Source file: HdfsOdpsImportJob.java

Example 12: getDatasetDescriptorFromParquetFile

import org.kitesdk.data.DatasetDescriptor; // import the required package/class
private DatasetDescriptor getDatasetDescriptorFromParquetFile(Job job, FileSystem fs, String uri)
    throws IOException {

  ArrayList<FileStatus> files = new ArrayList<FileStatus>();
  FileStatus[] dirs;
  dirs = fs.globStatus(fs.makeQualified(getInputPath()));
  for (int i = 0; (dirs != null && i < dirs.length); i++) {
    files.addAll(Arrays.asList(fs.listStatus(dirs[i].getPath(), HIDDEN_FILES_PATH_FILTER)));
    // We only check one file, so exit the loop when we have at least
    // one.
    if (files.size() > 0) {
      break;
    }
  }

  ParquetMetadata parquetMetadata;
  try {
    parquetMetadata =
        ParquetFileReader.readFooter(job.getConfiguration(),
            fs.makeQualified(files.get(0).getPath()));
  } catch (IOException e) {
    LOG.error("Wrong file format. Please check the export file's format.", e);
    throw e;
  }
  MessageType schema = parquetMetadata.getFileMetaData().getSchema();
  Schema avroSchema = new AvroSchemaConverter().convert(schema);
  DatasetDescriptor descriptor =
      new DatasetDescriptor.Builder().schema(avroSchema).format(Formats.PARQUET)
          .compressionType(ParquetJob.getCompressionType(job.getConfiguration())).build();
  return descriptor;
}
 
Developer ID: aliyun, Project: aliyun-maxcompute-data-collectors, Lines of code: 32, Source file: HdfsOdpsImportJob.java

Example 13: createDataset

import org.kitesdk.data.DatasetDescriptor; // import the required package/class
/**
 * Creates a new dataset.
 */
public static Dataset<GenericRecord> createDataset(String uri, org.apache.sqoop.schema.Schema schema,
    FileFormat format) {
  Schema datasetSchema = KiteDataTypeUtil.createAvroSchema(schema);
  Format datasetFormat = KiteDataTypeUtil.toFormat(format);
  DatasetDescriptor descriptor = new DatasetDescriptor.Builder()
      .property("kite.allow.csv", "true")
      .schema(datasetSchema)
      .format(datasetFormat)
      .build();
  return Datasets.create(uri, descriptor);
}
 
Developer ID: vybs, Project: sqoop-on-spark, Lines of code: 15, Source file: KiteDatasetExecutor.java

Example 14: setup

import org.kitesdk.data.DatasetDescriptor; // import the required package/class
@Override
public void setup(AppContext context) {

  // Create the input and output datasets.
  PartitionStrategy strategy = new PartitionStrategy.Builder()
      .provided("year", "int")
      .provided("month", "int")
      .provided("day", "int")
      .provided("hour", "int")
      .build();

  DatasetDescriptor descriptor = new DatasetDescriptor.Builder()
      .schema(DatasetTestUtilities.USER_SCHEMA)
      .partitionStrategy(strategy)
      .build();

  dataset(INPUT_DATASET, descriptor);
  dataset(OUTPUT_DATASET, descriptor);

  // Schedule our job to run hourly (at minute 0 of each hour).
  Schedule schedule = new Schedule.Builder()
      .jobName("simple-spark-job")
      .jobClass(SimpleSparkJob.class)
      .frequency("0 * * * *")
      .withInput("source.users", INPUT_URI_PATTERN, "* * * * *")
      .withOutput("target.users", OUTPUT_URI_PATTERN)
      .build();

  schedule(schedule);
}
 
Developer ID: rbrush, Project: kite-apps, Lines of code: 31, Source file: SimpleSparkApp.java

Example 15: setup

import org.kitesdk.data.DatasetDescriptor; // import the required package/class
public void setup(AppContext context) {

    // Create the input and output datasets.
    PartitionStrategy strategy = new PartitionStrategy.Builder()
        .provided("year", "int")
        .provided("month", "int")
        .provided("day", "int")
        .provided("hour", "int")
        .build();

    DatasetDescriptor descriptor = new DatasetDescriptor.Builder()
        .schema(DatasetTestUtilities.USER_SCHEMA)
        .partitionStrategy(strategy)
        .build();

    dataset(INPUT_DATASET, descriptor);
    dataset(OUTPUT_DATASET, descriptor);

    // Schedule our job to run hourly (at minute 0 of each hour).
    Schedule schedule = new Schedule.Builder()
        .jobName("test-job")
        .jobClass(ScheduledInputOutputJob.class)
        .frequency("0 * * * *")
        .withInput("source_users", INPUT_URI_PATTERN, "* * * * *")
        .withOutput("target_users", OUTPUT_URI_PATTERN)
        .build();

    schedule(schedule);
  }
 
Developer ID: rbrush, Project: kite-apps, Lines of code: 30, Source file: ScheduledInputOutputApp.java


Note: The org.kitesdk.data.DatasetDescriptor class examples in this article were compiled by 纯净天空 from GitHub, MSDocs, and other open-source code and documentation platforms. The snippets were selected from open-source projects contributed by many developers; copyright of the source code belongs to the original authors. Please follow the corresponding project's license when distributing or using the code, and do not republish without permission.