当前位置: 首页>>代码示例>>Java>>正文


Java Dataset.newWriter方法代码示例

本文整理汇总了Java中org.kitesdk.data.Dataset.newWriter方法的典型用法代码示例。如果您正苦于以下问题:Java Dataset.newWriter方法的具体用法?Java Dataset.newWriter怎么用?Java Dataset.newWriter使用的例子?那么恭喜您, 这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在org.kitesdk.data.Dataset的用法示例。


在下文中一共展示了Dataset.newWriter方法的6个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Java代码示例。

示例1: run

import org.kitesdk.data.Dataset; //导入方法依赖的package包/类
@Override
public int run(String[] args) throws Exception {
  // Create a partition strategy that hash partitions on username with 10 buckets
  PartitionStrategy partitionStrategy = new PartitionStrategy.Builder()
      .identity("favoriteColor", "favorite_color")
      .build();

  // Create a dataset of users with the Avro schema
  DatasetDescriptor descriptor = new DatasetDescriptor.Builder()
      .schemaUri("resource:user.avsc")
      .partitionStrategy(partitionStrategy)
      .build();
  Dataset<Record> users = Datasets.create(
      "dataset:hdfs:/tmp/data/users", descriptor, Record.class);

  // Get a writer for the dataset and write some users to it
  DatasetWriter<Record> writer = null;
  try {
    writer = users.newWriter();
    Random rand = new Random();
    GenericRecordBuilder builder = new GenericRecordBuilder(descriptor.getSchema());
    for (int i = 0; i < 100; i++) {
      Record record = builder.set("username", "user-" + i)
          .set("creationDate", System.currentTimeMillis())
          .set("favoriteColor", colors[rand.nextInt(colors.length)]).build();
      writer.write(record);
    }
  } finally {
    if (writer != null) {
      writer.close();
    }
  }

  return 0;
}
 
开发者ID:kite-sdk,项目名称:kite-examples,代码行数:36,代码来源:CreateUserDatasetGenericPartitioned.java

示例2: run

import org.kitesdk.data.Dataset; //导入方法依赖的package包/类
@Override
public int run(String[] args) throws Exception {
  // Create a dataset of users with the Avro schema
  DatasetDescriptor descriptor = new DatasetDescriptor.Builder()
      .schemaUri("resource:user.avsc")
      .build();
  Dataset<Record> users = Datasets.create(
      "dataset:hdfs:/tmp/data/users", descriptor, Record.class);

  // Get a writer for the dataset and write some users to it
  DatasetWriter<Record> writer = null;
  try {
    writer = users.newWriter();
    Random rand = new Random();
    GenericRecordBuilder builder = new GenericRecordBuilder(descriptor.getSchema());
    for (int i = 0; i < 100; i++) {
      Record record = builder.set("username", "user-" + i)
          .set("creationDate", System.currentTimeMillis())
          .set("favoriteColor", colors[rand.nextInt(colors.length)]).build();
      writer.write(record);
    }
  } finally {
    if (writer != null) {
      writer.close();
    }
  }

  return 0;
}
 
开发者ID:kite-sdk,项目名称:kite-examples,代码行数:30,代码来源:CreateUserDatasetGeneric.java

示例3: run

import org.kitesdk.data.Dataset; //导入方法依赖的package包/类
@Override
public int run(String[] args) throws Exception {
  DatasetDescriptor descriptor = new DatasetDescriptor.Builder()
      .schemaUri("resource:user.avsc")
      .format(Formats.PARQUET)
      .build();
  Dataset<Record> users = Datasets.create(
      "dataset:hdfs:/tmp/data/users", descriptor, Record.class);

  // Get a writer for the dataset and write some users to it
  DatasetWriter<Record> writer = null;
  try {
    writer = users.newWriter();
    Random rand = new Random();
    GenericRecordBuilder builder = new GenericRecordBuilder(descriptor.getSchema());
    for (int i = 0; i < 100; i++) {
      Record record = builder.set("username", "user-" + i)
          .set("creationDate", System.currentTimeMillis())
          .set("favoriteColor", colors[rand.nextInt(colors.length)]).build();
      writer.write(record);
    }
  } finally {
    if (writer != null) {
      writer.close();
    }
  }

  return 0;
}
 
开发者ID:kite-sdk,项目名称:kite-examples,代码行数:30,代码来源:CreateUserDatasetGenericParquet.java

示例4: run

import org.kitesdk.data.Dataset; //导入方法依赖的package包/类
@Override
public int run(String[] args) throws Exception {

  // Create a dataset of products with the Avro schema
  DatasetDescriptor descriptor = new DatasetDescriptor.Builder()
      .schema(Product.class)
      .build();
  Dataset<Product> products = Datasets.create(
      "dataset:hdfs:/tmp/data/products", descriptor, Product.class);

  // Get a writer for the dataset and write some products to it
  DatasetWriter<Product> writer = null;
  try {
    writer = products.newWriter();
    int i = 0;
    for (String name : names) {
      Product product = new Product();
      product.setName(name);
      product.setId(i++);
      writer.write(product);
    }
  } finally {
    if (writer != null) {
      writer.close();
    }
  }

  return 0;
}
 
开发者ID:kite-sdk,项目名称:kite-examples,代码行数:30,代码来源:CreateProductDatasetPojo.java

示例5: run

import org.kitesdk.data.Dataset; //导入方法依赖的package包/类
@Override
public int run(String[] args) throws Exception {
  // Create a dataset of users with the Avro schema
  DatasetDescriptor descriptor = new DatasetDescriptor.Builder()
      .schemaUri("resource:user.avsc")
      .build();
  Dataset<Record> users = Datasets.create("dataset:hive?dataset=users",
      descriptor, Record.class);

  // Get a writer for the dataset and write some users to it
  DatasetWriter<Record> writer = null;
  try {
    writer = users.newWriter();
    Random rand = new Random();
    GenericRecordBuilder builder = new GenericRecordBuilder(descriptor.getSchema());
    for (int i = 0; i < 100; i++) {
      Record record = builder.set("username", "user-" + i)
          .set("creationDate", System.currentTimeMillis())
          .set("favoriteColor", colors[rand.nextInt(colors.length)]).build();
      writer.write(record);
    }

  } finally {
    if (writer != null) {
      writer.close();
    }
  }

  return 0;
}
 
开发者ID:kite-sdk,项目名称:kite-examples,代码行数:31,代码来源:CreateHiveUserDatasetGeneric.java

示例6: run

import org.kitesdk.data.Dataset; //导入方法依赖的package包/类
@Override
public int run(String[] args) throws Exception {
  // going to generate a lot of random log messages
  final Random rand = new Random();

  // data is written to the staging dataset
  Dataset<Record> staging = Datasets.load(
      "dataset:file:/tmp/data/logs_staging", Record.class);

  // this is going to build our simple log records
  GenericRecordBuilder builder = new GenericRecordBuilder(
      staging.getDescriptor().getSchema());

  // generate timestamps 1 second apart starting 1 day ago
  final Calendar now = Calendar.getInstance(TimeZone.getTimeZone("UTC"));
  final long yesterday = now.getTimeInMillis() - DAY_IN_MILLIS;

  DatasetWriter<Record> writer = null;
  try {
    writer = staging.newWriter();

    // generate 15,000 messages, each 5 seconds apart, starting 24 hours ago
    // this is a little less than 24 hours worth of messages
    for (int second : Ranges.closed(0, 15000).asSet(DiscreteDomains.integers())) {
      LOG.info("Generating log message " + second);

      builder.set("timestamp", yesterday + second * 5000);
      builder.set("component", "GenerateSimpleLogs");

      int level = rand.nextInt(LOG_LEVELS.length);
      builder.set("level", LOG_LEVELS[level]);
      builder.set("message", LOG_MESSAGES[level]);

      writer.write(builder.build());
    }

    if (writer instanceof Flushable) {
      ((Flushable) writer).flush();
    }
  } finally {
    if (writer != null) {
      writer.close();
    }
  }

  return 0;
}
 
开发者ID:kite-sdk,项目名称:kite-examples,代码行数:48,代码来源:GenerateSimpleLogs.java


注:本文中的org.kitesdk.data.Dataset.newWriter方法示例由纯净天空整理自Github/MSDocs等开源代码及文档管理平台,相关代码片段筛选自各路编程大神贡献的开源项目,源码版权归原作者所有,传播和使用请参考对应项目的License;未经允许,请勿转载。