本文整理汇总了 Java 中 org.kitesdk.data.Dataset.newReader 方法的典型用法代码示例。如果您正苦于以下问题：Java Dataset.newReader 方法的具体用法？Java Dataset.newReader 怎么用？Java Dataset.newReader 使用的例子？那么恭喜您，这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在类 org.kitesdk.data.Dataset 的用法示例。
在下文中一共展示了 Dataset.newReader 方法的 11 个代码示例，这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞，您的评价将有助于系统推荐出更棒的 Java 代码示例。
示例1: run
import org.kitesdk.data.Dataset; //导入方法依赖的package包/类
/**
 * Loads the users dataset from HDFS and prints every record to standard output.
 *
 * @param args command-line arguments; not read by this method
 * @return always {@code 0}
 * @throws Exception propagated from loading the dataset or from using the reader
 */
@Override
public int run(String[] args) throws Exception {
  final String usersUri = "dataset:hdfs:/tmp/data/users";
  Dataset<Record> users = Datasets.load(usersUri, Record.class);

  // Open the reader before entering the try block: if opening fails there
  // is nothing to close, so no null guard is needed in the finally clause.
  DatasetReader<Record> userReader = users.newReader();
  try {
    for (GenericRecord user : userReader) {
      System.out.println(user);
    }
  } finally {
    // Always release the reader, even if iteration threw.
    userReader.close();
  }
  return 0;
}
示例2: run
import org.kitesdk.data.Dataset; //导入方法依赖的package包/类
/**
 * Loads the Hive-backed users dataset and prints every record to standard output.
 *
 * @param args command-line arguments; not read by this method
 * @return always {@code 0}
 * @throws Exception propagated from loading the dataset or from using the reader
 */
@Override
public int run(String[] args) throws Exception {
  // Load the users dataset
  Dataset<Record> users = Datasets.load(
      "dataset:hive?dataset=users", Record.class);

  // Get a reader for the dataset and read all the users
  DatasetReader<Record> reader = null;
  try {
    reader = users.newReader();
    // Iterate over the reader that is tracked for cleanup. Calling
    // users.newReader() again here would open a second reader that the
    // finally block never closes, leaking its underlying resources.
    for (GenericRecord user : reader) {
      System.out.println(user);
    }
  } finally {
    if (reader != null) {
      reader.close();
    }
  }
  return 0;
}
示例3: run
import org.kitesdk.data.Dataset; //导入方法依赖的package包/类
/**
 * Loads the products dataset from HDFS and prints every product to standard output.
 *
 * @param args command-line arguments; not read by this method
 * @return always {@code 0}
 * @throws Exception propagated from loading the dataset or from using the reader
 */
@Override
public int run(String[] args) throws Exception {
  final String productsUri = "dataset:hdfs:/tmp/data/products";
  Dataset<Product> products = Datasets.load(productsUri, Product.class);

  // Get a reader for the dataset and read all the products. The reader is
  // opened outside the try block, so a failure to open leaves nothing to close.
  DatasetReader<Product> productReader = products.newReader();
  try {
    for (Product item : productReader) {
      System.out.println(item);
    }
  } finally {
    productReader.close();
  }
  return 0;
}
示例4: run
import org.kitesdk.data.Dataset; //导入方法依赖的package包/类
/**
 * Loads the Hive events dataset and prints every event record to standard output.
 *
 * @param args command-line arguments; not read by this method
 * @return always {@code 0}
 * @throws Exception propagated from loading the dataset or from using the reader
 */
@Override
public int run(String[] args) throws Exception {
  final String eventsUri = "dataset:hive:/tmp/data/default/events";
  Dataset<GenericRecord> events = Datasets.load(eventsUri);

  // Read every event, making sure the reader is closed afterwards.
  DatasetReader<GenericRecord> eventReader = events.newReader();
  try {
    for (GenericRecord record : eventReader) {
      System.out.println(record);
    }
  } finally {
    eventReader.close();
  }
  return 0;
}
示例5: run
import org.kitesdk.data.Dataset; //导入方法依赖的package包/类
/**
 * Loads the movies dataset from HDFS and writes every record to standard error,
 * each prefixed with {@code "Movie: "}.
 *
 * @param args command-line arguments; not read by this method
 * @return always {@code 0}
 * @throws Exception propagated from loading the dataset or from using the reader
 */
@Override
public int run(String[] args) throws Exception {
  final String moviesUri = "dataset:hdfs:/tmp/data/movies";
  Dataset<Record> movies = Datasets.load(moviesUri, Record.class);

  // Opening the reader before the try block means the finally clause can
  // close it unconditionally.
  DatasetReader<Record> movieReader = movies.newReader();
  try {
    for (Record movie : movieReader) {
      System.err.println("Movie: " + movie);
    }
  } finally {
    movieReader.close();
  }
  return 0;
}
示例6: verifyHiveDataset
import org.kitesdk.data.Dataset; //导入方法依赖的package包/类
/**
 * Asserts that the Hive dataset for the given table exists, is not empty, and
 * holds exactly the expected rows, in any order.
 *
 * <p>Each record read is converted with {@code convertGenericRecordToArray},
 * rendered via {@link Arrays#toString(Object[])}, and must match one
 * not-yet-consumed expected row rendered the same way.
 *
 * @param tableName table name; lower-cased to build the {@code dataset:hive:default/...} URI
 * @param valsArray expected rows, one {@code Object[]} per record; {@code null} means no
 *     expected rows
 */
private void verifyHiveDataset(String tableName, Object[][] valsArray) {
  String datasetUri = String.format("dataset:hive:default/%s",
      tableName.toLowerCase());
  assertTrue(Datasets.exists(datasetUri));

  // Parameterize the dataset instead of using the raw type so that
  // newReader() yields a DatasetReader<GenericRecord> without an
  // unchecked conversion.
  Dataset<GenericRecord> dataset = Datasets.load(datasetUri);
  assertFalse(dataset.isEmpty());

  DatasetReader<GenericRecord> reader = dataset.newReader();
  try {
    List<String> expectations = new ArrayList<String>();
    if (valsArray != null) {
      for (Object[] vals : valsArray) {
        expectations.add(Arrays.toString(vals));
      }
    }

    // Consume records until either side runs out; remove() returns false
    // when the record does not match any remaining expected row.
    while (reader.hasNext() && !expectations.isEmpty()) {
      String actual = Arrays.toString(
          convertGenericRecordToArray(reader.next()));
      assertTrue("Expect record: " + actual, expectations.remove(actual));
    }

    // Both sides must be exhausted: no extra records, no missing rows.
    assertFalse(reader.hasNext());
    assertEquals(0, expectations.size());
  } finally {
    reader.close();
  }
}
示例7: testMultiTableImportAsParquetFormat
import org.kitesdk.data.Dataset; //导入方法依赖的package包/类
/**
 * Runs an import of all tables with {@code --as-parquetfile} and checks that each
 * table's dataset under the warehouse directory contains exactly one record whose
 * first two fields match the expected value for that table.
 *
 * @throws IOException declared by the method; propagated from the import or dataset access
 */
public void testMultiTableImportAsParquetFormat() throws IOException {
  String [] argv = getArgv(new String[]{"--as-parquetfile"}, null);
  runImport(new ImportAllTablesTool(), argv);

  Path warehousePath = new Path(this.getWarehouseDir());
  int i = 0;
  for (String tableName : this.tableNames) {
    Path tablePath = new Path(warehousePath, tableName);
    // Parameterize the dataset instead of using the raw type so that
    // newReader() yields a DatasetReader<GenericRecord> without an
    // unchecked conversion.
    Dataset<GenericRecord> dataset = Datasets.load("dataset:file:" + tablePath);

    // dequeue the expected value for this table. This
    // list has the same order as the tableNames list.
    String expectedVal = Integer.toString(i++) + ","
        + this.expectedStrings.get(0);
    this.expectedStrings.remove(0);

    DatasetReader<GenericRecord> reader = dataset.newReader();
    try {
      GenericRecord record = reader.next();
      String line = record.get(0) + "," + record.get(1);
      assertEquals("Table " + tableName + " expected a different string",
          expectedVal, line);
      // Each table is expected to hold a single record.
      assertFalse(reader.hasNext());
    } finally {
      reader.close();
    }
  }
}
示例8: testJobConfiguration
import org.kitesdk.data.Dataset; //导入方法依赖的package包/类
/**
 * Runs the scheduled jobs of {@code WriteConfigOutputApp} with app-, job- and
 * output-level settings, then checks the job settings and output settings
 * recorded in the first record of the output dataset.
 */
@Test
public void testJobConfiguration() {
  Map<String,String> settings = ImmutableMap.of("test.app.level.setting", "appvalue",
      "kite.job.write-config-job.test.job.level.setting", "jobvalue",
      "kite.job.write-config-job.output.kv-output.test.output.level.setting", "outputvalue");

  AppContext context = new AppContext(settings, getConfiguration());
  TestScheduler scheduler = TestScheduler.load(WriteConfigOutputApp.class, context);

  Instant nominalTime = new DateTime(2015, 5, 15, 12, 0, 0, 0, DateTimeZone.UTC).toInstant();
  scheduler.runScheduledJobs(nominalTime);

  Dataset<KeyValues> ds = Datasets.load(WriteConfigOutputApp.OUTPUT_DATASET, KeyValues.class);
  DatasetReader<KeyValues> reader = ds.newReader();
  try {
    KeyValues kv = reader.next();

    // Settings expected to be visible at the job level.
    Map<String,String> expectedJobSettings = ImmutableMap.of(
        "test.app.level.setting", "appvalue",
        "test.job.level.setting", "jobvalue",
        "output.kv-output.test.output.level.setting", "outputvalue");
    Assert.assertEquals(expectedJobSettings, kv.getJobsettings());

    // Settings expected to be visible at the output level.
    Map<String,String> expectedOutputSettings = ImmutableMap.of(
        "test.app.level.setting", "appvalue",
        "test.job.level.setting", "jobvalue",
        "test.output.level.setting", "outputvalue");
    Assert.assertEquals(expectedOutputSettings, kv.getOutputsettings());
  } finally {
    Closeables.closeQuietly(reader);
  }
}
示例9: testGenerateAndRunReport
import org.kitesdk.data.Dataset; //导入方法依赖的package包/类
/**
 * Runs the data generator at five consecutive nominal minutes, runs the report
 * job at the following minute, and checks that the report dataset holds ten
 * records, each with an {@code event_count} of 5.
 */
@Test
public void testGenerateAndRunReport() {
  DateTime firstNominalTime = new DateTime(2015, 5, 7, 12, 0, 0);

  // Run the generator job once per minute, for five minutes.
  TestScheduler generatorRunner = TestScheduler.load(DataGeneratorApp.class, new AppContext(getConfiguration()));
  for (int minute = 0; minute < 5; minute++) {
    DateTime nominalTime = firstNominalTime.plusMinutes(minute);
    generatorRunner.runScheduledJobs(nominalTime.toInstant());
  }

  // Run the report job, which aggregates over the generated data.
  TestScheduler reportRunner = TestScheduler.load(ScheduledReportApp.class, new AppContext(getConfiguration()));
  DateTime reportTime = firstNominalTime.plusMinutes(5);
  reportRunner.runScheduledJobs(reportTime.toInstant());

  // Verify the expected data was written.
  Dataset<GenericData.Record> ds = Datasets.load(ScheduledReportApp.REPORT_DS_URI, GenericData.Record.class);
  DatasetReader<GenericData.Record> reader = ds.newReader();
  try {
    int recordCount = 0;
    for (GenericData.Record reportRow : reader) {
      // One event per generator run, so five in total for each row.
      Assert.assertEquals(5L, reportRow.get("event_count"));
      recordCount++;
    }

    // We should see ten distinct IDs.
    Assert.assertEquals(10, recordCount);
  } finally {
    reader.close();
  }
}
示例10: testGenerateData
import org.kitesdk.data.Dataset; //导入方法依赖的package包/类
/**
 * Runs the scheduled jobs of {@code DataGeneratorApp} once and checks that the
 * event dataset then contains ten records.
 */
@Test
public void testGenerateData() {
  AppContext context = new AppContext(getConfiguration());
  TestScheduler container = TestScheduler.load(DataGeneratorApp.class, context);
  container.runScheduledJobs(new Instant());

  // Verify the expected data was written.
  Dataset<ExampleEvent> ds = Datasets.load(DataGeneratorApp.EVENT_DS_URI, ExampleEvent.class);
  DatasetReader<ExampleEvent> reader = ds.newReader();
  try {
    // Only the number of records matters here, not their content.
    int recordCount = 0;
    for (ExampleEvent ignored : reader) {
      recordCount++;
    }
    Assert.assertEquals(10, recordCount);
  } finally {
    reader.close();
  }
}
示例11: testStream
import org.kitesdk.data.Dataset; //导入方法依赖的package包/类
/**
 * Writes events to the topic and polls the output dataset, up to ten times with
 * a two-second pause before each attempt, until it contains ten records.
 *
 * @throws InterruptedException if the thread is interrupted while sleeping between polls
 */
@Test
public void testStream() throws InterruptedException {
  Dataset<ExampleEvent> output = Datasets.load(TopicToDatasetApp.EVENTS_DS_URI, ExampleEvent.class);
  runner.writeMessages(TopicToDatasetApp.TOPIC_NAME, getEvents());

  boolean hasRecords = false;
  // Stop polling as soon as the expected number of records has been seen.
  for (int attempt = 0; attempt < 10 && !hasRecords; attempt++) {
    Thread.sleep(2000);

    // Verify the output contains the expected content.
    DatasetReader<ExampleEvent> reader = output.newReader();
    try {
      int seen = 0;
      for (ExampleEvent ignored : reader) {
        seen++;
      }
      hasRecords = (seen == 10);
    } finally {
      reader.close();
    }
  }

  Assert.assertTrue("Expected output records not found", hasRecords);
}