本文整理汇总了Java中org.embulk.spi.Schema类的典型用法代码示例。如果您正苦于以下问题:Java Schema类的具体用法?Java Schema怎么用?Java Schema使用的例子?那么, 这里精选的类代码示例或许可以为您提供帮助。
Schema类属于org.embulk.spi包,在下文中一共展示了Schema类的15个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Java代码示例。
示例1: run
import org.embulk.spi.Schema; //导入依赖的package包/类
@Override
public TaskReport run(TaskSource taskSource,
        Schema schema, int taskIndex,
        PageOutput output)
{
    // Rehydrate the task settings and the per-column generation options.
    PluginTask task = taskSource.loadTask(PluginTask.class);
    Integer rows = task.getRows();
    final HashMap<Column, Map<String, Integer>> columnOptions = getColumnOptions(task);
    // Each task emits its own contiguous range of row numbers:
    // [taskIndex * rows + 1, taskIndex * rows + rows].
    try (PageBuilder builder =
            new PageBuilder(Exec.getBufferAllocator(), schema, output)) {
        final int firstRow = taskIndex * rows + 1;
        final int lastRow = taskIndex * rows + rows;
        for (int rowNumber = firstRow; rowNumber <= lastRow; rowNumber++) {
            RandomjColumnVisitor visitor = new RandomjColumnVisitor(builder, task, rowNumber, columnOptions);
            schema.visitColumns(visitor);
            builder.addRecord();
        }
        builder.finish();
    }
    // Report how much was generated so it shows up in the run summary.
    TaskReport taskReport = Exec.newTaskReport();
    taskReport.set("columns", schema.size());
    taskReport.set("rows", rows);
    return taskReport;
}
示例2: getColumnOptions
import org.embulk.spi.Schema; //导入依赖的package包/类
HashMap<Column, Map<String, Integer>> getColumnOptions(PluginTask task)
{
    // Build a lookup of generator options (length / null_rate / max_value /
    // min_value) for every column declared in the schema config.
    SchemaConfig schemaConfig = task.getSchema();
    Schema schema = schemaConfig.toSchema();
    HashMap<Column, Map<String, Integer>> options = new HashMap<>();
    for (Column column : schema.getColumns()) {
        ColumnConfig columnConfig = schemaConfig.getColumn(column.getIndex());
        HashMap<String, Integer> entry = new HashMap<>();
        // length and null_rate default to 0; the value bounds default to null
        // (meaning "unbounded").
        entry.put("length", columnConfig.getOption().get(Integer.class, "length", 0));
        entry.put("null_rate", columnConfig.getOption().get(Integer.class, "null_rate", 0));
        entry.put("max_value", columnConfig.getOption().get(Integer.class, "max_value", null));
        entry.put("min_value", columnConfig.getOption().get(Integer.class, "min_value", null));
        options.put(column, entry);
    }
    return options;
}
示例3: transaction
import org.embulk.spi.Schema; //导入依赖的package包/类
@Override
public void transaction(ConfigSource config, Schema schema,
        FormatterPlugin.Control control)
{
    PluginTask task = config.loadConfig(PluginTask.class);
    // Validate the "avsc" option by parsing the Avro schema file up front.
    try {
        File avsc = task.getAvsc().getFile();
        new org.apache.avro.Schema.Parser().parse(avsc);
    } catch (IOException e) {
        // Fixed: keep the underlying cause instead of swallowing it — the
        // IOException may mean "missing file" but also "unreadable", and the
        // original message plus lost cause hid which one it was.
        throw new ConfigException("failed to read avsc file", e);
    }
    // validate column_options: every configured column must exist in the schema.
    for (String columnName : task.getColumnOptions().keySet()) {
        schema.lookupColumn(columnName); // throws SchemaConfigException
    }
    control.run(task.dump());
}
示例4: transaction
import org.embulk.spi.Schema; //导入依赖的package包/类
// Input-plugin transaction: enumerates the Parquet files under "path" and
// schedules one Embulk task per discovered file.
@Override
public ConfigDiff transaction(ConfigSource config,
InputPlugin.Control control)
{
PluginTask task = config.loadConfig(PluginTask.class);
// Redirect parquet-mr's logging according to the task settings.
configureParquetLogger(task);
Path rootPath = new Path(task.getPath());
// Hadoop FileSystem classes must be resolved inside the plugin's own
// classloader; the scope is restored when the try block exits.
try (PluginClassLoaderScope ignored = new PluginClassLoaderScope()) {
Configuration conf = ConfigurationFactory.create(task);
FileSystem fs = FileSystem.get(rootPath.toUri(), conf);
List<FileStatus> statusList = listFileStatuses(fs, rootPath);
// Fail fast when the configured path matches nothing.
if (statusList.isEmpty()) {
throw new PathNotFoundException(rootPath.toString());
}
for (FileStatus status : statusList) {
logger.debug("embulk-input-parquet_hadoop: Loading paths: {}, length: {}",
status.getPath(), status.getLen());
}
// Persist the file paths into the task so each parallel task can pick
// up the file at its own task index.
List<String> files = Lists.transform(statusList, new Function<FileStatus, String>() {
@Nullable
@Override
public String apply(@Nullable FileStatus input)
{
return input.getPath().toString();
}
});
task.setFiles(files);
}
catch (IOException e) {
// Wrap checked IOException as unchecked (guava Throwables.propagate).
throw Throwables.propagate(e);
}
// NOTE(review): newSchema() is defined elsewhere; presumably it builds the
// fixed output schema for parquet records — confirm against the class body.
Schema schema = newSchema();
// One task per file.
int taskCount = task.getFiles().size();
return resume(task.dump(), schema, taskCount, control);
}
示例5: serialize
import org.embulk.spi.Schema; //导入依赖的package包/类
@Override
public byte[] serialize(Mode mode, PageReader reader, Schema schema, List<String> dataColumns, TimestampFormatter[] timestampFormatters) {
    // Serialize the current record to MessagePack bytes. SINGLE_COLUMN packs
    // only the first configured column; MULTI_COLUMN packs each one in order.
    final S3PerRecordOutputColumnVisitor visitor;
    if (mode == Mode.SINGLE_COLUMN) {
        visitor = new MessagePackSingleColumnVisitor(reader, timestampFormatters);
        schema.lookupColumn(dataColumns.get(0)).visit(visitor);
    }
    else if (mode == Mode.MULTI_COLUMN) {
        visitor = new MessagePackMultiColumnVisitor(reader, timestampFormatters);
        for (String columnName : dataColumns) {
            schema.lookupColumn(columnName).visit(visitor);
        }
    }
    else {
        // Mode is a closed enum; any other value is a programming error.
        throw new RuntimeException("never reach here");
    }
    return visitor.getByteArray();
}
示例6: transaction
import org.embulk.spi.Schema; //导入依赖的package包/类
@Override
public void transaction(ConfigSource config, ParserPlugin.Control control)
{
    PluginTask task = config.loadConfig(PluginTask.class);
    // Parse the Avro schema file given by the "avsc" option; the Embulk
    // column schema is then derived from it.
    File avsc = task.getAvsc().getFile();
    org.apache.avro.Schema avroSchema;
    try {
        avroSchema = new org.apache.avro.Schema.Parser().parse(avsc);
    } catch (IOException e) {
        // Fixed: preserve the cause instead of discarding it — an IOException
        // here can mean missing OR unreadable file, and the stack trace of
        // the original error is needed to tell which.
        throw new ConfigException("failed to read avsc file", e);
    }
    Schema schema = buildSchema(task.getColumns(), avroSchema);
    control.run(task.dump(), schema);
}
示例7: configure
import org.embulk.spi.Schema; //导入依赖的package包/类
private void configure(PluginTask task, Schema inputSchema)
{
    // Validate the "columns" option: at least two columns are required.
    List<ColumnConfig> columns = task.getColumns();
    if (columns.size() < 2) {
        throw new ConfigException("\"columns\" should be specified 2~ columns.");
    }
    // Reject cast targets this filter cannot produce (json, timestamp).
    for (ColumnConfig columnConfig : columns) {
        final String columnName = columnConfig.getName();
        final Type columnType = inputSchema.lookupColumn(columnName).getType();
        if (columnType instanceof JsonType) {
            throw new ConfigException(String.format("casting to json is not available: \"%s\"", columnName));
        }
        if (columnType instanceof TimestampType) {
            throw new ConfigException(String.format("casting to timestamp is not available: \"%s\"", columnName));
        }
    }
}
示例8: transaction
import org.embulk.spi.Schema; //导入依赖的package包/类
@Override
public void transaction(ConfigSource config, Schema inputSchema, FilterPlugin.Control control)
{
    PluginTask task = config.loadConfig(PluginTask.class);
    final String queryStringColumn = task.getQueryStringColumnName();
    // Copy the input schema, substituting the query-string column with the
    // expanded columns configured by the user.
    final Schema.Builder outputSchema = Schema.builder();
    for (Column column : inputSchema.getColumns()) {
        if (queryStringColumn.equals(column.getName())) {
            insertColumns(outputSchema, task.getExpandedColumns());
        }
        else {
            outputSchema.add(column.getName(), column.getType());
        }
    }
    control.run(task.dump(), outputSchema.build());
}
示例9: testOpenSuccessfully
import org.embulk.spi.Schema; //导入依赖的package包/类
@Test
public void testOpenSuccessfully()
{
    ConfigSource config = loadConfigSource("testOpen.yml");
    // Three string columns; "qs" (index 1) is the query-string column that
    // the filter expands.
    final Schema schema = Schema.builder()
            .add("qb", STRING)
            .add("qs", STRING)
            .add("qa", STRING)
            .build();
    testQueryString(config, schema, "/path?q1=1&q2=2#fragment", new AssertionWithPage()
    {
        @Override
        public void run(PageReader pageReader, TestPageBuilderReader.MockPageOutput pageOutput)
        {
            for (Page page : pageOutput.pages) {
                pageReader.setPage(page);
                // Surrounding columns pass through; q1/q2 come from the query string.
                assertThat(pageReader.getString(0), is("before"));
                assertThat(pageReader.getString(1), is("1"));
                assertEquals(2L, pageReader.getLong(2));
                assertThat(pageReader.getString(3), is("after"));
            }
        }
    });
}
示例10: testOpenSuccessfullyWithHashbang
import org.embulk.spi.Schema; //导入依赖的package包/类
@Test
public void testOpenSuccessfullyWithHashbang()
{
    ConfigSource config = loadConfigSource("testOpen.yml");
    // Same three-column schema as the plain-path test.
    final Schema schema = Schema.builder()
            .add("qb", STRING)
            .add("qs", STRING)
            .add("qa", STRING)
            .build();
    // Hashbang URL: the query string lives after "#!/path".
    testQueryString(config, schema, "/#!/path?q1=one&q2=2#fragment", new AssertionWithPage()
    {
        @Override
        public void run(PageReader pageReader, TestPageBuilderReader.MockPageOutput pageOutput)
        {
            for (Page page : pageOutput.pages) {
                pageReader.setPage(page);
                assertThat(pageReader.getString(0), is("before"));
                assertThat(pageReader.getString(1), is("one"));
                assertEquals(2L, pageReader.getLong(2));
                assertThat(pageReader.getString(3), is("after"));
            }
        }
    });
}
示例11: testQueryString
import org.embulk.spi.Schema; //导入依赖的package包/类
// Drives the filter end-to-end: runs the transaction, feeds one page built
// from (before, path, after), then hands a reader over the output pages to
// the caller's assertion.
private void testQueryString(ConfigSource configSource, final Schema inputSchema, final String path, final AssertionWithPage assertion)
{
    final QueryStringFilterPlugin plugin = new QueryStringFilterPlugin();
    plugin.transaction(configSource, inputSchema, new FilterPlugin.Control()
    {
        @Override
        public void run(TaskSource taskSource, Schema outputSchema)
        {
            TestPageBuilderReader.MockPageOutput mockPageOutput = new TestPageBuilderReader.MockPageOutput();
            PageOutput pageOutput = plugin.open(taskSource, inputSchema, outputSchema, mockPageOutput);
            List<Page> pages = PageTestUtils.buildPage(runtime.getBufferAllocator(), inputSchema, "before", path, "after");
            for (Page page : pages) {
                pageOutput.add(page);
            }
            pageOutput.finish();
            pageOutput.close();
            // Fixed: removed a stray empty statement (";") and close the
            // PageReader via try-with-resources instead of leaking it.
            try (PageReader pageReader = new PageReader(outputSchema)) {
                assertion.run(pageReader, mockPageOutput);
            }
        }
    });
}
示例12: transaction
import org.embulk.spi.Schema; //导入依赖的package包/类
@Override
public void transaction(ConfigSource config, Schema inputSchema,
        FilterPlugin.Control control)
{
    PluginTask task = config.loadConfig(PluginTask.class);
    // Fail fast if the JSON template references columns missing from the input.
    String jsonTemplate = task.getJsonColumn().getTemplate();
    BuildJsonConfigChecker checker = new BuildJsonConfigChecker(jsonTemplate, inputSchema);
    checker.validateJSON();
    // Derive the output schema and log it for troubleshooting.
    Schema outputSchema = buildOutputSchema(task, inputSchema);
    for (Column column : outputSchema.getColumns()) {
        logger.debug("OutputSchema: {}", column);
    }
    control.run(task.dump(), outputSchema);
}
示例13: buildOutputSchema
import org.embulk.spi.Schema; //导入依赖的package包/类
// Builds the output schema: if a column with the configured JSON column name
// already exists in the input, it is replaced (type changed to the JSON
// column type); otherwise the JSON column is appended at the end.
static Schema buildOutputSchema(PluginTask task, Schema inputSchema)
{
    Type jsonColumnType = jsonColumnType(task);
    String jsonColumnName = task.getJsonColumn().getName();
    Schema.Builder builder = Schema.builder();
    // Fixed: use a boolean flag instead of an int used as a boolean, and the
    // singular "inputColumn" name for the loop variable.
    boolean replaced = false;
    for (Column inputColumn : inputSchema.getColumns()) {
        if (jsonColumnName.equals(inputColumn.getName())) {
            builder.add(inputColumn.getName(), jsonColumnType);
            replaced = true;
        }
        else {
            builder.add(inputColumn.getName(), inputColumn.getType());
        }
    }
    if (!replaced) {
        builder.add(jsonColumnName, jsonColumnType);
    }
    return builder.build();
}
示例14: filter
import org.embulk.spi.Schema; //导入依赖的package包/类
// Test helper: pushes the given cell values through the build-json filter and
// returns the resulting rows as Object arrays.
private List<Object[]> filter(PluginTask task, Schema inputSchema, Object ... objects)
{
    TestPageBuilderReader.MockPageOutput output = new TestPageBuilderReader.MockPageOutput();
    Schema outputSchema = BuildJsonFilterPlugin.buildOutputSchema(task, inputSchema);
    // Fixed: PageBuilder/PageReader were never closed on an exception path
    // (and PageReader never at all) — manage both with try-with-resources.
    try (PageBuilder pageBuilder = new PageBuilder(runtime.getBufferAllocator(), outputSchema, output);
            PageReader pageReader = new PageReader(inputSchema)) {
        BuildJsonVisitorImpl visitor = new BuildJsonVisitorImpl(task, inputSchema, outputSchema, pageReader, pageBuilder);
        List<Page> pages = PageTestUtils.buildPage(runtime.getBufferAllocator(), inputSchema, objects);
        for (Page page : pages) {
            pageReader.setPage(page);
            while (pageReader.nextRecord()) {
                outputSchema.visitColumns(visitor);
                pageBuilder.addRecord();
            }
        }
        pageBuilder.finish();
    }
    return Pages.toObjects(outputSchema, output.pages);
}
示例15: buildOuputSchema
import org.embulk.spi.Schema; //导入依赖的package包/类
// Copies the input schema, overriding a column's type whenever a matching
// entry exists in the task's "columns" configuration.
// NOTE(review): method name has a typo ("Ouput"); kept unchanged so callers
// elsewhere in the file keep compiling.
private Schema buildOuputSchema(final PluginTask task, final Schema inputSchema)
{
    final List<ColumnConfig> columnConfigs = task.getColumns();
    final ImmutableList.Builder<Column> outputColumns = ImmutableList.builder();
    int index = 0;
    for (final Column inputColumn : inputSchema.getColumns()) {
        final String name = inputColumn.getName();
        final ColumnConfig override = getColumnConfig(name, columnConfigs);
        final Type type = (override != null) ? override.getType() : inputColumn.getType();
        outputColumns.add(new Column(index++, name, type));
    }
    return new Schema(outputColumns.build());
}