This article collects typical usage examples of the Java method org.apache.spark.sql.types.DataTypes.createStructType. If you are wondering what DataTypes.createStructType does, how to call it, or what real-world uses look like, the curated examples below should help. You can also explore further usage examples of its enclosing class, org.apache.spark.sql.types.DataTypes.
Below are 15 code examples of the DataTypes.createStructType method, sorted by popularity by default.
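Before the examples, a minimal, self-contained sketch of the method itself: DataTypes.createStructType takes a List<StructField> (there is also a StructField[] overload) and returns a StructType, which can then drive DataFrame creation. The class name, sample data, and the Spark 2.x SparkSession setup below are illustrative only; several of the examples that follow use the older Spark 1.x DataFrame API instead.

import java.util.Arrays;
import java.util.List;

import org.apache.spark.sql.Dataset;
import org.apache.spark.sql.Row;
import org.apache.spark.sql.RowFactory;
import org.apache.spark.sql.SparkSession;
import org.apache.spark.sql.types.DataTypes;
import org.apache.spark.sql.types.StructField;
import org.apache.spark.sql.types.StructType;

public class CreateStructTypeDemo {
    public static void main(String[] args) {
        SparkSession spark = SparkSession.builder()
                .appName("CreateStructTypeDemo")
                .master("local[*]")
                .getOrCreate();

        // Build a two-column schema: a non-nullable id and a nullable name.
        List<StructField> fields = Arrays.asList(
                DataTypes.createStructField("id", DataTypes.LongType, false),
                DataTypes.createStructField("name", DataTypes.StringType, true));
        StructType schema = DataTypes.createStructType(fields);

        // Rows are positional and must line up with the schema.
        List<Row> rows = Arrays.asList(
                RowFactory.create(1L, "alice"),
                RowFactory.create(2L, null));

        Dataset<Row> df = spark.createDataFrame(rows, schema);
        df.printSchema();
        df.show();

        spark.stop();
    }
}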
Example 1: writeEntityMetadata
import org.apache.spark.sql.types.DataTypes; // import the package/class the method depends on
/**
 * Write metadata describing entity tables
 *
 * @param entitySchema the entity schema
 */
public void writeEntityMetadata(EntitySchema entitySchema) {
    // create the schema
    List<StructField> fields = new ArrayList<>();
    fields.add(DataTypes.createStructField(ENTITIES_NAME, DataTypes.StringType, false));
    fields.add(DataTypes.createStructField(ENTITIES_URI, DataTypes.StringType, false));
    fields.add(DataTypes.createStructField(ENTITIES_LABEL, DataTypes.StringType, true));
    fields.add(DataTypes.createStructField(ENTITIES_NUM_ROWS, DataTypes.LongType, false));
    StructType schema = DataTypes.createStructType(fields);

    List<Tuple2<String, String>> indexes = new ArrayList<>();
    indexes.add(new Tuple2<>(ENTITIES_TABLE_NAME, ENTITIES_URI));
    List<Tuple2<String, String>> primaryKeys = new ArrayList<>();
    primaryKeys.add(new Tuple2<>(ENTITIES_TABLE_NAME, ENTITIES_NAME));

    final Map<String, String> uriLabels = rdfSchema.getUriLabels();
    // create table rows
    List<Row> rows = entitySchema.getTables().stream()
            .map(table -> {
                Object[] valueArray = new Object[]{
                        table.getName(),
                        table.getTypeURI(),
                        uriLabels.get(table.getTypeURI()),
                        table.getNumRows()
                };
                return RowFactory.create(valueArray);
            }).collect(Collectors.toList());

    // create and write the META_Entities dataframe
    DataFrame df = sql.createDataFrame(rows, schema);
    persistor.writeDataFrame(ENTITIES_TABLE_NAME, df);
    persistor.createPrimaryKeys(primaryKeys);
    persistor.createIndexes(indexes);
    df.unpersist();
}
Example 2: testAppendFields
import org.apache.spark.sql.types.DataTypes; // import the package/class the method depends on
@Test
public void testAppendFields() {
    StructField field1 = DataTypes.createStructField("field1", DataTypes.StringType, true);
    StructField field2 = DataTypes.createStructField("field2", DataTypes.IntegerType, true);
    StructField field3 = DataTypes.createStructField("field3", DataTypes.FloatType, true);
    StructType schema = DataTypes.createStructType(Lists.newArrayList(field1, field2, field3));

    StructField field4 = DataTypes.createStructField("field4", DataTypes.BooleanType, true);
    StructField field5 = DataTypes.createStructField("field5", DataTypes.StringType, true);
    StructType appendSchema = RowUtils.appendFields(schema, Lists.newArrayList(field4, field5));

    assertEquals(appendSchema.length(), 5);
    assertEquals(appendSchema.fields()[0], field1);
    assertEquals(appendSchema.fields()[1], field2);
    assertEquals(appendSchema.fields()[2], field3);
    assertEquals(appendSchema.fields()[3], field4);
    assertEquals(appendSchema.fields()[4], field5);
}
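RowUtils.appendFields is exercised here but its implementation is not shown. A minimal sketch of what such a helper could look like, assuming it simply concatenates the existing fields with the new ones in order (a hypothetical reconstruction; the real implementation in the project these tests come from may differ):

import java.util.List;

import org.apache.spark.sql.types.DataTypes;
import org.apache.spark.sql.types.StructField;
import org.apache.spark.sql.types.StructType;

// Hypothetical reconstruction of RowUtils.appendFields.
public static StructType appendFields(StructType schema, List<StructField> newFields) {
    StructField[] existing = schema.fields();
    StructField[] combined = new StructField[existing.length + newFields.size()];
    System.arraycopy(existing, 0, combined, 0, existing.length);
    for (int i = 0; i < newFields.size(); i++) {
        combined[existing.length + i] = newFields.get(i);
    }
    // createStructType also accepts a StructField[] directly.
    return DataTypes.createStructType(combined);
}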
Example 3: getLastOffsets
import org.apache.spark.sql.types.DataTypes; // import the package/class the method depends on
private Map<TopicPartition, Long> getLastOffsets() throws Exception {
    // Create filter for groupid/topic
    StructType filterSchema = DataTypes.createStructType(Lists.newArrayList(
        DataTypes.createStructField("group_id", DataTypes.StringType, false),
        DataTypes.createStructField("topic", DataTypes.StringType, false)));
    Row groupIDTopicFilter = new RowWithSchema(filterSchema, groupID, topic);
    Iterable<Row> filters = Collections.singleton(groupIDTopicFilter);

    // Get results
    RandomOutput output = getOffsetsOutput();
    Iterable<Row> results = output.getExistingForFilters(filters);

    // Transform results into map
    Map<TopicPartition, Long> offsetRanges = Maps.newHashMap();
    for (Row result : results) {
        Integer partition = result.getInt(result.fieldIndex("partition"));
        Long offset = result.getLong(result.fieldIndex("offset"));
        TopicPartition topicPartition = new TopicPartition(topic, partition);
        offsetRanges.put(topicPartition, offset);
    }
    return offsetRanges;
}
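RowWithSchema above is a project-specific helper, not part of Spark itself. If something similar is needed with plain Spark, a schema-aware Row can be built with GenericRowWithSchema from Spark's catalyst package; a sketch under that assumption, with illustrative method and class names:

import org.apache.spark.sql.Row;
import org.apache.spark.sql.catalyst.expressions.GenericRowWithSchema;
import org.apache.spark.sql.types.DataTypes;
import org.apache.spark.sql.types.StructField;
import org.apache.spark.sql.types.StructType;

public class FilterRowSketch {
    public static Row groupIdTopicFilter(String groupId, String topic) {
        StructType filterSchema = DataTypes.createStructType(new StructField[]{
                DataTypes.createStructField("group_id", DataTypes.StringType, false),
                DataTypes.createStructField("topic", DataTypes.StringType, false)});
        // Values are positional and must line up with the schema fields.
        return new GenericRowWithSchema(new Object[]{groupId, topic}, filterSchema);
    }
}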
Example 4: convertToRowValidNullValue
import org.apache.spark.sql.types.DataTypes; // import the package/class the method depends on
@Test
public void convertToRowValidNullValue(
        final @Mocked RowUtils utils
) throws Exception {
    Record record = new Record();
    record.put("field1", null);
    StructType schema = DataTypes.createStructType(Lists.newArrayList(
        DataTypes.createStructField("field1", DataTypes.StringType, true))
    );

    assertEquals("Invalid conversion", null, MorphlineUtils.convertToRow(schema, record).get(0));

    new Verifications() {{
        RowUtils.toRowValue(any, (DataType) any); times = 0;
    }};
}
Example 5: convertToRowInvalidNullValue
import org.apache.spark.sql.types.DataTypes; // import the package/class the method depends on
@Test
public void convertToRowInvalidNullValue(
        final @Mocked RowUtils utils
) throws Exception {
    Record record = new Record();
    record.put("field1", null);
    StructType schema = DataTypes.createStructType(Lists.newArrayList(
        DataTypes.createStructField("field1", DataTypes.StringType, false))
    );

    try {
        MorphlineUtils.convertToRow(schema, record);
        fail("Did not throw a RuntimeException");
    } catch (Exception e) {
        assertThat(e.getMessage(), JUnitMatchers.containsString("DataType cannot contain 'null'"));
    }

    new Verifications() {{
        RowUtils.toRowValue(any, (DataType) any); times = 0;
    }};
}
Example 6: convertToRowInvalidTypeNullable
import org.apache.spark.sql.types.DataTypes; // import the package/class the method depends on
@Test
public void convertToRowInvalidTypeNullable(
        final @Mocked RowUtils utils
) throws Exception {
    Record record = new Record();
    record.put("field1", "one");
    StructType schema = DataTypes.createStructType(Lists.newArrayList(
        DataTypes.createStructField("field1", DataTypes.StringType, true))
    );

    new Expectations() {{
        RowUtils.toRowValue("one", DataTypes.StringType); result = new RuntimeException("Conversion exception");
    }};

    try {
        MorphlineUtils.convertToRow(schema, record);
        fail("Did not throw a RuntimeException");
    } catch (Exception e) {
        assertThat(e.getMessage(), JUnitMatchers.containsString("Error converting Field"));
    }
}
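Taken together, Examples 4 through 6 pin down the contract of MorphlineUtils.convertToRow: a null value passes through when the field is nullable, is rejected with "DataType cannot contain 'null'" when it is not, and any conversion failure from RowUtils.toRowValue is rewrapped with an "Error converting Field" message. A minimal sketch consistent with those three tests (a hypothetical reconstruction; the real method handles more cases, such as multi-valued Morphline fields):

import org.apache.spark.sql.Row;
import org.apache.spark.sql.RowFactory;
import org.apache.spark.sql.types.StructField;
import org.apache.spark.sql.types.StructType;
import org.kitesdk.morphline.api.Record;

// Hypothetical reconstruction driven by the tested behavior above.
// RowUtils.toRowValue is the project's own helper (mocked in the tests).
public static Row convertToRow(StructType schema, Record record) {
    Object[] values = new Object[schema.fields().length];
    int i = 0;
    for (StructField field : schema.fields()) {
        Object raw = record.getFirstValue(field.name());
        if (raw == null) {
            if (!field.nullable()) {
                throw new RuntimeException(
                    "DataType cannot contain 'null' for field: " + field.name());
            }
            values[i++] = null;
        } else {
            try {
                values[i++] = RowUtils.toRowValue(raw, field.dataType());
            } catch (Exception e) {
                throw new RuntimeException("Error converting Field '" + field.name() + "'", e);
            }
        }
    }
    return RowFactory.create(values);
}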
Example 7: toSchemaArraysNested
import org.apache.spark.sql.types.DataTypes; // import the package/class the method depends on
@Test
public void toSchemaArraysNested() throws Exception {
    StructType input = DataTypes.createStructType(Lists.newArrayList(
        // Outer
        DataTypes.createStructField("Outer", DataTypes.createArrayType(
            // Inner
            DataTypes.createArrayType(DataTypes.IntegerType, false),
            false), false)
    ));

    Schema schema = AvroUtils.schemaFor(input);

    assertEquals("Invalid field count", 1, schema.getFields().size());
    assertEquals("Invalid field name", "Outer", schema.getFields().get(0).name());
    assertEquals("Invalid field type", Schema.Type.ARRAY, schema.getFields().get(0).schema().getType());
    assertEquals("Invalid outer element type, i.e. the inner type", Schema.Type.ARRAY,
        schema.getFields().get(0).schema().getElementType().getType());
    assertEquals("Invalid inner element type", Schema.Type.INT,
        schema.getFields().get(0).schema().getElementType().getElementType().getType());
    //System.out.println(schema.toString(true));
}
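For comparison, the Avro shape these assertions describe (a record with one field "Outer" holding an array of arrays of int) can be spelled out directly with Avro's SchemaBuilder; a sketch, with an illustrative record name:

import org.apache.avro.Schema;
import org.apache.avro.SchemaBuilder;

public class NestedArraySchemaSketch {
    public static void main(String[] args) {
        // The record name "Nested" is arbitrary here.
        Schema expected = SchemaBuilder.record("Nested").fields()
                .name("Outer").type().array().items().array().items().intType().noDefault()
                .endRecord();
        System.out.println(expected.toString(true));
    }
}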
Example 8: toSparkSchema
import org.apache.spark.sql.types.DataTypes; // import the package/class the method depends on
/**
 * Convert an MTable schema to a Spark DataFrame schema, mapping each
 * MTable column type (DataType) to the corresponding Spark type.
 *
 * @param td the table descriptor
 * @return the Spark DataFrame schema
 */
public static org.apache.spark.sql.types.StructType toSparkSchema(final TableDescriptor td) {
    List<StructField> fields = td.getAllColumnDescriptors()
        .stream().sequential()
        .map(cd -> DataTypes.createStructField(cd.getColumnNameAsString(),
            toSparkType(cd.getColumnType()), true))
        .collect(Collectors.toList());
    return DataTypes.createStructType(fields);
}
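The toSparkType call above is project code that is not shown. A plausible sketch of such a mapper (hypothetical: the actual MTable column type enum and its cases may differ):

import org.apache.spark.sql.types.DataType;
import org.apache.spark.sql.types.DataTypes;

// Hypothetical mapper keyed by type name; the real method likely
// switches on MTable's own DataType enum rather than a String.
public static DataType toSparkType(String columnTypeName) {
    switch (columnTypeName) {
        case "INT":     return DataTypes.IntegerType;
        case "LONG":    return DataTypes.LongType;
        case "FLOAT":   return DataTypes.FloatType;
        case "DOUBLE":  return DataTypes.DoubleType;
        case "BOOLEAN": return DataTypes.BooleanType;
        case "STRING":  return DataTypes.StringType;
        default:        return DataTypes.StringType; // conservative fallback
    }
}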
Example 9: writePredicateMetadata
import org.apache.spark.sql.types.DataTypes; // import the package/class the method depends on
/**
 * Persist the predicate metadata table storing all predicates.
 */
public void writePredicateMetadata() {
    // create the schema
    List<StructField> fields = new ArrayList<>();
    fields.add(DataTypes.createStructField(PREDICATE_ID, DataTypes.IntegerType, false));
    fields.add(DataTypes.createStructField(PREDICATE_URI, DataTypes.StringType, false));
    fields.add(DataTypes.createStructField(PREDICATE_LABEL, DataTypes.StringType, true));
    StructType schema = DataTypes.createStructType(fields);

    List<Tuple2<String, String>> indexes = new ArrayList<>();
    indexes.add(new Tuple2<>(PREDICATES_TABLE_NAME, PREDICATE_URI));
    List<Tuple2<String, String>> primaryKeys = new ArrayList<>();
    primaryKeys.add(new Tuple2<>(PREDICATES_TABLE_NAME, PREDICATE_ID));

    final IndexMap<String> predicateIndex = rdfSchema.getPredicateIndex();
    final Map<String, String> uriLabels = rdfSchema.getUriLabels();

    // create table rows
    List<Row> rows = predicateIndex.getValues().stream()
            .map(uri -> {
                Object[] valueArray = new Object[]{
                        predicateIndex.getIndex(uri),
                        uri,
                        uriLabels.get(uri)
                };
                return RowFactory.create(valueArray);
            }).collect(Collectors.toList());

    // create and write the META_Predicates dataframe
    DataFrame df = sql.createDataFrame(rows, schema);
    persistor.writeDataFrame(PREDICATES_TABLE_NAME, df);
    persistor.createPrimaryKeys(primaryKeys);
    persistor.createIndexes(indexes);
    df.unpersist();
}
Example 10: writeRelationMetadata
import org.apache.spark.sql.types.DataTypes; // import the package/class the method depends on
/**
 * Write metadata describing relation tables
 *
 * @param relationSchema the relation schema
 */
public void writeRelationMetadata(RelationSchema relationSchema) {
    // create the schema
    List<StructField> fields = new ArrayList<>();
    fields.add(DataTypes.createStructField(RELATIONS_NAME, DataTypes.StringType, false));
    fields.add(DataTypes.createStructField(RELATIONS_FROM_NAME, DataTypes.StringType, true));
    fields.add(DataTypes.createStructField(RELATIONS_TO_NAME, DataTypes.StringType, true));
    fields.add(DataTypes.createStructField(RELATIONS_PREDICATE_ID, DataTypes.IntegerType, true));
    StructType schema = DataTypes.createStructType(fields);

    // create table rows
    List<Row> rows = relationSchema.getTables().stream()
            .map(table -> {
                RelationPredicateFilter predicateFilter = table.getPredicateFilter();
                RelationEntityFilter entityFilter = table.getEntityFilter();
                Object[] valueArray = new Object[]{
                        table.getName(),
                        entityFilter == null ? null : entityFilter.getFromTypeName(),
                        entityFilter == null ? null : entityFilter.getToTypeName(),
                        predicateFilter == null ? null
                                : rdfSchema.getPredicateIndex().getIndex(predicateFilter.getPredicateURI())
                };
                return RowFactory.create(valueArray);
            }).collect(Collectors.toList());

    // add an index for each field
    List<Tuple2<String, String>> indexes = fields.stream()
            .map(field -> new Tuple2<>(RELATIONS_TABLE_NAME, field.name()))
            .collect(Collectors.toList());

    // create and write the META_Relations dataframe
    DataFrame df = sql.createDataFrame(rows, schema);
    persistor.writeDataFrame(RELATIONS_TABLE_NAME, df);
    persistor.createIndexes(indexes);
    df.unpersist();
}
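Examples 1, 9, and 10 all write through a persistor with the same few calls. That interface is not shown anywhere in this article; a hypothetical outline inferred purely from how it is used (the real type may be a class and may differ):

import java.util.List;

import org.apache.spark.sql.DataFrame;
import scala.Tuple2;

// Hypothetical outline inferred from usage in Examples 1, 9, and 10.
public interface Persistor {
    void writeDataFrame(String tableName, DataFrame df);
    void createPrimaryKeys(List<Tuple2<String, String>> tablesAndColumns);
    void createIndexes(List<Tuple2<String, String>> tablesAndColumns);
}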
Example 11: getTestSchema
import org.apache.spark.sql.types.DataTypes; // import the package/class the method depends on
private StructType getTestSchema() {
    List<StructField> fields = new ArrayList<>();
    fields.add(DataTypes.createStructField("a", DataTypes.IntegerType, false));
    fields.add(DataTypes.createStructField("b", DataTypes.DoubleType, false));
    fields.add(DataTypes.createStructField("c", DataTypes.StringType, true));
    fields.add(DataTypes.createStructField("d", DataTypes.BooleanType, false));
    fields.add(DataTypes.createStructField("e", DataTypes.FloatType, false));
    return DataTypes.createStructType(fields);
}
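A Row matching this test schema is created positionally, one value per field in declaration order; a usage sketch with illustrative values:

import org.apache.spark.sql.Row;
import org.apache.spark.sql.RowFactory;

// Field order must match the schema: a (int), b (double), c (string), d (boolean), e (float).
Row row = RowFactory.create(1, 2.0d, "three", true, 5.0f);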
Example 12: getExpectedSchemaOfA
import org.apache.spark.sql.types.DataTypes; // import the package/class the method depends on
private StructType getExpectedSchemaOfA() {
    List<StructField> fields = new ArrayList<>();
    fields.add(DataTypes.createStructField(ID_COLUMN_NAME, DataTypes.LongType, false));
    fields.add(DataTypes.createStructField(URI_COLUMN_NAME, DataTypes.StringType, false));
    fields.add(DataTypes.createStructField("name", DataTypes.StringType, true));
    return DataTypes.createStructType(fields);
}
Example 13: getExpectedSchemaOfB
import org.apache.spark.sql.types.DataTypes; // import the package/class the method depends on
private StructType getExpectedSchemaOfB() {
    List<StructField> fields = new ArrayList<>();
    fields.add(DataTypes.createStructField(ID_COLUMN_NAME, DataTypes.LongType, false));
    fields.add(DataTypes.createStructField(URI_COLUMN_NAME, DataTypes.StringType, false));
    fields.add(DataTypes.createStructField("name", DataTypes.StringType, true));
    fields.add(DataTypes.createStructField("age", DataTypes.IntegerType, true));
    return DataTypes.createStructType(fields);
}
Example 14: getExpectedSchemaOfEAV
import org.apache.spark.sql.types.DataTypes; // import the package/class the method depends on
private StructType getExpectedSchemaOfEAV() {
    List<StructField> fields = new ArrayList<>();
    fields.add(DataTypes.createStructField(ID_COLUMN_NAME, DataTypes.LongType, false));
    fields.add(DataTypes.createStructField(PREDICATE_COLUMN_NAME, DataTypes.IntegerType, false));
    fields.add(DataTypes.createStructField(EAV_DATATYPE_COLUMN_NAME, DataTypes.StringType, true));
    fields.add(DataTypes.createStructField(EAV_LANGUAGE_COLUMN_NAME, DataTypes.StringType, true));
    fields.add(DataTypes.createStructField(EAV_VALUE_COLUMN_NAME, DataTypes.StringType, false));
    return DataTypes.createStructType(fields);
}
Example 15: getExpectedSchemaOfSingleRelationTable
import org.apache.spark.sql.types.DataTypes; // import the package/class the method depends on
private StructType getExpectedSchemaOfSingleRelationTable() {
    List<StructField> fields = new ArrayList<>();
    fields.add(DataTypes.createStructField(ID_COLUMN_NAME + ID_FROM_SUFFIX, DataTypes.LongType, false));
    fields.add(DataTypes.createStructField(ID_COLUMN_NAME + ID_TO_SUFFIX, DataTypes.LongType, false));
    fields.add(DataTypes.createStructField(PREDICATE_COLUMN_NAME, DataTypes.IntegerType, false));
    return DataTypes.createStructType(fields);
}