

Java DataTypes.createStructType Method Code Examples

This article collects typical usage examples of the Java method org.apache.spark.sql.types.DataTypes.createStructType. If you are unsure how DataTypes.createStructType is used in practice, or what it is good for, the curated code examples below should help. You can also explore further usage examples for the enclosing class, org.apache.spark.sql.types.DataTypes.


The sections below present 15 code examples of the DataTypes.createStructType method, sorted by popularity by default.
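
Before the examples, a quick orientation: DataTypes.createStructType takes a List<StructField> (or a StructField[]) and returns a StructType describing the layout of a Row. The standalone sketch below, written against the Spark 2.x SparkSession API (the examples that follow variously target Spark 1.x and 2.x), builds a two-column schema and materializes a small DataFrame from it; the class name, column names, and sample values are illustrative only.

import java.util.Arrays;
import java.util.List;

import org.apache.spark.sql.Dataset;
import org.apache.spark.sql.Row;
import org.apache.spark.sql.RowFactory;
import org.apache.spark.sql.SparkSession;
import org.apache.spark.sql.types.DataTypes;
import org.apache.spark.sql.types.StructField;
import org.apache.spark.sql.types.StructType;

public class CreateStructTypeSketch {
    public static void main(String[] args) {
        SparkSession spark = SparkSession.builder()
                .appName("createStructType-sketch")
                .master("local[*]")
                .getOrCreate();

        // Each StructField is (name, type, nullable); createStructType bundles them into a schema.
        List<StructField> fields = Arrays.asList(
                DataTypes.createStructField("id", DataTypes.LongType, false),
                DataTypes.createStructField("name", DataTypes.StringType, true));
        StructType schema = DataTypes.createStructType(fields);

        // Rows must match the schema positionally: (long, String).
        List<Row> rows = Arrays.asList(
                RowFactory.create(1L, "alice"),
                RowFactory.create(2L, null)); // allowed because "name" is nullable

        Dataset<Row> df = spark.createDataFrame(rows, schema);
        df.printSchema();
        df.show();

        spark.stop();
    }
}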

Example 1: writeEntityMetadata

import org.apache.spark.sql.types.DataTypes; // import the package/class this method depends on
/**
 * Write metadata describing entity tables
 *
 * @param entitySchema the entity schema
 */
public void writeEntityMetadata(EntitySchema entitySchema) {

    // create the schema
    List<StructField> fields = new ArrayList<>();
    fields.add(DataTypes.createStructField(ENTITIES_NAME, DataTypes.StringType, false));
    fields.add(DataTypes.createStructField(ENTITIES_URI, DataTypes.StringType, false));
    fields.add(DataTypes.createStructField(ENTITIES_LABEL, DataTypes.StringType, true));
    fields.add(DataTypes.createStructField(ENTITIES_NUM_ROWS, DataTypes.LongType, false));
    StructType schema = DataTypes.createStructType(fields);

    List<Tuple2<String, String>> indexes = new ArrayList<>();
    indexes.add(new Tuple2<>(ENTITIES_TABLE_NAME, ENTITIES_URI));

    List<Tuple2<String, String>> primaryKeys = new ArrayList<>();
    primaryKeys.add(new Tuple2<>(ENTITIES_TABLE_NAME, ENTITIES_NAME));

    final Map<String, String> uriLabels = rdfSchema.getUriLabels();
    // create table rows
    List<Row> rows = entitySchema.getTables().stream()
            .map(table -> {
                Object[] valueArray = new Object[]{
                        table.getName(),
                        table.getTypeURI(),
                        uriLabels.get(table.getTypeURI()),
                        table.getNumRows()
                };
                return RowFactory.create(valueArray);
            }).collect(Collectors.toList());

    // create and write the META_Entities dataframe
    DataFrame df = sql.createDataFrame(rows, schema);
    persistor.writeDataFrame(ENTITIES_TABLE_NAME, df);
    persistor.createPrimaryKeys(primaryKeys);
    persistor.createIndexes(indexes);
    df.unpersist();
}
 
Developer: Merck, Project: rdf2x, Lines: 42, Source: MetadataWriter.java

Example 2: testAppendFields

import org.apache.spark.sql.types.DataTypes; // import the package/class this method depends on
@Test
public void testAppendFields() {
  StructField field1 = DataTypes.createStructField("field1", DataTypes.StringType, true);
  StructField field2 = DataTypes.createStructField("field2", DataTypes.IntegerType, true);
  StructField field3 = DataTypes.createStructField("field3", DataTypes.FloatType, true);
  StructType schema = DataTypes.createStructType(Lists.newArrayList(field1, field2, field3));
  StructField field4 = DataTypes.createStructField("field4", DataTypes.BooleanType, true);
  StructField field5 = DataTypes.createStructField("field5", DataTypes.StringType, true);

  StructType appendSchema = RowUtils.appendFields(schema, Lists.newArrayList(field4, field5));

  assertEquals(appendSchema.length(), 5);
  assertEquals(appendSchema.fields()[0], field1);
  assertEquals(appendSchema.fields()[1], field2);
  assertEquals(appendSchema.fields()[2], field3);
  assertEquals(appendSchema.fields()[3], field4);
  assertEquals(appendSchema.fields()[4], field5);
}
 
Developer: cloudera-labs, Project: envelope, Lines: 19, Source: TestRowUtils.java
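
The test pins down the contract of envelope's RowUtils.appendFields: the extra fields land after the existing ones, in order, and the result is a new StructType. The implementation itself is not reproduced on this page; a minimal sketch of such a helper, using only the public Spark API and matching the signature the test implies, could look like this (the class name is a placeholder):

import java.util.ArrayList;
import java.util.Arrays;
import java.util.List;

import org.apache.spark.sql.types.DataTypes;
import org.apache.spark.sql.types.StructField;
import org.apache.spark.sql.types.StructType;

public class RowUtilsSketch {
  // Hypothetical stand-in for RowUtils.appendFields; not the project's actual code.
  public static StructType appendFields(StructType schema, List<StructField> extra) {
    List<StructField> fields = new ArrayList<>(Arrays.asList(schema.fields()));
    fields.addAll(extra); // appended fields keep their order after the existing ones
    return DataTypes.createStructType(fields);
  }
}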

Example 3: getLastOffsets

import org.apache.spark.sql.types.DataTypes; // import the package/class this method depends on
private Map<TopicPartition, Long> getLastOffsets() throws Exception {
  // Create filter for groupid/topic
  StructType filterSchema = DataTypes.createStructType(Lists.newArrayList(
      DataTypes.createStructField("group_id", DataTypes.StringType, false),
      DataTypes.createStructField("topic", DataTypes.StringType, false)));
  Row groupIDTopicFilter = new RowWithSchema(filterSchema, groupID, topic);
  Iterable<Row> filters = Collections.singleton(groupIDTopicFilter);
  
  // Get results
  RandomOutput output = getOffsetsOutput();
  Iterable<Row> results = output.getExistingForFilters(filters);
  
  // Transform results into map
  Map<TopicPartition, Long> offsetRanges = Maps.newHashMap();
  for (Row result : results) {
    Integer partition = result.getInt(result.fieldIndex("partition"));
    Long offset = result.getLong(result.fieldIndex("offset"));
    TopicPartition topicPartition = new TopicPartition(topic, partition);
    
    offsetRanges.put(topicPartition, offset);
  }
  
  return offsetRanges;
}
 
Developer: cloudera-labs, Project: envelope, Lines: 25, Source: KafkaInput.java
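
Envelope stores these offsets so a restarted job can resume the topic where the previous run stopped. As a hedged illustration of how a Map<TopicPartition, Long> like the one returned above could be applied, the sketch below positions a plain Kafka consumer at the stored offsets; the helper name is hypothetical, and whether you seek to the stored value or to stored + 1 depends on whether the writer recorded the last processed offset or the next offset to read.

import java.util.Map;

import org.apache.kafka.clients.consumer.KafkaConsumer;
import org.apache.kafka.common.TopicPartition;

public class OffsetSeekSketch {
  // Hypothetical helper: position an already-configured consumer at stored offsets.
  public static void seekToStored(KafkaConsumer<byte[], byte[]> consumer,
                                  Map<TopicPartition, Long> stored) {
    // Manual assignment, so no consumer-group rebalance interferes with the seeks.
    consumer.assign(stored.keySet());
    for (Map.Entry<TopicPartition, Long> entry : stored.entrySet()) {
      consumer.seek(entry.getKey(), entry.getValue()); // use getValue() + 1 for "last processed" semantics
    }
  }
}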

Example 4: convertToRowValidNullValue

import org.apache.spark.sql.types.DataTypes; // import the package/class this method depends on
@Test
public void convertToRowValidNullValue(
    final @Mocked RowUtils utils
) throws Exception {

  Record record = new Record();
  record.put("field1", null);

  StructType schema = DataTypes.createStructType(Lists.newArrayList(
      DataTypes.createStructField("field1", DataTypes.StringType, true))
  );

  assertEquals("Invalid conversion", null, MorphlineUtils.convertToRow(schema, record).get(0));

  new Verifications() {{
    RowUtils.toRowValue(any, (DataType) any); times = 0;
  }};
}
 
Developer: cloudera-labs, Project: envelope, Lines: 19, Source: TestMorphlineUtils.java

Example 5: convertToRowInvalidNullValue

import org.apache.spark.sql.types.DataTypes; // import the package/class this method depends on
@Test
public void convertToRowInvalidNullValue(
    final @Mocked RowUtils utils
) throws Exception {

  Record record = new Record();
  record.put("field1", null);

  StructType schema = DataTypes.createStructType(Lists.newArrayList(
      DataTypes.createStructField("field1", DataTypes.StringType, false))
  );

  try {
    MorphlineUtils.convertToRow(schema, record);
    fail("Did not throw a RuntimeException");
  } catch (Exception e) {
    assertThat(e.getMessage(), JUnitMatchers.containsString("DataType cannot contain 'null'"));
  }

  new Verifications() {{
    RowUtils.toRowValue(any, (DataType) any); times = 0;
  }};
}
 
Developer: cloudera-labs, Project: envelope, Lines: 24, Source: TestMorphlineUtils.java

Example 6: convertToRowInvalidTypeNullable

import org.apache.spark.sql.types.DataTypes; // import the package/class this method depends on
@Test
public void convertToRowInvalidTypeNullable(
    final @Mocked RowUtils utils
) throws Exception {

  Record record = new Record();
  record.put("field1", "one");

  StructType schema = DataTypes.createStructType(Lists.newArrayList(
      DataTypes.createStructField("field1", DataTypes.StringType, true))
  );

  new Expectations() {{
    RowUtils.toRowValue("one", DataTypes.StringType); result = new RuntimeException("Conversion exception");
  }};

  try {
    MorphlineUtils.convertToRow(schema, record);
    fail("Did not throw a RuntimeException");
  } catch (Exception e) {
    assertThat(e.getMessage(), JUnitMatchers.containsString("Error converting Field"));
  }
}
 
Developer: cloudera-labs, Project: envelope, Lines: 24, Source: TestMorphlineUtils.java
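
Taken together, examples 4 through 6 pin down the behavior of MorphlineUtils.convertToRow: a null value passes through when the schema marks the field nullable, a null in a non-nullable field raises a RuntimeException whose message contains "DataType cannot contain 'null'", and a failure inside RowUtils.toRowValue is rewrapped with a message containing "Error converting Field". The following is a hypothetical reconstruction built only from those assertions, not the project's actual code; the exact messages, the Record accessor, and the RowUtils package path are assumptions.

import java.util.ArrayList;
import java.util.List;

import org.apache.spark.sql.Row;
import org.apache.spark.sql.RowFactory;
import org.apache.spark.sql.types.StructField;
import org.apache.spark.sql.types.StructType;
import org.kitesdk.morphline.api.Record;

import com.cloudera.labs.envelope.utils.RowUtils;

public class MorphlineUtilsSketch {
  public static Row convertToRow(StructType schema, Record record) {
    List<Object> values = new ArrayList<>();
    for (StructField field : schema.fields()) {
      Object raw = record.getFirstValue(field.name());
      if (raw == null) {
        if (!field.nullable()) {
          throw new RuntimeException("DataType cannot contain 'null': " + field.name());
        }
        values.add(null); // nullable fields pass null through without conversion
      } else {
        try {
          values.add(RowUtils.toRowValue(raw, field.dataType()));
        } catch (Exception e) {
          throw new RuntimeException("Error converting Field: " + field.name(), e);
        }
      }
    }
    return RowFactory.create(values.toArray());
  }
}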

Example 7: toSchemaArraysNested

import org.apache.spark.sql.types.DataTypes; // import the package/class this method depends on
@Test
public void toSchemaArraysNested() throws Exception {

  StructType input = DataTypes.createStructType(Lists.newArrayList(
      // Outer
      DataTypes.createStructField("Outer", DataTypes.createArrayType(
          // Inner
          DataTypes.createArrayType(DataTypes.IntegerType, false),
          false), false)
  ));

  Schema schema = AvroUtils.schemaFor(input);

  assertEquals("Invalid field count", 1, schema.getFields().size());
  assertEquals("Invalid field name", "Outer", schema.getFields().get(0).name());
  assertEquals("Invalid field type", Schema.Type.ARRAY, schema.getFields().get(0).schema().getType());
  assertEquals("Invalid outer element type, i.e the inner type", Schema.Type.ARRAY, schema.getFields().get(0).schema().getElementType().getType());
  assertEquals("Invalid inner element type", Schema.Type.INT, schema.getFields().get(0).schema().getElementType().getElementType().getType());

  //System.out.println(schema.toString(true));
}
 
Developer: cloudera-labs, Project: envelope, Lines: 22, Source: TestAvroUtils.java

Example 8: toSparkSchema

import org.apache.spark.sql.types.DataTypes; // import the package/class this method depends on
/**
 * Convert MTable schema to Spark DataFrame schema. Map the respective type
 * names from MTable (DataType) to corresponding DataTypes.
 *
 * @param td the table descriptor
 * @return the Spark DataFrame schema
 */
public static org.apache.spark.sql.types.StructType toSparkSchema(final TableDescriptor td) {
  List<StructField> fields = td.getAllColumnDescriptors()
    .stream().sequential().map(cd -> DataTypes.createStructField(cd.getColumnNameAsString(),
      toSparkType(cd.getColumnType()), true))
    .collect(Collectors.toList());
  return DataTypes.createStructType(fields);
}
 
Developer: ampool, Project: monarch, Lines: 15, Source: Utils.java
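
toSparkType itself is not shown in this excerpt. A hedged sketch of what such a mapping could look like follows, simplified here to a name-based lookup: the real helper receives the column's MTable DataType object rather than a String, and the type names used as switch keys are illustrative assumptions, not Ampool's actual identifiers.

import org.apache.spark.sql.types.DataType;
import org.apache.spark.sql.types.DataTypes;

public class TypeMappingSketch {
  // Hypothetical stand-in for the toSparkType helper used above.
  public static DataType toSparkType(String mTableTypeName) {
    switch (mTableTypeName) {
      case "INT":     return DataTypes.IntegerType;
      case "LONG":    return DataTypes.LongType;
      case "DOUBLE":  return DataTypes.DoubleType;
      case "STRING":  return DataTypes.StringType;
      case "BOOLEAN": return DataTypes.BooleanType;
      default:        return DataTypes.BinaryType; // fall back to raw bytes for unmapped types
    }
  }
}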

Example 9: writePredicateMetadata

import org.apache.spark.sql.types.DataTypes; // import the package/class this method depends on
/**
 * Persist predicate metadata table storing all predicates.
 */
public void writePredicateMetadata() {

    // create the schema
    List<StructField> fields = new ArrayList<>();
    fields.add(DataTypes.createStructField(PREDICATE_ID, DataTypes.IntegerType, false));
    fields.add(DataTypes.createStructField(PREDICATE_URI, DataTypes.StringType, false));
    fields.add(DataTypes.createStructField(PREDICATE_LABEL, DataTypes.StringType, true));
    StructType schema = DataTypes.createStructType(fields);

    List<Tuple2<String, String>> indexes = new ArrayList<>();
    indexes.add(new Tuple2<>(PREDICATES_TABLE_NAME, PREDICATE_URI));

    List<Tuple2<String, String>> primaryKeys = new ArrayList<>();
    primaryKeys.add(new Tuple2<>(PREDICATES_TABLE_NAME, PREDICATE_ID));


    final IndexMap<String> predicateIndex = rdfSchema.getPredicateIndex();
    final Map<String, String> uriLabels = rdfSchema.getUriLabels();
    // create table rows
    List<Row> rows = predicateIndex.getValues().stream()
            .map(uri -> {
                Object[] valueArray = new Object[]{
                        predicateIndex.getIndex(uri),
                        uri,
                        uriLabels.get(uri)
                };
                return RowFactory.create(valueArray);
            }).collect(Collectors.toList());

    // create and write the META_Predicates dataframe
    DataFrame df = sql.createDataFrame(rows, schema);
    persistor.writeDataFrame(PREDICATES_TABLE_NAME, df);
    persistor.createPrimaryKeys(primaryKeys);
    persistor.createIndexes(indexes);
    df.unpersist();
}
 
Developer: Merck, Project: rdf2x, Lines: 40, Source: MetadataWriter.java

Example 10: writeRelationMetadata

import org.apache.spark.sql.types.DataTypes; // import the package/class this method depends on
/**
 * Write metadata describing relation tables
 *
 * @param relationSchema the relation schema
 */
public void writeRelationMetadata(RelationSchema relationSchema) {
    // create the schema
    List<StructField> fields = new ArrayList<>();
    fields.add(DataTypes.createStructField(RELATIONS_NAME, DataTypes.StringType, false));
    fields.add(DataTypes.createStructField(RELATIONS_FROM_NAME, DataTypes.StringType, true));
    fields.add(DataTypes.createStructField(RELATIONS_TO_NAME, DataTypes.StringType, true));
    fields.add(DataTypes.createStructField(RELATIONS_PREDICATE_ID, DataTypes.IntegerType, true));

    // create table rows
    List<Row> rows = relationSchema.getTables().stream()
            .map(table -> {
                RelationPredicateFilter predicateFilter = table.getPredicateFilter();
                RelationEntityFilter entityFilter = table.getEntityFilter();
                Object[] valueArray = new Object[]{
                        table.getName(),
                        entityFilter == null ? null : entityFilter.getFromTypeName(),
                        entityFilter == null ? null : entityFilter.getToTypeName(),
                        predicateFilter == null ? null : rdfSchema.getPredicateIndex().getIndex(predicateFilter.getPredicateURI())
                };
                return RowFactory.create(valueArray);
            }).collect(Collectors.toList());

    StructType schema = DataTypes.createStructType(fields);

    // add index for each field
    List<Tuple2<String, String>> indexes = fields.stream()
            .map(field -> new Tuple2<>(RELATIONS_TABLE_NAME, field.name()))
            .collect(Collectors.toList());

    // create and write the META_Relations dataframe
    DataFrame df = sql.createDataFrame(rows, schema);
    persistor.writeDataFrame(RELATIONS_TABLE_NAME, df);
    persistor.createIndexes(indexes);
    df.unpersist();
}
 
Developer: Merck, Project: rdf2x, Lines: 41, Source: MetadataWriter.java

Example 11: getTestSchema

import org.apache.spark.sql.types.DataTypes; // import the package/class this method depends on
private StructType getTestSchema() {
    List<StructField> fields = new ArrayList<>();
    fields.add(DataTypes.createStructField("a", DataTypes.IntegerType, false));
    fields.add(DataTypes.createStructField("b", DataTypes.DoubleType, false));
    fields.add(DataTypes.createStructField("c", DataTypes.StringType, true));
    fields.add(DataTypes.createStructField("d", DataTypes.BooleanType, false));
    fields.add(DataTypes.createStructField("e", DataTypes.FloatType, false));
    return DataTypes.createStructType(fields);
}
 
Developer: Merck, Project: rdf2x, Lines: 10, Source: PersistorTest.java

Example 12: getExpectedSchemaOfA

import org.apache.spark.sql.types.DataTypes; // import the package/class this method depends on
private StructType getExpectedSchemaOfA() {
    List<StructField> fields = new ArrayList<>();
    fields.add(DataTypes.createStructField(ID_COLUMN_NAME, DataTypes.LongType, false));
    fields.add(DataTypes.createStructField(URI_COLUMN_NAME, DataTypes.StringType, false));
    fields.add(DataTypes.createStructField("name", DataTypes.StringType, true));
    return DataTypes.createStructType(fields);
}
 
Developer: Merck, Project: rdf2x, Lines: 8, Source: InstanceRelationWriterTest.java

Example 13: getExpectedSchemaOfB

import org.apache.spark.sql.types.DataTypes; // import the package/class this method depends on
private StructType getExpectedSchemaOfB() {
    List<StructField> fields = new ArrayList<>();
    fields.add(DataTypes.createStructField(ID_COLUMN_NAME, DataTypes.LongType, false));
    fields.add(DataTypes.createStructField(URI_COLUMN_NAME, DataTypes.StringType, false));
    fields.add(DataTypes.createStructField("name", DataTypes.StringType, true));
    fields.add(DataTypes.createStructField("age", DataTypes.IntegerType, true));
    return DataTypes.createStructType(fields);
}
 
Developer: Merck, Project: rdf2x, Lines: 9, Source: InstanceRelationWriterTest.java

Example 14: getExpectedSchemaOfEAV

import org.apache.spark.sql.types.DataTypes; // import the package/class this method depends on
private StructType getExpectedSchemaOfEAV() {
    List<StructField> fields = new ArrayList<>();

    fields.add(DataTypes.createStructField(ID_COLUMN_NAME, DataTypes.LongType, false));
    fields.add(DataTypes.createStructField(PREDICATE_COLUMN_NAME, DataTypes.IntegerType, false));
    fields.add(DataTypes.createStructField(EAV_DATATYPE_COLUMN_NAME, DataTypes.StringType, true));
    fields.add(DataTypes.createStructField(EAV_LANGUAGE_COLUMN_NAME, DataTypes.StringType, true));
    fields.add(DataTypes.createStructField(EAV_VALUE_COLUMN_NAME, DataTypes.StringType, false));

    return DataTypes.createStructType(fields);
}
 
Developer: Merck, Project: rdf2x, Lines: 12, Source: InstanceRelationWriterTest.java

Example 15: getExpectedSchemaOfSingleRelationTable

import org.apache.spark.sql.types.DataTypes; // import the package/class this method depends on
private StructType getExpectedSchemaOfSingleRelationTable() {
    List<StructField> fields = new ArrayList<>();

    fields.add(DataTypes.createStructField(ID_COLUMN_NAME + ID_FROM_SUFFIX, DataTypes.LongType, false));
    fields.add(DataTypes.createStructField(ID_COLUMN_NAME + ID_TO_SUFFIX, DataTypes.LongType, false));
    fields.add(DataTypes.createStructField(PREDICATE_COLUMN_NAME, DataTypes.IntegerType, false));

    return DataTypes.createStructType(fields);
}
 
Developer: Merck, Project: rdf2x, Lines: 10, Source: InstanceRelationWriterTest.java


Note: The org.apache.spark.sql.types.DataTypes.createStructType method examples in this article were compiled by 纯净天空 from open-source code and documentation platforms such as GitHub and MSDocs. The snippets are drawn from community-contributed open-source projects, and copyright remains with the original authors. Consult each project's license before redistributing or reusing the code; do not republish without permission.