This article collects typical usage examples of the Java method org.apache.spark.sql.DataFrame.unpersist. If you are wondering how exactly the Java DataFrame.unpersist method is used, how to call it, or where to find examples of it, the curated code examples here may help. You can also explore further usage examples of the containing class, org.apache.spark.sql.DataFrame.
Four code examples of the DataFrame.unpersist method are shown below, sorted by popularity by default.
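As background for the examples below: DataFrame.unpersist marks the DataFrame as non-persistent and removes its cached blocks once they are no longer needed. The following is a minimal sketch of the typical cache-use-release lifecycle, using only the standard Spark 1.x Java API; the input path and column name are placeholders.

import org.apache.spark.sql.DataFrame;
import org.apache.spark.sql.SQLContext;
import org.apache.spark.storage.StorageLevel;

// assuming an existing SQLContext named sqlContext; the path and column are placeholders
DataFrame df = sqlContext.read().json("/path/to/input.json");
df.persist(StorageLevel.MEMORY_AND_DISK());             // cache because the DataFrame is reused in several actions
long total = df.count();                                // first action materializes the cache
long nonNull = df.filter("value IS NOT NULL").count();  // later actions reuse the cached data
df.unpersist();                                         // release the cached blocks; unpersist(true) blocks until they are removed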
Example 1: writeEntityMetadata
import org.apache.spark.sql.DataFrame; // import the package/class this method depends on
/**
* Write metadata describing entity tables
*
* @param entitySchema the entity schema
*/
public void writeEntityMetadata(EntitySchema entitySchema) {
    // create the schema
    List<StructField> fields = new ArrayList<>();
    fields.add(DataTypes.createStructField(ENTITIES_NAME, DataTypes.StringType, false));
    fields.add(DataTypes.createStructField(ENTITIES_URI, DataTypes.StringType, false));
    fields.add(DataTypes.createStructField(ENTITIES_LABEL, DataTypes.StringType, true));
    fields.add(DataTypes.createStructField(ENTITIES_NUM_ROWS, DataTypes.LongType, false));
    StructType schema = DataTypes.createStructType(fields);
    List<Tuple2<String, String>> indexes = new ArrayList<>();
    indexes.add(new Tuple2<>(ENTITIES_TABLE_NAME, ENTITIES_URI));
    List<Tuple2<String, String>> primaryKeys = new ArrayList<>();
    primaryKeys.add(new Tuple2<>(ENTITIES_TABLE_NAME, ENTITIES_NAME));
    final Map<String, String> uriLabels = rdfSchema.getUriLabels();
    // create table rows
    List<Row> rows = entitySchema.getTables().stream()
            .map(table -> {
                Object[] valueArray = new Object[]{
                        table.getName(),
                        table.getTypeURI(),
                        uriLabels.get(table.getTypeURI()),
                        table.getNumRows()
                };
                return RowFactory.create(valueArray);
            }).collect(Collectors.toList());
    // create and write the META_Entities dataframe
    DataFrame df = sql.createDataFrame(rows, schema);
    persistor.writeDataFrame(ENTITIES_TABLE_NAME, df);
    persistor.createPrimaryKeys(primaryKeys);
    persistor.createIndexes(indexes);
    df.unpersist();
}
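In this example, persistor.writeDataFrame and df.unpersist() are paired: the DataFrame is handed to a persistor for writing and the cache is released afterwards. The Persistor class itself is not shown, so the following is only a hypothetical sketch of one plausible writeDataFrame implementation (JDBC target, class and field names invented here) in which the DataFrame is cached before writing, which is what would make the caller's unpersist() meaningful.

import java.util.Properties;
import org.apache.spark.sql.DataFrame;
import org.apache.spark.storage.StorageLevel;

public class JdbcPersistorSketch {
    private final String jdbcUrl;        // assumed target, e.g. "jdbc:postgresql://host/db"
    private final Properties properties; // user, password, driver, ...

    public JdbcPersistorSketch(String jdbcUrl, Properties properties) {
        this.jdbcUrl = jdbcUrl;
        this.properties = properties;
    }

    public void writeDataFrame(String tableName, DataFrame df) {
        df.persist(StorageLevel.MEMORY_AND_DISK());                        // cache so retries or follow-up queries reuse the rows
        df.write().mode("append").jdbc(jdbcUrl, tableName, properties);    // standard Spark 1.x JDBC write
        // the caller (as in the example above) is responsible for df.unpersist()
    }
}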
Example 2: writePredicateMetadata
import org.apache.spark.sql.DataFrame; // import the package/class this method depends on
/**
* Persist predicate metadata table storing all predicates.
*/
public void writePredicateMetadata() {
    // create the schema
    List<StructField> fields = new ArrayList<>();
    fields.add(DataTypes.createStructField(PREDICATE_ID, DataTypes.IntegerType, false));
    fields.add(DataTypes.createStructField(PREDICATE_URI, DataTypes.StringType, false));
    fields.add(DataTypes.createStructField(PREDICATE_LABEL, DataTypes.StringType, true));
    StructType schema = DataTypes.createStructType(fields);
    List<Tuple2<String, String>> indexes = new ArrayList<>();
    indexes.add(new Tuple2<>(PREDICATES_TABLE_NAME, PREDICATE_URI));
    List<Tuple2<String, String>> primaryKeys = new ArrayList<>();
    primaryKeys.add(new Tuple2<>(PREDICATES_TABLE_NAME, PREDICATE_ID));
    final IndexMap<String> predicateIndex = rdfSchema.getPredicateIndex();
    final Map<String, String> uriLabels = rdfSchema.getUriLabels();
    // create table rows
    List<Row> rows = predicateIndex.getValues().stream()
            .map(uri -> {
                Object[] valueArray = new Object[]{
                        predicateIndex.getIndex(uri),
                        uri,
                        uriLabels.get(uri)
                };
                return RowFactory.create(valueArray);
            }).collect(Collectors.toList());
    // create and write the META_Predicates dataframe
    DataFrame df = sql.createDataFrame(rows, schema);
    persistor.writeDataFrame(PREDICATES_TABLE_NAME, df);
    persistor.createPrimaryKeys(primaryKeys);
    persistor.createIndexes(indexes);
    df.unpersist();
}
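The example relies on IndexMap, a project class that is not shown here. Based on how it is used (getValues to enumerate predicate URIs, getIndex to look up their numeric IDs), a minimal hypothetical sketch might look like the following; the 1-based numbering and the addValue method are assumptions.

import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Map;

public class IndexMapSketch<T> {
    private final Map<T, Integer> indexes = new HashMap<>();
    private final List<T> values = new ArrayList<>();

    /** Add a value if unseen and return its index (1-based, as assumed here). */
    public int addValue(T value) {
        return indexes.computeIfAbsent(value, v -> {
            values.add(v);
            return values.size();
        });
    }

    public Integer getIndex(T value) {
        return indexes.get(value);
    }

    public List<T> getValues() {
        return values;
    }
}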
Example 3: writeRelationMetadata
import org.apache.spark.sql.DataFrame; // import the package/class this method depends on
/**
* Write metadata describing relation tables
*
* @param relationSchema the relation schema
*/
public void writeRelationMetadata(RelationSchema relationSchema) {
    // create the schema
    List<StructField> fields = new ArrayList<>();
    fields.add(DataTypes.createStructField(RELATIONS_NAME, DataTypes.StringType, false));
    fields.add(DataTypes.createStructField(RELATIONS_FROM_NAME, DataTypes.StringType, true));
    fields.add(DataTypes.createStructField(RELATIONS_TO_NAME, DataTypes.StringType, true));
    fields.add(DataTypes.createStructField(RELATIONS_PREDICATE_ID, DataTypes.IntegerType, true));
    // create table rows
    List<Row> rows = relationSchema.getTables().stream()
            .map(table -> {
                RelationPredicateFilter predicateFilter = table.getPredicateFilter();
                RelationEntityFilter entityFilter = table.getEntityFilter();
                Object[] valueArray = new Object[]{
                        table.getName(),
                        entityFilter == null ? null : entityFilter.getFromTypeName(),
                        entityFilter == null ? null : entityFilter.getToTypeName(),
                        predicateFilter == null ? null : rdfSchema.getPredicateIndex().getIndex(predicateFilter.getPredicateURI())
                };
                return RowFactory.create(valueArray);
            }).collect(Collectors.toList());
    StructType schema = DataTypes.createStructType(fields);
    // add index for each field
    List<Tuple2<String, String>> indexes = fields.stream()
            .map(field -> new Tuple2<>(RELATIONS_TABLE_NAME, field.name()))
            .collect(Collectors.toList());
    // create and write the META_Relations dataframe
    DataFrame df = sql.createDataFrame(rows, schema);
    persistor.writeDataFrame(RELATIONS_TABLE_NAME, df);
    persistor.createIndexes(indexes);
    df.unpersist();
}
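Because the from/to/predicate columns are declared nullable (a relation table may not be restricted by an entity or predicate filter), it can help to inspect the metadata DataFrame before handing it to the persistor. Below is a minimal sketch using only standard Spark 1.x calls; sql, rows, schema and RELATIONS_PREDICATE_ID refer to the names from the example above.

DataFrame df = sql.createDataFrame(rows, schema);
df.printSchema();    // shows the four columns and their nullability
df.show(20, false);  // preview up to 20 metadata rows without truncating table names
long unfiltered = df.filter(df.col(RELATIONS_PREDICATE_ID).isNull()).count(); // relations not restricted to a single predicate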
Example 4: writeEntityAttributeValueTable
import org.apache.spark.sql.DataFrame; // import the package/class this method depends on
/**
* Persist the Entity Attribute Value table
*
* @param entitySchema entity schema
* @param instances RDD of {@link Instance}s
*/
public void writeEntityAttributeValueTable(EntitySchema entitySchema, JavaRDD<Instance> instances) {
    IndexMap<String> typeIndex = rdfSchema.getTypeIndex();
    // create the schema
    List<StructField> fields = new ArrayList<>();
    fields.add(DataTypes.createStructField(ID_COLUMN_NAME, DataTypes.LongType, false));
    fields.add(DataTypes.createStructField(PREDICATE_COLUMN_NAME, DataTypes.IntegerType, false));
    fields.add(DataTypes.createStructField(EAV_DATATYPE_COLUMN_NAME, DataTypes.StringType, true));
    fields.add(DataTypes.createStructField(EAV_LANGUAGE_COLUMN_NAME, DataTypes.StringType, true));
    fields.add(DataTypes.createStructField(EAV_VALUE_COLUMN_NAME, DataTypes.StringType, false));
    StructType schema = DataTypes.createStructType(fields);
    List<Tuple2<String, String>> indexes = new ArrayList<>();
    indexes.add(new Tuple2<>(EAV_TABLE_NAME, ID_COLUMN_NAME));
    indexes.add(new Tuple2<>(EAV_TABLE_NAME, PREDICATE_COLUMN_NAME));
    indexes.add(new Tuple2<>(EAV_TABLE_NAME, EAV_DATATYPE_COLUMN_NAME));
    indexes.add(new Tuple2<>(EAV_TABLE_NAME, EAV_LANGUAGE_COLUMN_NAME));
    // get map of type index -> set of attributes
    Map<Integer, Set<Predicate>> typeEavPredicates = entitySchema.getTables().stream()
            .collect(Collectors.toMap(
                    table -> typeIndex.getIndex(table.getTypeURI()),
                    table -> table.getAttributes().stream()
                            .map(EntityProperty::getPredicate)
                            .collect(Collectors.toSet())
            ));
    // get all entity attribute values
    JavaRDD<Row> rowRDD = instances.flatMap(instance ->
            instance.getLiteralPredicates().stream()
                    // filter predicates that are in the EAV set of at least one of the instance types
                    .filter(predicate -> instance.getTypes().stream().anyMatch(type ->
                            typeEavPredicates.containsKey(type) && // type could have been removed (not enough rows, ...)
                                    typeEavPredicates.get(type).contains(predicate)
                    ))
                    // map to row of values
                    .flatMap(predicate -> {
                        Object value = instance.getLiteralValue(predicate);
                        if (value instanceof Set) {
                            // return a row for each single value
                            return ((Set<Object>) value).stream().map(val -> getAttributeRow(instance, predicate, val));
                        }
                        return Stream.of(getAttributeRow(instance, predicate, value));
                    })
                    .collect(Collectors.toList())
    );
    int predicateCount = typeEavPredicates.values().stream().collect(Collectors.summingInt(Set::size));
    // create and write the dataframe
    log.info("Writing EAV table of {} predicates", predicateCount);
    DataFrame df = sql.createDataFrame(rowRDD, schema);
    persistor.writeDataFrame(EAV_TABLE_NAME, df);
    log.info("Creating indexes for EAV table");
    persistor.createIndexes(indexes);
    df.unpersist();
}
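The getAttributeRow helper referenced above is not part of the example. A hypothetical sketch is shown below; the Instance and Predicate accessors are assumptions, and only the column order has to match the EAV schema defined in the method.

private Row getAttributeRow(Instance instance, Predicate predicate, Object value) {
    return RowFactory.create(
            instance.getId(),              // assumed numeric instance ID -> ID_COLUMN_NAME
            predicate.getPredicateIndex(), // assumed predicate index -> PREDICATE_COLUMN_NAME
            predicate.getLiteralType(),    // assumed datatype tag (may be null) -> EAV_DATATYPE_COLUMN_NAME
            predicate.getLanguage(),       // assumed language tag (may be null) -> EAV_LANGUAGE_COLUMN_NAME
            value.toString()               // literal value stored as string -> EAV_VALUE_COLUMN_NAME
    );
}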