This article collects typical usage examples of the Dataset.count method from the Java class org.apache.spark.sql.Dataset. If you are wondering what Dataset.count does, how to call it, or what it looks like in real code, the curated examples below may help. You can also explore further usage examples of the enclosing class, org.apache.spark.sql.Dataset.
The following presents 5 code examples of the Dataset.count method, sorted by popularity by default.
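As a quick orientation before the examples, here is a minimal, self-contained sketch of calling Dataset.count (the local SparkSession setup is purely illustrative; count() is an action that triggers execution and returns the number of rows as a long):

import org.apache.spark.sql.Dataset;
import org.apache.spark.sql.SparkSession;

public class CountDemo {
    public static void main(String[] args) {
        // Local session purely for demonstration purposes.
        SparkSession spark = SparkSession.builder()
            .appName("CountDemo")
            .master("local[*]")
            .getOrCreate();

        // range(100) produces a Dataset<Long> with values 0..99.
        Dataset<Long> ds = spark.range(100);

        // count() is an action: it runs the job and returns the row count.
        long n = ds.count(); // 100

        System.out.println("rows: " + n);
        spark.stop();
    }
}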
Example 1: calculate_stats_table
import org.apache.spark.sql.Dataset; // import the package/class this method depends on
private TableStats calculate_stats_table(Dataset<Row> table, String tableName) {
    TableStats.Builder table_stats_builder = TableStats.newBuilder();

    // calculate the stats
    int table_size = (int) table.count();
    int distinct_subjects = (int) table.select(this.column_name_subject).distinct().count();
    boolean is_complex = table_size != distinct_subjects;

    // put them in the protobuf object
    table_stats_builder.setSize(table_size)
        .setDistinctSubjects(distinct_subjects)
        .setIsComplex(is_complex)
        .setName(tableName);

    return table_stats_builder.build();
}
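Note that the method above runs two actions, count() on the full table and count() on the distinct subjects, so the table is scanned twice. A hedged variant of the same size-vs-distinct check (a sketch only: "spark", "triples", and "s" are illustrative names, not from the original) could cache the table first:

// Sketch: cache() lets both count() actions reuse one scan of the data.
Dataset<Row> table = spark.table("triples").cache();
long tableSize = table.count();
long distinctSubjects = table.select("s").distinct().count();
// A table is "complex" when some subject appears in more than one row.
boolean isComplex = tableSize != distinctSubjects;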
Example 2: withConceptMaps
import org.apache.spark.sql.Dataset; // import the package/class this method depends on
/**
 * Returns a new ConceptMaps instance that includes the given maps.
 *
 * @param conceptMaps concept maps to add to the returned collection.
 * @return a new ConceptMaps instance with the values added.
 */
public ConceptMaps withConceptMaps(Dataset<ConceptMap> conceptMaps) {
    Dataset<UrlAndVersion> newMembers = getUrlAndVersions(conceptMaps);

    if (hasDuplicateUrlAndVersions(newMembers) || conceptMaps.count() != newMembers.count()) {
        throw new IllegalArgumentException(
            "Cannot add concept maps having duplicate conceptMapUri and conceptMapVersion");
    }

    // Remove the concept contents for persistence. This is most easily done in the ConceptMap
    // object by setting the group to an empty list.
    Dataset<ConceptMap> withoutConcepts = conceptMaps
        .map((MapFunction<ConceptMap, ConceptMap>) conceptMap -> {
            // Remove the elements rather than the groups to preserve the
            // "unmapped" structure in a group that can refer to other
            // concept maps.
            ConceptMap withoutElements = conceptMap.copy();
            List<ConceptMapGroupComponent> updatedGroups = new ArrayList<>();
            for (ConceptMapGroupComponent group : withoutElements.getGroup()) {
                group.setElement(new ArrayList<>());
                updatedGroups.add(group);
            }
            withoutElements.setGroup(updatedGroups);
            return withoutElements;
        }, CONCEPT_MAP_ENCODER);

    Dataset<Mapping> newMappings = conceptMaps.flatMap(ConceptMaps::expandMappingsIterator,
        MAPPING_ENCODER);

    return withConceptMaps(withoutConcepts, newMappings);
}
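The guard at the top relies on a common count-based idiom: assuming getUrlAndVersions returns the distinct (url, version) pairs of the input (an assumption, not shown in this snippet), a mismatch between conceptMaps.count() and newMembers.count() means the incoming batch itself contains duplicates. The idiom in isolation, as a sketch on a generic Dataset<Row> with assumed column names:

// If distinct pairs are fewer than raw rows, the input has duplicates.
Dataset<Row> pairs = df.select("url", "version");
boolean hasInternalDuplicates = pairs.count() != pairs.distinct().count();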
Example 3: execute
import org.apache.spark.sql.Dataset; // import the package/class this method depends on
public void execute() {
    // use the selected database
    sqlContext.sql("USE " + this.databaseName);
    logger.info("USE " + this.databaseName);

    long totalStartTime = System.currentTimeMillis();

    // compute the singular nodes data
    queryTree.computeSingularNodeData(sqlContext);
    logger.info("COMPUTED nodes data");

    long startTime;
    long executionTime;

    // compute the joins
    Dataset<Row> results = queryTree.computeJoins(sqlContext);

    startTime = System.currentTimeMillis();
    long number_results = -1;

    // if specified, save the results in HDFS, just count otherwise
    if (this.outputFile != null) {
        results.write().parquet(this.outputFile);
    } else {
        number_results = results.count();
        logger.info("Number of Results: " + String.valueOf(number_results));
    }
    executionTime = System.currentTimeMillis() - startTime;
    logger.info("Execution time JOINS: " + String.valueOf(executionTime));

    // save the results in the list
    this.query_time_results.add(new String[]{queryTree.query_name,
        String.valueOf(executionTime),
        String.valueOf(number_results)});

    long totalExecutionTime = System.currentTimeMillis() - totalStartTime;
    logger.info("Total execution time: " + String.valueOf(totalExecutionTime));
}
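In the Parquet branch above, number_results stays at -1, because counting the written results would re-run the join. If both the saved output and the row count were needed, one hedged option (a sketch, not part of the original code) is to cache the results so the second action reuses the already-computed data:

// Sketch: cache so that write() and count() share one computation.
Dataset<Row> results = queryTree.computeJoins(sqlContext).cache();
results.write().parquet(outputFile);
long numberResults = results.count(); // served from the cached data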
Example 4: tabularDataset
import org.apache.spark.sql.Dataset; // import the package/class this method depends on
static Tabular tabularDataset(Dataset<Row> ds) {
    return new Tabular() {
        public int numRows() { return (int) ds.count(); }
        public int numCols() { return ds.columns().length; }
        public List<String> headers() { return Arrays.asList(ds.columns()); }
        public String val(int rowNum, int colNum) {
            // Note: collectAsList() pulls the entire Dataset to the driver
            // on every call, which is expensive for anything but small data.
            int ri = rowNum - 1;
            int ci = colNum - 1;
            Object v = ds.collectAsList().get(ri).get(ci);
            return v == null ? "" : v.toString();
        }
    };
}
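Since val() above re-collects the entire Dataset on every call, a less expensive variant (a sketch against the same Tabular interface, assuming the data fits on the driver) collects the rows once and reuses them:

static Tabular tabularDataset(Dataset<Row> ds) {
    // One collect and one schema read, shared by all Tabular methods.
    List<Row> rows = ds.collectAsList();
    String[] cols = ds.columns();
    return new Tabular() {
        public int numRows() { return rows.size(); }
        public int numCols() { return cols.length; }
        public List<String> headers() { return Arrays.asList(cols); }
        public String val(int rowNum, int colNum) {
            Object v = rows.get(rowNum - 1).get(colNum - 1);
            return v == null ? "" : v.toString();
        }
    };
}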
Example 5: withValueSets
import org.apache.spark.sql.Dataset; // import the package/class this method depends on
/**
 * Returns a new ValueSets instance that includes the given value sets.
 *
 * @param valueSets the value sets to add to the returned collection.
 * @return a new ValueSets instance with the added value sets.
 */
public ValueSets withValueSets(Dataset<ValueSet> valueSets) {
    Dataset<UrlAndVersion> newMembers = getUrlAndVersions(valueSets);

    // Ensure that there are no duplicates among the value sets
    if (hasDuplicateUrlAndVersions(newMembers) || valueSets.count() != newMembers.count()) {
        throw new IllegalArgumentException(
            "Cannot add value sets having duplicate valueSetUri and valueSetVersion");
    }

    // The value set concepts will be stored in the values table for persistence, so we remove
    // them from the individual value sets. This can be done most easily by setting concepts to an
    // empty list.
    Dataset<ValueSet> withoutConcepts = valueSets.map((MapFunction<ValueSet, ValueSet>) valueSet -> {
        ValueSet valueSetWithoutConcepts = valueSet.copy();
        List<ConceptSetComponent> updatedInclusions = new ArrayList<>();
        for (ConceptSetComponent inclusion : valueSet.getCompose().getInclude()) {
            ConceptSetComponent inclusionWithoutConcepts = inclusion.copy();
            inclusionWithoutConcepts.setConcept(new ArrayList<>());
            updatedInclusions.add(inclusionWithoutConcepts);
        }
        valueSetWithoutConcepts.getCompose().setInclude(updatedInclusions);
        return valueSetWithoutConcepts;
    }, VALUE_SET_ENCODER);

    Dataset<Value> newValues = valueSets.flatMap(ValueSets::expandValuesIterator, VALUE_ENCODER);

    return withValueSets(withoutConcepts, newValues);
}
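A note on the map call above: the typed Dataset.map in Java requires an explicit Encoder for the output type, which is why VALUE_SET_ENCODER is passed alongside the MapFunction. The same pattern with a stock encoder, as a minimal sketch (names is an assumed Dataset<String>):

// Encoders.STRING() plays the role VALUE_SET_ENCODER plays above.
Dataset<String> upper = names.map(
    (MapFunction<String, String>) s -> s.toUpperCase(), Encoders.STRING());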