

Java Dataset.count Method Code Examples

This article collects typical usage examples of the Java method org.apache.spark.sql.Dataset.count. If you are wondering how Dataset.count works, how to call it, or what it looks like in real code, the curated examples below may help. You can also explore further usage examples of the enclosing class, org.apache.spark.sql.Dataset.


The following shows 5 code examples of Dataset.count, sorted by popularity by default.
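Before the examples, a minimal sketch of the method itself: Dataset.count() is an action, so it triggers a Spark job and returns the number of rows as a long. The session setup and the "people.json" input path below are placeholder assumptions for illustration and are not taken from the examples that follow.

import org.apache.spark.sql.Dataset;
import org.apache.spark.sql.Row;
import org.apache.spark.sql.SparkSession;

public class CountExample {
    public static void main(String[] args) {
        SparkSession spark = SparkSession.builder()
                .appName("CountExample")
                .master("local[*]")   // local mode, for the sketch only
                .getOrCreate();

        // count() is an action: it runs the job and returns the row count as a long
        Dataset<Row> df = spark.read().json("people.json");
        long total = df.count();
        System.out.println("Rows: " + total);

        // combined with distinct(), it yields the number of unique rows
        long unique = df.distinct().count();
        System.out.println("Distinct rows: " + unique);

        spark.stop();
    }
}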

Example 1: calculate_stats_table

import org.apache.spark.sql.Dataset; // import the package/class that the method depends on
private TableStats calculate_stats_table(Dataset<Row> table, String tableName) {
	TableStats.Builder table_stats_builder = TableStats.newBuilder();
	
	// calculate the stats
	int table_size = (int) table.count();
	int distinct_subjects = (int) table.select(this.column_name_subject).distinct().count();
	boolean is_complex = table_size != distinct_subjects;
	
	// put them in the protobuf object
	table_stats_builder.setSize(table_size)
		.setDistinctSubjects(distinct_subjects)
		.setIsComplex(is_complex)
		.setName(tableName);
	
	return table_stats_builder.build();
}
 
Developer ID: tf-dbis-uni-freiburg, project: PRoST, lines of code: 17, source: VerticalPartitioningLoader.java
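Example 1 flags a vertically partitioned table as "complex" when the total row count differs from the number of distinct subjects, i.e. when some subject appears more than once. Since the two counts are separate actions, the table is scanned twice; the hedged sketch below (not part of PRoST) caches the Dataset around both actions, with the subject column passed in as a parameter instead of the field used in the original.

import org.apache.spark.sql.Dataset;
import org.apache.spark.sql.Row;

final class TableStatsSketch {
    static boolean isComplex(Dataset<Row> table, String subjectColumn) {
        table.cache();                      // keep the data around for both actions
        long size = table.count();          // total number of rows
        long distinctSubjects = table.select(subjectColumn).distinct().count();
        table.unpersist();                  // release the cached data
        // a "complex" (multi-valued) property has at least one repeated subject
        return size != distinctSubjects;
    }
}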

Example 2: withConceptMaps

import org.apache.spark.sql.Dataset; // import the package/class that the method depends on
/**
 * Returns a new ConceptMaps instance that includes the given maps.
 *
 * @param conceptMaps concept maps to add to the returned collection.
 * @return a new ConceptMaps instance with the values added.
 */
public ConceptMaps withConceptMaps(Dataset<ConceptMap> conceptMaps) {

  Dataset<UrlAndVersion> newMembers = getUrlAndVersions(conceptMaps);

  if (hasDuplicateUrlAndVersions(newMembers) || conceptMaps.count() != newMembers.count()) {

    throw new IllegalArgumentException(
        "Cannot add concept maps having duplicate conceptMapUri and conceptMapVersion");
  }

  // Remove the concept contents for persistence. This is most easily done in the ConceptMap
  // object by setting the group to an empty list.
  Dataset<ConceptMap> withoutConcepts = conceptMaps
      .map((MapFunction<ConceptMap,ConceptMap>) conceptMap -> {

        // Remove the elements rather than the groups to preserve the
        // "unmapped" structure in a group that can refer to other
        // concept maps.
        ConceptMap withoutElements = conceptMap.copy();

        List<ConceptMapGroupComponent> updatedGroups = new ArrayList<>();

        for (ConceptMapGroupComponent group: withoutElements.getGroup()) {

          group.setElement(new ArrayList<>());
          updatedGroups.add(group);
        }

        withoutElements.setGroup(updatedGroups);

        return withoutElements;
      }, CONCEPT_MAP_ENCODER);

  Dataset<Mapping> newMappings = conceptMaps.flatMap(ConceptMaps::expandMappingsIterator,
      MAPPING_ENCODER);

  return withConceptMaps(withoutConcepts, newMappings);
}
 
Developer ID: cerner, project: bunsen, lines of code: 45, source: ConceptMaps.java
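The guard in Example 2 relies on a count comparison: if extracting the url/version keys produces fewer rows than the input, some key must occur more than once. A generic sketch of that idea is shown below; it is not Bunsen's actual hasDuplicateUrlAndVersions implementation, and the column names "url" and "version" are assumptions for illustration.

import org.apache.spark.sql.Dataset;
import org.apache.spark.sql.Row;

final class DuplicateCheckSketch {
    static boolean hasDuplicateKeys(Dataset<Row> rows) {
        long total = rows.count();
        // dropDuplicates on the key columns shrinks the count iff a key repeats
        long distinctKeys = rows.dropDuplicates("url", "version").count();
        return total != distinctKeys;
    }
}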

Example 3: execute

import org.apache.spark.sql.Dataset; // import the package/class that the method depends on
public void execute() {
	// use the selected database
	sqlContext.sql("USE "+ this.databaseName);
	logger.info("USE "+ this.databaseName);
	
	long totalStartTime = System.currentTimeMillis();
	
	// compute the singular nodes data
	queryTree.computeSingularNodeData(sqlContext);
	logger.info("COMPUTED nodes data");
	
	
	long startTime;
	long executionTime;
	
	// compute the joins
	
	Dataset<Row> results = queryTree.computeJoins(sqlContext);
	startTime = System.currentTimeMillis();
	long number_results = -1;
	// if specified, save the results in HDFS, just count otherwise
	if (this.outputFile != null) {
		results.write().parquet(this.outputFile);
	} else {
		number_results = results.count();
		logger.info("Number of Results: " + String.valueOf(number_results));
	}
	executionTime = System.currentTimeMillis() - startTime;
	logger.info("Execution time JOINS: " + String.valueOf(executionTime));
	
	// save the results in the list
	this.query_time_results.add(new String[]{queryTree.query_name, 
			String.valueOf(executionTime), 
			String.valueOf(number_results)});
	
	
	long totalExecutionTime = System.currentTimeMillis() - totalStartTime;
	logger.info("Total execution time: " + String.valueOf(totalExecutionTime));
}
 
Developer ID: tf-dbis-uni-freiburg, project: PRoST, lines of code: 40, source: Executor.java
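In Example 3, results.count() is what actually forces the joins to run when no output file was requested, so the measured interval covers the whole computation rather than just building the lazy plan. A minimal sketch of that timing pattern follows; the class and method names are placeholders, not taken from the project.

import org.apache.spark.sql.Dataset;
import org.apache.spark.sql.Row;

final class TimedCountSketch {
    /** Returns {rowCount, elapsedMillis} for materializing the given Dataset. */
    static long[] timeAndCount(Dataset<Row> results) {
        long start = System.currentTimeMillis();
        long rows = results.count();   // action: executes the joins and counts the output
        long elapsedMs = System.currentTimeMillis() - start;
        return new long[]{rows, elapsedMs};
    }
}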

Example 4: tabularDataset

import org.apache.spark.sql.Dataset; // import the package/class that the method depends on
static Tabular tabularDataset(Dataset<Row> ds){
    return new Tabular(){
        public int          numRows()                   { return (int)ds.count(); }
        public int          numCols()                   { return ds.columns().length; }
        public List<String> headers()                   { return Arrays.asList(ds.columns()) ; }
        public String val(int rowNum, int colNum) {
            int ri = rowNum-1;
            int ci = colNum-1;
            Object v = ds.collectAsList().get(ri).get(ci);
            return v == null ? "" : v.toString(); }
    };
}
 
Developer ID: gboleslavsky, project: HiveUnit, lines of code: 13, source: Tabular.java
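Note that Example 4 calls ds.collectAsList() inside val(), so every cell lookup pulls the entire Dataset back to the driver. A hedged variant (not part of HiveUnit, and assuming the same Tabular interface used above) collects once up front, trading driver memory for repeated scans:

import java.util.Arrays;
import java.util.List;
import org.apache.spark.sql.Dataset;
import org.apache.spark.sql.Row;

final class TabularSketch {
    static Tabular tabularDataset(Dataset<Row> ds) {
        List<Row> rows = ds.collectAsList();   // single driver-side collect
        String[] cols = ds.columns();
        return new Tabular() {
            public int numRows()              { return rows.size(); }
            public int numCols()              { return cols.length; }
            public List<String> headers()     { return Arrays.asList(cols); }
            public String val(int rowNum, int colNum) {
                Object v = rows.get(rowNum - 1).get(colNum - 1);
                return v == null ? "" : v.toString();
            }
        };
    }
}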

Example 5: withValueSets

import org.apache.spark.sql.Dataset; // import the package/class that the method depends on
/**
 * Returns a new ValueSets instance that includes the given value sets.
 *
 * @param valueSets the value sets to add to the returned collection.
 * @return a new ValueSets instance with the added value sets.
 */
public ValueSets withValueSets(Dataset<ValueSet> valueSets) {

  Dataset<UrlAndVersion> newMembers = getUrlAndVersions(valueSets);

  // Ensure that there are no duplicates among the value sets
  if (hasDuplicateUrlAndVersions(newMembers) || valueSets.count() != newMembers.count()) {

    throw new IllegalArgumentException(
        "Cannot add value sets having duplicate valueSetUri and valueSetVersion");
  }

  // The value set concepts will be stored in the values table for persistence, so we remove
  // them from the individual value sets. This can be done most easily by setting concepts to an
  // empty list.
  Dataset<ValueSet> withoutConcepts = valueSets.map((MapFunction<ValueSet,ValueSet>) valueSet -> {
    ValueSet valueSetWithoutConcepts = valueSet.copy();

    List<ConceptSetComponent> updatedInclusions = new ArrayList<>();

    for (ConceptSetComponent inclusion: valueSet.getCompose().getInclude()) {

      ConceptSetComponent inclusionWithoutConcepts = inclusion.copy();

      inclusionWithoutConcepts.setConcept(new ArrayList<>());
      updatedInclusions.add(inclusionWithoutConcepts);
    }

    valueSetWithoutConcepts.getCompose().setInclude(updatedInclusions);

    return valueSetWithoutConcepts;
  }, VALUE_SET_ENCODER);

  Dataset<Value> newValues = valueSets.flatMap(ValueSets::expandValuesIterator, VALUE_ENCODER);

  return withValueSets(withoutConcepts, newValues);
}
 
Developer ID: cerner, project: bunsen, lines of code: 43, source: ValueSets.java


Note: The org.apache.spark.sql.Dataset.count examples in this article were compiled by 純淨天空 from open-source code and documentation platforms such as GitHub and MSDocs. The snippets were selected from open-source projects contributed by various developers; copyright in the source code remains with the original authors. Please consult the corresponding project's license before distributing or using the code, and do not reproduce without permission.