本文整理匯總了Java中org.apache.spark.sql.Dataset.first方法的典型用法代碼示例。如果您正苦於以下問題:Java Dataset.first方法的具體用法?Java Dataset.first怎麽用?Java Dataset.first使用的例子?那麽, 這裏精選的方法代碼示例或許可以為您提供幫助。您也可以進一步了解該方法所在類org.apache.spark.sql.Dataset
的用法示例。
在下文中一共展示了Dataset.first方法的2個代碼示例,這些例子默認根據受歡迎程度排序。您可以為喜歡或者感覺有用的代碼點讚,您的評價將有助於係統推薦出更棒的Java代碼示例。
示例1: start
import org.apache.spark.sql.Dataset; //導入方法依賴的package包/類
/**
 * Builds a per-zipcode "sales potential" index and prints the result.
 *
 * <p>Pipeline: join sales with demographic data (household + population) per
 * zipcode, compute revenue per inhabitant, take the best-performing zipcode as
 * the reference, then score every zipcode against that reference and project a
 * potential revenue. Results are shown sorted by potential revenue, descending.
 */
private void start() {
  Dataset<Row> householdDf = getHouseholdDataframe();
  Dataset<Row> populationDf = getPopulationDataframe();
  Dataset<Row> indexDf = joinHouseholdPopulation(householdDf, populationDf);
  Dataset<Row> salesDf = getSalesData();

  // Left join keeps every sales row even when no demographic data matches;
  // drop the duplicated join key contributed by the index side.
  Dataset<Row> salesIndexDf = salesDf
      .join(indexDf, salesDf.col("zipcode").equalTo(indexDf.col("zipcode")), "left")
      .drop(indexDf.col("zipcode"));

  // Revenue per inhabitant for each zipcode.
  salesIndexDf = salesIndexDf.withColumn("revenue_by_inh", salesIndexDf.col("revenue")
      .divide(salesIndexDf.col("pop")));

  // The top row after this sort is the reference ("best") zipcode.
  salesIndexDf = salesIndexDf.orderBy(col("revenue_by_inh").desc());
  Row bestRow = salesIndexDf.first();
  double bestRevenuePerInhabitant = ((BigDecimal) bestRow.getAs("revenue_by_inh"))
      .doubleValue();
  int populationOfBestRevenuePerInhabitant = bestRow.getAs("pop");
  double incomeOfBestRevenuePerInhabitant = bestRow.getAs("income_per_inh");

  // FIX: the original built this constant column as pop.divide(pop).multiply(best),
  // which evaluates to NULL for any row whose pop is NULL or 0 and then poisons
  // idx_revenue downstream. Use lit(), consistent with pop_of_best/income_of_best.
  salesIndexDf = salesIndexDf
      .withColumn("best_revenue_per_inh", lit(bestRevenuePerInhabitant))
      .withColumn("pop_of_best", lit(populationOfBestRevenuePerInhabitant))
      .withColumn("income_of_best", lit(incomeOfBestRevenuePerInhabitant));

  // Component indexes relative to the reference zipcode, combined into a
  // single score, then projected onto current revenue.
  salesIndexDf = salesIndexDf
      .withColumn("idx_revenue",
          salesIndexDf.col("best_revenue_per_inh").divide(salesIndexDf.col("revenue_by_inh")))
      .withColumn("idx_pop",
          salesIndexDf.col("pop").divide(salesIndexDf.col("pop_of_best")))
      .withColumn("idx_income",
          salesIndexDf.col("income_per_inh").divide(salesIndexDf.col("income_of_best")));
  salesIndexDf = salesIndexDf.withColumn(
      "index",
      salesIndexDf.col("idx_revenue").multiply(salesIndexDf.col("idx_pop")
          .multiply(salesIndexDf.col("idx_income"))));
  salesIndexDf = salesIndexDf.withColumn(
      "potential_revenue",
      salesIndexDf.col("revenue").multiply(salesIndexDf.col("index")));

  // Drop intermediate working columns before displaying the report.
  salesIndexDf = salesIndexDf
      .drop("idx_income")
      .drop("idx_pop")
      .drop("idx_revenue")
      .drop("income_of_best")
      .drop("total_income")
      .drop("revenue_by_inh")
      .drop("pop_of_best")
      .drop("best_revenue_per_inh")
      .orderBy(salesIndexDf.col("potential_revenue").desc());
  salesIndexDf.show();
}
示例2: resourceToJson
import org.apache.spark.sql.Dataset; //導入方法依賴的package包/類
@Test
public void resourceToJson() {
  // Serialize the conditions dataset to a dataset of JSON strings and grab
  // the first element.
  Dataset<String> serialized = Functions.toJson(conditions, "condition");
  String firstJson = serialized.first();

  // Round-trip: parse the JSON back into a Condition and verify the id
  // survived serialization.
  Condition roundTripped = (Condition) CONTEXT.newJsonParser()
      .parseResource(firstJson);
  Assert.assertEquals(condition.getId(), roundTripped.getId());
}