本文整理汇总了Java中org.apache.spark.sql.Row类的典型用法代码示例。如果您正苦于以下问题:Java Row类的具体用法?Java Row怎么用?Java Row使用的例子?那么, 这里精选的类代码示例或许可以为您提供帮助。
Row类属于org.apache.spark.sql包,在下文中一共展示了Row类的15个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Java代码示例。
示例1: test_getDataSetResult
import org.apache.spark.sql.Row; //导入依赖的package包/类
@Test
public void test_getDataSetResult() {
    // Build a two-column (nullable int, nullable string) schema for the test DataFrame.
    StructField[] structFields = new StructField[]{
            new StructField("intColumn", DataTypes.IntegerType, true, Metadata.empty()),
            new StructField("stringColumn", DataTypes.StringType, true, Metadata.empty())
    };
    StructType structType = new StructType(structFields);

    List<Row> rows = new ArrayList<>();
    rows.add(RowFactory.create(1, "v1"));
    rows.add(RowFactory.create(2, "v2"));
    Dataset<Row> df = sparkSession.createDataFrame(rows, structType);

    DataSetResult dataSetResult = SparkUtils.getDataSetResult(df);

    // Both column names and both rows must survive the conversion, with cell
    // values and their order preserved.
    Assert.assertEquals(2, dataSetResult.getColumnNames().size());
    Assert.assertEquals(2, dataSetResult.getRows().size());
    // Integer.valueOf instead of the deprecated Integer(int) constructor.
    Assert.assertEquals(Integer.valueOf(1), dataSetResult.getRows().get(0).get(0));
    Assert.assertEquals("v1", dataSetResult.getRows().get(0).get(1));
    Assert.assertEquals(Integer.valueOf(2), dataSetResult.getRows().get(1).get(0));
    Assert.assertEquals("v2", dataSetResult.getRows().get(1).get(1));
}
示例2: readMultiaxialHierarchyFile
import org.apache.spark.sql.Row; //导入依赖的package包/类
/**
 * Reads the LOINC multiaxial hierarchy file and converts it to a {@link HierarchicalElement}
 * dataset. Rows whose parent or code column is null or empty are dropped before mapping.
 *
 * @param spark the Spark session
 * @param loincHierarchyPath path to the multiaxial hierarchy CSV
 * @return a dataset of {@link HierarchicalElement} representing the hierarchical relationship.
 */
public static Dataset<HierarchicalElement> readMultiaxialHierarchyFile(SparkSession spark,
String loincHierarchyPath) {
return spark.read()
.option("header", true)
.csv(loincHierarchyPath)
// Only the parent/child code pair is needed; column order fixes the
// positional getString(0)/getString(1) access in the mapper below.
.select(col("IMMEDIATE_PARENT"), col("CODE"))
// Filter out rows with a missing or blank parent...
.where(col("IMMEDIATE_PARENT").isNotNull()
.and(col("IMMEDIATE_PARENT").notEqual(lit(""))))
// ...and rows with a missing or blank code.
.where(col("CODE").isNotNull()
.and(col("CODE").notEqual(lit(""))))
.map((MapFunction<Row, HierarchicalElement>) row -> {
HierarchicalElement element = new HierarchicalElement();
// Both ancestor and descendant live in the LOINC code system.
element.setAncestorSystem(LOINC_CODE_SYSTEM_URI);
element.setAncestorValue(row.getString(0));
element.setDescendantSystem(LOINC_CODE_SYSTEM_URI);
element.setDescendantValue(row.getString(1));
return element;
}, Hierarchies.getHierarchicalElementEncoder());
}
示例3: getDataFrameOfElementsWithEntityGroup
import org.apache.spark.sql.Row; //导入依赖的package包/类
public void getDataFrameOfElementsWithEntityGroup() {
    // ---------------------------------------------------------
    final GetDataFrameOfElements operation = new GetDataFrameOfElements.Builder()
            .view(new View.Builder()
                    .entity("entity")
                    .build())
            .build();
    // ---------------------------------------------------------
    final Dataset<Row> df = runExample(operation, null);

    // Restrict to entities involving certain vertices
    final Dataset<Row> seeded = df.filter("vertex = 1 OR vertex = 2");
    printFilterResult(seeded, "df.filter(\"vertex = 1 OR vertex = 2\").show();");

    // Filter by property
    final Dataset<Row> filtered = df.filter("count > 1");
    printFilterResult(filtered, "df.filter(\"count > 1\").show();");
}

/**
 * Renders a filtered dataset into the example documentation: the Java snippet,
 * then the dataset's tabular output inside a fenced code block.
 *
 * @param dataset the filtered dataset to render
 * @param javaSnippet the Java one-liner shown to the reader
 */
private void printFilterResult(final Dataset<Row> dataset, final String javaSnippet) {
    final String result = dataset.showString(100, 20);
    printJava(javaSnippet);
    print("The results are:\n");
    print("```");
    // showString ends with a trailing newline pair; trim the last two chars
    // so the closing fence sits directly under the table.
    print(result.substring(0, result.length() - 2));
    print("```");
}
示例4: computeJoins
import org.apache.spark.sql.Row; //导入依赖的package包/类
/**
 * Computes the full join tree rooted at this node and returns only the
 * projected columns, optionally filtered and deduplicated.
 */
public Dataset<Row> computeJoins(SQLContext sqlContext){
    // First join this node with all of its children.
    Dataset<Row> joined = node.computeJoinWithChildren(sqlContext);

    // Build the projection list requested by the query.
    final Column[] projectionColumns = new Column[node.projection.size()];
    for (int i = 0; i < projectionColumns.length; i++) {
        projectionColumns[i] = new Column(node.projection.get(i));
    }

    // Apply the optional filter before projecting.
    if (filter != null) {
        joined = joined.filter(filter);
    }
    joined = joined.select(projectionColumns);

    // Deduplicate when DISTINCT was requested.
    if (selectDistinct) {
        joined = joined.distinct();
    }
    return joined;
}
示例5: getSubjectIdentification
import org.apache.spark.sql.Row; //导入依赖的package包/类
/**
 * Builds a stable subject identifier for a row by Base64-encoding the
 * identifying attributes and joining them with '|' separators.
 */
public static String getSubjectIdentification( Row row ) {
    // Pull the identifying attributes from the source row.
    String name = row.getAs( "Defendant Name" );
    String gender = row.getAs( "Gender" );
    String race = row.getAs( "Race" );
    String dob = row.getAs( "DOB" );

    // Encode each attribute independently so the separator can never collide
    // with attribute content.
    return String.join( "|",
            encoder.encodeToString( StringUtils.getBytesUtf8( name ) ),
            encoder.encodeToString( StringUtils.getBytesUtf8( gender ) ),
            encoder.encodeToString( StringUtils.getBytesUtf8( race ) ),
            encoder.encodeToString( StringUtils.getBytesUtf8( dob ) ) );
}
示例6: constructListWithColumnNames
import org.apache.spark.sql.Row; //导入依赖的package包/类
/**
 * Collects the given columns of the DataFrame into per-column lists of
 * doubles, in the order the column names were supplied.
 */
public static List<List<Double>> constructListWithColumnNames(DataFrame dataframe,
    String[] columnNames) {
    final List<List<Double>> columns = new ArrayList<>();
    for (String columnName : columnNames) {
        final List<Double> values = new ArrayList<>();
        // Collect the single-column projection to the driver and convert each
        // cell to a Double via its string form.
        for (Row row : dataframe.select(columnName).collect()) {
            values.add(Double.valueOf(row.get(0).toString()));
        }
        columns.add(values);
    }
    return columns;
}
示例7: coding
import org.apache.spark.sql.Row; //导入依赖的package包/类
@Test
public void coding() {
    Coding expectedCoding = condition.getSeverity().getCodingFirstRep();
    Coding actualCoding = decodedCondition.getSeverity().getCodingFirstRep();

    // Codings are a nested array, so we explode them into a table of the coding
    // fields so we can easily select and compare individual fields.
    Dataset<Row> severityCodings = conditionsDataset
        .select(functions.explode(conditionsDataset.col("severity.coding"))
            .alias("coding"))
        .select("coding.*") // Pull all fields in the coding to the top level.
        .cache();

    // Grab all compared fields from the first exploded row in one pass.
    Row firstCoding = severityCodings
        .select("code", "system", "userSelected", "display")
        .head();

    // Each field must match in the dataset row and in the round-tripped object.
    Assert.assertEquals(expectedCoding.getCode(), firstCoding.get(0));
    Assert.assertEquals(expectedCoding.getCode(), actualCoding.getCode());
    Assert.assertEquals(expectedCoding.getSystem(), firstCoding.get(1));
    Assert.assertEquals(expectedCoding.getSystem(), actualCoding.getSystem());
    Assert.assertEquals(expectedCoding.getUserSelected(), firstCoding.get(2));
    Assert.assertEquals(expectedCoding.getUserSelected(), actualCoding.getUserSelected());
    Assert.assertEquals(expectedCoding.getDisplay(), firstCoding.get(3));
    Assert.assertEquals(expectedCoding.getDisplay(), actualCoding.getDisplay());
}
示例8: read
import org.apache.spark.sql.Row; //导入依赖的package包/类
/**
 * Writes the test data to a Parquet file via Spark, then reads it back with
 * the MessagePack ParquetReader and returns every decoded value.
 *
 * @return all values decoded from the written Parquet file
 * @throws IOException if the Parquet read fails
 */
public List<Value> read() throws IOException
{
// Toggle Spark's legacy Parquet format before writing, so both formats can
// be exercised by the same fixture.
spark.conf().set(SQLConf$.MODULE$.PARQUET_WRITE_LEGACY_FORMAT().key(), isLegacyFormat);
// repartition(1) forces a single output part file under the target directory.
Dataset<Row> dataFrame = spark.createDataFrame(data, schema).repartition(1);
File file = new File(SparkTestBase.this.tempFolder.getRoot(), name);
dataFrame.write().options(options).parquet(file.getPath());
ArrayList<Value> results = new ArrayList<>();
// try-with-resources guarantees the reader is closed even if read() throws.
try (ParquetReader<Value> reader = ParquetReader
.builder(new MessagePackReadSupport(), new Path(file.getPath()))
.build()) {
Value v;
// reader.read() returns null when the file is exhausted.
while ((v = reader.read()) != null) {
results.add(v);
}
}
return results;
}
示例9: getRelatedTypeIDs
import org.apache.spark.sql.Row; //导入依赖的package包/类
/**
 * Maps an {@link Instance} into an Iterable over all of its relations,
 * each represented as a row of (related URI, predicate index, type index, instance ID).
 *
 * @param instance the requested {@link Instance}
 * @return an Iterator of all of its relations represented as rows of (related URI, predicate index, type index, instance ID)
 */
private Iterable<Row> getRelatedTypeIDs(Instance instance) {
    final Long instanceId = instance.getId();

    // Pair every table the instance appears in (one per type, or a single
    // entry when the instance has a single type) with the instance's ID.
    final List<Tuple2<Integer, Long>> typeIdPairs = getRelationEntityTypes(instance)
            .map(typeIndex -> new Tuple2<>(typeIndex, instanceId))
            .collect(Collectors.toList());

    // Emit one row per (relation, type) combination.
    return instance.getRelations().stream()
            .flatMap(relation -> typeIdPairs.stream()
                    .map(pair -> RowFactory.create(
                            relation.getObjectURI(),
                            relation.getPredicateIndex(),
                            pair._1(),
                            pair._2())))
            .collect(Collectors.toList());
}
示例10: returnDiff
import org.apache.spark.sql.Row; //导入依赖的package包/类
/**
 * Loads both tables from the local HSQLDB instance and returns the
 * left/right difference datasets produced by SparkCompare.
 */
private Pair<Dataset<Row>, Dataset<Row>> returnDiff(String table1, String table2)
{
    // Both sides share the same local HSQLDB connection settings.
    final String driver = "org.hsqldb.jdbc.JDBCDriver";
    final String url = "jdbc:hsqldb:hsql://127.0.0.1:9001/testDb";
    final String user = "SA";
    final String password = "";

    AppleTable leftAppleTable = SparkFactory.parallelizeJDBCSource(driver, url, user, password,
            "(select * from " + table1 + ")", "table1");
    AppleTable rightAppleTable = SparkFactory.parallelizeJDBCSource(driver, url, user, password,
            "(select * from " + table2 + ")", "table2");

    return SparkCompare.compareAppleTables(leftAppleTable, rightAppleTable);
}
示例11: main
import org.apache.spark.sql.Row; //导入依赖的package包/类
public static void main(String[] args) throws Exception {
//加载配置
loadArgs(args);
//生成Context
JavaSparkContext context = buildJavaSparkContext();
Dataset<Row> dataset = SparkDataFileConverter.extractDataFrame(taskInfo, context);
String mlAlgoName = taskInfo.getSparkTaskAlgorithm().getName();
MLAlgorithmDesc mlAlgoDesc = MLAlgorithmLoader.getMLAlgorithmDesc(mlAlgoName);
if (mlAlgoDesc.getComponentsType() == ComponentType.ESTIMATOR) {
excuteEstimator(taskInfo, dataset);
} else if (mlAlgoDesc.getComponentsType() == ComponentType.TRANSFORMER) {
excuteTransformer(taskInfo, dataset);
}
}
示例12: testCompareRdd
import org.apache.spark.sql.Row; //导入依赖的package包/类
/**
 * Test of compareRdd method, of class SparkCompare.
 */
@Test
public void testCompareRdd() {
    // Locate the two fixture files on the test classpath.
    String file1Path = this.getClass().getClassLoader().
            getResource("TC5NullsAndEmptyData1.txt").getPath();
    String file2Path = this.getClass().getClassLoader().
            getResource("TC5NullsAndEmptyData2.txt").getPath();

    Pair<Dataset<Row>, Dataset<Row>> comparisonResult = SparkCompare.compareFiles(file1Path, file2Path);

    try {
        comparisonResult.getLeft().show();
        comparisonResult.getRight().show();
    } catch (Exception e) {
        // Include the cause in the failure message instead of swallowing it,
        // so the real error is visible in the test report.
        Assert.fail("Straightforward output of test results somehow failed: " + e);
    }
}
示例13: testCompareJDBCtpFileAppleTablesWithDifference
import org.apache.spark.sql.Row; //导入依赖的package包/类
@Test
public void testCompareJDBCtpFileAppleTablesWithDifference()
{
    // Left side: JDBC-backed table from the local HSQLDB instance.
    AppleTable leftAppleTable = SparkFactory.parallelizeJDBCSource("org.hsqldb.jdbc.JDBCDriver",
            "jdbc:hsqldb:hsql://127.0.0.1:9001/testDb",
            "SA",
            "",
            "(select * from Persons1)", "table1");

    // Right side: plain text file from the test classpath.
    String file1Path = this.getClass().getClassLoader().
            getResource("TC1DiffsAndDups1.txt").getPath();
    AppleTable rightAppleTable = SparkFactory.parallelizeTextSource(file1Path, "table2");

    Pair<Dataset<Row>, Dataset<Row>> pair = SparkCompare.compareAppleTables(leftAppleTable, rightAppleTable);

    // The expectation is 2 differing records on the left and 5 on the right.
    // assertEquals reports the observed count on failure, unlike the manual
    // if/Assert.fail pattern which hid the actual value.
    Assert.assertEquals("unexpected different record count in left", 2, pair.getLeft().count());
    Assert.assertEquals("unexpected different record count in right", 5, pair.getRight().count());
}
示例14: calculate_stats_table
import org.apache.spark.sql.Row; //导入依赖的package包/类
/**
 * Computes basic statistics for a single property table and packs them into
 * a {@link TableStats} protobuf message.
 *
 * @param table the dataset holding the table's rows
 * @param tableName the name the statistics are registered under
 * @return the populated {@link TableStats} message
 */
private TableStats calculate_stats_table(Dataset<Row> table, String tableName) {
    TableStats.Builder statsBuilder = TableStats.newBuilder();

    // A table is "complex" when at least one subject occurs more than once,
    // i.e. the row count exceeds the number of distinct subjects.
    // (Locals renamed to camelCase per Java naming convention.)
    int tableSize = (int) table.count();
    int distinctSubjects = (int) table.select(this.column_name_subject).distinct().count();
    boolean isComplex = tableSize != distinctSubjects;

    statsBuilder.setSize(tableSize)
            .setDistinctSubjects(distinctSubjects)
            .setIsComplex(isComplex)
            .setName(tableName);
    return statsBuilder.build();
}
示例15: main
/**
 * Runs a 16-component PCA over the normalized covertype CSV and prints the
 * explained variance plus the projected features.
 *
 * <p>Usage: {@code PCAExpt [csvPath]} — the optional first argument overrides
 * the default input file location.
 */
public static void main(String[] args) {
    SparkSession spark = SparkSession.builder()
            .master("local[8]")
            .appName("PCAExpt")
            .getOrCreate();

    // Input path: default retained for backward compatibility, overridable
    // via the first command-line argument instead of being hard-coded only.
    String filePath = args.length > 0 ? args[0]
            : "/home/kchoppella/book/Chapter09/data/covtypeNorm.csv";

    // Load the data, reading the header row and inferring column types.
    Dataset<Row> inDataset = spark.read()
            .format("com.databricks.spark.csv")
            .option("header", "true")
            .option("inferSchema", true)
            .load(filePath);

    // Use every column except the label ("class") as a feature.
    List<String> inputColsList = new ArrayList<>(Arrays.asList(inDataset.columns()));
    inputColsList.remove("class");
    // Plain toArray: parallelStream was pure overhead for a handful of names.
    String[] inputCols = inputColsList.toArray(new String[0]);

    // Assemble all feature columns into a single "features" vector column.
    VectorAssembler assembler = new VectorAssembler().setInputCols(inputCols).setOutputCol("features");
    Dataset<Row> dataset = assembler.transform(inDataset);

    // Fit a 16-component PCA model and project the data onto it.
    PCAModel pca = new PCA()
            .setK(16)
            .setInputCol("features")
            .setOutputCol("pcaFeatures")
            .fit(dataset);
    Dataset<Row> result = pca.transform(dataset).select("pcaFeatures");

    System.out.println("Explained variance:");
    System.out.println(pca.explainedVariance());
    result.show(false);

    spark.stop();
}
开发者ID:PacktPublishing,项目名称:Machine-Learning-End-to-Endguide-for-Java-developers,代码行数:39,代码来源:PCAExpt.java