本文整理汇总了Java中org.apache.spark.sql.Row类的典型用法代码示例。如果您正苦于以下问题:Java Row类的具体用法?Java Row怎么用?Java Row使用的例子?那么, 这里精选的类代码示例或许可以为您提供帮助。
Row类属于org.apache.spark.sql包,在下文中一共展示了Row类的15个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Java代码示例。
示例1: test_getDataSetResult
import org.apache.spark.sql.Row; //导入依赖的package包/类
@Test
public void test_getDataSetResult() {
    // Build a two-column (nullable int, nullable string) schema for the test DataFrame.
    StructField[] structFields = new StructField[]{
            new StructField("intColumn", DataTypes.IntegerType, true, Metadata.empty()),
            new StructField("stringColumn", DataTypes.StringType, true, Metadata.empty())
    };
    StructType structType = new StructType(structFields);

    List<Row> rows = new ArrayList<>();
    rows.add(RowFactory.create(1, "v1"));
    rows.add(RowFactory.create(2, "v2"));
    Dataset<Row> df = sparkSession.createDataFrame(rows, structType);

    DataSetResult dataSetResult = SparkUtils.getDataSetResult(df);

    // Both column names and both rows must survive the conversion, with cell
    // values and their order preserved.
    Assert.assertEquals(2, dataSetResult.getColumnNames().size());
    Assert.assertEquals(2, dataSetResult.getRows().size());
    // Integer.valueOf instead of the deprecated Integer(int) constructor.
    Assert.assertEquals(Integer.valueOf(1), dataSetResult.getRows().get(0).get(0));
    Assert.assertEquals("v1", dataSetResult.getRows().get(0).get(1));
    Assert.assertEquals(Integer.valueOf(2), dataSetResult.getRows().get(1).get(0));
    Assert.assertEquals("v2", dataSetResult.getRows().get(1).get(1));
}
示例2: readMultiaxialHierarchyFile
import org.apache.spark.sql.Row; //导入依赖的package包/类
/**
 * Reads the LOINC multiaxial hierarchy file and converts it to a {@link HierarchicalElement}
 * dataset. Rows whose parent or code column is null or empty are dropped before mapping.
 *
 * @param spark the Spark session
 * @param loincHierarchyPath path to the multiaxial hierarchy CSV
 * @return a dataset of {@link HierarchicalElement} representing the hierarchical relationship.
 */
public static Dataset<HierarchicalElement> readMultiaxialHierarchyFile(SparkSession spark,
String loincHierarchyPath) {
return spark.read()
.option("header", true)
.csv(loincHierarchyPath)
// Only the parent/child code pair is needed; column order fixes the
// positional getString(0)/getString(1) access in the mapper below.
.select(col("IMMEDIATE_PARENT"), col("CODE"))
// Filter out rows with a missing or blank parent...
.where(col("IMMEDIATE_PARENT").isNotNull()
.and(col("IMMEDIATE_PARENT").notEqual(lit(""))))
// ...and rows with a missing or blank code.
.where(col("CODE").isNotNull()
.and(col("CODE").notEqual(lit(""))))
.map((MapFunction<Row, HierarchicalElement>) row -> {
HierarchicalElement element = new HierarchicalElement();
// Both ancestor and descendant live in the LOINC code system.
element.setAncestorSystem(LOINC_CODE_SYSTEM_URI);
element.setAncestorValue(row.getString(0));
element.setDescendantSystem(LOINC_CODE_SYSTEM_URI);
element.setDescendantValue(row.getString(1));
return element;
}, Hierarchies.getHierarchicalElementEncoder());
}
示例3: getDataFrameOfElementsWithEntityGroup
import org.apache.spark.sql.Row; //导入依赖的package包/类
public void getDataFrameOfElementsWithEntityGroup() {
    // ---------------------------------------------------------
    final GetDataFrameOfElements operation = new GetDataFrameOfElements.Builder()
            .view(new View.Builder()
                    .entity("entity")
                    .build())
            .build();
    // ---------------------------------------------------------
    final Dataset<Row> df = runExample(operation, null);

    // Restrict to entities involving certain vertices
    final Dataset<Row> seeded = df.filter("vertex = 1 OR vertex = 2");
    printFilterResult(seeded, "df.filter(\"vertex = 1 OR vertex = 2\").show();");

    // Filter by property
    final Dataset<Row> filtered = df.filter("count > 1");
    printFilterResult(filtered, "df.filter(\"count > 1\").show();");
}

/**
 * Renders a filtered dataset into the example documentation: the Java snippet,
 * then the dataset's tabular output inside a fenced code block.
 *
 * @param dataset the filtered dataset to render
 * @param javaSnippet the Java one-liner shown to the reader
 */
private void printFilterResult(final Dataset<Row> dataset, final String javaSnippet) {
    final String result = dataset.showString(100, 20);
    printJava(javaSnippet);
    print("The results are:\n");
    print("```");
    // showString ends with a trailing newline pair; trim the last two chars
    // so the closing fence sits directly under the table.
    print(result.substring(0, result.length() - 2));
    print("```");
}
示例4: computeJoins
import org.apache.spark.sql.Row; //导入依赖的package包/类
/**
 * Computes the full join tree rooted at this node and returns only the
 * projected columns, optionally filtered and deduplicated.
 */
public Dataset<Row> computeJoins(SQLContext sqlContext){
    // First join this node with all of its children.
    Dataset<Row> joined = node.computeJoinWithChildren(sqlContext);

    // Build the projection list requested by the query.
    final Column[] projectionColumns = new Column[node.projection.size()];
    for (int i = 0; i < projectionColumns.length; i++) {
        projectionColumns[i] = new Column(node.projection.get(i));
    }

    // Apply the optional filter before projecting.
    if (filter != null) {
        joined = joined.filter(filter);
    }
    joined = joined.select(projectionColumns);

    // Deduplicate when DISTINCT was requested.
    if (selectDistinct) {
        joined = joined.distinct();
    }
    return joined;
}
示例5: getSubjectIdentification
import org.apache.spark.sql.Row; //导入依赖的package包/类
/**
 * Builds a stable subject identifier for a row by Base64-encoding the
 * identifying attributes and joining them with '|' separators.
 */
public static String getSubjectIdentification( Row row ) {
    // Pull the identifying attributes from the source row.
    String name = row.getAs( "Defendant Name" );
    String gender = row.getAs( "Gender" );
    String race = row.getAs( "Race" );
    String dob = row.getAs( "DOB" );

    // Encode each attribute independently so the separator can never collide
    // with attribute content.
    return String.join( "|",
            encoder.encodeToString( StringUtils.getBytesUtf8( name ) ),
            encoder.encodeToString( StringUtils.getBytesUtf8( gender ) ),
            encoder.encodeToString( StringUtils.getBytesUtf8( race ) ),
            encoder.encodeToString( StringUtils.getBytesUtf8( dob ) ) );
}
示例6: constructListWithColumnNames
import org.apache.spark.sql.Row; //导入依赖的package包/类
/**
 * Collects the given columns of the DataFrame into per-column lists of
 * doubles, in the order the column names were supplied.
 */
public static List<List<Double>> constructListWithColumnNames(DataFrame dataframe,
    String[] columnNames) {
    final List<List<Double>> columns = new ArrayList<>();
    for (String columnName : columnNames) {
        final List<Double> values = new ArrayList<>();
        // Collect the single-column projection to the driver and convert each
        // cell to a Double via its string form.
        for (Row row : dataframe.select(columnName).collect()) {
            values.add(Double.valueOf(row.get(0).toString()));
        }
        columns.add(values);
    }
    return columns;
}
示例7: coding
import org.apache.spark.sql.Row; //导入依赖的package包/类
@Test
public void coding() {
    Coding expectedCoding = condition.getSeverity().getCodingFirstRep();
    Coding actualCoding = decodedCondition.getSeverity().getCodingFirstRep();

    // Codings are a nested array, so we explode them into a table of the coding
    // fields so we can easily select and compare individual fields.
    Dataset<Row> severityCodings = conditionsDataset
        .select(functions.explode(conditionsDataset.col("severity.coding"))
            .alias("coding"))
        .select("coding.*") // Pull all fields in the coding to the top level.
        .cache();

    // Grab all compared fields from the first exploded row in one pass.
    Row firstCoding = severityCodings
        .select("code", "system", "userSelected", "display")
        .head();

    // Each field must match in the dataset row and in the round-tripped object.
    Assert.assertEquals(expectedCoding.getCode(), firstCoding.get(0));
    Assert.assertEquals(expectedCoding.getCode(), actualCoding.getCode());
    Assert.assertEquals(expectedCoding.getSystem(), firstCoding.get(1));
    Assert.assertEquals(expectedCoding.getSystem(), actualCoding.getSystem());
    Assert.assertEquals(expectedCoding.getUserSelected(), firstCoding.get(2));
    Assert.assertEquals(expectedCoding.getUserSelected(), actualCoding.getUserSelected());
    Assert.assertEquals(expectedCoding.getDisplay(), firstCoding.get(3));
    Assert.assertEquals(expectedCoding.getDisplay(), actualCoding.getDisplay());
}
示例8: read
import org.apache.spark.sql.Row; //导入依赖的package包/类
/**
 * Writes the test data to a Parquet file via Spark, then reads it back with
 * the MessagePack ParquetReader and returns every decoded value.
 *
 * @return all values decoded from the written Parquet file
 * @throws IOException if the Parquet read fails
 */
public List<Value> read() throws IOException
{
// Toggle Spark's legacy Parquet format before writing, so both formats can
// be exercised by the same fixture.
spark.conf().set(SQLConf$.MODULE$.PARQUET_WRITE_LEGACY_FORMAT().key(), isLegacyFormat);
// repartition(1) forces a single output part file under the target directory.
Dataset<Row> dataFrame = spark.createDataFrame(data, schema).repartition(1);
File file = new File(SparkTestBase.this.tempFolder.getRoot(), name);
dataFrame.write().options(options).parquet(file.getPath());
ArrayList<Value> results = new ArrayList<>();
// try-with-resources guarantees the reader is closed even if read() throws.
try (ParquetReader<Value> reader = ParquetReader
.builder(new MessagePackReadSupport(), new Path(file.getPath()))
.build()) {
Value v;
// reader.read() returns null when the file is exhausted.
while ((v = reader.read()) != null) {
results.add(v);
}
}
return results;
}
示例9: getRelatedTypeIDs
import org.apache.spark.sql.Row; //导入依赖的package包/类
/**
 * Maps an {@link Instance} into an Iterable over all of its relations,
 * each represented as a row of (related URI, predicate index, type index, instance ID).
 *
 * @param instance the requested {@link Instance}
 * @return an Iterator of all of its relations represented as rows of (related URI, predicate index, type index, instance ID)
 */
private Iterable<Row> getRelatedTypeIDs(Instance instance) {
    final Long instanceId = instance.getId();

    // Pair every table the instance appears in (one per type, or a single
    // entry when the instance has a single type) with the instance's ID.
    final List<Tuple2<Integer, Long>> typeIdPairs = getRelationEntityTypes(instance)
            .map(typeIndex -> new Tuple2<>(typeIndex, instanceId))
            .collect(Collectors.toList());

    // Emit one row per (relation, type) combination.
    return instance.getRelations().stream()
            .flatMap(relation -> typeIdPairs.stream()
                    .map(pair -> RowFactory.create(
                            relation.getObjectURI(),
                            relation.getPredicateIndex(),
                            pair._1(),
                            pair._2())))
            .collect(Collectors.toList());
}
示例10: returnDiff
import org.apache.spark.sql.Row; //导入依赖的package包/类
/**
 * Loads both tables from the local HSQLDB instance and returns the
 * left/right difference datasets produced by SparkCompare.
 */
private Pair<Dataset<Row>, Dataset<Row>> returnDiff(String table1, String table2)
{
    // Both sides share the same local HSQLDB connection settings.
    final String driver = "org.hsqldb.jdbc.JDBCDriver";
    final String url = "jdbc:hsqldb:hsql://127.0.0.1:9001/testDb";
    final String user = "SA";
    final String password = "";

    AppleTable leftAppleTable = SparkFactory.parallelizeJDBCSource(driver, url, user, password,
            "(select * from " + table1 + ")", "table1");
    AppleTable rightAppleTable = SparkFactory.parallelizeJDBCSource(driver, url, user, password,
            "(select * from " + table2 + ")", "table2");

    return SparkCompare.compareAppleTables(leftAppleTable, rightAppleTable);
}
示例11: main
import org.apache.spark.sql.Row; //导入依赖的package包/类
public static void main(String[] args) throws Exception {
//加载配置
loadArgs(args);
//生成Context
JavaSparkContext context = buildJavaSparkContext();
Dataset<Row> dataset = SparkDataFileConverter.extractDataFrame(taskInfo, context);
String mlAlgoName = taskInfo.getSparkTaskAlgorithm().getName();
MLAlgorithmDesc mlAlgoDesc = MLAlgorithmLoader.getMLAlgorithmDesc(mlAlgoName);
if (mlAlgoDesc.getComponentsType() == ComponentType.ESTIMATOR) {
excuteEstimator(taskInfo, dataset);
} else if (mlAlgoDesc.getComponentsType() == ComponentType.TRANSFORMER) {
excuteTransformer(taskInfo, dataset);
}
}
示例12: testCompareRdd
import org.apache.spark.sql.Row; //导入依赖的package包/类
/**
 * Test of compareRdd method, of class SparkCompare.
 */
@Test
public void testCompareRdd() {
    // Locate the two fixture files on the test classpath.
    String file1Path = this.getClass().getClassLoader().
            getResource("TC5NullsAndEmptyData1.txt").getPath();
    String file2Path = this.getClass().getClassLoader().
            getResource("TC5NullsAndEmptyData2.txt").getPath();

    Pair<Dataset<Row>, Dataset<Row>> comparisonResult = SparkCompare.compareFiles(file1Path, file2Path);

    try {
        comparisonResult.getLeft().show();
        comparisonResult.getRight().show();
    } catch (Exception e) {
        // Include the cause in the failure message instead of swallowing it,
        // so the real error is visible in the test report.
        Assert.fail("Straightforward output of test results somehow failed: " + e);
    }
}
示例13: testCompareJDBCtpFileAppleTablesWithDifference
import org.apache.spark.sql.Row; //导入依赖的package包/类
@Test
public void testCompareJDBCtpFileAppleTablesWithDifference()
{
    // Left side: JDBC-backed table from the local HSQLDB instance.
    AppleTable leftAppleTable = SparkFactory.parallelizeJDBCSource("org.hsqldb.jdbc.JDBCDriver",
            "jdbc:hsqldb:hsql://127.0.0.1:9001/testDb",
            "SA",
            "",
            "(select * from Persons1)", "table1");

    // Right side: plain text file from the test classpath.
    String file1Path = this.getClass().getClassLoader().
            getResource("TC1DiffsAndDups1.txt").getPath();
    AppleTable rightAppleTable = SparkFactory.parallelizeTextSource(file1Path, "table2");

    Pair<Dataset<Row>, Dataset<Row>> pair = SparkCompare.compareAppleTables(leftAppleTable, rightAppleTable);

    // The expectation is 2 differing records on the left and 5 on the right.
    // assertEquals reports the observed count on failure, unlike the manual
    // if/Assert.fail pattern which hid the actual value.
    Assert.assertEquals("unexpected different record count in left", 2, pair.getLeft().count());
    Assert.assertEquals("unexpected different record count in right", 5, pair.getRight().count());
}
示例14: calculate_stats_table
import org.apache.spark.sql.Row; //导入依赖的package包/类
/**
 * Computes basic statistics for a single property table and packs them into
 * a {@link TableStats} protobuf message.
 *
 * @param table the dataset holding the table's rows
 * @param tableName the name the statistics are registered under
 * @return the populated {@link TableStats} message
 */
private TableStats calculate_stats_table(Dataset<Row> table, String tableName) {
    TableStats.Builder statsBuilder = TableStats.newBuilder();

    // A table is "complex" when at least one subject occurs more than once,
    // i.e. the row count exceeds the number of distinct subjects.
    // (Locals renamed to camelCase per Java naming convention.)
    int tableSize = (int) table.count();
    int distinctSubjects = (int) table.select(this.column_name_subject).distinct().count();
    boolean isComplex = tableSize != distinctSubjects;

    statsBuilder.setSize(tableSize)
            .setDistinctSubjects(distinctSubjects)
            .setIsComplex(isComplex)
            .setName(tableName);
    return statsBuilder.build();
}
示例15: main
/**
 * Runs a 16-component PCA over the normalized covertype CSV and prints the
 * explained variance plus the projected features.
 *
 * <p>Usage: {@code PCAExpt [csvPath]} — the optional first argument overrides
 * the default input file location.
 */
public static void main(String[] args) {
    SparkSession spark = SparkSession.builder()
            .master("local[8]")
            .appName("PCAExpt")
            .getOrCreate();

    // Input path: default retained for backward compatibility, overridable
    // via the first command-line argument instead of being hard-coded only.
    String filePath = args.length > 0 ? args[0]
            : "/home/kchoppella/book/Chapter09/data/covtypeNorm.csv";

    // Load the data, reading the header row and inferring column types.
    Dataset<Row> inDataset = spark.read()
            .format("com.databricks.spark.csv")
            .option("header", "true")
            .option("inferSchema", true)
            .load(filePath);

    // Use every column except the label ("class") as a feature.
    List<String> inputColsList = new ArrayList<>(Arrays.asList(inDataset.columns()));
    inputColsList.remove("class");
    // Plain toArray: parallelStream was pure overhead for a handful of names.
    String[] inputCols = inputColsList.toArray(new String[0]);

    // Assemble all feature columns into a single "features" vector column.
    VectorAssembler assembler = new VectorAssembler().setInputCols(inputCols).setOutputCol("features");
    Dataset<Row> dataset = assembler.transform(inDataset);

    // Fit a 16-component PCA model and project the data onto it.
    PCAModel pca = new PCA()
            .setK(16)
            .setInputCol("features")
            .setOutputCol("pcaFeatures")
            .fit(dataset);
    Dataset<Row> result = pca.transform(dataset).select("pcaFeatures");

    System.out.println("Explained variance:");
    System.out.println(pca.explainedVariance());
    result.show(false);

    spark.stop();
}
开发者ID:PacktPublishing,项目名称:Machine-Learning-End-to-Endguide-for-Java-developers,代码行数:39,代码来源:PCAExpt.java