

Java RowFactory.create Method Code Examples

This article collects typical usage examples of the Java method org.apache.spark.sql.RowFactory.create. If you are wondering what RowFactory.create does, how to call it, or what real-world usage looks like, the curated examples below should help. You can also explore further usage examples of the enclosing class, org.apache.spark.sql.RowFactory.


The following presents 15 code examples of the RowFactory.create method, sorted by popularity by default.
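Before the examples, here is a minimal standalone sketch of the core pattern (not taken from any of the projects below): RowFactory.create accepts the field values as varargs (or a single Object[]) and returns a schema-less Row; the schema is only attached later, when the rows are passed to SparkSession.createDataFrame. The class name RowFactoryCreateSketch and the local-mode SparkSession are illustrative assumptions.

import java.util.Arrays;
import java.util.List;

import org.apache.spark.sql.Dataset;
import org.apache.spark.sql.Row;
import org.apache.spark.sql.RowFactory;
import org.apache.spark.sql.SparkSession;
import org.apache.spark.sql.types.DataTypes;
import org.apache.spark.sql.types.Metadata;
import org.apache.spark.sql.types.StructField;
import org.apache.spark.sql.types.StructType;

public class RowFactoryCreateSketch {
    public static void main(String[] args) {
        // Hypothetical local session, used only for this sketch.
        SparkSession spark = SparkSession.builder()
                .master("local[*]")
                .appName("RowFactoryCreateSketch")
                .getOrCreate();

        // RowFactory.create takes the field values as varargs.
        Row row = RowFactory.create("hello", 42, true);

        // A Row carries no schema of its own; fields are read back positionally.
        System.out.println(row.getString(0) + ", " + row.getInt(1) + ", " + row.getBoolean(2));

        // The schema is supplied when the rows become a Dataset<Row>.
        StructType schema = new StructType(new StructField[]{
                new StructField("text", DataTypes.StringType, true, Metadata.empty()),
                new StructField("number", DataTypes.IntegerType, true, Metadata.empty()),
                new StructField("flag", DataTypes.BooleanType, true, Metadata.empty())
        });
        List<Row> rows = Arrays.asList(row);
        Dataset<Row> df = spark.createDataFrame(rows, schema);
        df.show();

        spark.stop();
    }
}

Note that the order and types of the values passed to RowFactory.create must match the StructType: nothing is validated at Row-construction time, so mismatches only surface when Spark evaluates the DataFrame.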

Example 1: generateData_week_timepoints_by_10_minutes

import org.apache.spark.sql.RowFactory; // import the package/class the method depends on
private static Dataset<Row> generateData_week_timepoints_by_10_minutes(SparkSession spark) {
    StructField[] structFields = new StructField[1];
    org.apache.spark.sql.types.DataType dataType = DataTypes.IntegerType;
    String column = "timepoint";
    StructField structField = new StructField(column, dataType, true, Metadata.empty());
    structFields[0] = structField;

    StructType structType = new StructType(structFields);

    List<Row> rows = new ArrayList<>();

    // one row per 10-minute interval in a week: 7 * 24 * 60 / 10 = 1008 rows
    int weekTotalMinutes = 7 * 24 * 60;
    int timepointIntervalMinutes = 10;
    for (int i = 0; i < weekTotalMinutes / timepointIntervalMinutes; i++) {
        Object[] objects = new Object[structFields.length];
        objects[0] = i;
        Row row = RowFactory.create(objects);
        rows.add(row);
    }

    Dataset<Row> df = spark.createDataFrame(rows, structType);
    return df;
}
 
Developer: uber, Project: uberscriptquery, Lines: 24, Source: QueryEngine.java

Example 2: generateData_numbers_1k

import org.apache.spark.sql.RowFactory; // import the package/class the method depends on
private static Dataset<Row> generateData_numbers_1k(SparkSession spark) {
    StructField[] structFields = new StructField[1];
    org.apache.spark.sql.types.DataType dataType = DataTypes.IntegerType;
    String column = "number";
    StructField structField = new StructField(column, dataType, true, Metadata.empty());
    structFields[0] = structField;

    StructType structType = new StructType(structFields);

    List<Row> rows = new ArrayList<>();

    // generates the integers 0..1000 inclusive, i.e. 1001 rows
    for (int i = 0; i <= 1000; i++) {
        Object[] objects = new Object[structFields.length];
        objects[0] = i;
        Row row = RowFactory.create(objects);
        rows.add(row);
    }

    Dataset<Row> df = spark.createDataFrame(rows, structType);
    return df;
}
 
Developer: uber, Project: uberscriptquery, Lines: 22, Source: QueryEngine.java

Example 3: call

import org.apache.spark.sql.RowFactory; // import the package/class the method depends on
@Override
public Row call(String line) throws Exception {
    String[] strArr;
    if (StringUtils.isEmpty(dataFile.getDelim())) {
        strArr = new String[]{line};
    } else {
        strArr = line.split(dataFile.getDelim());
    }

    List<FieldInfo> fieldInfos = dataFile.getFieldInfos();
    Object[] objs = new Object[fieldInfos.size()];
    for (int i = 0; i < fieldInfos.size(); i++) {
        FieldInfo fieldInfo = fieldInfos.get(i);
        // single column
        if (fieldInfo.getIndex() != -1) {
            objs[i] = fieldCall(fieldInfo, strArr[i]);
        } else {
            // multiple columns: the field spans input columns [startIndex, endIndex]
            int tmpSize = fieldInfo.getEndIndex() - fieldInfo.getStartIndex() + 1;
            String[] tmp = new String[tmpSize];
            System.arraycopy(strArr, fieldInfo.getStartIndex(), tmp, 0, tmpSize);
            objs[i] = fieldCall(fieldInfo, tmp);
        }
    }
    return RowFactory.create(objs);
}
 
Developer: hays2hong, Project: stonk, Lines: 27, Source: LineParse.java

Example 4: call

import org.apache.spark.sql.RowFactory; // import the package/class the method depends on
@Override
public Row call(Row row) throws Exception {
	List<String> words = Arrays.asList(row.getString(0).split("\\s+"));
	int n = words.size();
	List<Tuple2<String, String>> sequence = new ArrayList<Tuple2<String, String>>(n);
	for (int i = 0; i < n; i++) {
		// pair each word with the placeholder tag "UNK"; decode(...) predicts the actual tags
		sequence.add(new Tuple2<String, String>(words.get(i), "UNK"));
	}
	List<String> partsOfSpeech = decode(sequence);
	StringBuilder sb = new StringBuilder();
	for (String pos : partsOfSpeech) {
		sb.append(pos);
		sb.append(' ');
	}
	return RowFactory.create(row.getString(0), sb.toString().trim());
}
 
Developer: phuonglh, Project: vn.vitk, Lines: 17, Source: CMMModel.java

Example 5: call

import org.apache.spark.sql.RowFactory; // import the package/class the method depends on
@Override
public Row call(Tuple2<Iterable<Row>, Iterable<Row>> cogrouped) throws Exception {
  // There should only be one 'into' record per key
  Row intoRow = cogrouped._1().iterator().next();
  Row[] fromRows = Iterables.toArray(cogrouped._2(), Row.class);
  int intoRowNumFields = intoRow.size();

  Object[] nestedValues = new Object[intoRowNumFields + 1];
  for (int i = 0; i < intoRowNumFields; i++) {
    nestedValues[i] = intoRow.get(i);
  }
  nestedValues[intoRowNumFields] = fromRows;

  Row nested = RowFactory.create(nestedValues);

  return nested;
}
 
Developer: cloudera-labs, Project: envelope, Lines: 18, Source: NestDeriver.java

Example 6: rowForRecord

import org.apache.spark.sql.RowFactory; // import the package/class the method depends on
private static Row rowForRecord(GenericRecord record) {
  List<Object> values = Lists.newArrayList();

  for (Field field : record.getSchema().getFields()) {
    Object value = record.get(field.name());

    Type fieldType = field.schema().getType();
    // for union schemas (typically ["null", T]) use the second branch's type
    if (fieldType.equals(Type.UNION)) {
      fieldType = field.schema().getTypes().get(1).getType();
    }
    // Avro returns Utf8s for strings, which Spark SQL doesn't know how to use
    if (fieldType.equals(Type.STRING) && value != null) {
      value = value.toString();
    }

    values.add(value);
  }

  return RowFactory.create(values.toArray());
}
 
Developer: cloudera-labs, Project: envelope, Lines: 21, Source: AvroTranslator.java

Example 7: getAttributeRow

import org.apache.spark.sql.RowFactory; // import the package/class the method depends on
private static Row getAttributeRow(Instance instance, Predicate predicate, Object value) {
    return RowFactory.create(
            instance.getId(),
            predicate.getPredicateIndex(),
            LiteralType.toString(predicate.getLiteralType()),
            predicate.getLanguage(),
            value.toString()
    );
}
 
Developer: Merck, Project: rdf2x, Lines: 10, Source: InstanceRelationWriter.java

Example 8: testAppendWithoutSchema

import org.apache.spark.sql.RowFactory; // import the package/class the method depends on
@Test
public void testAppendWithoutSchema() {
  Row row = RowFactory.create("hello", 1, true);
  Row appendRow = RowUtils.append(row, -50.0);
  
  assertEquals(appendRow.length(), 4);
  assertEquals(appendRow.get(0), "hello");
  assertEquals(appendRow.get(1), 1);
  assertEquals(appendRow.get(2), true);
  assertEquals(appendRow.get(3), -50.0);
}
 
Developer: cloudera-labs, Project: envelope, Lines: 12, Source: TestRowUtils.java

Example 9: call

import org.apache.spark.sql.RowFactory; // import the package/class the method depends on
@Override
public Row call(Tuple2<Row, Long> arg0) throws Exception {
	int oldNumCols = arg0._1.length();
	Object [] fields = new Object[oldNumCols + 1];
	for(int i = 0; i < oldNumCols; i++) {
		fields[i] = arg0._1.get(i);
	}
	// append the 1-based row index as the new last column
	fields[oldNumCols] = Double.valueOf(arg0._2 + 1);
	return RowFactory.create(fields);
}
 
Developer: apache, Project: systemml, Lines: 11, Source: RDDConverterUtilsExt.java

Example 10: call

import org.apache.spark.sql.RowFactory; // import the package/class the method depends on
@Override
public Row call(String record) throws Exception {
      String[] fields = IOUtilFunctions.splitCSV(record, _delim);
      Object[] objects = new Object[fields.length]; 
      for (int i=0; i<fields.length; i++) {
	      objects[i] = UtilFunctions.stringToObject(_schema[i], fields[i]);
      }
      return RowFactory.create(objects);
}
 
Developer: apache, Project: systemml, Lines: 10, Source: FrameRDDConverterUtils.java

Example 11: testInputFrameAndMatrixOutputMatrixAndFrame

import org.apache.spark.sql.RowFactory; // import the package/class the method depends on
@Test
public void testInputFrameAndMatrixOutputMatrixAndFrame() {
	System.out.println("MLContextFrameTest - input frame and matrix, output matrix and frame");
	
	Row[] rowsA = {RowFactory.create("Doc1", "Feat1", 10), RowFactory.create("Doc1", "Feat2", 20), RowFactory.create("Doc2", "Feat1", 31)};

	JavaRDD<Row> javaRddRowA = sc.parallelize(Arrays.asList(rowsA));

	List<StructField> fieldsA = new ArrayList<StructField>();
	fieldsA.add(DataTypes.createStructField("myID", DataTypes.StringType, true));
	fieldsA.add(DataTypes.createStructField("FeatureName", DataTypes.StringType, true));
	fieldsA.add(DataTypes.createStructField("FeatureValue", DataTypes.IntegerType, true));
	StructType schemaA = DataTypes.createStructType(fieldsA);
	Dataset<Row> dataFrameA = spark.createDataFrame(javaRddRowA, schemaA);

	String dmlString = "[tA, tAM] = transformencode (target = A, spec = \"{ids: false ,recode: [ myID, FeatureName ]}\");";

	Script script = dml(dmlString)
			.in("A", dataFrameA,
					new FrameMetadata(FrameFormat.CSV, dataFrameA.count(), (long) dataFrameA.columns().length))
			.out("tA").out("tAM");
	MLResults results = ml.execute(script);

	double[][] matrixtA = results.getMatrixAs2DDoubleArray("tA");
	Assert.assertEquals(10.0, matrixtA[0][2], 0.0);
	Assert.assertEquals(20.0, matrixtA[1][2], 0.0);
	Assert.assertEquals(31.0, matrixtA[2][2], 0.0);

	Dataset<Row> dataFrame_tA = results.getMatrix("tA").toDF();
	System.out.println("Number of matrix tA rows = " + dataFrame_tA.count());
	dataFrame_tA.printSchema();
	dataFrame_tA.show();
	
	Dataset<Row> dataFrame_tAM = results.getFrame("tAM").toDF();
	System.out.println("Number of frame tAM rows = " + dataFrame_tAM.count());
	dataFrame_tAM.printSchema();
	dataFrame_tAM.show();
}
 
Developer: apache, Project: systemml, Lines: 39, Source: MLContextFrameTest.java

Example 12: testTransform

import org.apache.spark.sql.RowFactory; // import the package/class the method depends on
@Test
public void testTransform() {
	System.out.println("MLContextFrameTest - transform");
	
	Row[] rowsA = {RowFactory.create("\"`@(\"(!&",2,"20news-bydate-train/comp.os.ms-windows.misc/9979"),
			RowFactory.create("\"`@(\"\"(!&\"",3,"20news-bydate-train/comp.os.ms-windows.misc/9979")};

	JavaRDD<Row> javaRddRowA = sc.parallelize(Arrays.asList(rowsA));

	List<StructField> fieldsA = new ArrayList<StructField>();
	fieldsA.add(DataTypes.createStructField("featureName", DataTypes.StringType, true));
	fieldsA.add(DataTypes.createStructField("featureValue", DataTypes.IntegerType, true));
	fieldsA.add(DataTypes.createStructField("id", DataTypes.StringType, true));
	StructType schemaA = DataTypes.createStructType(fieldsA);
	Dataset<Row> dataFrameA = spark.createDataFrame(javaRddRowA, schemaA);

	String dmlString = "[tA, tAM] = transformencode (target = A, spec = \"{ids: false ,recode: [ featureName, id ]}\");";

	Script script = dml(dmlString)
			.in("A", dataFrameA,
					new FrameMetadata(FrameFormat.CSV, dataFrameA.count(), (long) dataFrameA.columns().length))
			.out("tA").out("tAM");
	ml.setExplain(true);
	ml.setExplainLevel(ExplainLevel.RECOMPILE_HOPS);
	MLResults results = ml.execute(script);

	double[][] matrixtA = results.getMatrixAs2DDoubleArray("tA");
	Assert.assertEquals(1.0, matrixtA[0][2], 0.0);

	Dataset<Row> dataFrame_tA = results.getMatrix("tA").toDF();
	System.out.println("Number of matrix tA rows = " + dataFrame_tA.count());
	dataFrame_tA.printSchema();
	dataFrame_tA.show();
	
	Dataset<Row> dataFrame_tAM = results.getFrame("tAM").toDF();
	System.out.println("Number of frame tAM rows = " + dataFrame_tAM.count());
	dataFrame_tAM.printSchema();
	dataFrame_tAM.show();
}
 
Developer: apache, Project: systemml, Lines: 40, Source: MLContextFrameTest.java

Example 13: call

import org.apache.spark.sql.RowFactory; // import the package/class the method depends on
@Override
public Row call(String str) throws Exception {
	String[] strings = str.split(",");
	Double[] doubles = new Double[strings.length];
	for (int i = 0; i < strings.length; i++) {
		doubles[i] = Double.parseDouble(strings[i]);
	}
	// the (Object[]) cast passes the array as the varargs argument list rather than as a single field
	return RowFactory.create((Object[]) doubles);
}
 
Developer: apache, Project: systemml, Lines: 10, Source: MLContextTest.java

Example 14: call

import org.apache.spark.sql.RowFactory; // import the package/class the method depends on
@Override
public Row call(
		SimpleFeature feature )
		throws Exception {
	Object[] fields = new Serializable[schema.size()];

	for (int i = 0; i < schema.size(); i++) {
		Object fieldObj = feature.getAttribute(i);
		if (fieldObj != null) {
			StructField structField = schema.apply(i);
			if (structField.name().equals(
					"geom")) {
				fields[i] = geomWriter.write((Geometry) fieldObj);
			}
			else if (structField.dataType() == DataTypes.TimestampType) {
				// convert java.util.Date to java.sql.Timestamp for Spark SQL
				fields[i] = new Timestamp(((Date) fieldObj).getTime());
			}
			else if (structField.dataType() != null) {
				fields[i] = (Serializable) fieldObj;
			}
			else {
				LOGGER.error("Unexpected attribute in field(" + structField.name() + "): " + fieldObj);
			}
		}
	}

	return RowFactory.create(fields);
}
 
Developer: locationtech, Project: geowave, Lines: 30, Source: SimpleFeatureMapper.java

Example 15: call

import org.apache.spark.sql.RowFactory; // import the package/class the method depends on
@Override
public Row call(
        Tuple2<Long,Tuple2<byte[], byte[]>> t) throws Exception {

    Long conglomerateId = t._1;
    byte[] key = t._2._1;
    byte[] value = t._2._2;
    return RowFactory.create(conglomerateId, Bytes.toHex(key), value);
}
 
Developer: splicemachine, Project: spliceengine, Lines: 10, Source: HBaseBulkImportRowToSparkRowFunction.java


Note: The org.apache.spark.sql.RowFactory.create method examples in this article were compiled by 纯净天空 from GitHub, MSDocs, and other open-source code and documentation platforms. The snippets were selected from open-source projects contributed by many developers, and copyright of the source code remains with the original authors. Please refer to each project's license before distributing or using the code; do not reproduce without permission.