This article collects typical usage examples of the Java method org.apache.spark.sql.RowFactory.create. If you are wondering how RowFactory.create works, how to call it, or what real code that uses it looks like, the curated examples below should help. You can also explore further usage examples of the enclosing class, org.apache.spark.sql.RowFactory.
The following shows 15 code examples of the RowFactory.create method, sorted by popularity by default.
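Before the examples, a minimal self-contained sketch of the basic pattern may help: RowFactory.create builds an untyped Row from a varargs list of values, and that Row is only interpreted against column names and types once it is paired with a StructType in SparkSession.createDataFrame. The class name, column names, and local master below are illustrative and not taken from any of the examples.

import java.util.Arrays;
import java.util.List;
import org.apache.spark.sql.Row;
import org.apache.spark.sql.RowFactory;
import org.apache.spark.sql.SparkSession;
import org.apache.spark.sql.types.DataTypes;
import org.apache.spark.sql.types.StructField;
import org.apache.spark.sql.types.StructType;

public class RowFactoryBasics {
    public static void main(String[] args) {
        SparkSession spark = SparkSession.builder()
                .appName("RowFactoryBasics").master("local[*]").getOrCreate();

        // RowFactory.create takes the column values as varargs, in schema order.
        List<Row> rows = Arrays.asList(
                RowFactory.create("alice", 34),
                RowFactory.create("bob", 28));

        StructType schema = DataTypes.createStructType(new StructField[]{
                DataTypes.createStructField("name", DataTypes.StringType, false),
                DataTypes.createStructField("age", DataTypes.IntegerType, false)});

        // The untyped Rows only become a typed DataFrame when paired with the schema.
        spark.createDataFrame(rows, schema).show();
        spark.stop();
    }
}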
Example 1: generateData_week_timepoints_by_10_minutes
import org.apache.spark.sql.RowFactory; // import the package/class this method depends on

private static Dataset<Row> generateData_week_timepoints_by_10_minutes(SparkSession spark) {
    StructField[] structFields = new StructField[1];
    org.apache.spark.sql.types.DataType dataType = DataTypes.IntegerType;
    String column = "timepoint";
    StructField structField = new StructField(column, dataType, true, Metadata.empty());
    structFields[0] = structField;
    StructType structType = new StructType(structFields);
    List<Row> rows = new ArrayList<>();
    int weekTotalMinutes = 7 * 24 * 60;
    int timepointIntervalMinutes = 10;
    // One row per 10-minute timepoint in a week (7 * 24 * 6 = 1008 rows).
    for (int i = 0; i < weekTotalMinutes / timepointIntervalMinutes; i++) {
        Object[] objects = new Object[structFields.length];
        objects[0] = i;
        Row row = RowFactory.create(objects);
        rows.add(row);
    }
    Dataset<Row> df = spark.createDataFrame(rows, structType);
    return df;
}
Example 2: generateData_numbers_1k
import org.apache.spark.sql.RowFactory; // import the package/class this method depends on

private static Dataset<Row> generateData_numbers_1k(SparkSession spark) {
    StructField[] structFields = new StructField[1];
    org.apache.spark.sql.types.DataType dataType = DataTypes.IntegerType;
    String column = "number";
    StructField structField = new StructField(column, dataType, true, Metadata.empty());
    structFields[0] = structField;
    StructType structType = new StructType(structFields);
    List<Row> rows = new ArrayList<>();
    // Note: the loop bound is inclusive, so this emits 1001 rows (0 through 1000).
    for (int i = 0; i <= 1000; i++) {
        Object[] objects = new Object[structFields.length];
        objects[0] = i;
        Row row = RowFactory.create(objects);
        rows.add(row);
    }
    Dataset<Row> df = spark.createDataFrame(rows, structType);
    return df;
}
Example 3: call
import org.apache.spark.sql.RowFactory; // import the package/class this method depends on

@Override
public Row call(String line) throws Exception {
    String[] strArr;
    if (StringUtils.isEmpty(dataFile.getDelim())) {
        strArr = new String[]{line};
    } else {
        strArr = line.split(dataFile.getDelim());
    }
    List<FieldInfo> fieldInfos = dataFile.getFieldInfos();
    Object[] objs = new Object[fieldInfos.size()];
    for (int i = 0; i < fieldInfos.size(); i++) {
        FieldInfo fieldInfo = fieldInfos.get(i);
        // single column
        if (fieldInfo.getIndex() != -1) {
            objs[i] = fieldCall(fieldInfo, strArr[i]);
        // multiple columns
        } else {
            int tmpSize = fieldInfo.getEndIndex() - fieldInfo.getStartIndex() + 1;
            String[] tmp = new String[tmpSize];
            System.arraycopy(strArr, fieldInfo.getStartIndex(), tmp, 0, tmpSize);
            objs[i] = fieldCall(fieldInfo, tmp);
        }
    }
    return RowFactory.create(objs);
}
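Parser functions like this one (and the similar ones in Examples 10, 13, and 14) are normally wired into a DataFrame with JavaRDD.map plus a schema that matches the Object[] layout. Below is a hedged sketch of that wiring; the method name, the path argument, and the schema parameter are assumptions, not code from the example's project.

// Sketch only: turn a text file into a DataFrame using a Function<String, Row>
// such as the call(...) implementation shown above.
private static Dataset<Row> linesToDataFrame(SparkSession spark, JavaSparkContext jsc,
        String path, Function<String, Row> lineParser, StructType schema) {
    JavaRDD<String> lines = jsc.textFile(path);
    JavaRDD<Row> rows = lines.map(lineParser);   // one Row per input line, built via RowFactory.create
    return spark.createDataFrame(rows, schema);  // schema fields must match the Object[] order
}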
Example 4: call
import org.apache.spark.sql.RowFactory; // import the package/class this method depends on

@Override
public Row call(Row row) throws Exception {
    List<String> words = Arrays.asList(row.getString(0).split("\\s+"));
    int n = words.size();
    List<Tuple2<String, String>> sequence = new ArrayList<Tuple2<String, String>>(n);
    for (int i = 0; i < n; i++) {
        sequence.add(new Tuple2<String, String>(words.get(i), "UNK"));
    }
    List<String> partsOfSpeech = decode(sequence);
    StringBuilder sb = new StringBuilder();
    for (String pos : partsOfSpeech) {
        sb.append(pos);
        sb.append(' ');
    }
    return RowFactory.create(row.getString(0), sb.toString().trim());
}
Example 5: call
import org.apache.spark.sql.RowFactory; // import the package/class this method depends on

@Override
public Row call(Tuple2<Iterable<Row>, Iterable<Row>> cogrouped) throws Exception {
    // There should only be one 'into' record per key
    Row intoRow = cogrouped._1().iterator().next();
    Row[] fromRows = Iterables.toArray(cogrouped._2(), Row.class);
    int intoRowNumFields = intoRow.size();
    Object[] nestedValues = new Object[intoRowNumFields + 1];
    for (int i = 0; i < intoRowNumFields; i++) {
        nestedValues[i] = intoRow.get(i);
    }
    // The 'from' rows become one extra array-valued field appended to the 'into' row.
    nestedValues[intoRowNumFields] = fromRows;
    Row nested = RowFactory.create(nestedValues);
    return nested;
}
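The Tuple2<Iterable<Row>, Iterable<Row>> parameter above is exactly the value shape produced by JavaPairRDD.cogroup, so a function like this is applied after cogrouping the 'into' and 'from' rows by their join key. A hedged sketch of that usage follows; the method name and the String key type are assumptions.

// Sketch only: cogroup two keyed row RDDs and nest the 'from' rows into the 'into' row.
private static JavaRDD<Row> nestRows(JavaPairRDD<String, Row> into, JavaPairRDD<String, Row> from,
        Function<Tuple2<Iterable<Row>, Iterable<Row>>, Row> nester) {
    return into.cogroup(from)   // JavaPairRDD<String, Tuple2<Iterable<Row>, Iterable<Row>>>
            .values()           // drop the key, keep the grouped rows
            .map(nester);       // one nested Row per 'into' key, built via RowFactory.create
}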
Example 6: rowForRecord
import org.apache.spark.sql.RowFactory; // import the package/class this method depends on

private static Row rowForRecord(GenericRecord record) {
    List<Object> values = Lists.newArrayList();
    for (Field field : record.getSchema().getFields()) {
        Object value = record.get(field.name());
        Type fieldType = field.schema().getType();
        if (fieldType.equals(Type.UNION)) {
            // Assumes a [null, type] union; use the non-null branch's type.
            fieldType = field.schema().getTypes().get(1).getType();
        }
        // Avro returns Utf8s for strings, which Spark SQL doesn't know how to use
        if (fieldType.equals(Type.STRING) && value != null) {
            value = value.toString();
        }
        values.add(value);
    }
    return RowFactory.create(values.toArray());
}
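rowForRecord only produces the values; to build a DataFrame the caller still needs a StructType whose fields line up, in order, with the Avro schema. A minimal sketch under the assumption of records with a long "id" and a nullable string "name" field; both field names and the avroRecords list are illustrative.

// Sketch only: pair rowForRecord with a hand-written Spark schema mirroring the Avro schema.
StructType schema = DataTypes.createStructType(new StructField[]{
        DataTypes.createStructField("id", DataTypes.LongType, false),
        DataTypes.createStructField("name", DataTypes.StringType, true)});

List<Row> rows = new ArrayList<>();
for (GenericRecord record : avroRecords) {   // avroRecords: a List<GenericRecord> read elsewhere
    rows.add(rowForRecord(record));
}
Dataset<Row> df = spark.createDataFrame(rows, schema);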
Example 7: getAttributeRow
import org.apache.spark.sql.RowFactory; // import the package/class this method depends on

private static Row getAttributeRow(Instance instance, Predicate predicate, Object value) {
    return RowFactory.create(
            instance.getId(),
            predicate.getPredicateIndex(),
            LiteralType.toString(predicate.getLiteralType()),
            predicate.getLanguage(),
            value.toString()
    );
}
Example 8: testAppendWithoutSchema
import org.apache.spark.sql.RowFactory; // import the package/class this method depends on

@Test
public void testAppendWithoutSchema() {
    Row row = RowFactory.create("hello", 1, true);
    Row appendRow = RowUtils.append(row, -50.0);
    assertEquals(appendRow.length(), 4);
    assertEquals(appendRow.get(0), "hello");
    assertEquals(appendRow.get(1), 1);
    assertEquals(appendRow.get(2), true);
    assertEquals(appendRow.get(3), -50.0);
}
Example 9: call
import org.apache.spark.sql.RowFactory; // import the package/class this method depends on

@Override
public Row call(Tuple2<Row, Long> arg0) throws Exception {
    int oldNumCols = arg0._1.length();
    Object[] fields = new Object[oldNumCols + 1];
    for (int i = 0; i < oldNumCols; i++) {
        fields[i] = arg0._1.get(i);
    }
    // Append the (1-based) index as a Double column.
    fields[oldNumCols] = new Double(arg0._2 + 1);
    return RowFactory.create(fields);
}
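The Tuple2<Row, Long> argument above is the element type that JavaRDD.zipWithIndex produces, so this function is the map step that appends an index column to every row. A hedged sketch of that use; the method name is an assumption.

// Sketch only: append a 1-based row-index column to an existing DataFrame's rows.
private static JavaRDD<Row> withRowIndex(Dataset<Row> df,
        Function<Tuple2<Row, Long>, Row> appendIndex) {
    return df.javaRDD()        // JavaRDD<Row>
            .zipWithIndex()    // JavaPairRDD<Row, Long>, indices start at 0
            .map(appendIndex); // each Row gains one trailing Double field equal to index + 1
}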
Example 10: call
import org.apache.spark.sql.RowFactory; // import the package/class this method depends on

@Override
public Row call(String record) throws Exception {
    String[] fields = IOUtilFunctions.splitCSV(record, _delim);
    Object[] objects = new Object[fields.length];
    for (int i = 0; i < fields.length; i++) {
        objects[i] = UtilFunctions.stringToObject(_schema[i], fields[i]);
    }
    return RowFactory.create(objects);
}
Example 11: testInputFrameAndMatrixOutputMatrixAndFrame
import org.apache.spark.sql.RowFactory; // import the package/class this method depends on

@Test
public void testInputFrameAndMatrixOutputMatrixAndFrame() {
    System.out.println("MLContextFrameTest - input frame and matrix, output matrix and frame");

    Row[] rowsA = {RowFactory.create("Doc1", "Feat1", 10), RowFactory.create("Doc1", "Feat2", 20),
            RowFactory.create("Doc2", "Feat1", 31)};
    JavaRDD<Row> javaRddRowA = sc.parallelize(Arrays.asList(rowsA));

    List<StructField> fieldsA = new ArrayList<StructField>();
    fieldsA.add(DataTypes.createStructField("myID", DataTypes.StringType, true));
    fieldsA.add(DataTypes.createStructField("FeatureName", DataTypes.StringType, true));
    fieldsA.add(DataTypes.createStructField("FeatureValue", DataTypes.IntegerType, true));
    StructType schemaA = DataTypes.createStructType(fieldsA);
    Dataset<Row> dataFrameA = spark.createDataFrame(javaRddRowA, schemaA);

    String dmlString = "[tA, tAM] = transformencode (target = A, spec = \"{ids: false ,recode: [ myID, FeatureName ]}\");";

    Script script = dml(dmlString)
            .in("A", dataFrameA,
                    new FrameMetadata(FrameFormat.CSV, dataFrameA.count(), (long) dataFrameA.columns().length))
            .out("tA").out("tAM");
    MLResults results = ml.execute(script);

    double[][] matrixtA = results.getMatrixAs2DDoubleArray("tA");
    Assert.assertEquals(10.0, matrixtA[0][2], 0.0);
    Assert.assertEquals(20.0, matrixtA[1][2], 0.0);
    Assert.assertEquals(31.0, matrixtA[2][2], 0.0);

    Dataset<Row> dataFrame_tA = results.getMatrix("tA").toDF();
    System.out.println("Number of matrix tA rows = " + dataFrame_tA.count());
    dataFrame_tA.printSchema();
    dataFrame_tA.show();

    Dataset<Row> dataFrame_tAM = results.getFrame("tAM").toDF();
    System.out.println("Number of frame tAM rows = " + dataFrame_tAM.count());
    dataFrame_tAM.printSchema();
    dataFrame_tAM.show();
}
Example 12: testTransform
import org.apache.spark.sql.RowFactory; // import the package/class this method depends on

@Test
public void testTransform() {
    System.out.println("MLContextFrameTest - transform");

    Row[] rowsA = {
            RowFactory.create("\"`@(\"(!&", 2, "20news-bydate-train/comp.os.ms-windows.misc/9979"),
            RowFactory.create("\"`@(\"\"(!&\"", 3, "20news-bydate-train/comp.os.ms-windows.misc/9979")};
    JavaRDD<Row> javaRddRowA = sc.parallelize(Arrays.asList(rowsA));

    List<StructField> fieldsA = new ArrayList<StructField>();
    fieldsA.add(DataTypes.createStructField("featureName", DataTypes.StringType, true));
    fieldsA.add(DataTypes.createStructField("featureValue", DataTypes.IntegerType, true));
    fieldsA.add(DataTypes.createStructField("id", DataTypes.StringType, true));
    StructType schemaA = DataTypes.createStructType(fieldsA);
    Dataset<Row> dataFrameA = spark.createDataFrame(javaRddRowA, schemaA);

    String dmlString = "[tA, tAM] = transformencode (target = A, spec = \"{ids: false ,recode: [ featureName, id ]}\");";

    Script script = dml(dmlString)
            .in("A", dataFrameA,
                    new FrameMetadata(FrameFormat.CSV, dataFrameA.count(), (long) dataFrameA.columns().length))
            .out("tA").out("tAM");
    ml.setExplain(true);
    ml.setExplainLevel(ExplainLevel.RECOMPILE_HOPS);
    MLResults results = ml.execute(script);

    double[][] matrixtA = results.getMatrixAs2DDoubleArray("tA");
    Assert.assertEquals(1.0, matrixtA[0][2], 0.0);

    Dataset<Row> dataFrame_tA = results.getMatrix("tA").toDF();
    System.out.println("Number of matrix tA rows = " + dataFrame_tA.count());
    dataFrame_tA.printSchema();
    dataFrame_tA.show();

    Dataset<Row> dataFrame_tAM = results.getFrame("tAM").toDF();
    System.out.println("Number of frame tAM rows = " + dataFrame_tAM.count());
    dataFrame_tAM.printSchema();
    dataFrame_tAM.show();
}
Example 13: call
import org.apache.spark.sql.RowFactory; // import the package/class this method depends on

@Override
public Row call(String str) throws Exception {
    String[] strings = str.split(",");
    Double[] doubles = new Double[strings.length];
    for (int i = 0; i < strings.length; i++) {
        doubles[i] = Double.parseDouble(strings[i]);
    }
    // Cast to Object[] so the array is passed as the full argument list, not as a single value.
    return RowFactory.create((Object[]) doubles);
}
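Since every field above is parsed to a Double, the matching schema is just a run of DoubleType columns. A minimal sketch of generating such a schema for a known column count; the "c" + i column-name convention is an assumption.

// Sketch only: an all-DoubleType schema matching the Rows produced above.
private static StructType doubleSchema(int numCols) {
    StructField[] fields = new StructField[numCols];
    for (int i = 0; i < numCols; i++) {
        fields[i] = DataTypes.createStructField("c" + i, DataTypes.DoubleType, true);
    }
    return new StructType(fields);
}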
Example 14: call
import org.apache.spark.sql.RowFactory; // import the package/class this method depends on

@Override
public Row call(SimpleFeature feature) throws Exception {
    Object[] fields = new Serializable[schema.size()];
    for (int i = 0; i < schema.size(); i++) {
        Object fieldObj = feature.getAttribute(i);
        if (fieldObj != null) {
            StructField structField = schema.apply(i);
            if (structField.name().equals("geom")) {
                fields[i] = geomWriter.write((Geometry) fieldObj);
            } else if (structField.dataType() == DataTypes.TimestampType) {
                fields[i] = new Timestamp(((Date) fieldObj).getTime());
            } else if (structField.dataType() != null) {
                fields[i] = (Serializable) fieldObj;
            } else {
                LOGGER.error("Unexpected attribute in field(" + structField.name() + "): " + fieldObj);
            }
        }
    }
    return RowFactory.create(fields);
}
Example 15: call
import org.apache.spark.sql.RowFactory; // import the package/class this method depends on

@Override
public Row call(Tuple2<Long, Tuple2<byte[], byte[]>> t) throws Exception {
    Long conglomerateId = t._1;
    byte[] key = t._2._1;
    byte[] value = t._2._2;
    return RowFactory.create(conglomerateId, Bytes.toHex(key), value);
}