This article collects typical usage examples of the Java class org.apache.spark.sql.types.DataTypes. If you are wondering what the DataTypes class is for, how to use it, or what real-world usage looks like, the curated examples below should help.
The DataTypes class belongs to the org.apache.spark.sql.types package. Fifteen code examples of the class are shown below, ordered by popularity by default.
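For orientation, here is a minimal, self-contained sketch of the pattern the examples below build on: composing a schema from the predefined singleton types and the createStructField/createStructType/createArrayType factory methods (the class and field names here are illustrative only):

import java.util.Arrays;
import org.apache.spark.sql.types.DataTypes;
import org.apache.spark.sql.types.StructType;

public class DataTypesQuickStart {
    public static void main(String[] args) {
        // Assemble a schema from the predefined singleton types and factory methods
        StructType userSchema = DataTypes.createStructType(Arrays.asList(
                DataTypes.createStructField("id", DataTypes.LongType, false),
                DataTypes.createStructField("name", DataTypes.StringType, true),
                DataTypes.createStructField("scores",
                        DataTypes.createArrayType(DataTypes.DoubleType), true)));
        System.out.println(userSchema.treeString());
    }
}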
Example 1: createNGramDataFrame
import org.apache.spark.sql.types.DataTypes; // import the required package/class
/**
 * Creates an n-gram data frame from text lines.
 * @param lines an RDD of text lines
 * @return an n-gram data frame
 */
DataFrame createNGramDataFrame(JavaRDD<String> lines) {
    JavaRDD<Row> rows = lines.map(new Function<String, Row>() {
        private static final long serialVersionUID = -4332903997027358601L;

        @Override
        public Row call(String line) throws Exception {
            return RowFactory.create(Arrays.asList(line.split("\\s+")));
        }
    });
    StructType schema = new StructType(new StructField[] {
            new StructField("words",
                    DataTypes.createArrayType(DataTypes.StringType), false,
                    Metadata.empty()) });
    DataFrame wordDF = new SQLContext(jsc).createDataFrame(rows, schema);
    // build a bigram language model
    NGram transformer = new NGram().setInputCol("words")
            .setOutputCol("ngrams").setN(2);
    DataFrame ngramDF = transformer.transform(wordDF);
    ngramDF.show(10, false);
    return ngramDF;
}
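A brief usage sketch for the method above (hedged: it assumes the surrounding class exposes the JavaSparkContext field jsc that the method body already uses, and the input path is illustrative):

JavaRDD<String> lines = jsc.textFile("/path/to/corpus.txt");
DataFrame ngrams = createNGramDataFrame(lines);
ngrams.printSchema(); // words: array<string>, ngrams: array<string>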
Example 2: test_getDataSetResult
import org.apache.spark.sql.types.DataTypes; // import the required package/class
@Test
public void test_getDataSetResult() {
    StructField[] structFields = new StructField[]{
            new StructField("intColumn", DataTypes.IntegerType, true, Metadata.empty()),
            new StructField("stringColumn", DataTypes.StringType, true, Metadata.empty())
    };
    StructType structType = new StructType(structFields);
    List<Row> rows = new ArrayList<>();
    rows.add(RowFactory.create(1, "v1"));
    rows.add(RowFactory.create(2, "v2"));
    Dataset<Row> df = sparkSession.createDataFrame(rows, structType);
    DataSetResult dataSetResult = SparkUtils.getDataSetResult(df);
    Assert.assertEquals(2, dataSetResult.getColumnNames().size());
    Assert.assertEquals(2, dataSetResult.getRows().size());
    Assert.assertEquals(new Integer(1), dataSetResult.getRows().get(0).get(0));
    Assert.assertEquals("v1", dataSetResult.getRows().get(0).get(1));
    Assert.assertEquals(new Integer(2), dataSetResult.getRows().get(1).get(0));
    Assert.assertEquals("v2", dataSetResult.getRows().get(1).get(1));
}
Example 3: generateData_week_timepoints_by_10_minutes
import org.apache.spark.sql.types.DataTypes; // import the required package/class
private static Dataset<Row> generateData_week_timepoints_by_10_minutes(SparkSession spark) {
    StructField[] structFields = new StructField[1];
    org.apache.spark.sql.types.DataType dataType = DataTypes.IntegerType;
    String column = "timepoint";
    StructField structField = new StructField(column, dataType, true, Metadata.empty());
    structFields[0] = structField;
    StructType structType = new StructType(structFields);
    List<Row> rows = new ArrayList<>();
    int weekTotalMinutes = 7 * 24 * 60;
    int timepointIntervalMinutes = 10;
    for (int i = 0; i < weekTotalMinutes / timepointIntervalMinutes; i++) {
        Object[] objects = new Object[structFields.length];
        objects[0] = i;
        Row row = RowFactory.create(objects);
        rows.add(row);
    }
    Dataset<Row> df = spark.createDataFrame(rows, structType);
    return df;
}
Example 4: parse
import org.apache.spark.sql.types.DataTypes; // import the required package/class
/**
 * Parses a list of PoS-tagged sentences, one per line, and writes the result to an output
 * file in the specified output format.
 * @param jsc the Spark context
 * @param sentences the PoS-tagged sentences
 * @param outputFileName the output file name
 * @param outputFormat the output format
 */
public void parse(JavaSparkContext jsc, List<String> sentences, String outputFileName, OutputFormat outputFormat) {
    JavaRDD<String> input = jsc.parallelize(sentences);
    JavaRDD<Sentence> sents = input.map(new TaggedLineToSentenceFunction());
    JavaRDD<DependencyGraph> graphs = sents.map(new ParsingFunction());
    JavaRDD<Row> rows = graphs.map(new Function<DependencyGraph, Row>() {
        private static final long serialVersionUID = -812004521983071103L;

        public Row call(DependencyGraph graph) {
            return RowFactory.create(graph.getSentence().toString(), graph.dependencies());
        }
    });
    StructType schema = new StructType(new StructField[]{
            new StructField("sentence", DataTypes.StringType, false, Metadata.empty()),
            new StructField("dependency", DataTypes.StringType, false, Metadata.empty())
    });
    SQLContext sqlContext = new SQLContext(jsc);
    DataFrame df = sqlContext.createDataFrame(rows, schema);
    if (outputFormat == OutputFormat.TEXT)
        df.select("dependency").write().text(outputFileName);
    else
        df.repartition(1).write().json(outputFileName);
}
Example 5: datasetSchema
import org.apache.spark.sql.types.DataTypes; // import the required package/class
public static StructType datasetSchema(Map<String, String> mappa) {
    StructType struct = new StructType();
    for (Map.Entry<String, String> entry : mappa.entrySet()) {
        switch (entry.getValue().toLowerCase()) {
            case "string":
            case "dictionary":
                struct = struct.add(entry.getKey(), DataTypes.StringType);
                break;
            case "int":
                struct = struct.add(entry.getKey(), DataTypes.IntegerType);
                break;
            case "double":
                struct = struct.add(entry.getKey(), DataTypes.DoubleType);
                break;
        }
    }
    return struct;
}
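A short usage sketch for datasetSchema (hedged; the column map below is illustrative and would normally come from configuration):

Map<String, String> columnTypes = new LinkedHashMap<>();
columnTypes.put("name", "string");
columnTypes.put("age", "int");
columnTypes.put("salary", "double");
StructType schema = datasetSchema(columnTypes); // name: string, age: int, salary: double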
Example 6: translateDataType
import org.apache.spark.sql.types.DataTypes; // import the required package/class
static
public DataType translateDataType(org.dmg.pmml.DataType dataType){
    switch(dataType){
        case STRING:
            return DataTypes.StringType;
        case INTEGER:
            return DataTypes.IntegerType;
        case FLOAT:
            return DataTypes.FloatType;
        case DOUBLE:
            return DataTypes.DoubleType;
        case BOOLEAN:
            return DataTypes.BooleanType;
        default:
            throw new IllegalArgumentException();
    }
}
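A minimal usage sketch (hedged): Example 8 below calls this method through a SchemaUtil helper, so assuming the JPMML org.dmg.pmml.DataType enum is on the classpath, an invocation could look like:

org.apache.spark.sql.types.DataType sparkType =
        SchemaUtil.translateDataType(org.dmg.pmml.DataType.DOUBLE);
// sparkType is DataTypes.DoubleType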
Example 7: getJdbcTypeString
import org.apache.spark.sql.types.DataTypes; // import the required package/class
public static String getJdbcTypeString(org.apache.spark.sql.types.DataType dataType, boolean isPrimaryKeyOrIndexKey, boolean isText) {
    int maxVarcharLength = isPrimaryKeyOrIndexKey ? 150 : 250;
    String sqlTypeForString = isText ? "TEXT" : String.format("VARCHAR(%s)", maxVarcharLength);
    if (dataType == DataTypes.TimestampType || dataType == DataTypes.DateType) {
        return "DATETIME";
    } else if (dataType == DataTypes.StringType) {
        return sqlTypeForString;
    } else if (dataType == DataTypes.IntegerType) {
        return "INT";
    } else if (dataType == DataTypes.LongType) {
        return "BIGINT";
    } else if (dataType == DataTypes.FloatType) {
        return "FLOAT";
    } else if (dataType == DataTypes.DoubleType) {
        return "DOUBLE";
    } else if (dataType == DataTypes.BooleanType) {
        return "TINYINT";
    } else if (dataType == DataTypes.ByteType) {
        return "SMALLINT";
    } else if (dataType instanceof org.apache.spark.sql.types.DecimalType) {
        org.apache.spark.sql.types.DecimalType decimalType = (org.apache.spark.sql.types.DecimalType) dataType;
        return String.format("DECIMAL(%d,%d)", decimalType.precision(), decimalType.scale());
    } else {
        throw new RuntimeException(String.format("Unsupported property type for JDBC: %s", dataType));
    }
}
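A hedged sketch of how the mapping above might be used to render a single column definition (the field name and decimal precision are illustrative):

StructField priceField =
        DataTypes.createStructField("price", DataTypes.createDecimalType(10, 2), true);
String columnDdl = priceField.name() + " " + getJdbcTypeString(priceField.dataType(), false, false);
// yields "price DECIMAL(10,2)"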
Example 8: init
import org.apache.spark.sql.types.DataTypes; // import the required package/class
@Override
public StructField init(Evaluator evaluator){
    OutputField field = getField();

    DataType dataType = field.getDataType();
    if(dataType == null){
        try {
            dataType = OutputUtil.getDataType(field.getOutputField(), (ModelEvaluator<?>)evaluator);
            this.formatString = false;
        } catch(PMMLException pe){
            dataType = DataType.STRING;
            this.formatString = true;
        }
    }
    return DataTypes.createStructField(getColumnName(), SchemaUtil.translateDataType(dataType), false);
}
Example 9: generateData_numbers_1k
import org.apache.spark.sql.types.DataTypes; // import the required package/class
private static Dataset<Row> generateData_numbers_1k(SparkSession spark) {
    StructField[] structFields = new StructField[1];
    org.apache.spark.sql.types.DataType dataType = DataTypes.IntegerType;
    String column = "number";
    StructField structField = new StructField(column, dataType, true, Metadata.empty());
    structFields[0] = structField;
    StructType structType = new StructType(structFields);
    List<Row> rows = new ArrayList<>();
    for (int i = 0; i <= 1000; i++) {
        Object[] objects = new Object[structFields.length];
        objects[0] = i;
        Row row = RowFactory.create(objects);
        rows.add(row);
    }
    Dataset<Row> df = spark.createDataFrame(rows, structType);
    return df;
}
Example 10: writeEntityMetadata
import org.apache.spark.sql.types.DataTypes; // import the required package/class
/**
 * Write metadata describing entity tables.
 *
 * @param entitySchema the entity schema
 */
public void writeEntityMetadata(EntitySchema entitySchema) {
    // create the schema
    List<StructField> fields = new ArrayList<>();
    fields.add(DataTypes.createStructField(ENTITIES_NAME, DataTypes.StringType, false));
    fields.add(DataTypes.createStructField(ENTITIES_URI, DataTypes.StringType, false));
    fields.add(DataTypes.createStructField(ENTITIES_LABEL, DataTypes.StringType, true));
    fields.add(DataTypes.createStructField(ENTITIES_NUM_ROWS, DataTypes.LongType, false));
    StructType schema = DataTypes.createStructType(fields);

    List<Tuple2<String, String>> indexes = new ArrayList<>();
    indexes.add(new Tuple2<>(ENTITIES_TABLE_NAME, ENTITIES_URI));
    List<Tuple2<String, String>> primaryKeys = new ArrayList<>();
    // the entity name column serves as the table's primary key
    primaryKeys.add(new Tuple2<>(ENTITIES_TABLE_NAME, ENTITIES_NAME));

    final Map<String, String> uriLabels = rdfSchema.getUriLabels();
    // create table rows
    List<Row> rows = entitySchema.getTables().stream()
            .map(table -> {
                Object[] valueArray = new Object[]{
                        table.getName(),
                        table.getTypeURI(),
                        uriLabels.get(table.getTypeURI()),
                        table.getNumRows()
                };
                return RowFactory.create(valueArray);
            }).collect(Collectors.toList());

    // create and write the META_Entities dataframe
    DataFrame df = sql.createDataFrame(rows, schema);
    persistor.writeDataFrame(ENTITIES_TABLE_NAME, df);
    persistor.createPrimaryKeys(primaryKeys);
    persistor.createIndexes(indexes);
    df.unpersist();
}
Example 11: getDataType
import org.apache.spark.sql.types.DataTypes; // import the required package/class
private DataType getDataType(int type) {
    switch (type) {
        case LiteralType.BOOLEAN:
            return DataTypes.BooleanType;
        case LiteralType.STRING:
            return DataTypes.StringType;
        case LiteralType.FLOAT:
            return DataTypes.FloatType;
        case LiteralType.DOUBLE:
            return DataTypes.DoubleType;
        case LiteralType.INTEGER:
            return DataTypes.IntegerType;
        case LiteralType.LONG:
            return DataTypes.LongType;
        case LiteralType.DATETIME:
            // datetime is not supported due to timezone issues with java.sql.Timestamp;
            // check the InstanceAggregator for more info
            return DataTypes.StringType;
    }
    throw new NotImplementedException("Not able to write literal type " + type);
}
Example 12: getDataTypeFromReturnType
import org.apache.spark.sql.types.DataTypes; // import the required package/class
private static DataType getDataTypeFromReturnType(Method method) {
    String typeName = method.getReturnType().getSimpleName();

    switch (typeName) {
        case "int":
        case "Integer":
            return DataTypes.IntegerType;
        case "long":
        case "Long":
            return DataTypes.LongType;
        case "float":
        case "Float":
            return DataTypes.FloatType;
        case "boolean":
        case "Boolean":
            return DataTypes.BooleanType;
        case "double":
        case "Double":
            return DataTypes.DoubleType;
        case "String":
            return DataTypes.StringType;
        case "Date":
        case "date":
            return DataTypes.DateType;
        case "Timestamp":
            return DataTypes.TimestampType;
        case "short":
        case "Short":
            return DataTypes.ShortType;
        case "Object":
            return DataTypes.BinaryType;
        default:
            log.debug("Using default for type [{}]", typeName);
            return DataTypes.BinaryType;
    }
}
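A brief usage sketch (hedged; the Person bean and its getter are hypothetical, and the checked reflection exception is left unhandled for brevity):

java.lang.reflect.Method getter = Person.class.getMethod("getName");
DataType fieldType = getDataTypeFromReturnType(getter); // DataTypes.StringType for a String return type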
Example 13: convertSqlTypeToSparkSqlDataType
import org.apache.spark.sql.types.DataTypes; // import the required package/class
public static org.apache.spark.sql.types.DataType convertSqlTypeToSparkSqlDataType(int sqlType) {
    if (sqlType == java.sql.Types.BOOLEAN) {
        return DataTypes.BooleanType;
    } else if (sqlType == Types.TINYINT) {
        return DataTypes.ByteType;
    } else if (sqlType == Types.SMALLINT) {
        return DataTypes.ShortType;
    } else if (sqlType == java.sql.Types.INTEGER) {
        return DataTypes.IntegerType;
    } else if (sqlType == java.sql.Types.BIGINT) {
        return DataTypes.LongType;
    } else if (sqlType == Types.DECIMAL) {
        return DataTypes.createDecimalType();
    } else if (sqlType == java.sql.Types.FLOAT) {
        return DataTypes.FloatType;
    } else if (sqlType == java.sql.Types.DOUBLE) {
        return DataTypes.DoubleType;
    } else if (sqlType == Types.DATE) {
        return DataTypes.DateType;
    } else if (sqlType == Types.TIME) {
        return DataTypes.TimestampType;
    } else if (sqlType == Types.TIMESTAMP) {
        return DataTypes.TimestampType;
    } else if (sqlType == java.sql.Types.VARCHAR) {
        return DataTypes.StringType;
    } else {
        logger.warn(String.format("Using string for unsupported sql type %s", sqlType));
        return DataTypes.StringType;
    }
}
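A hedged sketch of how such a mapping is typically applied, building a Spark schema from JDBC ResultSetMetaData (metaData would come from an already executed query; exception handling omitted):

List<StructField> fields = new ArrayList<>();
for (int i = 1; i <= metaData.getColumnCount(); i++) {
    fields.add(DataTypes.createStructField(
            metaData.getColumnLabel(i),
            convertSqlTypeToSparkSqlDataType(metaData.getColumnType(i)),
            metaData.isNullable(i) != java.sql.ResultSetMetaData.columnNoNulls));
}
StructType schema = DataTypes.createStructType(fields);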
Example 14: readJsonWithSchema
import org.apache.spark.sql.types.DataTypes; // import the required package/class
@Test
public void readJsonWithSchema() throws Exception {
    Map<String, Object> paramMap = new HashMap<>();
    paramMap.put(FileSystemInput.FORMAT_CONFIG, "json");
    paramMap.put(FileSystemInput.PATH_CONFIG, FileSystemInput.class.getResource(JSON_DATA).getPath());
    paramMap.put(FileSystemInput.FIELD_NAMES_CONFIG, Lists.newArrayList("field1", "field2", "field3", "field4"));
    paramMap.put(FileSystemInput.FIELD_TYPES_CONFIG, Lists.newArrayList("int", "string", "boolean", "string"));
    config = ConfigFactory.parseMap(paramMap);

    FileSystemInput jsonInput = new FileSystemInput();
    jsonInput.configure(config);

    Dataset<Row> dataFrame = jsonInput.read();
    dataFrame.printSchema();
    dataFrame.show();

    assertEquals(4, dataFrame.count());

    Row first = dataFrame.first();
    assertEquals("dog", first.getString(3));
    assertEquals("field1", first.schema().fields()[0].name());
    assertEquals(DataTypes.IntegerType, first.schema().fields()[0].dataType());
}
Example 15: SparkRDF4JSparqlRelation
import org.apache.spark.sql.types.DataTypes; // import the required package/class
/**
 * Constructor for a new {@link SparkRDF4JSparqlRelation} based on the given
 * service, query, schema, and context.
 *
 * @param service
 *            The URL of the SPARQL service to be used for this query.
 * @param parsedQuery
 *            The pre-parsed SPARQL query.
 * @param schema
 *            The schema to use for the results of the query.
 * @param sqlContext
 *            The context for the query.
 */
SparkRDF4JSparqlRelation(String service, ParsedQuery parsedQuery, StructType schema, SQLContext sqlContext) {
    this.serviceField = Objects.requireNonNull(service);
    this.queryField = Objects.requireNonNull(parsedQuery);
    this.schemaField = Optional.ofNullable(schema).orElseGet(() -> {
        // These bindings are guaranteed to be present and are not nullable
        Set<String> assuredBindingNames = this.queryField.getTupleExpr().getAssuredBindingNames();
        // Bindings that appear only in the following set are nullable
        Set<String> bindingNames = this.queryField.getTupleExpr().getBindingNames();
        StructType result = new StructType();
        for (String binding : bindingNames) {
            result = result.add(binding, DataTypes.StringType, !assuredBindingNames.contains(binding));
        }
        return result;
    });
    this.sqlContextField = sqlContext;
}