This article collects typical usage examples of DataTypes.IntegerType from the Java class org.apache.spark.sql.types.DataTypes (IntegerType is a public static constant of DataTypes rather than a method). If you are wondering what DataTypes.IntegerType is for, how it is used, or what real-world code that uses it looks like, the curated examples below may help. You can also read further about the enclosing class, org.apache.spark.sql.types.DataTypes.
The following 15 code examples show DataTypes.IntegerType in use; by default they are ordered by popularity.
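For orientation before the examples, here is a minimal, self-contained sketch of the most common pattern shown below: declaring an integer column with DataTypes.IntegerType inside a StructType and building a small Dataset against that schema. The class name, column names, and the local-mode SparkSession are placeholders, not taken from any of the examples.

import java.util.Arrays;
import java.util.List;
import org.apache.spark.sql.Dataset;
import org.apache.spark.sql.Row;
import org.apache.spark.sql.RowFactory;
import org.apache.spark.sql.SparkSession;
import org.apache.spark.sql.types.DataTypes;
import org.apache.spark.sql.types.Metadata;
import org.apache.spark.sql.types.StructField;
import org.apache.spark.sql.types.StructType;

public class IntegerTypeSketch {
    public static void main(String[] args) {
        // Local session for this sketch only; adjust master/appName as needed.
        SparkSession spark = SparkSession.builder()
                .master("local[*]")
                .appName("IntegerTypeSketch")
                .getOrCreate();

        // DataTypes.IntegerType marks a column as a 32-bit integer column.
        StructType schema = new StructType(new StructField[]{
                new StructField("id", DataTypes.IntegerType, false, Metadata.empty()),
                new StructField("label", DataTypes.StringType, true, Metadata.empty())
        });

        List<Row> rows = Arrays.asList(RowFactory.create(1, "a"), RowFactory.create(2, "b"));
        Dataset<Row> df = spark.createDataFrame(rows, schema);
        df.printSchema();   // id: integer (nullable = false), label: string (nullable = true)
        df.show();

        spark.stop();
    }
}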
Example 1: test_getDataSetResult
import org.apache.spark.sql.types.DataTypes; // import the class this example depends on
@Test
public void test_getDataSetResult() {
    StructField[] structFields = new StructField[]{
            new StructField("intColumn", DataTypes.IntegerType, true, Metadata.empty()),
            new StructField("stringColumn", DataTypes.StringType, true, Metadata.empty())
    };
    StructType structType = new StructType(structFields);
    List<Row> rows = new ArrayList<>();
    rows.add(RowFactory.create(1, "v1"));
    rows.add(RowFactory.create(2, "v2"));
    Dataset<Row> df = sparkSession.createDataFrame(rows, structType);
    DataSetResult dataSetResult = SparkUtils.getDataSetResult(df);
    Assert.assertEquals(2, dataSetResult.getColumnNames().size());
    Assert.assertEquals(2, dataSetResult.getRows().size());
    Assert.assertEquals(new Integer(1), dataSetResult.getRows().get(0).get(0));
    Assert.assertEquals("v1", dataSetResult.getRows().get(0).get(1));
    Assert.assertEquals(new Integer(2), dataSetResult.getRows().get(1).get(0));
    Assert.assertEquals("v2", dataSetResult.getRows().get(1).get(1));
}
Example 2: generateData_week_timepoints_by_10_minutes
import org.apache.spark.sql.types.DataTypes; // import the class this example depends on
private static Dataset<Row> generateData_week_timepoints_by_10_minutes(SparkSession spark) {
    StructField[] structFields = new StructField[1];
    org.apache.spark.sql.types.DataType dataType = DataTypes.IntegerType;
    String column = "timepoint";
    StructField structField = new StructField(column, dataType, true, Metadata.empty());
    structFields[0] = structField;
    StructType structType = new StructType(structFields);
    List<Row> rows = new ArrayList<>();
    int weekTotalMinutes = 7 * 24 * 60;
    int timepointIntervalMinutes = 10;
    for (int i = 0; i < weekTotalMinutes / timepointIntervalMinutes; i++) {
        Object[] objects = new Object[structFields.length];
        objects[0] = i;
        Row row = RowFactory.create(objects);
        rows.add(row);
    }
    Dataset<Row> df = spark.createDataFrame(rows, structType);
    return df;
}
Example 3: translateDataType
import org.apache.spark.sql.types.DataTypes; // import the class this example depends on
public static DataType translateDataType(org.dmg.pmml.DataType dataType) {
    switch (dataType) {
        case STRING:
            return DataTypes.StringType;
        case INTEGER:
            return DataTypes.IntegerType;
        case FLOAT:
            return DataTypes.FloatType;
        case DOUBLE:
            return DataTypes.DoubleType;
        case BOOLEAN:
            return DataTypes.BooleanType;
        default:
            throw new IllegalArgumentException();
    }
}
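A trivial call site (not part of the original snippet, shown only for illustration) maps a PMML enum value to its Spark counterpart:

// Hypothetical usage: PMML's INTEGER maps to Spark's IntegerType.
org.apache.spark.sql.types.DataType sparkType = translateDataType(org.dmg.pmml.DataType.INTEGER);
// sparkType == DataTypes.IntegerType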
Example 4: getJdbcTypeString
import org.apache.spark.sql.types.DataTypes; // import the class this example depends on
public static String getJdbcTypeString(org.apache.spark.sql.types.DataType dataType, boolean isPrimaryKeyOrIndexKey, boolean isText) {
    int maxVarcharLength = isPrimaryKeyOrIndexKey ? 150 : 250;
    String sqlTypeForString = isText ? "TEXT" : String.format("VARCHAR(%s)", maxVarcharLength);
    if (dataType == DataTypes.TimestampType || dataType == DataTypes.DateType) {
        return "DATETIME";
    } else if (dataType == DataTypes.StringType) {
        return sqlTypeForString;
    } else if (dataType == DataTypes.IntegerType) {
        return "INT";
    } else if (dataType == DataTypes.LongType) {
        return "BIGINT";
    } else if (dataType == DataTypes.FloatType) {
        return "FLOAT";
    } else if (dataType == DataTypes.DoubleType) {
        return "DOUBLE";
    } else if (dataType == DataTypes.BooleanType) {
        return "TINYINT";
    } else if (dataType == DataTypes.ByteType) {
        return "SMALLINT";
    } else if (dataType instanceof org.apache.spark.sql.types.DecimalType) {
        org.apache.spark.sql.types.DecimalType decimalType = (org.apache.spark.sql.types.DecimalType) dataType;
        return String.format("DECIMAL(%d,%d)", decimalType.precision(), decimalType.scale());
    } else {
        throw new RuntimeException(String.format("Unsupported property type for JDBC: %s", dataType));
    }
}
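As a usage illustration (an assumption, not part of the original example), a helper like this could be paired with a Dataset's schema to assemble a CREATE TABLE statement. The method name, the naive backtick quoting, and the nullability handling below are hypothetical.

// Hypothetical caller: builds MySQL-style DDL from a Dataset schema using getJdbcTypeString() above.
// Assumes the same imports as the example plus org.apache.spark.sql.Dataset and Row.
public static String buildCreateTableSql(String tableName, Dataset<Row> df) {
    StringBuilder sb = new StringBuilder("CREATE TABLE " + tableName + " (");
    StructField[] fields = df.schema().fields();
    for (int i = 0; i < fields.length; i++) {
        if (i > 0) {
            sb.append(", ");
        }
        sb.append("`").append(fields[i].name()).append("` ")
          .append(getJdbcTypeString(fields[i].dataType(), false, false));
    }
    sb.append(")");
    return sb.toString();
}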
Example 5: fromSchema
import org.apache.spark.sql.types.DataTypes; // import the class this example depends on
/**
 * Convert a DataVec schema to a Spark SQL struct type.
 *
 * @param schema the DataVec schema to convert
 * @return the corresponding Spark StructType
 */
public static StructType fromSchema(Schema schema) {
    StructField[] structFields = new StructField[schema.numColumns()];
    for (int i = 0; i < structFields.length; i++) {
        switch (schema.getColumnTypes().get(i)) {
            case Double:
                structFields[i] = new StructField(schema.getName(i), DataTypes.DoubleType, false, Metadata.empty());
                break;
            case Integer:
                structFields[i] = new StructField(schema.getName(i), DataTypes.IntegerType, false, Metadata.empty());
                break;
            case Long:
                structFields[i] = new StructField(schema.getName(i), DataTypes.LongType, false, Metadata.empty());
                break;
            case Float:
                structFields[i] = new StructField(schema.getName(i), DataTypes.FloatType, false, Metadata.empty());
                break;
            default:
                throw new IllegalStateException(
                        "This api should not be used with strings , binary data or ndarrays. This is only for columnar data");
        }
    }
    return new StructType(structFields);
}
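A call site might look like the sketch below; it assumes DataVec's Schema.Builder API (addColumnInteger/addColumnDouble) and is not part of the original example.

// Hypothetical usage, assuming org.datavec.api.transform.schema.Schema is on the classpath.
Schema dataVecSchema = new Schema.Builder()
        .addColumnInteger("userId")   // mapped to DataTypes.IntegerType by fromSchema()
        .addColumnDouble("score")     // mapped to DataTypes.DoubleType
        .build();
StructType sparkSchema = fromSchema(dataVecSchema);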
Example 6: generateData_numbers_1k
import org.apache.spark.sql.types.DataTypes; // import the class this example depends on
private static Dataset<Row> generateData_numbers_1k(SparkSession spark) {
    StructField[] structFields = new StructField[1];
    org.apache.spark.sql.types.DataType dataType = DataTypes.IntegerType;
    String column = "number";
    StructField structField = new StructField(column, dataType, true, Metadata.empty());
    structFields[0] = structField;
    StructType structType = new StructType(structFields);
    List<Row> rows = new ArrayList<>();
    for (int i = 0; i <= 1000; i++) {
        Object[] objects = new Object[structFields.length];
        objects[0] = i;
        Row row = RowFactory.create(objects);
        rows.add(row);
    }
    Dataset<Row> df = spark.createDataFrame(rows, structType);
    return df;
}
Example 7: getDataType
import org.apache.spark.sql.types.DataTypes; // import the class this example depends on
private DataType getDataType(int type) {
    switch (type) {
        case LiteralType.BOOLEAN:
            return DataTypes.BooleanType;
        case LiteralType.STRING:
            return DataTypes.StringType;
        case LiteralType.FLOAT:
            return DataTypes.FloatType;
        case LiteralType.DOUBLE:
            return DataTypes.DoubleType;
        case LiteralType.INTEGER:
            return DataTypes.IntegerType;
        case LiteralType.LONG:
            return DataTypes.LongType;
        case LiteralType.DATETIME:
            // datetime is not supported due to timezone issues with java.sql.Timestamp;
            // check the InstanceAggregator for more info
            return DataTypes.StringType;
    }
    throw new NotImplementedException("Not able to write literal type " + type);
}
Example 8: getDataTypeFromReturnType
import org.apache.spark.sql.types.DataTypes; // import the class this example depends on
private static DataType getDataTypeFromReturnType(Method method) {
    String typeName = method.getReturnType().getSimpleName();
    switch (typeName) {
        case "int":
        case "Integer":
            return DataTypes.IntegerType;
        case "long":
        case "Long":
            return DataTypes.LongType;
        case "float":
        case "Float":
            return DataTypes.FloatType;
        case "boolean":
        case "Boolean":
            return DataTypes.BooleanType;
        case "double":
        case "Double":
            return DataTypes.DoubleType;
        case "String":
            return DataTypes.StringType;
        case "Date":
        case "date":
            return DataTypes.DateType;
        case "Timestamp":
            return DataTypes.TimestampType;
        case "short":
        case "Short":
            return DataTypes.ShortType;
        case "Object":
            return DataTypes.BinaryType;
        default:
            log.debug("Using default for type [{}]", typeName);
            return DataTypes.BinaryType;
    }
}
Example 9: convertSqlTypeToSparkSqlDataType
import org.apache.spark.sql.types.DataTypes; // import the class this example depends on
public static org.apache.spark.sql.types.DataType convertSqlTypeToSparkSqlDataType(int sqlType) {
    if (sqlType == java.sql.Types.BOOLEAN) {
        return DataTypes.BooleanType;
    } else if (sqlType == Types.TINYINT) {
        return DataTypes.ByteType;
    } else if (sqlType == Types.SMALLINT) {
        return DataTypes.ShortType;
    } else if (sqlType == java.sql.Types.INTEGER) {
        return DataTypes.IntegerType;
    } else if (sqlType == java.sql.Types.BIGINT) {
        return DataTypes.LongType;
    } else if (sqlType == Types.DECIMAL) {
        return DataTypes.createDecimalType();
    } else if (sqlType == java.sql.Types.FLOAT) {
        return DataTypes.FloatType;
    } else if (sqlType == java.sql.Types.DOUBLE) {
        return DataTypes.DoubleType;
    } else if (sqlType == Types.DATE) {
        return DataTypes.DateType;
    } else if (sqlType == Types.TIME) {
        return DataTypes.TimestampType;
    } else if (sqlType == Types.TIMESTAMP) {
        return DataTypes.TimestampType;
    } else if (sqlType == java.sql.Types.VARCHAR) {
        return DataTypes.StringType;
    } else {
        logger.warn(String.format("Using string for unsupported sql type %s", sqlType));
        return DataTypes.StringType;
    }
}
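One plausible caller for this helper (an assumption, not shown in the original) derives a Spark schema from JDBC ResultSetMetaData:

// Hypothetical caller: maps each JDBC column to a Spark StructField.
// Assumes java.util.List/ArrayList and the Spark types used in the example are imported.
public static StructType schemaFromResultSetMetaData(java.sql.ResultSetMetaData meta)
        throws java.sql.SQLException {
    List<StructField> fields = new ArrayList<>();
    for (int i = 1; i <= meta.getColumnCount(); i++) {   // JDBC column indexes are 1-based
        fields.add(DataTypes.createStructField(
                meta.getColumnLabel(i),
                convertSqlTypeToSparkSqlDataType(meta.getColumnType(i)),
                true));
    }
    return DataTypes.createStructType(fields);
}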
Example 10: fromSchemaSequence
import org.apache.spark.sql.types.DataTypes; // import the class this example depends on
/**
 * Convert the DataVec sequence schema to a StructType for Spark, for example for use in
 * {@link #toDataFrameSequence(Schema, JavaRDD)}.
 * <b>Note</b>: as per {@link #toDataFrameSequence(Schema, JavaRDD)}, the StructType has two additional columns added to it:<br>
 * - Column 0: Sequence UUID (name: {@link #SEQUENCE_UUID_COLUMN}) - a UUID for the original sequence<br>
 * - Column 1: Sequence index (name: {@link #SEQUENCE_INDEX_COLUMN}) - an index (integer, starting at 0) for the position
 *   of this record in the original time series.<br>
 * These two columns are required if the data is to be converted back into a sequence at a later point, for example
 * using {@link #toRecordsSequence(DataRowsFacade)}.
 *
 * @param schema Schema to convert
 * @return StructType for the schema
 */
public static StructType fromSchemaSequence(Schema schema) {
    StructField[] structFields = new StructField[schema.numColumns() + 2];
    structFields[0] = new StructField(SEQUENCE_UUID_COLUMN, DataTypes.StringType, false, Metadata.empty());
    structFields[1] = new StructField(SEQUENCE_INDEX_COLUMN, DataTypes.IntegerType, false, Metadata.empty());
    for (int i = 0; i < schema.numColumns(); i++) {
        switch (schema.getColumnTypes().get(i)) {
            case Double:
                structFields[i + 2] = new StructField(schema.getName(i), DataTypes.DoubleType, false, Metadata.empty());
                break;
            case Integer:
                structFields[i + 2] = new StructField(schema.getName(i), DataTypes.IntegerType, false, Metadata.empty());
                break;
            case Long:
                structFields[i + 2] = new StructField(schema.getName(i), DataTypes.LongType, false, Metadata.empty());
                break;
            case Float:
                structFields[i + 2] = new StructField(schema.getName(i), DataTypes.FloatType, false, Metadata.empty());
                break;
            default:
                throw new IllegalStateException(
                        "This api should not be used with strings , binary data or ndarrays. This is only for columnar data");
        }
    }
    return new StructType(structFields);
}
Example 11: testPlanner
import org.apache.spark.sql.types.DataTypes; // import the class this example depends on
@Test
public void testPlanner() {
    List<Row> rows = Lists.newArrayList(RowFactory.create("a", 1, false), RowFactory.create("b", 2, true));
    StructType schema = new StructType(new StructField[] {
            new StructField("field1", DataTypes.StringType, false, null),
            new StructField("field2", DataTypes.IntegerType, false, null),
            new StructField("field3", DataTypes.BooleanType, false, null)
    });
    Dataset<Row> data = Contexts.getSparkSession().createDataFrame(rows, schema);
    BulkPlanner p = new DeletePlanner();
    p.configure(ConfigFactory.empty());
    List<Tuple2<MutationType, Dataset<Row>>> planned = p.planMutationsForSet(data);
    assertEquals(1, planned.size());
    assertEquals(MutationType.DELETE, planned.get(0)._1());
    assertEquals(data, planned.get(0)._2());
}
Example 12: indexrSchemaToSparkSchema
import org.apache.spark.sql.types.DataTypes; // import the class this example depends on
public static List<StructField> indexrSchemaToSparkSchema(SegmentSchema schema) {
    List<StructField> fields = new ArrayList<>();
    for (ColumnSchema cs : schema.getColumns()) {
        DataType dataType;
        switch (cs.getSqlType()) {
            case INT:
                dataType = DataTypes.IntegerType;
                break;
            case BIGINT:
                dataType = DataTypes.LongType;
                break;
            case FLOAT:
                dataType = DataTypes.FloatType;
                break;
            case DOUBLE:
                dataType = DataTypes.DoubleType;
                break;
            case VARCHAR:
                dataType = DataTypes.StringType;
                break;
            case DATE:
                dataType = DataTypes.DateType;
                break;
            case DATETIME:
                dataType = DataTypes.TimestampType;
                break;
            default:
                throw new IllegalStateException("Unsupported type: " + cs.getSqlType());
        }
        fields.add(new StructField(cs.getName(), dataType, scala.Boolean.box(false), Metadata.empty()));
    }
    return fields;
}
Example 13: parseDataType
import org.apache.spark.sql.types.DataTypes; // import the class this example depends on
private static DataType parseDataType(Config fieldsConfig) {
    String type = fieldsConfig.getString(FIELD_TYPE_CONFIG);
    switch (type) {
        case "string":
            return DataTypes.StringType;
        case "byte":
            return DataTypes.ByteType;
        case "short":
            return DataTypes.ShortType;
        case "int":
            return DataTypes.IntegerType;
        case "long":
            return DataTypes.LongType;
        case "float":
            return DataTypes.FloatType;
        case "double":
            return DataTypes.DoubleType;
        case "decimal":
            ConfigUtils.assertConfig(fieldsConfig, DECIMAL_SCALE_CONFIG);
            ConfigUtils.assertConfig(fieldsConfig, DECIMAL_PRECISION_CONFIG);
            return DataTypes.createDecimalType(
                    fieldsConfig.getInt(DECIMAL_SCALE_CONFIG),
                    fieldsConfig.getInt(DECIMAL_PRECISION_CONFIG));
        case "boolean":
            return DataTypes.BooleanType;
        case "binary":
            return DataTypes.BinaryType;
        case "date":
            return DataTypes.DateType;
        case "timestamp":
            return DataTypes.TimestampType;
        case "array":
        case "map":
        case "struct":
            throw new RuntimeException("Schema check does not currently support complex types");
        default:
            throw new RuntimeException("Unknown type: " + type);
    }
}
Example 14: testAgeRangeInt
import org.apache.spark.sql.types.DataTypes; // import the class this example depends on
@Test
public void testAgeRangeInt() {
    StructType schema = new StructType(new StructField[] {
            new StructField("name", DataTypes.StringType, false, Metadata.empty()),
            new StructField("nickname", DataTypes.StringType, false, Metadata.empty()),
            new StructField("age", DataTypes.IntegerType, false, Metadata.empty()),
            new StructField("candycrushscore", DataTypes.createDecimalType(), false, Metadata.empty())
    });
    Map<String, Object> configMap = new HashMap<>();
    configMap.put("fields", Lists.newArrayList("age"));
    configMap.put("fieldtype", "int");
    configMap.put("range", Lists.newArrayList(0, 105));
    Config config = ConfigFactory.parseMap(configMap);
    RangeRowRule rule = new RangeRowRule();
    rule.configure("agerange", config);
    Row row1 = new RowWithSchema(schema, "Ian", "Ian", 34, new BigDecimal("0.00"));
    assertTrue("Row should pass rule", rule.check(row1));
    Row row2 = new RowWithSchema(schema, "Webster1", "Websta1", 110, new BigDecimal("450.10"));
    assertFalse("Row should not pass rule", rule.check(row2));
    Row row3 = new RowWithSchema(schema, "", "Ian1", 106, new BigDecimal("450.10"));
    assertFalse("Row should not pass rule", rule.check(row3));
    Row row4 = new RowWithSchema(schema, "First Last", "Ian Last", 105, new BigDecimal("450.10"));
    assertTrue("Row should pass rule", rule.check(row4));
}
Example 15: main
import org.apache.spark.sql.types.DataTypes; // import the class this example depends on
/**
 * Main method.
 *
 * @param args the arguments
 */
public static void main(final String[] args) {
    final String tableName = "SparkExampleDFUsingCSV";

    // get the locator host/port from arguments, if specified
    final String locatorHost = args.length > 0 ? args[0] : "localhost";
    final int locatorPort = args.length > 1 ? Integer.valueOf(args[1]) : 10334;

    // create SparkContext and SQLContext
    SparkConf conf = new SparkConf().setMaster("local[*]").setAppName("SparkExampleDFUsingCSV");
    JavaSparkContext jsc = new JavaSparkContext(conf);
    SQLContext sqlContext = new SQLContext(jsc);

    StructType customSchema = new StructType(new StructField[] {
            new StructField("year", DataTypes.IntegerType, true, Metadata.empty()),
            new StructField("make", DataTypes.StringType, true, Metadata.empty()),
            new StructField("model", DataTypes.StringType, true, Metadata.empty()),
            new StructField("comment", DataTypes.StringType, true, Metadata.empty()),
            new StructField("blank", DataTypes.StringType, true, Metadata.empty())
    });

    DataFrame df = sqlContext.read()
            .format("com.databricks.spark.csv")
            .schema(customSchema)
            .option("header", "true")
            .load("cars.csv");

    // print the schema and contents of the data-frame
    df.printSchema();
    df.show();

    Map<String, String> options = new HashMap<>(3);
    options.put("ampool.locator.host", locatorHost);
    options.put("ampool.locator.port", String.valueOf(locatorPort));

    // overwrite the existing table, if specified
    SaveMode saveMode = Boolean.getBoolean("overwrite") ? SaveMode.Overwrite : SaveMode.ErrorIfExists;

    // save the data-frame to Ampool as `tableName'
    df.write().format("io.ampool").options(options).mode(saveMode).save(tableName);

    System.out.println("########## DATA FROM AMPOOL ############");

    // load the data-frame back from Ampool `tableName' and show its contents
    DataFrame df1 = sqlContext.read().format("io.ampool").options(options).load(tableName);
    df1.show();

    // show the total number of rows in the data-frame
    System.out.println("# NumberOfRowsInDataFrame= " + df1.count());

    // data-frame with a filter
    df1.filter("year > 1997").show();

    // data-frame with selected columns
    df1.select("year", "make", "model", "comment").show();

    df1.registerTempTable("temp_table");
    sqlContext.sql("select * from temp_table order by year").show();
}