This article collects typical usage examples of DataTypes.LongType from the Java class org.apache.spark.sql.types.DataTypes. If you are unsure what DataTypes.LongType is for, how to use it, or what real code that uses it looks like, the hand-picked examples below should help. You can also explore further usage examples for the enclosing class, org.apache.spark.sql.types.DataTypes.
The sections below show 15 code examples that use DataTypes.LongType, sorted by popularity by default.
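Before the examples, a quick orientation: DataTypes.LongType is a public static field of org.apache.spark.sql.types.DataTypes (not a method) that describes a 64-bit signed integer column, i.e. Spark SQL's BIGINT. A minimal, self-contained sketch of declaring it in a schema (class and column names here are illustrative only):

import org.apache.spark.sql.types.DataTypes;
import org.apache.spark.sql.types.Metadata;
import org.apache.spark.sql.types.StructField;
import org.apache.spark.sql.types.StructType;

public class LongTypeSchemaSketch {
    public static void main(String[] args) {
        // LongType maps to Java long and to BIGINT in Spark SQL DDL.
        StructType schema = new StructType(new StructField[]{
                new StructField("id", DataTypes.LongType, false, Metadata.empty()),
                new StructField("name", DataTypes.StringType, true, Metadata.empty())
        });
        System.out.println(schema.simpleString()); // struct<id:bigint,name:string>
    }
}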
Example 1: getJdbcTypeString
import org.apache.spark.sql.types.DataTypes; // import required by this example
public static String getJdbcTypeString(org.apache.spark.sql.types.DataType dataType, boolean isPrimaryKeyOrIndexKey, boolean isText) {
int maxVarcharLength = isPrimaryKeyOrIndexKey ? 150 : 250;
String sqlTypeForString = isText ? "TEXT" : String.format("VARCHAR(%s)", maxVarcharLength);
if (dataType == DataTypes.TimestampType || dataType == DataTypes.DateType) {
return "DATETIME";
} else if (dataType == DataTypes.StringType) {
return sqlTypeForString;
} else if (dataType == DataTypes.IntegerType) {
return "INT";
} else if (dataType == DataTypes.LongType) {
return "BIGINT";
} else if (dataType == DataTypes.FloatType) {
return "FLOAT";
} else if (dataType == DataTypes.DoubleType) {
return "DOUBLE";
} else if (dataType == DataTypes.BooleanType) {
return "TINYINT";
} else if (dataType == DataTypes.ByteType) {
return "SMALLINT";
} else if (dataType instanceof org.apache.spark.sql.types.DecimalType) {
org.apache.spark.sql.types.DecimalType decimalType = (org.apache.spark.sql.types.DecimalType) dataType;
return String.format("DECIMAL(%d,%d)", decimalType.precision(), decimalType.scale());
} else {
throw new RuntimeException(String.format("Unsupported property type for JDBC: %s", dataType));
}
}
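A brief usage sketch for the method above; it is not part of the original source and assumes getJdbcTypeString is accessible (for example via static import) when building column definitions:

// Hypothetical column DDL fragments built from Spark types:
String idColumn  = "id "  + getJdbcTypeString(DataTypes.LongType, true, false);   // "id BIGINT"
String keyColumn = "key " + getJdbcTypeString(DataTypes.StringType, true, false); // "key VARCHAR(150)"
String docColumn = "doc " + getJdbcTypeString(DataTypes.StringType, false, true); // "doc TEXT"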
Example 2: fromSchema
import org.apache.spark.sql.types.DataTypes; // import required by this example
/**
 * Convert a DataVec schema to a Spark StructType.
 *
 * @param schema the schema to convert
 * @return the corresponding Spark StructType
*/
public static StructType fromSchema(Schema schema) {
StructField[] structFields = new StructField[schema.numColumns()];
for (int i = 0; i < structFields.length; i++) {
switch (schema.getColumnTypes().get(i)) {
case Double:
structFields[i] = new StructField(schema.getName(i), DataTypes.DoubleType, false, Metadata.empty());
break;
case Integer:
structFields[i] =
new StructField(schema.getName(i), DataTypes.IntegerType, false, Metadata.empty());
break;
case Long:
structFields[i] = new StructField(schema.getName(i), DataTypes.LongType, false, Metadata.empty());
break;
case Float:
structFields[i] = new StructField(schema.getName(i), DataTypes.FloatType, false, Metadata.empty());
break;
default:
throw new IllegalStateException(
"This api should not be used with strings , binary data or ndarrays. This is only for columnar data");
}
}
return new StructType(structFields);
}
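A hedged usage sketch for fromSchema, assuming the DataVec transform API (org.datavec.api.transform.schema.Schema) is on the classpath and the method above is in scope; the column names are illustrative:

Schema dataVecSchema = new Schema.Builder()
        .addColumnDouble("price")
        .addColumnLong("eventTime")   // Long column -> DataTypes.LongType in the StructType
        .build();
StructType sparkSchema = fromSchema(dataVecSchema);
// sparkSchema.simpleString() -> struct<price:double,eventTime:bigint>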
Example 3: getDataType
import org.apache.spark.sql.types.DataTypes; // import required by this example
private DataType getDataType(int type) {
switch (type) {
case LiteralType.BOOLEAN:
return DataTypes.BooleanType;
case LiteralType.STRING:
return DataTypes.StringType;
case LiteralType.FLOAT:
return DataTypes.FloatType;
case LiteralType.DOUBLE:
return DataTypes.DoubleType;
case LiteralType.INTEGER:
return DataTypes.IntegerType;
case LiteralType.LONG:
return DataTypes.LongType;
case LiteralType.DATETIME:
// datetime not supported due to timezone issues with java.sql.Timestamp
// check the InstanceAggregator for more info
return DataTypes.StringType;
}
throw new NotImplementedException("Not able to write literal type " + type);
}
Example 4: getDataTypeFromReturnType
import org.apache.spark.sql.types.DataTypes; // import required by this example
private static DataType getDataTypeFromReturnType(Method method) {
String typeName = method.getReturnType().getSimpleName();
switch (typeName) {
case "int":
case "Integer":
return DataTypes.IntegerType;
case "long":
case "Long":
return DataTypes.LongType;
case "float":
case "Float":
return DataTypes.FloatType;
case "boolean":
case "Boolean":
return DataTypes.BooleanType;
case "double":
case "Double":
return DataTypes.DoubleType;
case "String":
return DataTypes.StringType;
case "Date":
case "date":
return DataTypes.DateType;
case "Timestamp":
return DataTypes.TimestampType;
case "short":
case "Short":
return DataTypes.ShortType;
case "Object":
return DataTypes.BinaryType;
default:
log.debug("Using default for type [{}]", typeName);
return DataTypes.BinaryType;
}
}
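A small reflection sketch for the method above (assumes getDataTypeFromReturnType is in scope; exception handling is omitted for brevity):

// java.util.Date#getTime returns a primitive long, so the switch resolves "long" to LongType.
Method getTime = java.util.Date.class.getMethod("getTime"); // throws NoSuchMethodException
DataType sparkType = getDataTypeFromReturnType(getTime);    // DataTypes.LongType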
Example 5: convertSqlTypeToSparkSqlDataType
import org.apache.spark.sql.types.DataTypes; // import required by this example
public static org.apache.spark.sql.types.DataType convertSqlTypeToSparkSqlDataType(int sqlType) {
if (sqlType == java.sql.Types.BOOLEAN) {
return DataTypes.BooleanType;
} else if (sqlType == Types.TINYINT) {
return DataTypes.ByteType;
} else if (sqlType == Types.SMALLINT) {
return DataTypes.ShortType;
} else if (sqlType == java.sql.Types.INTEGER) {
return DataTypes.IntegerType;
} else if (sqlType == java.sql.Types.BIGINT) {
return DataTypes.LongType;
} else if (sqlType == Types.DECIMAL) {
return DataTypes.createDecimalType();
} else if (sqlType == java.sql.Types.FLOAT) {
return DataTypes.FloatType;
} else if (sqlType == java.sql.Types.DOUBLE) {
return DataTypes.DoubleType;
} else if (sqlType == Types.DATE) {
return DataTypes.DateType;
} else if (sqlType == Types.TIME) {
return DataTypes.TimestampType;
} else if (sqlType == Types.TIMESTAMP) {
return DataTypes.TimestampType;
} else if (sqlType == java.sql.Types.VARCHAR) {
return DataTypes.StringType;
} else {
logger.warn(String.format("Using string for unsupported sql type %s", sqlType));
return DataTypes.StringType;
}
}
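A sketch of how such a converter is typically driven from JDBC metadata; the ResultSet named resultSet is hypothetical and SQLException handling is omitted:

ResultSetMetaData md = resultSet.getMetaData();
StructField[] fields = new StructField[md.getColumnCount()];
for (int i = 1; i <= md.getColumnCount(); i++) {                   // JDBC columns are 1-based
    fields[i - 1] = new StructField(
            md.getColumnLabel(i),
            convertSqlTypeToSparkSqlDataType(md.getColumnType(i)), // BIGINT -> DataTypes.LongType
            true, Metadata.empty());
}
StructType schema = new StructType(fields);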
Example 6: fromSchemaSequence
import org.apache.spark.sql.types.DataTypes; // import required by this example
/**
* Convert the DataVec sequence schema to a StructType for Spark, for example for use in
 * {@link #toDataFrameSequence(Schema, JavaRDD)}.
 * <b>Note</b>: as per {@link #toDataFrameSequence(Schema, JavaRDD)}, the StructType has two additional columns added to it:<br>
 * - Column 0: Sequence UUID (name: {@link #SEQUENCE_UUID_COLUMN}) - a UUID for the original sequence<br>
 * - Column 1: Sequence index (name: {@link #SEQUENCE_INDEX_COLUMN}) - an index (integer, starting at 0) for the position
* of this record in the original time series.<br>
* These two columns are required if the data is to be converted back into a sequence at a later point, for example
* using {@link #toRecordsSequence(DataRowsFacade)}
*
* @param schema Schema to convert
* @return StructType for the schema
*/
public static StructType fromSchemaSequence(Schema schema) {
StructField[] structFields = new StructField[schema.numColumns() + 2];
structFields[0] = new StructField(SEQUENCE_UUID_COLUMN, DataTypes.StringType, false, Metadata.empty());
structFields[1] = new StructField(SEQUENCE_INDEX_COLUMN, DataTypes.IntegerType, false, Metadata.empty());
for (int i = 0; i < schema.numColumns(); i++) {
switch (schema.getColumnTypes().get(i)) {
case Double:
structFields[i + 2] =
new StructField(schema.getName(i), DataTypes.DoubleType, false, Metadata.empty());
break;
case Integer:
structFields[i + 2] =
new StructField(schema.getName(i), DataTypes.IntegerType, false, Metadata.empty());
break;
case Long:
structFields[i + 2] =
new StructField(schema.getName(i), DataTypes.LongType, false, Metadata.empty());
break;
case Float:
structFields[i + 2] =
new StructField(schema.getName(i), DataTypes.FloatType, false, Metadata.empty());
break;
default:
throw new IllegalStateException(
"This api should not be used with strings , binary data or ndarrays. This is only for columnar data");
}
}
return new StructType(structFields);
}
Example 7: check
import org.apache.spark.sql.types.DataTypes; // import required by this example
@Override
public Dataset<Row> check(Dataset<Row> dataset, Map<String, Dataset<Row>> stepDependencies) {
if (isDependency()) {
Dataset<Row> expectedDependency = stepDependencies.get(dependency);
if (expectedDependency.count() == 1 && expectedDependency.schema().fields().length == 1
&& expectedDependency.schema().apply(0).dataType() == DataTypes.LongType) {
expected = expectedDependency.collectAsList().get(0).getLong(0);
} else {
throw new RuntimeException("Step dependency for count rule must have one row with a single field of long type");
}
}
if (expected < 0) {
throw new RuntimeException("Failed to determine expected count: must be specified either as literal or step dependency");
}
return dataset.groupBy().count().map(new CheckCount(expected, name), RowEncoder.apply(SCHEMA));
}
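For reference, a step dependency that satisfies the check above is simply a one-row, one-column Dataset whose only column is LongType; a sketch (the SparkSession spark and the view name source are assumptions):

Dataset<Row> expectedCount = spark.sql("SELECT COUNT(*) AS expected_count FROM source");
// COUNT(*) yields a single row with a single bigint column, i.e. DataTypes.LongType,
// so the check reads it with collectAsList().get(0).getLong(0).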
Example 8: indexrSchemaToSparkSchema
import org.apache.spark.sql.types.DataTypes; // import required by this example
public static List<StructField> indexrSchemaToSparkSchema(SegmentSchema schema) {
List<StructField> fields = new ArrayList<>();
for (ColumnSchema cs : schema.getColumns()) {
DataType dataType;
switch (cs.getSqlType()) {
case INT:
dataType = DataTypes.IntegerType;
break;
case BIGINT:
dataType = DataTypes.LongType;
break;
case FLOAT:
dataType = DataTypes.FloatType;
break;
case DOUBLE:
dataType = DataTypes.DoubleType;
break;
case VARCHAR:
dataType = DataTypes.StringType;
break;
case DATE:
dataType = DataTypes.DateType;
break;
case DATETIME:
dataType = DataTypes.TimestampType;
break;
default:
throw new IllegalStateException("Unsupported type: " + cs.getSqlType());
}
fields.add(new StructField(cs.getName(), dataType, scala.Boolean.box(false), Metadata.empty()));
}
return fields;
}
Example 9: parseDataType
import org.apache.spark.sql.types.DataTypes; // import required by this example
private static DataType parseDataType(Config fieldsConfig) {
String type = fieldsConfig.getString(FIELD_TYPE_CONFIG);
switch (type) {
case "string":
return DataTypes.StringType;
case "byte":
return DataTypes.ByteType;
case "short":
return DataTypes.ShortType;
case "int":
return DataTypes.IntegerType;
case "long":
return DataTypes.LongType;
case "float":
return DataTypes.FloatType;
case "double":
return DataTypes.DoubleType;
case "decimal":
ConfigUtils.assertConfig(fieldsConfig, DECIMAL_SCALE_CONFIG);
ConfigUtils.assertConfig(fieldsConfig, DECIMAL_PRECISION_CONFIG);
// DataTypes.createDecimalType expects (precision, scale) in that order
return DataTypes.createDecimalType(
    fieldsConfig.getInt(DECIMAL_PRECISION_CONFIG),
    fieldsConfig.getInt(DECIMAL_SCALE_CONFIG));
case "boolean":
return DataTypes.BooleanType;
case "binary":
return DataTypes.BinaryType;
case "date":
return DataTypes.DateType;
case "timestamp":
return DataTypes.TimestampType;
case "array":
case "map":
case "struct":
throw new RuntimeException("Schema check does not currently support complex types");
default:
throw new RuntimeException("Unknown type: " + type);
}
}
Example 10: testAgeRangeLong
import org.apache.spark.sql.types.DataTypes; // import required by this example
@Test
public void testAgeRangeLong() {
StructType schema = new StructType(new StructField[] {
new StructField("name", DataTypes.StringType, false, Metadata.empty()),
new StructField("nickname", DataTypes.StringType, false, Metadata.empty()),
new StructField("age", DataTypes.LongType, false, Metadata.empty()),
new StructField("candycrushscore", DataTypes.createDecimalType(), false, Metadata.empty())
});
Map<String, Object> configMap = new HashMap<>();
configMap.put("fields", Lists.newArrayList("age"));
configMap.put("range", Lists.newArrayList(0l,105l));
Config config = ConfigFactory.parseMap(configMap);
RangeRowRule rule = new RangeRowRule();
rule.configure("agerange", config);
Row row1 = new RowWithSchema(schema, "Ian", "Ian", 34L, new BigDecimal("0.00"));
assertTrue("Row should pass rule", rule.check(row1));
Row row2 = new RowWithSchema(schema, "Webster1", "Websta1", 110L, new BigDecimal("450.10"));
assertFalse("Row should not pass rule", rule.check(row2));
Row row3 = new RowWithSchema(schema, "", "Ian1", 110L, new BigDecimal("450.10"));
assertFalse("Row should not pass rule", rule.check(row3));
Row row4 = new RowWithSchema(schema, "First Last", "Ian Last", 100L, new BigDecimal("450.10"));
assertTrue("Row should pass rule", rule.check(row4));
}
Example 11: main
import org.apache.spark.sql.types.DataTypes; // import required by this example
public static void main(String[] args) {
System.setProperty("hadoop.home.dir", "E:\\sumitK\\Hadoop");
SparkSession sparkSession = SparkSession
.builder()
.master("local")
.config("spark.sql.warehouse.dir","file:///E:/sumitK/Hadoop/warehouse")
.appName("JavaALSExample")
.getOrCreate();
Logger rootLogger = LogManager.getRootLogger();
rootLogger.setLevel(Level.WARN);
JavaRDD<Movie> moviesRDD = sparkSession
.read().textFile("C:/Users/sumit.kumar/git/learning/src/main/resources/movies.csv")
.javaRDD().filter(str -> str != null)
.filter(str -> !str.isEmpty())
.filter(str -> !str.contains("movieId"))
.map(Movie::parseRating);
moviesRDD.foreach(m -> System.out.println(m));
Dataset<Row> csv_read = sparkSession.read().format("com.databricks.spark.csv")
.option("header", "true")
.option("inferSchema", "true")
.load("C:/Users/sumit.kumar/git/learning/src/main/resources/movies.csv");
csv_read.printSchema();
csv_read.show();
StructType customSchema = new StructType(new StructField[] {
new StructField("movieId", DataTypes.LongType, true, Metadata.empty()),
new StructField("title", DataTypes.StringType, true, Metadata.empty()),
new StructField("genres", DataTypes.StringType, true, Metadata.empty())
});
Dataset<Row> csv_custom_read = sparkSession.read().format("com.databricks.spark.csv")
.option("header", "true")
.schema(customSchema)
.load("C:/Users/sumit.kumar/git/learning/src/main/resources/movies.csv");
csv_custom_read.printSchema();
csv_custom_read.show();
csv_custom_read.write()
.format("com.databricks.spark.csv")
.option("header", "true")
.option("codec", "org.apache.hadoop.io.compress.GzipCodec")
.save("C:/Users/sumit.kumar/git/learning/src/main/resources/newMovies.csv");
}
Example 12: test_writeJdbc
import org.apache.spark.sql.types.DataTypes; // import required by this example
@Test
public void test_writeJdbc() throws IOException {
StructField[] structFields = new StructField[]{
new StructField("intColumn", DataTypes.IntegerType, true, Metadata.empty()),
new StructField("stringColumn", DataTypes.StringType, true, Metadata.empty()),
new StructField("longColumn", DataTypes.LongType, true, Metadata.empty()),
new StructField("textColumn", DataTypes.StringType, true, Metadata.empty())
};
StructType structType = new StructType(structFields);
List<Row> rows = new ArrayList<>();
rows.add(RowFactory.create(1, "v1", 11L, "text1"));
rows.add(RowFactory.create(2, "v2", 22L, "text2"));
Dataset<Row> df = sparkSession.createDataFrame(rows, structType);
df.registerTempTable("df1");
ActionStatement actionStatement = new ActionStatement();
actionStatement.setFunctionName("writeJdbc");
File file = File.createTempFile("h2dbfile", ".db");
file.deleteOnExit();
String connectionString = String.format("jdbc:h2:%s;DB_CLOSE_DELAY=-1;MODE=MySQL", file.getAbsolutePath());
actionStatement.getParamValues().add(new ActionParamValue(ValueType.String, connectionString));
actionStatement.getParamValues().add(new ActionParamValue(ValueType.String, "result1"));
actionStatement.getParamValues().add(new ActionParamValue(ValueType.String, "INTcolumn"));
actionStatement.getParamValues().add(new ActionParamValue(ValueType.String, "STRINGcolumn,LONGcolumn"));
actionStatement.getParamValues().add(new ActionParamValue(ValueType.String, "textColumn"));
actionStatement.getParamValues().add(new ActionParamValue(ValueType.String, "Append"));
actionStatement.getParamValues().add(new ActionParamValue(ValueType.Table, "df1"));
actionStatement.getParamValues().add(new ActionParamValue(ValueType.String, "2"));
actionStatement.getParamValues().add(new ActionParamValue(ValueType.String, "100000"));
actionStatement.getParamValues().add(new ActionParamValue(ValueType.String, ""));
QueryActionEngine executor = new QueryActionEngine();
executor.addActionStatementExecutor(WriteJdbcActionStatementExecutor.ACTION_NAME, new WriteJdbcActionStatementExecutor());
executor.execute(actionStatement, sparkSession);
List<List<Object>> queryResult = JdbcUtils.executeQuery(connectionString, "select * from result1");
Assert.assertEquals(2, queryResult.size());
Assert.assertEquals(4, queryResult.get(0).size());
Assert.assertEquals(new Integer(1), queryResult.get(0).get(0));
Assert.assertEquals("v1", queryResult.get(0).get(1));
Assert.assertEquals(new Integer(2), queryResult.get(1).get(0));
Assert.assertEquals("v2", queryResult.get(1).get(1));
}
Example 13: test_writeCsvFile
import org.apache.spark.sql.types.DataTypes; // import required by this example
@Test
public void test_writeCsvFile() throws IOException {
StructField[] structFields = new StructField[]{
new StructField("intColumn", DataTypes.IntegerType, true, Metadata.empty()),
new StructField("stringColumn", DataTypes.StringType, true, Metadata.empty()),
new StructField("longColumn", DataTypes.LongType, true, Metadata.empty()),
new StructField("textColumn", DataTypes.StringType, true, Metadata.empty())
};
StructType structType = new StructType(structFields);
List<Row> rows = new ArrayList<>();
rows.add(RowFactory.create(1, "v1", 11L, "text1"));
rows.add(RowFactory.create(2, "v2", 22L, "text2"));
Dataset<Row> df = sparkSession.createDataFrame(rows, structType);
df.registerTempTable("df1");
File file = File.createTempFile("test_output_file", ".tmp");
String filePath = file.getAbsolutePath();
file.delete();
boolean fileExists = file.exists();
Assert.assertFalse(fileExists);
ActionStatement actionStatement = new ActionStatement();
actionStatement.setFunctionName("writeCsvFile");
actionStatement.getParamValues().add(new ActionParamValue(ValueType.String, filePath));
actionStatement.getParamValues().add(new ActionParamValue(ValueType.String, "Append"));
actionStatement.getParamValues().add(new ActionParamValue(ValueType.Table, "df1"));
QueryActionEngine executor = new QueryActionEngine();
executor.addActionStatementExecutor(WriteCsvFileActionStatementExecutor.ACTION_NAME, new WriteCsvFileActionStatementExecutor());
executor.execute(actionStatement, sparkSession);
fileExists = file.exists();
Assert.assertTrue(fileExists);
file.delete();
}
Example 14: test_writeJsonFile
import org.apache.spark.sql.types.DataTypes; // import required by this example
@Test
public void test_writeJsonFile() throws IOException {
StructField[] structFields = new StructField[]{
new StructField("intColumn", DataTypes.IntegerType, true, Metadata.empty()),
new StructField("stringColumn", DataTypes.StringType, true, Metadata.empty()),
new StructField("longColumn", DataTypes.LongType, true, Metadata.empty()),
new StructField("textColumn", DataTypes.StringType, true, Metadata.empty())
};
StructType structType = new StructType(structFields);
List<Row> rows = new ArrayList<>();
rows.add(RowFactory.create(1, "v1", 11L, "text1"));
rows.add(RowFactory.create(2, "v2", 22L, "text2"));
Dataset<Row> df = sparkSession.createDataFrame(rows, structType);
df.registerTempTable("df1");
File file = File.createTempFile("test_output_file", ".tmp");
String filePath = file.getAbsolutePath();
file.delete();
boolean fileExists = file.exists();
Assert.assertFalse(fileExists);
ActionStatement actionStatement = new ActionStatement();
actionStatement.setFunctionName("writeJsonFile");
actionStatement.getParamValues().add(new ActionParamValue(ValueType.String, filePath));
actionStatement.getParamValues().add(new ActionParamValue(ValueType.String, "Append"));
actionStatement.getParamValues().add(new ActionParamValue(ValueType.Table, "df1"));
QueryActionEngine executor = new QueryActionEngine();
executor.addActionStatementExecutor(WriteJsonFileActionStatementExecutor.ACTION_NAME, new WriteJsonFileActionStatementExecutor());
executor.execute(actionStatement, sparkSession);
fileExists = file.exists();
Assert.assertTrue(fileExists);
file.delete();
}
Example 15: test_writeParquetFile
import org.apache.spark.sql.types.DataTypes; // import required by this example
@Test
public void test_writeParquetFile() throws IOException {
StructField[] structFields = new StructField[]{
new StructField("intColumn", DataTypes.IntegerType, true, Metadata.empty()),
new StructField("stringColumn", DataTypes.StringType, true, Metadata.empty()),
new StructField("longColumn", DataTypes.LongType, true, Metadata.empty()),
new StructField("textColumn", DataTypes.StringType, true, Metadata.empty())
};
StructType structType = new StructType(structFields);
List<Row> rows = new ArrayList<>();
rows.add(RowFactory.create(1, "v1", 11L, "text1"));
rows.add(RowFactory.create(2, "v2", 22L, "text2"));
Dataset<Row> df = sparkSession.createDataFrame(rows, structType);
df.registerTempTable("df1");
File file = File.createTempFile("test_output_file", ".tmp");
String filePath = file.getAbsolutePath();
file.delete();
boolean fileExists = file.exists();
Assert.assertFalse(fileExists);
ActionStatement actionStatement = new ActionStatement();
actionStatement.setFunctionName("writeParquetFile");
actionStatement.getParamValues().add(new ActionParamValue(ValueType.String, filePath));
actionStatement.getParamValues().add(new ActionParamValue(ValueType.String, "Append"));
actionStatement.getParamValues().add(new ActionParamValue(ValueType.Table, "df1"));
QueryActionEngine executor = new QueryActionEngine();
executor.addActionStatementExecutor(WriteParquetFileActionStatementExecutor.ACTION_NAME, new WriteParquetFileActionStatementExecutor());
executor.execute(actionStatement, sparkSession);
fileExists = file.exists();
Assert.assertTrue(fileExists);
file.delete();
}