This article collects typical usage examples of org.apache.spark.sql.types.DataTypes.StringType in Java, a static singleton constant representing Spark SQL's string type. If you are wondering what DataTypes.StringType is, how to use it, or where to find examples of it, the code samples curated here may help. You can also explore further usage of its containing class, org.apache.spark.sql.types.DataTypes.
The following presents 15 code examples that use DataTypes.StringType, ordered by popularity.
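Before the examples, here is a minimal, self-contained sketch of the most common use of DataTypes.StringType: declaring a string column in a Spark SQL schema and building a small DataFrame against it. All class, column, and application names in this sketch are illustrative, not taken from the examples below.

import java.util.Arrays;
import java.util.List;
import org.apache.spark.sql.Dataset;
import org.apache.spark.sql.Row;
import org.apache.spark.sql.RowFactory;
import org.apache.spark.sql.SparkSession;
import org.apache.spark.sql.types.DataTypes;
import org.apache.spark.sql.types.Metadata;
import org.apache.spark.sql.types.StructField;
import org.apache.spark.sql.types.StructType;

public class StringTypeSketch {
    public static void main(String[] args) {
        SparkSession spark = SparkSession.builder()
                .master("local[*]").appName("StringTypeSketch").getOrCreate();
        // DataTypes.StringType is the singleton instance of Spark SQL's string type.
        StructType schema = new StructType(new StructField[]{
                new StructField("name", DataTypes.StringType, false, Metadata.empty())
        });
        List<Row> rows = Arrays.asList(RowFactory.create("alice"), RowFactory.create("bob"));
        Dataset<Row> df = spark.createDataFrame(rows, schema);
        df.show();
        spark.stop();
    }
}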
Example 1: test_getDataSetResult
import org.apache.spark.sql.types.DataTypes; // import the package/class the method depends on
@Test
public void test_getDataSetResult() {
    StructField[] structFields = new StructField[]{
            new StructField("intColumn", DataTypes.IntegerType, true, Metadata.empty()),
            new StructField("stringColumn", DataTypes.StringType, true, Metadata.empty())
    };
    StructType structType = new StructType(structFields);
    List<Row> rows = new ArrayList<>();
    rows.add(RowFactory.create(1, "v1"));
    rows.add(RowFactory.create(2, "v2"));
    Dataset<Row> df = sparkSession.createDataFrame(rows, structType);
    DataSetResult dataSetResult = SparkUtils.getDataSetResult(df);
    Assert.assertEquals(2, dataSetResult.getColumnNames().size());
    Assert.assertEquals(2, dataSetResult.getRows().size());
    Assert.assertEquals(new Integer(1), dataSetResult.getRows().get(0).get(0));
    Assert.assertEquals("v1", dataSetResult.getRows().get(0).get(1));
    Assert.assertEquals(new Integer(2), dataSetResult.getRows().get(1).get(0));
    Assert.assertEquals("v2", dataSetResult.getRows().get(1).get(1));
}
Example 2: parse
import org.apache.spark.sql.types.DataTypes; // import the package/class the method depends on
/**
 * Parses a list of PoS-tagged sentences, each on its own line, and writes the result to an output
 * file in a specified output format.
 * @param jsc
 * @param sentences
 * @param outputFileName
 * @param outputFormat
 */
public void parse(JavaSparkContext jsc, List<String> sentences, String outputFileName, OutputFormat outputFormat) {
    JavaRDD<String> input = jsc.parallelize(sentences);
    JavaRDD<Sentence> sents = input.map(new TaggedLineToSentenceFunction());
    JavaRDD<DependencyGraph> graphs = sents.map(new ParsingFunction());
    JavaRDD<Row> rows = graphs.map(new Function<DependencyGraph, Row>() {
        private static final long serialVersionUID = -812004521983071103L;
        public Row call(DependencyGraph graph) {
            return RowFactory.create(graph.getSentence().toString(), graph.dependencies());
        }
    });
    StructType schema = new StructType(new StructField[]{
            new StructField("sentence", DataTypes.StringType, false, Metadata.empty()),
            new StructField("dependency", DataTypes.StringType, false, Metadata.empty())
    });
    SQLContext sqlContext = new SQLContext(jsc);
    DataFrame df = sqlContext.createDataFrame(rows, schema);
    if (outputFormat == OutputFormat.TEXT)
        df.select("dependency").write().text(outputFileName);
    else
        df.repartition(1).write().json(outputFileName);
}
Example 3: translateDataType
import org.apache.spark.sql.types.DataTypes; // import the package/class the method depends on
static
public DataType translateDataType(org.dmg.pmml.DataType dataType){
    switch(dataType){
        case STRING:
            return DataTypes.StringType;
        case INTEGER:
            return DataTypes.IntegerType;
        case FLOAT:
            return DataTypes.FloatType;
        case DOUBLE:
            return DataTypes.DoubleType;
        case BOOLEAN:
            return DataTypes.BooleanType;
        default:
            throw new IllegalArgumentException();
    }
}
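A note on why switch-style translators like this are safe to use with reference equality downstream: the DataTypes constants are singletons. A small standalone check (the class name is illustrative):

import org.apache.spark.sql.types.DataType;
import org.apache.spark.sql.types.DataTypes;

public class TranslateDataTypeDemo {
    public static void main(String[] args) {
        DataType t = DataTypes.StringType;
        // Singleton constants allow == comparison and carry their SQL type name.
        System.out.println(t == DataTypes.StringType); // true
        System.out.println(t.typeName());              // string
    }
}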
Example 4: getJdbcTypeString
import org.apache.spark.sql.types.DataTypes; // import the package/class the method depends on
public static String getJdbcTypeString(org.apache.spark.sql.types.DataType dataType, boolean isPrimaryKeyOrIndexKey, boolean isText) {
    int maxVarcharLength = isPrimaryKeyOrIndexKey ? 150 : 250;
    String sqlTypeForString = isText ? "TEXT" : String.format("VARCHAR(%s)", maxVarcharLength);
    if (dataType == DataTypes.TimestampType || dataType == DataTypes.DateType) {
        return "DATETIME";
    } else if (dataType == DataTypes.StringType) {
        return sqlTypeForString;
    } else if (dataType == DataTypes.IntegerType) {
        return "INT";
    } else if (dataType == DataTypes.LongType) {
        return "BIGINT";
    } else if (dataType == DataTypes.FloatType) {
        return "FLOAT";
    } else if (dataType == DataTypes.DoubleType) {
        return "DOUBLE";
    } else if (dataType == DataTypes.BooleanType) {
        return "TINYINT";
    } else if (dataType == DataTypes.ByteType) {
        return "SMALLINT";
    } else if (dataType instanceof org.apache.spark.sql.types.DecimalType) {
        org.apache.spark.sql.types.DecimalType decimalType = (org.apache.spark.sql.types.DecimalType) dataType;
        return String.format("DECIMAL(%d,%d)", decimalType.precision(), decimalType.scale());
    } else {
        throw new RuntimeException(String.format("Unsupported property type for JDBC: %s", dataType));
    }
}
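A minimal sketch of how such a mapping might be driven from a schema to emit DDL; the table name, columns, and fallback types here are illustrative, not part of the example above:

import org.apache.spark.sql.types.DataTypes;
import org.apache.spark.sql.types.Metadata;
import org.apache.spark.sql.types.StructField;
import org.apache.spark.sql.types.StructType;

public class JdbcDdlSketch {
    public static void main(String[] args) {
        StructType schema = new StructType(new StructField[]{
                new StructField("id", DataTypes.LongType, false, Metadata.empty()),
                new StructField("name", DataTypes.StringType, true, Metadata.empty())
        });
        StringBuilder ddl = new StringBuilder("CREATE TABLE t (");
        for (StructField f : schema.fields()) {
            // Reference comparison is safe: Spark's DataTypes constants are singletons.
            String sqlType = f.dataType() == DataTypes.StringType ? "VARCHAR(250)"
                    : f.dataType() == DataTypes.LongType ? "BIGINT" : "TEXT";
            ddl.append(f.name()).append(' ').append(sqlType).append(", ");
        }
        ddl.setLength(ddl.length() - 2); // drop the trailing ", "
        ddl.append(')');
        System.out.println(ddl); // CREATE TABLE t (id BIGINT, name VARCHAR(250))
    }
}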
Example 5: getDataType
import org.apache.spark.sql.types.DataTypes; // import the package/class the method depends on
private DataType getDataType(int type) {
    switch (type) {
        case LiteralType.BOOLEAN:
            return DataTypes.BooleanType;
        case LiteralType.STRING:
            return DataTypes.StringType;
        case LiteralType.FLOAT:
            return DataTypes.FloatType;
        case LiteralType.DOUBLE:
            return DataTypes.DoubleType;
        case LiteralType.INTEGER:
            return DataTypes.IntegerType;
        case LiteralType.LONG:
            return DataTypes.LongType;
        case LiteralType.DATETIME:
            // datetime is not supported due to timezone issues with java.sql.Timestamp;
            // check the InstanceAggregator for more info
            return DataTypes.StringType;
    }
    throw new NotImplementedException("Not able to write literal type " + type);
}
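When DATETIME values fall back to StringType as above, the caller has to serialize timestamps to text itself. One hypothetical serialization choice is ISO-8601 with an explicit offset; the formatter here is an assumption for illustration, not taken from this codebase:

import java.time.ZonedDateTime;
import java.time.format.DateTimeFormatter;

public class DatetimeAsStringSketch {
    public static void main(String[] args) {
        // Render the timestamp as an unambiguous, timezone-carrying string.
        String value = ZonedDateTime.now().format(DateTimeFormatter.ISO_OFFSET_DATE_TIME);
        System.out.println(value); // e.g. 2017-06-01T10:15:30+02:00
    }
}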
Example 6: getDataTypeFromReturnType
import org.apache.spark.sql.types.DataTypes; // import the package/class the method depends on
private static DataType getDataTypeFromReturnType(Method method) {
    String typeName = method.getReturnType().getSimpleName();
    switch (typeName) {
        case "int":
        case "Integer":
            return DataTypes.IntegerType;
        case "long":
        case "Long":
            return DataTypes.LongType;
        case "float":
        case "Float":
            return DataTypes.FloatType;
        case "boolean":
        case "Boolean":
            return DataTypes.BooleanType;
        case "double":
        case "Double":
            return DataTypes.DoubleType;
        case "String":
            return DataTypes.StringType;
        case "Date":
        case "date":
            return DataTypes.DateType;
        case "Timestamp":
            return DataTypes.TimestampType;
        case "short":
        case "Short":
            return DataTypes.ShortType;
        case "Object":
            return DataTypes.BinaryType;
        default:
            log.debug("Using default for type [{}]", typeName);
            return DataTypes.BinaryType;
    }
}
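A self-contained sketch of the same reflection idiom: inspect a getter's return type and pick a Spark type. The Person bean is illustrative:

import java.lang.reflect.Method;
import org.apache.spark.sql.types.DataType;
import org.apache.spark.sql.types.DataTypes;

public class ReflectionTypeSketch {
    public static class Person {
        public String getName() { return "alice"; }
    }
    public static void main(String[] args) throws Exception {
        Method m = Person.class.getMethod("getName");
        // "String" maps to DataTypes.StringType, as in the switch above.
        DataType t = "String".equals(m.getReturnType().getSimpleName())
                ? DataTypes.StringType : DataTypes.BinaryType;
        System.out.println(t); // StringType
    }
}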
Example 7: convertSqlTypeToSparkSqlDataType
import org.apache.spark.sql.types.DataTypes; // import the package/class the method depends on
public static org.apache.spark.sql.types.DataType convertSqlTypeToSparkSqlDataType(int sqlType) {
    if (sqlType == java.sql.Types.BOOLEAN) {
        return DataTypes.BooleanType;
    } else if (sqlType == Types.TINYINT) {
        return DataTypes.ByteType;
    } else if (sqlType == Types.SMALLINT) {
        return DataTypes.ShortType;
    } else if (sqlType == java.sql.Types.INTEGER) {
        return DataTypes.IntegerType;
    } else if (sqlType == java.sql.Types.BIGINT) {
        return DataTypes.LongType;
    } else if (sqlType == Types.DECIMAL) {
        return DataTypes.createDecimalType();
    } else if (sqlType == java.sql.Types.FLOAT) {
        return DataTypes.FloatType;
    } else if (sqlType == java.sql.Types.DOUBLE) {
        return DataTypes.DoubleType;
    } else if (sqlType == Types.DATE) {
        return DataTypes.DateType;
    } else if (sqlType == Types.TIME) {
        return DataTypes.TimestampType;
    } else if (sqlType == Types.TIMESTAMP) {
        return DataTypes.TimestampType;
    } else if (sqlType == java.sql.Types.VARCHAR) {
        return DataTypes.StringType;
    } else {
        logger.warn(String.format("Using string for unsupported sql type %s", sqlType));
        return DataTypes.StringType;
    }
}
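A sketch of where such a converter is typically fed from: ResultSetMetaData. The in-memory H2 database URL is an assumption for illustration (it requires the H2 driver on the classpath); any JDBC source yields the same java.sql.Types codes:

import java.sql.Connection;
import java.sql.DriverManager;
import java.sql.ResultSet;
import java.sql.ResultSetMetaData;
import java.sql.Types;
import org.apache.spark.sql.types.DataTypes;

public class JdbcMetadataSketch {
    public static void main(String[] args) throws Exception {
        try (Connection c = DriverManager.getConnection("jdbc:h2:mem:demo")) {
            c.createStatement().execute("CREATE TABLE t(name VARCHAR(10))");
            try (ResultSet rs = c.createStatement().executeQuery("SELECT name FROM t")) {
                ResultSetMetaData md = rs.getMetaData();
                // A VARCHAR column reports Types.VARCHAR and maps to StringType.
                System.out.println(md.getColumnType(1) == Types.VARCHAR
                        ? DataTypes.StringType : DataTypes.BinaryType);
            }
        }
    }
}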
Example 8: fromSchemaSequence
import org.apache.spark.sql.types.DataTypes; // import the package/class the method depends on
/**
 * Convert the DataVec sequence schema to a StructType for Spark, for example for use in
 * {@link #toDataFrameSequence(Schema, JavaRDD)}.
 * <b>Note</b>: as per {@link #toDataFrameSequence(Schema, JavaRDD)}, the StructType has two additional columns added to it:<br>
 * - Column 0: Sequence UUID (name: {@link #SEQUENCE_UUID_COLUMN}) - a UUID for the original sequence<br>
 * - Column 1: Sequence index (name: {@link #SEQUENCE_INDEX_COLUMN}) - an index (integer, starting at 0) for the position
 * of this record in the original time series.<br>
 * These two columns are required if the data is to be converted back into a sequence at a later point, for example
 * using {@link #toRecordsSequence(DataRowsFacade)}.
 *
 * @param schema Schema to convert
 * @return StructType for the schema
 */
public static StructType fromSchemaSequence(Schema schema) {
    StructField[] structFields = new StructField[schema.numColumns() + 2];
    structFields[0] = new StructField(SEQUENCE_UUID_COLUMN, DataTypes.StringType, false, Metadata.empty());
    structFields[1] = new StructField(SEQUENCE_INDEX_COLUMN, DataTypes.IntegerType, false, Metadata.empty());
    for (int i = 0; i < schema.numColumns(); i++) {
        switch (schema.getColumnTypes().get(i)) {
            case Double:
                structFields[i + 2] = new StructField(schema.getName(i), DataTypes.DoubleType, false, Metadata.empty());
                break;
            case Integer:
                structFields[i + 2] = new StructField(schema.getName(i), DataTypes.IntegerType, false, Metadata.empty());
                break;
            case Long:
                structFields[i + 2] = new StructField(schema.getName(i), DataTypes.LongType, false, Metadata.empty());
                break;
            case Float:
                structFields[i + 2] = new StructField(schema.getName(i), DataTypes.FloatType, false, Metadata.empty());
                break;
            default:
                throw new IllegalStateException(
                        "This API should not be used with strings, binary data or ndarrays. This is only for columnar data");
        }
    }
    return new StructType(structFields);
}
Example 9: testGetPartialKey
import org.apache.spark.sql.types.DataTypes; // import the package/class the method depends on
@Test
public void testGetPartialKey() throws Exception {
    addEntriesToHBase();
    Table table = connection.getTable(TableName.valueOf(TABLE));
    scanAndCountTable(table, INPUT_ROWS * 4);
    Config config = ConfigUtils.configFromResource("/hbase/hbase-output-test.conf").getConfig("output");
    config = config.withValue("zookeeper",
            ConfigValueFactory.fromAnyRef("localhost:" + utility.getZkCluster().getClientPort()));
    HBaseOutput output = new HBaseOutput();
    output.configure(config);
    StructType partialKeySchema = new StructType(new StructField[] {
            new StructField("symbol", DataTypes.StringType, false, null)
    });
    List<Row> filters = Lists.newArrayList();
    filters.add(new RowWithSchema(partialKeySchema, "AAPL"));
    filters.add(new RowWithSchema(partialKeySchema, "GOOG"));
    Iterable<Row> filtered = output.getExistingForFilters(filters);
    assertEquals(25, Iterables.size(filtered));
}
Example 10: createDataFrame
import org.apache.spark.sql.types.DataTypes; // import the package/class the method depends on
/**
 * Creates a data frame from a list of tagged sentences.
 * @param taggedSentences
 * @return a data frame of two columns: "sentence" and "partOfSpeech".
 */
public DataFrame createDataFrame(List<String> taggedSentences) {
    List<String> wordSequences = new LinkedList<String>();
    List<String> tagSequences = new LinkedList<String>();
    for (String taggedSentence : taggedSentences) {
        StringBuilder wordBuf = new StringBuilder();
        StringBuilder tagBuf = new StringBuilder();
        String[] tokens = taggedSentence.split("\\s+");
        for (String token : tokens) {
            String[] parts = token.split("/");
            if (parts.length == 2) {
                wordBuf.append(parts[0]);
                wordBuf.append(' ');
                tagBuf.append(parts[1]);
                tagBuf.append(' ');
            } else { // this token is "///"
                wordBuf.append('/');
                wordBuf.append(' ');
                tagBuf.append('/');
                tagBuf.append(' ');
            }
        }
        wordSequences.add(wordBuf.toString().trim());
        tagSequences.add(tagBuf.toString().trim());
    }
    if (verbose) {
        System.out.println("Number of sentences = " + wordSequences.size());
    }
    List<Row> rows = new LinkedList<Row>();
    for (int i = 0; i < wordSequences.size(); i++) {
        rows.add(RowFactory.create(wordSequences.get(i), tagSequences.get(i)));
    }
    JavaRDD<Row> jrdd = jsc.parallelize(rows);
    StructType schema = new StructType(new StructField[]{
            new StructField("sentence", DataTypes.StringType, false, Metadata.empty()),
            new StructField("partOfSpeech", DataTypes.StringType, false, Metadata.empty())
    });
    return new SQLContext(jsc).createDataFrame(jrdd, schema);
}
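The word/tag splitting at the heart of this method can be exercised on its own, without Spark. A tiny standalone sketch (the sample sentence is illustrative):

public class TaggedSplitSketch {
    public static void main(String[] args) {
        String tagged = "The/DT dog/NN barks/VBZ";
        StringBuilder words = new StringBuilder();
        StringBuilder tags = new StringBuilder();
        for (String token : tagged.split("\\s+")) {
            // Each token is "word/tag"; split on the slash.
            String[] parts = token.split("/");
            words.append(parts[0]).append(' ');
            tags.append(parts[1]).append(' ');
        }
        System.out.println(words.toString().trim()); // The dog barks
        System.out.println(tags.toString().trim());  // DT NN VBZ
    }
}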
Example 11: tag
import org.apache.spark.sql.types.DataTypes; // import the package/class the method depends on
/**
 * Tags a list of sentences and returns a list of tag sequences.
 * @param sentences
 * @return a list of tagged sequences.
 */
public List<String> tag(List<String> sentences) {
    List<Row> rows = new LinkedList<Row>();
    for (String sentence : sentences) {
        rows.add(RowFactory.create(sentence));
    }
    StructType schema = new StructType(new StructField[]{
            new StructField("sentence", DataTypes.StringType, false, Metadata.empty())
    });
    SQLContext sqlContext = new SQLContext(jsc);
    DataFrame input = sqlContext.createDataFrame(rows, schema);
    if (cmmModel != null) {
        DataFrame output = cmmModel.transform(input).repartition(1);
        return output.javaRDD().map(new RowToStringFunction(1)).collect();
    } else {
        System.err.println("Tagging model is null. You need to create or load a model first.");
        return null;
    }
}
Example 12: transform
import org.apache.spark.sql.types.DataTypes; // import the package/class the method depends on
@Override
public DataFrame transform(DataFrame dataset) {
    JavaRDD<Row> output = dataset.javaRDD().map(new DecodeFunction());
    StructType schema = new StructType(new StructField[]{
            new StructField("sentence", DataTypes.StringType, false, Metadata.empty()),
            new StructField("prediction", DataTypes.StringType, false, Metadata.empty())
    });
    return dataset.sqlContext().createDataFrame(output, schema);
}
Example 13: indexrSchemaToSparkSchema
import org.apache.spark.sql.types.DataTypes; // import the package/class the method depends on
public static List<StructField> indexrSchemaToSparkSchema(SegmentSchema schema) {
    List<StructField> fields = new ArrayList<>();
    for (ColumnSchema cs : schema.getColumns()) {
        DataType dataType;
        switch (cs.getSqlType()) {
            case INT:
                dataType = DataTypes.IntegerType;
                break;
            case BIGINT:
                dataType = DataTypes.LongType;
                break;
            case FLOAT:
                dataType = DataTypes.FloatType;
                break;
            case DOUBLE:
                dataType = DataTypes.DoubleType;
                break;
            case VARCHAR:
                dataType = DataTypes.StringType;
                break;
            case DATE:
                dataType = DataTypes.DateType;
                break;
            case DATETIME:
                dataType = DataTypes.TimestampType;
                break;
            default:
                throw new IllegalStateException("Unsupported type: " + cs.getSqlType());
        }
        fields.add(new StructField(cs.getName(), dataType, scala.Boolean.box(false), Metadata.empty()));
    }
    return fields;
}
Example 14: parseDataType
import org.apache.spark.sql.types.DataTypes; // import the package/class the method depends on
private static DataType parseDataType(Config fieldsConfig) {
    String type = fieldsConfig.getString(FIELD_TYPE_CONFIG);
    switch (type) {
        case "string":
            return DataTypes.StringType;
        case "byte":
            return DataTypes.ByteType;
        case "short":
            return DataTypes.ShortType;
        case "int":
            return DataTypes.IntegerType;
        case "long":
            return DataTypes.LongType;
        case "float":
            return DataTypes.FloatType;
        case "double":
            return DataTypes.DoubleType;
        case "decimal":
            ConfigUtils.assertConfig(fieldsConfig, DECIMAL_SCALE_CONFIG);
            ConfigUtils.assertConfig(fieldsConfig, DECIMAL_PRECISION_CONFIG);
            // DataTypes.createDecimalType expects (precision, scale), in that order
            return DataTypes.createDecimalType(
                    fieldsConfig.getInt(DECIMAL_PRECISION_CONFIG),
                    fieldsConfig.getInt(DECIMAL_SCALE_CONFIG));
        case "boolean":
            return DataTypes.BooleanType;
        case "binary":
            return DataTypes.BinaryType;
        case "date":
            return DataTypes.DateType;
        case "timestamp":
            return DataTypes.TimestampType;
        case "array":
        case "map":
        case "struct":
            throw new RuntimeException("Schema check does not currently support complex types");
        default:
            throw new RuntimeException("Unknown type: " + type);
    }
}
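A sketch of feeding such a parser from a Typesafe Config snippet; the config literal and the "type" key mirror the example above but are otherwise illustrative:

import com.typesafe.config.Config;
import com.typesafe.config.ConfigFactory;
import org.apache.spark.sql.types.DataTypes;

public class ParseTypeSketch {
    public static void main(String[] args) {
        Config field = ConfigFactory.parseString("type = string");
        // "string" resolves to the StringType singleton, as in the switch above.
        System.out.println("string".equals(field.getString("type"))
                ? DataTypes.StringType : DataTypes.BinaryType); // StringType
    }
}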
Example 15: evaluateStepByKeyDecision
import org.apache.spark.sql.types.DataTypes; // import the package/class the method depends on
private boolean evaluateStepByKeyDecision(Set<Step> steps) {
    Optional<Step> optionalStep = StepUtils.getStepForName(stepByKeyStepName, steps);
    if (!optionalStep.isPresent()) {
        throw new RuntimeException("Unknown decision step's key step: " + stepByKeyStepName);
    }
    if (!(optionalStep.get() instanceof DataStep)) {
        throw new RuntimeException("Decision step's key step is not a data step: " + optionalStep.get().getName());
    }
    Dataset<Row> keyDataset = ((DataStep) optionalStep.get()).getData();
    if (keyDataset.schema().fields().length != 2 ||
        keyDataset.schema().fields()[0].dataType() != DataTypes.StringType ||
        keyDataset.schema().fields()[1].dataType() != DataTypes.BooleanType)
    {
        throw new RuntimeException("Decision step's key step must contain a string column and then a boolean column");
    }
    String keyColumnName = keyDataset.schema().fieldNames()[0];
    String whereClause = keyColumnName + " = '" + stepByKeyKey + "'";
    Dataset<Row> decisionDataset = keyDataset.where(whereClause);
    if (decisionDataset.count() != 1) {
        throw new RuntimeException("Decision step's key step must contain a single record for the given key");
    }
    boolean decision = decisionDataset.collectAsList().get(0).getBoolean(1);
    return decision;
}
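A self-contained sketch of the (string key, boolean decision) dataset shape this method expects; all names and values here are illustrative:

import java.util.Arrays;
import org.apache.spark.sql.Dataset;
import org.apache.spark.sql.Row;
import org.apache.spark.sql.RowFactory;
import org.apache.spark.sql.SparkSession;
import org.apache.spark.sql.types.DataTypes;
import org.apache.spark.sql.types.Metadata;
import org.apache.spark.sql.types.StructField;
import org.apache.spark.sql.types.StructType;

public class DecisionKeySketch {
    public static void main(String[] args) {
        SparkSession spark = SparkSession.builder()
                .master("local[*]").appName("DecisionKeySketch").getOrCreate();
        StructType schema = new StructType(new StructField[]{
                new StructField("key", DataTypes.StringType, false, Metadata.empty()),
                new StructField("decision", DataTypes.BooleanType, false, Metadata.empty())
        });
        Dataset<Row> ds = spark.createDataFrame(Arrays.asList(
                RowFactory.create("run_export", true),
                RowFactory.create("run_cleanup", false)), schema);
        // Exactly one row per key; look up the boolean for a single key.
        boolean decision = ds.where("key = 'run_export'").collectAsList().get(0).getBoolean(1);
        System.out.println(decision); // true
        spark.stop();
    }
}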