Java DataTypes Class Code Examples

This article collects typical usage examples of the Java class org.apache.spark.sql.types.DataTypes. If you are unsure what the DataTypes class does, how to use it, or want to see it in working code, the curated class examples below should help.


The DataTypes class belongs to the org.apache.spark.sql.types package. Fifteen code examples of the class are shown below, ordered by popularity by default. You can upvote the examples you find useful; your feedback helps the system recommend better Java code samples.
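
Before the individual examples, here is a minimal, self-contained sketch of the two ways DataTypes is typically used: singleton constants for primitive column types, and factory methods for parameterized types such as arrays and decimals. The class name DataTypesDemo is ours for illustration, not taken from any project below.

import java.util.Arrays;
import java.util.List;

import org.apache.spark.sql.types.DataTypes;
import org.apache.spark.sql.types.StructField;
import org.apache.spark.sql.types.StructType;

public class DataTypesDemo {
    public static void main(String[] args) {
        // Singleton constants cover primitive column types
        List<StructField> fields = Arrays.asList(
                DataTypes.createStructField("id", DataTypes.LongType, false),
                DataTypes.createStructField("name", DataTypes.StringType, true),
                // Factory methods build parameterized types
                DataTypes.createStructField("tags",
                        DataTypes.createArrayType(DataTypes.StringType), true),
                DataTypes.createStructField("price",
                        DataTypes.createDecimalType(10, 2), true));
        StructType schema = DataTypes.createStructType(fields);
        System.out.println(schema.treeString());
    }
}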

Example 1: createNGramDataFrame

import org.apache.spark.sql.types.DataTypes; // import the required package/class
/**
 * Creates an n-gram data frame from text lines.
 * @param lines the input text lines, one sentence per line.
 * @return an n-gram data frame.
 */
DataFrame createNGramDataFrame(JavaRDD<String> lines) {
	JavaRDD<Row> rows = lines.map(new Function<String, Row>(){
		private static final long serialVersionUID = -4332903997027358601L;
		
		@Override
		public Row call(String line) throws Exception {
			return RowFactory.create(Arrays.asList(line.split("\\s+")));
		}
	});
	StructType schema = new StructType(new StructField[] {
			new StructField("words",
					DataTypes.createArrayType(DataTypes.StringType), false,
					Metadata.empty()) });
	DataFrame wordDF = new SQLContext(jsc).createDataFrame(rows, schema);
	// build a bigram language model
	NGram transformer = new NGram().setInputCol("words")
			.setOutputCol("ngrams").setN(2);
	DataFrame ngramDF = transformer.transform(wordDF);
	ngramDF.show(10, false);
	return ngramDF;
}
 
Author: phuonglh, Project: vn.vitk, Lines: 27, Source: NGramBuilder.java
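
For comparison, here is a hedged, standalone sketch of the same bigram idea against the Spark 2.x Dataset API (the example above targets the Spark 1.x DataFrame API); the SparkSession setup and sample data are ours.

import java.util.Arrays;
import java.util.List;

import org.apache.spark.ml.feature.NGram;
import org.apache.spark.sql.Dataset;
import org.apache.spark.sql.Row;
import org.apache.spark.sql.RowFactory;
import org.apache.spark.sql.SparkSession;
import org.apache.spark.sql.types.DataTypes;
import org.apache.spark.sql.types.Metadata;
import org.apache.spark.sql.types.StructField;
import org.apache.spark.sql.types.StructType;

public class NGramDemo {
    public static void main(String[] args) {
        SparkSession spark = SparkSession.builder()
                .appName("ngram-demo").master("local[*]").getOrCreate();
        List<Row> data = Arrays.asList(
                RowFactory.create(Arrays.asList("we", "love", "spark")),
                RowFactory.create(Arrays.asList("spark", "is", "fast")));
        StructType schema = new StructType(new StructField[]{
                new StructField("words",
                        DataTypes.createArrayType(DataTypes.StringType), false,
                        Metadata.empty())});
        Dataset<Row> wordDF = spark.createDataFrame(data, schema);
        // Same transformer as above: n = 2 yields bigrams
        NGram ngram = new NGram().setInputCol("words")
                .setOutputCol("ngrams").setN(2);
        ngram.transform(wordDF).show(false);
        spark.stop();
    }
}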

Example 2: test_getDataSetResult

import org.apache.spark.sql.types.DataTypes; // import the required package/class
@Test
public void test_getDataSetResult() {

    StructField[] structFields = new StructField[]{
            new StructField("intColumn", DataTypes.IntegerType, true, Metadata.empty()),
            new StructField("stringColumn", DataTypes.StringType, true, Metadata.empty())
    };

    StructType structType = new StructType(structFields);

    List<Row> rows = new ArrayList<>();
    rows.add(RowFactory.create(1, "v1"));
    rows.add(RowFactory.create(2, "v2"));

    Dataset<Row> df = sparkSession.createDataFrame(rows, structType);

    DataSetResult dataSetResult = SparkUtils.getDataSetResult(df);
    Assert.assertEquals(2, dataSetResult.getColumnNames().size());
    Assert.assertEquals(2, dataSetResult.getRows().size());
    Assert.assertEquals(new Integer(1), dataSetResult.getRows().get(0).get(0));
    Assert.assertEquals("v1", dataSetResult.getRows().get(0).get(1));
    Assert.assertEquals(new Integer(2), dataSetResult.getRows().get(1).get(0));
    Assert.assertEquals("v2", dataSetResult.getRows().get(1).get(1));
}
 
Author: uber, Project: uberscriptquery, Lines: 25, Source: SparkUtilsTest.java

Example 3: generateData_week_timepoints_by_10_minutes

import org.apache.spark.sql.types.DataTypes; // import the required package/class
private static Dataset<Row> generateData_week_timepoints_by_10_minutes(SparkSession spark) {
    StructField[] structFields = new StructField[1];
    org.apache.spark.sql.types.DataType dataType = DataTypes.IntegerType;
    String column = "timepoint";
    StructField structField = new StructField(column, dataType, true, Metadata.empty());
    structFields[0] = structField;

    StructType structType = new StructType(structFields);

    List<Row> rows = new ArrayList<>();

    int weekTotalMinutes = 7 * 24 * 60;
    int timepointIntervalMinutes = 10;
    for (int i = 0; i < weekTotalMinutes / timepointIntervalMinutes; i++) {
        Object[] objects = new Object[structFields.length];
        objects[0] = i;
        Row row = RowFactory.create(objects);
        rows.add(row);
    }

    Dataset<Row> df = spark.createDataFrame(rows, structType);
    return df;
}
 
Author: uber, Project: uberscriptquery, Lines: 24, Source: QueryEngine.java

Example 4: parse

import org.apache.spark.sql.types.DataTypes; // import the required package/class
/**
 * Parses a list of PoS-tagged sentences, each on a line, and writes the result to an output
 * file in a specified output format.
 * @param jsc the Java Spark context.
 * @param sentences the PoS-tagged input sentences.
 * @param outputFileName the name of the output file.
 * @param outputFormat the output format.
 */
public void parse(JavaSparkContext jsc, List<String> sentences, String outputFileName, OutputFormat outputFormat) {
	JavaRDD<String> input = jsc.parallelize(sentences);
	JavaRDD<Sentence> sents = input.map(new TaggedLineToSentenceFunction());
	JavaRDD<DependencyGraph> graphs = sents.map(new ParsingFunction());
	JavaRDD<Row> rows = graphs.map(new Function<DependencyGraph, Row>() {
		private static final long serialVersionUID = -812004521983071103L;
		public Row call(DependencyGraph graph) {
			return RowFactory.create(graph.getSentence().toString(), graph.dependencies());
		}
	});
	StructType schema = new StructType(new StructField[]{
		new StructField("sentence", DataTypes.StringType, false, Metadata.empty()),	
		new StructField("dependency", DataTypes.StringType, false, Metadata.empty())
	});
	SQLContext sqlContext = new SQLContext(jsc);
	DataFrame df = sqlContext.createDataFrame(rows, schema);
	
	if (outputFormat == OutputFormat.TEXT)  
		df.select("dependency").write().text(outputFileName);
	else 
		df.repartition(1).write().json(outputFileName);
}
 
Author: phuonglh, Project: vn.vitk, Lines: 31, Source: DependencyParser.java

Example 5: datasetSchema

import org.apache.spark.sql.types.DataTypes; // import the required package/class
static public StructType datasetSchema(Map<String, String> mappa) {
	StructType struct = new StructType();
	for (Map.Entry<String, String> entry : mappa.entrySet()) {
		switch (entry.getValue().toLowerCase()) {
		case "string":
		case "dictionary":
			struct = struct.add(entry.getKey(), DataTypes.StringType);
			break;
		case "int":
			struct = struct.add(entry.getKey(), DataTypes.IntegerType);
			break;
		case "double":
			struct = struct.add(entry.getKey(), DataTypes.DoubleType);
			break;
		}
	}
	return struct;
}
 
Author: pfratta, Project: ParquetUtils, Lines: 19, Source: ParquetGeneratorEngine.java

Example 6: translateDataType

import org.apache.spark.sql.types.DataTypes; // import the required package/class
static
public DataType translateDataType(org.dmg.pmml.DataType dataType){

	switch(dataType){
		case STRING:
			return DataTypes.StringType;
		case INTEGER:
			return DataTypes.IntegerType;
		case FLOAT:
			return DataTypes.FloatType;
		case DOUBLE:
			return DataTypes.DoubleType;
		case BOOLEAN:
			return DataTypes.BooleanType;
		default:
			throw new IllegalArgumentException();
	}
}
 
Author: jeremyore, Project: spark-pmml-import, Lines: 19, Source: SchemaUtil.java
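
A short usage sketch, assuming the JPMML org.dmg.pmml.DataType enum is on the classpath; the column name "prediction" is illustrative.

// Hypothetical usage of SchemaUtil.translateDataType with a JPMML enum value
org.apache.spark.sql.types.DataType sparkType =
        SchemaUtil.translateDataType(org.dmg.pmml.DataType.DOUBLE);
StructField field = DataTypes.createStructField("prediction", sparkType, false);
// field.dataType() is now DataTypes.DoubleType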

Example 7: getJdbcTypeString

import org.apache.spark.sql.types.DataTypes; // import the required package/class
public static String getJdbcTypeString(org.apache.spark.sql.types.DataType dataType, boolean isPrimaryKeyOrIndexKey, boolean isText) {
    int maxVarcharLength = isPrimaryKeyOrIndexKey ? 150 : 250;
    String sqlTypeForString = isText ? "TEXT" : String.format("VARCHAR(%s)", maxVarcharLength);
    if (dataType == DataTypes.TimestampType || dataType == DataTypes.DateType) {
        return "DATETIME";
    } else if (dataType == DataTypes.StringType) {
        return sqlTypeForString;
    } else if (dataType == DataTypes.IntegerType) {
        return "INT";
    } else if (dataType == DataTypes.LongType) {
        return "BIGINT";
    } else if (dataType == DataTypes.FloatType) {
        return "FLOAT";
    } else if (dataType == DataTypes.DoubleType) {
        return "DOUBLE";
    } else if (dataType == DataTypes.BooleanType) {
        return "TINYINT";
    } else if (dataType == DataTypes.ByteType) {
        return "SMALLINT";
    } else if (dataType instanceof org.apache.spark.sql.types.DecimalType) {
        org.apache.spark.sql.types.DecimalType decimalType = (org.apache.spark.sql.types.DecimalType) dataType;
        return String.format("DECIMAL(%d,%d)", decimalType.precision(), decimalType.scale());
    } else {
        throw new RuntimeException(String.format("Unsupported property type for JDBC: %s", dataType));
    }
}
 
Author: uber, Project: uberscriptquery, Lines: 27, Source: JdbcUtils.java
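
A hedged sketch of how such a mapping is typically consumed: assembling a MySQL-style CREATE TABLE statement from a Spark schema. The table name and columns are illustrative.

// Hypothetical DDL generation on top of JdbcUtils.getJdbcTypeString
StructType schema = new StructType()
        .add("id", DataTypes.LongType)
        .add("name", DataTypes.StringType)
        .add("price", DataTypes.createDecimalType(10, 2));
StringBuilder ddl = new StringBuilder("CREATE TABLE demo_table (");
String sep = "";
for (StructField f : schema.fields()) {
    // Non-key, non-text columns: strings map to VARCHAR(250)
    ddl.append(sep).append(f.name()).append(' ')
       .append(JdbcUtils.getJdbcTypeString(f.dataType(), false, false));
    sep = ", ";
}
ddl.append(")");
System.out.println(ddl);
// CREATE TABLE demo_table (id BIGINT, name VARCHAR(250), price DECIMAL(10,2))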

Example 8: init

import org.apache.spark.sql.types.DataTypes; // import the required package/class
@Override
public StructField init(Evaluator evaluator){
	OutputField field = getField();

	DataType dataType = field.getDataType();
	if(dataType == null){

		try {
			dataType = OutputUtil.getDataType(field.getOutputField(), (ModelEvaluator<?>)evaluator);

			this.formatString = false;
		} catch(PMMLException pe){
			dataType = DataType.STRING;

			this.formatString = true;
		}
	}

	return DataTypes.createStructField(getColumnName(), SchemaUtil.translateDataType(dataType), false);
}
 
Author: jeremyore, Project: spark-pmml-import, Lines: 21, Source: OutputColumnProducer.java

Example 9: generateData_numbers_1k

import org.apache.spark.sql.types.DataTypes; // import the required package/class
private static Dataset<Row> generateData_numbers_1k(SparkSession spark) {
    StructField[] structFields = new StructField[1];
    org.apache.spark.sql.types.DataType dataType = DataTypes.IntegerType;
    String column = "number";
    StructField structField = new StructField(column, dataType, true, Metadata.empty());
    structFields[0] = structField;

    StructType structType = new StructType(structFields);

    List<Row> rows = new ArrayList<>();

    for (int i = 0; i <= 1000; i++) {
        Object[] objects = new Object[structFields.length];
        objects[0] = i;
        Row row = RowFactory.create(objects);
        rows.add(row);
    }

    Dataset<Row> df = spark.createDataFrame(rows, structType);
    return df;
}
 
Author: uber, Project: uberscriptquery, Lines: 22, Source: QueryEngine.java

Example 10: writeEntityMetadata

import org.apache.spark.sql.types.DataTypes; // import the required package/class
/**
 * Write metadata describing entity tables
 *
 * @param entitySchema the entity schema
 */
public void writeEntityMetadata(EntitySchema entitySchema) {

    // create the schema
    List<StructField> fields = new ArrayList<>();
    fields.add(DataTypes.createStructField(ENTITIES_NAME, DataTypes.StringType, false));
    fields.add(DataTypes.createStructField(ENTITIES_URI, DataTypes.StringType, false));
    fields.add(DataTypes.createStructField(ENTITIES_LABEL, DataTypes.StringType, true));
    fields.add(DataTypes.createStructField(ENTITIES_NUM_ROWS, DataTypes.LongType, false));
    StructType schema = DataTypes.createStructType(fields);

    List<Tuple2<String, String>> indexes = new ArrayList<>();
    indexes.add(new Tuple2<>(ENTITIES_TABLE_NAME, ENTITIES_URI));

    List<Tuple2<String, String>> primaryKeys = new ArrayList<>();
    primaryKeys.add(new Tuple2<>(ENTITIES_TABLE_NAME, ENTITIES_NAME));

    final Map<String, String> uriLabels = rdfSchema.getUriLabels();
    // create table rows
    List<Row> rows = entitySchema.getTables().stream()
            .map(table -> {
                Object[] valueArray = new Object[]{
                        table.getName(),
                        table.getTypeURI(),
                        uriLabels.get(table.getTypeURI()),
                        table.getNumRows()
                };
                return RowFactory.create(valueArray);
            }).collect(Collectors.toList());

    // create and write the META_Entities dataframe
    DataFrame df = sql.createDataFrame(rows, schema);
    persistor.writeDataFrame(ENTITIES_TABLE_NAME, df);
    persistor.createPrimaryKeys(primaryKeys);
    persistor.createIndexes(indexes);
    df.unpersist();
}
 
Author: Merck, Project: rdf2x, Lines: 42, Source: MetadataWriter.java

Example 11: getDataType

import org.apache.spark.sql.types.DataTypes; // import the required package/class
private DataType getDataType(int type) {
    switch (type) {
        case LiteralType.BOOLEAN:
            return DataTypes.BooleanType;
        case LiteralType.STRING:
            return DataTypes.StringType;
        case LiteralType.FLOAT:
            return DataTypes.FloatType;
        case LiteralType.DOUBLE:
            return DataTypes.DoubleType;
        case LiteralType.INTEGER:
            return DataTypes.IntegerType;
        case LiteralType.LONG:
            return DataTypes.LongType;
        case LiteralType.DATETIME:
            // datetime not supported due to timezone issues with java.sql.Timestamp
            // check the InstanceAggregator for more info
            return DataTypes.StringType;
    }
    throw new NotImplementedException("Not able to write literal type " + type);
}
 
Author: Merck, Project: rdf2x, Lines: 22, Source: InstanceRelationWriter.java

Example 12: getDataTypeFromReturnType

import org.apache.spark.sql.types.DataTypes; // import the required package/class
private static DataType getDataTypeFromReturnType(Method method) {
    String typeName = method.getReturnType().getSimpleName();
    switch (typeName) {
    case "int":
    case "Integer":
        return DataTypes.IntegerType;
    case "long":
    case "Long":
        return DataTypes.LongType;
    case "float":
    case "Float":
        return DataTypes.FloatType;
    case "boolean":
    case "Boolean":
        return DataTypes.BooleanType;
    case "double":
    case "Double":
        return DataTypes.DoubleType;
    case "String":
        return DataTypes.StringType;
    case "Date":
    case "date":
        return DataTypes.DateType;
    case "Timestamp":
        return DataTypes.TimestampType;
    case "short":
    case "Short":
        return DataTypes.ShortType;
    case "Object":
        return DataTypes.BinaryType;
    default:
        log.debug("Using default for type [{}]", typeName);
        return DataTypes.BinaryType;
    }
}
 
Author: jgperrin, Project: net.jgp.labs.spark.datasources, Lines: 36, Source: SparkBeanUtils.java
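
A hypothetical usage sketch: walking a bean's getters and mapping each return type to a schema column. The Person bean and the getter filter are our assumptions, not part of the project's API.

import java.lang.reflect.Method;

// Hypothetical: derive a Spark schema from a bean class via reflection
StructType schema = new StructType();
for (Method m : Person.class.getMethods()) {
    boolean isGetter = m.getName().startsWith("get")
            && m.getParameterCount() == 0
            && !m.getName().equals("getClass");
    if (isGetter) {
        // "getFirstName" -> column "firstname"
        String column = m.getName().substring(3).toLowerCase();
        schema = schema.add(column, getDataTypeFromReturnType(m), true);
    }
}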

Example 13: convertSqlTypeToSparkSqlDataType

import org.apache.spark.sql.types.DataTypes; // import the required package/class
public static org.apache.spark.sql.types.DataType convertSqlTypeToSparkSqlDataType(int sqlType) {
    if (sqlType == java.sql.Types.BOOLEAN) {
        return DataTypes.BooleanType;
    } else if (sqlType == Types.TINYINT) {
        return DataTypes.ByteType;
    } else if (sqlType == Types.SMALLINT) {
        return DataTypes.ShortType;
    } else if (sqlType == java.sql.Types.INTEGER) {
        return DataTypes.IntegerType;
    } else if (sqlType == java.sql.Types.BIGINT) {
        return DataTypes.LongType;
    } else if (sqlType == Types.DECIMAL) {
        return DataTypes.createDecimalType();
    } else if (sqlType == java.sql.Types.FLOAT) {
        return DataTypes.FloatType;
    } else if (sqlType == java.sql.Types.DOUBLE) {
        return DataTypes.DoubleType;
    } else if (sqlType == Types.DATE) {
        return DataTypes.DateType;
    } else if (sqlType == Types.TIME) {
        return DataTypes.TimestampType;
    } else if (sqlType == Types.TIMESTAMP) {
        return DataTypes.TimestampType;
    } else if (sqlType == java.sql.Types.VARCHAR) {
        return DataTypes.StringType;
    } else {
        logger.warn(String.format("Using string for unsupported sql type %s", sqlType));
        return DataTypes.StringType;
    }
}
 
Author: uber, Project: uberscriptquery, Lines: 31, Source: SparkUtils.java
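
A hedged companion sketch, assuming an open JDBC connection: deriving a complete Spark schema from ResultSetMetaData with this converter. The helper name schemaFromResultSet is ours.

// Hypothetical helper built on SparkUtils.convertSqlTypeToSparkSqlDataType
public static StructType schemaFromResultSet(java.sql.ResultSetMetaData md)
        throws java.sql.SQLException {
    StructType schema = new StructType();
    for (int i = 1; i <= md.getColumnCount(); i++) {
        // JDBC columns are 1-indexed; mark every column nullable for safety
        schema = schema.add(md.getColumnLabel(i),
                SparkUtils.convertSqlTypeToSparkSqlDataType(md.getColumnType(i)), true);
    }
    return schema;
}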

Example 14: readJsonWithSchema

import org.apache.spark.sql.types.DataTypes; // import the required package/class
@Test
public void readJsonWithSchema() throws Exception {
  Map<String, Object> paramMap = new HashMap<>();
  paramMap.put(FileSystemInput.FORMAT_CONFIG, "json");
  paramMap.put(FileSystemInput.PATH_CONFIG, FileSystemInput.class.getResource(JSON_DATA).getPath());
  paramMap.put(FileSystemInput.FIELD_NAMES_CONFIG, Lists.newArrayList("field1", "field2", "field3", "field4"));
  paramMap.put(FileSystemInput.FIELD_TYPES_CONFIG, Lists.newArrayList("int", "string", "boolean", "string"));
  config = ConfigFactory.parseMap(paramMap);

  FileSystemInput jsonInput = new FileSystemInput();
  jsonInput.configure(config);

  Dataset<Row> dataFrame = jsonInput.read();
  dataFrame.printSchema();
  dataFrame.show();

  assertEquals(4, dataFrame.count());

  Row first = dataFrame.first();
  assertEquals("dog", first.getString(3));
  assertEquals("field1", first.schema().fields()[0].name());
  assertEquals(DataTypes.IntegerType, first.schema().fields()[0].dataType());
}
 
Author: cloudera-labs, Project: envelope, Lines: 24, Source: TestFileSystemInput.java

Example 15: SparkRDF4JSparqlRelation

import org.apache.spark.sql.types.DataTypes; // import the required package/class
/**
 * Constructor for a new {@link SparkRDF4JSparqlRelation} based on the given
 * service, query, schema, and context.
 * 
 * @param service
 *            The URL to the SPARQL service to be used for this query.
 * @param parsedQuery
 *            The preparsed SPARQL query.
 * @param schema
 *            The schema to use for the results of the query.
 * @param sqlContext
 *            The context for the query.
 */
SparkRDF4JSparqlRelation(String service, ParsedQuery parsedQuery, StructType schema, SQLContext sqlContext) {
	this.serviceField = Objects.requireNonNull(service);
	this.queryField = Objects.requireNonNull(parsedQuery);
	this.schemaField = Optional.ofNullable(schema).orElseGet(() -> {
		// These bindings are guaranteed to be present and are not nullable
		Set<String> assuredBindingNames = this.queryField.getTupleExpr().getAssuredBindingNames();
		// If bindings are only in the following they are nullable
		Set<String> bindingNames = this.queryField.getTupleExpr().getBindingNames();
		StructType result = new StructType();
		for (String binding : bindingNames) {
			result = result.add(binding, DataTypes.StringType, !assuredBindingNames.contains(binding));
		}
		return result;
	});
	this.sqlContextField = sqlContext;
}
 
Author: ansell, Project: spark-rdf4j, Lines: 30, Source: SparkRDF4JSparqlRelation.java


Note: The org.apache.spark.sql.types.DataTypes examples in this article were compiled by 纯净天空 from open-source code and documentation platforms such as GitHub and MSDocs. The snippets are drawn from open-source projects contributed by many developers, and copyright remains with the original authors; consult the corresponding project's license before distributing or reusing the code. Do not reproduce this article without permission.