

Java DataTypes.StringType Code Examples

This article collects typical usage examples of org.apache.spark.sql.types.DataTypes.StringType in Java. If you are wondering what DataTypes.StringType is for, how to use it, or want to see it in real code, the curated examples below should help. Strictly speaking, StringType is a public static field of org.apache.spark.sql.types.DataTypes rather than a method: it holds the singleton Spark SQL DataType representing strings. You can also explore the other members of org.apache.spark.sql.types.DataTypes.


Below are 15 code examples of DataTypes.StringType, sorted by popularity by default. You can upvote the examples you like or find useful; your votes help the system recommend better Java code examples.
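
As a quick orientation before the examples: DataTypes.StringType is passed wherever a Spark SQL DataType is expected, most commonly when declaring a StructField. A minimal sketch (names are illustrative):

import org.apache.spark.sql.types.DataTypes;
import org.apache.spark.sql.types.Metadata;
import org.apache.spark.sql.types.StructField;
import org.apache.spark.sql.types.StructType;

// One nullable string column, wrapped into a single-column schema.
StructField nameField = new StructField("name", DataTypes.StringType, true, Metadata.empty());
StructType schema = new StructType(new StructField[]{nameField});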

Example 1: test_getDataSetResult

import org.apache.spark.sql.types.DataTypes; // import the class this example depends on
@Test
public void test_getDataSetResult() {

    StructField[] structFields = new StructField[]{
            new StructField("intColumn", DataTypes.IntegerType, true, Metadata.empty()),
            new StructField("stringColumn", DataTypes.StringType, true, Metadata.empty())
    };

    StructType structType = new StructType(structFields);

    List<Row> rows = new ArrayList<>();
    rows.add(RowFactory.create(1, "v1"));
    rows.add(RowFactory.create(2, "v2"));

    Dataset<Row> df = sparkSession.createDataFrame(rows, structType);

    DataSetResult dataSetResult = SparkUtils.getDataSetResult(df);
    Assert.assertEquals(2, dataSetResult.getColumnNames().size());
    Assert.assertEquals(2, dataSetResult.getRows().size());
    Assert.assertEquals(new Integer(1), dataSetResult.getRows().get(0).get(0));
    Assert.assertEquals("v1", dataSetResult.getRows().get(0).get(1));
    Assert.assertEquals(new Integer(2), dataSetResult.getRows().get(1).get(0));
    Assert.assertEquals("v2", dataSetResult.getRows().get(1).get(1));
}
 
Source: uber/uberscriptquery, file: SparkUtilsTest.java, 25 lines
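
The test assumes a sparkSession field initialized elsewhere in the test class. A minimal local-mode setup that would make the snippet runnable (a sketch, not the project's actual fixture):

import org.apache.spark.sql.SparkSession;

// Local-mode session for unit tests; master and app name are illustrative.
SparkSession sparkSession = SparkSession.builder()
        .master("local[*]")
        .appName("SparkUtilsTest")
        .getOrCreate();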

Example 2: parse

import org.apache.spark.sql.types.DataTypes; // import the class this example depends on
/**
 * Parses a list of PoS-tagged sentences, one per line, and writes the result to an output
 * file in a specified output format.
 * @param jsc
 * @param sentences
 * @param outputFileName
 * @param outputFormat
 */
public void parse(JavaSparkContext jsc, List<String> sentences, String outputFileName, OutputFormat outputFormat) {
	JavaRDD<String> input = jsc.parallelize(sentences);
	JavaRDD<Sentence> sents = input.map(new TaggedLineToSentenceFunction());
	JavaRDD<DependencyGraph> graphs = sents.map(new ParsingFunction());
	JavaRDD<Row> rows = graphs.map(new Function<DependencyGraph, Row>() {
		private static final long serialVersionUID = -812004521983071103L;
		public Row call(DependencyGraph graph) {
			return RowFactory.create(graph.getSentence().toString(), graph.dependencies());
		}
	});
	StructType schema = new StructType(new StructField[]{
		new StructField("sentence", DataTypes.StringType, false, Metadata.empty()),	
		new StructField("dependency", DataTypes.StringType, false, Metadata.empty())
	});
	SQLContext sqlContext = new SQLContext(jsc);
	DataFrame df = sqlContext.createDataFrame(rows, schema);
	
	if (outputFormat == OutputFormat.TEXT)  
		df.select("dependency").write().text(outputFileName);
	else 
		df.repartition(1).write().json(outputFileName);
}
 
Source: phuonglh/vn.vitk, file: DependencyParser.java, 31 lines

Example 3: translateDataType

import org.apache.spark.sql.types.DataTypes; // import the class this example depends on
static
public DataType translateDataType(org.dmg.pmml.DataType dataType){

	switch(dataType){
		case STRING:
			return DataTypes.StringType;
		case INTEGER:
			return DataTypes.IntegerType;
		case FLOAT:
			return DataTypes.FloatType;
		case DOUBLE:
			return DataTypes.DoubleType;
		case BOOLEAN:
			return DataTypes.BooleanType;
		default:
			throw new IllegalArgumentException();
	}
}
 
Source: jeremyore/spark-pmml-import, file: SchemaUtil.java, 19 lines
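
A hedged usage sketch: the translator maps PMML's type enum onto Spark's singleton DataType objects, so a call site only needs the PMML enum constant:

import org.apache.spark.sql.types.DataType;
import org.apache.spark.sql.types.DataTypes;

// STRING from the PMML enum resolves to Spark's StringType singleton.
DataType sparkType = SchemaUtil.translateDataType(org.dmg.pmml.DataType.STRING);
assert sparkType == DataTypes.StringType;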

Example 4: getJdbcTypeString

import org.apache.spark.sql.types.DataTypes; // import the class this example depends on
public static String getJdbcTypeString(org.apache.spark.sql.types.DataType dataType, boolean isPrimaryKeyOrIndexKey, boolean isText) {
    int maxVarcharLength = isPrimaryKeyOrIndexKey ? 150 : 250;
    String sqlTypeForString = isText ? "TEXT" : String.format("VARCHAR(%s)", maxVarcharLength);
    if (dataType == DataTypes.TimestampType || dataType == DataTypes.DateType) {
        return "DATETIME";
    } else if (dataType == DataTypes.StringType) {
        return sqlTypeForString;
    } else if (dataType == DataTypes.IntegerType) {
        return "INT";
    } else if (dataType == DataTypes.LongType) {
        return "BIGINT";
    } else if (dataType == DataTypes.FloatType) {
        return "FLOAT";
    } else if (dataType == DataTypes.DoubleType) {
        return "DOUBLE";
    } else if (dataType == DataTypes.BooleanType) {
        return "TINYINT";
    } else if (dataType == DataTypes.ByteType) {
        return "SMALLINT";
    } else if (dataType instanceof org.apache.spark.sql.types.DecimalType) {
        org.apache.spark.sql.types.DecimalType decimalType = (org.apache.spark.sql.types.DecimalType) dataType;
        return String.format("DECIMAL(%d,%d)", decimalType.precision(), decimalType.scale());
    } else {
        throw new RuntimeException(String.format("Unsupported property type for JDBC: %s", dataType));
    }
}
 
Source: uber/uberscriptquery, file: JdbcUtils.java, 27 lines
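
A sketch of a typical call site, assembling a CREATE TABLE statement from a Spark schema (the table name and the primary-key/text flags are assumptions):

import org.apache.spark.sql.types.DataTypes;
import org.apache.spark.sql.types.Metadata;
import org.apache.spark.sql.types.StructField;
import org.apache.spark.sql.types.StructType;

StructType schema = new StructType(new StructField[]{
        new StructField("id", DataTypes.LongType, false, Metadata.empty()),
        new StructField("payload", DataTypes.StringType, true, Metadata.empty())
});
StringBuilder ddl = new StringBuilder("CREATE TABLE events (");
for (int i = 0; i < schema.fields().length; i++) {
    StructField field = schema.fields()[i];
    if (i > 0) ddl.append(", ");
    ddl.append(field.name()).append(' ')
       .append(JdbcUtils.getJdbcTypeString(field.dataType(), false, false));
}
ddl.append(")");
// Produces: CREATE TABLE events (id BIGINT, payload VARCHAR(250))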

Example 5: getDataType

import org.apache.spark.sql.types.DataTypes; // import the class this example depends on
private DataType getDataType(int type) {
    switch (type) {
        case LiteralType.BOOLEAN:
            return DataTypes.BooleanType;
        case LiteralType.STRING:
            return DataTypes.StringType;
        case LiteralType.FLOAT:
            return DataTypes.FloatType;
        case LiteralType.DOUBLE:
            return DataTypes.DoubleType;
        case LiteralType.INTEGER:
            return DataTypes.IntegerType;
        case LiteralType.LONG:
            return DataTypes.LongType;
        case LiteralType.DATETIME:
            // datetime not supported due to timezone issues with java.sql.Timestamp
            // check the InstanceAggregator for more info
            return DataTypes.StringType;
    }
    throw new NotImplementedException("Not able to write literal type " + type);
}
 
Source: Merck/rdf2x, file: InstanceRelationWriter.java, 22 lines

Example 6: getDataTypeFromReturnType

import org.apache.spark.sql.types.DataTypes; // import the class this example depends on
private static DataType getDataTypeFromReturnType(Method method) {
    String typeName = method.getReturnType().getSimpleName();
    switch (typeName) {
    case "int":
    case "Integer":
        return DataTypes.IntegerType;
    case "long":
    case "Long":
        return DataTypes.LongType;
    case "float":
    case "Float":
        return DataTypes.FloatType;
    case "boolean":
    case "Boolean":
        return DataTypes.BooleanType;
    case "double":
    case "Double":
        return DataTypes.DoubleType;
    case "String":
        return DataTypes.StringType;
    case "Date":
    case "date":
        return DataTypes.DateType;
    case "Timestamp":
        return DataTypes.TimestampType;
    case "short":
    case "Short":
        return DataTypes.ShortType;
    case "Object":
        return DataTypes.BinaryType;
    default:
        log.debug("Using default for type [{}]", typeName);
        return DataTypes.BinaryType;
    }
}
 
Source: jgperrin/net.jgp.labs.spark.datasources, file: SparkBeanUtils.java, 36 lines
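
Since the method is private and driven by reflection over bean getters, a hedged illustration is the kind of bean it would be applied to; the comments show the DataType each getter's return type resolves to:

// Hypothetical bean, used only to illustrate the return-type mapping.
public class User implements java.io.Serializable {
    private String name;
    private int age;
    public String getName() { return name; } // -> DataTypes.StringType
    public int getAge() { return age; }      // -> DataTypes.IntegerType
}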

Example 7: convertSqlTypeToSparkSqlDataType

import org.apache.spark.sql.types.DataTypes; // import the class this example depends on
public static org.apache.spark.sql.types.DataType convertSqlTypeToSparkSqlDataType(int sqlType) {
    if (sqlType == java.sql.Types.BOOLEAN) {
        return DataTypes.BooleanType;
    } else if (sqlType == Types.TINYINT) {
        return DataTypes.ByteType;
    } else if (sqlType == Types.SMALLINT) {
        return DataTypes.ShortType;
    } else if (sqlType == java.sql.Types.INTEGER) {
        return DataTypes.IntegerType;
    } else if (sqlType == java.sql.Types.BIGINT) {
        return DataTypes.LongType;
    } else if (sqlType == Types.DECIMAL) {
        return DataTypes.createDecimalType();
    } else if (sqlType == java.sql.Types.FLOAT) {
        return DataTypes.FloatType;
    } else if (sqlType == java.sql.Types.DOUBLE) {
        return DataTypes.DoubleType;
    } else if (sqlType == Types.DATE) {
        return DataTypes.DateType;
    } else if (sqlType == Types.TIME) {
        return DataTypes.TimestampType;
    } else if (sqlType == Types.TIMESTAMP) {
        return DataTypes.TimestampType;
    } else if (sqlType == java.sql.Types.VARCHAR) {
        return DataTypes.StringType;
    } else {
        logger.warn(String.format("Using string for unsupported sql type %s", sqlType));
        return DataTypes.StringType;
    }
}
 
Source: uber/uberscriptquery, file: SparkUtils.java, 31 lines
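
A hedged sketch of the usual call site: deriving a Spark schema from JDBC result-set metadata (connection handling omitted; the method name is illustrative):

import java.sql.ResultSetMetaData;
import java.sql.SQLException;
import java.util.ArrayList;
import java.util.List;
import org.apache.spark.sql.types.DataTypes;
import org.apache.spark.sql.types.StructField;
import org.apache.spark.sql.types.StructType;

static StructType schemaFromJdbcMetadata(ResultSetMetaData meta) throws SQLException {
    List<StructField> fields = new ArrayList<>();
    for (int i = 1; i <= meta.getColumnCount(); i++) { // JDBC columns are 1-based
        fields.add(DataTypes.createStructField(
                meta.getColumnLabel(i),
                SparkUtils.convertSqlTypeToSparkSqlDataType(meta.getColumnType(i)),
                true));
    }
    return DataTypes.createStructType(fields);
}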

Example 8: fromSchemaSequence

import org.apache.spark.sql.types.DataTypes; // import the class this example depends on
/**
 * Convert the DataVec sequence schema to a StructType for Spark, for example for use in
 * {@link #toDataFrameSequence(Schema, JavaRDD)}.
 * <b>Note</b>: as per {@link #toDataFrameSequence(Schema, JavaRDD)}, the StructType has two additional columns added to it:<br>
 * - Column 0: Sequence UUID (name: {@link #SEQUENCE_UUID_COLUMN}) - a UUID for the original sequence<br>
 * - Column 1: Sequence index (name: {@link #SEQUENCE_INDEX_COLUMN}) - an index (integer, starting at 0) for the position
 * of this record in the original time series.<br>
 * These two columns are required if the data is to be converted back into a sequence at a later point, for example
 * using {@link #toRecordsSequence(DataRowsFacade)}.
 *
 * @param schema Schema to convert
 * @return StructType for the schema
 */
public static StructType fromSchemaSequence(Schema schema) {
    StructField[] structFields = new StructField[schema.numColumns() + 2];

    structFields[0] = new StructField(SEQUENCE_UUID_COLUMN, DataTypes.StringType, false, Metadata.empty());
    structFields[1] = new StructField(SEQUENCE_INDEX_COLUMN, DataTypes.IntegerType, false, Metadata.empty());

    for (int i = 0; i < schema.numColumns(); i++) {
        switch (schema.getColumnTypes().get(i)) {
            case Double:
                structFields[i + 2] =
                                new StructField(schema.getName(i), DataTypes.DoubleType, false, Metadata.empty());
                break;
            case Integer:
                structFields[i + 2] =
                                new StructField(schema.getName(i), DataTypes.IntegerType, false, Metadata.empty());
                break;
            case Long:
                structFields[i + 2] =
                                new StructField(schema.getName(i), DataTypes.LongType, false, Metadata.empty());
                break;
            case Float:
                structFields[i + 2] =
                                new StructField(schema.getName(i), DataTypes.FloatType, false, Metadata.empty());
                break;
            default:
                throw new IllegalStateException(
                                "This api should not be used with strings , binary data or ndarrays. This is only for columnar data");
        }
    }
    return new StructType(structFields);
}
 
Source: deeplearning4j/DataVec, file: DataFrames.java, 45 lines

Example 9: testGetPartialKey

import org.apache.spark.sql.types.DataTypes; // import the class this example depends on
@Test
public void testGetPartialKey() throws Exception {
  addEntriesToHBase();
  Table table = connection.getTable(TableName.valueOf(TABLE));
  scanAndCountTable(table, INPUT_ROWS * 4);

  Config config = ConfigUtils.configFromResource("/hbase/hbase-output-test.conf").getConfig("output");
  config = config.withValue("zookeeper",
      ConfigValueFactory.fromAnyRef("localhost:" + utility.getZkCluster().getClientPort()));

  HBaseOutput output = new HBaseOutput();
  output.configure(config);

  StructType partialKeySchema = new StructType(new StructField[] {
      new StructField("symbol", DataTypes.StringType, false, null)
  });
  List<Row> filters = Lists.newArrayList();
  filters.add(new RowWithSchema(partialKeySchema, "AAPL"));
  filters.add(new RowWithSchema(partialKeySchema, "GOOG"));

  Iterable<Row> filtered = output.getExistingForFilters(filters);
  assertEquals(25, Iterables.size(filtered));
}
 
Source: cloudera-labs/envelope, file: TestHBaseOutput.java, 24 lines

Example 10: createDataFrame

import org.apache.spark.sql.types.DataTypes; // import the class this example depends on
/**
 * Creates a data frame from a list of tagged sentences.
 * @param taggedSentences
 * @return a data frame of two columns: "sentence" and "partOfSpeech".
 */
public DataFrame createDataFrame(List<String> taggedSentences) {
	List<String> wordSequences = new LinkedList<String>();
	List<String> tagSequences = new LinkedList<String>();
	for (String taggedSentence : taggedSentences) {
		StringBuilder wordBuf = new StringBuilder();
		StringBuilder tagBuf = new StringBuilder();
		String[] tokens = taggedSentence.split("\\s+");
		for (String token : tokens) {
			String[] parts = token.split("/");
			if (parts.length == 2) {
				wordBuf.append(parts[0]);
				wordBuf.append(' ');
				tagBuf.append(parts[1]);
				tagBuf.append(' ');
			} else { // this token is "///"  
				wordBuf.append('/');
				wordBuf.append(' ');
				tagBuf.append('/');
				tagBuf.append(' ');
			}
		}
		wordSequences.add(wordBuf.toString().trim());
		tagSequences.add(tagBuf.toString().trim());
	}
	if (verbose) {
		System.out.println("Number of sentences = " + wordSequences.size());
	}
	List<Row> rows = new LinkedList<Row>();
	for (int i = 0; i < wordSequences.size(); i++) {
		rows.add(RowFactory.create(wordSequences.get(i), tagSequences.get(i)));
	}
	JavaRDD<Row> jrdd = jsc.parallelize(rows);
	StructType schema = new StructType(new StructField[]{
			new StructField("sentence", DataTypes.StringType, false, Metadata.empty()),
			new StructField("partOfSpeech", DataTypes.StringType, false, Metadata.empty())
		});
		
	return new SQLContext(jsc).createDataFrame(jrdd, schema);
}
 
Source: phuonglh/vn.vitk, file: Tagger.java, 45 lines

Example 11: tag

import org.apache.spark.sql.types.DataTypes; // import the class this example depends on
/**
 * Tags a list of sentences and returns a list of tag sequences.
 * @param sentences
 * @return a list of tagged sequences.
 */
public List<String> tag(List<String> sentences) {
	List<Row> rows = new LinkedList<Row>();
	for (String sentence : sentences) {
		rows.add(RowFactory.create(sentence));
	}
	StructType schema = new StructType(new StructField[]{
		new StructField("sentence", DataTypes.StringType, false, Metadata.empty())	
	});
	SQLContext sqlContext = new SQLContext(jsc);
	DataFrame input = sqlContext.createDataFrame(rows, schema);
	if (cmmModel != null) {
		DataFrame output = cmmModel.transform(input).repartition(1);
		return output.javaRDD().map(new RowToStringFunction(1)).collect();
	} else {
		System.err.println("Tagging model is null. You need to create or load a model first.");
		return null;
	}
}
 
Source: phuonglh/vn.vitk, file: Tagger.java, 24 lines

Example 12: transform

import org.apache.spark.sql.types.DataTypes; // import the class this example depends on
@Override
public DataFrame transform(DataFrame dataset) {
	JavaRDD<Row> output = dataset.javaRDD().map(new DecodeFunction());
	StructType schema = new StructType(new StructField[]{
		new StructField("sentence", DataTypes.StringType, false, Metadata.empty()),
		new StructField("prediction", DataTypes.StringType, false, Metadata.empty())
	});
	return dataset.sqlContext().createDataFrame(output, schema);
}
 
Source: phuonglh/vn.vitk, file: CMMModel.java, 10 lines

Example 13: indexrSchemaToSparkSchema

import org.apache.spark.sql.types.DataTypes; // import the class this example depends on
public static List<StructField> indexrSchemaToSparkSchema(SegmentSchema schema) {
    List<StructField> fields = new ArrayList<>();
    for (ColumnSchema cs : schema.getColumns()) {
        DataType dataType;
        switch (cs.getSqlType()) {
            case INT:
                dataType = DataTypes.IntegerType;
                break;
            case BIGINT:
                dataType = DataTypes.LongType;
                break;
            case FLOAT:
                dataType = DataTypes.FloatType;
                break;
            case DOUBLE:
                dataType = DataTypes.DoubleType;
                break;
            case VARCHAR:
                dataType = DataTypes.StringType;
                break;
            case DATE:
                dataType = DataTypes.DateType;
                break;
            case DATETIME:
                dataType = DataTypes.TimestampType;
                break;
            default:
                throw new IllegalStateException("Unsupported type: " + cs.getSqlType());
        }
        fields.add(new StructField(cs.getName(), dataType, scala.Boolean.box(false), Metadata.empty()));
    }
    return fields;
}
 
Source: shunfei/indexr, file: IndexRUtil.java, 34 lines
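
A short usage note, as a sketch: the returned field list is typically wrapped into a StructType before being handed to Spark (segmentSchema here is assumed to already exist):

List<StructField> fields = IndexRUtil.indexrSchemaToSparkSchema(segmentSchema);
StructType sparkSchema = DataTypes.createStructType(fields);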

Example 14: parseDataType

import org.apache.spark.sql.types.DataTypes; // import the class this example depends on
private static DataType parseDataType(Config fieldsConfig) {
  String type = fieldsConfig.getString(FIELD_TYPE_CONFIG);
  switch (type) {
    case "string":
      return DataTypes.StringType;
    case "byte":
      return DataTypes.ByteType;
    case "short":
      return DataTypes.ShortType;
    case "int":
      return DataTypes.IntegerType;
    case "long":
      return DataTypes.LongType;
    case "float":
      return DataTypes.FloatType;
    case "double":
      return DataTypes.DoubleType;
    case "decimal":
      ConfigUtils.assertConfig(fieldsConfig, DECIMAL_SCALE_CONFIG);
      ConfigUtils.assertConfig(fieldsConfig, DECIMAL_PRECISION_CONFIG);
      // DataTypes.createDecimalType takes (precision, scale), so precision must come first.
      return DataTypes.createDecimalType(
              fieldsConfig.getInt(DECIMAL_PRECISION_CONFIG),
              fieldsConfig.getInt(DECIMAL_SCALE_CONFIG));
    case "boolean":
      return DataTypes.BooleanType;
    case "binary":
      return DataTypes.BinaryType;
    case "date":
      return DataTypes.DateType;
    case "timestamp":
      return DataTypes.TimestampType;
    case "array":
    case "map":
    case "struct":
      throw new RuntimeException("Schema check does not currently support complex types");
    default:
      throw new RuntimeException("Unknown type: " + type);
  }
}
 
Source: cloudera-labs/envelope, file: CheckSchemaDatasetRule.java, 40 lines
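
As a hedged illustration of the input this parser expects, the key names below ("type", "scale", "precision") are assumptions standing in for the literal values of FIELD_TYPE_CONFIG, DECIMAL_SCALE_CONFIG and DECIMAL_PRECISION_CONFIG:

import com.typesafe.config.Config;
import com.typesafe.config.ConfigFactory;

// A decimal field declaration; parseDataType(fieldConfig) would yield DECIMAL(10,2).
Config fieldConfig = ConfigFactory.parseString(
        "type = decimal\nscale = 2\nprecision = 10");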

Example 15: evaluateStepByKeyDecision

import org.apache.spark.sql.types.DataTypes; // import the class this example depends on
private boolean evaluateStepByKeyDecision(Set<Step> steps) {
  Optional<Step> optionalStep = StepUtils.getStepForName(stepByKeyStepName, steps);
  
  if (!optionalStep.isPresent()) {
    throw new RuntimeException("Unknown decision step's key step: " + stepByValueStepName);
  }
  
  if (!(optionalStep.get() instanceof DataStep)) {
    throw new RuntimeException("Decision step's key step is not a data step: " + optionalStep.get().getName());
  }
  
  Dataset<Row> keyDataset = ((DataStep)optionalStep.get()).getData();
  
  if (keyDataset.schema().fields().length != 2 ||
      keyDataset.schema().fields()[0].dataType() != DataTypes.StringType ||
      keyDataset.schema().fields()[1].dataType() != DataTypes.BooleanType)
  {
    throw new RuntimeException("Decision step's key step must contain a string column and then a boolean column");
  }
  
  String keyColumnName = keyDataset.schema().fieldNames()[0];
  String whereClause = keyColumnName + " = '" + stepByKeyKey + "'";
  Dataset<Row> decisionDataset = keyDataset.where(whereClause);
  
  if (decisionDataset.count() != 1) {
    throw new RuntimeException("Decision step's key step must contain a single record for the given key");
  }
  
  boolean decision = decisionDataset.collectAsList().get(0).getBoolean(1);
  
  return decision;
}
 
Source: cloudera-labs/envelope, file: DecisionStep.java, 33 lines
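
A hedged sketch of a key step's output that would pass the schema check above: a two-column dataset of string keys and boolean decisions (sparkSession and the values are illustrative):

import java.util.Arrays;
import org.apache.spark.sql.Dataset;
import org.apache.spark.sql.Row;
import org.apache.spark.sql.RowFactory;
import org.apache.spark.sql.types.DataTypes;
import org.apache.spark.sql.types.Metadata;
import org.apache.spark.sql.types.StructField;
import org.apache.spark.sql.types.StructType;

StructType keySchema = new StructType(new StructField[]{
        new StructField("key", DataTypes.StringType, false, Metadata.empty()),
        new StructField("decision", DataTypes.BooleanType, false, Metadata.empty())
});
Dataset<Row> keyStep = sparkSession.createDataFrame(
        Arrays.asList(RowFactory.create("run_downstream", true)), keySchema);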


Note: the org.apache.spark.sql.types.DataTypes.StringType examples in this article were collected by 纯净天空 from open-source code and documentation platforms such as GitHub and MSDocs. The snippets were selected from open-source projects; copyright remains with the original authors, and use or redistribution of the code should follow the license of the corresponding project. Please do not republish without permission.