

Java Metadata.empty Method Code Examples

This article collects typical usage examples of the Java method org.apache.spark.sql.types.Metadata.empty. If you are wondering how Metadata.empty is used in practice, what it does, or where to find real examples of it, the curated code examples below should help. You can also explore further usage examples of org.apache.spark.sql.types.Metadata, the class in which this method is defined.


The following presents 15 code examples of the Metadata.empty method, sorted by popularity by default.
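
Before the examples, here is a minimal, self-contained sketch of the pattern that recurs throughout this page: Metadata.empty() is passed as the fourth argument of the StructField constructor whenever a column needs no metadata. This is only a sketch assuming Spark 2.x with a local SparkSession; the class name, column names, and sample rows are made up for illustration.

import java.util.Arrays;
import java.util.List;

import org.apache.spark.sql.Dataset;
import org.apache.spark.sql.Row;
import org.apache.spark.sql.RowFactory;
import org.apache.spark.sql.SparkSession;
import org.apache.spark.sql.types.DataTypes;
import org.apache.spark.sql.types.Metadata;
import org.apache.spark.sql.types.StructField;
import org.apache.spark.sql.types.StructType;

public class MetadataEmptyExample {
    public static void main(String[] args) {
        SparkSession spark = SparkSession.builder()
                .master("local[*]")
                .appName("MetadataEmptyExample")
                .getOrCreate();

        // Each StructField takes (name, dataType, nullable, metadata);
        // Metadata.empty() is the standard placeholder when no column metadata is needed.
        StructType schema = new StructType(new StructField[]{
                new StructField("id", DataTypes.IntegerType, false, Metadata.empty()),
                new StructField("name", DataTypes.StringType, true, Metadata.empty())
        });

        List<Row> rows = Arrays.asList(
                RowFactory.create(1, "alice"),
                RowFactory.create(2, "bob"));

        Dataset<Row> df = spark.createDataFrame(rows, schema);
        df.printSchema();
        df.show();

        spark.stop();
    }
}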

Example 1: createNGramDataFrame

import org.apache.spark.sql.types.Metadata; // import the package/class on which this method depends
/**
 * Creates an n-gram data frame from text lines.
 * @param lines the input text lines
 * @return an n-gram data frame.
 */
DataFrame createNGramDataFrame(JavaRDD<String> lines) {
	JavaRDD<Row> rows = lines.map(new Function<String, Row>(){
		private static final long serialVersionUID = -4332903997027358601L;
		
		@Override
		public Row call(String line) throws Exception {
			return RowFactory.create(Arrays.asList(line.split("\\s+")));
		}
	});
	StructType schema = new StructType(new StructField[] {
			new StructField("words",
					DataTypes.createArrayType(DataTypes.StringType), false,
					Metadata.empty()) });
	DataFrame wordDF = new SQLContext(jsc).createDataFrame(rows, schema);
	// build a bigram language model
	NGram transformer = new NGram().setInputCol("words")
			.setOutputCol("ngrams").setN(2);
	DataFrame ngramDF = transformer.transform(wordDF);
	ngramDF.show(10, false);
	return ngramDF;
}
 
Developer: phuonglh, Project: vn.vitk, Lines of code: 27, Source file: NGramBuilder.java

Example 2: test_getDataSetResult

import org.apache.spark.sql.types.Metadata; // import the package/class on which this method depends
@Test
public void test_getDataSetResult() {

    StructField[] structFields = new StructField[]{
            new StructField("intColumn", DataTypes.IntegerType, true, Metadata.empty()),
            new StructField("stringColumn", DataTypes.StringType, true, Metadata.empty())
    };

    StructType structType = new StructType(structFields);

    List<Row> rows = new ArrayList<>();
    rows.add(RowFactory.create(1, "v1"));
    rows.add(RowFactory.create(2, "v2"));

    Dataset<Row> df = sparkSession.createDataFrame(rows, structType);

    DataSetResult dataSetResult = SparkUtils.getDataSetResult(df);
    Assert.assertEquals(2, dataSetResult.getColumnNames().size());
    Assert.assertEquals(2, dataSetResult.getRows().size());
    Assert.assertEquals(new Integer(1), dataSetResult.getRows().get(0).get(0));
    Assert.assertEquals("v1", dataSetResult.getRows().get(0).get(1));
    Assert.assertEquals(new Integer(2), dataSetResult.getRows().get(1).get(0));
    Assert.assertEquals("v2", dataSetResult.getRows().get(1).get(1));
}
 
Developer: uber, Project: uberscriptquery, Lines of code: 25, Source file: SparkUtilsTest.java

Example 3: generateData_week_timepoints_by_10_minutes

import org.apache.spark.sql.types.Metadata; // import the package/class on which this method depends
private static Dataset<Row> generateData_week_timepoints_by_10_minutes(SparkSession spark) {
    StructField[] structFields = new StructField[1];
    org.apache.spark.sql.types.DataType dataType = DataTypes.IntegerType;
    String column = "timepoint";
    StructField structField = new StructField(column, dataType, true, Metadata.empty());
    structFields[0] = structField;

    StructType structType = new StructType(structFields);

    List<Row> rows = new ArrayList<>();

    int weekTotalMinutes = 7 * 24 * 60;
    int timepointIntervalMinutes = 10;
    for (int i = 0; i < weekTotalMinutes / timepointIntervalMinutes; i++) {
        Object[] objects = new Object[structFields.length];
        objects[0] = i;
        Row row = RowFactory.create(objects);
        rows.add(row);
    }

    Dataset<Row> df = spark.createDataFrame(rows, structType);
    return df;
}
 
Developer: uber, Project: uberscriptquery, Lines of code: 24, Source file: QueryEngine.java

Example 4: parse

import org.apache.spark.sql.types.Metadata; // import the package/class on which this method depends
/**
 * Parses a list of PoS-tagged sentences, one per line, and writes the result to an output
 * file in the specified output format.
 * @param jsc the Java Spark context
 * @param sentences the PoS-tagged input sentences
 * @param outputFileName the output file name
 * @param outputFormat the output format
 */
public void parse(JavaSparkContext jsc, List<String> sentences, String outputFileName, OutputFormat outputFormat) {
	JavaRDD<String> input = jsc.parallelize(sentences);
	JavaRDD<Sentence> sents = input.map(new TaggedLineToSentenceFunction());
	JavaRDD<DependencyGraph> graphs = sents.map(new ParsingFunction());
	JavaRDD<Row> rows = graphs.map(new Function<DependencyGraph, Row>() {
		private static final long serialVersionUID = -812004521983071103L;
		public Row call(DependencyGraph graph) {
			return RowFactory.create(graph.getSentence().toString(), graph.dependencies());
		}
	});
	StructType schema = new StructType(new StructField[]{
		new StructField("sentence", DataTypes.StringType, false, Metadata.empty()),	
		new StructField("dependency", DataTypes.StringType, false, Metadata.empty())
	});
	SQLContext sqlContext = new SQLContext(jsc);
	DataFrame df = sqlContext.createDataFrame(rows, schema);
	
	if (outputFormat == OutputFormat.TEXT)  
		df.select("dependency").write().text(outputFileName);
	else 
		df.repartition(1).write().json(outputFileName);
}
 
Developer: phuonglh, Project: vn.vitk, Lines of code: 31, Source file: DependencyParser.java

Example 5: fromSchema

import org.apache.spark.sql.types.Metadata; // import the package/class on which this method depends
/**
 * Converts a DataVec schema to a
 * Spark struct type.
 *
 * @param schema the schema to convert
 * @return the equivalent Spark struct type
 */
public static StructType fromSchema(Schema schema) {
    StructField[] structFields = new StructField[schema.numColumns()];
    for (int i = 0; i < structFields.length; i++) {
        switch (schema.getColumnTypes().get(i)) {
            case Double:
                structFields[i] = new StructField(schema.getName(i), DataTypes.DoubleType, false, Metadata.empty());
                break;
            case Integer:
                structFields[i] =
                                new StructField(schema.getName(i), DataTypes.IntegerType, false, Metadata.empty());
                break;
            case Long:
                structFields[i] = new StructField(schema.getName(i), DataTypes.LongType, false, Metadata.empty());
                break;
            case Float:
                structFields[i] = new StructField(schema.getName(i), DataTypes.FloatType, false, Metadata.empty());
                break;
            default:
                throw new IllegalStateException(
                                "This api should not be used with strings , binary data or ndarrays. This is only for columnar data");
        }
    }
    return new StructType(structFields);
}
 
Developer: deeplearning4j, Project: DataVec, Lines of code: 32, Source file: DataFrames.java
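
As a quick illustration of how fromSchema might be called, here is a sketch only: the column names are invented, and the Schema.Builder methods and the org.datavec.spark.transform.DataFrames import path are assumptions about the DataVec project rather than something shown above.

import org.apache.spark.sql.types.StructType;
import org.datavec.api.transform.schema.Schema;
import org.datavec.spark.transform.DataFrames;

public class FromSchemaDemo {
    public static void main(String[] args) {
        // Build a small columnar DataVec schema (column names are arbitrary).
        Schema schema = new Schema.Builder()
                .addColumnDouble("sensorValue")
                .addColumnInteger("eventCount")
                .build();

        // fromSchema maps each DataVec column type to the matching Spark DataType,
        // attaching Metadata.empty() to every generated StructField.
        StructType structType = DataFrames.fromSchema(schema);
        System.out.println(structType.treeString());
    }
}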

Example 6: generateData_numbers_1k

import org.apache.spark.sql.types.Metadata; // import the package/class on which this method depends
private static Dataset<Row> generateData_numbers_1k(SparkSession spark) {
    StructField[] structFields = new StructField[1];
    org.apache.spark.sql.types.DataType dataType = DataTypes.IntegerType;
    String column = "number";
    StructField structField = new StructField(column, dataType, true, Metadata.empty());
    structFields[0] = structField;

    StructType structType = new StructType(structFields);

    List<Row> rows = new ArrayList<>();

    for (int i = 0; i <= 1000; i++) {
        Object[] objects = new Object[structFields.length];
        objects[0] = i;
        Row row = RowFactory.create(objects);
        rows.add(row);
    }

    Dataset<Row> df = spark.createDataFrame(rows, structType);
    return df;
}
 
Developer: uber, Project: uberscriptquery, Lines of code: 22, Source file: QueryEngine.java

Example 7: testPruneByStepValueTrue

import org.apache.spark.sql.types.Metadata; // import the package/class on which this method depends
@Test
public void testPruneByStepValueTrue() {
  StructType schema = new StructType(new StructField[] {
      new StructField("outcome", DataTypes.BooleanType, false, Metadata.empty())
  });
  List<Row> rows = Lists.newArrayList(
      RowFactory.create(true)
  );
  Dataset<Row> ds = Contexts.getSparkSession().createDataFrame(rows, schema);
  step1.setData(ds);
  
  Map<String, Object> step2ConfigMap = Maps.newHashMap();
  step2ConfigMap.put("dependencies", Lists.newArrayList("step1"));
  step2ConfigMap.put(DecisionStep.IF_TRUE_STEP_NAMES_PROPERTY, Lists.newArrayList("step3", "step7"));
  step2ConfigMap.put(DecisionStep.DECISION_METHOD_PROPERTY, DecisionStep.STEP_BY_VALUE_DECISION_METHOD);
  step2ConfigMap.put(DecisionStep.STEP_BY_VALUE_STEP_PROPERTY, "step1");
  Config step2Config = ConfigFactory.parseMap(step2ConfigMap);
  RefactorStep step2 = new DecisionStep("step2", step2Config);
  steps.add(step2);
  
  Set<Step> refactored = step2.refactor(steps);
  
  assertEquals(refactored, Sets.newHashSet(step1, step2, step3, step4, step7, step8));
}
 
Developer: cloudera-labs, Project: envelope, Lines of code: 25, Source file: TestDecisionStep.java

Example 8: testPruneByStepValueFalse

import org.apache.spark.sql.types.Metadata; // import the package/class on which this method depends
@Test
public void testPruneByStepValueFalse() {
  StructType schema = new StructType(new StructField[] {
      new StructField("outcome", DataTypes.BooleanType, false, Metadata.empty())
  });
  List<Row> rows = Lists.newArrayList(
      RowFactory.create(false)
  );
  Dataset<Row> ds = Contexts.getSparkSession().createDataFrame(rows, schema);
  step1.setData(ds);
  
  Map<String, Object> step2ConfigMap = Maps.newHashMap();
  step2ConfigMap.put("dependencies", Lists.newArrayList("step1"));
  step2ConfigMap.put(DecisionStep.IF_TRUE_STEP_NAMES_PROPERTY, Lists.newArrayList("step3", "step7"));
  step2ConfigMap.put(DecisionStep.DECISION_METHOD_PROPERTY, DecisionStep.STEP_BY_VALUE_DECISION_METHOD);
  step2ConfigMap.put(DecisionStep.STEP_BY_VALUE_STEP_PROPERTY, "step1");
  Config step2Config = ConfigFactory.parseMap(step2ConfigMap);
  RefactorStep step2 = new DecisionStep("step2", step2Config);
  steps.add(step2);
  
  Set<Step> refactored = step2.refactor(steps);
  
  assertEquals(refactored, Sets.newHashSet(step1, step2, step5, step6));
}
 
Developer: cloudera-labs, Project: envelope, Lines of code: 25, Source file: TestDecisionStep.java

Example 9: testAgeRangeFloat

import org.apache.spark.sql.types.Metadata; // import the package/class on which this method depends
@Test
public void testAgeRangeFloat() {
  StructType schema = new StructType(new StructField[] {
      new StructField("name", DataTypes.StringType, false, Metadata.empty()),
      new StructField("nickname", DataTypes.StringType, false, Metadata.empty()),
      new StructField("age", DataTypes.FloatType, false, Metadata.empty()),
      new StructField("candycrushscore", DataTypes.createDecimalType(), false, Metadata.empty())
  });

  Map<String, Object> configMap = new HashMap<>();
  configMap.put("fields", Lists.newArrayList("age"));
  configMap.put("fieldtype", "float");
  configMap.put("range", Lists.newArrayList(0.1,105.0));
  Config config = ConfigFactory.parseMap(configMap);

  RangeRowRule rule = new RangeRowRule();
  rule.configure("agerange", config);

  Row row1 = new RowWithSchema(schema, "Ian", "Ian", 34.0f, new BigDecimal("0.00"));
  assertTrue("Row should pass rule", rule.check(row1));

  Row row2 = new RowWithSchema(schema, "Webster1", "Websta1", 110.0f, new BigDecimal("450.10"));
  assertFalse("Row should not pass rule", rule.check(row2));

  Row row3 = new RowWithSchema(schema, "", "Ian1", 110.0f, new BigDecimal("450.10"));
  assertFalse("Row should not pass rule", rule.check(row3));

  Row row4 = new RowWithSchema(schema, "First Last", "Ian Last", 100.0f, new BigDecimal("450.10"));
  assertTrue("Row should pass rule", rule.check(row4));
}
 
Developer: cloudera-labs, Project: envelope, Lines of code: 31, Source file: TestRangeRowRule.java

Example 10: tag

import org.apache.spark.sql.types.Metadata; // import the package/class on which this method depends
/**
 * Tags a list of sentences and returns a list of tagged sequences.
 * @param sentences the input sentences
 * @return a list of tagged sequences.
 */
public List<String> tag(List<String> sentences) {
	List<Row> rows = new LinkedList<Row>();
	for (String sentence : sentences) {
		rows.add(RowFactory.create(sentence));
	}
	StructType schema = new StructType(new StructField[]{
		new StructField("sentence", DataTypes.StringType, false, Metadata.empty())	
	});
	SQLContext sqlContext = new SQLContext(jsc);
	DataFrame input = sqlContext.createDataFrame(rows, schema);
	if (cmmModel != null) {
		DataFrame output = cmmModel.transform(input).repartition(1);
		return output.javaRDD().map(new RowToStringFunction(1)).collect();
	} else {
		System.err.println("Tagging model is null. You need to create or load a model first.");
		return null;
	}
}
 
Developer: phuonglh, Project: vn.vitk, Lines of code: 24, Source file: Tagger.java

Example 11: testStandardScaler

import org.apache.spark.sql.types.Metadata; // import the package/class on which this method depends
@Test
public void testStandardScaler() {


    JavaRDD<Row> jrdd = jsc.parallelize(Arrays.asList(
            RowFactory.create(1.0, Vectors.dense(data[0])),
            RowFactory.create(2.0, Vectors.dense(data[1])),
            RowFactory.create(3.0, Vectors.dense(data[2]))
    ));

    StructType schema = new StructType(new StructField[]{
            new StructField("label", DataTypes.DoubleType, false, Metadata.empty()),
            new StructField("features", new VectorUDT(), false, Metadata.empty())
    });

    Dataset<Row> df = spark.createDataFrame(jrdd, schema);

    // train models in Spark with different mean/std settings
    StandardScalerModel sparkModelNone = new StandardScaler()
            .setInputCol("features")
            .setOutputCol("scaledOutput")
            .setWithMean(false)
            .setWithStd(false)
            .fit(df);

    StandardScalerModel sparkModelWithMean = new StandardScaler()
            .setInputCol("features")
            .setOutputCol("scaledOutput")
            .setWithMean(true)
            .setWithStd(false)
            .fit(df);

    StandardScalerModel sparkModelWithStd = new StandardScaler()
            .setInputCol("features")
            .setOutputCol("scaledOutput")
            .setWithMean(false)
            .setWithStd(true)
            .fit(df);

    StandardScalerModel sparkModelWithBoth = new StandardScaler()
            .setInputCol("features")
            .setOutputCol("scaledOutput")
            .setWithMean(true)
            .setWithStd(true)
            .fit(df);


    //Export model, import it back and get transformer
    byte[] exportedModel = ModelExporter.export(sparkModelNone);
    final Transformer transformerNone = ModelImporter.importAndGetTransformer(exportedModel);

    exportedModel = ModelExporter.export(sparkModelWithMean);
    final Transformer transformerWithMean = ModelImporter.importAndGetTransformer(exportedModel);

    exportedModel = ModelExporter.export(sparkModelWithStd);
    final Transformer transformerWithStd = ModelImporter.importAndGetTransformer(exportedModel);

    exportedModel = ModelExporter.export(sparkModelWithBoth);
    final Transformer transformerWithBoth = ModelImporter.importAndGetTransformer(exportedModel);


    //compare predictions
    List<Row> sparkNoneOutput = sparkModelNone.transform(df).orderBy("label").select("features", "scaledOutput").collectAsList();
    assertCorrectness(sparkNoneOutput, data, transformerNone);

    List<Row> sparkWithMeanOutput = sparkModelWithMean.transform(df).orderBy("label").select("features", "scaledOutput").collectAsList();
    assertCorrectness(sparkWithMeanOutput, resWithMean, transformerWithMean);

    List<Row> sparkWithStdOutput = sparkModelWithStd.transform(df).orderBy("label").select("features", "scaledOutput").collectAsList();
    assertCorrectness(sparkWithStdOutput, resWithStd, transformerWithStd);

    List<Row> sparkWithBothOutput = sparkModelWithBoth.transform(df).orderBy("label").select("features", "scaledOutput").collectAsList();
    assertCorrectness(sparkWithBothOutput, resWithBoth, transformerWithBoth);

}
 
Developer: flipkart-incubator, Project: spark-transformers, Lines of code: 76, Source file: StandardScalerBridgeTest.java

Example 12: parseField

import org.apache.spark.sql.types.Metadata; // import the package/class on which this method depends
private static StructField parseField(Config fieldsConfig) {
  ConfigUtils.assertConfig(fieldsConfig, FIELD_NAME_CONFIG);
  ConfigUtils.assertConfig(fieldsConfig, FIELD_TYPE_CONFIG);

  String name = fieldsConfig.getString(FIELD_NAME_CONFIG);
  DataType type = parseDataType(fieldsConfig);
  return new StructField(name, type, true, Metadata.empty());
}
 
Developer: cloudera-labs, Project: envelope, Lines of code: 9, Source file: CheckSchemaDatasetRule.java

Example 13: testPruneByStepKeyFalse

import org.apache.spark.sql.types.Metadata; // import the package/class on which this method depends
@Test
public void testPruneByStepKeyFalse() {
  StructType schema = new StructType(new StructField[] {
      new StructField("name", DataTypes.StringType, false, Metadata.empty()),
      new StructField("result", DataTypes.BooleanType, false, Metadata.empty())
  });
  List<Row> rows = Lists.newArrayList(
      RowFactory.create("namecheck", false),
      RowFactory.create("agerange", true)
  );
  Dataset<Row> ds = Contexts.getSparkSession().createDataFrame(rows, schema);
  step1.setData(ds);
  
  Map<String, Object> step2ConfigMap = Maps.newHashMap();
  step2ConfigMap.put("dependencies", Lists.newArrayList("step1"));
  step2ConfigMap.put(DecisionStep.IF_TRUE_STEP_NAMES_PROPERTY, Lists.newArrayList("step3", "step7"));
  step2ConfigMap.put(DecisionStep.DECISION_METHOD_PROPERTY, DecisionStep.STEP_BY_KEY_DECISION_METHOD);
  step2ConfigMap.put(DecisionStep.STEP_BY_KEY_STEP_PROPERTY, "step1");
  step2ConfigMap.put(DecisionStep.STEP_BY_KEY_KEY_PROPERTY, "namecheck");
  Config step2Config = ConfigFactory.parseMap(step2ConfigMap);
  RefactorStep step2 = new DecisionStep("step2", step2Config);
  steps.add(step2);
  
  Set<Step> refactored = step2.refactor(steps);
  
  assertEquals(refactored, Sets.newHashSet(step1, step2, step5, step6));
}
 
Developer: cloudera-labs, Project: envelope, Lines of code: 28, Source file: TestDecisionStep.java

Example 14: testAgeRangeLong

import org.apache.spark.sql.types.Metadata; // import the package/class on which this method depends
@Test
public void testAgeRangeLong() {
  StructType schema = new StructType(new StructField[] {
      new StructField("name", DataTypes.StringType, false, Metadata.empty()),
      new StructField("nickname", DataTypes.StringType, false, Metadata.empty()),
      new StructField("age", DataTypes.LongType, false, Metadata.empty()),
      new StructField("candycrushscore", DataTypes.createDecimalType(), false, Metadata.empty())
  });

  Map<String, Object> configMap = new HashMap<>();
  configMap.put("fields", Lists.newArrayList("age"));
  configMap.put("range", Lists.newArrayList(0l,105l));
  Config config = ConfigFactory.parseMap(configMap);

  RangeRowRule rule = new RangeRowRule();
  rule.configure("agerange", config);

  Row row1 = new RowWithSchema(schema, "Ian", "Ian", 34L, new BigDecimal("0.00"));
  assertTrue("Row should pass rule", rule.check(row1));

  Row row2 = new RowWithSchema(schema, "Webster1", "Websta1", 110L, new BigDecimal("450.10"));
  assertFalse("Row should not pass rule", rule.check(row2));

  Row row3 = new RowWithSchema(schema, "", "Ian1", 110L, new BigDecimal("450.10"));
  assertFalse("Row should not pass rule", rule.check(row3));

  Row row4 = new RowWithSchema(schema, "First Last", "Ian Last", 100L, new BigDecimal("450.10"));
  assertTrue("Row should pass rule", rule.check(row4));
}
 
Developer: cloudera-labs, Project: envelope, Lines of code: 30, Source file: TestRangeRowRule.java

Example 15: testAgeRangeDecimal

import org.apache.spark.sql.types.Metadata; // import the package/class on which this method depends
@Test
public void testAgeRangeDecimal() {
  StructType schema = new StructType(new StructField[] {
      new StructField("name", DataTypes.StringType, false, Metadata.empty()),
      new StructField("nickname", DataTypes.StringType, false, Metadata.empty()),
      new StructField("age", DataTypes.DoubleType, false, Metadata.empty()),
      new StructField("candycrushscore", DataTypes.createDecimalType(), false, Metadata.empty())
  });

  Map<String, Object> configMap = new HashMap<>();
  configMap.put("fields", Lists.newArrayList("candycrushscore"));
  configMap.put("fieldtype", "decimal");
  configMap.put("range", Lists.newArrayList("-1.56","400.45"));
  Config config = ConfigFactory.parseMap(configMap);

  RangeRowRule rule = new RangeRowRule();
  rule.configure("agerange", config);

  Row row1 = new RowWithSchema(schema, "Ian", "Ian", 34.0, new BigDecimal("-1.00"));
  assertTrue("Row should pass rule", rule.check(row1));

  Row row2 = new RowWithSchema(schema, "Webster1", "Websta1", 110.0, new BigDecimal("-1.57"));
  assertFalse("Row should not pass rule", rule.check(row2));

  Row row3 = new RowWithSchema(schema, "", "Ian1", 110.0, new BigDecimal("450.10"));
  assertFalse("Row should not pass rule", rule.check(row3));

  Row row4 = new RowWithSchema(schema, "First Last", "Ian Last", 100.0, new BigDecimal("400.45"));
  assertTrue("Row should pass rule", rule.check(row4));
}
 
Developer: cloudera-labs, Project: envelope, Lines of code: 31, Source file: TestRangeRowRule.java


Note: the org.apache.spark.sql.types.Metadata.empty method examples in this article were compiled by 纯净天空 from GitHub, MSDocs, and other open-source code and documentation platforms. The code snippets are selected from open-source projects contributed by various developers; copyright of the source code remains with the original authors, and any distribution or use should follow the corresponding project's license. Do not reproduce without permission.