本文整理汇总了Java中com.google.cloud.hadoop.io.bigquery.BigQueryConfiguration类的典型用法代码示例。如果您正苦于以下问题:Java BigQueryConfiguration类的具体用法?Java BigQueryConfiguration怎么用?Java BigQueryConfiguration使用的例子?那么, 这里精选的类代码示例或许可以为您提供帮助。
BigQueryConfiguration类属于com.google.cloud.hadoop.io.bigquery包,在下文中一共展示了BigQueryConfiguration类的15个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Java代码示例。
示例1: main
import com.google.cloud.hadoop.io.bigquery.BigQueryConfiguration; //导入依赖的package包/类
/**
 * Entry point: consumes XML messages from Kafka via a direct stream, parses them, and
 * writes each 60-second micro-batch to BigQuery through the Hadoop output format.
 *
 * @param args unused command-line arguments.
 * @throws InterruptedException if the streaming context is interrupted while awaiting termination.
 * @throws IOException if the BigQuery output configuration cannot be built.
 */
public static void main(String[] args) throws InterruptedException, IOException {
  SparkConf sparkConf = new SparkConf().setAppName("POC-BigQuery");
  // try-with-resources stops the streaming context even if setup throws.
  try (JavaStreamingContext streamingContext = new JavaStreamingContext(sparkConf, new Duration(60000))) {
    JavaPairInputDStream<String, String> kafkaStream =
        KafkaUtils.createDirectStream(
            streamingContext, String.class, String.class, StringDecoder.class, StringDecoder.class,
            Collections.singletonMap("metadata.broker.list", KAFKA_HOST_PORT),
            Collections.singleton(EXAMPLE_TOPIC));
    // Hadoop configuration carries the BigQuery sink settings for saveAsNewAPIHadoopDataset.
    Configuration hadoopConf = new Configuration();
    BigQueryConfiguration.configureBigQueryOutput(hadoopConf, BQ_EXAMPLE_TABLE, BQ_EXAMPLE_SCHEMA);
    hadoopConf.set("mapreduce.job.outputformat.class", BigQueryOutputFormat.class.getName());
    // Drop Kafka keys (tuple._1) and parse each XML payload into an ExampleXML record.
    JavaDStream<ExampleXML> parsed = kafkaStream.map(tuple -> tuple._2()).map(new ParseXML());
    parsed.foreachRDD(rdd -> {
      System.out.printf("Amount of XMLs: %d\n", rdd.count());
      long start = System.currentTimeMillis();
      rdd.mapToPair(new PrepToBQ()).saveAsNewAPIHadoopDataset(hadoopConf);
      System.out.printf("Sent to BQ in %fs\n", (System.currentTimeMillis() - start) / 1000f);
    });
    streamingContext.start();
    streamingContext.awaitTermination();
  }
}
示例2: outputTo
import com.google.cloud.hadoop.io.bigquery.BigQueryConfiguration; //导入依赖的package包/类
/**
 * Builds a reusable sink function that writes pair RDDs of JSON rows to the given BigQuery table.
 *
 * @param table the fully qualified BigQuery output table id.
 * @param schema the JSON table schema used by the BigQuery output configuration.
 * @param <X> the (ignored) key type of the pair RDD.
 * @return a function that saves each non-empty RDD to BigQuery via the Hadoop dataset API.
 * @throws IOException if the BigQuery output configuration cannot be built.
 */
public static <X> VoidFunction<JavaPairRDD<X, JsonObject>> outputTo(String table, String schema) throws IOException {
  Configuration conf = new Configuration();
  conf.set("mapreduce.job.outputformat.class", BigQueryOutputFormat.class.getName());
  BigQueryConfiguration.configureBigQueryOutput(conf, table, schema);
  return rdd -> {
    // count() launches a full Spark job; the original called it twice per batch
    // (once for the emptiness check, once for logging). Compute it exactly once.
    long rowCount = rdd.count();
    if (rowCount > 0L) {
      long time = System.currentTimeMillis();
      /* This was only required the first time on a fresh table, it seems I had to kickstart the _PARTITIONTIME pseudo-column
       * but now it automatically add to the proper table using ingestion time. Using the decorator would only be required
       * if we were to place the entries using their "event timestamp", e.g. loading rows on old partitions.
       * Implementing that would be much harder though, since'd have to check each message, or each "partition" (date-based)
      if (partitioned) {
        String today = ZonedDateTime.now(ZoneOffset.UTC).format(DateTimeFormatter.ofPattern("yyyyMMdd"));
        BigQueryConfiguration.configureBigQueryOutput(conf, table + "$" + today, schema);
      }*/
      rdd.saveAsNewAPIHadoopDataset(conf);
      System.out.printf("Sent %d rows to BQ in %.1fs\n", rowCount, (System.currentTimeMillis() - time) / 1000f);
    }
  };
}
示例3: getTableSchema
import com.google.cloud.hadoop.io.bigquery.BigQueryConfiguration; //导入依赖的package包/类
/**
 * Gets the output table schema based on the given configuration.
 *
 * @param conf the configuration to reference the keys from.
 * @return the derived table schema, null if no table schema exists in the configuration.
 * @throws IOException if a table schema was set in the configuration but couldn't be parsed.
 */
public static TableSchema getTableSchema(Configuration conf) throws IOException {
  String outputSchema = conf.get(BigQueryConfiguration.OUTPUT_TABLE_SCHEMA_KEY);
  // Guard clause: absent schema is a legitimate state, signalled to callers as null.
  if (Strings.isNullOrEmpty(outputSchema)) {
    return null;
  }
  try {
    // Parse the configured JSON array of field definitions into TableFieldSchema objects.
    List<TableFieldSchema> fields = new ArrayList<>();
    JsonParser parser = JacksonFactory.getDefaultInstance().createJsonParser(outputSchema);
    parser.parseArrayAndClose(fields, TableFieldSchema.class);
    return new TableSchema().setFields(fields);
  } catch (IOException e) {
    // Re-wrap with the offending configuration key so the bad setting is easy to locate;
    // the original parse failure is preserved as the cause.
    throw new IOException(
        "Unable to parse key '" + BigQueryConfiguration.OUTPUT_TABLE_SCHEMA_KEY + "'.", e);
  }
}
示例4: getFileOutputFormat
import com.google.cloud.hadoop.io.bigquery.BigQueryConfiguration; //导入依赖的package包/类
/**
 * Gets a configured instance of the stored {@link FileOutputFormat} in the configuration.
 *
 * @param conf the configuration to reference the keys from.
 * @return a configured instance of the stored {@link FileOutputFormat} in the configuration.
 * @throws IOException if there's an issue getting an instance of a FileOutputFormat from the
 *     configuration.
 */
@SuppressWarnings("rawtypes")
public static FileOutputFormat getFileOutputFormat(Configuration conf) throws IOException {
  // Fail fast if the output-format key is missing entirely.
  ConfigurationUtil.getMandatoryConfig(conf, BigQueryConfiguration.OUTPUT_FORMAT_CLASS_KEY);
  Class<?> rawClass = conf.getClass(BigQueryConfiguration.OUTPUT_FORMAT_CLASS_KEY, null);
  // A null here means the configured value could not be resolved to a class.
  if (rawClass == null) {
    throw new IOException(
        "Unable to resolve value for the configuration key '"
            + BigQueryConfiguration.OUTPUT_FORMAT_CLASS_KEY
            + "'.");
  }
  if (!FileOutputFormat.class.isAssignableFrom(rawClass)) {
    throw new IOException("The class " + rawClass.getName() + " is not a FileOutputFormat.");
  }
  // Narrow to the FileOutputFormat subtype and instantiate it, wiring in conf
  // (ReflectionUtils calls setConf when the class is Configurable).
  return ReflectionUtils.newInstance(rawClass.asSubclass(FileOutputFormat.class), conf);
}
示例5: testConfigure
import com.google.cloud.hadoop.io.bigquery.BigQueryConfiguration; //导入依赖的package包/类
/**
 * Test the configure function correctly sets the configuration keys.
 *
 * <p>Configures a complete BigQuery output (project, dataset, table, schema, GCS path, file
 * format, and output-format class), then asserts that each corresponding
 * {@code BigQueryConfiguration} key was written into {@code conf}.
 */
@Test
public void testConfigure() throws IOException {
BigQueryOutputConfiguration.configure(
conf,
TEST_PROJECT_ID,
TEST_DATASET_ID,
TEST_TABLE_ID,
TEST_TABLE_SCHEMA,
TEST_OUTPUT_PATH_STRING,
TEST_FILE_FORMAT,
TEST_OUTPUT_CLASS);
// Each configured value must round-trip through its dedicated configuration key.
assertThat(conf.get(BigQueryConfiguration.OUTPUT_PROJECT_ID_KEY), is(TEST_PROJECT_ID));
assertThat(conf.get(BigQueryConfiguration.OUTPUT_DATASET_ID_KEY), is(TEST_DATASET_ID));
assertThat(conf.get(BigQueryConfiguration.OUTPUT_TABLE_ID_KEY), is(TEST_TABLE_ID));
// The file format is stored by enum name, the output class by fully qualified class name.
assertThat(conf.get(BigQueryConfiguration.OUTPUT_FILE_FORMAT_KEY), is(TEST_FILE_FORMAT.name()));
assertThat(
conf.get(BigQueryConfiguration.OUTPUT_FORMAT_CLASS_KEY), is(TEST_OUTPUT_CLASS.getName()));
// The schema object is expected to serialize to its canonical string form.
assertThat(
conf.get(BigQueryConfiguration.OUTPUT_TABLE_SCHEMA_KEY), is(TEST_TABLE_SCHEMA_STRING));
assertThat(
BigQueryOutputConfiguration.getGcsOutputPath(conf).toString(), is(TEST_OUTPUT_PATH_STRING));
}
示例6: testValidateConfigurationMissingProjectId
import com.google.cloud.hadoop.io.bigquery.BigQueryConfiguration; //导入依赖的package包/类
/** Test the validateConfiguration errors on missing project id. */
@Test
public void testValidateConfigurationMissingProjectId() throws IOException {
// JUnit ExpectedException rule: the validateConfiguration call below must throw IOException.
expectedException.expect(IOException.class);
// Build a fully valid output configuration first...
BigQueryOutputConfiguration.configure(
conf,
TEST_PROJECT_ID,
TEST_DATASET_ID,
TEST_TABLE_ID,
TEST_TABLE_SCHEMA,
TEST_OUTPUT_PATH_STRING,
TEST_FILE_FORMAT,
TEST_OUTPUT_CLASS);
// ...then remove only the output project id so exactly one key is missing for validation.
conf.unset(BigQueryConfiguration.OUTPUT_PROJECT_ID_KEY);
BigQueryOutputConfiguration.validateConfiguration(conf);
}
示例7: testValidateConfigurationBadSchema
import com.google.cloud.hadoop.io.bigquery.BigQueryConfiguration; //导入依赖的package包/类
/**
 * Test the validateConfiguration errors on missing table schema.
 *
 * <p>NOTE(review): despite the original summary saying "missing", this test overwrites the
 * schema with an unparsable value — it exercises the bad-schema path, not the missing-schema
 * path.
 */
@Test
public void testValidateConfigurationBadSchema() throws IOException {
// JUnit ExpectedException rule: the validateConfiguration call below must throw IOException.
expectedException.expect(IOException.class);
// Build a fully valid output configuration first...
BigQueryOutputConfiguration.configure(
conf,
TEST_PROJECT_ID,
TEST_DATASET_ID,
TEST_TABLE_ID,
TEST_TABLE_SCHEMA,
TEST_OUTPUT_PATH_STRING,
TEST_FILE_FORMAT,
TEST_OUTPUT_CLASS);
// ...then replace the schema with a string that cannot be parsed as a table schema.
conf.set(BigQueryConfiguration.OUTPUT_TABLE_SCHEMA_KEY, TEST_BAD_TABLE_SCHEMA_STRING);
BigQueryOutputConfiguration.validateConfiguration(conf);
}
示例8: testValidateConfigurationMissingFileFormat
import com.google.cloud.hadoop.io.bigquery.BigQueryConfiguration; //导入依赖的package包/类
/** Test the validateConfiguration errors on missing file format. */
@Test
public void testValidateConfigurationMissingFileFormat() throws IOException {
// JUnit ExpectedException rule: the validateConfiguration call below must throw IOException.
expectedException.expect(IOException.class);
// Build a fully valid output configuration first...
BigQueryOutputConfiguration.configure(
conf,
TEST_PROJECT_ID,
TEST_DATASET_ID,
TEST_TABLE_ID,
TEST_TABLE_SCHEMA,
TEST_OUTPUT_PATH_STRING,
TEST_FILE_FORMAT,
TEST_OUTPUT_CLASS);
// ...then remove only the file-format key so exactly one key is missing for validation.
conf.unset(BigQueryConfiguration.OUTPUT_FILE_FORMAT_KEY);
BigQueryOutputConfiguration.validateConfiguration(conf);
}
示例9: testValidateConfigurationMissingOutputFormat
import com.google.cloud.hadoop.io.bigquery.BigQueryConfiguration; //导入依赖的package包/类
/** Test the validateConfiguration errors on missing output format class. */
@Test
public void testValidateConfigurationMissingOutputFormat() throws IOException {
// JUnit ExpectedException rule: the validateConfiguration call below must throw IOException.
expectedException.expect(IOException.class);
// Build a fully valid output configuration first...
BigQueryOutputConfiguration.configure(
conf,
TEST_PROJECT_ID,
TEST_DATASET_ID,
TEST_TABLE_ID,
TEST_TABLE_SCHEMA,
TEST_OUTPUT_PATH_STRING,
TEST_FILE_FORMAT,
TEST_OUTPUT_CLASS);
// ...then remove only the output-format class key so exactly one key is missing.
conf.unset(BigQueryConfiguration.OUTPUT_FORMAT_CLASS_KEY);
BigQueryOutputConfiguration.validateConfiguration(conf);
}
示例10: main
import com.google.cloud.hadoop.io.bigquery.BigQueryConfiguration; //导入依赖的package包/类
/**
 * Entry point: consumes Kafka messages (with offsets managed via ZooKeeper by
 * {@code dealWithOffsets}), parses the XML payloads, and writes each 60-second micro-batch to
 * BigQuery. Stops the streaming context after two consecutive empty batches.
 *
 * @param args unused command-line arguments.
 * @throws InterruptedException if the streaming context is interrupted while awaiting termination.
 * @throws IOException if the BigQuery output configuration cannot be built.
 */
public static void main(String[] args) throws InterruptedException, IOException {
  SparkConf sc = new SparkConf().setAppName("POC-OffsetsToZK");
  try (JavaStreamingContext jsc = new JavaStreamingContext(sc, new Duration(60000))) {
    // Counts consecutive empty batches; two in a row triggers shutdown below.
    LongAccumulator stopCondition = jsc.ssc().sc().longAccumulator();
    JavaPairDStream<String, String> stream = dealWithOffsets(jsc);
    final ParseXML parseXML = new ParseXML();
    // Keep the Kafka key, parse the XML value.
    JavaPairDStream<String, ExampleXML> records = stream.mapToPair(
        tuple -> new Tuple2<>(tuple._1(), parseXML.call(tuple._2())));
    Configuration conf = new Configuration();
    BigQueryConfiguration.configureBigQueryOutput(conf, BQ_EXAMPLE_TABLE, BQ_EXAMPLE_SCHEMA);
    conf.set("mapreduce.job.outputformat.class", BigQueryOutputFormat.class.getName());
    records.foreachRDD(rdd -> {
      // count() launches a full Spark job; the original computed it twice per batch
      // (once for the log line, once for the emptiness check). Compute it exactly once.
      long batchSize = rdd.count();
      System.out.printf("Amount of XMLs: %d\n", batchSize);
      if (batchSize > 0L) {
        stopCondition.reset();
        long time = System.currentTimeMillis();
        rdd.mapToPair(new PrepToBQ()).saveAsNewAPIHadoopDataset(conf);
        System.out.printf("Sent to BQ in %fs\n", (System.currentTimeMillis() - time) / 1000f);
      } else {
        stopCondition.add(1L);
        // Two consecutive empty batches: assume the topic is drained and stop the job.
        if (stopCondition.value() >= 2L) {
          jsc.stop();
        }
      }
    });
    jsc.start();
    jsc.awaitTermination();
  }
}
示例11: getProjectId
import com.google.cloud.hadoop.io.bigquery.BigQueryConfiguration; //导入依赖的package包/类
/**
 * Gets the project id based on the given configuration. If the {@link
 * BigQueryConfiguration#OUTPUT_PROJECT_ID_KEY} is missing, this resolves to referencing the
 * {@link BigQueryConfiguration#PROJECT_ID_KEY} key.
 *
 * @param conf the configuration to reference the keys from.
 * @return the project id based on the given configuration.
 * @throws IOException if a required key is missing.
 */
public static String getProjectId(Configuration conf) throws IOException {
  // Prefer the output-specific project id; fall back to the job-wide default project id.
  String outputProject = conf.get(BigQueryConfiguration.OUTPUT_PROJECT_ID_KEY);
  String resolved =
      Strings.isNullOrEmpty(outputProject)
          ? conf.get(BigQueryConfiguration.PROJECT_ID_KEY)
          : outputProject;
  // Neither key yielded a usable value: report the primary (output) key in the error.
  if (Strings.isNullOrEmpty(resolved)) {
    throw new IOException(
        "Must supply a value for configuration setting: "
            + BigQueryConfiguration.OUTPUT_PROJECT_ID_KEY);
  }
  return resolved;
}
示例12: getTableReference
import com.google.cloud.hadoop.io.bigquery.BigQueryConfiguration; //导入依赖的package包/类
/**
 * Gets the output table reference based on the given configuration. If the {@link
 * BigQueryConfiguration#OUTPUT_PROJECT_ID_KEY} is missing, this resolves to referencing the
 * {@link BigQueryConfiguration#PROJECT_ID_KEY} key.
 *
 * @param conf the configuration to reference the keys from.
 * @return a reference to the derived output table.
 * @throws IOException if a required key is missing.
 */
public static TableReference getTableReference(Configuration conf) throws IOException {
  // Resolve the three coordinates in order; each lookup throws if its key is absent,
  // so the first missing value (project, then dataset, then table) surfaces first.
  String project = getProjectId(conf);
  String dataset =
      ConfigurationUtil.getMandatoryConfig(conf, BigQueryConfiguration.OUTPUT_DATASET_ID_KEY);
  String table =
      ConfigurationUtil.getMandatoryConfig(conf, BigQueryConfiguration.OUTPUT_TABLE_ID_KEY);
  return new TableReference()
      .setProjectId(project)
      .setDatasetId(dataset)
      .setTableId(table);
}
示例13: getFileFormat
import com.google.cloud.hadoop.io.bigquery.BigQueryConfiguration; //导入依赖的package包/类
/**
 * Gets the stored output {@link BigQueryFileFormat} in the configuration.
 *
 * @param conf the configuration to reference the keys from.
 * @return the stored output {@link BigQueryFileFormat} in the configuration.
 * @throws IOException if file format value is missing from the configuration.
 */
public static BigQueryFileFormat getFileFormat(Configuration conf) throws IOException {
  // getMandatoryConfig throws if the key is absent; fromName maps the stored
  // enum name back to its BigQueryFileFormat constant.
  return BigQueryFileFormat.fromName(
      ConfigurationUtil.getMandatoryConfig(conf, BigQueryConfiguration.OUTPUT_FILE_FORMAT_KEY));
}
示例14: testGetProjectId
import com.google.cloud.hadoop.io.bigquery.BigQueryConfiguration; //导入依赖的package包/类
/** Test the getProjectId returns the correct data. */
@Test
public void testGetProjectId() throws IOException {
// With the output-specific project id set, getProjectId should return it directly
// without consulting the fallback PROJECT_ID_KEY.
conf.set(BigQueryConfiguration.OUTPUT_PROJECT_ID_KEY, TEST_PROJECT_ID);
String result = BigQueryOutputConfiguration.getProjectId(conf);
assertThat(result, is(TEST_PROJECT_ID));
}
示例15: testGetProjectIdBackup
import com.google.cloud.hadoop.io.bigquery.BigQueryConfiguration; //导入依赖的package包/类
/** Test the getProjectId returns the correct data. */
@Test
public void testGetProjectIdBackup() throws IOException {
// Only the job-wide PROJECT_ID_KEY is set (OUTPUT_PROJECT_ID_KEY is absent), so
// getProjectId should fall back to it, matching the backup path in getProjectId.
conf.set(BigQueryConfiguration.PROJECT_ID_KEY, TEST_PROJECT_ID);
String result = BigQueryOutputConfiguration.getProjectId(conf);
assertThat(result, is(TEST_PROJECT_ID));
}