

Java BigQueryIO Class Code Examples

This article collects typical usage examples of the Java class org.apache.beam.sdk.io.gcp.bigquery.BigQueryIO. If you are wondering what the BigQueryIO class does, how to use it, or what real-world code using it looks like, the curated examples below should help.


The BigQueryIO class belongs to the org.apache.beam.sdk.io.gcp.bigquery package. Fifteen code examples of the class are shown below, ordered by popularity.
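Most of the examples follow the same read-transform-write pattern. As a quick orientation, here is a minimal, self-contained sketch of that pattern; the table specs (my-project:my_dataset.source_table and my-project:my_dataset.result_table) and the "name" column are hypothetical placeholders, not taken from any of the projects below:

import java.util.Arrays;

import com.google.api.services.bigquery.model.TableFieldSchema;
import com.google.api.services.bigquery.model.TableRow;
import com.google.api.services.bigquery.model.TableSchema;
import org.apache.beam.sdk.Pipeline;
import org.apache.beam.sdk.io.gcp.bigquery.BigQueryIO;
import org.apache.beam.sdk.options.PipelineOptions;
import org.apache.beam.sdk.options.PipelineOptionsFactory;
import org.apache.beam.sdk.transforms.DoFn;
import org.apache.beam.sdk.transforms.ParDo;

public class BigQueryIOMinimalSketch {

  public static void main(String[] args) {
    PipelineOptions options = PipelineOptionsFactory.fromArgs(args).create();
    Pipeline p = Pipeline.create(options);

    // Schema for the (hypothetical) destination table.
    TableSchema schema = new TableSchema().setFields(Arrays.asList(
        new TableFieldSchema().setName("name").setType("STRING"),
        new TableFieldSchema().setName("name_length").setType("INTEGER")));

    p.apply("ReadRows", BigQueryIO.readTableRows()
            .from("my-project:my_dataset.source_table")) // hypothetical table spec
     .apply("ComputeLength", ParDo.of(new DoFn<TableRow, TableRow>() {
       @ProcessElement
       public void processElement(ProcessContext c) {
         // Assumes the source table has a STRING column named "name".
         String name = (String) c.element().get("name");
         c.output(new TableRow().set("name", name).set("name_length", name.length()));
       }
     }))
     .apply("WriteRows", BigQueryIO.writeTableRows()
         .to("my-project:my_dataset.result_table") // hypothetical table spec
         .withSchema(schema)
         .withCreateDisposition(BigQueryIO.Write.CreateDisposition.CREATE_IF_NEEDED)
         .withWriteDisposition(BigQueryIO.Write.WriteDisposition.WRITE_TRUNCATE));

    p.run().waitUntilFinish();
  }
}

CREATE_IF_NEEDED creates the destination table from the supplied schema when it does not exist, and WRITE_TRUNCATE replaces any existing rows; the examples below vary mainly in how they build schemas, queries, and these dispositions.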

Example 1: filterAlreadyProcessedUrls

import org.apache.beam.sdk.io.gcp.bigquery.BigQueryIO; // import the required package/class
/**
 * Queries BigQuery for the set of already-processed URLs and filters them
 * out of the input collection using a map-valued side input.
 *
 * @param readContent the input items to filter
 * @param pipeline the pipeline to attach the BigQuery read to
 * @param options pipeline options used to build the processed-URLs query
 * @return the items in {@code readContent} whose URLs have not yet been processed
 */
private static PCollection<InputContent> filterAlreadyProcessedUrls(
		PCollection<InputContent> readContent, Pipeline pipeline, 
		IndexerPipelineOptions options) {
	PCollection<InputContent> contentToProcess;
	String query = IndexerPipelineUtils.buildBigQueryProcessedUrlsQuery(options);
	PCollection<KV<String,Long>> alreadyProcessedUrls = pipeline
		.apply("Get processed URLs",BigQueryIO.read().fromQuery(query))
		.apply(ParDo.of(new GetUrlFn()));

	final PCollectionView<Map<String,Long>> alreadyProcessedUrlsSideInput =
		alreadyProcessedUrls.apply(View.<String,Long>asMap());
	  
	contentToProcess = readContent
		.apply(ParDo.of(new FilterProcessedUrls(alreadyProcessedUrlsSideInput))
			.withSideInputs(alreadyProcessedUrlsSideInput));
	return contentToProcess;
}
 
Developer: GoogleCloudPlatform, Project: dataflow-opinion-analysis, Lines: 24, Source: IndexerPipeline.java

Example 2: main

import org.apache.beam.sdk.io.gcp.bigquery.BigQueryIO; // import the required package/class
public static void main(String[] args) {
  Options options = PipelineOptionsFactory.fromArgs(args).withValidation().as(Options.class);

  Pipeline p = Pipeline.create(options);

  // Build the table schema for the output table.
  List<TableFieldSchema> fields = new ArrayList<>();
  fields.add(new TableFieldSchema().setName("month").setType("INTEGER"));
  fields.add(new TableFieldSchema().setName("tornado_count").setType("INTEGER"));
  TableSchema schema = new TableSchema().setFields(fields);

  p.apply(BigQueryIO.readTableRows().from(options.getInput()))
   .apply(new CountTornadoes())
   .apply(BigQueryIO.writeTableRows()
       .to(options.getOutput())
       .withSchema(schema)
       .withCreateDisposition(BigQueryIO.Write.CreateDisposition.CREATE_IF_NEEDED)
       .withWriteDisposition(BigQueryIO.Write.WriteDisposition.WRITE_TRUNCATE));

  p.run().waitUntilFinish();
}
 
Developer: apache, Project: beam, Lines: 22, Source: BigQueryTornadoes.java

Example 3: main

import org.apache.beam.sdk.io.gcp.bigquery.BigQueryIO; // import the required package/class
public static void main(String[] args) throws Exception {
    PipelineOptionsFactory.register(TemplateOptions.class);
    TemplateOptions options = PipelineOptionsFactory
            .fromArgs(args)
            .withValidation()
            .as(TemplateOptions.class);
    options.setAutoscalingAlgorithm(THROUGHPUT_BASED);
    Pipeline pipeline = Pipeline.create(options);
    pipeline.apply(BigQueryIO.read().from(options.getBigQueryTableName()))
            .apply(ParDo.of(new DoFn<TableRow, String>() {
                @ProcessElement
                public void processElement(ProcessContext c) throws Exception {
                    String commaSep = c.element().values()
                            .stream()
                            .map(cell -> cell.toString().trim())
                            .collect(Collectors.joining("\",\""));
                    c.output(commaSep);
                }
            }))
            .apply(TextIO.write().to(options.getOutputFile())
                    .withoutSharding()
                    .withWritableByteChannelFactory(GZIP)
            );
    pipeline.run();
}
 
Developer: shinesolutions, Project: bigquery-table-to-one-file, Lines: 26, Source: BigQueryTableToOneFile.java

Example 4: expand

import org.apache.beam.sdk.io.gcp.bigquery.BigQueryIO; // import the required package/class
@Override
public PDone expand(PCollection<InputT> teamAndScore) {
  teamAndScore
      .apply("ConvertToRow", ParDo.of(new BuildRowFn()))
      .apply(
          BigQueryIO.writeTableRows()
              .to(getTable(projectId, datasetId, tableName))
              .withSchema(getSchema())
              .withCreateDisposition(CreateDisposition.CREATE_IF_NEEDED)
              .withWriteDisposition(WriteDisposition.WRITE_APPEND));
  return PDone.in(teamAndScore.getPipeline());
}
 
Developer: apache, Project: beam, Lines: 13, Source: WriteToBigQuery.java

Example 5: main

import org.apache.beam.sdk.io.gcp.bigquery.BigQueryIO; // import the required package/class
public static void main(String[] args)
    throws Exception {

  Options options = PipelineOptionsFactory.fromArgs(args).withValidation().as(Options.class);
  Pipeline p = Pipeline.create(options);

  TableSchema schema = buildWeatherSchemaProjection();

  p.apply(BigQueryIO.readTableRows().from(options.getInput()))
   .apply(ParDo.of(new ProjectionFn()))
   .apply(new BelowGlobalMean(options.getMonthFilter()))
   .apply(BigQueryIO.writeTableRows()
      .to(options.getOutput())
      .withSchema(schema)
      .withCreateDisposition(BigQueryIO.Write.CreateDisposition.CREATE_IF_NEEDED)
      .withWriteDisposition(BigQueryIO.Write.WriteDisposition.WRITE_TRUNCATE));

  p.run().waitUntilFinish();
}
 
Developer: apache, Project: beam, Lines: 20, Source: FilterExamples.java

Example 6: main

import org.apache.beam.sdk.io.gcp.bigquery.BigQueryIO; // import the required package/class
public static void main(String[] args)
    throws Exception {

  Options options = PipelineOptionsFactory.fromArgs(args).withValidation().as(Options.class);
  Pipeline p = Pipeline.create(options);

  // Build the table schema for the output table.
  List<TableFieldSchema> fields = new ArrayList<>();
  fields.add(new TableFieldSchema().setName("word").setType("STRING"));
  fields.add(new TableFieldSchema().setName("all_plays").setType("STRING"));
  TableSchema schema = new TableSchema().setFields(fields);

  p.apply(BigQueryIO.readTableRows().from(options.getInput()))
   .apply(new PlaysForWord())
   .apply(BigQueryIO.writeTableRows()
      .to(options.getOutput())
      .withSchema(schema)
      .withCreateDisposition(BigQueryIO.Write.CreateDisposition.CREATE_IF_NEEDED)
      .withWriteDisposition(BigQueryIO.Write.WriteDisposition.WRITE_TRUNCATE));

  p.run().waitUntilFinish();
}
 
Developer: apache, Project: beam, Lines: 23, Source: CombinePerKeyExamples.java

Example 7: main

import org.apache.beam.sdk.io.gcp.bigquery.BigQueryIO; // import the required package/class
public static void main(String[] args)
    throws Exception {

  Options options = PipelineOptionsFactory.fromArgs(args).withValidation().as(Options.class);
  Pipeline p = Pipeline.create(options);

  // Build the table schema for the output table.
  List<TableFieldSchema> fields = new ArrayList<>();
  fields.add(new TableFieldSchema().setName("month").setType("INTEGER"));
  fields.add(new TableFieldSchema().setName("max_mean_temp").setType("FLOAT"));
  TableSchema schema = new TableSchema().setFields(fields);

  p.apply(BigQueryIO.readTableRows().from(options.getInput()))
   .apply(new MaxMeanTemp())
   .apply(BigQueryIO.writeTableRows()
      .to(options.getOutput())
      .withSchema(schema)
      .withCreateDisposition(BigQueryIO.Write.CreateDisposition.CREATE_IF_NEEDED)
      .withWriteDisposition(BigQueryIO.Write.WriteDisposition.WRITE_TRUNCATE));

  p.run().waitUntilFinish();
}
 
Developer: apache, Project: beam, Lines: 23, Source: MaxPerKeyExamples.java

Example 8: sinkResultsToBigQuery

import org.apache.beam.sdk.io.gcp.bigquery.BigQueryIO; // import the required package/class
/**
 * Send {@code formattedResults} to BigQuery.
 */
private void sinkResultsToBigQuery(
    PCollection<String> formattedResults, long now,
    String version) {
  String tableSpec = tableSpec(now, version);
  TableSchema tableSchema =
      new TableSchema().setFields(ImmutableList.of(
          new TableFieldSchema().setName("result").setType("STRING"),
          new TableFieldSchema().setName("records").setMode("REPEATED").setType("RECORD")
                                .setFields(ImmutableList.of(
                                    new TableFieldSchema().setName("index").setType("INTEGER"),
                                    new TableFieldSchema().setName("value").setType("STRING")))));
  NexmarkUtils.console("Writing results to BigQuery table %s", tableSpec);
  BigQueryIO.Write io =
      BigQueryIO.write().to(tableSpec)
                      .withSchema(tableSchema)
                      .withCreateDisposition(BigQueryIO.Write.CreateDisposition.CREATE_IF_NEEDED)
                      .withWriteDisposition(BigQueryIO.Write.WriteDisposition.WRITE_APPEND);
  formattedResults
      .apply(queryName + ".StringToTableRow", ParDo.of(new StringToTableRow()))
      .apply(queryName + ".WriteBigQueryResults", io);
}
 
Developer: apache, Project: beam, Lines: 25, Source: NexmarkLauncher.java

Example 9: deploy

import org.apache.beam.sdk.io.gcp.bigquery.BigQueryIO; // import the required package/class
/** Deploys the invoicing pipeline as a template on GCS, for a given projectID and GCS bucket. */
public void deploy() {
  // We can't store options as a member variable due to serialization concerns.
  InvoicingPipelineOptions options = PipelineOptionsFactory.as(InvoicingPipelineOptions.class);
  options.setProject(projectId);
  options.setRunner(DataflowRunner.class);
  options.setStagingLocation(beamBucket + "/staging");
  options.setTemplateLocation(beamBucket + "/templates/invoicing");
  Pipeline p = Pipeline.create(options);

  PCollection<BillingEvent> billingEvents =
      p.apply(
          "Read BillingEvents from Bigquery",
          BigQueryIO.read(BillingEvent::parseFromRecord)
              .fromQuery(InvoicingUtils.makeQueryProvider(options.getYearMonth(), projectId))
              .withCoder(SerializableCoder.of(BillingEvent.class))
              .usingStandardSql()
              .withoutValidation()
              .withTemplateCompatibility());
  applyTerminalTransforms(billingEvents, options.getYearMonth());
  p.run();
}
 
Developer: google, Project: nomulus, Lines: 23, Source: InvoicingPipeline.java

Example 10: setWriteOperation

import org.apache.beam.sdk.io.gcp.bigquery.BigQueryIO; // import the required package/class
private BigQueryIO.Write setWriteOperation(BigQueryIO.Write bigQueryIOPTransform) {
    if (properties.tableOperation.getValue() == BigQueryOutputProperties.TableOperation.NONE
            || properties.tableOperation.getValue() == BigQueryOutputProperties.TableOperation.CREATE_IF_NOT_EXISTS) {
        switch (properties.writeOperation.getValue()) {
        case APPEND:
            bigQueryIOPTransform = bigQueryIOPTransform.withWriteDisposition(BigQueryIO.Write.WriteDisposition.WRITE_APPEND);
            break;
        case WRITE_TO_EMPTY:
            bigQueryIOPTransform = bigQueryIOPTransform.withWriteDisposition(BigQueryIO.Write.WriteDisposition.WRITE_EMPTY);
            break;
        default:
            throw new RuntimeException("To be implemented: " + properties.writeOperation.getValue());
        }
    } else {
        if (properties.writeOperation.getValue() != null) {
            LOG.info("Write operation " + properties.writeOperation.getValue() + " be ignored when Table operation is "
                    + properties.tableOperation.getValue());
        }
    }
    return bigQueryIOPTransform;
}
 
Developer: Talend, Project: components, Lines: 22, Source: BigQueryOutputRuntime.java

Example 11: main

import org.apache.beam.sdk.io.gcp.bigquery.BigQueryIO; // import the required package/class
/**
 * <p>Creates a dataflow pipeline that creates the following chain:</p>
 * <ol>
 *   <li> Gets the records into the Pipeline
 *   <li> Creates Puts from each of the records
 *   <li> Performs a Bigtable Put on the records
 * </ol>
 *
 * @param args Arguments to use to configure the Dataflow Pipeline.  The first three are required
 *   when running via managed resource in Google Cloud Platform.  Those options should be omitted
 *   for LOCAL runs.  The remaining arguments configure the Bigtable connection.
 *        --runner=BlockingDataflowPipelineRunner
 *        --project=[dataflow project] \\
 *        --stagingLocation=gs://[your google storage bucket] \\
 *        --bigtableProject=[bigtable project] \\
 *        --bigtableInstanceId=[bigtable instance id] \\
 *        --bigtableTableId=[bigtable tableName]
 */
public static void main(String[] args) {
  // CloudBigtableOptions is one way to retrieve the options.  It's not required.
  BigQueryBigtableTransferOptions options =
      PipelineOptionsFactory.fromArgs(args).withValidation().as(BigQueryBigtableTransferOptions.class);

  // CloudBigtableTableConfiguration contains the project, instance and table to connect to.
  CloudBigtableTableConfiguration config =
      new CloudBigtableTableConfiguration.Builder()
      .withProjectId(options.getBigtableProjectId())
      .withInstanceId(options.getBigtableInstanceId())
      .withTableId(options.getBigtableTableId())
      .build();

  Pipeline p = Pipeline.create(options);

  p
      // from() and fromQuery() are mutually exclusive; "ReadSourceTable" was meant as the step name.
      .apply("ReadSourceTable", BigQueryIO.read().fromQuery(options.getBqQuery())
          .usingStandardSql())
      .apply(ParDo.of(MUTATION_TRANSFORM))
      .apply(CloudBigtableIO.writeToTable(config));

  p.run().waitUntilFinish();

}
 
Developer: GoogleCloudPlatform, Project: cloud-bigtable-examples, Lines: 44, Source: BigQueryBigtableTransfer.java

Example 12: main

import org.apache.beam.sdk.io.gcp.bigquery.BigQueryIO; // import the required package/class
/**
 * Sets up and starts streaming pipeline.
 *
 * @throws IOException if there is a problem setting up resources
 */
public static void main(String[] args) throws IOException {
  TrafficRoutesOptions options = PipelineOptionsFactory.fromArgs(args)
      .withValidation()
      .as(TrafficRoutesOptions.class);

  options.setBigQuerySchema(FormatStatsFn.getSchema());
  // Using ExampleUtils to set up required resources.
  ExampleUtils exampleUtils = new ExampleUtils(options);
  exampleUtils.setup();

  Pipeline pipeline = Pipeline.create(options);
  TableReference tableRef = new TableReference();
  tableRef.setProjectId(options.getProject());
  tableRef.setDatasetId(options.getBigQueryDataset());
  tableRef.setTableId(options.getBigQueryTable());

  pipeline
      .apply("ReadLines", new ReadFileAndExtractTimestamps(options.getInputFile()))
      // row... => <station route, station speed> ...
      .apply(ParDo.of(new ExtractStationSpeedFn()))
      // map the incoming data stream into sliding windows.
      .apply(Window.<KV<String, StationSpeed>>into(SlidingWindows.of(
          Duration.standardMinutes(options.getWindowDuration())).
          every(Duration.standardMinutes(options.getWindowSlideEvery()))))
      .apply(new TrackSpeed())
      .apply(BigQueryIO.writeTableRows().to(tableRef)
          .withSchema(FormatStatsFn.getSchema()));

  // Run the pipeline.
  PipelineResult result = pipeline.run();

  // ExampleUtils will try to cancel the pipeline and the injector before the program exits.
  exampleUtils.waitToFinish(result);
}
 
Developer: apache, Project: beam, Lines: 40, Source: TrafficRoutes.java

Example 13: filterAlreadyProcessedDocuments

import org.apache.beam.sdk.io.gcp.bigquery.BigQueryIO; // import the required package/class
/**
 * Splits documents into those that still need indexing and those that can be
 * skipped, based on a BigQuery lookup of already-processed document hashes.
 *
 * @param contentToIndexNotSkipped documents that passed the earlier filters
 * @param contentNotToIndexSkipped documents already marked as skipped
 * @param pipeline the pipeline to attach the BigQuery read to
 * @param options pipeline options used to build the processed-documents query
 * @return the partition of documents to index and documents not to index
 */
private static ContentToIndexOrNot filterAlreadyProcessedDocuments(
		PCollection<InputContent> contentToIndexNotSkipped, PCollection<InputContent> contentNotToIndexSkipped,
		Pipeline pipeline, IndexerPipelineOptions options) {
	PCollection<KV<String,Long>> alreadyProcessedDocs = null;
	
	if (!options.getWriteTruncate()) {
		String query = IndexerPipelineUtils.buildBigQueryProcessedDocsQuery(options);
		alreadyProcessedDocs = pipeline
			.apply("Get already processed Documents",BigQueryIO.read().fromQuery(query))
			.apply(ParDo.of(new GetDocumentHashFn()));

	} else {
		Map<String, Long> map = new HashMap<String,Long>();
		alreadyProcessedDocs = pipeline
			.apply("Create empty side input of Docs",
				Create.of(map).withCoder(KvCoder.of(StringUtf8Coder.of(),VarLongCoder.of())));
	}			
	
	final PCollectionView<Map<String,Long>> alreadyProcessedDocsSideInput =  
		alreadyProcessedDocs.apply(View.<String,Long>asMap());
	
	PCollectionTuple indexOrNotBasedOnExactDupes = contentToIndexNotSkipped
		.apply("Extract DocumentHash key", ParDo.of(new GetInputContentDocumentHashFn()))
		.apply("Group by DocumentHash key", GroupByKey.<String, InputContent>create())
		.apply("Eliminate InputContent Dupes", ParDo.of(new EliminateInputContentDupes(alreadyProcessedDocsSideInput))
			.withSideInputs(alreadyProcessedDocsSideInput)
			.withOutputTags(PipelineTags.contentToIndexNotExactDupesTag, // main output collection
				TupleTagList.of(PipelineTags.contentNotToIndexExactDupesTag))); // side output collection	
	
	PCollection<InputContent> contentToIndexNotExactDupes = indexOrNotBasedOnExactDupes.get(PipelineTags.contentToIndexNotExactDupesTag);
	PCollection<InputContent> contentNotToIndexExactDupes = indexOrNotBasedOnExactDupes.get(PipelineTags.contentNotToIndexExactDupesTag);
	
	// Merge the sets of items that are dupes or skipped
	PCollectionList<InputContent> contentNotToIndexList = PCollectionList.of(contentNotToIndexExactDupes).and(contentNotToIndexSkipped);
	
	ContentToIndexOrNot content = new ContentToIndexOrNot(contentToIndexNotExactDupes, contentNotToIndexList.apply(Flatten.<InputContent>pCollections()));
	return content;
}
 
Developer: GoogleCloudPlatform, Project: dataflow-opinion-analysis, Lines: 46, Source: IndexerPipeline.java

Example 14: main

import org.apache.beam.sdk.io.gcp.bigquery.BigQueryIO; // import the required package/class
/**
 * Runs the DatastoreToBigQuery dataflow pipeline
 */
public static void main(String[] args) throws IOException, ScriptException {
  Options options = PipelineOptionsFactory.fromArgs(args)
      .withValidation()
      .as(Options.class);

  NestedValueProvider<String, String> bqJsonSchema = NestedValueProvider
      .of(options.getBqJsonSchema(), new ValueProviderHelpers.GcsLoad());

  options.setRunner(DataflowRunner.class);
  Pipeline pipeline = Pipeline.create(options);
  pipeline
      .apply("IngestEntities",
          DatastoreIO.v1().read()
              .withProjectId(options.getDatastoreProjectId())
              .withLiteralGqlQuery(options.getGqlQuery())
              .withNamespace(options.getNamespace()))
      .apply("EntityToTableRow", ParDo.of(EntityToTableRow.newBuilder()
          .setJsTransformPath(options.getJsTransformPath())
          .setJsTransformFunctionName(options.getJsTransformFunctionName())
          .setStrictCast(options.getStrictCast())
          .setTableSchemaJson(bqJsonSchema)
          .build()))
      .apply("TableRowToBigQuery", BigQueryIO.writeTableRows()
          .to(options.getBqTableSpec())
          .withJsonSchema(bqJsonSchema)
          .withCreateDisposition(CreateDisposition.CREATE_IF_NEEDED)
          .withWriteDisposition(WriteDisposition.WRITE_APPEND));
  pipeline.run();
}
 
Developer: cobookman, Project: teleport, Lines: 33, Source: DatastoreToBq.java

Example 15: main

import org.apache.beam.sdk.io.gcp.bigquery.BigQueryIO; // import the required package/class
public static void main(String[] args) {
    PipelineOptionsFactory.register(TemplateOptions.class);
    TemplateOptions options = PipelineOptionsFactory.fromArgs(args).withValidation().as(TemplateOptions.class);
    Pipeline pipeline = Pipeline.create(options);
    pipeline.apply("READ", TextIO.read().from(options.getInputFile()).withCompressionType(GZIP))
            .apply("TRANSFORM", ParDo.of(new WikiParDo()))
            .apply("WRITE", BigQueryIO.writeTableRows()
                    .to(String.format("%s:devfest_melbourne_2017.wiki_demo", options.getProject()))
                    .withCreateDisposition(CREATE_IF_NEEDED)
                    .withWriteDisposition(WRITE_APPEND)
                    .withSchema(getTableSchema()));
    pipeline.run();
}
 
Developer: shinesolutions, Project: devfest-melbourne-2017-demo, Lines: 14, Source: TemplatePipeline.java


Note: The org.apache.beam.sdk.io.gcp.bigquery.BigQueryIO examples in this article were compiled by 纯净天空 from open-source code and documentation platforms such as GitHub and MSDocs. The snippets are drawn from community open-source projects, and copyright in the source code remains with the original authors; consult each project's license before distributing or using the code, and do not republish without permission.