

Java TextIO Class Code Examples

This article collects typical usage examples of the Java class com.google.cloud.dataflow.sdk.io.TextIO. If you are wondering what the TextIO class is for, how to use it, or what working code looks like, the curated examples below should help.


The TextIO class belongs to the com.google.cloud.dataflow.sdk.io package. Fifteen code examples of the class are shown below, sorted by popularity by default.
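Before the numbered examples, here is a minimal, self-contained sketch of the two most common TextIO operations, reading and writing lines of text, in the same style the examples below use. This is an illustrative sketch only: the GCS paths and the class name MinimalTextIO are hypothetical placeholders, not code from any of the featured projects.

import com.google.cloud.dataflow.sdk.Pipeline;
import com.google.cloud.dataflow.sdk.io.TextIO;
import com.google.cloud.dataflow.sdk.options.PipelineOptionsFactory;

public class MinimalTextIO {
  public static void main(String[] args) {
    // Build pipeline options from command-line args and create the pipeline.
    Pipeline p = Pipeline.create(PipelineOptionsFactory.fromArgs(args).create());

    // TextIO.Read yields one String element per line of the matching input files;
    // TextIO.Write writes one element per line, sharded across output files.
    p.apply(TextIO.Read.named("ReadLines").from("gs://my-bucket/input*.txt"))
     .apply(TextIO.Write.named("WriteLines").to("gs://my-bucket/output"));

    p.run();
  }
}

The optional .named(...) calls simply label the pipeline steps; most of the examples below follow the same read-transform-write pattern, inserting ParDo and other transforms between the read and the write.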

Example 1: main

import com.google.cloud.dataflow.sdk.io.TextIO; // import the required package/class
/**
 * Run a batch pipeline.
 */
public static void main(String[] args) throws Exception {
  Options options = PipelineOptionsFactory.fromArgs(args).withValidation().as(Options.class);
  Pipeline pipeline = Pipeline.create(options);

  TableReference tableRef = new TableReference();
  tableRef.setDatasetId(options.as(Options.class).getOutputDataset());
  tableRef.setProjectId(options.as(GcpOptions.class).getProject());
  tableRef.setTableId(options.getOutputTableName());

  // Read events from a CSV file and parse them.
  pipeline
      .apply(TextIO.Read.from(options.getInput()))
      .apply(ParDo.named("ParseGameEvent").of(new ParseEventFn()))
      // Extract and sum username/score pairs from the event data.
      .apply("ExtractUserScore", new ExtractAndSumScore("user"))
      // Write the results to BigQuery.
      .apply(ParDo.named("FormatUserScoreSums").of(new FormatUserScoreSumsFn()))
      .apply(
          BigQueryIO.Write.to(tableRef)
              .withSchema(FormatUserScoreSumsFn.getSchema())
              .withCreateDisposition(CreateDisposition.CREATE_IF_NEEDED)
              .withWriteDisposition(WriteDisposition.WRITE_APPEND));

  pipeline.run();
}
 
Developer: mdvorsky, Project: DataflowSME, Lines: 29, Source: Exercise1.java

Example 2: apply

import com.google.cloud.dataflow.sdk.io.TextIO; // import the required package/class
@Override
public PCollection<GameEvent> apply(PBegin begin) {
  if (options.getInput() != null && !options.getInput().isEmpty()) {
    return begin
        .getPipeline()
        .apply(TextIO.Read.from(options.getInput()))
        .apply(ParDo.named("ParseGameEvent").of(new ParseEventFn()))
        .apply(
            "AddEventTimestamps",
            WithTimestamps.of((GameEvent i) -> new Instant(i.getTimestamp())));
  } else {
    return begin
        .getPipeline()
        .apply(PubsubIO.Read.timestampLabel(TIMESTAMP_ATTRIBUTE).topic(options.getTopic()))
        .apply(ParDo.named("ParseGameEvent").of(new ParseEventFn()));
  }
}
 
Developer: mdvorsky, Project: DataflowSME, Lines: 18, Source: Exercise3.java

Example 3: main

import com.google.cloud.dataflow.sdk.io.TextIO; // import the required package/class
/**
 * Run a batch pipeline.
 */
public static void main(String[] args) throws Exception {
  Options options = PipelineOptionsFactory.fromArgs(args).withValidation().as(Options.class);
  Pipeline pipeline = Pipeline.create(options);

  TableReference tableRef = new TableReference();
  tableRef.setDatasetId(options.as(Options.class).getOutputDataset());
  tableRef.setProjectId(options.as(GcpOptions.class).getProject());
  tableRef.setTableId(options.getOutputTableName());

  // Read events from a CSV file, parse them and write (import) them to BigQuery.
  pipeline
      .apply(TextIO.Read.from(options.getInput()))
      .apply(ParDo.named("ParseGameEvent").of(new ParseEventFn()))
      .apply(ParDo.named("FormatGameEvent").of(new FormatGameEventFn()))
      .apply(
          BigQueryIO.Write.to(tableRef)
              .withSchema(FormatGameEventFn.getSchema())
              .withCreateDisposition(CreateDisposition.CREATE_IF_NEEDED)
              .withWriteDisposition(WriteDisposition.WRITE_APPEND));

  pipeline.run();
}
 
Developer: mdvorsky, Project: DataflowSME, Lines: 26, Source: Exercise0.java

Example 4: runInjectorPipeline

import com.google.cloud.dataflow.sdk.io.TextIO; // import the required package/class
/**
 * Runs the batch injector for the streaming pipeline.
 *
 * <p>The injector pipeline will read from the given text file, and inject data
 * into the Google Cloud Pub/Sub topic.
 */
public void runInjectorPipeline(String inputFile, String topic) {
  DataflowPipelineOptions copiedOptions = options.cloneAs(DataflowPipelineOptions.class);
  copiedOptions.setStreaming(false);
  copiedOptions.setWorkerHarnessContainerImage(
      DataflowPipelineRunner.BATCH_WORKER_HARNESS_CONTAINER_IMAGE);
  copiedOptions.setNumWorkers(
      options.as(ExamplePubsubTopicOptions.class).getInjectorNumWorkers());
  copiedOptions.setJobName(options.getJobName() + "-injector");
  Pipeline injectorPipeline = Pipeline.create(copiedOptions);
  injectorPipeline.apply(TextIO.Read.from(inputFile))
                  .apply(IntraBundleParallelization
                      .of(PubsubFileInjector.publish(topic))
                      .withMaxParallelism(20));
  DataflowPipelineJob injectorJob = (DataflowPipelineJob) injectorPipeline.run();
  jobsToCancel.add(injectorJob);
}
 
Developer: sinmetal, Project: iron-hippo, Lines: 23, Source: DataflowExampleUtils.java

Example 5: doGet

import com.google.cloud.dataflow.sdk.io.TextIO; // import the required package/class
@Override
public void doGet(HttpServletRequest req, HttpServletResponse resp)
    throws IOException {
  PrintWriter out = resp.getWriter();

  Preconditions.checkNotNull(Constants.ORG_ID);
  Preconditions.checkNotNull(Constants.OUTPUT_PREFIX);
  Preconditions.checkNotNull(Constants.DATAFLOW_STAGING);

  PipelineOptions options;
  if (CloudUtil.willExecuteOnCloud()) {
    options = getCloudExecutionOptions(Constants.DATAFLOW_STAGING);
  } else {
    options = getLocalExecutionOptions();
  }

  new ExportedServiceAccountKeyRemover(options, Constants.ORG_ID)
      .attachSink(TextIO.Write.named("Write output messages").to(Constants.OUTPUT_PREFIX))
      .run();
  out.println("Test passed! The output was written to GCS");
}
 
Developer: GoogleCloudPlatform, Project: policyscanner, Lines: 22, Source: UserManagedKeysApp.java

Example 6: main

import com.google.cloud.dataflow.sdk.io.TextIO; // import the required package/class
/**
 * Main function for the runner.
 * @param args The args this program was called with.
 * @throws IOException Thrown if there's an error reading from one of the APIs.
 */
public static void main(String[] args) throws IOException {
  Preconditions.checkNotNull(Constants.ORG_NAME);
  Preconditions.checkNotNull(Constants.POLICY_BUCKET);
  Preconditions.checkNotNull(Constants.OUTPUT_PREFIX);
  Preconditions.checkNotNull(Constants.DATAFLOW_STAGING);
  GCSFilesSource source = null;
  try {
    source = new GCSFilesSource(Constants.POLICY_BUCKET, Constants.ORG_NAME);
  } catch (GeneralSecurityException e) {
    throw new IOException("GeneralSecurityException: Cannot create GCSFilesSource", e);
  }
  PipelineOptions options;
  if (CloudUtil.willExecuteOnCloud()) {
    options = getCloudExecutionOptions(Constants.DATAFLOW_STAGING);
  } else {
    options = getLocalExecutionOptions();
  }
  new OnDemandLiveStateChecker(options, source)
      .attachSink(TextIO.Write.named("Write messages to GCS").to(Constants.OUTPUT_PREFIX))
      .run();
}
 
Developer: GoogleCloudPlatform, Project: policyscanner, Lines: 27, Source: LiveStateCheckerRunner.java

Example 7: main

import com.google.cloud.dataflow.sdk.io.TextIO; // import the required package/class
public static void main(String[] args) {
	
	String[] arguments = {
			String.format("--output=%s/output.txt", System.getProperty("java.io.tmpdir"))
	};
	
	Options options = PipelineOptionsFactory.fromArgs(arguments)
			.withValidation().as(Options.class);
	options.setRunner(FlinkLocalPipelineRunner.class);
	
	Pipeline p = Pipeline.create(options);

	p.apply(TextIO.Read.named("ReadLines").from(options.getInput()))
			.apply(new CountWords())
			.apply(TextIO.Write.named("WriteCounts")
					.to(options.getOutput())
					.withNumShards(options.getNumShards()));

	p.run();
}
 
Developer: StephanEwen, Project: flink-dataflow, Lines: 21, Source: GoogleWordCountExampleRunner.java

Example 8: translateNode

import com.google.cloud.dataflow.sdk.io.TextIO; // import the required package/class
@Override
public void translateNode(TransformTreeNode node, Bound<String> transform, TranslationContext translation) {
	String path = transform.getFilepattern();
	String name = transform.getName(); 
	Coder<?> coder = transform.getDefaultOutputCoder(transform.getOutput());
	
	if (coder != null && coder != TextIO.DEFAULT_TEXT_CODER) {
		throw new UnsupportedOperationException("Currently only supports UTF-8 inputs.");
	}
	
	DataSource<String> source = translation.getExecutionEnvironment().readTextFile(path);
	if (name != null) {
		source = source.name(name);
	}
	
	translation.registerDataSet(source, node);
}
 
Developer: StephanEwen, Project: flink-dataflow, Lines: 18, Source: FlinkTransformTranslators.java

Example 9: writeText

import com.google.cloud.dataflow.sdk.io.TextIO; // import the required package/class
private static <T> TransformEvaluator<TextIO.Write.Bound<T>> writeText() {
  return new TransformEvaluator<TextIO.Write.Bound<T>>() {
    @Override
    public void evaluate(TextIO.Write.Bound<T> transform, EvaluationContext context) {
      @SuppressWarnings("unchecked")
      JavaPairRDD<T, Void> last =
          ((JavaRDDLike<WindowedValue<T>, ?>) context.getInputRDD(transform))
          .map(WindowingHelpers.<T>unwindowFunction())
          .mapToPair(new PairFunction<T, T, Void>() {
            @Override
            public Tuple2<T, Void> call(T t) throws Exception {
              return new Tuple2<>(t, null);
            }
          });
      ShardTemplateInformation shardTemplateInfo =
          new ShardTemplateInformation(transform.getNumShards(),
              transform.getShardTemplate(), transform.getFilenamePrefix(),
              transform.getFilenameSuffix());
      writeHadoopFile(last, new Configuration(), shardTemplateInfo, Text.class,
          NullWritable.class, TemplatedTextOutputFormat.class);
    }
  };
}
 
Developer: shakamunyi, Project: spark-dataflow, Lines: 25, Source: TransformTranslator.java

Example 10: apply

import com.google.cloud.dataflow.sdk.io.TextIO; // import the required package/class
@Override
public PCollection<KV<String, String>> apply(PInput input) {
  Pipeline pipeline = input.getPipeline();

  // Create one TextIO.Read transform for each data file
  // and add its output to a PCollectionList.
  PCollectionList<KV<String, String>> filesToLines = PCollectionList.empty(pipeline);

  for (final String fileLocation : files) {
    PTransform<PInput, PCollection<String>> inputSource
        = TextIO.Read.from(fileLocation)
            .named("TextIO.Read(" + fileLocation + ")");

    PCollection<KV<String, String>> oneFileToLines = pipeline
        .apply(inputSource)
        .apply(WithKeys.<String, String>of(fileLocation));

    filesToLines = filesToLines.and(oneFileToLines);
  }

  return filesToLines.apply(Flatten.<KV<String, String>> pCollections())
                     .setCoder(getDefaultOutputCoder());
}
 
Developer: GoogleCloudPlatform, Project: dataflow-precipitation-pipeline, Lines: 24, Source: ReadDataWithFileName.java

Example 11: main

import com.google.cloud.dataflow.sdk.io.TextIO; // import the required package/class
public static void main(String[] args) {
  // CloudBigtableOptions is one way to retrieve the options.  It's not required.
  // https://github.com/GoogleCloudPlatform/cloud-bigtable-examples/blob/master/java/dataflow-connector-examples/src/main/java/com/google/cloud/bigtable/dataflow/example/HelloWorldWrite.java
  BigtableCsvOptions options =
      PipelineOptionsFactory.fromArgs(args).withValidation().as(BigtableCsvOptions.class);
  CloudBigtableTableConfiguration config =
      CloudBigtableTableConfiguration.fromCBTOptions(options);

  Pipeline p = Pipeline.create(options);

  CloudBigtableIO.initializeForWrite(p);

  PCollection<KV<String, Integer>> ngrams =
      applyPipelineToParseBooks(p.apply(TextIO.Read.from(options.getInputFile())));
  PCollection<Mutation> mutations = ngrams.apply(ParDo.of(ENCODE_NGRAM));
  mutations.apply(CloudBigtableIO.writeToTable(config));

  // Run the pipeline.
  p.run();
}
 
Developer: GoogleCloudPlatform, Project: cloud-bigtable-examples, Lines: 21, Source: LoadBooks.java

Example 12: main

import com.google.cloud.dataflow.sdk.io.TextIO; // import the required package/class
/** Run a batch pipeline. */
public static void main(String[] args) throws Exception {
  Options options = PipelineOptionsFactory.fromArgs(args).withValidation().as(Options.class);
  Pipeline pipeline = Pipeline.create(options);

  TableReference tableRef = new TableReference();
  tableRef.setDatasetId(options.as(Options.class).getOutputDataset());
  tableRef.setProjectId(options.as(GcpOptions.class).getProject());
  tableRef.setTableId(options.getOutputTableName());

  // Read events from a CSV file and parse them.
  pipeline
      .apply(TextIO.Read.from(options.getInput()))
      .apply(ParDo.named("ParseGameEvent").of(new ParseEventFn()))
      .apply(
          "AddEventTimestamps", WithTimestamps.of((GameEvent i) -> new Instant(i.getTimestamp())))
      .apply("WindowedTeamScore", new WindowedTeamScore(Duration.standardMinutes(60)))
      // Write the results to BigQuery.
      .apply(ParDo.named("FormatTeamScoreSums").of(new FormatTeamScoreSumsFn()))
      .apply(
          BigQueryIO.Write.to(tableRef)
              .withSchema(FormatTeamScoreSumsFn.getSchema())
              .withCreateDisposition(CreateDisposition.CREATE_IF_NEEDED)
              .withWriteDisposition(WriteDisposition.WRITE_APPEND));

  pipeline.run();
}
 
Developer: mdvorsky, Project: DataflowSME, Lines: 28, Source: Exercise2.java

Example 13: main

import com.google.cloud.dataflow.sdk.io.TextIO; // import the required package/class
public static void main(String[] args) {
	StorageToDatastoreOptions options = PipelineOptionsFactory.fromArgs(args).withValidation()
			.as(StorageToDatastoreOptions.class);
	Pipeline p = Pipeline.create(options);

	p.apply(TextIO.Read.named("ReadLines").from(options.getInputFile())).apply(new CSVToDatastore())
			.apply(DatastoreIO.v1().write().withProjectId(options.getProject()));

	p.run();
}
 
Developer: topgate, Project: retail-demo, Lines: 11, Source: StorageToDatastore.java

Example 14: main

import com.google.cloud.dataflow.sdk.io.TextIO; // import the required package/class
public static void main(String[] args) {
  WordCountOptions options = PipelineOptionsFactory.fromArgs(args).withValidation()
    .as(WordCountOptions.class);
  Pipeline p = Pipeline.create(options);

  // Concepts #2 and #3: Our pipeline applies the composite CountWords transform, and passes the
  // static FormatAsTextFn() to the ParDo transform.
  p.apply(TextIO.Read.named("ReadLines").from(options.getInputFile()))
   .apply(new CountWords())
   .apply(ParDo.of(new FormatAsTextFn()))
   .apply(TextIO.Write.named("WriteCounts").to(options.getOutput()));

  p.run();
}
 
Developer: sinmetal, Project: iron-hippo, Lines: 15, Source: WordCount.java

Example 15: main

import com.google.cloud.dataflow.sdk.io.TextIO; // import the required package/class
public static void main(String[] args) {
  WordCountOptions options = PipelineOptionsFactory.fromArgs(args).withValidation()
    .as(WordCountOptions.class);
  Pipeline p = Pipeline.create(options);

  PCollection<KV<String, Long>> filteredWords =
      p.apply(TextIO.Read.named("ReadLines").from(options.getInputFile()))
       .apply(new WordCount.CountWords())
       .apply(ParDo.of(new FilterTextFn(options.getFilterPattern())));

  /**
   * Concept #4: DataflowAssert is a set of convenient PTransforms in the style of
   * Hamcrest's collection matchers that can be used when writing Pipeline level tests
   * to validate the contents of PCollections. DataflowAssert is best used in unit tests
   * with small data sets but is demonstrated here as a teaching tool.
   *
   * <p>Below we verify that the set of filtered words matches our expected counts. Note
   * that DataflowAssert does not provide any output and that successful completion of the
   * Pipeline implies that the expectations were met. Learn more at
   * https://cloud.google.com/dataflow/pipelines/testing-your-pipeline on how to test
   * your Pipeline and see {@link DebuggingWordCountTest} for an example unit test.
   */
  List<KV<String, Long>> expectedResults = Arrays.asList(
      KV.of("Flourish", 3L),
      KV.of("stomach", 1L));
  DataflowAssert.that(filteredWords).containsInAnyOrder(expectedResults);

  p.run();
}
 
Developer: sinmetal, Project: iron-hippo, Lines: 30, Source: DebuggingWordCount.java


Note: The com.google.cloud.dataflow.sdk.io.TextIO class examples in this article were compiled by 纯净天空 from open-source code and documentation platforms such as GitHub and MSDocs. The snippets were selected from open-source projects contributed by many developers, and copyright in the source code remains with the original authors. Consult the corresponding project's License before distributing or using the code; do not reproduce this article without permission.