This article collects typical usage examples of the Java class org.apache.beam.sdk.io.TextIO. If you are wondering how the TextIO class is used in practice, the curated examples below may help.
The TextIO class belongs to the org.apache.beam.sdk.io package. The sections below present 15 code examples of the TextIO class, sorted by popularity by default.
Example 1: main
import org.apache.beam.sdk.io.TextIO; // import the required package/class
public static void main(String[] args) throws Exception {
  Options options = PipelineOptionsFactory.fromArgs(args).withValidation()
      .as(Options.class);
  options.setRunner(FlinkRunner.class);

  Pipeline p = Pipeline.create(options);

  KafkaIO.Read<byte[], String> kafkaIOReader = KafkaIO.read()
      .withBootstrapServers("192.168.99.100:32771")
      .withTopics(Arrays.asList("beam".split(",")))
      .updateConsumerProperties(ImmutableMap.of("auto.offset.reset", (Object) "earliest"))
      .withValueCoder(StringUtf8Coder.of());

  p.apply(kafkaIOReader.withoutMetadata())
      .apply(Values.<String>create())
      .apply(Window.<String>into(
          FixedWindows.of(Duration.standardMinutes(options.getWindowSize()))))
      .apply(new CountWords())
      .apply(MapElements.via(new FormatAsTextFn()))
      // Note: TextIO.Write.to(...) is the pre-2.0 Beam style; in Beam 2.x this is TextIO.write().to(...).
      .apply("WriteCounts", TextIO.Write.to(options.getOutput()));

  p.run();
}
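The Options interface used above is not part of the excerpt. A minimal sketch of what it might look like, assuming only the two accessors the pipeline actually calls (the descriptions and default value below are illustrative assumptions, not taken from the original project):

import org.apache.beam.sdk.options.Default;
import org.apache.beam.sdk.options.Description;
import org.apache.beam.sdk.options.PipelineOptions;

public interface Options extends PipelineOptions {
  @Description("Fixed-window size in minutes")   // illustrative description (assumption)
  @Default.Integer(1)                            // assumed default
  Integer getWindowSize();
  void setWindowSize(Integer value);

  @Description("Output path prefix for TextIO")  // illustrative description (assumption)
  String getOutput();
  void setOutput(String value);
}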
Example 2: main
import org.apache.beam.sdk.io.TextIO; // import the required package/class
/** Run a batch pipeline to calculate hourly team scores. */
public static void main(String[] args) throws Exception {
  Options options =
      PipelineOptionsFactory.fromArgs(args).withValidation().as(Options.class);
  Pipeline pipeline = Pipeline.create(options);

  pipeline
      .apply("ReadLogs", TextIO.read().from(options.getInput()))
      .apply("SetTimestamps", WithTimestamps.of(new SetTimestampFn()))
      .apply("FixedWindows", Window.<String>into(FixedWindows.of(ONE_HOUR)))
      .apply("TeamScores", new CalculateTeamScores(options.getOutputPrefix()));

  pipeline.run();
}
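WithTimestamps.of(...) expects a SerializableFunction that maps each element to its event-time Instant. The SetTimestampFn referenced above is not shown; a minimal sketch under the assumption that each log line starts with an epoch-millisecond timestamp field (the parsing logic is purely illustrative):

import org.apache.beam.sdk.transforms.SerializableFunction;
import org.joda.time.Instant;

static class SetTimestampFn implements SerializableFunction<String, Instant> {
  @Override
  public Instant apply(String logLine) {
    // Assumption: the first comma-separated field is an epoch-millisecond timestamp.
    long millis = Long.parseLong(logLine.split(",")[0].trim());
    return new Instant(millis);
  }
}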
Example 3: main
import org.apache.beam.sdk.io.TextIO; // import the required package/class
/**
 * Runs the DatastoreToGcs dataflow pipeline.
 */
public static void main(String[] args) throws IOException, ScriptException {
  Options options = PipelineOptionsFactory.fromArgs(args)
      .withValidation()
      .as(Options.class);
  options.setRunner(DataflowRunner.class);

  Pipeline pipeline = Pipeline.create(options);

  pipeline
      .apply("IngestEntities",
          DatastoreIO.v1().read()
              .withProjectId(options.getDatastoreProjectId())
              .withLiteralGqlQuery(options.getGqlQuery())
              .withNamespace(options.getNamespace()))
      .apply("EntityToJson", ParDo.of(EntityToJson.newBuilder()
          .setJsTransformPath(options.getJsTransformPath())
          .setJsTransformFunctionName(options.getJsTransformFunctionName())
          .build()))
      .apply("JsonToGcs", TextIO.write().to(options.getSavePath())
          .withSuffix(".json"));

  pipeline.run();
}
Example 4: main
import org.apache.beam.sdk.io.TextIO; // import the required package/class
/**
 * Runs the GcsToDatastore dataflow pipeline.
 */
public static void main(String[] args) throws IOException, ScriptException {
  Options options = PipelineOptionsFactory.fromArgs(args)
      .withValidation()
      .as(Options.class);
  options.setRunner(DataflowRunner.class);

  Pipeline pipeline = Pipeline.create(options);

  pipeline
      .apply("IngestJson", TextIO.read()
          .from(options.getJsonPathPrefix()))
      .apply("GcsToEntity", ParDo.of(JsonToEntity.newBuilder()
          .setJsTransformPath(options.getJsTransformPath())
          .setJsTransformFunctionName(options.getJsTransformFunctionName())
          .build()))
      .apply(DatastoreIO.v1().write()
          .withProjectId(options.getDatastoreProjectId()));

  pipeline.run();
}
Example 5: main
import org.apache.beam.sdk.io.TextIO; // import the required package/class
public static void main(String[] args) throws Exception {
  PipelineOptionsFactory.register(TemplateOptions.class);
  TemplateOptions options = PipelineOptionsFactory
      .fromArgs(args)
      .withValidation()
      .as(TemplateOptions.class);
  options.setAutoscalingAlgorithm(THROUGHPUT_BASED);

  Pipeline pipeline = Pipeline.create(options);

  pipeline.apply(BigQueryIO.read().from(options.getBigQueryTableName()))
      .apply(ParDo.of(new DoFn<TableRow, String>() {
        @ProcessElement
        public void processElement(ProcessContext c) throws Exception {
          String commaSep = c.element().values()
              .stream()
              .map(cell -> cell.toString().trim())
              .collect(Collectors.joining("\",\""));
          c.output(commaSep);
        }
      }))
      .apply(TextIO.write().to(options.getOutputFile())
          .withoutSharding()
          .withWritableByteChannelFactory(GZIP)
      );

  pipeline.run();
}
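The bare identifiers THROUGHPUT_BASED and GZIP are presumably brought in through static imports that the excerpt omits. A sketch of imports that would let them resolve against an early Beam 2.x SDK plus the Dataflow runner (the exact package locations are an assumption and have moved between releases, so verify against the version actually in use):

// Assumed static imports; adjust to the Beam/Dataflow version in use.
import static org.apache.beam.runners.dataflow.options.DataflowPipelineWorkerPoolOptions.AutoscalingAlgorithmType.THROUGHPUT_BASED;
import static org.apache.beam.sdk.io.FileBasedSink.CompressionType.GZIP;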
Example 6: main
import org.apache.beam.sdk.io.TextIO; // import the required package/class
/**
 * Run a batch pipeline.
 */
// [START DocInclude_USMain]
public static void main(String[] args) throws Exception {
  // Begin constructing a pipeline configured by commandline flags.
  Options options = PipelineOptionsFactory.fromArgs(args).withValidation().as(Options.class);
  Pipeline pipeline = Pipeline.create(options);

  // Read events from a text file and parse them.
  pipeline
      .apply(TextIO.read().from(options.getInput()))
      .apply("ParseGameEvent", ParDo.of(new ParseEventFn()))
      // Extract and sum username/score pairs from the event data.
      .apply("ExtractUserScore", new ExtractAndSumScore("user"))
      .apply(
          "WriteUserScoreSums",
          new WriteToText<KV<String, Integer>>(
              options.getOutput(),
              configureOutput(),
              false));

  // Run the batch pipeline.
  pipeline.run().waitUntilFinish();
}
Example 7: expand
import org.apache.beam.sdk.io.TextIO; // import the required package/class
@Override
public PDone expand(PCollection<KV<String, KV<URI, Double>>> wordToUriAndTfIdf) {
  return wordToUriAndTfIdf
      .apply("Format", ParDo.of(new DoFn<KV<String, KV<URI, Double>>, String>() {
        @ProcessElement
        public void processElement(ProcessContext c) {
          c.output(String.format("%s,\t%s,\t%f",
              c.element().getKey(),
              c.element().getValue().getKey(),
              c.element().getValue().getValue()));
        }
      }))
      .apply(TextIO.write()
          .to(output)
          .withSuffix(".csv"));
}
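This expand method only makes sense inside a composite PTransform that holds the output prefix it writes to. A minimal sketch of the enclosing class it might live in (the class name and constructor are assumptions; only the field named output is implied by the snippet):

static class WriteTfIdfToCsv
    extends PTransform<PCollection<KV<String, KV<URI, Double>>>, PDone> {
  private final String output;  // file prefix handed to TextIO.write().to(...)

  WriteTfIdfToCsv(String output) {
    this.output = output;
  }

  // The expand(...) method shown above goes here.
}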
Example 8: main
import org.apache.beam.sdk.io.TextIO; // import the required package/class
public static void main(String[] args) {
  Options options = PipelineOptionsFactory.fromArgs(args)
      .withValidation()
      .as(Options.class);

  Pipeline p = Pipeline.create(options);

  double samplingThreshold = 0.1;

  p.apply(TextIO.read().from(options.getInput()))
      .apply(MapElements.via(new ParseTableRowJson()))
      .apply(new ComputeTopSessions(samplingThreshold))
      .apply("Write", TextIO.write().withoutSharding().to(options.getOutput()));

  p.run().waitUntilFinish();
}
Example 9: testReadPipeline
import org.apache.beam.sdk.io.TextIO; // import the required package/class
@Test
@Ignore("TestPipeline needs a way to take in HadoopFileSystemOptions")
public void testReadPipeline() throws Exception {
  create("testFileA", "testDataA".getBytes());
  create("testFileB", "testDataB".getBytes());
  create("testFileC", "testDataC".getBytes());

  HadoopFileSystemOptions options = TestPipeline.testingPipelineOptions()
      .as(HadoopFileSystemOptions.class);
  options.setHdfsConfiguration(ImmutableList.of(fileSystem.fileSystem.getConf()));
  FileSystems.setDefaultPipelineOptions(options);

  PCollection<String> pc = p.apply(
      TextIO.read().from(testPath("testFile*").toString()));
  PAssert.that(pc).containsInAnyOrder("testDataA", "testDataB", "testDataC");
  p.run();
}
Example 10: testText
import org.apache.beam.sdk.io.TextIO; // import the required package/class
@Test
public void testText() throws Exception {
  PCollection<String> inputWords = p.apply(Create.of(WORDS).withCoder(StringUtf8Coder.of()));
  PCollection<String> output = inputWords.apply(new WordCount.CountWords())
      .apply(MapElements.via(new WordCount.FormatAsTextFn()));
  output.apply(
      TextIO.write().to(outputDir.getAbsolutePath()).withNumShards(3).withSuffix(".txt"));
  p.run().waitUntilFinish();

  int count = 0;
  Set<String> expected = Sets.newHashSet("hi: 5", "there: 1", "sue: 2", "bob: 2");
  for (File f : tmpDir.getRoot().listFiles(new FileFilter() {
    @Override public boolean accept(File pathname) {
      return pathname.getName().matches("out-.*\\.txt");
    }
  })) {
    count++;
    for (String line : Files.readLines(f, Charsets.UTF_8)) {
      assertTrue(line + " not found", expected.remove(line));
    }
  }
  assertEquals(3, count);
  assertTrue(expected.isEmpty());
}
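The WORDS fixture this test reads from is not included in the excerpt. A sketch of a fixture whose word counts match the asserted expectations (hi: 5, there: 1, sue: 2, bob: 2); the actual constant in the original test may differ:

// Assumed fixture; any list of lines with the same word frequencies would satisfy the assertions.
static final List<String> WORDS =
    Arrays.asList("hi there", "hi", "hi sue bob", "hi sue", "", "bob hi");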
Example 11: runProgram
import org.apache.beam.sdk.io.TextIO; // import the required package/class
private static void runProgram(String resultPath) throws Exception {
  Pipeline p = FlinkTestPipeline.createForBatch();

  PCollection<String> result = p
      .apply(GenerateSequence.from(0).to(10))
      .apply(ParDo.of(new DoFn<Long, String>() {
        @ProcessElement
        public void processElement(ProcessContext c) throws Exception {
          c.output(c.element().toString());
        }
      }));

  result.apply(TextIO.write().to(new URI(resultPath).getPath() + "/part"));

  p.run();
}
Example 12: runProgram
import org.apache.beam.sdk.io.TextIO; // import the required package/class
private static void runProgram(String resultPath) {
  Pipeline p = FlinkTestPipeline.createForStreaming();

  p
      .apply(GenerateSequence.from(0).to(10))
      .apply(ParDo.of(new DoFn<Long, String>() {
        @ProcessElement
        public void processElement(ProcessContext c) throws Exception {
          c.output(c.element().toString());
        }
      }))
      .apply(TextIO.write().to(resultPath));

  p.run();
}
Example 13: createPredefinedStep
import org.apache.beam.sdk.io.TextIO; // import the required package/class
/**
 * Returns a Step for a {@link DoFn} by creating and translating a pipeline.
 */
private static Step createPredefinedStep() throws Exception {
  DataflowPipelineOptions options = buildPipelineOptions();
  DataflowPipelineTranslator translator = DataflowPipelineTranslator.fromOptions(options);
  Pipeline pipeline = Pipeline.create(options);
  String stepName = "DoFn1";

  pipeline.apply("ReadMyFile", TextIO.read().from("gs://bucket/in"))
      .apply(stepName, ParDo.of(new NoOpFn()))
      .apply("WriteMyFile", TextIO.write().to("gs://bucket/out"));

  DataflowRunner runner = DataflowRunner.fromOptions(options);
  runner.replaceTransforms(pipeline);
  Job job =
      translator
          .translate(
              pipeline,
              runner,
              Collections.<DataflowPackage>emptyList())
          .getJob();
  assertEquals(8, job.getSteps().size());

  Step step = job.getSteps().get(1);
  assertEquals(stepName, getString(step.getProperties(), PropertyNames.USER_NAME));
  assertAllStepOutputsHaveUniqueIds(job);
  return step;
}
Example 14: testUnconsumedReads
import org.apache.beam.sdk.io.TextIO; // import the required package/class
/**
 * Tests that all reads are consumed by at least one {@link PTransform}.
 */
@Test
public void testUnconsumedReads() throws IOException {
  DataflowPipelineOptions dataflowOptions = buildPipelineOptions();
  RuntimeTestOptions options = dataflowOptions.as(RuntimeTestOptions.class);
  Pipeline p = buildDataflowPipeline(dataflowOptions);
  PCollection<String> unconsumed = p.apply(TextIO.read().from(options.getInput()));
  DataflowRunner.fromOptions(dataflowOptions).replaceTransforms(p);

  final AtomicBoolean unconsumedSeenAsInput = new AtomicBoolean();
  p.traverseTopologically(new PipelineVisitor.Defaults() {
    @Override
    public void visitPrimitiveTransform(Node node) {
      unconsumedSeenAsInput.set(true);
    }
  });
  assertThat(unconsumedSeenAsInput.get(), is(true));
}
Example 15: expand
import org.apache.beam.sdk.io.TextIO; // import the required package/class
@Override
public PDone expand(PCollection<String> input) {
  // Verify that the input has a compatible window type.
  checkArgument(
      input.getWindowingStrategy().getWindowFn().windowCoder() == IntervalWindow.getCoder());

  // filenamePrefix may contain a directory and a filename component. Pull out only the filename
  // component from that path for the PerWindowFiles.
  String prefix = "";
  ResourceId resource = FileBasedSink.convertToFileResourceIfPossible(filenamePrefix);
  if (!resource.isDirectory()) {
    prefix = verifyNotNull(
        resource.getFilename(),
        "A non-directory resource should have a non-null filename: %s",
        resource);
  }

  return input.apply(
      TextIO.write()
          .to(resource.getCurrentDirectory())
          .withFilenamePolicy(new PerWindowFiles(prefix))
          .withWindowedWrites()
          .withNumShards(3));
}