

Java PCollection.setCoder Method Code Examples

This article collects typical usage examples of the Java method org.apache.beam.sdk.values.PCollection.setCoder. If you are wondering what PCollection.setCoder does, how to call it, or what real-world usage looks like, the curated examples below should help. You can also browse further usage examples of the enclosing class, org.apache.beam.sdk.values.PCollection.


The sections below present 15 code examples of the PCollection.setCoder method, sorted by popularity by default.
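
Before diving into the examples, a minimal standalone sketch may help show what setCoder does in isolation. This snippet is illustrative only and not taken from the Beam codebase; the class name SetCoderSketch is made up, and it assumes the default runner configured by PipelineOptionsFactory:

import org.apache.beam.sdk.Pipeline;
import org.apache.beam.sdk.coders.StringUtf8Coder;
import org.apache.beam.sdk.options.PipelineOptionsFactory;
import org.apache.beam.sdk.transforms.Create;
import org.apache.beam.sdk.values.PCollection;

public class SetCoderSketch {
  public static void main(String[] args) {
    Pipeline p = Pipeline.create(PipelineOptionsFactory.create());

    // Create.of can usually infer a coder on its own; calling setCoder
    // makes the choice explicit. It must happen before the PCollection
    // is finalized (i.e. before the pipeline runs or the value is consumed).
    PCollection<String> words = p.apply(Create.of("alpha", "beta"));
    words.setCoder(StringUtf8Coder.of());

    p.run().waitUntilFinish();
  }
}

As the examples below show, the common production pattern is to call setCoder immediately after an apply(...) whose output coder cannot be inferred automatically.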

Example 1: buildBeamPipeline

import org.apache.beam.sdk.values.PCollection; // import the package/class this method depends on
@Override
public PCollection<BeamRecord> buildBeamPipeline(PCollectionTuple inputPCollections
    , BeamSqlEnv sqlEnv) throws Exception {
  RelNode input = getInput();
  String stageName = BeamSqlRelUtils.getStageName(this);

  PCollection<BeamRecord> upstream =
      BeamSqlRelUtils.getBeamRelInput(input).buildBeamPipeline(inputPCollections, sqlEnv);

  BeamSqlExpressionExecutor executor = new BeamSqlFnExecutor(this);

  PCollection<BeamRecord> projectStream = upstream.apply(stageName, ParDo
      .of(new BeamSqlProjectFn(getRelTypeName(), executor,
          CalciteUtils.toBeamRowType(rowType))));
  projectStream.setCoder(CalciteUtils.toBeamRowType(getRowType()).getRecordCoder());

  return projectStream;
}
 
Author: apache, Project: beam, Lines: 19, Source: BeamProjectRel.java

Example 2: buildBeamPipeline

import org.apache.beam.sdk.values.PCollection; // import the package/class this method depends on
@Override
public PCollection<BeamRecord> buildBeamPipeline(PCollectionTuple inputPCollections
    , BeamSqlEnv sqlEnv) throws Exception {
  RelNode input = getInput();
  String stageName = BeamSqlRelUtils.getStageName(this);

  PCollection<BeamRecord> upstream =
      BeamSqlRelUtils.getBeamRelInput(input).buildBeamPipeline(inputPCollections, sqlEnv);

  BeamSqlExpressionExecutor executor = new BeamSqlFnExecutor(this);

  PCollection<BeamRecord> filterStream = upstream.apply(stageName,
      ParDo.of(new BeamSqlFilterFn(getRelTypeName(), executor)));
  filterStream.setCoder(CalciteUtils.toBeamRowType(getRowType()).getRecordCoder());

  return filterStream;
}
 
Author: apache, Project: beam, Lines: 18, Source: BeamFilterRel.java

Example 3: expand

import org.apache.beam.sdk.values.PCollection; // import the package/class this method depends on
@Override
public PCollection<KV<K, V>> expand(PCollection<V> in) {
  PCollection<KV<K, V>> result =
      in.apply("AddKeys", MapElements.via(new SimpleFunction<V, KV<K, V>>() {
        @Override
        public KV<K, V> apply(V element) {
          return KV.of(fn.apply(element), element);
        }
      }));

  try {
    Coder<K> keyCoder;
    CoderRegistry coderRegistry = in.getPipeline().getCoderRegistry();
    if (keyClass == null) {
      keyCoder = coderRegistry.getOutputCoder(fn, in.getCoder());
    } else {
      keyCoder = coderRegistry.getCoder(TypeDescriptor.of(keyClass));
    }
    // TODO: Remove when we can set the coder inference context.
    result.setCoder(KvCoder.of(keyCoder, in.getCoder()));
  } catch (CannotProvideCoderException exc) {
    // let lazy coder inference have a try
  }

  return result;
}
 
Author: apache, Project: beam, Lines: 27, Source: WithKeys.java

Example 4: expand

import org.apache.beam.sdk.values.PCollection; // import the package/class this method depends on
@Override
public PCollection<OutputT> expand(PCollection<? extends InputT> input) {
  CoderRegistry registry = input.getPipeline().getCoderRegistry();
  finishSpecifyingStateSpecs(fn, registry, input.getCoder());
  TupleTag<OutputT> mainOutput = new TupleTag<>(MAIN_OUTPUT_TAG);
  PCollection<OutputT> res =
      input.apply(withOutputTags(mainOutput, TupleTagList.empty())).get(mainOutput);
  try {
    res.setCoder(
        registry.getCoder(
            getFn().getOutputTypeDescriptor(),
            getFn().getInputTypeDescriptor(),
            ((PCollection<InputT>) input).getCoder()));
  } catch (CannotProvideCoderException e) {
    // Ignore and leave coder unset.
  }
  return res;
}
 
Author: apache, Project: beam, Lines: 19, Source: ParDo.java

Example 5: expand

import org.apache.beam.sdk.values.PCollection; // import the package/class this method depends on
@Override
public PCollection<KV<Integer, Iterable<KV<KV<K, W>, WindowedValue<V>>>>>
expand(PCollection<KV<K, V>> input) {

  @SuppressWarnings("unchecked")
  Coder<W> windowCoder = (Coder<W>)
      input.getWindowingStrategy().getWindowFn().windowCoder();
  @SuppressWarnings("unchecked")
  KvCoder<K, V> inputCoder = (KvCoder<K, V>) input.getCoder();

  PCollection<KV<Integer, KV<KV<K, W>, WindowedValue<V>>>> keyedByHash;
  keyedByHash = input.apply(
      ParDo.of(new GroupByKeyHashAndSortByKeyAndWindowDoFn<K, V, W>(coder)));
  keyedByHash.setCoder(
      KvCoder.of(
          VarIntCoder.of(),
          KvCoder.of(KvCoder.of(inputCoder.getKeyCoder(), windowCoder),
              FullWindowedValueCoder.of(inputCoder.getValueCoder(), windowCoder))));

  return keyedByHash.apply(
      new GroupByKeyAndSortValuesOnly<Integer, KV<K, W>, WindowedValue<V>>());
}
 
Author: apache, Project: beam, Lines: 23, Source: BatchViewOverrides.java

Example 6: applyForSingleton

import org.apache.beam.sdk.values.PCollection; // import the package/class this method depends on
static <T, FinalT, ViewT, W extends BoundedWindow> PCollection<?>
applyForSingleton(
    DataflowRunner runner,
    PCollection<T> input,
    DoFn<KV<Integer, Iterable<KV<W, WindowedValue<T>>>>,
        IsmRecord<WindowedValue<FinalT>>> doFn,
    Coder<FinalT> defaultValueCoder,
    PCollectionView<ViewT> view) {

  @SuppressWarnings("unchecked")
  Coder<W> windowCoder = (Coder<W>)
      input.getWindowingStrategy().getWindowFn().windowCoder();

  IsmRecordCoder<WindowedValue<FinalT>> ismCoder =
      coderForSingleton(windowCoder, defaultValueCoder);

  PCollection<IsmRecord<WindowedValue<FinalT>>> reifiedPerWindowAndSorted = input
      .apply(new GroupByWindowHashAsKeyAndWindowAsSortKey<T, W>(ismCoder))
      .apply(ParDo.of(doFn));
  reifiedPerWindowAndSorted.setCoder(ismCoder);

  runner.addPCollectionRequiringIndexedFormat(reifiedPerWindowAndSorted);
  reifiedPerWindowAndSorted.apply(
      CreateDataflowView.<IsmRecord<WindowedValue<FinalT>>, ViewT>forBatch(view));
  return reifiedPerWindowAndSorted;
}
 
Author: apache, Project: beam, Lines: 27, Source: BatchViewOverrides.java

Example 7: buildBeamPipeline

import org.apache.beam.sdk.values.PCollection; // import the package/class this method depends on
@Override public PCollection<BeamRecord> buildBeamPipeline(PCollectionTuple inputPCollections
    , BeamSqlEnv sqlEnv) throws Exception {
  RelNode input = getInput();
  PCollection<BeamRecord> upstream = BeamSqlRelUtils.getBeamRelInput(input)
      .buildBeamPipeline(inputPCollections, sqlEnv);
  Type windowType = upstream.getWindowingStrategy().getWindowFn()
      .getWindowTypeDescriptor().getType();
  if (!windowType.equals(GlobalWindow.class)) {
    throw new UnsupportedOperationException(
        "`ORDER BY` is only supported for GlobalWindow, actual window: " + windowType);
  }

  BeamSqlRowComparator comparator = new BeamSqlRowComparator(fieldIndices, orientation,
      nullsFirst);
  // first find the top (offset + count)
  PCollection<List<BeamRecord>> rawStream =
      upstream.apply("extractTopOffsetAndFetch",
          Top.of(startIndex + count, comparator).withoutDefaults())
      .setCoder(ListCoder.<BeamRecord>of(upstream.getCoder()));

  // strip the `leading offset`
  if (startIndex > 0) {
    rawStream = rawStream.apply("stripLeadingOffset", ParDo.of(
        new SubListFn<BeamRecord>(startIndex, startIndex + count)))
        .setCoder(ListCoder.<BeamRecord>of(upstream.getCoder()));
  }

  PCollection<BeamRecord> orderedStream = rawStream.apply(
      "flatten", Flatten.<BeamRecord>iterables());
  orderedStream.setCoder(CalciteUtils.toBeamRowType(getRowType()).getRecordCoder());

  return orderedStream;
}
 
Author: apache, Project: beam, Lines: 34, Source: BeamSortRel.java

Example 8: expand

import org.apache.beam.sdk.values.PCollection; // import the package/class this method depends on
@Override
public PCollection<T> expand(PBegin input) {
  try {
    Coder<T> coder;
    if (elementCoder.isPresent()) {
      coder = elementCoder.get();
    } else if (typeDescriptor.isPresent()) {
      coder = input.getPipeline().getCoderRegistry().getCoder(typeDescriptor.get());
    } else {
      Iterable<T> rawElements =
          Iterables.transform(
              timestampedElements,
              new Function<TimestampedValue<T>, T>() {
                @Override
                public T apply(TimestampedValue<T> timestampedValue) {
                  return timestampedValue.getValue();
                }
              });
      coder = getDefaultCreateCoder(input.getPipeline().getCoderRegistry(), rawElements);
    }

    PCollection<TimestampedValue<T>> intermediate = Pipeline.applyTransform(input,
        Create.of(timestampedElements).withCoder(TimestampedValueCoder.of(coder)));

    PCollection<T> output = intermediate.apply(ParDo.of(new ConvertTimestamps<T>()));
    output.setCoder(coder);
    return output;
  } catch (CannotProvideCoderException e) {
    throw new IllegalArgumentException("Unable to infer a coder and no Coder was specified. "
        + "Please set a coder by invoking CreateTimestamped.withCoder() explicitly.", e);
  }
}
 
Author: apache, Project: beam, Lines: 33, Source: Create.java
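
The fallback chain in example 8 (explicit coder, then TypeDescriptor, then inference from the raw elements) is driven by the public withCoder API, taking the elementCoder.isPresent() branch so no inference is needed. A hedged, illustrative sketch of supplying the coder up front — the helper method, class name, and element values here are hypothetical, not from the Beam codebase:

import org.apache.beam.sdk.Pipeline;
import org.apache.beam.sdk.coders.StringUtf8Coder;
import org.apache.beam.sdk.transforms.Create;
import org.apache.beam.sdk.values.PCollection;
import org.apache.beam.sdk.values.TimestampedValue;
import org.joda.time.Instant;

class CreateTimestampedSketch {
  // Hypothetical helper: builds a timestamped PCollection with an explicit
  // coder, so example 8's coder-inference fallbacks never have to run.
  static PCollection<String> timestampedWords(Pipeline p) {
    return p.apply(Create.timestamped(
            TimestampedValue.of("a", new Instant(0L)),
            TimestampedValue.of("b", new Instant(1L)))
        .withCoder(StringUtf8Coder.of()));
  }
}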

Example 9: expand

import org.apache.beam.sdk.values.PCollection; // import the package/class this method depends on
@Override
public OutputT expand(InputT input) {
  OutputT res = delegate().expand(input);
  if (res instanceof PCollection) {
    PCollection pc = (PCollection) res;
    try {
      pc.setCoder(delegate().getDefaultOutputCoder(input, pc));
    } catch (CannotProvideCoderException e) {
      // Let coder inference happen later.
    }
  }
  return res;
}
 
Author: apache, Project: beam, Lines: 14, Source: ForwardingPTransform.java
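
Examples 3, 4, and 9 all follow the same defensive idiom: attempt to resolve a coder, call setCoder on success, and swallow CannotProvideCoderException so that lazy inference can still run later. A simplified, hypothetical user-level version of that idiom might look like this (ParseInts is not a Beam class; it is a sketch under that assumption):

import org.apache.beam.sdk.coders.CannotProvideCoderException;
import org.apache.beam.sdk.transforms.MapElements;
import org.apache.beam.sdk.transforms.PTransform;
import org.apache.beam.sdk.transforms.SimpleFunction;
import org.apache.beam.sdk.values.PCollection;
import org.apache.beam.sdk.values.TypeDescriptor;

class ParseInts extends PTransform<PCollection<String>, PCollection<Integer>> {
  @Override
  public PCollection<Integer> expand(PCollection<String> input) {
    PCollection<Integer> result =
        input.apply(MapElements.via(new SimpleFunction<String, Integer>() {
          @Override
          public Integer apply(String s) {
            return Integer.parseInt(s.trim());
          }
        }));
    try {
      // Prefer an explicitly resolved coder when the registry can provide one...
      result.setCoder(
          input.getPipeline().getCoderRegistry().getCoder(TypeDescriptor.of(Integer.class)));
    } catch (CannotProvideCoderException e) {
      // ...otherwise leave the coder unset and let lazy inference have a try.
    }
    return result;
  }
}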

Example 10: expand

import org.apache.beam.sdk.values.PCollection; // import the package/class this method depends on
@Override
public final PCollection<T> expand(PBegin input) {
  try {
    PCollection<T> pc = Pipeline
        .applyTransform(input, new Impulse())
        .apply(ParDo.of(DecodeAndEmitDoFn
            .fromIterable(transform.getElements(), originalOutput.getCoder())));
    pc.setCoder(originalOutput.getCoder());
    return pc;
  } catch (IOException e) {
    throw new IllegalStateException("Unable to encode elements.", e);
  }
}
 
Author: apache, Project: beam, Lines: 14, Source: DataflowRunner.java

Example 11: testFlattenInMemoryEvaluatorWithEmptyPCollectionList

import org.apache.beam.sdk.values.PCollection; // import the package/class this method depends on
@Test
public void testFlattenInMemoryEvaluatorWithEmptyPCollectionList() throws Exception {
  PCollectionList<Integer> list = PCollectionList.empty(p);

  PCollection<Integer> flattened = list.apply(Flatten.<Integer>pCollections());
  flattened.setCoder(VarIntCoder.of());

  EvaluationContext evaluationContext = mock(EvaluationContext.class);
  when(evaluationContext.createBundle(flattened))
      .thenReturn(bundleFactory.createBundle(flattened));

  FlattenEvaluatorFactory factory = new FlattenEvaluatorFactory(evaluationContext);
  AppliedPTransform<?, ?, ?> flattendProducer = DirectGraphs.getProducer(flattened);
  TransformEvaluator<Integer> emptyEvaluator =
      factory.forApplication(
          flattendProducer,
          bundleFactory.createRootBundle().commit(BoundedWindow.TIMESTAMP_MAX_VALUE));

  TransformResult<Integer> leftSideResult = emptyEvaluator.finishBundle();

  CommittedBundle<?> outputBundle =
      Iterables.getOnlyElement(leftSideResult.getOutputBundles()).commit(Instant.now());
  assertThat(outputBundle.getElements(), emptyIterable());
  assertThat(
      leftSideResult.getTransform(),
      Matchers.<AppliedPTransform<?, ?, ?>>equalTo(flattendProducer));
}
 
Author: apache, Project: beam, Lines: 28, Source: FlattenEvaluatorFactoryTest.java

Example 12: buildBeamPipeline

import org.apache.beam.sdk.values.PCollection; // import the package/class this method depends on
@Override
public PCollection<BeamRecord> buildBeamPipeline(PCollectionTuple inputPCollections
    , BeamSqlEnv sqlEnv) throws Exception {
  RelNode input = getInput();
  String stageName = BeamSqlRelUtils.getStageName(this) + "_";

  PCollection<BeamRecord> upstream =
      BeamSqlRelUtils.getBeamRelInput(input).buildBeamPipeline(inputPCollections, sqlEnv);
  if (windowFieldIdx != -1) {
    upstream = upstream.apply(stageName + "assignEventTimestamp", WithTimestamps
        .of(new BeamAggregationTransforms.WindowTimestampFn(windowFieldIdx))
        .withAllowedTimestampSkew(new Duration(Long.MAX_VALUE)))
        .setCoder(upstream.getCoder());
  }

  PCollection<BeamRecord> windowStream = upstream.apply(stageName + "window",
      Window.into(windowFn)
      .triggering(trigger)
      .withAllowedLateness(allowedLatence)
      .accumulatingFiredPanes());

  BeamRecordCoder keyCoder = exKeyFieldsSchema(input.getRowType()).getRecordCoder();
  PCollection<KV<BeamRecord, BeamRecord>> exCombineByStream = windowStream.apply(
      stageName + "exCombineBy",
      WithKeys
          .of(new BeamAggregationTransforms.AggregationGroupByKeyFn(
              windowFieldIdx, groupSet)))
      .setCoder(KvCoder.of(keyCoder, upstream.getCoder()));


  BeamRecordCoder aggCoder = exAggFieldsSchema().getRecordCoder();

  PCollection<KV<BeamRecord, BeamRecord>> aggregatedStream = exCombineByStream.apply(
      stageName + "combineBy",
      Combine.<BeamRecord, BeamRecord, BeamRecord>perKey(
          new BeamAggregationTransforms.AggregationAdaptor(getAggCallList(),
              CalciteUtils.toBeamRowType(input.getRowType()))))
      .setCoder(KvCoder.of(keyCoder, aggCoder));

  PCollection<BeamRecord> mergedStream = aggregatedStream.apply(stageName + "mergeRecord",
      ParDo.of(new BeamAggregationTransforms.MergeAggregationRecord(
          CalciteUtils.toBeamRowType(getRowType()), getAggCallList(), windowFieldIdx)));
  mergedStream.setCoder(CalciteUtils.toBeamRowType(getRowType()).getRecordCoder());

  return mergedStream;
}
 
Author: apache, Project: beam, Lines: 47, Source: BeamAggregationRel.java

Example 13: testCountPerElementBasic

import org.apache.beam.sdk.values.PCollection; // import the package/class this method depends on
  /**
   * This step is equivalent to the query below.
   * <pre>
   * SELECT `f_int`
   * , COUNT(*) AS `size`
   * , SUM(`f_long`) AS `sum1`, AVG(`f_long`) AS `avg1`
   * , MAX(`f_long`) AS `max1`, MIN(`f_long`) AS `min1`
   * , SUM(`f_short`) AS `sum2`, AVG(`f_short`) AS `avg2`
   * , MAX(`f_short`) AS `max2`, MIN(`f_short`) AS `min2`
   * , SUM(`f_byte`) AS `sum3`, AVG(`f_byte`) AS `avg3`
   * , MAX(`f_byte`) AS `max3`, MIN(`f_byte`) AS `min3`
   * , SUM(`f_float`) AS `sum4`, AVG(`f_float`) AS `avg4`
   * , MAX(`f_float`) AS `max4`, MIN(`f_float`) AS `min4`
   * , SUM(`f_double`) AS `sum5`, AVG(`f_double`) AS `avg5`
   * , MAX(`f_double`) AS `max5`, MIN(`f_double`) AS `min5`
   * , MAX(`f_timestamp`) AS `max7`, MIN(`f_timestamp`) AS `min7`
   * ,SUM(`f_int2`) AS `sum8`, AVG(`f_int2`) AS `avg8`
   * , MAX(`f_int2`) AS `max8`, MIN(`f_int2`) AS `min8`
   * FROM TABLE_NAME
   * GROUP BY `f_int`
   * </pre>
   * @throws ParseException
   */
  @Test
  public void testCountPerElementBasic() throws ParseException {
    setupEnvironment();

    PCollection<BeamRecord> input = p.apply(Create.of(inputRows));

    //1. extract fields in group-by key part
    PCollection<KV<BeamRecord, BeamRecord>> exGroupByStream = input.apply("exGroupBy",
        WithKeys
            .of(new BeamAggregationTransforms.AggregationGroupByKeyFn(-1, ImmutableBitSet.of(0))))
        .setCoder(KvCoder.<BeamRecord, BeamRecord>of(keyCoder, inRecordCoder));

    //2. apply a GroupByKey.
    PCollection<KV<BeamRecord, Iterable<BeamRecord>>> groupedStream = exGroupByStream
        .apply("groupBy", GroupByKey.<BeamRecord, BeamRecord>create())
        .setCoder(KvCoder.<BeamRecord, Iterable<BeamRecord>>of(keyCoder,
            IterableCoder.<BeamRecord>of(inRecordCoder)));

    //3. run aggregation functions
    PCollection<KV<BeamRecord, BeamRecord>> aggregatedStream = groupedStream.apply("aggregation",
        Combine.<BeamRecord, BeamRecord, BeamRecord>groupedValues(
            new BeamAggregationTransforms.AggregationAdaptor(aggCalls, inputRowType)))
        .setCoder(KvCoder.<BeamRecord, BeamRecord>of(keyCoder, aggCoder));

    //4. flat KV to a single record
    PCollection<BeamRecord> mergedStream = aggregatedStream.apply("mergeRecord",
        ParDo.of(new BeamAggregationTransforms.MergeAggregationRecord(outputType, aggCalls, -1)));
    mergedStream.setCoder(outRecordCoder);

    //assert function BeamAggregationTransform.AggregationGroupByKeyFn
    PAssert.that(exGroupByStream).containsInAnyOrder(prepareResultOfAggregationGroupByKeyFn());

    //assert BeamAggregationTransform.AggregationCombineFn
    PAssert.that(aggregatedStream).containsInAnyOrder(prepareResultOfAggregationCombineFn());

    //assert BeamAggregationTransform.MergeAggregationRecord
    PAssert.that(mergedStream).containsInAnyOrder(prepareResultOfMergeAggregationRecord());

    p.run();
  }
 
Author: apache, Project: beam, Lines: 64, Source: BeamAggregationTransformTest.java

Example 14: expand

import org.apache.beam.sdk.values.PCollection; // import the package/class this method depends on
@Override
public PCollection<KV<Void, T>> expand(PCollection<T> input) {
  PCollection output = input.apply(ParDo.of(new VoidKeyToMultimapMaterializationDoFn<>()));
  output.setCoder(KvCoder.of(VoidCoder.of(), input.getCoder()));
  return output;
}
 
Author: apache, Project: beam, Lines: 7, Source: View.java

Example 15: expand

import org.apache.beam.sdk.values.PCollection; // import the package/class this method depends on
@Override
public WriteResult expand(PCollection<KV<TableDestination, TableRow>> input) {
  // A naive implementation would be to simply stream data directly to BigQuery.
  // However, this could occasionally lead to duplicated data, e.g., when
  // a VM that runs this code is restarted and the code is re-run.

  // The above risk is mitigated in this implementation by relying on
  // BigQuery built-in best effort de-dup mechanism.

  // To use this mechanism, each input TableRow is tagged with a generated
  // unique id, which is then passed to BigQuery and used to ignore duplicates
  // We create 50 keys per BigQuery table to generate output on. This is few enough that we
  // get good batching into BigQuery's insert calls, and enough that we can max out the
  // streaming insert quota.
  PCollection<KV<ShardedKey<String>, TableRowInfo>> tagged =
      input
          .apply("ShardTableWrites", ParDo.of(new GenerateShardedTable(50)))
          .setCoder(KvCoder.of(ShardedKeyCoder.of(StringUtf8Coder.of()), TableRowJsonCoder.of()))
          .apply("TagWithUniqueIds", ParDo.of(new TagWithUniqueIds()))
          .setCoder(KvCoder.of(ShardedKeyCoder.of(StringUtf8Coder.of()), TableRowInfoCoder.of()));

  // To prevent having the same TableRow processed more than once with regenerated
  // different unique ids, this implementation relies on "checkpointing", which is
  // achieved as a side effect of having StreamingWriteFn immediately follow a GBK,
  // performed by Reshuffle.
  TupleTag<Void> mainOutputTag = new TupleTag<>("mainOutput");
  TupleTag<TableRow> failedInsertsTag = new TupleTag<>("failedInserts");
  PCollectionTuple tuple = tagged
      .apply(Reshuffle.<ShardedKey<String>, TableRowInfo>of())
      // Put in the global window to ensure that DynamicDestinations side inputs are accessed
      // correctly.
      .apply("GlobalWindow",
          Window.<KV<ShardedKey<String>, TableRowInfo>>into(new GlobalWindows())
          .triggering(DefaultTrigger.of()).discardingFiredPanes())
      .apply("StreamingWrite",
          ParDo.of(
              new StreamingWriteFn(bigQueryServices, retryPolicy, failedInsertsTag))
          .withOutputTags(mainOutputTag, TupleTagList.of(failedInsertsTag)));
  PCollection<TableRow> failedInserts = tuple.get(failedInsertsTag);
  failedInserts.setCoder(TableRowJsonCoder.of());
  return WriteResult.in(input.getPipeline(), failedInsertsTag, failedInserts);
}
 
Author: apache, Project: beam, Lines: 43, Source: StreamingWriteTables.java


Note: The org.apache.beam.sdk.values.PCollection.setCoder examples on this page were compiled by 纯净天空 from open-source code and documentation platforms such as GitHub and MSDocs. The snippets are drawn from open-source projects contributed by their authors; copyright remains with the original authors, and any redistribution or use should follow the corresponding project's License. Do not repost without permission.