

Java FlowProcess Class Code Examples

This article collects typical usage examples of the Java class cascading.flow.FlowProcess. If you are wondering what FlowProcess is for, how to use it, or what real code that uses it looks like, the selected examples below should help.


FlowProcess belongs to the cascading.flow package. Fifteen code examples of the class are shown below, sorted by popularity by default. You can upvote the examples you like or find useful; your feedback helps the system recommend better Java code examples.
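Before the examples, here is a minimal, self-contained sketch (not taken from any of the projects below) of where FlowProcess typically appears in user code: Cascading hands a FlowProcess to every operation, which can use it to read flow-level properties and to increment counters. The class name, output field, property key, and counter names are invented for illustration; only the FlowProcess calls themselves (getProperty, increment) are standard Cascading API.

import cascading.flow.FlowProcess;
import cascading.operation.BaseOperation;
import cascading.operation.Function;
import cascading.operation.FunctionCall;
import cascading.tuple.Fields;
import cascading.tuple.Tuple;

// Illustrative sketch only: a custom Function that uses the FlowProcess passed into operate().
public class UpperCaseFunction extends BaseOperation<Void> implements Function<Void> {

    public UpperCaseFunction() {
        // one argument in, one field ("upper") out
        super(1, new Fields("upper"));
    }

    @Override
    public void operate(FlowProcess flowProcess, FunctionCall<Void> functionCall) {
        // read a property made available to the running flow (hypothetical key)
        if (flowProcess.getProperty("example.marker") != null) {
            flowProcess.increment("example", "marked-tuples", 1);
        }

        // transform the single incoming argument and emit the result
        String value = functionCall.getArguments().getString(0);
        functionCall.getOutputCollector().add(new Tuple(value == null ? null : value.toUpperCase()));
    }
}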

Example 1: sinkConfInit

import cascading.flow.FlowProcess; // import the required package/class
@Override
public void sinkConfInit(FlowProcess<JobConf> flowProcess, Tap<JobConf, RecordReader, OutputCollector> tap, JobConf conf) {

    conf.setOutputFormat(EsOutputFormat.class);
    // define an output dir to prevent Cascading from setting up a TempHfs and overriding the OutputFormat
    Settings set = loadSettings(conf, false);

    Log log = LogFactory.getLog(EsTap.class);
    InitializationUtils.setValueWriterIfNotSet(set, CascadingValueWriter.class, log);
    InitializationUtils.setValueReaderIfNotSet(set, JdkValueReader.class, log);
    InitializationUtils.setBytesConverterIfNeeded(set, CascadingLocalBytesConverter.class, log);
    InitializationUtils.setFieldExtractorIfNotSet(set, CascadingFieldExtractor.class, log);

    // NB: we need to set this property even though it is not being used - and since a URI causes problems, use only the resource/file
    //conf.set("mapred.output.dir", set.getTargetUri() + "/" + set.getTargetResource());
    HadoopCfgUtils.setFileOutputFormatDir(conf, set.getResourceWrite());
    HadoopCfgUtils.setOutputCommitterClass(conf, EsOutputFormat.EsOldAPIOutputCommitter.class.getName());

    if (log.isTraceEnabled()) {
        log.trace("Initialized (sink) configuration " + HadoopCfgUtils.asProperties(conf));
    }
}
 
Developer ID: xushjie1987, Project: es-hadoop-v2.2.0, Lines of code: 23, Source file: EsHadoopScheme.java

Example 2: openForRead

import cascading.flow.FlowProcess; // import the required package/class
@Override
public TupleEntryIterator openForRead(FlowProcess<Properties> flowProcess, ScrollQuery input) throws IOException {
    if (input == null) {
        // get original copy
        Settings settings = CascadingUtils.addDefaultsToSettings(CascadingUtils.extractOriginalProperties(flowProcess.getConfigCopy()), tapProperties, log);

        // will be closed when the query is finished
        RestRepository client = new RestRepository(settings);
        Field mapping = client.getMapping();
        Collection<String> fields = CascadingUtils.fieldToAlias(settings, getSourceFields());

        // validate if possible
        FieldPresenceValidation validation = settings.getReadFieldExistanceValidation();
        if (validation.isRequired()) {
            MappingUtils.validateMapping(fields, mapping, validation, log);
        }

        input = QueryBuilder.query(settings).fields(StringUtils.concatenateAndUriEncode(fields,  ",")).
                build(client, new ScrollReader(new ScrollReaderConfig(new JdkValueReader(), mapping, settings)));
    }
    return new TupleEntrySchemeIterator<Properties, ScrollQuery>(flowProcess, getScheme(), input, getIdentifier());
}
 
Developer ID: xushjie1987, Project: es-hadoop-v2.2.0, Lines of code: 23, Source file: EsLocalTap.java

Example 3: validateFields

import cascading.flow.FlowProcess; // import the required package/class
/**
 * Validates that the given source Fields are present in the headers.
 */
protected boolean validateFields(FlowProcess<JobConf> flowProcess, Tap tap, Fields sourceFields) {

  CSVRecord headerRecord = getHeaderRecord(flowProcess, tap);

  if (sourceFields.size() > headerRecord.size()) {
    return false;
  }
  List<String> recordList = new ArrayList<String>();

  for (int i = 0; i < headerRecord.size(); i++) {
    recordList.add(headerRecord.get(i));
  }

  for (int i = 0; i < sourceFields.size(); i++) {
    if (!recordList.contains(sourceFields.get(i))) {
      return false;
    }
  }
  return true;

}
 
Developer ID: datascienceinc, Project: cascading.csv, Lines of code: 25, Source file: CsvScheme.java

Example 4: testPaths

import cascading.flow.FlowProcess; // import the required package/class
/**
 * Tests the content of an output path against the given expected path.
 */
@SuppressWarnings("unchecked")
private void testPaths(String actual, String expected) throws Exception {

  Tap outputTest = new Hfs(new TextLine(), actual);
  Tap expectedTest = new Hfs(new TextLine(), expected);

  FlowProcess outputProcess = new HadoopFlowProcess(new JobConf(new Configuration()));
  FlowProcess expectedProcess = new HadoopFlowProcess(new JobConf(new Configuration()));

  TupleEntryIterator outputIterator = outputTest.openForRead(outputProcess);
  TupleEntryIterator expectedIterator = expectedTest.openForRead(expectedProcess);

  List<String> outputList = new ArrayList<>();
  while (outputIterator.hasNext()) {
    outputList.add(outputIterator.next().getTuple().getString(1));
  }

  List<String> expectedList = new ArrayList<>();
  while (expectedIterator.hasNext()) {
    expectedList.add(expectedIterator.next().getTuple().getString(1));
  }

  assertTrue(outputList.equals(expectedList));

}
 
Developer ID: datascienceinc, Project: cascading.csv, Lines of code: 29, Source file: CsvSchemeTest.java

Example 5: translateSource

import cascading.flow.FlowProcess; // import the required package/class
private DataSet<Tuple> translateSource(FlowProcess flowProcess, ExecutionEnvironment env, FlowNode node, int dop) {

	Tap tap = this.getSingle(node.getSourceTaps());
	JobConf tapConfig = new JobConf(this.getNodeConfig(node));
	tap.sourceConfInit(flowProcess, tapConfig);
	tapConfig.set("cascading.step.source", Tap.id(tap));

	Fields outFields = tap.getSourceFields();
	registerKryoTypes(outFields);

	JobConf sourceConfig = new JobConf(this.getNodeConfig(node));
	MultiInputFormat.addInputFormat(sourceConfig, tapConfig);

	DataSet<Tuple> src = env
			.createInput(new TapInputFormat(node), new TupleTypeInfo(outFields))
			.name(tap.getIdentifier())
			.setParallelism(dop)
			.withParameters(FlinkConfigConverter.toFlinkConfig(new Configuration(sourceConfig)));

	return src;
}
 
Developer ID: dataArtisans, Project: cascading-flink, Lines of code: 23, Source file: FlinkFlowStep.java

Example 6: SinkBoundaryInStage

import cascading.flow.FlowProcess; // import the required package/class
public SinkBoundaryInStage(FlowProcess flowProcess, FlowElement flowElement, FlowNode node) {
	super(flowProcess, flowElement);
	this.nextStarted = false;

	Scope inScope = node.getElementGraph().incomingEdgesOf(flowElement).iterator().next();

	Fields inFields;
	if(inScope.isEvery()) {
		inFields = inScope.getOutGroupingFields();
	}
	else {
		inFields = inScope.getOutValuesFields();
	}

	this.tupleEntry = new TupleEntry(inFields);
}
 
Developer ID: dataArtisans, Project: cascading-flink, Lines of code: 17, Source file: SinkBoundaryInStage.java

Example 7: sinkPrepare

import cascading.flow.FlowProcess; // import the required package/class
@Override
public void sinkPrepare( FlowProcess<? extends Configuration> flowProcess, SinkCall<Object[], OutputCollector> sinkCall ) throws IOException {
	if( !( flowProcess instanceof FlowProcessWrapper ) ) {
		throw new RuntimeException( "not a flow process wrapper" );
	}

	if( !"process-default".equals( flowProcess.getProperty( "default" ) ) ) {
		throw new RuntimeException( "not default value" );
	}

	if( !"sink-replace".equals( flowProcess.getProperty( "replace" ) ) ) {
		throw new RuntimeException( "not replaced value" );
	}

	flowProcess = ( (FlowProcessWrapper) flowProcess ).getDelegate();

	if( !"process-default".equals( flowProcess.getProperty( "default" ) ) ) {
		throw new RuntimeException( "not default value" );
	}

	if( !"process-replace".equals( flowProcess.getProperty( "replace" ) ) ) {
		throw new RuntimeException( "not replaced value" );
	}

	super.sinkPrepare( flowProcess, sinkCall );
}
 
Developer ID: dataArtisans, Project: cascading-flink, Lines of code: 27, Source file: FlinkConfigDefScheme.java

Example 8: source

import cascading.flow.FlowProcess; // import the required package/class
/**
 * Populates the {@link Corc} with the next value from the {@link RecordReader}. Then copies the values into the
 * incoming {@link TupleEntry}.
 */
@Override
public boolean source(FlowProcess<? extends Configuration> flowProcess, SourceCall<Corc, RecordReader> sourceCall)
    throws IOException {
  Corc corc = sourceCall.getContext();
  @SuppressWarnings("unchecked")
  boolean next = sourceCall.getInput().next(NullWritable.get(), corc);
  if (!next) {
    return false;
  }
  TupleEntry tupleEntry = sourceCall.getIncomingEntry();
  for (Comparable<?> fieldName : tupleEntry.getFields()) {
    if (ROW_ID_NAME.equals(fieldName)) {
      tupleEntry.setObject(ROW_ID_NAME, corc.getRecordIdentifier());
    } else {
      tupleEntry.setObject(fieldName, corc.get(fieldName.toString()));
    }
  }
  return true;
}
 
Developer ID: HotelsDotCom, Project: corc, Lines of code: 24, Source file: OrcFile.java

Example 9: sinkPrepare

import cascading.flow.FlowProcess; // import the required package/class
@Override
public void sinkPrepare(final FlowProcess<JobConf> flowProcess, final SinkCall<Object[], OutputCollector> sinkCall) throws IOException {
    final StringWriter stringWriter = new StringWriter(4 * 1024);
    final CSVWriter csvWriter = createCsvWriter(stringWriter);
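    // context slots: [0] reusable Text key, [1] string buffer, [2] output charset, [3] CSV writer, [4] scratch row array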
    sinkCall.setContext(new Object[5]);
    sinkCall.getContext()[0] = new Text();
    sinkCall.getContext()[1] = stringWriter;
    sinkCall.getContext()[2] = Charset.forName(charsetName);
    sinkCall.getContext()[3] = csvWriter;
    sinkCall.getContext()[4] = new String[getSinkFields().size()];

    if (hasHeader) {
        final Fields fields = sinkCall.getOutgoingEntry().getFields();
        write(sinkCall, fields);
    }
}
 
Developer ID: tresata, Project: cascading-opencsv, Lines of code: 17, Source file: OpenCsvScheme.java

Example 10: compareTaps

import cascading.flow.FlowProcess; // import the required package/class
public static boolean compareTaps(final Tap source1, final Tap source2, final Configuration conf) throws IOException {
    final FlowProcess flowProcess1 = new HadoopFlowProcess(new JobConf(conf));
    source1.getScheme().retrieveSourceFields(flowProcess1, source1);
    final TupleEntryIterator iter1 = source1.openForRead(new HadoopFlowProcess(new JobConf(conf)));
    final FlowProcess flowProcess2 = new HadoopFlowProcess(new JobConf(conf));
    source2.getScheme().retrieveSourceFields(flowProcess2, source2);
    final TupleEntryIterator iter2 = source2.openForRead(new HadoopFlowProcess(new JobConf(conf)));
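    // the taps are considered equal only if they declare the same fields and contain the same tuples (compared after sorting)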
    if (!iter1.getFields().equals(iter2.getFields()))
        return false;
    List<Tuple> list1 = new ArrayList<Tuple>();
    while (iter1.hasNext())
        list1.add(new Tuple(iter1.next().getTuple()));
    iter1.close();
    Collections.sort(list1);
    List<Tuple> list2 = new ArrayList<Tuple>();
    while (iter2.hasNext())
        list2.add(new Tuple(iter2.next().getTuple()));
    iter2.close();
    Collections.sort(list2);
    return list1.equals(list2);
}
 
Developer ID: tresata, Project: cascading-opencsv, Lines of code: 22, Source file: OpenCsvSchemeTest.java

Example 11: complete

import cascading.flow.FlowProcess; // import the required package/class
@Test
public void complete() {
  @SuppressWarnings("unchecked")
  List<TupleEntry> actual = new BufferCallStub.Builder<Void>(GROUP_FIELDS, NON_GROUP_FIELDS)
      .newGroup(1)
      .addTuple("a")
      .addTuple("b")
      .newGroup(2)
      .addTuple("c")
      .addTuple("d")
      .build()
      .complete(mock(FlowProcess.class), new FirstNBuffer(1))
      .result()
      .asTupleEntryList();

  assertThat(actual.size(), is(2));
  assertThat(actual.get(0), tupleEntry(NON_GROUP_FIELDS, "a"));
  assertThat(actual.get(1), tupleEntry(NON_GROUP_FIELDS, "c"));
}
 
Developer ID: HotelsDotCom, Project: plunger, Lines of code: 20, Source file: BufferCallStubTest.java

Example 12: completeDifferentOutputFields

import cascading.flow.FlowProcess; // import the required package/class
@Test
public void completeDifferentOutputFields() {
  List<TupleEntry> actual = new BufferCallStub.Builder<Void>(GROUP_FIELDS, NON_GROUP_FIELDS)
      .outputFields(OUTPUT)
      .newGroup(1)
      .addTuple("a")
      .addTuple("b")
      .newGroup(2)
      .addTuple("c")
      .addTuple("d")
      .build()
      .complete(mock(FlowProcess.class), new CountBuffer())
      .result()
      .asTupleEntryList();

  assertThat(actual.size(), is(4));
  assertThat(actual.get(0), tupleEntry(OUTPUT, 1));
  assertThat(actual.get(1), tupleEntry(OUTPUT, 2));
  assertThat(actual.get(2), tupleEntry(OUTPUT, 1));
  assertThat(actual.get(3), tupleEntry(OUTPUT, 2));
}
 
Developer ID: HotelsDotCom, Project: plunger, Lines of code: 22, Source file: BufferCallStubTest.java

Example 13: complete

import cascading.flow.FlowProcess; // import the required package/class
@Test
public void complete() {
  List<TupleEntry> actual = new AggregatorCallStub.Builder<Tuple[]>(GROUP_FIELDS, NON_GROUP_FIELDS)
      .newGroup(1)
      .addTuple("a")
      .addTuple("b")
      .newGroup(2)
      .addTuple("c")
      .addTuple("d")
      .build()
      .complete(mock(FlowProcess.class), new First(NON_GROUP_FIELDS))
      .result()
      .asTupleEntryList();

  assertThat(actual.size(), is(2));
  assertThat(actual.get(0), tupleEntry(NON_GROUP_FIELDS, "a"));
  assertThat(actual.get(1), tupleEntry(NON_GROUP_FIELDS, "c"));
}
 
Developer ID: HotelsDotCom, Project: plunger, Lines of code: 19, Source file: AggregatorCallStubTest.java

Example 14: completeDifferentOutputFields

import cascading.flow.FlowProcess; // import the required package/class
@Test
public void completeDifferentOutputFields() {
  @SuppressWarnings({ "unchecked", "rawtypes" })
  List<TupleEntry> actual = new AggregatorCallStub.Builder(GROUP_FIELDS, NON_GROUP_FIELDS)
      .outputFields(OUTPUT_FIELDS)
      .newGroup(1)
      .addTuple("a")
      .addTuple("b")
      .newGroup(2)
      .addTuple("c")
      .addTuple("d")
      .build()
      .complete(mock(FlowProcess.class), new MaxValue(OUTPUT_FIELDS))
      .result()
      .asTupleEntryList();

  assertThat(actual.size(), is(2));
  assertThat(actual.get(0), tupleEntry(OUTPUT_FIELDS, "b"));
  assertThat(actual.get(1), tupleEntry(OUTPUT_FIELDS, "d"));
}
 
Developer ID: HotelsDotCom, Project: plunger, Lines of code: 21, Source file: AggregatorCallStubTest.java

Example 15: completeDifferentOutputFields

import cascading.flow.FlowProcess; // import the required package/class
@Test
public void completeDifferentOutputFields() {
  @SuppressWarnings("unchecked")
  List<TupleEntry> actual = new FunctionCallStub.Builder<Void>(FIELDS)
      .outputFields(OUTPUT)
      .addTuple("a")
      .addTuple("b")
      .build()
      .complete(mock(FlowProcess.class), new Insert(OUTPUT, 1))
      .result()
      .asTupleEntryList();

  assertThat(actual.size(), is(2));
  assertThat(actual.get(0), tupleEntry(OUTPUT, 1));
  assertThat(actual.get(1), tupleEntry(OUTPUT, 1));
}
 
Developer ID: HotelsDotCom, Project: plunger, Lines of code: 17, Source file: FunctionCallStubTest.java


Note: The cascading.flow.FlowProcess class examples in this article were compiled by 纯净天空 from open-source code and documentation platforms such as GitHub and MSDocs. The snippets were selected from open-source projects contributed by their respective authors, and copyright of the source code remains with the original authors. Please consult each project's License before distributing or using the code; do not reproduce without permission.