This article collects typical usage examples of the Java class cascading.flow.FlowProcess. If you are wondering what the FlowProcess class is for, how to use it, or what real-world usage looks like, the curated code examples below should help.
The FlowProcess class belongs to the cascading.flow package. Fifteen code examples are shown below, sorted by popularity by default.
Example 1: sinkConfInit
import cascading.flow.FlowProcess; // import the required package/class
@Override
public void sinkConfInit(FlowProcess<JobConf> flowProcess, Tap<JobConf, RecordReader, OutputCollector> tap, JobConf conf) {
    conf.setOutputFormat(EsOutputFormat.class);
    // define an output dir to prevent Cascading from setting up a TempHfs and overriding the OutputFormat
    Settings set = loadSettings(conf, false);

    Log log = LogFactory.getLog(EsTap.class);
    InitializationUtils.setValueWriterIfNotSet(set, CascadingValueWriter.class, log);
    InitializationUtils.setValueReaderIfNotSet(set, JdkValueReader.class, log);
    InitializationUtils.setBytesConverterIfNeeded(set, CascadingLocalBytesConverter.class, log);
    InitializationUtils.setFieldExtractorIfNotSet(set, CascadingFieldExtractor.class, log);

    // NB: we need to set this property even though it is not being used - and since a URI causes problems, use only the resource/file
    //conf.set("mapred.output.dir", set.getTargetUri() + "/" + set.getTargetResource());
    HadoopCfgUtils.setFileOutputFormatDir(conf, set.getResourceWrite());
    HadoopCfgUtils.setOutputCommitterClass(conf, EsOutputFormat.EsOldAPIOutputCommitter.class.getName());

    if (log.isTraceEnabled()) {
        log.trace("Initialized (sink) configuration " + HadoopCfgUtils.asProperties(conf));
    }
}
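To put this scheme in context, here is a minimal, hypothetical sketch of wiring an Elasticsearch sink tap into a Hadoop flow; the paths, resource name, and fields are assumptions, not taken from the project.

// Hypothetical wiring; "input/people.tsv" and "my-index/doc" are placeholders.
Properties props = new Properties();
props.setProperty("es.nodes", "localhost:9200");

Tap source = new Hfs(new TextDelimited(new Fields("name", "value"), "\t"), "input/people.tsv");
Tap sink = new EsTap("my-index/doc");
Pipe pipe = new Pipe("copy");

Flow flow = new HadoopFlowConnector(props).connect(source, sink, pipe);
flow.complete(); // sinkConfInit(...) above runs while the flow is configured and planned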
Example 2: openForRead
import cascading.flow.FlowProcess; // import the required package/class
@Override
public TupleEntryIterator openForRead(FlowProcess<Properties> flowProcess, ScrollQuery input) throws IOException {
    if (input == null) {
        // get original copy
        Settings settings = CascadingUtils.addDefaultsToSettings(CascadingUtils.extractOriginalProperties(flowProcess.getConfigCopy()), tapProperties, log);

        // will be closed when the query is finished
        RestRepository client = new RestRepository(settings);
        Field mapping = client.getMapping();
        Collection<String> fields = CascadingUtils.fieldToAlias(settings, getSourceFields());

        // validate if possible
        FieldPresenceValidation validation = settings.getReadFieldExistanceValidation();
        if (validation.isRequired()) {
            MappingUtils.validateMapping(fields, mapping, validation, log);
        }

        input = QueryBuilder.query(settings).fields(StringUtils.concatenateAndUriEncode(fields, ",")).
                build(client, new ScrollReader(new ScrollReaderConfig(new JdkValueReader(), mapping, settings)));
    }
    return new TupleEntrySchemeIterator<Properties, ScrollQuery>(flowProcess, getScheme(), input, getIdentifier());
}
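As a hedged illustration (not from the original source), consuming the TupleEntryIterator returned by openForRead in local mode could look roughly like this; the tap variable and its configuration are assumed.

// Assumed: 'tap' is a local-mode Tap such as EsTap; an empty Properties is used for brevity.
TupleEntryIterator it = tap.openForRead(new LocalFlowProcess(new Properties()));
try {
    while (it.hasNext()) {
        System.out.println(it.next().getTuple());
    }
} finally {
    it.close();
}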
Example 3: validateFields
import cascading.flow.FlowProcess; // import the required package/class
/**
 * Validates that the given source Fields are present in the CSV header record.
 */
protected boolean validateFields(FlowProcess<JobConf> flowProcess, Tap tap, Fields sourceFields) {
    CSVRecord headerRecord = getHeaderRecord(flowProcess, tap);
    if (sourceFields.size() > headerRecord.size()) {
        return false;
    }

    List<String> recordList = new ArrayList<String>();
    for (int i = 0; i < headerRecord.size(); i++) {
        recordList.add(headerRecord.get(i));
    }

    for (int i = 0; i < sourceFields.size(); i++) {
        if (!recordList.contains(sourceFields.get(i))) {
            return false;
        }
    }
    return true;
}
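A possible call site for this helper (sketched as an assumption, not taken from the project) would fail fast when a requested field is missing from the header.

// Hypothetical usage; the field names are placeholders.
Fields requested = new Fields("id", "name", "email");
if (!validateFields(flowProcess, tap, requested)) {
    throw new TapException("One or more source fields are missing from the CSV header");
}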
Example 4: testPaths
import cascading.flow.FlowProcess; // import the required package/class
/**
 * Tests the content of an output path against the given expected path.
 */
@SuppressWarnings("unchecked")
private void testPaths(String actual, String expected) throws Exception {
    Tap outputTest = new Hfs(new TextLine(), actual);
    Tap expectedTest = new Hfs(new TextLine(), expected);

    FlowProcess outputProcess = new HadoopFlowProcess(new JobConf(new Configuration()));
    FlowProcess expectedProcess = new HadoopFlowProcess(new JobConf(new Configuration()));

    TupleEntryIterator outputIterator = outputTest.openForRead(outputProcess);
    TupleEntryIterator expectedIterator = expectedTest.openForRead(expectedProcess);

    List<String> outputList = new ArrayList<>();
    while (outputIterator.hasNext()) {
        outputList.add(outputIterator.next().getTuple().getString(1));
    }

    List<String> expectedList = new ArrayList<>();
    while (expectedIterator.hasNext()) {
        expectedList.add(expectedIterator.next().getTuple().getString(1));
    }

    assertTrue(outputList.equals(expectedList));
}
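A hedged usage sketch: a JUnit test can delegate to this helper with an actual and an expected directory; the paths below are placeholders.

@Test
public void wordCountOutputMatchesExpected() throws Exception {
    testPaths("build/test/output/wordcount", "src/test/resources/expected/wordcount");
}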
Example 5: translateSource
import cascading.flow.FlowProcess; // import the required package/class
private DataSet<Tuple> translateSource(FlowProcess flowProcess, ExecutionEnvironment env, FlowNode node, int dop) {
    Tap tap = this.getSingle(node.getSourceTaps());
    JobConf tapConfig = new JobConf(this.getNodeConfig(node));
    tap.sourceConfInit(flowProcess, tapConfig);
    tapConfig.set("cascading.step.source", Tap.id(tap));

    Fields outFields = tap.getSourceFields();
    registerKryoTypes(outFields);

    JobConf sourceConfig = new JobConf(this.getNodeConfig(node));
    MultiInputFormat.addInputFormat(sourceConfig, tapConfig);

    DataSet<Tuple> src = env
            .createInput(new TapInputFormat(node), new TupleTypeInfo(outFields))
            .name(tap.getIdentifier())
            .setParallelism(dop)
            .withParameters(FlinkConfigConverter.toFlinkConfig(new Configuration(sourceConfig)));

    return src;
}
Example 6: SinkBoundaryInStage
import cascading.flow.FlowProcess; // import the required package/class
public SinkBoundaryInStage(FlowProcess flowProcess, FlowElement flowElement, FlowNode node) {
    super(flowProcess, flowElement);
    this.nextStarted = false;

    Scope inScope = node.getElementGraph().incomingEdgesOf(flowElement).iterator().next();

    Fields inFields;
    if (inScope.isEvery()) {
        inFields = inScope.getOutGroupingFields();
    } else {
        inFields = inScope.getOutValuesFields();
    }

    this.tupleEntry = new TupleEntry(inFields);
}
Example 7: sinkPrepare
import cascading.flow.FlowProcess; // import the required package/class
@Override
public void sinkPrepare(FlowProcess<? extends Configuration> flowProcess, SinkCall<Object[], OutputCollector> sinkCall) throws IOException {
    if (!(flowProcess instanceof FlowProcessWrapper)) {
        throw new RuntimeException("not a flow process wrapper");
    }
    if (!"process-default".equals(flowProcess.getProperty("default"))) {
        throw new RuntimeException("not default value");
    }
    if (!"sink-replace".equals(flowProcess.getProperty("replace"))) {
        throw new RuntimeException("not replaced value");
    }

    flowProcess = ((FlowProcessWrapper) flowProcess).getDelegate();

    if (!"process-default".equals(flowProcess.getProperty("default"))) {
        throw new RuntimeException("not default value");
    }
    if (!"process-replace".equals(flowProcess.getProperty("replace"))) {
        throw new RuntimeException("not replaced value");
    }

    super.sinkPrepare(flowProcess, sinkCall);
}
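The assertions above imply a specific property setup. A sketch of what that configuration could look like follows; the exact mechanism used by the original test is an assumption here.

// Flow-level defaults visible through both the wrapper and its delegate.
Properties processProperties = new Properties();
processProperties.setProperty("default", "process-default");
processProperties.setProperty("replace", "process-replace");

// Sink-scoped override that the FlowProcessWrapper surfaces as "sink-replace".
Properties sinkProperties = new Properties();
sinkProperties.setProperty("replace", "sink-replace");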
Example 8: source
import cascading.flow.FlowProcess; // import the required package/class
/**
* Populates the {@link Corc} with the next value from the {@link RecordReader}. Then copies the values into the
* incoming {@link TupleEntry}.
*/
@Override
public boolean source(FlowProcess<? extends Configuration> flowProcess, SourceCall<Corc, RecordReader> sourceCall)
        throws IOException {
    Corc corc = sourceCall.getContext();
    @SuppressWarnings("unchecked")
    boolean next = sourceCall.getInput().next(NullWritable.get(), corc);
    if (!next) {
        return false;
    }

    TupleEntry tupleEntry = sourceCall.getIncomingEntry();
    for (Comparable<?> fieldName : tupleEntry.getFields()) {
        if (ROW_ID_NAME.equals(fieldName)) {
            tupleEntry.setObject(ROW_ID_NAME, corc.getRecordIdentifier());
        } else {
            tupleEntry.setObject(fieldName, corc.get(fieldName.toString()));
        }
    }
    return true;
}
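For completeness, a hedged sketch of a matching sourcePrepare that seeds the Corc context consumed above; this is an assumption about how the scheme could be wired, not the project's actual code.

@Override
public void sourcePrepare(FlowProcess<? extends Configuration> flowProcess, SourceCall<Corc, RecordReader> sourceCall)
        throws IOException {
    // Assumed: the RecordReader's value type is Corc, so it can serve as the reusable context.
    sourceCall.setContext((Corc) sourceCall.getInput().createValue());
}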
Example 9: sinkPrepare
import cascading.flow.FlowProcess; // import the required package/class
@Override
public void sinkPrepare(final FlowProcess<JobConf> flowProcess, final SinkCall<Object[], OutputCollector> sinkCall) throws IOException {
    final StringWriter stringWriter = new StringWriter(4 * 1024);
    final CSVWriter csvWriter = createCsvWriter(stringWriter);

    sinkCall.setContext(new Object[5]);
    sinkCall.getContext()[0] = new Text();
    sinkCall.getContext()[1] = stringWriter;
    sinkCall.getContext()[2] = Charset.forName(charsetName);
    sinkCall.getContext()[3] = csvWriter;
    sinkCall.getContext()[4] = new String[getSinkFields().size()];

    if (hasHeader) {
        final Fields fields = sinkCall.getOutgoingEntry().getFields();
        write(sinkCall, fields);
    }
}
Example 10: compareTaps
import cascading.flow.FlowProcess; // import the required package/class
public static boolean compareTaps(final Tap source1, final Tap source2, final Configuration conf) throws IOException {
    final FlowProcess flowProcess1 = new HadoopFlowProcess(new JobConf(conf));
    source1.getScheme().retrieveSourceFields(flowProcess1, source1);
    final TupleEntryIterator iter1 = source1.openForRead(new HadoopFlowProcess(new JobConf(conf)));

    final FlowProcess flowProcess2 = new HadoopFlowProcess(new JobConf(conf));
    source2.getScheme().retrieveSourceFields(flowProcess2, source2);
    final TupleEntryIterator iter2 = source2.openForRead(new HadoopFlowProcess(new JobConf(conf)));

    if (!iter1.getFields().equals(iter2.getFields())) {
        return false;
    }

    List<Tuple> list1 = new ArrayList<Tuple>();
    while (iter1.hasNext()) {
        list1.add(new Tuple(iter1.next().getTuple()));
    }
    iter1.close();
    Collections.sort(list1);

    List<Tuple> list2 = new ArrayList<Tuple>();
    while (iter2.hasNext()) {
        list2.add(new Tuple(iter2.next().getTuple()));
    }
    iter2.close();
    Collections.sort(list2);

    return list1.equals(list2);
}
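A minimal, assumed usage of this utility inside a test; the schemes and paths are placeholders.

Tap expected = new Hfs(new TextDelimited(true, "\t"), "src/test/resources/expected/output");
Tap actual = new Hfs(new TextDelimited(true, "\t"), "build/test/output");
assertTrue(compareTaps(expected, actual, new Configuration()));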
Example 11: complete
import cascading.flow.FlowProcess; // import the required package/class
@Test
public void complete() {
    @SuppressWarnings("unchecked")
    List<TupleEntry> actual = new BufferCallStub.Builder<Void>(GROUP_FIELDS, NON_GROUP_FIELDS)
            .newGroup(1)
            .addTuple("a")
            .addTuple("b")
            .newGroup(2)
            .addTuple("c")
            .addTuple("d")
            .build()
            .complete(mock(FlowProcess.class), new FirstNBuffer(1))
            .result()
            .asTupleEntryList();

    assertThat(actual.size(), is(2));
    assertThat(actual.get(0), tupleEntry(NON_GROUP_FIELDS, "a"));
    assertThat(actual.get(1), tupleEntry(NON_GROUP_FIELDS, "c"));
}
Example 12: completeDifferentOutputFields
import cascading.flow.FlowProcess; // import the required package/class
@Test
public void completeDifferentOutputFields() {
    List<TupleEntry> actual = new BufferCallStub.Builder<Void>(GROUP_FIELDS, NON_GROUP_FIELDS)
            .outputFields(OUTPUT)
            .newGroup(1)
            .addTuple("a")
            .addTuple("b")
            .newGroup(2)
            .addTuple("c")
            .addTuple("d")
            .build()
            .complete(mock(FlowProcess.class), new CountBuffer())
            .result()
            .asTupleEntryList();

    assertThat(actual.size(), is(4));
    assertThat(actual.get(0), tupleEntry(OUTPUT, 1));
    assertThat(actual.get(1), tupleEntry(OUTPUT, 2));
    assertThat(actual.get(2), tupleEntry(OUTPUT, 1));
    assertThat(actual.get(3), tupleEntry(OUTPUT, 2));
}
Example 13: complete
import cascading.flow.FlowProcess; // import the required package/class
@Test
public void complete() {
    List<TupleEntry> actual = new AggregatorCallStub.Builder<Tuple[]>(GROUP_FIELDS, NON_GROUP_FIELDS)
            .newGroup(1)
            .addTuple("a")
            .addTuple("b")
            .newGroup(2)
            .addTuple("c")
            .addTuple("d")
            .build()
            .complete(mock(FlowProcess.class), new First(NON_GROUP_FIELDS))
            .result()
            .asTupleEntryList();

    assertThat(actual.size(), is(2));
    assertThat(actual.get(0), tupleEntry(NON_GROUP_FIELDS, "a"));
    assertThat(actual.get(1), tupleEntry(NON_GROUP_FIELDS, "c"));
}
Example 14: completeDifferentOutputFields
import cascading.flow.FlowProcess; // import the required package/class
@Test
public void completeDifferentOutputFields() {
    @SuppressWarnings({ "unchecked", "rawtypes" })
    List<TupleEntry> actual = new AggregatorCallStub.Builder(GROUP_FIELDS, NON_GROUP_FIELDS)
            .outputFields(OUTPUT_FIELDS)
            .newGroup(1)
            .addTuple("a")
            .addTuple("b")
            .newGroup(2)
            .addTuple("c")
            .addTuple("d")
            .build()
            .complete(mock(FlowProcess.class), new MaxValue(OUTPUT_FIELDS))
            .result()
            .asTupleEntryList();

    assertThat(actual.size(), is(2));
    assertThat(actual.get(0), tupleEntry(OUTPUT_FIELDS, "b"));
    assertThat(actual.get(1), tupleEntry(OUTPUT_FIELDS, "d"));
}
Example 15: completeDifferentOutputFields
import cascading.flow.FlowProcess; // import the required package/class
@Test
public void completeDifferentOutputFields() {
    @SuppressWarnings("unchecked")
    List<TupleEntry> actual = new FunctionCallStub.Builder<Void>(FIELDS)
            .outputFields(OUTPUT)
            .addTuple("a")
            .addTuple("b")
            .build()
            .complete(mock(FlowProcess.class), new Insert(OUTPUT, 1))
            .result()
            .asTupleEntryList();

    assertThat(actual.size(), is(2));
    assertThat(actual.get(0), tupleEntry(OUTPUT, 1));
    assertThat(actual.get(1), tupleEntry(OUTPUT, 1));
}