This article collects typical usage examples of the Java class org.datacleaner.job.AnalysisJob. If you are wondering what AnalysisJob is for, how to use it, or want to see it in real code, the curated examples below should help.
The AnalysisJob class belongs to the org.datacleaner.job package. 15 code examples are shown below, sorted by popularity by default. You can upvote the examples you like or find useful; your feedback helps the system recommend better Java code examples.
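One lifecycle runs through nearly all of the examples: an immutable AnalysisJob is assembled with an AnalysisJobBuilder and then executed with an AnalysisRunner. Here is a minimal sketch of that pattern, pieced together from the snippets below; the datastore and column names are borrowed from Example 3, and the error handling from Examples 1 and 8, so treat it as an illustration rather than a copy of any single example.

import org.datacleaner.job.AnalysisJob; // import the required package/class

// Minimal build-and-run sketch (datastore/column names borrowed from Example 3):
final Datastore datastore = TestHelper.createSampleDatabaseDatastore("orderdb");
final DataCleanerConfiguration configuration =
        new DataCleanerConfigurationImpl().withDatastores(datastore);
final AnalysisJob job;
try (AnalysisJobBuilder builder = new AnalysisJobBuilder(configuration)) {
    builder.setDatastore(datastore);                    // choose the data source
    builder.addSourceColumns("customers.customername"); // pick the input columns
    builder.addAnalyzer(MockAnalyzer.class).addInputColumns(builder.getSourceColumns());
    job = builder.toAnalysisJob();                      // freeze into an immutable AnalysisJob
}
final AnalysisResultFuture resultFuture = new AnalysisRunnerImpl(configuration).run(job);
resultFuture.await(); // block until the run completes
if (!resultFuture.isSuccessful()) {
    throw resultFuture.getErrors().get(0); // rethrow the first failure, as the tests below do
}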
Example 1: testScenario

import org.datacleaner.job.AnalysisJob; // import the required package/class

public void testScenario() throws Throwable {
    final CsvDatastore datastore =
            new CsvDatastore("my database", "../core/src/test/resources/example-name-lengths.csv");
    final ClasspathScanDescriptorProvider descriptorProvider =
            new ClasspathScanDescriptorProvider().scanPackage("org.datacleaner", true);
    final DataCleanerConfiguration configuration = new DataCleanerConfigurationImpl().withDatastores(datastore)
            .withEnvironment(new DataCleanerEnvironmentImpl().withDescriptorProvider(descriptorProvider));
    final AnalysisJob job = new JaxbJobReader(configuration)
            .read(new FileInputStream("src/test/resources/example-job-components-without-inputcolumns.xml"));
    final AnalysisRunner runner = new AnalysisRunnerImpl(configuration);
    final AnalysisResultFuture resultFuture = runner.run(job);
    if (!resultFuture.isSuccessful()) {
        throw resultFuture.getErrors().get(0);
    }
    final InputColumn<?>[] input = job.getAnalyzerJobs().iterator().next().getInput();
    assertEquals(4, input.length);
    final StringAnalyzerResult result = (StringAnalyzerResult) resultFuture.getResults().get(0);
    for (int i = 0; i < input.length; i++) {
        assertEquals(5, result.getRowCount(input[i]));
    }
}
Example 2: getComponentByKey

import org.datacleaner.job.AnalysisJob; // import the required package/class

private ComponentJob getComponentByKey(final AnalysisJob job, final String queriedKey) {
    final List<ComponentJob> componentJobs = CollectionUtils.concat(false, job.getFilterJobs(),
            job.getTransformerJobs(), job.getAnalyzerJobs());
    for (final ComponentJob componentJob : componentJobs) {
        final String componentKey = getComponentKey(componentJob);
        if (queriedKey.equals(componentKey)) {
            return componentJob;
        }
        // recurse into jobs attached to this component's output data streams
        final OutputDataStreamJob[] outputDataStreamJobs = componentJob.getOutputDataStreamJobs();
        for (final OutputDataStreamJob outputDataStreamJob : outputDataStreamJobs) {
            final AnalysisJob childJob = outputDataStreamJob.getJob();
            if (childJob != null) {
                final ComponentJob result = getComponentByKey(childJob, queriedKey);
                if (result != null) {
                    return result;
                }
            }
        }
    }
    return null;
}
Example 3: testSingleResultElement

import org.datacleaner.job.AnalysisJob; // import the required package/class

@Test
public void testSingleResultElement() throws IOException {
    final Datastore datastore = TestHelper.createSampleDatabaseDatastore("orderdb");
    final SimpleDescriptorProvider descriptorProvider = new SimpleDescriptorProvider();
    descriptorProvider.addRendererBeanDescriptor(Descriptors.ofRenderer(ListResultHtmlRenderer.class));
    final DataCleanerEnvironment environment =
            new DataCleanerEnvironmentImpl().withDescriptorProvider(descriptorProvider);
    final DataCleanerConfigurationImpl configuration =
            new DataCleanerConfigurationImpl().withDatastores(datastore).withEnvironment(environment);
    final AnalysisJob job;
    try (AnalysisJobBuilder jobBuilder = new AnalysisJobBuilder(configuration)) {
        jobBuilder.setDatastore(datastore);
        jobBuilder.addSourceColumns("customers.customername");
        jobBuilder.addAnalyzer(MockAnalyzer.class).addInputColumns(jobBuilder.getSourceColumns());
        job = jobBuilder.toAnalysisJob();
    }
    final AnalysisResult analysisResult = new AnalysisRunnerImpl(configuration).run(job);
    writeAndCompareWithBenchmark(analysisResult, configuration);
}
Example 4: testResultPathNameNoVariableSpecified

import org.datacleaner.job.AnalysisJob; // import the required package/class

@Test
public void testResultPathNameNoVariableSpecified() throws Exception {
    final SparkConf sparkConf = new SparkConf().setMaster("local").setAppName("DCTest - " + name.getMethodName());
    try (JavaSparkContext sparkContext = new JavaSparkContext(sparkConf)) {
        final SparkJobContext sparkJobContext = new SparkJobContext(URI.create("src/test/resources/conf_local.xml"),
                URI.create("src/test/resources/vanilla-job.analysis.xml"), null, sparkContext);
        final AnalysisJob job = sparkJobContext.getAnalysisJob();
        assertNotNull(job);
        assertNull(sparkJobContext.getResultPath());
        final String analysisJobName = sparkJobContext.getJobName();
        assertEquals("vanilla-job", analysisJobName);
        final Resource resultResource = ResultFilePathUtils.getResultResource(sparkContext, sparkJobContext);
        final int lastIndexOfDash = resultResource.getQualifiedPath().lastIndexOf("-");
        assertTrue(resultResource.getQualifiedPath().contains(analysisJobName));
        assertTrue(resultResource.getQualifiedPath().substring(0, lastIndexOfDash)
                .endsWith("datacleaner" + File.separator + "results" + File.separator + "vanilla-job"));
    }
}
Example 5: performResultAssertions

import org.datacleaner.job.AnalysisJob; // import the required package/class

private void performResultAssertions(final AnalysisJob job, final AnalysisResult result) {
    assertEquals(1, result.getResults().size());
    Collection<ComponentJob> componentJobs = result.getResultMap().keySet();
    componentJobs = CollectionUtils2.sorted(componentJobs, ObjectComparator.getComparator());
    assertEquals("[ImmutableAnalyzerJob[name=date gap job,analyzer=Date gap analyzer]]", componentJobs.toString());
    // using the original component jobs not only asserts that these exist
    // in the result, but also that their deserialized clones are equal
    // (otherwise the results cannot be retrieved from the result map).
    final AnalyzerJob analyzerJob = job.getAnalyzerJobs().iterator().next();
    final AnalyzerResult analyzerResult = result.getResult(analyzerJob);
    assertNotNull(analyzerResult);
    assertEquals("DateGapAnalyzerResult[gaps={121=[], 128=[], 141=[], 181=[], 363=[]}]", analyzerResult.toString());
}

Developer: datacleaner | Project: DataCleaner | Lines of code: 18 | Source: AnalyzeDateGapsCompareSchemasAndSerializeResultsTest.java
Example 6: getAnalysisJob

import org.datacleaner.job.AnalysisJob; // import the required package/class

@Provides
public AnalysisJob getAnalysisJob(@Nullable final AnalysisJobBuilder builder) {
    if (builder == null) {
        return null;
    }
    return builder.toAnalysisJob(false);
}
Example 7: getInputColumnConversion

import org.datacleaner.job.AnalysisJob; // import the required package/class

@Override
protected Map<InputColumn<?>, InputColumn<?>> getInputColumnConversion(final AnalysisJob wrappedAnalysisJob) {
    final Map<InputColumn<?>, InputColumn<?>> map = new HashMap<>();
    final Iterator<InputColumn<?>> sourceColumns = wrappedAnalysisJob.getSourceColumns().iterator();
    int i = 0;
    while (i < input.length && sourceColumns.hasNext()) {
        final InputColumn<?> next = sourceColumns.next();
        map.put(input[i], next);
        i++;
    }
    return map;
}
Example 8: testWrappedExecution

import org.datacleaner.job.AnalysisJob; // import the required package/class

public void testWrappedExecution() throws Throwable {
    final AnalysisJob job;
    try (AnalysisJobBuilder builder = new AnalysisJobBuilder(_configuration)) {
        builder.setDatastore("actual_input");
        builder.addSourceColumns("table.name");
        builder.addTransformer(MockWrappedAnalysisJobTransformer.class)
                .addInputColumn(builder.getSourceColumnByName("name"));
        builder.addAnalyzer(MockAnalyzer.class).addInputColumns(builder.getAvailableInputColumns(Object.class));
        job = builder.toAnalysisJob();
    }
    final AnalysisResultFuture resultFuture = new AnalysisRunnerImpl(_configuration).run(job);
    resultFuture.await();
    if (resultFuture.isErrornous()) {
        throw resultFuture.getErrors().get(0);
    }
    final List<AnalyzerResult> results = resultFuture.getResults();
    assertEquals(1, results.size());
    @SuppressWarnings("unchecked") final ListResult<InputRow> analyzerResult =
            (ListResult<InputRow>) results.get(0);
    final List<InputRow> values = analyzerResult.getValues();
    assertEquals(4, values.size());
    assertEquals("TransformedInputRow[values={"
            + "TransformedInputColumn[id=trans-0001-0002,name=mock output]=mocked: Tomasz},"
            + "delegate=MetaModelInputRow[Row[values=[Tomasz]]]]", values.get(0).toString());
    assertEquals("TransformedInputRow[values={"
            + "TransformedInputColumn[id=trans-0001-0002,name=mock output]=mocked: Kasper},"
            + "delegate=MetaModelInputRow[Row[values=[Kasper]]]]", values.get(1).toString());
}
Example 9: testSingleRecordOutputScenario

import org.datacleaner.job.AnalysisJob; // import the required package/class

public void testSingleRecordOutputScenario() throws Exception {
    final TransformerComponentBuilder<MockTransformer> tr1 = ajb.addTransformer(MockTransformer.class);
    tr1.addInputColumn(ajb.getSourceColumnByName("name"));
    final TransformerComponentBuilder<MockTransformer> tr2 = ajb.addTransformer(MockTransformer.class);
    tr2.addInputColumn(tr1.getOutputColumns().get(0));
    final AnalyzerComponentBuilder<MockAnalyzer> analyzer = ajb.addAnalyzer(MockAnalyzer.class);
    analyzer.addInputColumns(sourceColumns);
    final AnalysisJob job = ajb.toAnalysisJob(true);
    final Configuration configuration = new Configuration();
    configuration.includeAnalyzers = false;
    final ConsumeRowHandler handler = new ConsumeRowHandler(job, _configuration, configuration);
    final MockInputRow inputRow =
            new MockInputRow().put(nameColumn, "Kasper").put(ageColumn, null).put(countryColumn, null);
    final List<InputRow> result = handler.consumeRow(inputRow).getRows();
    assertEquals(1, result.size());
    final InputRow outputRow = result.get(0);
    assertEquals("TransformedInputRow[values={"
            + "TransformedInputColumn[id=trans-0001-0002,name=mock output]=mocked: Kasper, "
            + "TransformedInputColumn[id=trans-0003-0004,name=mock output]=mocked: mocked: Kasper},"
            + "delegate=" + inputRow.toString() + "]", outputRow.toString());
    final List<InputColumn<?>> outputColumns = outputRow.getInputColumns();
    assertEquals(5, outputColumns.size());
    assertEquals("mocked: Kasper", outputRow.getValue(outputColumns.get(3)).toString());
    assertEquals("mocked: mocked: Kasper", outputRow.getValue(outputColumns.get(4)).toString());
}
Example 10: runAnalysisJob

import org.datacleaner.job.AnalysisJob; // import the required package/class

private AnalysisResultFuture runAnalysisJob(final String appName, final URI analysisJobXmlPath,
        final String expectedAnalysisJobName, final boolean useMinPartitions,
        final SparkJobLifeCycleListener sparkJobLifeCycleListener) throws Exception {
    final AnalysisResultFuture result;
    final SparkConf sparkConf = new SparkConf().setMaster("local").setAppName(appName);
    try (JavaSparkContext sparkContext = new JavaSparkContext(sparkConf)) {
        final SparkJobContext sparkJobContext =
                new SparkJobContext(URI.create("src/test/resources/conf_local.xml"), analysisJobXmlPath, null,
                        sparkContext);
        if (sparkJobLifeCycleListener != null) {
            sparkJobContext.addSparkJobLifeCycleListener(sparkJobLifeCycleListener);
        }
        final AnalysisJob job = sparkJobContext.getAnalysisJob();
        assertNotNull(job);
        assertEquals(expectedAnalysisJobName, sparkJobContext.getJobName());
        final SparkAnalysisRunner sparkAnalysisRunner = new SparkAnalysisRunner(sparkContext, sparkJobContext,
                useMinPartitions ? MIN_PARTITIONS_MULTIPLE : null);
        result = sparkAnalysisRunner.run(job);
    }
    if (result.isErrornous()) {
        throw (Exception) result.getErrors().get(0);
    }
    return result;
}
Example 11: onUnexpectedError

import org.datacleaner.job.AnalysisJob; // import the required package/class

public void onUnexpectedError(final AnalysisJob job, Throwable throwable) {
    throwable = ErrorUtils.unwrapForPresentation(throwable);
    if (throwable instanceof AnalysisJobCancellation) {
        _progressInformationPanel.onCancelled();
        _cancelButton.setEnabled(false);
        return;
    } else if (throwable instanceof PreviousErrorsExistException) {
        // do nothing
        return;
    }
    _progressInformationPanel.addUserLog("An error occurred in the analysis job!", throwable, true);
}
Example 12: DistributedJobContextImpl

import org.datacleaner.job.AnalysisJob; // import the required package/class

public DistributedJobContextImpl(final DataCleanerConfiguration masterConfiguration, final AnalysisJob masterJob,
        final int chunkIndex, final int chunkCount) {
    _masterConfiguration = masterConfiguration;
    _masterJob = masterJob;
    _chunkIndex = chunkIndex;
    _chunkCount = chunkCount;
}
Example 13: buildSlaveJob

import org.datacleaner.job.AnalysisJob; // import the required package/class

/**
 * Creates a slave job by copying the original job and adding a
 * {@link MaxRowsFilter} as a default requirement.
 *
 * @param job the master job to copy
 * @param slaveJobIndex the zero-based index of this slave job
 * @param firstRow the first row of the chunk to process
 * @param maxRows the maximum number of rows to process
 * @return the new slave job
 */
private AnalysisJob buildSlaveJob(final AnalysisJob job, final int slaveJobIndex, final int firstRow,
        final int maxRows) {
    logger.info("Building slave job {} with firstRow={} and maxRows={}", slaveJobIndex + 1, firstRow, maxRows);
    try (AnalysisJobBuilder jobBuilder = new AnalysisJobBuilder(_configuration, job)) {
        final FilterComponentBuilder<MaxRowsFilter, Category> maxRowsFilter =
                jobBuilder.addFilter(MaxRowsFilter.class);
        maxRowsFilter.getComponentInstance().setFirstRow(firstRow);
        maxRowsFilter.getComponentInstance().setMaxRows(maxRows);
        final boolean naturalRecordOrderConsistent =
                jobBuilder.getDatastore().getPerformanceCharacteristics().isNaturalRecordOrderConsistent();
        if (!naturalRecordOrderConsistent) {
            // impose a deterministic order so every slave sees the same row numbering
            final InputColumn<?> orderColumn = findOrderByColumn(jobBuilder);
            maxRowsFilter.getComponentInstance().setOrderColumn(orderColumn);
        }
        jobBuilder.setDefaultRequirement(maxRowsFilter, MaxRowsFilter.Category.VALID);
        // in assertion/test mode do an early validation
        assert jobBuilder.isConfigured(true);
        return jobBuilder.toAnalysisJob();
    }
}
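For context, here is a hedged sketch of the chunking arithmetic a master could apply before calling buildSlaveJob, splitting a known row count into equally sized row ranges. The expectedRows and chunkCount values, the loop itself, and the 1-based firstRow convention are illustrative assumptions, not taken from the DataCleaner source.

// Hypothetical chunking arithmetic (assumed values, not from the example above):
final int expectedRows = 1_000_000; // total rows to process (assumed)
final int chunkCount = 4;           // number of slave jobs (assumed)
final int rowsPerChunk = (expectedRows + chunkCount - 1) / chunkCount; // ceiling division
for (int i = 0; i < chunkCount; i++) {
    final int firstRow = i * rowsPerChunk + 1; // assuming MaxRowsFilter treats firstRow as 1-based
    final int maxRows = Math.min(rowsPerChunk, expectedRows - i * rowsPerChunk);
    if (maxRows <= 0) {
        break; // fewer rows than chunks; nothing left to assign
    }
    final AnalysisJob slaveJob = buildSlaveJob(job, i, firstRow, maxRows);
    // ... dispatch slaveJob to a cluster node and collect its partial result
}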
Example 14: jobSuccess

import org.datacleaner.job.AnalysisJob; // import the required package/class

@Override
public void jobSuccess(final AnalysisJob job, final AnalysisJobMetrics metrics) {
    for (final AnalysisListener delegate : _delegates) {
        try {
            delegate.jobSuccess(job, metrics);
        } catch (final Exception e) {
            logger.warn("Listener {} failed", delegate.getClass().getName(), e);
        }
    }
}
Example 15: createWrappedAnalysisJob

import org.datacleaner.job.AnalysisJob; // import the required package/class

@Override
protected AnalysisJob createWrappedAnalysisJob() {
    try (AnalysisJobBuilder builder = new AnalysisJobBuilder(_configuration)) {
        builder.setDatastore("orig_input");
        builder.addSourceColumns("table.foo");
        builder.addTransformer(MockTransformer.class).addInputColumns(builder.getSourceColumns());
        builder.addAnalyzer(MockAnalyzer.class).addInputColumns(builder.getAvailableInputColumns(Object.class));
        return builder.toAnalysisJob();
    }
}