本文整理汇总了Java中com.crawljax.core.configuration.CrawljaxConfiguration类的典型用法代码示例。如果您正苦于以下问题：Java CrawljaxConfiguration类的具体用法？Java CrawljaxConfiguration怎么用？Java CrawljaxConfiguration使用的例子？那么，这里精选的类代码示例或许可以为您提供帮助。
CrawljaxConfiguration类属于com.crawljax.core.configuration包，在下文中一共展示了CrawljaxConfiguration类的15个代码示例，这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞，您的评价将有助于系统推荐出更棒的Java代码示例。
示例1: main
import com.crawljax.core.configuration.CrawljaxConfiguration; //导入依赖的package包/类
/**
 * Starts a crawl of the Crawljax demo site with a plugin that logs the stripped DOM of each
 * newly detected state.
 *
 * @param args
 *            command-line arguments; not read by this method
 */
public static void main(String[] args) {
    CrawljaxConfigurationBuilder configBuilder =
            CrawljaxConfiguration.builderFor("http://demo.crawljax.com/");

    // The plugin is held in a local so the registration call below stays on one line.
    OnNewStatePlugin domLogger = new OnNewStatePlugin() {
        @Override
        public void onNewState(CrawlerContext context, StateVertex newState) {
            // Logs the DOM whenever a new state is detected; the output should appear in
            // the console.
            LOG.info("Found a new dom! Here it is:\n{}", context.getBrowser().getStrippedDom());
        }

        @Override
        public String toString() {
            return "Our example plugin";
        }
    };
    configBuilder.addPlugin(domLogger);

    new CrawljaxRunner(configBuilder.build()).call();
}
示例2: main
import com.crawljax.core.configuration.CrawljaxConfiguration; //导入依赖的package包/类
/**
 * Starts a crawl of the Crawljax demo site with one invariant registered and a plugin that
 * logs every invariant violation.
 *
 * @param args
 *            command-line arguments; not read by this method
 */
public static void main(String[] args) {
    CrawljaxConfigurationBuilder configBuilder =
            CrawljaxConfiguration.builderFor("http://demo.crawljax.com/");

    // The invariant checks that this text is NOT present in a state.
    String forbiddenText = "Invariants can be used to perform tests on the current state";
    configBuilder.crawlRules().addInvariant("Detect a string",
            new NotRegexCondition(forbiddenText));

    // This plugin only reports the violation; it does not stop the crawl itself.
    OnInvariantViolationPlugin violationLogger = new OnInvariantViolationPlugin() {
        @Override
        public void onInvariantViolation(Invariant invariant, CrawlerContext context) {
            LOG.error("\n\n!!! Invariant {} violated !!!\n", invariant);
        }
    };
    configBuilder.addPlugin(violationLogger);

    new CrawljaxRunner(configBuilder.build()).call();
}
示例3: main
import com.crawljax.core.configuration.CrawljaxConfiguration; //导入依赖的package包/类
/**
 * Entry point: configures click rules, crawl limits, a sample plugin and an input
 * specification for {@code URL}, then runs the crawl.
 *
 * @param args
 *            command-line arguments; not read by this method
 */
public static void main(String[] args) {
    CrawljaxConfigurationBuilder crawlConfig = CrawljaxConfiguration.builderFor(URL);

    crawlConfig.crawlRules().insertRandomDataInInputForms(false);

    // Elements to click ...
    crawlConfig.crawlRules().click("a");
    crawlConfig.crawlRules().click("button");

    // ... and the exceptions to those rules.
    crawlConfig.crawlRules().dontClick("a").underXPath("//DIV[@id='guser']");
    crawlConfig.crawlRules().dontClick("a").withText("Language Tools");

    // Bound the crawl so it terminates.
    crawlConfig.setMaximumStates(MAX_NUMBER_STATES);
    crawlConfig.setMaximumDepth(MAX_DEPTH);

    crawlConfig.addPlugin(new SamplePlugin());
    crawlConfig.crawlRules().setInputSpec(getInputSpecification());

    new CrawljaxRunner(crawlConfig.build()).call();
}
示例4: configure
import com.crawljax.core.configuration.CrawljaxConfiguration; //导入依赖的package包/类
/**
 * Binds the crawl configuration and the objects it exposes (base URL, crawl rules, proxy and
 * browser configuration) as instances, and selects the {@code EmbeddedBrowser} provider
 * depending on whether the browser configuration uses the default builder.
 */
@Override
protected void configure() {
    bind(URL.class).annotatedWith(BaseUrl.class).toInstance(config.getUrl());
    bind(CrawljaxConfiguration.class).toInstance(config);
    bind(CrawlRules.class).toInstance(config.getCrawlRules());
    bind(ProxyConfiguration.class).toInstance(config.getProxyConfiguration());

    BrowserConfiguration browserSettings = config.getBrowserConfig();
    bind(BrowserConfiguration.class).toInstance(browserSettings);

    if (!browserSettings.isDefaultBuilder()) {
        // A custom browser builder was configured: adapt it to a Guice provider.
        bind(EmbeddedBrowser.class).toProvider(
                Providers.guicify(browserSettings.getBrowserBuilder()));
    } else {
        bind(EmbeddedBrowser.class).toProvider(WebDriverBrowserBuilder.class);
    }
}
示例5: Plugins
import com.crawljax.core.configuration.CrawljaxConfiguration; //导入依赖的package包/类
/**
 * Collects the plugins from the configuration into an immutable multimap and registers the
 * counters.
 *
 * @param config
 *            the configuration whose plugin list is read; the list must not be null
 * @param registry
 *            the metric registry stored on this instance and passed to
 *            {@code registerCounters}
 * @throws IllegalArgumentException
 *             presumably thrown by {@code checkArgument} when two or more
 *             {@code DomChangeNotifierPlugin}s end up registered (verify against the
 *             imported {@code checkArgument})
 */
@Inject
public Plugins(CrawljaxConfiguration config, MetricRegistry registry) {
    this.registry = registry;

    List<? extends Plugin> configured = config.getPlugins();
    Preconditions.checkNotNull(configured);

    ImmutableListMultimap.Builder<Class<? extends Plugin>, Plugin> byType =
            ImmutableListMultimap.builder();
    if (!configured.isEmpty()) {
        addPlugins(configured, byType);
    } else {
        LOGGER.warn("No plugins loaded. There will be no output");
    }
    this.plugins = byType.build();

    // At most one DomChangeNotifierPlugin may be registered.
    String tooManyNotifiers = "Only one or none "
            + DomChangeNotifierPlugin.class.getSimpleName()
            + " can be specified";
    checkArgument(this.plugins.get(DomChangeNotifierPlugin.class).size() < 2,
            tooManyNotifiers);

    this.counters = registerCounters(registry);
}
示例6: Crawler
import com.crawljax.core.configuration.CrawljaxConfiguration; //导入依赖的package包/类
/**
 * Stores the injected collaborators and the settings read from the configuration, and
 * creates the candidate extractor and form handler for the context's browser.
 *
 * @param context
 *            the crawler context; its browser is used for the extractor and form handler
 * @param config
 *            supplies the URL, the crawl rules and the maximum depth
 * @param stateComparator
 *            stored as-is
 * @param candidateActionCache
 *            stored as-is
 * @param formHandlerFactory
 *            creates the form handler for the browser
 * @param waitConditionChecker
 *            stored as-is
 * @param elementExtractor
 *            creates the candidate extractor for the browser
 * @param graphProvider
 *            stored as-is
 * @param plugins
 *            stored as-is
 */
@Inject
Crawler(CrawlerContext context, CrawljaxConfiguration config,
        StateComparator stateComparator, UnfiredCandidateActions candidateActionCache,
        FormHandlerFactory formHandlerFactory,
        WaitConditionChecker waitConditionChecker,
        CandidateElementExtractorFactory elementExtractor,
        Provider<InMemoryStateFlowGraph> graphProvider,
        Plugins plugins) {
    // Injected collaborators.
    this.context = context;
    this.plugins = plugins;
    this.graphProvider = graphProvider;
    this.stateComparator = stateComparator;
    this.candidateActionCache = candidateActionCache;
    this.waitConditionChecker = waitConditionChecker;

    // Settings taken from the configuration.
    this.url = config.getUrl();
    this.crawlRules = config.getCrawlRules();
    this.maxDepth = config.getMaximumDepth();

    // The browser must be assigned before the factories below use it.
    this.browser = context.getBrowser();
    this.candidateExtractor = elementExtractor.newExtractor(this.browser);
    this.formHandler = formHandlerFactory.newFormHandler(this.browser);
}
示例7: CandidateElementExtractor
import com.crawljax.core.configuration.CrawljaxConfiguration; //导入依赖的package包/类
/**
 * Creates a new CandidateElementExtractor.
 *
 * @param checker
 *            the ExtractorManager to use for marking handled elements and retrieving the
 *            EventableConditionChecker
 * @param browser
 *            the current browser instance used in the Crawler
 * @param formHandler
 *            the form handler
 * @param config
 *            the crawl configuration; its crawl rules supply the included and excluded
 *            elements, the crawl-frames and click-once flags, and the ignored frame
 *            identifiers
 */
@Inject
public CandidateElementExtractor(ExtractorManager checker, @Assisted EmbeddedBrowser browser,
        FormHandler formHandler, CrawljaxConfiguration config) {
    this.checkedElements = checker;
    this.browser = browser;
    this.formHandler = formHandler;

    CrawlRules crawlRules = config.getCrawlRules();
    PreCrawlConfiguration preCrawl = crawlRules.getPreCrawlConfig();
    this.excludeCrawlElements = asMultiMap(preCrawl.getExcludedElements());

    // Included elements are the pre-crawl includes followed by those of the input
    // specification.
    ImmutableList.Builder<CrawlElement> included = ImmutableList.builder();
    included.addAll(preCrawl.getIncludedElements());
    included.addAll(crawlRules.getInputSpecification().getCrawlElements());
    this.includedCrawlElements = included.build();

    this.crawlFrames = crawlRules.shouldCrawlFrames();
    this.clickOnce = crawlRules.isClickOnce();
    this.ignoredFrameIdentifiers = crawlRules.getIgnoredFrameIdentifiers();
}
示例8: getCrawljaxConfiguration
import com.crawljax.core.configuration.CrawljaxConfiguration; //导入依赖的package包/类
/**
 * Builds the CrawljaxConfiguration for the site served by {@code WEB_SERVER}: wait times,
 * a maximum depth of 3, click-once, the browser configuration, the input specification, and
 * whatever the {@code add*} hooks contribute.
 *
 * @return the built configuration
 */
protected CrawljaxConfiguration getCrawljaxConfiguration() {
    CrawljaxConfigurationBuilder cfg =
            CrawljaxConfiguration.builderFor(WEB_SERVER.getSiteUrl());

    // Both wait times are given in milliseconds.
    cfg.crawlRules().waitAfterEvent(getTimeOutAfterEvent(), TimeUnit.MILLISECONDS);
    cfg.crawlRules().waitAfterReloadUrl(getTimeOutAfterReloadUrl(), TimeUnit.MILLISECONDS);

    cfg.setMaximumDepth(3);
    cfg.crawlRules().clickOnce(true);
    cfg.setBrowserConfig(getBrowserConfiguration());

    addCrawlElements(cfg);
    cfg.crawlRules().setInputSpec(getInputSpecification());

    // Remaining hooks, invoked in the same order as before.
    addCrawlConditions(cfg);
    addOracleComparators(cfg);
    addInvariants(cfg);
    addWaitConditions(cfg);
    addPlugins(cfg);

    return cfg.build();
}
示例9: testExtract
import com.crawljax.core.configuration.CrawljaxConfiguration; //导入依赖的package包/类
/**
 * Verifies that 15 candidate elements are extracted from the demo site when anchors are
 * clickable and click-once is enabled.
 */
@Test
public void testExtract() throws InterruptedException, CrawljaxException {
    CrawljaxConfigurationBuilder rulesBuilder =
            CrawljaxConfiguration.builderFor(DEMO_SITE_SERVER.getSiteUrl().toExternalForm());
    rulesBuilder.crawlRules().click("a");
    rulesBuilder.crawlRules().clickOnce(true);

    // NOTE(review): newElementExtractor appears to initialise the browser field, so it has
    // to run before the navigation below - confirm.
    CandidateElementExtractor extractor = newElementExtractor(rulesBuilder.build());
    browser.goToUrl(DEMO_SITE_SERVER.getSiteUrl());

    List<CandidateElement> found = extractor.extract(DUMMY_STATE);

    assertNotNull(found);
    assertEquals(15, found.size());
}
示例10: newElementExtractor
import com.crawljax.core.configuration.CrawljaxConfiguration; //导入依赖的package包/类
/**
 * Creates a browser for the given configuration (stored in the {@code browser} field) and
 * wires a CandidateElementExtractor on top of it.
 *
 * @param config
 *            the configuration supplying the crawl rules and crawl conditions
 * @return a new extractor bound to the freshly created browser
 */
private CandidateElementExtractor newElementExtractor(CrawljaxConfiguration config) {
    // Side effect: the browser field is replaced on every call.
    browser = new WebDriverBrowserBuilder(config, plugins).get();

    FormHandler forms = new FormHandler(browser, config.getCrawlRules());
    EventableConditionChecker eventableChecker =
            new EventableConditionChecker(config.getCrawlRules());
    ConditionTypeChecker<CrawlCondition> conditionChecker =
            new ConditionTypeChecker<>(
                    config.getCrawlRules().getPreCrawlConfig().getCrawlConditions());
    ExtractorManager manager =
            new CandidateElementManager(eventableChecker, conditionChecker);

    return new CandidateElementExtractor(manager, browser, forms, config);
}
示例11: testExtractExclude
import com.crawljax.core.configuration.CrawljaxConfiguration; //导入依赖的package包/类
/**
 * Verifies that 11 candidate elements are extracted from the demo site when anchors are
 * clickable but the {@code div} with id {@code menubar} is excluded.
 */
@Test
public void testExtractExclude() throws Exception {
    CrawljaxConfigurationBuilder rulesBuilder =
            CrawljaxConfiguration.builderFor(DEMO_SITE_SERVER.getSiteUrl().toExternalForm());
    rulesBuilder.crawlRules().click("a");
    rulesBuilder.crawlRules().dontClick("div").withAttribute("id", "menubar");
    rulesBuilder.crawlRules().clickOnce(true);

    // NOTE(review): newElementExtractor appears to initialise the browser field, so it has
    // to run before the navigation below - confirm.
    CandidateElementExtractor extractor = newElementExtractor(rulesBuilder.build());
    browser.goToUrl(DEMO_SITE_SERVER.getSiteUrl());

    List<CandidateElement> found = extractor.extract(DUMMY_STATE);

    assertNotNull(found);
    assertThat(found, hasSize(11));
}
示例12: testExtractIframeContents
import com.crawljax.core.configuration.CrawljaxConfiguration; //导入依赖的package包/类
/**
 * Verifies that nine candidate elements are extracted from the {@code iframe/} page of a web
 * server started for this test only.
 *
 * @throws Exception
 *             if starting the server, navigating or extracting fails
 */
@Test
public void testExtractIframeContents() throws Exception {
    RunWithWebServer server = new RunWithWebServer("/site");
    server.before();
    // Always stop the server: previously any exception thrown before server.after() left
    // it running.
    try {
        // Build the page URL once; it is needed for both the configuration and navigation.
        String iframeUrl = server.getSiteUrl().toExternalForm() + "iframe/";

        CrawljaxConfigurationBuilder builder = CrawljaxConfiguration.builderFor(iframeUrl);
        builder.crawlRules().click("a");
        CrawljaxConfiguration config = builder.build();

        CandidateElementExtractor extractor = newElementExtractor(config);
        browser.goToUrl(new URL(iframeUrl));
        List<CandidateElement> candidates = extractor.extract(DUMMY_STATE);
        for (CandidateElement e : candidates) {
            LOG.debug("candidate: " + e.getUniqueString());
        }

        assertNotNull(extractor);
        assertNotNull(candidates);
        assertThat(candidates, hasSize(9));
    } finally {
        server.after();
    }
}
示例13: setupForConsumers
import com.crawljax.core.configuration.CrawljaxConfiguration; //导入依赖的package包/类
/**
 * Prepares the fixture fields (executor, candidate actions, exit notifier, session provider
 * and controller) for a crawl with the given number of consumers.
 *
 * @param consumers
 *            the number passed to the Firefox browser configuration; the thread pool gets
 *            two threads more than this
 */
private void setupForConsumers(int consumers) {
    executor = Executors.newFixedThreadPool(consumers + 2);

    BrowserConfiguration firefox = new BrowserConfiguration(BrowserType.FIREFOX, consumers);
    CrawljaxConfiguration crawlConfig = CrawljaxConfiguration
            .builderFor("http://example.com")
            .addPlugin(postCrawlPlugin)
            .setBrowserConfig(firefox)
            .build();

    candidateActions = new UnfiredCandidateActions(crawlConfig.getBrowserConfig(),
            graphProvider, new MetricRegistry());
    consumersDoneLatch = new ExitNotifier(crawlConfig.getMaximumStates());

    // Stub the factory so it returns a consumer wired to the objects created above.
    when(consumerFactory.get()).thenReturn(
            new CrawlTaskConsumer(candidateActions, consumersDoneLatch, crawler));

    crawlSessionProvider = new CrawlSessionProvider(graph, crawlConfig, new MetricRegistry());
    Plugins loadedPlugins = new Plugins(crawlConfig, new MetricRegistry());
    controller = new CrawlController(executor, consumerFactory, crawlConfig,
            consumersDoneLatch, crawlSessionProvider, loadedPlugins);
}
示例14: writeIndexFile
import com.crawljax.core.configuration.CrawljaxConfiguration; //导入依赖的package包/类
/**
 * Writes the model as pretty-printed JSON via {@code writeJsonToOutDir}, then fills a
 * Velocity context with the model and configuration data and passes it to {@code writeFile}
 * together with {@code indexFile} and {@code "index.html"}.
 *
 * @param model
 *            the output model supplying states, edges, statistics and exit status
 * @param config
 *            the crawl configuration, exposed as a readable map and as the crawled URL
 */
private void writeIndexFile(OutPutModel model, CrawljaxConfiguration config) {
    LOG.debug("Writing index file");
    writeJsonToOutDir(Serializer.toPrettyJson(model), JSON_OUTPUT_NAME);

    VelocityContext indexContext = new VelocityContext();

    // Graph data, serialized to JSON.
    indexContext.put("states", Serializer.toPrettyJson(model.getStates()));
    indexContext.put("edges", Serializer.toPrettyJson(model.getEdges()));

    // Configuration and crawl outcome.
    indexContext.put("config", BeanToReadableMap.toMap(config));
    indexContext.put("crawledUrl", config.getUrl());
    indexContext.put("stats", model.getStatistics());
    indexContext.put("exitStatus", model.getExitStatus());

    LOG.debug("Writing urls report");
    indexContext.put("urls", model.getStatistics().getStateStats().getUrls());

    writeFile(indexContext, indexFile, "index.html");
}
示例15: main
import com.crawljax.core.configuration.CrawljaxConfiguration; //导入依赖的package包/类
/**
 * Starts a crawl of the Crawljax demo site using a configuration built without any further
 * customisation.
 *
 * @param args
 *            command-line arguments; not read by this method
 */
public static void main(String[] args) {
    CrawljaxConfiguration demoConfig =
            CrawljaxConfiguration.builderFor("http://demo.crawljax.com/").build();
    new CrawljaxRunner(demoConfig).call();
}