本文整理汇总了Java中com.crawljax.core.configuration.CrawljaxConfiguration.CrawljaxConfigurationBuilder类的典型用法代码示例。如果您正苦于以下问题：Java CrawljaxConfigurationBuilder类的具体用法？Java CrawljaxConfigurationBuilder怎么用？Java CrawljaxConfigurationBuilder使用的例子？那么，这里精选的类代码示例或许可以为您提供帮助。
CrawljaxConfigurationBuilder是com.crawljax.core.configuration包中CrawljaxConfiguration类的嵌套类。下文一共展示了CrawljaxConfigurationBuilder类的15个代码示例，这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞，您的评价将有助于系统推荐出更棒的Java代码示例。
示例1: main
import com.crawljax.core.configuration.CrawljaxConfiguration.CrawljaxConfigurationBuilder; //导入依赖的package包/类
/**
 * Crawls http://demo.crawljax.com/ with a single plugin that logs the stripped DOM each
 * time a new state is reported.
 */
public static void main(String[] args) {
    CrawljaxConfigurationBuilder builder =
            CrawljaxConfiguration.builderFor("http://demo.crawljax.com/");

    // Named separately so the registration below reads as a single statement.
    OnNewStatePlugin domPrinter = new OnNewStatePlugin() {
        @Override
        public void onNewState(CrawlerContext context, StateVertex newState) {
            // The stripped DOM of the freshly detected state ends up in the log output.
            LOG.info("Found a new dom! Here it is:\n{}", context.getBrowser().getStrippedDom());
        }

        @Override
        public String toString() {
            return "Our example plugin";
        }
    };
    builder.addPlugin(domPrinter);

    new CrawljaxRunner(builder.build()).call();
}
示例2: main
import com.crawljax.core.configuration.CrawljaxConfiguration.CrawljaxConfigurationBuilder; //导入依赖的package包/类
/**
 * Starts the crawl: registers one invariant on the crawl rules and a plugin that logs
 * every invariant violation.
 */
public static void main(String[] args) {
    CrawljaxConfigurationBuilder builder =
            CrawljaxConfiguration.builderFor("http://demo.crawljax.com/");

    // Invariant: the text below must not be present.
    NotRegexCondition textAbsent = new NotRegexCondition(
            "Invariants can be used to perform tests on the current state");
    builder.crawlRules().addInvariant("Detect a string", textAbsent);

    // The plugin does nothing but report the violated invariant.
    OnInvariantViolationPlugin violationLogger = new OnInvariantViolationPlugin() {
        @Override
        public void onInvariantViolation(Invariant invariant, CrawlerContext context) {
            LOG.error("\n\n!!! Invariant {} violated !!!\n", invariant);
        }
    };
    builder.addPlugin(violationLogger);

    new CrawljaxRunner(builder.build()).call();
}
示例3: main
import com.crawljax.core.configuration.CrawljaxConfiguration.CrawljaxConfigurationBuilder; //导入依赖的package包/类
/**
 * Program entry point: sets up click rules, crawl limits, a plugin and the input
 * specification, then runs the crawl.
 */
public static void main(String[] args) {
    CrawljaxConfigurationBuilder builder = CrawljaxConfiguration.builderFor(URL);
    builder.crawlRules().insertRandomDataInInputForms(false);

    // Elements to click: anchors and buttons ...
    builder.crawlRules().click("a");
    builder.crawlRules().click("button");

    // ... with these anchors excluded.
    builder.crawlRules().dontClick("a").underXPath("//DIV[@id='guser']");
    builder.crawlRules().dontClick("a").withText("Language Tools");

    // Keep the crawl bounded in both state count and depth.
    builder.setMaximumStates(MAX_NUMBER_STATES);
    builder.setMaximumDepth(MAX_DEPTH);

    builder.addPlugin(new SamplePlugin());
    builder.crawlRules().setInputSpec(getInputSpecification());

    new CrawljaxRunner(builder.build()).call();
}
示例4: testHiddenElementsSiteCrawl
import com.crawljax.core.configuration.CrawljaxConfiguration.CrawljaxConfigurationBuilder; //导入依赖的package包/类
/**
 * Crawls the "hidden-elements-site" with hidden-anchor crawling switched on and checks the
 * number of states found. Demonstrates
 * <a href='https://github.com/crawljax/crawljax/issues/97'>Issue 97</a>.
 */
@Test
public void testHiddenElementsSiteCrawl() throws Exception {
    BaseCrawler crawler = new BaseCrawler("hidden-elements-site") {
        @Override
        public CrawljaxConfigurationBuilder newCrawlConfigurationBuilder() {
            CrawljaxConfigurationBuilder config = super.newCrawlConfigurationBuilder();
            config.crawlRules().crawlHiddenAnchors(true);
            return config;
        }
    };
    CrawlSession crawl = crawler.crawl();

    /*
     * TODO Fix issue #97 https://github.com/crawljax/crawljax/issues/97 It is now partly
     * hacked by following HREF links.
     */
    int withIssue97 = 3 - 1;
    assertThat(crawl.getStateFlowGraph(), hasStates(withIssue97));
}
示例5: chromeProxyConfig
import com.crawljax.core.configuration.CrawljaxConfiguration.CrawljaxConfigurationBuilder; //导入依赖的package包/类
/**
 * Crawls the simple-link page with a Chrome browser configuration and expects two states.
 * Guarded by an assumption on the {@code webdriver.chrome.driver} system property.
 */
@Test
public void chromeProxyConfig() {
    assumeThat(System.getProperty("webdriver.chrome.driver"), is(notNullValue()));

    BaseCrawler crawler = new BaseCrawler(Resource.newClassPathResource("/site"),
            "simplelink/simplelink.html") {
        @Override
        public CrawljaxConfigurationBuilder newCrawlConfigurationBuilder() {
            CrawljaxConfigurationBuilder config = super.newCrawlConfigurationBuilder();
            BrowserConfiguration chrome = new BrowserConfiguration(BrowserType.CHROME);
            config.setBrowserConfig(chrome);
            return config;
        }
    };
    CrawlSession crawl = crawler.crawl();

    assertThat(crawl.getStateFlowGraph(), hasStates(2));
}
示例6: whenStopIsCalledTheCrawlerStopsGracefully
import com.crawljax.core.configuration.CrawljaxConfiguration.CrawljaxConfigurationBuilder; //导入依赖的package包/类
/**
 * Verifies that stopping a running crawl makes the runner report
 * {@code ExitStatus.STOPPED}.
 *
 * <p>The crawl of "infinite.html" is configured without depth or state limits and is
 * submitted to a single-thread executor. After 15 seconds the runner is asked to stop and
 * the executor is given up to 30 seconds to terminate.
 *
 * @throws Exception if the test thread is interrupted while sleeping or waiting
 */
@Test(timeout = 60_000)
public void whenStopIsCalledTheCrawlerStopsGracefully() throws Exception {
    CrawljaxConfigurationBuilder builder = SERVER.newConfigBuilder("infinite.html");
    // NOTE(review): this chain previously called setUnlimitedCrawlDepth() twice; the
    // redundant call was removed. Possibly an unlimited-runtime setting was intended - confirm.
    CrawljaxRunner runner = new CrawljaxRunner(builder.setUnlimitedCrawlDepth()
            .setUnlimitedStates()
            .build());
    ExecutorService executor = Executors.newSingleThreadExecutor();
    try {
        executor.submit(runner);
        Thread.sleep(TimeUnit.SECONDS.toMillis(15));
        runner.stop();
        executor.shutdown();
        // Fail right here if the crawl did not finish in time, instead of on a
        // misleading exit-status assertion further down.
        assertThat(executor.awaitTermination(30, TimeUnit.SECONDS), is(true));
    } finally {
        // Do not leave the worker thread behind when the test fails or is interrupted;
        // this is a no-op once the executor has terminated.
        executor.shutdownNow();
    }
    assertThat(runner.getReason(), is(ExitStatus.STOPPED));
}
示例7: getCrawljaxConfiguration
import com.crawljax.core.configuration.CrawljaxConfiguration.CrawljaxConfigurationBuilder; //导入依赖的package包/类
/**
 * Builds the CrawljaxConfiguration for the web server's site URL: wait times, a maximum
 * depth of 3, click-once behaviour, the browser configuration, the input specification
 * and everything contributed by the {@code add*} helper methods.
 *
 * @return the finished configuration
 */
protected CrawljaxConfiguration getCrawljaxConfiguration() {
    CrawljaxConfigurationBuilder config =
            CrawljaxConfiguration.builderFor(WEB_SERVER.getSiteUrl());

    // Timing, depth and click behaviour.
    config.crawlRules().waitAfterEvent(getTimeOutAfterEvent(), TimeUnit.MILLISECONDS);
    config.crawlRules().waitAfterReloadUrl(getTimeOutAfterReloadUrl(), TimeUnit.MILLISECONDS);
    config.setMaximumDepth(3);
    config.crawlRules().clickOnce(true);
    config.setBrowserConfig(getBrowserConfiguration());

    // The remaining set-up is delegated to helpers; their call order is kept unchanged.
    addCrawlElements(config);
    config.crawlRules().setInputSpec(getInputSpecification());
    addCrawlConditions(config);
    addOracleComparators(config);
    addInvariants(config);
    addWaitConditions(config);
    addPlugins(config);

    return config.build();
}
示例8: addPlugins
import com.crawljax.core.configuration.CrawljaxConfiguration.CrawljaxConfigurationBuilder; //导入依赖的package包/类
/**
 * Registers the test plugins on the given builder: a PostCrawlStateGraphChecker and a
 * plugin that records every violated invariant.
 *
 * @param crawljaxConfiguration
 *            the configuration builder that receives the plugins.
 */
protected static void addPlugins(CrawljaxConfigurationBuilder crawljaxConfiguration) {
    crawljaxConfiguration.addPlugin(new PostCrawlStateGraphChecker());

    OnInvariantViolationPlugin violationRecorder = new OnInvariantViolationPlugin() {
        @Override
        public void onInvariantViolation(Invariant invariant, CrawlerContext context) {
            LargeTestBase.violatedInvariants.add(invariant);
            // Only a violation on a DOM containing the invariant text counts as "correct".
            if (!context.getBrowser().getStrippedDom().contains(INVARIANT_TEXT)) {
                return;
            }
            violatedInvariantStateIsCorrect = true;
            LOG.warn("Invariant violated: " + invariant.getDescription());
        }
    };
    crawljaxConfiguration.addPlugin(violationRecorder);
}
示例9: testExtract
import com.crawljax.core.configuration.CrawljaxConfiguration.CrawljaxConfigurationBuilder; //导入依赖的package包/类
/**
 * Extracts candidate elements from the demo site with anchors clickable (click-once) and
 * expects 15 of them.
 */
@Test
public void testExtract() throws InterruptedException, CrawljaxException {
    CrawljaxConfigurationBuilder builder = CrawljaxConfiguration
            .builderFor(DEMO_SITE_SERVER.getSiteUrl().toExternalForm());
    builder.crawlRules().click("a");
    builder.crawlRules().clickOnce(true);

    CandidateElementExtractor extractor = newElementExtractor(builder.build());
    browser.goToUrl(DEMO_SITE_SERVER.getSiteUrl());

    List<CandidateElement> found = extractor.extract(DUMMY_STATE);
    assertNotNull(found);
    assertEquals(15, found.size());
}
示例10: testExtractExclude
import com.crawljax.core.configuration.CrawljaxConfiguration.CrawljaxConfigurationBuilder; //导入依赖的package包/类
/**
 * Same extraction as the plain anchor case, but with a dontClick rule for the div whose
 * id is "menubar"; expects 11 candidates.
 */
@Test
public void testExtractExclude() throws Exception {
    CrawljaxConfigurationBuilder builder = CrawljaxConfiguration
            .builderFor(DEMO_SITE_SERVER.getSiteUrl().toExternalForm());
    builder.crawlRules().click("a");
    builder.crawlRules().dontClick("div").withAttribute("id", "menubar");
    builder.crawlRules().clickOnce(true);

    CandidateElementExtractor extractor = newElementExtractor(builder.build());
    browser.goToUrl(DEMO_SITE_SERVER.getSiteUrl());

    List<CandidateElement> found = extractor.extract(DUMMY_STATE);
    assertNotNull(found);
    assertThat(found, hasSize(11));
}
示例11: testExtractIframeContents
import com.crawljax.core.configuration.CrawljaxConfiguration.CrawljaxConfigurationBuilder; //导入依赖的package包/类
/**
 * Extracts candidate elements from the "iframe/" page of a locally started web server
 * and expects nine of them.
 *
 * <p>The server is started by this test itself, so it is stopped in a {@code finally}
 * block: a failing browser or extraction step must not leave it running.
 *
 * @throws Exception if starting the server, navigating or extracting fails
 */
@Test
public void testExtractIframeContents() throws Exception {
    RunWithWebServer server = new RunWithWebServer("/site");
    server.before();
    try {
        // Built once and used for both the crawl configuration and the browser navigation.
        String iframeUrl = server.getSiteUrl().toExternalForm() + "iframe/";

        CrawljaxConfigurationBuilder builder = CrawljaxConfiguration.builderFor(iframeUrl);
        builder.crawlRules().click("a");
        CrawljaxConfiguration config = builder.build();

        CandidateElementExtractor extractor = newElementExtractor(config);
        // Checked before first use; afterwards the check could never fail.
        assertNotNull(extractor);

        browser.goToUrl(new URL(iframeUrl));
        List<CandidateElement> candidates = extractor.extract(DUMMY_STATE);
        assertNotNull(candidates);
        for (CandidateElement e : candidates) {
            LOG.debug("candidate: " + e.getUniqueString());
        }
        assertThat(candidates, hasSize(9));
    } finally {
        server.after();
    }
}
示例12: get
import com.crawljax.core.configuration.CrawljaxConfiguration.CrawljaxConfigurationBuilder; //导入依赖的package包/类
/**
 * Runs a crawl of the "hover-test-site" class-path resource with a CrawlOverview plugin
 * and returns that plugin's result. The crawl's output directory is set to
 * {@code getTempDir()}.
 *
 * @return the result reported by the CrawlOverview plugin
 */
@Override
public OutPutModel get() {
    LoggerFactory.getLogger(RunHoverCrawl.class).info("Running the hover crawl");

    Resource hoverSiteBase = Resource.newClassPathResource("hover-test-site");
    BaseCrawler hoverSiteCrawl = new BaseCrawler(hoverSiteBase, "") {
        @Override
        protected CrawljaxConfigurationBuilder newCrawlConfigurationBuilder() {
            return super.newCrawlConfigurationBuilder().setOutputDirectory(getTempDir());
        }
    };

    CrawlOverview overview = new CrawlOverview();
    hoverSiteCrawl.crawlWith(overview);
    return overview.getResult();
}
示例13: runCrawl
import com.crawljax.core.configuration.CrawljaxConfiguration.CrawljaxConfigurationBuilder; //导入依赖的package包/类
/**
 * One-time set-up: crawls the simple site with a CrawlOverview plugin, writing output to
 * the temp folder, and stores the plugin result and the output folder in static fields.
 */
@BeforeClass
public static void runCrawl() throws Exception {
    outFolder = TMP_FOLDER.getTempDir();

    SimpleSiteCrawl crawl = new SimpleSiteCrawl() {
        @Override
        protected CrawljaxConfigurationBuilder newCrawlConfigurationBuilder() {
            CrawljaxConfigurationBuilder config = super.newCrawlConfigurationBuilder();
            return config.setOutputDirectory(TMP_FOLDER.getTempDir());
        }
    };
    crawl.setup();

    CrawlOverview overview = new CrawlOverview();
    crawl.crawlWith(overview);
    result = overview.getResult();

    LOG.debug("TMP folder is in {}", outFolder.getAbsoluteFile());
}
示例14: newCrawlConfigurationBuilder
import com.crawljax.core.configuration.CrawljaxConfiguration.CrawljaxConfigurationBuilder; //导入依赖的package包/类
/**
 * Takes the inherited configuration builder and adds two crawl rules: a click rule for
 * "a" under the XPath {@code //A[@class='click']} and a dontClickChildrenOf rule for the
 * "div" with id "dontClick".
 *
 * @return the inherited builder with the extra rules applied
 */
@Override
protected CrawljaxConfigurationBuilder newCrawlConfigurationBuilder() {
    CrawljaxConfigurationBuilder config = super.newCrawlConfigurationBuilder();

    config.crawlRules()
            .click("a")
            .underXPath("//A[@class='click']");
    config.crawlRules()
            .dontClickChildrenOf("div")
            .withId("dontClick");

    return config;
}
示例15: configureTimers
import com.crawljax.core.configuration.CrawljaxConfiguration.CrawljaxConfigurationBuilder; //导入依赖的package包/类
/**
 * Copies the timing options that were explicitly specified onto the builder: the maximum
 * run time (in minutes) and the waits after an event and after a reload (in
 * milliseconds). Options that were not specified leave the builder untouched.
 *
 * @param builder
 *            the configuration builder to update.
 */
private void configureTimers(CrawljaxConfigurationBuilder builder) {
    // Overall run-time limit.
    if (options.specifiesTimeOut()) {
        builder.setMaximumRunTime(
                options.getSpecifiedTimeOut(),
                TimeUnit.MINUTES);
    }

    // Pause after each fired event.
    if (options.specifiesWaitAfterEvent()) {
        builder.crawlRules().waitAfterEvent(
                options.getSpecifiedWaitAfterEvent(),
                TimeUnit.MILLISECONDS);
    }

    // Pause after each URL reload.
    if (options.specifiesWaitAfterReload()) {
        builder.crawlRules().waitAfterReloadUrl(
                options.getSpecifiedWaitAfterReload(),
                TimeUnit.MILLISECONDS);
    }
}