当前位置: 首页>>代码示例>>Java>>正文


Java CrawljaxConfiguration类代码示例

本文整理汇总了Java中com.crawljax.core.configuration.CrawljaxConfiguration的典型用法代码示例。如果您正苦于以下问题:Java CrawljaxConfiguration类的具体用法?Java CrawljaxConfiguration怎么用?Java CrawljaxConfiguration使用的例子?那么,这里精选的类代码示例或许可以为您提供帮助。


CrawljaxConfiguration类属于com.crawljax.core.configuration包,在下文中一共展示了CrawljaxConfiguration类的15个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Java代码示例。

示例1: main

import com.crawljax.core.configuration.CrawljaxConfiguration; //导入依赖的package包/类
/**
 * Crawls the Crawljax demo site with a plugin that logs the stripped DOM of every newly
 * detected state.
 */
public static void main(String[] args) {
	OnNewStatePlugin domPrinter = new OnNewStatePlugin() {

		@Override
		public void onNewState(CrawlerContext context, StateVertex newState) {
			// Log the stripped DOM of the browser that reached the new state.
			LOG.info("Found a new dom! Here it is:\n{}", context.getBrowser().getStrippedDom());
		}

		@Override
		public String toString() {
			return "Our example plugin";
		}
	};

	CrawljaxConfigurationBuilder configBuilder =
	        CrawljaxConfiguration.builderFor("http://demo.crawljax.com/");
	configBuilder.addPlugin(domPrinter);

	new CrawljaxRunner(configBuilder.build()).call();
}
 
开发者ID:aminmf,项目名称:crawljax,代码行数:22,代码来源:PluginExample.java

示例2: main

import com.crawljax.core.configuration.CrawljaxConfiguration; //导入依赖的package包/类
/**
 * Starts a crawl of the demo site with one invariant attached; each violation of that
 * invariant is written to the error log.
 */
public static void main(String[] args) {
	// Condition behind the invariant: checks that the string isn't present.
	NotRegexCondition textAbsent = new NotRegexCondition(
	        "Invariants can be used to perform tests on the current state");

	// Plugin that only reports the violated invariant.
	OnInvariantViolationPlugin violationLogger = new OnInvariantViolationPlugin() {
		@Override
		public void onInvariantViolation(Invariant invariant, CrawlerContext context) {
			LOG.error("\n\n!!! Invariant {} violated !!!\n", invariant);
		}
	};

	CrawljaxConfigurationBuilder configBuilder =
	        CrawljaxConfiguration.builderFor("http://demo.crawljax.com/");
	configBuilder.crawlRules().addInvariant("Detect a string", textAbsent);
	configBuilder.addPlugin(violationLogger);

	new CrawljaxRunner(configBuilder.build()).call();
}
 
开发者ID:aminmf,项目名称:crawljax,代码行数:27,代码来源:InvariantExample.java

示例3: main

import com.crawljax.core.configuration.CrawljaxConfiguration; //导入依赖的package包/类
/**
 * Entry point: sets the crawl limits, click rules, plugin and input specification on a
 * builder for {@code URL}, then runs the crawl.
 */
public static void main(String[] args) {
	CrawljaxConfigurationBuilder configBuilder = CrawljaxConfiguration.builderFor(URL);

	// Bound the crawl: maximum number of states and maximum depth.
	configBuilder.setMaximumStates(MAX_NUMBER_STATES);
	configBuilder.setMaximumDepth(MAX_DEPTH);

	configBuilder.crawlRules().insertRandomDataInInputForms(false);

	// Click anchors and buttons ...
	configBuilder.crawlRules().click("a");
	configBuilder.crawlRules().click("button");
	// ... except the anchors matched below.
	configBuilder.crawlRules().dontClick("a").underXPath("//DIV[@id='guser']");
	configBuilder.crawlRules().dontClick("a").withText("Language Tools");

	configBuilder.addPlugin(new SamplePlugin());
	configBuilder.crawlRules().setInputSpec(getInputSpecification());

	new CrawljaxRunner(configBuilder.build()).call();
}
 
开发者ID:aminmf,项目名称:crawljax,代码行数:26,代码来源:Runner.java

示例4: configure

import com.crawljax.core.configuration.CrawljaxConfiguration; //导入依赖的package包/类
/**
 * Binds the configuration object and the parts read from it, then chooses the provider
 * used for {@code EmbeddedBrowser}.
 */
@Override
protected void configure() {
	bind(URL.class).annotatedWith(BaseUrl.class).toInstance(config.getUrl());
	bind(CrawljaxConfiguration.class).toInstance(config);
	bind(CrawlRules.class).toInstance(config.getCrawlRules());
	bind(ProxyConfiguration.class).toInstance(config.getProxyConfiguration());

	BrowserConfiguration browserSettings = config.getBrowserConfig();
	bind(BrowserConfiguration.class).toInstance(browserSettings);

	if (!browserSettings.isDefaultBuilder()) {
		// Non-default builder: wrap the configured builder with Providers.guicify.
		bind(EmbeddedBrowser.class).toProvider(
		        Providers.guicify(browserSettings.getBrowserBuilder()));
		return;
	}
	bind(EmbeddedBrowser.class).toProvider(WebDriverBrowserBuilder.class);
}
 
开发者ID:aminmf,项目名称:crawljax,代码行数:18,代码来源:ConfigurationModule.java

示例5: Plugins

import com.crawljax.core.configuration.CrawljaxConfiguration; //导入依赖的package包/类
/**
 * Collects the plugins of the given configuration into a multimap (via {@code addPlugins})
 * and registers the counters.
 *
 * @param config
 *            configuration whose plugin list is read; the list must not be null
 * @param registry
 *            metric registry kept on this instance and handed to {@code registerCounters}
 */
@Inject
public Plugins(CrawljaxConfiguration config, MetricRegistry registry) {
	this.registry = registry;

	List<? extends Plugin> configured = config.getPlugins();
	Preconditions.checkNotNull(configured);

	ImmutableListMultimap.Builder<Class<? extends Plugin>, Plugin> byType =
	        ImmutableListMultimap.builder();
	if (!configured.isEmpty()) {
		addPlugins(configured, byType);
	} else {
		LOGGER.warn("No plugins loaded. There will be no output");
	}
	this.plugins = byType.build();

	// Fewer than two DomChangeNotifierPlugin entries are accepted.
	int notifierCount = this.plugins.get(DomChangeNotifierPlugin.class).size();
	checkArgument(notifierCount < 2, "Only one or none "
	        + DomChangeNotifierPlugin.class.getSimpleName()
	        + " can be specified");

	this.counters = registerCounters(registry);
}
 
开发者ID:aminmf,项目名称:crawljax,代码行数:24,代码来源:Plugins.java

示例6: Crawler

import com.crawljax.core.configuration.CrawljaxConfiguration; //导入依赖的package包/类
/**
 * Stores the injected collaborators, copies the URL, crawl rules and maximum depth out of
 * the configuration, and creates the candidate extractor and form handler for the
 * context's browser.
 */
@Inject
Crawler(CrawlerContext context, CrawljaxConfiguration config,
		StateComparator stateComparator, UnfiredCandidateActions candidateActionCache,
		FormHandlerFactory formHandlerFactory,
		WaitConditionChecker waitConditionChecker,
		CandidateElementExtractorFactory elementExtractor,
		Provider<InMemoryStateFlowGraph> graphProvider,
		Plugins plugins) {
	// Collaborators kept exactly as injected.
	this.context = context;
	this.plugins = plugins;
	this.graphProvider = graphProvider;
	this.stateComparator = stateComparator;
	this.candidateActionCache = candidateActionCache;
	this.waitConditionChecker = waitConditionChecker;

	// Values read from the configuration.
	this.url = config.getUrl();
	this.crawlRules = config.getCrawlRules();
	this.maxDepth = config.getMaximumDepth();

	// Both factories receive the browser taken from the context.
	this.browser = context.getBrowser();
	this.candidateExtractor = elementExtractor.newExtractor(this.browser);
	this.formHandler = formHandlerFactory.newFormHandler(this.browser);
}
 
开发者ID:aminmf,项目名称:crawljax,代码行数:22,代码来源:Crawler.java

示例7: CandidateElementExtractor

import com.crawljax.core.configuration.CrawljaxConfiguration; //导入依赖的package包/类
/**
 * Creates an extractor from its collaborators and the crawl rules of the configuration.
 * 
 * @param checker
 *            the ExtractorManager stored as {@code checkedElements}
 * @param browser
 *            the browser instance this extractor works with
 * @param formHandler
 *            the form handler.
 * @param config
 *            the configuration whose crawl rules supply the excluded and included elements,
 *            the frame-crawling and click-once flags, and the ignored frame identifiers
 */
@Inject
public CandidateElementExtractor(ExtractorManager checker, @Assisted EmbeddedBrowser browser,
        FormHandler formHandler, CrawljaxConfiguration config) {
	this.checkedElements = checker;
	this.browser = browser;
	this.formHandler = formHandler;

	CrawlRules crawlRules = config.getCrawlRules();
	PreCrawlConfiguration preCrawl = crawlRules.getPreCrawlConfig();
	this.excludeCrawlElements = asMultiMap(preCrawl.getExcludedElements());

	// Included elements: the pre-crawl ones first, then those of the input specification.
	ImmutableList.Builder<CrawlElement> included = ImmutableList.builder();
	included.addAll(preCrawl.getIncludedElements());
	included.addAll(crawlRules.getInputSpecification().getCrawlElements());
	this.includedCrawlElements = included.build();

	this.crawlFrames = crawlRules.shouldCrawlFrames();
	this.clickOnce = crawlRules.isClickOnce();
	this.ignoredFrameIdentifiers = crawlRules.getIgnoredFrameIdentifiers();
}
 
开发者ID:aminmf,项目名称:crawljax,代码行数:34,代码来源:CandidateElementExtractor.java

示例8: getCrawljaxConfiguration

import com.crawljax.core.configuration.CrawljaxConfiguration; //导入依赖的package包/类
/**
 * Builds the CrawljaxConfiguration for the site served by {@code WEB_SERVER}.
 * <p>
 * The wait times after an event and after a URL reload come from
 * {@code getTimeOutAfterEvent()} and {@code getTimeOutAfterReloadUrl()} and are passed as
 * milliseconds. The maximum depth is fixed at 3 and click-once is enabled. The remaining
 * settings are contributed by helper methods defined outside this block.
 * 
 * @return the configuration produced by the builder
 */
protected CrawljaxConfiguration getCrawljaxConfiguration() {

	CrawljaxConfigurationBuilder builder =
	        CrawljaxConfiguration.builderFor(WEB_SERVER.getSiteUrl());
	// Wait times, both expressed in milliseconds.
	builder.crawlRules().waitAfterEvent(getTimeOutAfterEvent(), TimeUnit.MILLISECONDS);
	builder.crawlRules()
	        .waitAfterReloadUrl(getTimeOutAfterReloadUrl(), TimeUnit.MILLISECONDS);
	builder.setMaximumDepth(3);
	builder.crawlRules().clickOnce(true);

	builder.setBrowserConfig(getBrowserConfiguration());

	// NOTE(review): the add* helpers below are not visible here; presumably each one
	// registers its kind of setting on the builder - confirm before reordering the calls.
	addCrawlElements(builder);

	builder.crawlRules().setInputSpec(getInputSpecification());

	addCrawlConditions(builder);
	addOracleComparators(builder);
	addInvariants(builder);
	addWaitConditions(builder);
	addPlugins(builder);

	return builder.build();
}
 
开发者ID:aminmf,项目名称:crawljax,代码行数:28,代码来源:LargeTestBase.java

示例9: testExtract

import com.crawljax.core.configuration.CrawljaxConfiguration; //导入依赖的package包/类
/**
 * Extracts candidates from the demo site with anchors clickable and click-once enabled,
 * and expects 15 of them.
 */
@Test
public void testExtract() throws InterruptedException, CrawljaxException {
	String siteUrl = DEMO_SITE_SERVER.getSiteUrl().toExternalForm();
	CrawljaxConfigurationBuilder configBuilder = CrawljaxConfiguration.builderFor(siteUrl);
	configBuilder.crawlRules().click("a");
	configBuilder.crawlRules().clickOnce(true);

	CandidateElementExtractor elementExtractor = newElementExtractor(configBuilder.build());
	browser.goToUrl(DEMO_SITE_SERVER.getSiteUrl());

	List<CandidateElement> found = elementExtractor.extract(DUMMY_STATE);
	assertNotNull(found);
	assertEquals(15, found.size());
}
 
开发者ID:aminmf,项目名称:crawljax,代码行数:17,代码来源:CandidateElementExtractorTest.java

示例10: newElementExtractor

import com.crawljax.core.configuration.CrawljaxConfiguration; //导入依赖的package包/类
/**
 * Obtains a browser from a WebDriverBrowserBuilder, assigns it to {@code browser}, and
 * assembles a CandidateElementExtractor around it using the crawl rules of the given
 * configuration.
 */
private CandidateElementExtractor newElementExtractor(CrawljaxConfiguration config) {
	browser = new WebDriverBrowserBuilder(config, plugins).get();
	FormHandler forms = new FormHandler(browser, config.getCrawlRules());

	EventableConditionChecker eventableChecker =
	        new EventableConditionChecker(config.getCrawlRules());
	ConditionTypeChecker<CrawlCondition> conditionChecker =
	        new ConditionTypeChecker<>(
	                config.getCrawlRules().getPreCrawlConfig().getCrawlConditions());
	ExtractorManager manager =
	        new CandidateElementManager(eventableChecker, conditionChecker);

	return new CandidateElementExtractor(manager, browser, forms, config);
}
 
开发者ID:aminmf,项目名称:crawljax,代码行数:17,代码来源:CandidateElementExtractorTest.java

示例11: testExtractExclude

import com.crawljax.core.configuration.CrawljaxConfiguration; //导入依赖的package包/类
/**
 * Extracts candidates from the demo site while excluding the div with id "menubar",
 * and expects 11 of them.
 */
@Test
public void testExtractExclude() throws Exception {
	String siteUrl = DEMO_SITE_SERVER.getSiteUrl().toExternalForm();
	CrawljaxConfigurationBuilder configBuilder = CrawljaxConfiguration.builderFor(siteUrl);
	configBuilder.crawlRules().click("a");
	configBuilder.crawlRules().dontClick("div").withAttribute("id", "menubar");
	configBuilder.crawlRules().clickOnce(true);

	CandidateElementExtractor elementExtractor = newElementExtractor(configBuilder.build());
	browser.goToUrl(DEMO_SITE_SERVER.getSiteUrl());

	List<CandidateElement> found = elementExtractor.extract(DUMMY_STATE);
	assertNotNull(found);
	assertThat(found, hasSize(11));
}
 
开发者ID:aminmf,项目名称:crawljax,代码行数:19,代码来源:CandidateElementExtractorTest.java

示例12: testExtractIframeContents

import com.crawljax.core.configuration.CrawljaxConfiguration; //导入依赖的package包/类
/**
 * Verifies that extracting candidates from the {@code iframe/} page of a site served by a
 * manually started web server yields nine candidates.
 * <p>
 * The server is started by hand with {@code before()}, so it is stopped in a
 * {@code finally} block. Without that, an exception while building the extractor, loading
 * the page or extracting would skip {@code after()} and leave the server running.
 * 
 * @throws Exception
 *             if starting the server, loading the page or extracting fails
 */
@Test
public void testExtractIframeContents() throws Exception {
	RunWithWebServer server = new RunWithWebServer("/site");
	server.before();

	CandidateElementExtractor extractor;
	List<CandidateElement> candidates;
	try {
		// Build the page address once; the configuration and the browser share it.
		String iframeUrl = server.getSiteUrl().toExternalForm() + "iframe/";

		CrawljaxConfigurationBuilder builder = CrawljaxConfiguration.builderFor(iframeUrl);
		builder.crawlRules().click("a");
		CrawljaxConfiguration config = builder.build();

		extractor = newElementExtractor(config);
		browser.goToUrl(new URL(iframeUrl));
		candidates = extractor.extract(DUMMY_STATE);

		for (CandidateElement e : candidates) {
			LOG.debug("candidate: " + e.getUniqueString());
		}
	} finally {
		// Always stop the server, even when one of the steps above throws.
		server.after();
	}

	assertNotNull(extractor);
	assertNotNull(candidates);
	assertThat(candidates, hasSize(9));

}
 
开发者ID:aminmf,项目名称:crawljax,代码行数:26,代码来源:CandidateElementExtractorTest.java

示例13: setupForConsumers

import com.crawljax.core.configuration.CrawljaxConfiguration; //导入依赖的package包/类
/**
 * Prepares the executor, configuration, candidate actions, exit notifier, consumer factory
 * stub, session provider and controller for the given number of consumers.
 */
private void setupForConsumers(int consumers) {
	// Thread pool sized at two more than the number of consumers.
	executor = Executors.newFixedThreadPool(consumers + 2);

	BrowserConfiguration browsers = new BrowserConfiguration(BrowserType.FIREFOX, consumers);
	CrawljaxConfiguration crawlConfig =
	        CrawljaxConfiguration
	                .builderFor("http://example.com")
	                .addPlugin(postCrawlPlugin)
	                .setBrowserConfig(browsers)
	                .build();

	candidateActions =
	        new UnfiredCandidateActions(crawlConfig.getBrowserConfig(), graphProvider,
	                new MetricRegistry());
	consumersDoneLatch = new ExitNotifier(crawlConfig.getMaximumStates());

	// The stubbed factory returns one consumer built from the fixtures above.
	when(consumerFactory.get())
	        .thenReturn(new CrawlTaskConsumer(candidateActions, consumersDoneLatch, crawler));

	crawlSessionProvider = new CrawlSessionProvider(graph, crawlConfig, new MetricRegistry());

	controller = new CrawlController(executor, consumerFactory, crawlConfig,
	        consumersDoneLatch, crawlSessionProvider,
	        new Plugins(crawlConfig, new MetricRegistry()));

}
 
开发者ID:aminmf,项目名称:crawljax,代码行数:27,代码来源:CrawlControllerTest.java

示例14: writeIndexFile

import com.crawljax.core.configuration.CrawljaxConfiguration; //导入依赖的package包/类
/**
 * Writes the model as pretty-printed JSON via {@code writeJsonToOutDir}, then fills a
 * Velocity context with the model and configuration data and passes it to
 * {@code writeFile} together with {@code indexFile} and the name "index.html".
 */
private void writeIndexFile(OutPutModel model, CrawljaxConfiguration config) {
	LOG.debug("Writing index file");
	VelocityContext templateData = new VelocityContext();

	// The JSON dump of the whole model is written before the context is filled.
	writeJsonToOutDir(Serializer.toPrettyJson(model), JSON_OUTPUT_NAME);

	templateData.put("states", Serializer.toPrettyJson(model.getStates()));
	templateData.put("edges", Serializer.toPrettyJson(model.getEdges()));
	templateData.put("config", BeanToReadableMap.toMap(config));
	templateData.put("crawledUrl", config.getUrl());
	templateData.put("stats", model.getStatistics());
	templateData.put("exitStatus", model.getExitStatus());

	LOG.debug("Writing urls report");
	templateData.put("urls", model.getStatistics().getStateStats().getUrls());

	writeFile(templateData, indexFile, "index.html");
}
 
开发者ID:aminmf,项目名称:crawljax,代码行数:17,代码来源:OutputBuilder.java

示例15: main

import com.crawljax.core.configuration.CrawljaxConfiguration; //导入依赖的package包/类
/**
 * Launches a crawl of the demo site with a configuration built without any customisation.
 */
public static void main(String[] args) {
	CrawljaxConfiguration plainConfig =
	        CrawljaxConfiguration.builderFor("http://demo.crawljax.com/").build();
	new CrawljaxRunner(plainConfig).call();
}
 
开发者ID:aminmf,项目名称:crawljax,代码行数:10,代码来源:SimplestExample.java


注:本文中的com.crawljax.core.configuration.CrawljaxConfiguration类示例由纯净天空整理自Github/MSDocs等开源代码及文档管理平台,相关代码片段筛选自各路编程大神贡献的开源项目,源码版权归原作者所有,传播和使用请参考对应项目的License;未经允许,请勿转载。