This article collects typical usage examples of the Java method edu.uci.ics.crawler4j.crawler.CrawlConfig.isResumableCrawling. If you are unsure what CrawlConfig.isResumableCrawling does or how to call it, the curated examples below should help; you can also explore the enclosing class edu.uci.ics.crawler4j.crawler.CrawlConfig for more context.
The 3 code examples of CrawlConfig.isResumableCrawling shown below are ordered by popularity by default.
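For orientation: isResumableCrawling() reports whether the crawl was configured to persist its state so that it can be resumed after a crash or restart. A minimal sketch of setting that flag, using only the standard CrawlConfig setters (the storage folder argument is a placeholder):

import edu.uci.ics.crawler4j.crawler.CrawlConfig;

public class ResumableConfigSketch {
    // Builds a config whose isResumableCrawling() returns true.
    public static CrawlConfig resumableConfig(String storageFolder) {
        CrawlConfig config = new CrawlConfig();
        config.setCrawlStorageFolder(storageFolder); // where intermediate crawl state is persisted
        config.setResumableCrawling(true);           // the flag the examples below query
        return config;
    }
}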
Example 1: DocIDServer
import edu.uci.ics.crawler4j.crawler.CrawlConfig; // import the class whose method is demonstrated
import com.sleepycat.je.*; // Berkeley DB JE: Environment, DatabaseConfig, DatabaseException
public DocIDServer(Environment env, CrawlConfig config) throws DatabaseException {
    super(config);
    DatabaseConfig dbConfig = new DatabaseConfig();
    dbConfig.setAllowCreate(true);
    // A resumable crawl needs a transactional store; a non-resumable one
    // trades durability for throughput via deferred writes.
    dbConfig.setTransactional(config.isResumableCrawling());
    dbConfig.setDeferredWrite(!config.isResumableCrawling());
    docIDsDB = env.openDatabase(null, "DocIDs", dbConfig);
    if (config.isResumableCrawling()) {
        int docCount = getDocCount();
        if (docCount > 0) {
            logger.info("Loaded {} URLs that had been detected in previous crawl.", docCount);
            lastDocID = docCount;
        }
    } else {
        lastDocID = 0;
    }
}
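Note that the Environment handed to DocIDServer must itself be transactional whenever resumable crawling is enabled, otherwise the openDatabase call above would fail. A sketch of opening such an environment with Berkeley DB JE (the "frontier" subfolder name mirrors crawler4j's convention but is an assumption here):

import com.sleepycat.je.DatabaseException;
import com.sleepycat.je.Environment;
import com.sleepycat.je.EnvironmentConfig;
import java.io.File;

// Sketch: open a JE environment whose transactional setting follows the config.
public static Environment openEnvironment(CrawlConfig config) throws DatabaseException {
    EnvironmentConfig envConfig = new EnvironmentConfig();
    envConfig.setAllowCreate(true);
    envConfig.setTransactional(config.isResumableCrawling()); // must match the DatabaseConfig above
    envConfig.setLocking(config.isResumableCrawling());
    File envFolder = new File(config.getCrawlStorageFolder(), "frontier"); // assumed layout
    envFolder.mkdirs(); // JE requires the directory to exist
    return new Environment(envFolder, envConfig);
}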
Example 2: Counters
import edu.uci.ics.crawler4j.crawler.CrawlConfig; // import the class whose method is demonstrated
import com.sleepycat.je.*; // Berkeley DB JE: Environment, Transaction, Cursor, ...
import java.util.HashMap;
public Counters(Environment env, CrawlConfig config) throws DatabaseException {
    super(config);
    this.env = env;
    this.counterValues = new HashMap<>();
    /*
     * When crawling is set to be resumable, we have to keep the statistics
     * in a transactional database to make sure they are not lost if the
     * crawler crashes or is terminated unexpectedly.
     */
    if (config.isResumableCrawling()) {
        DatabaseConfig dbConfig = new DatabaseConfig();
        dbConfig.setAllowCreate(true);
        dbConfig.setTransactional(true);
        dbConfig.setDeferredWrite(false);
        statisticsDB = env.openDatabase(null, "Statistics", dbConfig);

        // Reload every counter persisted by the previous crawl.
        OperationStatus result;
        DatabaseEntry key = new DatabaseEntry();
        DatabaseEntry value = new DatabaseEntry();
        Transaction txn = env.beginTransaction(null, null);
        Cursor cursor = statisticsDB.openCursor(txn, null);
        result = cursor.getFirst(key, value, null);
        while (result == OperationStatus.SUCCESS) {
            if (value.getData().length > 0) {
                String name = new String(key.getData());
                long counterValue = Util.byteArray2Long(value.getData());
                counterValues.put(name, counterValue); // autoboxing instead of new Long(...)
            }
            result = cursor.getNext(key, value, null);
        }
        cursor.close();
        txn.commit();
    }
}
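Util.byteArray2Long above is a small crawler4j helper. Assuming the counters are stored as 8-byte big-endian values, a functionally equivalent decoder would look like this (a sketch, not the library's actual source):

import java.nio.ByteBuffer;

// Equivalent sketch: decode an 8-byte big-endian counter value.
public static long byteArray2Long(byte[] bytes) {
    return ByteBuffer.wrap(bytes).getLong(); // ByteBuffer defaults to big-endian
}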
Example 3: Frontier
import edu.uci.ics.crawler4j.crawler.CrawlConfig; // import the class whose method is demonstrated
import edu.uci.ics.crawler4j.url.WebURL;
import com.sleepycat.je.*; // Berkeley DB JE: Environment, DatabaseException
import java.util.List;
public Frontier(Environment env, CrawlConfig config, DocIDServer docIdServer) {
    super(config);
    this.counters = new Counters(env, config);
    this.docIdServer = docIdServer;
    try {
        workQueues = new WorkQueues(env, "PendingURLsDB", config.isResumableCrawling());
        if (config.isResumableCrawling()) {
            scheduledPages = counters.getValue(ReservedCounterNames.SCHEDULED_PAGES);
            inProcessPages = new InProcessPagesDB(env);
            long numPreviouslyInProcessPages = inProcessPages.getLength();
            if (numPreviouslyInProcessPages > 0) {
                logger.info("Rescheduling {} URLs from previous crawl.", numPreviouslyInProcessPages);
                scheduledPages -= numPreviouslyInProcessPages;
                // Move URLs that were in flight when the previous crawl stopped
                // back into the pending work queues, in batches of 100.
                while (true) {
                    List<WebURL> urls = inProcessPages.get(100);
                    if (urls.isEmpty()) {
                        break;
                    }
                    scheduleAll(urls);
                    inProcessPages.delete(urls.size());
                }
            }
        } else {
            inProcessPages = null;
            scheduledPages = 0;
        }
    } catch (DatabaseException e) {
        logger.error("Error while initializing the Frontier: {}", e.getMessage());
        workQueues = null;
    }
}
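To see all three classes in action: a crawl with resumable crawling enabled is started through CrawlController exactly like a fresh crawl, and on restart the Frontier above reschedules any pending URLs by itself. A sketch assuming the standard crawler4j bootstrap, where MyCrawler is a hypothetical WebCrawler subclass and the seed URL and storage folder are placeholders:

import edu.uci.ics.crawler4j.crawler.CrawlConfig;
import edu.uci.ics.crawler4j.crawler.CrawlController;
import edu.uci.ics.crawler4j.fetcher.PageFetcher;
import edu.uci.ics.crawler4j.robotstxt.RobotstxtConfig;
import edu.uci.ics.crawler4j.robotstxt.RobotstxtServer;

public static void main(String[] args) throws Exception {
    CrawlConfig config = new CrawlConfig();
    config.setCrawlStorageFolder("/tmp/crawler4j"); // placeholder folder
    config.setResumableCrawling(true);              // enables the resume paths shown above
    PageFetcher pageFetcher = new PageFetcher(config);
    RobotstxtServer robotstxtServer = new RobotstxtServer(new RobotstxtConfig(), pageFetcher);
    CrawlController controller = new CrawlController(config, pageFetcher, robotstxtServer);
    controller.addSeed("https://example.com/");     // placeholder seed
    controller.start(MyCrawler.class, 4);           // MyCrawler: hypothetical WebCrawler subclass
}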