本文整理匯總了Java中org.apache.nutch.parse.HtmlParseFilter類的典型用法代碼示例。如果您正苦於以下問題:Java HtmlParseFilter類的具體用法?Java HtmlParseFilter怎麼用?Java HtmlParseFilter使用的例子?那麼,這裡精選的類代碼示例或許可以為您提供幫助。
HtmlParseFilter類屬於org.apache.nutch.parse包,在下文中一共展示了HtmlParseFilter類的1個代碼示例,這些例子默認根據受歡迎程度排序。您可以為喜歡或者感覺有用的代碼點讚,您的評價將有助於系統推薦出更棒的Java代碼示例。
示例1: setConf
import org.apache.nutch.parse.HtmlParseFilter; //導入依賴的package包/類
/**
 * Stores the configuration and loads the regex rules used by this filter.
 *
 * <p>The rules are read from the first available source, in this order:
 * <ol>
 *   <li>the inline rules in the {@code parsefilter.regex.rules} property;</li>
 *   <li>the {@code regexFile} field, when it is set;</li>
 *   <li>the {@code file} attribute declared for the
 *       {@code parsefilter-regex} plugin extension;</li>
 *   <li>the file named by the {@code parsefilter.regex.file} property.</li>
 * </ol>
 * A file name is first looked up through
 * {@code conf.getConfResourceAsReader}; if that yields no reader, the name is
 * opened directly with {@link FileReader}.
 *
 * <p>Failures are logged rather than propagated: an {@link IOException} while
 * reading, or the absence of any rules source, leaves the filter without
 * newly loaded rules.
 *
 * @param conf configuration to keep and to read the rule settings from
 */
public void setConf(Configuration conf) {
  this.conf = conf;

  // Find the "file" attribute declared for this plugin's extension.
  String pluginName = "parsefilter-regex";
  Extension[] extensions = PluginRepository.get(conf).getExtensionPoint(
      HtmlParseFilter.class.getName()).getExtensions();
  for (Extension extension : extensions) {
    if (extension.getDescriptor().getPluginId().equals(pluginName)) {
      attributeFile = extension.getAttribute("file");
      break;
    }
  }

  // A whitespace-only attribute is treated the same as a missing one.
  if (attributeFile != null && attributeFile.trim().equals("")) {
    attributeFile = null;
  }

  if (attributeFile != null) {
    if (LOG.isInfoEnabled()) {
      LOG.info("Attribute \"file\" is defined for plugin " + pluginName
          + " as " + attributeFile);
    }
  } else {
    if (LOG.isWarnEnabled()) {
      LOG.warn("Attribute \"file\" is not defined in plugin.xml for plugin "
          + pluginName);
    }
  }

  // regexFile and the plugin attribute "file" override the configured file.
  String file = conf.get("parsefilter.regex.file");
  String stringRules = conf.get("parsefilter.regex.rules");
  if (regexFile != null) {
    file = regexFile;
  } else if (attributeFile != null) {
    file = attributeFile;
  }

  // With no inline rules and no file name there is nothing to open; a null
  // name would make FileReader throw a NullPointerException below.
  if (stringRules == null && file == null) {
    LOG.error("No rules configured for plugin " + pluginName
        + ": neither inline rules nor a rules file is defined");
    return;
  }

  Reader reader = null;
  try {
    if (stringRules != null) { // inline rules take precedence over files
      reader = new StringReader(stringRules);
    } else {
      reader = conf.getConfResourceAsReader(file);
      if (reader == null) {
        // Not available as a configuration resource: open it as a plain file.
        reader = new FileReader(file);
      }
    }
    readConfiguration(reader);
  } catch (IOException e) {
    LOG.error(org.apache.hadoop.util.StringUtils.stringifyException(e));
  } finally {
    // Always release the underlying stream, whether or not reading succeeded.
    if (reader != null) {
      try {
        reader.close();
      } catch (IOException e) {
        LOG.warn("Failed to close rules reader for plugin " + pluginName
            + ": " + e);
      }
    }
  }
}