本文整理汇总了Java中org.apache.nutch.metadata.Metadata.names方法的典型用法代码示例。如果您正苦于以下问题：Java Metadata.names方法的具体用法？Java Metadata.names怎么用？Java Metadata.names使用的例子？那么恭喜您，这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在类org.apache.nutch.metadata.Metadata的用法示例。
在下文中一共展示了Metadata.names方法的5个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Java代码示例。
示例1: filter
import org.apache.nutch.metadata.Metadata; //导入方法依赖的package包/类
/**
 * Copies every parse-metadata entry of the parsed page into the index document and keeps the
 * document only when one of the metadata names is {@code ogc_service}.
 *
 * <p>For each metadata name, the value returned by {@code Metadata.get} is added to the document
 * under that name, and one info-level log line is written per name.
 *
 * @param doc the document being built; receives one field per metadata name
 * @param parse parse result whose parse metadata is read
 * @param url URL of the page; used only in the log message
 * @param datum not used by this filter
 * @param inlinks not used by this filter
 * @return {@code doc} if a metadata name equal to {@code ogc_service} was seen, otherwise
 *     {@code null}
 * @throws IndexingException declared by the signature; nothing in this body throws it explicitly
 */
@Override
public NutchDocument filter(NutchDocument doc, Parse parse, Text url, CrawlDatum datum, Inlinks inlinks)
    throws IndexingException {
  final Metadata parseMeta = parse.getData().getParseMeta();
  boolean isOgcService = false;

  for (String name : parseMeta.names()) {
    // Non-short-circuit OR: the comparison runs for every name, the flag only ever turns on.
    isOgcService |= name.equals("ogc_service");

    final String fieldValue = parseMeta.get(name);
    LOG.info("Adding " + url + " to NutchDocument");
    doc.add(name, fieldValue);
  }

  // Only documents flagged as an OGC service are kept; everything else is dropped.
  if (!isOgcService) {
    return null;
  }
  return doc;
}
示例2: mergeMetadata
import org.apache.nutch.metadata.Metadata; //导入方法依赖的package包/类
/**
 * Appends every value of every name found in {@code second} to {@code first}.
 *
 * <p>Values are appended with {@code Metadata.add}, so whatever {@code first} already holds
 * under the same name is left in place. {@code second} is only read.
 *
 * @param first the metadata that receives the values
 * @param second the metadata whose names and values are copied
 */
private void mergeMetadata(Metadata first, Metadata second) {
  final String[] sourceNames = second.names();
  for (int n = 0; n < sourceNames.length; n++) {
    final String currentName = sourceNames[n];
    final String[] currentValues = second.getValues(currentName);
    for (int v = 0; v < currentValues.length; v++) {
      first.add(currentName, currentValues[v]);
    }
  }
}
示例3: filter
import org.apache.nutch.metadata.Metadata; //导入方法依赖的package包/类
/**
 * Feeds three sources of meta tags through {@code addIndexedMetatags}, all targeting the parse
 * metadata of the page identified by {@code content.getUrl()}.
 *
 * <p>The sources are processed in this order:
 * <ol>
 *   <li>the names already present in the parse metadata itself,</li>
 *   <li>the general meta tags from {@code metaTags},</li>
 *   <li>the http-equiv tags from {@code metaTags}.</li>
 * </ol>
 *
 * @param content fetched content; only its URL is used, to look up the parse
 * @param parseResult parse result holding the parse for that URL
 * @param metaTags meta tags extracted from the page
 * @param doc not used by this filter
 * @return the same {@code parseResult} instance that was passed in
 */
public ParseResult filter(Content content, ParseResult parseResult,
    HTMLMetaTags metaTags, DocumentFragment doc) {
  final Metadata parseMeta = parseResult.get(content.getUrl()).getData().getParseMeta();

  // Start with the parse metadata itself: the tika-parser might have stored values there already.
  final String[] existingNames = parseMeta.names();
  for (int i = 0; i < existingNames.length; i++) {
    final String existingName = existingNames[i];
    addIndexedMetatags(parseMeta, existingName, parseMeta.getValues(existingName));
  }

  // Then the general <meta name=...> tags.
  final Metadata generalTags = metaTags.getGeneralTags();
  final String[] generalNames = generalTags.names();
  for (int i = 0; i < generalNames.length; i++) {
    final String generalName = generalNames[i];
    addIndexedMetatags(parseMeta, generalName, generalTags.getValues(generalName));
  }

  // Finally the http-equiv tags, which are exposed as java.util.Properties.
  final Properties httpEquivTags = metaTags.getHttpEquivTags();
  final Enumeration<?> equivNames = httpEquivTags.propertyNames();
  while (equivNames.hasMoreElements()) {
    final String equivName = (String) equivNames.nextElement();
    final String equivValue = httpEquivTags.getProperty(equivName);
    addIndexedMetatags(parseMeta, equivName, equivValue);
  }

  return parseResult;
}
示例4: handle
import org.apache.nutch.metadata.Metadata; //导入方法依赖的package包/类
/**
 * Answers a request with the content stored in the segment for the requested URI.
 *
 * <p>The request URI is looked up in {@code seg}. When a {@code CrawlDatum} exists:
 * <ul>
 *   <li>the response status is the entry of {@code protoCodes} for the stored protocol status
 *       code, or 200 when there is no protocol status or no mapping;</li>
 *   <li>every stored content-metadata value is replayed as a response header: names starting
 *       with an upper-case letter go out unchanged via {@code res.addHeader}, all others go
 *       through {@code addMyHeader};</li>
 *   <li>the stored bytes are written as the response body and the request is marked handled.</li>
 * </ul>
 * When the datum exists but the content is missing, the request is marked handled with no body.
 * When no datum exists, only a "not found" header is added and the request is left unhandled.
 *
 * <p>Any exception raised while doing this is logged and reported in a response header rather
 * than propagated.
 *
 * @param req the request; supplies the URI and receives the handled flag
 * @param res the response to populate
 * @param target not used
 * @param dispatch not used
 * @throws IOException declared by the signature; exceptions raised inside the body are caught
 * @throws ServletException declared by the signature; exceptions raised inside the body are caught
 */
@Override
public void handle(Request req, HttpServletResponse res, String target,
    int dispatch) throws IOException, ServletException {
  try {
    String uri = req.getUri().toString();
    LOG.info("URI: " + uri);
    addMyHeader(res, "URI", uri);
    Text url = new Text(uri);
    CrawlDatum cd = seg.getCrawlDatum(url);
    if (cd == null) {
      addMyHeader(res, "Res", "not found");
      LOG.info(" -not found " + url);
      return;
    }

    addMyHeader(res, "Res", "found");
    LOG.info("-got " + cd.toString());
    ProtocolStatus ps = (ProtocolStatus) cd.getMetaData().get(
        Nutch.WRITABLE_PROTO_STATUS_KEY);
    if (ps != null) {
      Integer translatedCode = protoCodes.get(ps.getCode());
      if (translatedCode != null) {
        res.setStatus(translatedCode.intValue());
      } else {
        res.setStatus(HttpServletResponse.SC_OK);
      }
      addMyHeader(res, "ProtocolStatus", ps.toString());
    } else {
      res.setStatus(HttpServletResponse.SC_OK);
    }

    Content c = seg.getContent(url);
    if (c == null) { // missing content
      req.setHandled(true);
      res.addHeader("X-Handled-By", getClass().getSimpleName());
      return;
    }

    byte[] data = c.getContent();
    LOG.debug("-data len=" + data.length);
    Metadata meta = c.getMetadata();
    String[] names = meta.names();
    LOG.debug("- " + names.length + " meta");
    for (String name : names) {
      // A name that starts with an upper-case letter has a pretty good chance of being a
      // standard header. An empty name has no first character, so it is classified as
      // non-standard instead of letting charAt(0) throw and abort the whole response.
      boolean standard = !name.isEmpty()
          && Character.isLetter(name.charAt(0))
          && Character.isUpperCase(name.charAt(0));
      for (String value : meta.getValues(name)) {
        if (standard) {
          res.addHeader(name, value);
        } else {
          addMyHeader(res, name, value);
        }
      }
    }

    req.setHandled(true);
    res.addHeader("X-Handled-By", getClass().getSimpleName());
    res.setContentType(meta.get(Metadata.CONTENT_TYPE));
    res.setContentLength(data.length);
    OutputStream os = res.getOutputStream();
    os.write(data, 0, data.length);
    res.flushBuffer();
  } catch (Exception e) {
    // Report through the logger only; printing the stack trace to stderr as well just
    // duplicated the same text outside the logging configuration.
    String trace = StringUtils.stringifyException(e);
    LOG.warn(trace);
    addMyHeader(res, "Res", "Exception: " + trace);
  }
}
示例5: handle
import org.apache.nutch.metadata.Metadata; //导入方法依赖的package包/类
/**
 * Replays the segment's stored copy of the requested URI as the HTTP response.
 *
 * <p>The request URI is looked up in {@code seg}. When a {@code CrawlDatum} exists:
 * <ul>
 *   <li>the response status is the entry of {@code protoCodes} for the stored protocol status
 *       code, or 200 when there is no protocol status or no mapping;</li>
 *   <li>every stored content-metadata value is sent as a response header: names starting with
 *       an upper-case letter go out unchanged via {@code res.addHeader}, all others go through
 *       {@code addMyHeader};</li>
 *   <li>the stored bytes are written as the response body and the request is marked handled.</li>
 * </ul>
 * When the datum exists but the content is missing, the request is marked handled with no body.
 * When no datum exists, only a "not found" header is added and the request is left unhandled.
 *
 * <p>Any exception raised while doing this is logged and reported in a response header rather
 * than propagated.
 *
 * @param req the request; supplies the URI and receives the handled flag
 * @param res the response to populate
 * @param target not used
 * @param dispatch not used
 * @throws IOException declared by the signature; exceptions raised inside the body are caught
 * @throws ServletException declared by the signature; exceptions raised inside the body are caught
 */
@Override
public void handle(Request req, HttpServletResponse res, String target,
    int dispatch) throws IOException, ServletException {
  try {
    String uri = req.getUri().toString();
    LOG.info("URI: " + uri);
    addMyHeader(res, "URI", uri);
    Text url = new Text(uri);
    CrawlDatum cd = seg.getCrawlDatum(url);
    if (cd == null) {
      addMyHeader(res, "Res", "not found");
      LOG.info(" -not found " + url);
      return;
    }

    addMyHeader(res, "Res", "found");
    LOG.info("-got " + cd.toString());
    ProtocolStatus ps = (ProtocolStatus) cd.getMetaData().get(Nutch.WRITABLE_PROTO_STATUS_KEY);
    if (ps != null) {
      Integer translatedCode = protoCodes.get(ps.getCode());
      if (translatedCode != null) {
        res.setStatus(translatedCode.intValue());
      } else {
        res.setStatus(HttpServletResponse.SC_OK);
      }
      addMyHeader(res, "ProtocolStatus", ps.toString());
    } else {
      res.setStatus(HttpServletResponse.SC_OK);
    }

    Content c = seg.getContent(url);
    if (c == null) { // missing content
      req.setHandled(true);
      res.addHeader("X-Handled-By", getClass().getSimpleName());
      return;
    }

    byte[] data = c.getContent();
    LOG.debug("-data len=" + data.length);
    Metadata meta = c.getMetadata();
    String[] names = meta.names();
    LOG.debug("- " + names.length + " meta");
    for (String name : names) {
      // A name that starts with an upper-case letter has a pretty good chance of being a
      // standard header. An empty name has no first character, so it is classified as
      // non-standard instead of letting charAt(0) throw and abort the whole response.
      boolean standard = !name.isEmpty()
          && Character.isLetter(name.charAt(0))
          && Character.isUpperCase(name.charAt(0));
      for (String value : meta.getValues(name)) {
        if (standard) {
          res.addHeader(name, value);
        } else {
          addMyHeader(res, name, value);
        }
      }
    }

    req.setHandled(true);
    res.addHeader("X-Handled-By", getClass().getSimpleName());
    res.setContentType(meta.get(Metadata.CONTENT_TYPE));
    res.setContentLength(data.length);
    OutputStream os = res.getOutputStream();
    os.write(data, 0, data.length);
    res.flushBuffer();
  } catch (Exception e) {
    // Report through the logger only; printing the stack trace to stderr as well just
    // duplicated the same text outside the logging configuration.
    String trace = StringUtils.stringifyException(e);
    LOG.warn(trace);
    addMyHeader(res, "Res", "Exception: " + trace);
  }
}