当前位置: 首页>>代码示例>>Java>>正文


Java Metadata.names方法代码示例

本文整理汇总了Java中org.apache.nutch.metadata.Metadata.names方法的典型用法代码示例。如果您正苦于以下问题：Java Metadata.names方法的具体用法？Java Metadata.names怎么用？Java Metadata.names使用的例子？那么恭喜您，这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在类org.apache.nutch.metadata.Metadata的用法示例。


在下文中一共展示了Metadata.names方法的5个代码示例，这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞，您的评价将有助于系统推荐出更棒的Java代码示例。

示例1: filter

import org.apache.nutch.metadata.Metadata; //导入方法依赖的package包/类
/**
 * Copies every parse-metadata entry of the page into the index document and
 * keeps the document only when one of the metadata keys is {@code ogc_service}.
 *
 * @param doc     document being built; receives one field per metadata key
 * @param parse   parse result whose parse metadata is copied
 * @param url     URL of the page, used only in the log message
 * @param datum   not used by this filter
 * @param inlinks not used by this filter
 * @return {@code doc} when a key named {@code ogc_service} was seen, otherwise {@code null}
 * @throws IndexingException declared by the signature; not thrown directly by this body
 */
@Override
public NutchDocument filter(NutchDocument doc, Parse parse, Text url, CrawlDatum datum, Inlinks inlinks)
		throws IndexingException {
	Metadata parseMeta = parse.getData().getParseMeta();
	String[] keys = parseMeta.names();
	boolean isOgcService = false;

	for (int i = 0; i < keys.length; i++) {
		String fieldName = keys[i];
		if (fieldName.equals("ogc_service")) {
			isOgcService = true;
		}
		// Only the single value returned by get() is indexed for each key.
		String fieldValue = parseMeta.get(fieldName);
		LOG.info("Adding " + url + " to NutchDocument");
		doc.add(fieldName, fieldValue);
	}

	// Pages that are not OGC services are dropped by returning null.
	if (isOgcService) {
		return doc;
	}
	return null;
}
 
开发者ID:jorcox,项目名称:GeoCrawler,代码行数:18,代码来源:OgcIndexingFilter.java

示例2: mergeMetadata

import org.apache.nutch.metadata.Metadata; //导入方法依赖的package包/类
/**
 * Appends every value of every name found in {@code second} to {@code first}.
 *
 * @param first  metadata that receives the added name/value pairs
 * @param second metadata whose names and values are read
 */
private void mergeMetadata(Metadata first, Metadata second) {
  String[] sourceNames = second.names();
  for (int i = 0; i < sourceNames.length; i++) {
    String key = sourceNames[i];
    for (String item : second.getValues(key)) {
      first.add(key, item);
    }
  }
}
 
开发者ID:jorcox,项目名称:GeoCrawler,代码行数:9,代码来源:FeedParser.java

示例3: filter

import org.apache.nutch.metadata.Metadata; //导入方法依赖的package包/类
/**
 * Feeds three sources of meta tags for the page identified by
 * {@code content.getUrl()} into {@code addIndexedMetatags}: the entries
 * already present in the parse metadata, the general meta tags, and the
 * http-equiv tags.
 *
 * @param content     fetched content; only its URL is read here
 * @param parseResult parse results, looked up by the content URL
 * @param metaTags    meta tags extracted from the page
 * @param doc         not used by this method
 * @return the same {@code parseResult} instance that was passed in
 */
public ParseResult filter(Content content, ParseResult parseResult,
    HTMLMetaTags metaTags, DocumentFragment doc) {

  Metadata parseMeta = parseResult.get(content.getUrl()).getData()
      .getParseMeta();

  // Start with what is already in the parse metadata: the tika-parser
  // might have stored the values there already.
  String[] existingNames = parseMeta.names();
  for (int i = 0; i < existingNames.length; i++) {
    String existing = existingNames[i];
    addIndexedMetatags(parseMeta, existing, parseMeta.getValues(existing));
  }

  // Then the general <meta name=...> tags.
  Metadata generalTags = metaTags.getGeneralTags();
  String[] generalNames = generalTags.names();
  for (int i = 0; i < generalNames.length; i++) {
    String general = generalNames[i];
    addIndexedMetatags(parseMeta, general, generalTags.getValues(general));
  }

  // Finally the http-equiv tags, which are exposed as Properties.
  Properties equivTags = metaTags.getHttpEquivTags();
  Enumeration<?> equivNames = equivTags.propertyNames();
  while (equivNames.hasMoreElements()) {
    String equivName = (String) equivNames.nextElement();
    String equivValue = equivTags.getProperty(equivName);
    addIndexedMetatags(parseMeta, equivName, equivValue);
  }

  return parseResult;
}
 
开发者ID:jorcox,项目名称:GeoCrawler,代码行数:28,代码来源:MetaTagsParser.java

示例4: handle

import org.apache.nutch.metadata.Metadata; //导入方法依赖的package包/类
/**
 * Answers a request from the data stored in the segment for the requested URI.
 *
 * <p>The URI is logged and echoed back through {@code addMyHeader}. When the
 * segment has no {@code CrawlDatum} for it, a "Res: not found" header is added
 * and the request is left unhandled. Otherwise the response status is taken
 * from {@code protoCodes} (falling back to 200 OK), the stored content
 * metadata is copied into response headers, and the stored bytes are written
 * as the body. When the datum exists but the content is missing, the request
 * is marked handled and no body is written.
 *
 * <p>Any exception raised while serving is logged at warn level and reported
 * in the "Res" header; it is not rethrown.
 *
 * @param req      incoming request; its URI selects the stored page
 * @param res      response that receives status, headers and body
 * @param target   not used by this method
 * @param dispatch not used by this method
 * @throws IOException      declared by the signature; exceptions raised inside
 *                          the try block are caught and logged here
 * @throws ServletException declared by the signature; see above
 */
@Override
public void handle(Request req, HttpServletResponse res, String target,
    int dispatch) throws IOException, ServletException {
  try {
    String uri = req.getUri().toString();
    LOG.info("URI: " + uri);
    addMyHeader(res, "URI", uri);
    Text url = new Text(uri);
    CrawlDatum cd = seg.getCrawlDatum(url);
    if (cd != null) {
      addMyHeader(res, "Res", "found");
      LOG.info("-got " + cd.toString());
      ProtocolStatus ps = (ProtocolStatus) cd.getMetaData().get(
          Nutch.WRITABLE_PROTO_STATUS_KEY);
      if (ps != null) {
        // Translate the stored protocol status code into an HTTP status,
        // defaulting to 200 when protoCodes has no mapping for it.
        Integer translatedCode = protoCodes.get(ps.getCode());
        if (translatedCode != null) {
          res.setStatus(translatedCode.intValue());
        } else {
          res.setStatus(HttpServletResponse.SC_OK);
        }
        addMyHeader(res, "ProtocolStatus", ps.toString());
      } else {
        res.setStatus(HttpServletResponse.SC_OK);
      }
      Content c = seg.getContent(url);
      if (c == null) { // missing content
        req.setHandled(true);
        res.addHeader("X-Handled-By", getClass().getSimpleName());
        return;
      }
      byte[] data = c.getContent();
      LOG.debug("-data len=" + data.length);
      Metadata meta = c.getMetadata();
      String[] names = meta.names();
      LOG.debug("- " + names.length + " meta");
      for (String name : names) {
        char ch = name.charAt(0);
        // A name starting with an upper-case letter has a pretty good chance
        // of being a standard header and is passed through unchanged; every
        // other name goes through addMyHeader.
        boolean standard = Character.isLetter(ch) && Character.isUpperCase(ch);
        for (String value : meta.getValues(name)) {
          if (standard) {
            res.addHeader(name, value);
          } else {
            addMyHeader(res, name, value);
          }
        }
      }
      req.setHandled(true);
      res.addHeader("X-Handled-By", getClass().getSimpleName());
      res.setContentType(meta.get(Metadata.CONTENT_TYPE));
      res.setContentLength(data.length);
      OutputStream os = res.getOutputStream();
      os.write(data, 0, data.length);
      res.flushBuffer();
    } else {
      addMyHeader(res, "Res", "not found");
      LOG.info(" -not found " + url);
    }
  } catch (Exception e) {
    // Report through the logger only; the stack trace is stringified once
    // and reused instead of also being dumped to stderr.
    String trace = StringUtils.stringifyException(e);
    LOG.warn(trace);
    // NOTE(review): the stringified trace may span several lines - confirm
    // addMyHeader copes with such a header value.
    addMyHeader(res, "Res", "Exception: " + trace);
  }
}
 
开发者ID:jorcox,项目名称:GeoCrawler,代码行数:70,代码来源:SegmentHandler.java

示例5: handle

import org.apache.nutch.metadata.Metadata; //导入方法依赖的package包/类
/**
 * Answers a request from the data stored in the segment for the requested URI.
 *
 * <p>The URI is logged and echoed back through {@code addMyHeader}. When the
 * segment has no {@code CrawlDatum} for it, a "Res: not found" header is added
 * and the request is left unhandled. Otherwise the response status is taken
 * from {@code protoCodes} (falling back to 200 OK), the stored content
 * metadata is copied into response headers, and the stored bytes are written
 * as the body. When the datum exists but the content is missing, the request
 * is marked handled and no body is written.
 *
 * <p>Any exception raised while serving is logged at warn level and reported
 * in the "Res" header; it is not rethrown.
 *
 * @param req      incoming request; its URI selects the stored page
 * @param res      response that receives status, headers and body
 * @param target   not used by this method
 * @param dispatch not used by this method
 * @throws IOException      declared by the signature; exceptions raised inside
 *                          the try block are caught and logged here
 * @throws ServletException declared by the signature; see above
 */
@Override
public void handle(Request req, HttpServletResponse res, String target,
        int dispatch) throws IOException, ServletException {
  try {
    String uri = req.getUri().toString();
    LOG.info("URI: " + uri);
    addMyHeader(res, "URI", uri);
    Text url = new Text(uri);
    CrawlDatum cd = seg.getCrawlDatum(url);
    if (cd != null) {
      addMyHeader(res, "Res", "found");
      LOG.info("-got " + cd.toString());
      ProtocolStatus ps = (ProtocolStatus)cd.getMetaData().get(Nutch.WRITABLE_PROTO_STATUS_KEY);
      if (ps != null) {
        // Map the stored protocol status code to an HTTP status; codes with
        // no entry in protoCodes are answered with 200.
        Integer httpStatus = protoCodes.get(ps.getCode());
        if (httpStatus != null) {
          res.setStatus(httpStatus.intValue());
        } else {
          res.setStatus(HttpServletResponse.SC_OK);
        }
        addMyHeader(res, "ProtocolStatus", ps.toString());
      } else {
        res.setStatus(HttpServletResponse.SC_OK);
      }
      Content c = seg.getContent(url);
      if (c == null) { // missing content
        req.setHandled(true);
        res.addHeader("X-Handled-By", getClass().getSimpleName());
        return;
      }
      byte[] data = c.getContent();
      LOG.debug("-data len=" + data.length);
      Metadata meta = c.getMetadata();
      String[] names = meta.names();
      LOG.debug("- " + names.length + " meta");
      for (String name : names) {
        char first = name.charAt(0);
        // Names that begin with an upper-case letter are pretty likely to be
        // standard headers and are emitted as-is; the rest are routed
        // through addMyHeader.
        boolean standard = Character.isLetter(first) && Character.isUpperCase(first);
        for (String value : meta.getValues(name)) {
          if (standard) {
            res.addHeader(name, value);
          } else {
            addMyHeader(res, name, value);
          }
        }
      }
      req.setHandled(true);
      res.addHeader("X-Handled-By", getClass().getSimpleName());
      res.setContentType(meta.get(Metadata.CONTENT_TYPE));
      res.setContentLength(data.length);
      OutputStream os = res.getOutputStream();
      os.write(data, 0, data.length);
      res.flushBuffer();
    } else {
      addMyHeader(res, "Res", "not found");
      LOG.info(" -not found " + url);
    }
  } catch (Exception e) {
    // The failure is reported through the logger only (no stderr dump), and
    // the stack trace is rendered a single time for both the log and header.
    String trace = StringUtils.stringifyException(e);
    LOG.warn(trace);
    // NOTE(review): the rendered trace may contain line breaks - verify that
    // addMyHeader accepts such a value.
    addMyHeader(res, "Res", "Exception: " + trace);
  }
}
 
开发者ID:yahoo,项目名称:anthelion,代码行数:69,代码来源:SegmentHandler.java


注：本文中的org.apache.nutch.metadata.Metadata.names方法示例由纯净天空整理自GitHub/MSDocs等开源代码及文档管理平台，相关代码片段筛选自各路编程大神贡献的开源项目，源码版权归原作者所有，传播和使用请参考对应项目的License；未经允许，请勿转载。