当前位置: 首页>>代码示例>>Java>>正文


Java Document类代码示例

本文整理汇总了Java中org.carrot2.core.Document的典型用法代码示例。如果您正苦于以下问题:Java Document类的具体用法?Java Document怎么用?Java Document使用的例子?那么恭喜您, 这里精选的类代码示例或许可以为您提供帮助。


Document类属于org.carrot2.core包,在下文中一共展示了Document类的12个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Java代码示例。

示例1: process

import org.carrot2.core.Document; //导入依赖的package包/类
/**
 * Builds one cluster per input document. Each cluster is labelled with the
 * document's title and summary, the name of its language when one is set,
 * and the string form of every non-null field named in the comma-separated
 * {@code customFields} value.
 *
 * @throws ProcessingException declared by the overridden method
 */
@Override
public void process() throws ProcessingException {
  clusters = Lists.newArrayListWithCapacity(documents.size());

  for (Document doc : documents) {
    final Cluster echo = new Cluster();

    // Fixed labels: title and summary, then the language name if present.
    echo.addPhrases(doc.getTitle(), doc.getSummary());
    if (doc.getLanguage() != null) {
      echo.addPhrases(doc.getLanguage().name());
    }

    // Optional labels: one per custom field that has a value.
    for (String fieldName : customFields.split(",")) {
      final Object fieldValue = doc.getField(fieldName);
      if (fieldValue == null) {
        continue;
      }
      echo.addPhrases(fieldValue.toString());
    }

    echo.addDocuments(doc);
    clusters.add(echo);
  }
}
 
开发者ID:europeana,项目名称:search,代码行数:21,代码来源:EchoClusteringAlgorithm.java

示例2: displayResults

import org.carrot2.core.Document; //导入依赖的package包/类
/**
 * Prints a full report of a processing result to the console: its documents
 * and clusters (each only when non-null), followed by its attributes.
 *
 * @author GS
 * @param processingResult the result whose contents are displayed
 */
public static void displayResults(ProcessingResult processingResult)
{
    // Read everything up front; printing happens afterwards.
    final Collection<Document> allDocuments = processingResult.getDocuments();
    final Collection<Cluster> allClusters = processingResult.getClusters();
    final Map<String, Object> allAttributes = processingResult.getAttributes();

    // Documents section, skipped when the result carries none.
    if (allDocuments != null)
    {
        displayDocuments(allDocuments);
    }

    // Clusters section, skipped when the result carries none.
    if (allClusters != null)
    {
        displayClusters(allClusters);
    }

    // Attributes are always printed.
    displayAttributes(allAttributes);
}
 
开发者ID:gsh199449,项目名称:DistributedCrawler,代码行数:27,代码来源:ConsoleFormatter.java

示例3: cluster

import org.carrot2.core.Document; //导入依赖的package包/类
/**
 * Reads every hit from the given file, turns each hit's page (title and
 * content) into a Carrot2 document, clusters the documents with
 * {@code LingoClusteringAlgorithm}, prints the result to the console and
 * returns it.
 *
 * @author GS
 * @param docPath path of the file handed to {@code JsonReader}
 * @return the processing result produced by the controller
 * @throws IOException declared by the signature; presumably raised while
 *         reading {@code docPath} (confirm against {@code JsonReader})
 * @throws Exception any other failure raised while reading or clustering
 */
public ProcessingResult cluster(String docPath) throws IOException,
		Exception {
	@SuppressWarnings("unchecked")
	final Controller controller = ControllerFactory
			.createCachingPooling(IDocumentSource.class);
	final List<Document> documents = Lists.newArrayList();
	final JsonReader jr = new JsonReader(new File(docPath));
	try {
		while (jr.hasNext()) {
			final Hit h = jr.next();
			documents.add(new Document(h.getPagePOJO().getTitle(), h
					.getPagePOJO().getContent()));
		}
	} finally {
		// Close the reader even when reading or converting a hit fails,
		// so the underlying file is not leaked on the error path.
		jr.close();
	}
	final Map<String, Object> attributes = Maps.newHashMap();
	CommonAttributesDescriptor.attributeBuilder(attributes).documents(
			documents);
	final ProcessingResult result = controller.process(attributes,
			LingoClusteringAlgorithm.class);
	ConsoleFormatter.displayResults(result); // print the outcome
	return result;
}
 
开发者ID:gsh199449,项目名称:DistributedCrawler,代码行数:30,代码来源:Cluster.java

示例4: adapt

import org.carrot2.core.Document; //导入依赖的package包/类
/**
 * Converts a Carrot2 cluster into a {@code DocumentGroup}, copying its id,
 * phrases, label, score and other-topics flag, the string ids of its
 * documents, and its (recursively adapted) subclusters.
 *
 * @param cluster the cluster to convert
 * @return the populated document group
 */
private DocumentGroup adapt(Cluster cluster) {
    final DocumentGroup result = new DocumentGroup();

    // Scalar properties and labels.
    result.setId(cluster.getId());
    final List<String> labels = cluster.getPhrases();
    result.setPhrases(labels.toArray(new String[labels.size()]));
    result.setLabel(cluster.getLabel());
    result.setScore(cluster.getScore());
    result.setOtherTopics(cluster.isOtherTopics());

    // Documents are referenced by their string id only.
    final List<Document> members = cluster.getDocuments();
    final String[] references = new String[members.size()];
    int slot = 0;
    for (Document member : members) {
        references[slot++] = member.getStringId();
    }
    result.setDocumentReferences(references);

    // A missing subcluster list is treated as empty.
    List<Cluster> children = cluster.getSubclusters();
    if (children == null) {
        children = Collections.emptyList();
    }
    result.setSubgroups(adapt(children));

    return result;
}
 
开发者ID:carrot2,项目名称:elasticsearch-carrot2,代码行数:23,代码来源:ClusteringAction.java

示例5: cluster

import org.carrot2.core.Document; //导入依赖的package包/类
/**
 * Runs Carrot2 clustering over the given Solr documents and returns the
 * clusters converted by {@code clustersToNamedList}. Any exception is
 * logged and rethrown as a {@code SolrException} with
 * {@code ErrorCode.SERVER_ERROR}.
 */
@Override
public Object cluster(Query query, SolrDocumentList solrDocList,
    Map<SolrDocument, Integer> docIds, SolrQueryRequest sreq) {
  try {
    // Assemble the attribute map handed to the Carrot2 controller.
    final Map<String, Object> carrotAttributes = new HashMap<>();
    carrotAttributes.put(AttributeNames.DOCUMENTS,
        getDocuments(solrDocList, docIds, query, sreq));
    carrotAttributes.put(AttributeNames.QUERY, query.toString());

    // Names of the fields clustering operates on.
    carrotAttributes.put("solrFieldNames", getFieldsForClustering(sreq));

    // Request parameters may override any of the attributes set so far.
    extractCarrotAttributes(sreq.getParams(), carrotAttributes);

    // Carrot2 resolves some classes (e.g. custom tokenizer/stemmer) through
    // the current thread's context class loader at runtime. Install the
    // core's resource class loader while clustering runs so that classes
    // from contrib JARs are visible, and restore the previous one after.
    final Thread current = Thread.currentThread();
    final ClassLoader savedLoader = current.getContextClassLoader();
    try {
      current.setContextClassLoader(core.getResourceLoader().getClassLoader());
      return clustersToNamedList(
          controller.process(carrotAttributes, clusteringAlgorithmClass).getClusters(),
          sreq.getParams());
    } finally {
      current.setContextClassLoader(savedLoader);
    }
  } catch (Exception e) {
    log.error("Carrot2 clustering failed", e);
    throw new SolrException(ErrorCode.SERVER_ERROR, "Carrot2 clustering failed", e);
  }
}
 
开发者ID:europeana,项目名称:search,代码行数:37,代码来源:CarrotClusteringEngine.java

示例6: cluster

import org.carrot2.core.Document; //导入依赖的package包/类
/**
 * Clusters the given Solr documents with Carrot2 and returns the clusters
 * as converted by {@code clustersToNamedList}. Failures are logged and
 * wrapped in a {@code SolrException} with {@code ErrorCode.SERVER_ERROR}.
 */
@Override
public Object cluster(Query query, SolrDocumentList solrDocList,
    Map<SolrDocument, Integer> docIds, SolrQueryRequest sreq) {
  try {
    // Collect the inputs of the Carrot2 clustering call.
    final Map<String, Object> processingAttrs = new HashMap<String, Object>();
    processingAttrs.put(AttributeNames.DOCUMENTS,
        getDocuments(solrDocList, docIds, query, sreq));
    processingAttrs.put(AttributeNames.QUERY, query.toString());

    // The clustering field names are consumed by the
    // SolrStopwordsCarrot2LexicalDataFactory.
    processingAttrs.put("solrFieldNames", getFieldsForClustering(sreq));

    // Let request parameters override the attributes, if any are given.
    extractCarrotAttributes(sreq.getParams(), processingAttrs);

    // Carrot2 looks up certain classes (e.g. custom tokenizer/stemmer) via
    // the thread's context class loader. Swap in the core's resource class
    // loader for the duration of clustering so contrib JAR classes can be
    // found, then put the previous loader back.
    final Thread worker = Thread.currentThread();
    final ClassLoader previousLoader = worker.getContextClassLoader();
    try {
      worker.setContextClassLoader(core.getResourceLoader().getClassLoader());
      return clustersToNamedList(
          controller.process(processingAttrs, clusteringAlgorithmClass).getClusters(),
          sreq.getParams());
    } finally {
      worker.setContextClassLoader(previousLoader);
    }
  } catch (Exception e) {
    log.error("Carrot2 clustering failed", e);
    throw new SolrException(ErrorCode.SERVER_ERROR, "Carrot2 clustering failed", e);
  }
}
 
开发者ID:pkarmstr,项目名称:NYBC,代码行数:37,代码来源:CarrotClusteringEngine.java

示例7: cluster

import org.carrot2.core.Document; //导入依赖的package包/类
/**
 * Performs Carrot2 clustering on the given Solr documents and returns the
 * output of {@code clustersToNamedList} for the resulting clusters. Every
 * exception is logged and rethrown as a {@code SolrException} with
 * {@code ErrorCode.SERVER_ERROR}.
 */
@Override
public Object cluster(Query query, SolrDocumentList solrDocList,
    Map<SolrDocument, Integer> docIds, SolrQueryRequest sreq) {
  try {
    // Attributes passed to the Carrot2 controller.
    final Map<String, Object> attrs = new HashMap<String, Object>();
    attrs.put(AttributeNames.DOCUMENTS,
        getDocuments(solrDocList, docIds, query, sreq));
    attrs.put(AttributeNames.QUERY, query.toString());

    // Fields on which clustering runs.
    attrs.put("solrFieldNames", getFieldsForClustering(sreq));

    // Overrides supplied with the request take effect last.
    extractCarrotAttributes(sreq.getParams(), attrs);

    // Carrot2 loads some classes (e.g. custom tokenizer/stemmer) through the
    // current thread's context class loader. To expose classes from contrib
    // JARs, the core's resource class loader is installed while clustering
    // runs and the original loader is restored in the finally block.
    final Thread self = Thread.currentThread();
    final ClassLoader originalLoader = self.getContextClassLoader();
    try {
      self.setContextClassLoader(core.getResourceLoader().getClassLoader());
      return clustersToNamedList(
          controller.process(attrs, clusteringAlgorithmClass).getClusters(),
          sreq.getParams());
    } finally {
      self.setContextClassLoader(originalLoader);
    }
  } catch (Exception e) {
    log.error("Carrot2 clustering failed", e);
    throw new SolrException(ErrorCode.SERVER_ERROR, "Carrot2 clustering failed", e);
  }
}
 
开发者ID:yintaoxue,项目名称:read-open-source-code,代码行数:37,代码来源:CarrotClusteringEngine.java

示例8: displayDocuments

import org.carrot2.core.Document; //导入依赖的package包/类
/**
 * Prints the number of documents in the collection, then prints each
 * document through {@code displayDocument} at indentation level 0.
 *
 * @author GS
 * @param documents the documents to print
 */
public static void displayDocuments(final Collection<Document> documents)
{
    // Header line with the total document count.
    final String header = "Collected " + documents.size() + " documents\n";
    System.out.println(header);

    for (final Document doc : documents)
    {
        displayDocument(0, doc);
    }
}
 
开发者ID:gsh199449,项目名称:DistributedCrawler,代码行数:14,代码来源:ConsoleFormatter.java

示例9: displayDocument

import org.carrot2.core.Document; //导入依赖的package包/类
/**
 * Prints a single document: its string id and title on one line, then its
 * content URL on a second line when that URL is not blank, followed by an
 * empty line. Every line starts with the indent returned by
 * {@code getIndent(level)}.
 *
 * @author GS
 * @param level indentation level passed to {@code getIndent}
 * @param document the document to print
 */
private static void displayDocument(final int level, Document document)
{
    final String indent = getIndent(level);

    // The indent is passed as an argument rather than concatenated into the
    // format string, so a '%' in it can never be read as a format specifier.
    System.out.printf("%s[%2s] ", indent, document.getStringId());
    System.out.println(document.getField(Document.TITLE));

    // The URL line is optional: it is printed only when a URL is present.
    final String url = document.getField(Document.CONTENT_URL);
    if (StringUtils.isNotBlank(url))
    {
        System.out.println(indent + "     " + url);
    }
    System.out.println();
}
 
开发者ID:gsh199449,项目名称:DistributedCrawler,代码行数:20,代码来源:ConsoleFormatter.java

示例10: displayCluster

import org.carrot2.core.Document; //导入依赖的package包/类
/**
 * Prints one cluster and, recursively, its subclusters.
 * <p>
 * The output is the cluster label followed by the text returned by the
 * details formatter, then up to {@code maxNumberOfDocumentsToShow}
 * documents, then a "... and N more" line when documents were left out.
 *
 * @author GS
 * @param level nesting depth; the label is indented by two spaces per level
 * @param tag hierarchical tag of this cluster; it is not printed here, only
 *        extended with {@code "." + number} for each subcluster
 * @param cluster the cluster to print
 * @param maxNumberOfDocumentsToShow upper bound on the documents printed
 *        for each cluster
 * @param clusterDetailsFormatter produces the details text printed after
 *        the label
 */
private static void displayCluster(final int level, String tag, Cluster cluster,
    int maxNumberOfDocumentsToShow, ClusterDetailsFormatter clusterDetailsFormatter)
{
    final String label = cluster.getLabel();

    // Indent up to level and display this cluster's description phrase.
    for (int i = 0; i < level; i++)
    {
        System.out.print("  ");
    }
    System.out.println(label + "  "
        + clusterDetailsFormatter.formatClusterDetails(cluster));

    // Display at most maxNumberOfDocumentsToShow of this cluster's documents.
    int documentsShown = 0;
    for (final Document document : cluster.getDocuments())
    {
        if (documentsShown >= maxNumberOfDocumentsToShow)
        {
            break;
        }
        // Documents are printed one level deeper than their cluster.
        displayDocument(level + 1, document);
        documentsShown++;
    }
    if (maxNumberOfDocumentsToShow > 0
        && (cluster.getDocuments().size() > documentsShown))
    {
        System.out.println(getIndent(level + 1) + "... and "
            + (cluster.getDocuments().size() - documentsShown) + " more\n");
    }

    // Finally, descend into the subclusters. The counter advances for each
    // one so sibling subclusters receive distinct tags (.1, .2, ...) instead
    // of all sharing ".1".
    int subclusterNumber = 1;
    for (final Cluster subcluster : cluster.getSubclusters())
    {
        displayCluster(level + 1, tag + "." + subclusterNumber, subcluster,
            maxNumberOfDocumentsToShow, clusterDetailsFormatter);
        subclusterNumber++;
    }
}
 
开发者ID:gsh199449,项目名称:DistributedCrawler,代码行数:49,代码来源:ConsoleFormatter.java

示例11: clustersToNamedList

import org.carrot2.core.Document; //导入依赖的package包/类
/**
 * Appends one named-list entry per cluster to {@code parent}. Each entry
 * holds the cluster's labels (at most {@code maxLabels}), its score when
 * non-null, an "other-topics" marker when set, the ids of its documents,
 * and, when {@code outputSubClusters} is true and subclusters exist, a
 * nested "clusters" list built recursively.
 *
 * @param outputClusters the clusters to convert
 * @param parent the list receiving the converted entries
 * @param outputSubClusters whether subclusters are emitted as nested lists;
 *        when false, each entry lists the result of {@code getAllDocuments()}
 * @param maxLabels maximum number of labels kept per cluster
 */
private void clustersToNamedList(List<Cluster> outputClusters,
                                 List<NamedList<Object>> parent, boolean outputSubClusters, int maxLabels) {
  for (Cluster source : outputClusters) {
    final NamedList<Object> entry = new SimpleOrderedMap<>();
    parent.add(entry);

    // Labels, truncated to the first maxLabels phrases.
    final List<String> phrases = source.getPhrases();
    entry.add("labels",
        phrases.size() > maxLabels ? phrases.subList(0, maxLabels) : phrases);

    // Score, only when the cluster has one.
    final Double clusterScore = source.getScore();
    if (clusterScore != null) {
      entry.add("score", clusterScore);
    }

    // Marker for the "other topics" cluster.
    if (source.isOtherTopics()) {
      entry.add("other-topics", source.isOtherTopics());
    }

    // Document ids: the cluster's own documents when subclusters are
    // emitted separately, otherwise the result of getAllDocuments().
    final List<Document> members;
    if (outputSubClusters) {
      members = source.getDocuments();
    } else {
      members = source.getAllDocuments();
    }
    final List<Object> memberIds = Lists.newArrayList();
    entry.add("docs", memberIds);
    for (Document member : members) {
      memberIds.add(member.getField(SOLR_DOCUMENT_ID));
    }

    // Nested subclusters, when requested and present.
    if (outputSubClusters && !source.getSubclusters().isEmpty()) {
      final List<NamedList<Object>> nested = Lists.newArrayList();
      entry.add("clusters", nested);
      clustersToNamedList(source.getSubclusters(), nested,
              outputSubClusters, maxLabels);
    }
  }
}
 
开发者ID:europeana,项目名称:search,代码行数:42,代码来源:CarrotClusteringEngine.java

示例12: clustersToNamedList

import org.carrot2.core.Document; //导入依赖的package包/类
/**
 * Converts each cluster into a named list and appends it to {@code parent}.
 * An entry contains "labels" (at most {@code maxLabels} phrases), "score"
 * when non-null, "other-topics" when the flag is set, "docs" with the ids
 * of the cluster's documents, and a recursive "clusters" list when
 * {@code outputSubClusters} is true and the cluster has subclusters.
 *
 * @param outputClusters the clusters to convert
 * @param parent the list receiving the converted entries
 * @param outputSubClusters whether subclusters are emitted as nested lists;
 *        when false, each entry lists the result of {@code getAllDocuments()}
 * @param maxLabels maximum number of labels kept per cluster
 */
private void clustersToNamedList(List<Cluster> outputClusters,
                                 List<NamedList<Object>> parent, boolean outputSubClusters, int maxLabels) {
  for (Cluster current : outputClusters) {
    final NamedList<Object> node = new SimpleOrderedMap<Object>();
    parent.add(node);

    // Keep no more than maxLabels labels.
    final List<String> allLabels = current.getPhrases();
    final List<String> keptLabels;
    if (allLabels.size() > maxLabels) {
      keptLabels = allLabels.subList(0, maxLabels);
    } else {
      keptLabels = allLabels;
    }
    node.add("labels", keptLabels);

    // The score is optional.
    final Double value = current.getScore();
    if (value != null) {
      node.add("score", value);
    }

    // Flag the "other topics" cluster.
    if (current.isOtherTopics()) {
      node.add("other-topics", current.isOtherTopics());
    }

    // Without nested output, list the result of getAllDocuments();
    // with nested output, list only the cluster's own documents.
    final List<Document> contained =
        outputSubClusters ? current.getDocuments() : current.getAllDocuments();
    final List<Object> ids = Lists.newArrayList();
    node.add("docs", ids);
    for (Document item : contained) {
      ids.add(item.getField(SOLR_DOCUMENT_ID));
    }

    // Recurse into subclusters when asked to and there are any.
    if (outputSubClusters && !current.getSubclusters().isEmpty()) {
      final List<NamedList<Object>> children = Lists.newArrayList();
      node.add("clusters", children);
      clustersToNamedList(current.getSubclusters(), children,
              outputSubClusters, maxLabels);
    }
  }
}
 
开发者ID:pkarmstr,项目名称:NYBC,代码行数:42,代码来源:CarrotClusteringEngine.java


注:本文中的org.carrot2.core.Document类示例由纯净天空整理自Github/MSDocs等开源代码及文档管理平台,相关代码片段筛选自各路编程大神贡献的开源项目,源码版权归原作者所有,传播和使用请参考对应项目的License;未经允许,请勿转载。