当前位置: 首页>>代码示例>>Java>>正文


Java RetrievalFactory类代码示例

本文整理汇总了Java中org.lemurproject.galago.core.retrieval.RetrievalFactory的典型用法代码示例。如果您正苦于以下问题:Java RetrievalFactory类的具体用法?Java RetrievalFactory怎么用?Java RetrievalFactory使用的例子?那么,这里精选的类代码示例或许可以为您提供帮助。


RetrievalFactory类属于org.lemurproject.galago.core.retrieval包,在下文中一共展示了RetrievalFactory类的15个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Java代码示例。

示例1: verifyIndexStructures

import org.lemurproject.galago.core.retrieval.RetrievalFactory; //导入依赖的package包/类
/**
 * Asserts that {@code indexPath} is a directory holding an index whose available parts
 * include "lengths", "names", "names.reverse" and "postings", and that every reported
 * part exists as a child of {@code indexPath}.
 *
 * <p>The retrieval opened for the check is closed before returning, including when an
 * assertion fails.
 *
 * @param indexPath root directory of the index to verify
 * @throws Exception if the retrieval cannot be opened, queried or closed
 */
public static void verifyIndexStructures(File indexPath) throws Exception {
  // Check main path
  assertTrue(indexPath.isDirectory());
  // Time to check standard parts
  Retrieval ret = RetrievalFactory.instance(indexPath.getAbsolutePath(), Parameters.create());
  try {
    Parameters availableParts = ret.getAvailableParts();
    assertNotNull(availableParts);

    // ensure that we have (at least) the basic parts
    assertTrue(availableParts.containsKey("lengths"));
    assertTrue(availableParts.containsKey("names"));
    assertTrue(availableParts.containsKey("names.reverse"));
    assertTrue(availableParts.containsKey("postings"));

    // every part the retrieval reports must be backed by a file or directory on disk
    for (String part : availableParts.getKeys()) {
      File childPath = new File(indexPath, part);
      assertTrue(childPath.exists());
    }
  } finally {
    // release the index even when one of the assertions above fails
    ret.close();
  }
}
 
开发者ID:teanalab,项目名称:demidovii,代码行数:21,代码来源:TestingUtils.java

示例2: run

import org.lemurproject.galago.core.retrieval.RetrievalFactory; //导入依赖的package包/类
/**
 * For each query listed under the "x" parameter, parses it, marks it with
 * {@code queryType=count}, transforms it, and prints the resulting node's
 * {@code nodeFrequency} statistic followed by a tab and the original query text.
 *
 * <p>Prints the help string and returns if either "index" or "x" is missing. When
 * "printTransformation" is true, the raw, parsed and transformed queries are echoed to
 * {@code System.err}.
 *
 * @param p      parameters; must contain "index" and "x"
 * @param output stream that receives one "count&lt;TAB&gt;query" line per query
 * @throws Exception if the retrieval cannot be created, queried or closed
 */
@Override
public void run(Parameters p, PrintStream output) throws Exception {
  if (!p.containsKey("index") || !p.containsKey("x")) {
    output.println(this.getHelpString());
    return;
  }

  Retrieval r = RetrievalFactory.create(p);
  try {
    for (String query : (List<String>) p.getAsList("x")) {
      Node parsed = StructuredQuery.parse(query);
      parsed.getNodeParameters().set("queryType", "count");
      Node transformed = r.transformQuery(parsed, Parameters.create());

      if (p.get("printTransformation", false)) {
        System.err.println(query);
        System.err.println(parsed);
        System.err.println(transformed);
      }

      long count = r.getNodeStatistics(transformed).nodeFrequency;
      output.println(count + "\t" + query);
    }
  } finally {
    // close the retrieval even if a query fails to parse, transform or execute
    r.close();
  }
}
 
开发者ID:teanalab,项目名称:demidovii,代码行数:27,代码来源:XCountFn.java

示例3: run

import org.lemurproject.galago.core.retrieval.RetrievalFactory; //导入依赖的package包/类
/**
 * Runs the "query" parameter against the retrieval, extracts weighted expansion terms
 * from the initial results via {@code RelevanceModel1.extractGrams}, and prints up to
 * "numTerms" (default 10) of them as "term&lt;TAB&gt;weight" lines.
 *
 * <p>Terms in the word list named by "rmstopwords" (default "rmstop") and the stemmed
 * terms of the transformed query are passed to the extractor as exclusions; no whitelist
 * is applied.
 *
 * @param p      parameters; "query" is required, "numTerms" and "rmstopwords" are optional
 * @param output unused by this implementation; see the review note in the body
 * @throws Exception if the retrieval cannot be created, queried or closed
 */
@Override
public void run(Parameters p, PrintStream output) throws Exception {
  int numTerms = p.get("numTerms", 10);
  Node query = StructuredQuery.parse(p.getString("query"));
  Retrieval ret = RetrievalFactory.create(p);
  try {
    Stemmer stemmer = RelevanceModel1.getStemmer(p, ret);

    Node xquery = ret.transformQuery(query, p);
    List<ScoredDocument> initialResults = ret.executeQuery(xquery, p).scoredDocuments;

    System.err.println("Found "+initialResults.size()+" results for "+query);

    Set<String> stemmedQueryTerms = RelevanceModel1.stemTerms(stemmer, StructuredQuery.findQueryTerms(xquery));
    Set<String> exclusions = WordLists.getWordList(p.get("rmstopwords", "rmstop"));
    Set<String> inclusions = null; // no whitelist

    List<WeightedTerm> weightedTerms = RelevanceModel1.extractGrams(ret, initialResults, stemmer, p, stemmedQueryTerms, exclusions, inclusions);

    // NOTE(review): results go to System.out rather than the supplied `output` stream;
    // kept as-is to preserve behavior -- confirm whether callers rely on this.
    int limit = Math.min(weightedTerms.size(), numTerms);
    for (int i = 0; i < limit; i++) {
      WeightedTerm wt = weightedTerms.get(i);
      System.out.printf("%s\t%f\n",wt.getTerm(), wt.getWeight());
    }
  } finally {
    // the original never closed the retrieval; release it on every exit path
    ret.close();
  }
}
 
开发者ID:teanalab,项目名称:demidovii,代码行数:25,代码来源:GetRMTermsFn.java

示例4: run

import org.lemurproject.galago.core.retrieval.RetrievalFactory; //导入依赖的package包/类
/**
 * For each query listed under the "x" parameter, parses it, marks it with
 * {@code queryType=count}, transforms it, and prints the resulting node's
 * {@code nodeDocumentCount} statistic followed by a tab and the original query text.
 *
 * <p>Prints the help string and returns if either "index" or "x" is missing. When
 * "printTransformation" is true, the raw, parsed and transformed queries are echoed to
 * {@code System.err}.
 *
 * @param p      parameters; must contain "index" and "x"
 * @param output stream that receives one "count&lt;TAB&gt;query" line per query
 * @throws Exception if the retrieval cannot be created, queried or closed
 */
@Override
public void run(Parameters p, PrintStream output) throws Exception {
  if (!p.containsKey("index") || !p.containsKey("x")) {
    output.println(this.getHelpString());
    return;
  }

  Retrieval r = RetrievalFactory.create(p);
  try {
    for (String query : (List<String>) p.getList("x")) {
      Node parsed = StructuredQuery.parse(query);
      parsed.getNodeParameters().set("queryType", "count");
      Node transformed = r.transformQuery(parsed, Parameters.create());

      if (p.get("printTransformation", false)) {
        System.err.println(query);
        System.err.println(parsed);
        System.err.println(transformed);
      }

      long count = r.getNodeStatistics(transformed).nodeDocumentCount;
      output.println(count + "\t" + query);
    }
  } finally {
    // close the retrieval even if a query fails to parse, transform or execute
    r.close();
  }
}
 
开发者ID:teanalab,项目名称:demidovii,代码行数:27,代码来源:DocCountFn.java

示例5: run

import org.lemurproject.galago.core.retrieval.RetrievalFactory; //导入依赖的package包/类
/**
 * For each query listed under the "q" parameter, prints the raw query text, its parsed
 * form and its transformed form, one per line, to {@code System.out}.
 *
 * <p>Prints the help string to {@code output} and returns if either "index" or "q" is
 * missing.
 *
 * @param p      parameters; must contain "index" and "q"
 * @param output stream used for the help string only
 * @throws Exception if the retrieval cannot be created, queried or closed
 */
@Override
public void run(Parameters p, PrintStream output) throws Exception {
  if (!p.containsKey("index") || !p.containsKey("q")) {
    output.println(this.getHelpString());
    return;
  }

  Retrieval r = RetrievalFactory.create(p);
  try {
    // NOTE(review): the query forms are written to System.out, not `output`;
    // kept as-is to preserve behavior.
    for (String query : (List<String>) p.getAsList("q")) {
      System.out.println(query);
      Node parsed = StructuredQuery.parse(query);
      System.out.println(parsed);
      Node transformed = r.transformQuery(parsed, Parameters.create());
      System.out.println(transformed);
    }
  } finally {
    // close the retrieval even if a query fails to parse or transform
    r.close();
  }
}
 
开发者ID:teanalab,项目名称:demidovii,代码行数:19,代码来源:QueryTransformFn.java

示例6: run

import org.lemurproject.galago.core.retrieval.RetrievalFactory; //导入依赖的package包/类
/**
 * Reads document identifiers from the stream named by the "input" parameter, fetches
 * the documents in batches of 100 (text component only), and prints one
 * {@code Parameters} object per document with its "id" and "content".
 *
 * @param p      parameters used to create the retrieval; must contain "input"
 * @param output stream that receives one line per fetched document
 * @throws Exception if the input cannot be read or the retrieval cannot be created,
 *                   queried or closed
 */
@Override
public void run(Parameters p, PrintStream output) throws Exception {
  final int batchSize = 100;

  Retrieval index = RetrievalFactory.create(p);
  try {
    List<String> ids = new ArrayList<>(Utility.readStreamToStringSet(StreamCreator.openInputStream(p.getString("input"))));

    for (int i = 0; i < ids.size(); i += batchSize) {
      // subList's upper bound is exclusive; clamp it so the last batch may be short
      List<String> batch = ids.subList(i, Math.min(ids.size(), i + batchSize));
      Map<String, Document> docs = index.getDocuments(batch, Document.DocumentComponents.JustText);
      for (Document document : docs.values()) {
        Parameters docP = Parameters.create();
        docP.put("id", document.name);
        docP.put("content", document.text);
        output.println(docP);
      }
    }
  } finally {
    // the original never closed the retrieval; release it on every exit path
    index.close();
  }
}
 
开发者ID:teanalab,项目名称:demidovii,代码行数:19,代码来源:GetDocsJSONL.java

示例7: verifyIndexStructures

import org.lemurproject.galago.core.retrieval.RetrievalFactory; //导入依赖的package包/类
/**
 * Asserts that {@code indexPath} is a directory holding an index whose available parts
 * include "lengths", "names", "names.reverse" and "postings", and that every reported
 * part exists as a child of {@code indexPath}.
 *
 * <p>The retrieval opened for the check is closed before returning, including when an
 * assertion fails.
 *
 * @param indexPath root directory of the index to verify
 * @throws Exception if the retrieval cannot be opened, queried or closed
 */
public static void verifyIndexStructures(File indexPath) throws Exception {
    // Check main path
    assertTrue(indexPath.isDirectory());
    // Time to check standard parts
    Retrieval ret = RetrievalFactory.instance(indexPath.getAbsolutePath(), Parameters.create());
    try {
      Parameters availableParts = ret.getAvailableParts();
      assertNotNull(availableParts);

      // ensure that we have (at least) the basic parts
      assertTrue(availableParts.containsKey("lengths"));
      assertTrue(availableParts.containsKey("names"));
      assertTrue(availableParts.containsKey("names.reverse"));
      assertTrue(availableParts.containsKey("postings"));

      // every part the retrieval reports must be backed by a file or directory on disk
      for (String part : availableParts.getKeys()) {
        File childPath = new File(indexPath, part);
        assertTrue(childPath.exists());
      }
    } finally {
      // release the index even when one of the assertions above fails
      ret.close();
    }
}
 
开发者ID:teanalab,项目名称:demidovii,代码行数:21,代码来源:AppTest.java

示例8: run

import org.lemurproject.galago.core.retrieval.RetrievalFactory; //导入依赖的package包/类
/**
 * Builds a learner over a retrieval created from {@code p}, runs it, and prints the
 * learned model instance as parameters to {@code output}.
 *
 * @param p      parameters handed to both the retrieval factory and the learner factory
 * @param output stream that receives the tuned parameters
 * @throws Exception if creating the retrieval or learner, learning, or closing fails
 */
@Override
public void run(Parameters p, PrintStream output) throws Exception {
  // should check parameters here.
  final Learner learner = LearnerFactory.instance(p, RetrievalFactory.create(p));
  try {
    output.println(learner.learn().toParameters().toString());
  } finally {
    // closing the learner ensures all buffered streams are correctly flushed.
    learner.close();
  }
}
 
开发者ID:teanalab,项目名称:demidovii,代码行数:15,代码来源:LearnQueryParameters.java

示例9: run

import org.lemurproject.galago.core.retrieval.RetrievalFactory; //导入依赖的package包/类
/**
 * Tokenizes the text read from the "input" parameter with the index's own tokenizer and
 * prints, for every distinct term, the node statistics of a {@code counts} node for that
 * term ("cf", "maxTF", "df"), together with the collection length ("clen") taken from
 * the {@code lengths} node.
 *
 * <p>Output is pretty-printed unless the "pretty" parameter is false.
 *
 * @param p      parameters used to create the retrieval; must contain "input"
 * @param output stream that receives the resulting {@code Parameters} object
 * @throws Exception if the input cannot be read or the retrieval cannot be created,
 *                   queried or closed
 */
@Override
public void run(Parameters p, PrintStream output) throws Exception {
  Retrieval index = RetrievalFactory.create(p);
  try {
    Tokenizer tokenizer = index.getTokenizer();

    String text = StreamUtil.copyStreamToString(StreamCreator.openInputStream(p.getString("input")));

    Document doc = tokenizer.tokenize(text);
    // de-duplicate so each term is looked up only once
    HashSet<String> uniq = new HashSet<>(doc.terms);

    List<Parameters> termInfos = new ArrayList<>();
    for (String query : uniq) {
      Parameters termStats = Parameters.create();
      NodeStatistics counts = index.getNodeStatistics(new Node("counts", query));
      termStats.set("term", query);
      termStats.set("cf", counts.nodeFrequency);
      termStats.set("maxTF", counts.maximumCount);
      termStats.set("df", counts.nodeDocumentCount);
      termInfos.add(termStats);
    }

    Parameters overall = Parameters.create();
    FieldStatistics lengths = index.getCollectionStatistics(new Node("lengths"));
    overall.put("clen", lengths.collectionLength);
    overall.put("terms", termInfos);

    if (p.get("pretty", true)) {
      output.println(overall.toPrettyString());
    } else {
      output.println(overall);
    }
  } finally {
    // the original never closed the retrieval; release it on every exit path
    index.close();
  }
}
 
开发者ID:teanalab,项目名称:demidovii,代码行数:33,代码来源:TokenizeAndGrabStats.java

示例10: run

import org.lemurproject.galago.core.retrieval.RetrievalFactory; //导入依赖的package包/类
/**
 * Transforms every query collected from {@code p} and prints the transformed tree.
 *
 * <p>The "format" parameter (default "pretty") selects the rendering by its first
 * letter: "m" uses {@code toString()}, "p" uses {@code toPrettyString()}, "s" uses
 * {@code toSimplePrettyString()}. Any other value prints an error line and stops
 * processing. Unless the "id" parameter is false, each line is prefixed with the query
 * number and a tab.
 *
 * @param p      parameters used to create the retrieval and to collect the queries
 * @param output stream that receives the transformed queries
 * @throws Exception if the retrieval cannot be created, queried or closed
 */
@Override
public void run(Parameters p, PrintStream output) throws Exception {
  Retrieval r = RetrievalFactory.create(p);
  try {
    List<Parameters> queries = JSONQueryFormat.collectQueries(p);

    String printMode = p.get("format", "pretty");

    for (Parameters q : queries) {
      String id = q.getString("number");
      String txt = q.getString("text");
      Node raw = StructuredQuery.parse(txt);
      Node trans = r.transformQuery(raw, q);

      if (p.get("id", true)) {
        output.print(id + "\t");
      }

      if (printMode.startsWith("m")) {
        output.println(trans.toString());
      } else if (printMode.startsWith("p")) {
        output.println(trans.toPrettyString());
      } else if (printMode.startsWith("s")) {
        output.println(trans.toSimplePrettyString());
      } else {
        output.println("format: " + printMode + " unknown -- exiting.");
        return;
      }
    }
  } finally {
    // runs on normal completion, on the early return above, and on exceptions
    r.close();
  }
}
 
开发者ID:teanalab,项目名称:demidovii,代码行数:30,代码来源:TransformQueryFn.java

示例11: testAnnotatedNodes

import org.lemurproject.galago.core.retrieval.RetrievalFactory; //导入依赖的package包/类
/**
 * Runs an annotated "#combine( sample document )" query against a freshly built 10-doc
 * index and checks that every result carries an annotation that is at a candidate and
 * that the annotated return values are strictly decreasing down the ranking.
 *
 * <p>The index directory is deleted afterwards regardless of the outcome.
 */
@Test
public void testAnnotatedNodes() throws Exception {
  File[] files = LocalRetrievalTest.make10DocIndex();
  // only the index (files[2]) is needed; discard the other artifacts immediately
  files[0].delete();
  FSUtil.deleteDirectory(files[1]);
  File indexFile = files[2];
  LocalRetrieval r = null;
  try {
    r = (LocalRetrieval) RetrievalFactory.instance(indexFile.getAbsolutePath(), Parameters.create());

    String qtext = "#combine( sample document )";
    Node qnode = StructuredQuery.parse(qtext);
    qnode = r.transformQuery(qnode, Parameters.create());
    ProcessingModel proc = new RankedDocumentModel(r);
    Parameters p = Parameters.create();
    p.set("requested", 100);
    p.set("annotate", true);
    ScoredDocument[] results = proc.execute(qnode, p);
    AnnotatedNode prev = null;
    for (ScoredDocument d : results) {
      assertNotNull(d.annotation);
      AnnotatedNode anode = d.annotation;
      assertTrue(anode.atCandidate);
      if (prev != null) {
        // use a JUnit assertion: the `assert` keyword is skipped when the JVM runs
        // without -ea, which would silently disable this ordering check
        assertTrue(Double.parseDouble(prev.returnValue) > Double.parseDouble(anode.returnValue));
      }
      prev = anode;
    }
  } finally {
    try {
      // release the index before removing its files from disk
      if (r != null) {
        r.close();
      }
    } finally {
      if (indexFile != null) {
        FSUtil.deleteDirectory(indexFile);
      }
    }
  }
}
 
开发者ID:teanalab,项目名称:demidovii,代码行数:34,代码来源:AnnotatedNodeTest.java

示例12: testExistentialIndicator

import org.lemurproject.galago.core.retrieval.RetrievalFactory; //导入依赖的package包/类
/**
 * Builds an {@code #any} iterator over the postings of "cat" and "program" and walks it:
 * the first candidate is document 2, syncing to 3 lands on document 4, and moving past
 * that exhausts the iterator.
 */
@Test
public void testExistentialIndicator() throws Exception {
  // Create a retrieval object for use by the traversal
  Parameters p = Parameters.create();
  p.set("retrievalGroup", "all");
  p.set("index", indexFile.getAbsolutePath());
  LocalRetrieval retrieval = (LocalRetrieval) RetrievalFactory.create(p);
  try {
    Node parsedTree = StructuredQuery.parse("#any( #counts:cat:part=postings() #counts:program:part=postings() )");

    ExistentialIndicatorIterator eii = (ExistentialIndicatorIterator) retrieval.createIterator(Parameters.create(), parsedTree);

    ScoringContext sc = new ScoringContext();

    // initial state
    assertEquals(2, eii.currentCandidate());
    sc.document = 2;
    assertTrue(eii.indicator(sc));
    assertTrue(eii.hasMatch(sc));

    eii.syncTo(3);
    assertEquals(4, eii.currentCandidate());
    sc.document = 4;
    assertTrue(eii.indicator(sc));

    eii.movePast(eii.currentCandidate());
    assertTrue(eii.isDone());
  } finally {
    // close the retrieval even when an assertion above fails
    retrieval.close();
  }
}
 
开发者ID:teanalab,项目名称:demidovii,代码行数:31,代码来源:IndicatorIteratorTest.java

示例13: testUniversalIndicator

import org.lemurproject.galago.core.retrieval.RetrievalFactory; //导入依赖的package包/类
/**
 * Builds an {@code #all} iterator over the postings of "document" and "sample" and walks
 * it: candidates are documents 0, 2 and 4 in turn, after which the iterator is done.
 */
@Test
public void testUniversalIndicator() throws Exception {
  // Create a retrieval object for use by the traversal
  Parameters p = Parameters.create();
  p.set("retrievalGroup", "all");
  p.set("index", indexFile.getAbsolutePath());
  LocalRetrieval retrieval = (LocalRetrieval) RetrievalFactory.create(p);
  try {
    Node parsedTree = StructuredQuery.parse("#all( #counts:document:part=postings() #counts:sample:part=postings() )");
    UniversalIndicatorIterator uii = (UniversalIndicatorIterator) retrieval.createIterator(Parameters.create(), parsedTree);

    ScoringContext sc = new ScoringContext();

    // initial state
    assertEquals(0, uii.currentCandidate());
    sc.document = 0;
    assertTrue(uii.indicator(sc));
    assertTrue(uii.hasMatch(sc));

    uii.syncTo(1);
    assertEquals(2, uii.currentCandidate());
    sc.document = 2;
    assertTrue(uii.indicator(sc));
    assertTrue(uii.hasMatch(sc));

    uii.movePast(uii.currentCandidate());
    assertEquals(4, uii.currentCandidate());

    uii.movePast(uii.currentCandidate());
    assertTrue(uii.isDone());
  } finally {
    // close the retrieval even when an assertion above fails
    retrieval.close();
  }
}
 
开发者ID:teanalab,项目名称:demidovii,代码行数:34,代码来源:IndicatorIteratorTest.java

示例14: testComplexIterator

import org.lemurproject.galago.core.retrieval.RetrievalFactory; //导入依赖的package包/类
/**
 * Builds a {@code #require} iterator whose condition is an {@code #all} over the
 * postings of "document" and checks that it visits candidates 0, 2 and 4 and is then
 * done.
 */
@Test
public void testComplexIterator() throws Exception {
  // Create a retrieval object for use by the traversal
  Parameters p = Parameters.create();
  p.set("index", indexFile.getAbsolutePath());
  LocalRetrieval retrieval = (LocalRetrieval) RetrievalFactory.create(p);
  try {
    Node root = StructuredQuery.parse("#require(#all( #counts:document:part=postings() ) #counts:document:part=postings() )");
    root = retrieval.transformQuery(root, p);

    // The original kept a ScoringContext that was written but never read or passed to
    // the iterator; the candidates are asserted directly instead.
    RequireIterator mi = (RequireIterator) retrieval.createIterator(Parameters.create(), root);

    assertEquals(0, mi.currentCandidate());
    assertFalse(mi.isDone());

    mi.movePast(mi.currentCandidate());
    assertEquals(2, mi.currentCandidate());
    assertFalse(mi.isDone());

    mi.movePast(mi.currentCandidate());
    assertEquals(4, mi.currentCandidate());
    assertFalse(mi.isDone());

    mi.movePast(mi.currentCandidate());
    assertTrue(mi.isDone());
  } finally {
    // the original never closed the retrieval, unlike the sibling indicator tests
    retrieval.close();
  }
}
 
开发者ID:teanalab,项目名称:demidovii,代码行数:31,代码来源:IndicatorIteratorTest.java

示例15: testRelevanceModel1Traversal

import org.lemurproject.galago.core.retrieval.RetrievalFactory; //导入依赖的package包/类
@Test
public void testRelevanceModel1Traversal() throws Exception {
  // Create a retrieval object for use by the traversal
  Parameters p = Parameters.create();
  p.set("index", indexFile.getAbsolutePath());
  p.set("stemmedPostings", false);
  p.set("fbOrigWeight", 0.5);
  p.set("relevanceModel", RelevanceModel1.class.getName());
  p.set("rmwhitelist", "sentiwordlist.txt");
  LocalRetrieval retrieval = (LocalRetrieval) RetrievalFactory.create(p);
  RelevanceModelTraversal traversal = new RelevanceModelTraversal(retrieval);

  Node parsedTree = StructuredQuery.parse("#rm:fbDocs=10:fbTerms=4( #dirichlet( #extents:jumped:part=postings() ) )");
  Node transformed = traversal.traverse(parsedTree, Parameters.create());
  // truth data
  StringBuilder correct = new StringBuilder();
  /* No sentiwordlist.txt
  correct.append("#combine:0=0.05001660577881102:1=0.05001660577881102:2=0.04165282851765748:3=0.04165282851765748( ");
  correct.append("#text:sample() ");
  correct.append("#text:ugly() ");
  correct.append("#text:cat() ");
  correct.append("#text:moon() )");
*/
  assertEquals(transformed.getNodeParameters().get("0", -1.0), 0.05001, 0.00001);
  assertEquals(transformed.getNodeParameters().get("1", -1.0), 0.04165, 0.00001);
  assertEquals("text", transformed.getChild(0).getOperator());
  assertEquals("ugly", transformed.getChild(0).getDefaultParameter());
  assertEquals("text", transformed.getChild(1).getOperator());
  assertEquals("moon", transformed.getChild(1).getDefaultParameter());
  //correct.append("#combine:0=0.05001660577881102:1=0.04165282851765748( ");
  //correct.append("#text:ugly() ");
  //correct.append("#text:moon() )");

  retrieval.close();
}
 
开发者ID:teanalab,项目名称:demidovii,代码行数:36,代码来源:RelevanceFeedbackTraversalTest.java


注:本文中的org.lemurproject.galago.core.retrieval.RetrievalFactory类示例由纯净天空整理自Github/MSDocs等开源代码及文档管理平台,相关代码片段筛选自各路编程大神贡献的开源项目,源码版权归原作者所有,传播和使用请参考对应项目的License;未经允许,请勿转载。