当前位置: 首页>>代码示例>>Java>>正文


Java RetrievalFactory.create方法代码示例

本文整理汇总了Java中org.lemurproject.galago.core.retrieval.RetrievalFactory.create方法的典型用法代码示例。如果您正苦于以下问题:Java RetrievalFactory.create方法的具体用法?Java RetrievalFactory.create怎么用?Java RetrievalFactory.create使用的例子?那么,这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在类org.lemurproject.galago.core.retrieval.RetrievalFactory的用法示例。


在下文中一共展示了RetrievalFactory.create方法的15个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Java代码示例。

示例1: run

import org.lemurproject.galago.core.retrieval.RetrievalFactory; //导入方法依赖的package包/类
/**
 * Prints, for every query listed under the "x" parameter, the
 * {@code nodeFrequency} statistic of the transformed query, a tab, and the
 * original query text.
 *
 * <p>When "index" or "x" is missing the help string is printed instead and
 * no retrieval is opened.
 *
 * @param p command parameters: "index" and "x" are required;
 *          "printTransformation" (default {@code false}) echoes the raw,
 *          parsed and transformed query to stderr
 * @param output receives the help text or the count lines
 * @throws Exception propagated from retrieval creation, query parsing,
 *         transformation, the statistics lookup or closing the retrieval
 */
@Override
public void run(Parameters p, PrintStream output) throws Exception {
  if (!p.containsKey("index") || !p.containsKey("x")) {
    output.println(this.getHelpString());
    return;
  }

  Retrieval r = RetrievalFactory.create(p);
  try {
    for (String query : (List<String>) p.getAsList("x")) {
      Node parsed = StructuredQuery.parse(query);
      // Mark the query as a count query before it is transformed.
      parsed.getNodeParameters().set("queryType", "count");
      Node transformed = r.transformQuery(parsed, Parameters.create());

      if (p.get("printTransformation", false)) {
        System.err.println(query);
        System.err.println(parsed);
        System.err.println(transformed);
      }

      long count = r.getNodeStatistics(transformed).nodeFrequency;
      output.println(count + "\t" + query);
    }
  } finally {
    // Release the retrieval even if one of the queries fails part-way through.
    r.close();
  }
}
 
开发者ID:teanalab,项目名称:demidovii,代码行数:27,代码来源:XCountFn.java

示例2: run

import org.lemurproject.galago.core.retrieval.RetrievalFactory; //导入方法依赖的package包/类
/**
 * Runs the query given under "query", extracts weighted expansion terms from
 * the initial results with {@code RelevanceModel1.extractGrams}, and prints
 * the first "numTerms" (default 10) of them as {@code term<TAB>weight} lines.
 *
 * <p>NOTE(review): the lines are written to {@code System.out} and the
 * result count to {@code System.err}; the {@code output} argument is not
 * used. Left unchanged here -- confirm whether that is intentional.
 *
 * @param p command parameters: "query" is read with {@code getString};
 *          "numTerms" and "rmstopwords" (default "rmstop") are optional; the
 *          same object is also passed on to the retrieval and to
 *          {@code RelevanceModel1}
 * @param output not used by this method
 * @throws Exception propagated from parsing, retrieval or term extraction
 */
@Override
public void run(Parameters p, PrintStream output) throws Exception {
  int numTerms = p.get("numTerms", 10);
  Node query = StructuredQuery.parse(p.getString("query"));
  Retrieval ret = RetrievalFactory.create(p);
  try {
    Stemmer stemmer = RelevanceModel1.getStemmer(p, ret);

    Node xquery = ret.transformQuery(query, p);
    List<ScoredDocument> initialResults = ret.executeQuery(xquery, p).scoredDocuments;

    System.err.println("Found "+initialResults.size()+" results for "+query);

    Set<String> stemmedQueryTerms = RelevanceModel1.stemTerms(stemmer, StructuredQuery.findQueryTerms(xquery));
    Set<String> exclusions = WordLists.getWordList(p.get("rmstopwords", "rmstop"));
    Set<String> inclusions = null; // no whitelist

    List<WeightedTerm> weightedTerms = RelevanceModel1.extractGrams(ret, initialResults, stemmer, p, stemmedQueryTerms, exclusions, inclusions);

    // Print at most numTerms entries, in the order extractGrams returned them.
    int limit = Math.min(numTerms, weightedTerms.size());
    for (int i = 0; i < limit; i++) {
      WeightedTerm wt = weightedTerms.get(i);
      System.out.printf("%s\t%f\n",wt.getTerm(), wt.getWeight());
    }
  } finally {
    // The original never closed the retrieval; release it on every path.
    ret.close();
  }
}
 
开发者ID:teanalab,项目名称:demidovii,代码行数:25,代码来源:GetRMTermsFn.java

示例3: run

import org.lemurproject.galago.core.retrieval.RetrievalFactory; //导入方法依赖的package包/类
/**
 * Prints, for every query listed under the "x" parameter, the
 * {@code nodeDocumentCount} statistic of the transformed query, a tab, and
 * the original query text.
 *
 * <p>When "index" or "x" is missing the help string is printed instead and
 * no retrieval is opened.
 *
 * @param p command parameters: "index" and "x" are required ("x" is read
 *          with {@code getList}); "printTransformation" (default
 *          {@code false}) echoes the raw, parsed and transformed query to
 *          stderr
 * @param output receives the help text or the count lines
 * @throws Exception propagated from retrieval creation, query parsing,
 *         transformation, the statistics lookup or closing the retrieval
 */
@Override
public void run(Parameters p, PrintStream output) throws Exception {
  if (!p.containsKey("index") || !p.containsKey("x")) {
    output.println(this.getHelpString());
    return;
  }

  Retrieval r = RetrievalFactory.create(p);
  try {
    for (String query : (List<String>) p.getList("x")) {
      Node parsed = StructuredQuery.parse(query);
      // Mark the query as a count query before it is transformed.
      parsed.getNodeParameters().set("queryType", "count");
      Node transformed = r.transformQuery(parsed, Parameters.create());

      if (p.get("printTransformation", false)) {
        System.err.println(query);
        System.err.println(parsed);
        System.err.println(transformed);
      }

      long count = r.getNodeStatistics(transformed).nodeDocumentCount;
      output.println(count + "\t" + query);
    }
  } finally {
    // Release the retrieval even if one of the queries fails part-way through.
    r.close();
  }
}
 
开发者ID:teanalab,项目名称:demidovii,代码行数:27,代码来源:DocCountFn.java

示例4: run

import org.lemurproject.galago.core.retrieval.RetrievalFactory; //导入方法依赖的package包/类
/**
 * For every query listed under the "q" parameter, prints the raw query
 * text, the parsed query tree and the transformed query tree, one per line.
 *
 * <p>When "index" or "q" is missing the help string is written to
 * {@code output} and no retrieval is opened.
 *
 * <p>NOTE(review): the three lines per query go to {@code System.out}, not
 * to {@code output}; left unchanged -- confirm whether that is intentional.
 *
 * @param p command parameters: "index" and "q" are required
 * @param output receives only the help text
 * @throws Exception propagated from retrieval creation, parsing,
 *         transformation or closing the retrieval
 */
@Override
public void run(Parameters p, PrintStream output) throws Exception {
  if (!p.containsKey("index") || !p.containsKey("q")) {
    output.println(this.getHelpString());
    return;
  }

  Retrieval r = RetrievalFactory.create(p);
  try {
    for (String query : (List<String>) p.getAsList("q")) {
      System.out.println(query);
      Node parsed = StructuredQuery.parse(query);
      System.out.println(parsed);
      Node transformed = r.transformQuery(parsed, Parameters.create());
      System.out.println(transformed);
    }
  } finally {
    // Release the retrieval even if a query fails to parse or transform.
    r.close();
  }
}
 
开发者ID:teanalab,项目名称:demidovii,代码行数:19,代码来源:QueryTransformFn.java

示例5: run

import org.lemurproject.galago.core.retrieval.RetrievalFactory; //导入方法依赖的package包/类
/**
 * Reads a set of document identifiers from the stream named by the "input"
 * parameter, fetches the documents in batches of 100 with the
 * {@code JustText} component selection, and prints one {@code Parameters}
 * object per returned document holding its "id" (document name) and
 * "content" (document text).
 *
 * @param p command parameters: "input" is read with {@code getString}; the
 *          same object is used to create the retrieval
 * @param output receives one line per returned document
 * @throws Exception propagated from reading the input, the retrieval, or
 *         closing the retrieval
 */
@Override
public void run(Parameters p, PrintStream output) throws Exception {

  Retrieval index = RetrievalFactory.create(p);
  try {
    List<String> ids = new ArrayList<>(Utility.readStreamToStringSet(StreamCreator.openInputStream(p.getString("input"))));

    // Fetch in slices of at most 100 identifiers per getDocuments call.
    for (int i = 0; i < ids.size(); i+=100) {
      List<String> batch = ids.subList(i, Math.min(ids.size(), i+100));
      Map<String, Document> docs = index.getDocuments(batch, Document.DocumentComponents.JustText);
      for (Document document : docs.values()) {
        Parameters docP = Parameters.create();
        docP.put("id", document.name);
        docP.put("content", document.text);
        output.println(docP);
      }
    }
  } finally {
    // The original never closed the retrieval; release it on every path.
    index.close();
  }
}
 
开发者ID:teanalab,项目名称:demidovii,代码行数:19,代码来源:GetDocsJSONL.java

示例6: run

import org.lemurproject.galago.core.retrieval.RetrievalFactory; //导入方法依赖的package包/类
/**
 * Creates a retrieval from {@code p}, obtains a {@code Learner} for it from
 * {@code LearnerFactory}, runs {@code learn()} and prints the resulting
 * model instance, converted to {@code Parameters}, on {@code output}.
 *
 * @param p command parameters, handed unchanged to the retrieval and learner
 *          factories (no validation is done here yet)
 * @param output receives the string form of the tuned parameters
 * @throws Exception propagated from the factories, the learner or its close
 */
@Override
public void run(Parameters p, PrintStream output) throws Exception {
  // TODO: validate the incoming parameters before using them.
  Learner learner = LearnerFactory.instance(p, RetrievalFactory.create(p));
  try {
    Parameters tuned = learner.learn().toParameters();
    output.println(tuned.toString());
  } finally {
    // Always close the learner; per the original note this is what makes
    // sure its buffered streams get flushed.
    learner.close();
  }
}
 
开发者ID:teanalab,项目名称:demidovii,代码行数:15,代码来源:LearnQueryParameters.java

示例7: run

import org.lemurproject.galago.core.retrieval.RetrievalFactory; //导入方法依赖的package包/类
/**
 * Tokenizes the text read from the stream named by the "input" parameter
 * with the retrieval's own tokenizer and reports statistics for each
 * distinct term.
 *
 * <p>For every unique term a {@code counts} node is looked up and its
 * {@code nodeFrequency}, {@code maximumCount} and {@code nodeDocumentCount}
 * are stored as "cf", "maxTF" and "df". The result also carries "clen", the
 * {@code collectionLength} of the {@code lengths} node. It is printed with
 * {@code toPrettyString()} unless "pretty" is set to {@code false}.
 *
 * @param p command parameters: "input" is read with {@code getString};
 *          "pretty" defaults to {@code true}
 * @param output receives the resulting parameters object
 * @throws Exception propagated from reading the input, the retrieval, or
 *         closing the retrieval
 */
@Override
public void run(Parameters p, PrintStream output) throws Exception {
  Retrieval index = RetrievalFactory.create(p);
  try {
    Tokenizer tokenizer = index.getTokenizer();

    String text = StreamUtil.copyStreamToString(StreamCreator.openInputStream(p.getString("input")));

    Document doc = tokenizer.tokenize(text);
    // Each distinct term is looked up only once.
    Set<String> uniq = new HashSet<>(doc.terms);

    List<Parameters> termInfos = new ArrayList<>();
    for (String query : uniq) {
      Parameters termStats = Parameters.create();
      NodeStatistics counts = index.getNodeStatistics(new Node("counts", query));
      termStats.set("term", query);
      termStats.set("cf", counts.nodeFrequency);
      termStats.set("maxTF", counts.maximumCount);
      termStats.set("df", counts.nodeDocumentCount);
      termInfos.add(termStats);
    }

    Parameters overall = Parameters.create();
    FieldStatistics lengths = index.getCollectionStatistics(new Node("lengths"));
    overall.put("clen", lengths.collectionLength);
    overall.put("terms", termInfos);

    if(p.get("pretty", true)) {
      output.println(overall.toPrettyString());
    } else {
      output.println(overall);
    }
  } finally {
    // The original never closed the retrieval; release it on every path.
    index.close();
  }
}
 
开发者ID:teanalab,项目名称:demidovii,代码行数:33,代码来源:TokenizeAndGrabStats.java

示例8: run

import org.lemurproject.galago.core.retrieval.RetrievalFactory; //导入方法依赖的package包/类
/**
 * Transforms every query collected by {@code JSONQueryFormat.collectQueries}
 * and prints the transformed tree in the requested format.
 *
 * <p>The "format" parameter (default "pretty") is matched on its prefix:
 * "m" prints {@code toString()}, "p" prints {@code toPrettyString()} and "s"
 * prints {@code toSimplePrettyString()}. Any other value prints an
 * "unknown" message and stops processing. Unless "id" is set to
 * {@code false}, each line is prefixed with the query's "number" and a tab.
 *
 * @param p command parameters: "format" and "id" are optional; the same
 *          object is used to create the retrieval and collect the queries
 * @param output receives the transformed queries
 * @throws Exception propagated from query collection, parsing,
 *         transformation or closing the retrieval
 */
@Override
public void run(Parameters p, PrintStream output) throws Exception {
  Retrieval r = RetrievalFactory.create(p);
  try {
    List<Parameters> queries = JSONQueryFormat.collectQueries(p);

    String printMode = p.get("format", "pretty");

    for (Parameters q : queries) {
      String id = q.getString("number");
      String txt = q.getString("text");
      Node raw = StructuredQuery.parse(txt);
      // The query's own parameters drive the transformation.
      Node trans = r.transformQuery(raw, q);

      if (p.get("id", true)) {
        output.print(id + "\t");
      }

      if (printMode.startsWith("m")) {
        output.println(trans.toString());
      } else if (printMode.startsWith("p")) {
        output.println(trans.toPrettyString());
      } else if (printMode.startsWith("s")) {
        output.println(trans.toSimplePrettyString());
      } else {
        output.println("format: " + printMode + " unknown -- exiting.");
        return;
      }
    }
  } finally {
    // Runs on normal completion, on the early return above, and on failure;
    // the original left the retrieval open in all three cases.
    r.close();
  }
}
 
开发者ID:teanalab,项目名称:demidovii,代码行数:30,代码来源:TransformQueryFn.java

示例9: testExistentialIndicator

import org.lemurproject.galago.core.retrieval.RetrievalFactory; //导入方法依赖的package包/类
/**
 * Walks an {@code #any(...)} query over the "cat" and "program" postings and
 * checks the candidates the {@link ExistentialIndicatorIterator} visits.
 *
 * <p>The expected candidates (2, then 4 after syncing to 3, then done)
 * depend on the fixture index at {@code indexFile}, which is built outside
 * this method.
 */
@Test
public void testExistentialIndicator() throws Exception {
  // Create a retrieval object for use by the traversal
  Parameters p = Parameters.create();
  p.set("retrievalGroup", "all");
  p.set("index", indexFile.getAbsolutePath());
  LocalRetrieval retrieval = (LocalRetrieval) RetrievalFactory.create(p);
  try {
    Node parsedTree = StructuredQuery.parse("#any( #counts:cat:part=postings() #counts:program:part=postings() )");

    ExistentialIndicatorIterator eii = (ExistentialIndicatorIterator) retrieval.createIterator(Parameters.create(), parsedTree);

    ScoringContext sc = new ScoringContext();

    // initial state
    assertEquals(2, eii.currentCandidate());
    sc.document = 2;
    assertTrue(eii.indicator(sc));
    assertEquals(true, eii.hasMatch(sc));

    // syncing to 3 is expected to land on the next candidate, 4
    eii.syncTo(3);
    assertEquals(4, eii.currentCandidate());
    sc.document = 4;
    assertEquals(true, eii.indicator(sc));

    eii.movePast(eii.currentCandidate());
    assertTrue(eii.isDone());
  } finally {
    // Close the index even when an assertion above fails.
    retrieval.close();
  }
}
 
开发者ID:teanalab,项目名称:demidovii,代码行数:31,代码来源:IndicatorIteratorTest.java

示例10: testUniversalIndicator

import org.lemurproject.galago.core.retrieval.RetrievalFactory; //导入方法依赖的package包/类
/**
 * Walks an {@code #all(...)} query over the "document" and "sample" postings
 * and checks the candidates the {@link UniversalIndicatorIterator} visits.
 *
 * <p>The expected candidates (0, 2, 4, then done) depend on the fixture
 * index at {@code indexFile}, which is built outside this method.
 */
@Test
public void testUniversalIndicator() throws Exception {
  // Create a retrieval object for use by the traversal
  Parameters p = Parameters.create();
  p.set("retrievalGroup", "all");
  p.set("index", indexFile.getAbsolutePath());
  LocalRetrieval retrieval = (LocalRetrieval) RetrievalFactory.create(p);
  try {
    Node parsedTree = StructuredQuery.parse("#all( #counts:document:part=postings() #counts:sample:part=postings() )");
    UniversalIndicatorIterator uii = (UniversalIndicatorIterator) retrieval.createIterator(Parameters.create(), parsedTree);

    ScoringContext sc = new ScoringContext();

    // initial state
    assertEquals(0, uii.currentCandidate());
    sc.document = 0;
    assertEquals(true, uii.indicator(sc));
    assertEquals(true, uii.hasMatch(sc));

    // syncing to 1 is expected to land on the next candidate, 2
    uii.syncTo(1);
    assertEquals(2, uii.currentCandidate());
    sc.document = 2;
    assertEquals(true, uii.indicator(sc));
    assertEquals(true, uii.hasMatch(sc));

    uii.movePast(uii.currentCandidate());
    assertEquals(4, uii.currentCandidate());

    uii.movePast(uii.currentCandidate());
    assertTrue(uii.isDone());
  } finally {
    // Close the index even when an assertion above fails.
    retrieval.close();
  }
}
 
开发者ID:teanalab,项目名称:demidovii,代码行数:34,代码来源:IndicatorIteratorTest.java

示例11: testComplexIterator

import org.lemurproject.galago.core.retrieval.RetrievalFactory; //导入方法依赖的package包/类
/**
 * Transforms a {@code #require(#all(...) ...)} query over the "document"
 * postings and checks that the resulting {@link RequireIterator} visits
 * candidates 0, 2 and 4 and is then done.
 *
 * <p>The expected candidates depend on the fixture index at
 * {@code indexFile}, which is built outside this method.
 */
@Test
public void testComplexIterator() throws Exception {
  // Create a retrieval object for use by the traversal
  Parameters p = Parameters.create();
  p.set("index", indexFile.getAbsolutePath());
  LocalRetrieval retrieval = (LocalRetrieval) RetrievalFactory.create(p);
  try {
    Node root = StructuredQuery.parse("#require(#all( #counts:document:part=postings() ) #counts:document:part=postings() )");
    root = retrieval.transformQuery(root, p);

    // dc1 mirrors the expected document position; it is not handed to the iterator.
    ScoringContext dc1 = new ScoringContext();
    RequireIterator mi = (RequireIterator) retrieval.createIterator(Parameters.create(), root);

    assertEquals(0, mi.currentCandidate());
    dc1.document = 0;
    assertFalse(mi.isDone());

    mi.movePast(mi.currentCandidate());
    dc1.document = 2;
    assertEquals(2, mi.currentCandidate());
    assertFalse(mi.isDone());

    mi.movePast(mi.currentCandidate());
    dc1.document = 4;
    assertEquals(4, mi.currentCandidate());
    assertFalse(mi.isDone());

    mi.movePast(mi.currentCandidate());
    assertTrue(mi.isDone());
  } finally {
    // The original never closed the retrieval, unlike the sibling tests.
    retrieval.close();
  }
}
 
开发者ID:teanalab,项目名称:demidovii,代码行数:31,代码来源:IndicatorIteratorTest.java

示例12: testRelevanceModel1Traversal

import org.lemurproject.galago.core.retrieval.RetrievalFactory; //导入方法依赖的package包/类
/**
 * Runs {@link RelevanceModelTraversal} over an {@code #rm} query with
 * {@code RelevanceModel1} and the "sentiwordlist.txt" whitelist configured,
 * then checks the first two weights and child terms of the result.
 *
 * <p>Reference values kept from the original test's commented-out truth
 * data:
 * <pre>
 * No sentiwordlist.txt:
 *   #combine:0=0.05001660577881102:1=0.05001660577881102:2=0.04165282851765748:3=0.04165282851765748(
 *     #text:sample() #text:ugly() #text:cat() #text:moon() )
 * Values asserted below:
 *   #combine:0=0.05001660577881102:1=0.04165282851765748(
 *     #text:ugly() #text:moon() )
 * </pre>
 */
@Test
public void testRelevanceModel1Traversal() throws Exception {
  // Create a retrieval object for use by the traversal
  Parameters p = Parameters.create();
  p.set("index", indexFile.getAbsolutePath());
  p.set("stemmedPostings", false);
  p.set("fbOrigWeight", 0.5);
  p.set("relevanceModel", RelevanceModel1.class.getName());
  p.set("rmwhitelist", "sentiwordlist.txt");
  LocalRetrieval retrieval = (LocalRetrieval) RetrievalFactory.create(p);
  try {
    RelevanceModelTraversal traversal = new RelevanceModelTraversal(retrieval);

    Node parsedTree = StructuredQuery.parse("#rm:fbDocs=10:fbTerms=4( #dirichlet( #extents:jumped:part=postings() ) )");
    Node transformed = traversal.traverse(parsedTree, Parameters.create());

    // Expected value first, so a failure message labels the two sides correctly.
    assertEquals(0.05001, transformed.getNodeParameters().get("0", -1.0), 0.00001);
    assertEquals(0.04165, transformed.getNodeParameters().get("1", -1.0), 0.00001);
    assertEquals("text", transformed.getChild(0).getOperator());
    assertEquals("ugly", transformed.getChild(0).getDefaultParameter());
    assertEquals("text", transformed.getChild(1).getOperator());
    assertEquals("moon", transformed.getChild(1).getDefaultParameter());
  } finally {
    // Close the index even when an assertion above fails.
    retrieval.close();
  }
}
 
开发者ID:teanalab,项目名称:demidovii,代码行数:36,代码来源:RelevanceFeedbackTraversalTest.java

示例13: testRelevanceModelEmptyTraversal

import org.lemurproject.galago.core.retrieval.RetrievalFactory; //导入方法依赖的package包/类
/**
 * Transforms an {@code #rm} query whose only term is
 * "neverawordinedgewise" with {@code RelevanceModel3} configured, checks the
 * exact string form of the transformed query, and checks that executing it
 * returns no scored documents.
 *
 * <p>The statistics embedded in the expected string (for example
 * {@code collectionLength=70}) depend on the fixture index at
 * {@code indexFile}, which is built outside this method.
 */
@Test
public void testRelevanceModelEmptyTraversal() throws Exception {
  // Create a retrieval object for use by the traversal
  Parameters p = Parameters.create();
  p.set("index", indexFile.getAbsolutePath());
  p.set("stemmedPostings", false);
  p.set("fbOrigWeight", 0.9);
  p.set("relevanceModel", RelevanceModel3.class.getName());
  p.set("rmwhitelist", "sentiwordlist.txt");
  LocalRetrieval retrieval = (LocalRetrieval) RetrievalFactory.create(p);
  try {
    Node parsedTree = StructuredQuery.parse("#rm:fbDocs=10:fbTerms=4( neverawordinedgewise )");
    Node transformed = retrieval.transformQuery(parsedTree, p);

    // truth data
    String expected = "#combine:fbDocs=10:fbTerms=4:w=1.0( "
        + "#dirichlet:collectionLength=70:maximumCount=0:nodeFrequency=0:w=1.0( #lengths:document:part=lengths() #counts:neverawordinedgewise:part=postings() ) )";

    // Both forms are echoed so a mismatch can be inspected in the test log.
    System.err.println(transformed.toString());
    System.err.println(expected);

    assertEquals(expected, transformed.toString());

    List<ScoredDocument> results = retrieval.executeQuery(transformed).scoredDocuments;
    assertTrue(results.isEmpty());
  } finally {
    // Close the index even when an assertion above fails.
    retrieval.close();
  }
}
 
开发者ID:teanalab,项目名称:demidovii,代码行数:29,代码来源:RelevanceFeedbackTraversalTest.java

示例14: Search

import org.lemurproject.galago.core.retrieval.RetrievalFactory; //导入方法依赖的package包/类
/**
 * Sets up the two collaborators of this object: a retrieval created from
 * {@code params} by {@code RetrievalFactory.create} and a new
 * {@code SnippetGenerator}.
 *
 * @param params configuration passed unchanged to the retrieval factory
 * @throws Exception propagated from retrieval creation
 */
public Search(Parameters params) throws Exception {
    // Disabled in the original: this.store = getDocumentStore(params.getAsList("corpus"));
    retrieval = RetrievalFactory.create(params);
    generator = new SnippetGenerator();
}
 
开发者ID:teanalab,项目名称:demidovii,代码行数:6,代码来源:Search.java

示例15: run

import org.lemurproject.galago.core.retrieval.RetrievalFactory; //导入方法依赖的package包/类
/**
 * Runs every collected query on a fixed-size thread pool and waits until
 * the shared latch reports that all of them have finished.
 *
 * <p>When neither "query" nor "queries" is present the help string is
 * printed and nothing else happens. If "outputFile" is a string parameter,
 * results go to that file (UTF-8, auto-flush, appending when "appendFile" is
 * true) instead of the supplied stream. While waiting, a progress line is
 * logged about once per second.
 *
 * <p>NOTE(review): the {@code exceptions} list is handed to every
 * {@code QueryRunner} but never inspected here, so failures the runners
 * record in it are not reported by this method -- confirm whether that is
 * intended. The retrieval is also left open, as in the original, because
 * this method cannot tell when the runners are finished with it.
 *
 * @param parameters command parameters: "query" or "queries" is required;
 *          "outputFile", "appendFile" (default false), "requested" (default
 *          1000) and "threadCount" (default: available processors) are
 *          optional
 * @param out default destination for results and for the help text
 * @throws Exception propagated from opening the output file, collecting the
 *         queries, creating the retrieval, or being interrupted while
 *         waiting on the latch
 */
@Override
public void run(Parameters parameters, PrintStream out) throws Exception {

  if (!(parameters.containsKey("query")
          || parameters.containsKey("queries"))) {
    out.println(this.getHelpString());
    return;
  }

  // ensure we can print to a file instead of the commandline
  boolean writesToFile = parameters.isString("outputFile");
  if (writesToFile) {
    boolean append = parameters.get("appendFile", false);
    out = new PrintStream(new BufferedOutputStream(
            new FileOutputStream(parameters.getString("outputFile"), append)), true, "UTF-8");
  }

  ExecutorService threadPool = null;
  try {
    // get queries
    List<Parameters> queries = JSONQueryFormat.collectQueries(parameters);

    // open index
    Retrieval retrieval = RetrievalFactory.create(parameters);

    // record results requested
    int requested = (int) parameters.get("requested", 1000);

    // one count per query; the runners receive the latch
    CountDownLatch latch = new CountDownLatch(queries.size());

    // exception list shared with the runners
    List<Exception> exceptions = new ArrayList<>();

    // prepare thread pool
    int threadCount = (int) parameters.get("threadCount", Runtime.getRuntime().availableProcessors());
    threadPool = Executors.newFixedThreadPool(threadCount);

    // for each query, create and submit a runner
    for (Parameters query : queries) {
      // per-query settings fall back to the command-level parameters
      query.setBackoff(parameters);
      query.set("requested", requested);

      threadPool.submit(new QueryRunner(retrieval, query, out, exceptions, latch));
    }

    // poll the latch once per second so progress can be logged
    while (!latch.await(1000, TimeUnit.MILLISECONDS)) {
      synchronized (logger) {
        logger.info("Still running... " + latch.getCount() + " to go.");
      }
    }
  } finally {
    // Release the worker threads and the output file on every exit path,
    // including an interrupt while waiting or a failure during setup.
    if (threadPool != null) {
      threadPool.shutdown();
    }
    if (writesToFile) {
      out.close();
    }
  }
}
 
开发者ID:teanalab,项目名称:demidovii,代码行数:63,代码来源:ThreadedBatchSearch.java


注:本文中的org.lemurproject.galago.core.retrieval.RetrievalFactory.create方法示例由纯净天空整理自Github/MSDocs等开源代码及文档管理平台,相关代码片段筛选自各路编程大神贡献的开源项目,源码版权归原作者所有,传播和使用请参考对应项目的License;未经允许,请勿转载。