本文整理汇总了 Java 中 org.lemurproject.galago.core.retrieval.RetrievalFactory.create 方法的典型用法代码示例。如果您正苦于以下问题：Java RetrievalFactory.create 方法的具体用法？Java RetrievalFactory.create 怎么用？Java RetrievalFactory.create 使用的例子？那么，这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在类 org.lemurproject.galago.core.retrieval.RetrievalFactory 的用法示例。
在下文中一共展示了RetrievalFactory.create方法的15个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Java代码示例。
示例1: run
import org.lemurproject.galago.core.retrieval.RetrievalFactory; //导入方法依赖的package包/类
/**
 * Prints, for every query listed under the "x" parameter, the {@code nodeFrequency}
 * statistic of the transformed query followed by a tab and the original query text.
 *
 * <p>If either the "index" or the "x" parameter is missing, the help string is printed
 * to {@code output} and nothing else happens. When "printTransformation" is true, the raw,
 * parsed and transformed forms of each query are echoed to {@code System.err}.
 *
 * @param p      run parameters; must contain "index" and "x"
 * @param output stream that receives the help text or the count lines
 * @throws Exception if the retrieval cannot be created or a query cannot be processed
 */
@Override
public void run(Parameters p, PrintStream output) throws Exception {
    if (!p.containsKey("index") || !p.containsKey("x")) {
        output.println(this.getHelpString());
        return;
    }

    Retrieval r = RetrievalFactory.create(p);
    try {
        for (String query : (List<String>) p.getAsList("x")) {
            Node parsed = StructuredQuery.parse(query);
            parsed.getNodeParameters().set("queryType", "count");
            Node transformed = r.transformQuery(parsed, Parameters.create());

            if (p.get("printTransformation", false)) {
                System.err.println(query);
                System.err.println(parsed);
                System.err.println(transformed);
            }

            long count = r.getNodeStatistics(transformed).nodeFrequency;
            output.println(count + "\t" + query);
        }
    } finally {
        // Release the retrieval even when a query fails to parse or transform.
        r.close();
    }
}
示例2: run
import org.lemurproject.galago.core.retrieval.RetrievalFactory; //导入方法依赖的package包/类
/**
 * Runs the query given by the "query" parameter, extracts weighted expansion terms from
 * the initial results via {@code RelevanceModel1}, and prints up to "numTerms"
 * (default 10) of them as {@code term<TAB>weight} lines.
 *
 * <p>The number of initial results is reported on {@code System.err}. Term lines are
 * written to {@code output} rather than directly to {@code System.out}, so the caller
 * decides where the results go.
 *
 * @param p      run parameters; "query" is required, "numTerms" and "rmstopwords" are optional
 * @param output stream that receives the weighted term lines
 * @throws Exception if the retrieval cannot be created or the query fails
 */
@Override
public void run(Parameters p, PrintStream output) throws Exception {
    int numTerms = p.get("numTerms", 10);
    Node query = StructuredQuery.parse(p.getString("query"));

    Retrieval ret = RetrievalFactory.create(p);
    try {
        Stemmer stemmer = RelevanceModel1.getStemmer(p, ret);
        Node xquery = ret.transformQuery(query, p);

        List<ScoredDocument> initialResults = ret.executeQuery(xquery, p).scoredDocuments;
        System.err.println("Found "+initialResults.size()+" results for "+query);

        Set<String> stemmedQueryTerms = RelevanceModel1.stemTerms(stemmer, StructuredQuery.findQueryTerms(xquery));
        Set<String> exclusions = WordLists.getWordList(p.get("rmstopwords", "rmstop"));
        Set<String> inclusions = null; // no whitelist

        List<WeightedTerm> weightedTerms = RelevanceModel1.extractGrams(ret, initialResults, stemmer, p, stemmedQueryTerms, exclusions, inclusions);

        int limit = Math.min(numTerms, weightedTerms.size());
        for (int i = 0; i < limit; i++) {
            WeightedTerm wt = weightedTerms.get(i);
            output.printf("%s\t%f\n", wt.getTerm(), wt.getWeight());
        }
    } finally {
        // The retrieval was previously never closed.
        ret.close();
    }
}
示例3: run
import org.lemurproject.galago.core.retrieval.RetrievalFactory; //导入方法依赖的package包/类
/**
 * Prints, for every query listed under the "x" parameter, the {@code nodeDocumentCount}
 * statistic of the transformed query followed by a tab and the original query text.
 *
 * <p>If either the "index" or the "x" parameter is missing, the help string is printed
 * to {@code output} and nothing else happens. When "printTransformation" is true, the raw,
 * parsed and transformed forms of each query are echoed to {@code System.err}.
 *
 * @param p      run parameters; must contain "index" and "x"
 * @param output stream that receives the help text or the count lines
 * @throws Exception if the retrieval cannot be created or a query cannot be processed
 */
@Override
public void run(Parameters p, PrintStream output) throws Exception {
    if (!p.containsKey("index") || !p.containsKey("x")) {
        output.println(this.getHelpString());
        return;
    }

    Retrieval r = RetrievalFactory.create(p);
    try {
        for (String query : (List<String>) p.getList("x")) {
            Node parsed = StructuredQuery.parse(query);
            parsed.getNodeParameters().set("queryType", "count");
            Node transformed = r.transformQuery(parsed, Parameters.create());

            if (p.get("printTransformation", false)) {
                System.err.println(query);
                System.err.println(parsed);
                System.err.println(transformed);
            }

            long count = r.getNodeStatistics(transformed).nodeDocumentCount;
            output.println(count + "\t" + query);
        }
    } finally {
        // Release the retrieval even when a query fails to parse or transform.
        r.close();
    }
}
示例4: run
import org.lemurproject.galago.core.retrieval.RetrievalFactory; //导入方法依赖的package包/类
/**
 * For every query listed under the "q" parameter, prints the raw query text, its parsed
 * form and its transformed form, one per line.
 *
 * <p>If either the "index" or the "q" parameter is missing, the help string is printed
 * and nothing else happens. All text is written to {@code output}, the same stream that
 * receives the help string, instead of directly to {@code System.out}.
 *
 * @param p      run parameters; must contain "index" and "q"
 * @param output stream that receives the help text or the query forms
 * @throws Exception if the retrieval cannot be created or a query cannot be processed
 */
@Override
public void run(Parameters p, PrintStream output) throws Exception {
    if (!p.containsKey("index") || !p.containsKey("q")) {
        output.println(this.getHelpString());
        return;
    }

    Retrieval r = RetrievalFactory.create(p);
    try {
        for (String query : (List<String>) p.getAsList("q")) {
            output.println(query);
            Node parsed = StructuredQuery.parse(query);
            output.println(parsed);
            Node transformed = r.transformQuery(parsed, Parameters.create());
            output.println(transformed);
        }
    } finally {
        // Release the retrieval even when a query fails to parse or transform.
        r.close();
    }
}
示例5: run
import org.lemurproject.galago.core.retrieval.RetrievalFactory; //导入方法依赖的package包/类
/**
 * Reads document identifiers from the stream named by the "input" parameter, fetches the
 * documents in batches of 100 with {@code JustText} components, and prints one
 * {@code Parameters} object per document with its "id" and "content".
 *
 * @param p      run parameters; "input" names the source of document identifiers
 * @param output stream that receives one line per retrieved document
 * @throws Exception if the retrieval cannot be created or documents cannot be fetched
 */
@Override
public void run(Parameters p, PrintStream output) throws Exception {
    final int batchSize = 100;

    Retrieval index = RetrievalFactory.create(p);
    try {
        List<String> ids = new ArrayList<>(Utility.readStreamToStringSet(StreamCreator.openInputStream(p.getString("input"))));

        for (int i = 0; i < ids.size(); i += batchSize) {
            List<String> batch = ids.subList(i, Math.min(ids.size(), i + batchSize));
            Map<String, Document> docs = index.getDocuments(batch, Document.DocumentComponents.JustText);

            for (Document document : docs.values()) {
                Parameters docP = Parameters.create();
                docP.put("id", document.name);
                docP.put("content", document.text);
                output.println(docP);
            }
        }
    } finally {
        // The retrieval was previously never closed.
        index.close();
    }
}
示例6: run
import org.lemurproject.galago.core.retrieval.RetrievalFactory; //导入方法依赖的package包/类
/**
 * Builds a learner for the retrieval described by {@code p}, runs it, and prints the
 * learned model instance as parameters.
 *
 * @param p      run parameters handed to both the retrieval factory and the learner factory
 * @param output stream that receives the learned parameters
 * @throws Exception if the retrieval or learner cannot be created, or learning fails
 */
@Override
public void run(Parameters p, PrintStream output) throws Exception {
    // TODO: parameters are not validated before use.
    Retrieval searcher = RetrievalFactory.create(p);
    Learner learner = LearnerFactory.instance(p, searcher);

    try {
        Parameters learned = learner.learn().toParameters();
        output.println(learned.toString());
    } finally {
        // Closing the learner makes sure all buffered streams are flushed.
        learner.close();
    }
}
示例7: run
import org.lemurproject.galago.core.retrieval.RetrievalFactory; //导入方法依赖的package包/类
/**
 * Tokenizes the text read from the "input" parameter with the index's tokenizer and
 * reports statistics for each distinct term.
 *
 * <p>The output object holds "clen" (the {@code collectionLength} of the "lengths" node)
 * and "terms", a list with one entry per distinct term containing "term", "cf"
 * ({@code nodeFrequency}), "maxTF" ({@code maximumCount}) and "df"
 * ({@code nodeDocumentCount}). It is pretty-printed unless "pretty" is false.
 *
 * @param p      run parameters; "input" names the text source, "pretty" is optional
 * @param output stream that receives the statistics object
 * @throws Exception if the retrieval cannot be created or statistics cannot be read
 */
@Override
public void run(Parameters p, PrintStream output) throws Exception {
    Retrieval index = RetrievalFactory.create(p);
    try {
        Tokenizer tokenizer = index.getTokenizer();

        String text = StreamUtil.copyStreamToString(StreamCreator.openInputStream(p.getString("input")));
        Document doc = tokenizer.tokenize(text);

        // Look every distinct term up only once.
        HashSet<String> uniq = new HashSet<>(doc.terms);

        List<Parameters> termInfos = new ArrayList<>();
        for (String query : uniq) {
            Parameters termStats = Parameters.create();
            NodeStatistics counts = index.getNodeStatistics(new Node("counts", query));
            termStats.set("term", query);
            termStats.set("cf", counts.nodeFrequency);
            termStats.set("maxTF", counts.maximumCount);
            termStats.set("df", counts.nodeDocumentCount);
            termInfos.add(termStats);
        }

        Parameters overall = Parameters.create();
        FieldStatistics lengths = index.getCollectionStatistics(new Node("lengths"));
        overall.put("clen", lengths.collectionLength);
        overall.put("terms", termInfos);

        if (p.get("pretty", true)) {
            output.println(overall.toPrettyString());
        } else {
            output.println(overall);
        }
    } finally {
        // The retrieval was previously never closed.
        index.close();
    }
}
示例8: run
import org.lemurproject.galago.core.retrieval.RetrievalFactory; //导入方法依赖的package包/类
/**
 * Parses and transforms every query collected from {@code p} and prints the transformed
 * query in the requested format.
 *
 * <p>The "format" parameter (default "pretty") selects the rendering by its first letter:
 * "m" uses {@code toString()}, "p" uses {@code toPrettyString()}, and "s" uses
 * {@code toSimplePrettyString()}. Any other value prints an error line and stops
 * processing. Unless "id" is false, each line is prefixed with the query number and a tab.
 *
 * @param p      run parameters describing the index, the queries and the output format
 * @param output stream that receives the transformed queries
 * @throws Exception if the retrieval cannot be created or a query cannot be processed
 */
@Override
public void run(Parameters p, PrintStream output) throws Exception {
    Retrieval r = RetrievalFactory.create(p);
    try {
        List<Parameters> queries = JSONQueryFormat.collectQueries(p);
        String printMode = p.get("format", "pretty");

        for (Parameters q : queries) {
            String id = q.getString("number");
            String txt = q.getString("text");

            Node raw = StructuredQuery.parse(txt);
            Node trans = r.transformQuery(raw, q);

            if (p.get("id", true)) {
                output.print(id + "\t");
            }

            if (printMode.startsWith("m")) {
                output.println(trans.toString());
            } else if (printMode.startsWith("p")) {
                output.println(trans.toPrettyString());
            } else if (printMode.startsWith("s")) {
                output.println(trans.toSimplePrettyString());
            } else {
                output.println("format: " + printMode + " unknown -- exiting.");
                return;
            }
        }
    } finally {
        // Covers normal completion, the early return above, and failures alike.
        r.close();
    }
}
示例9: testExistentialIndicator
import org.lemurproject.galago.core.retrieval.RetrievalFactory; //导入方法依赖的package包/类
/**
 * Checks that an {@code #any} query over the "cat" and "program" postings produces an
 * {@code ExistentialIndicatorIterator} whose candidates are documents 2 and 4, and that
 * the iterator is done after moving past the last one.
 */
@Test
public void testExistentialIndicator() throws Exception {
    // Create a retrieval object for use by the traversal
    Parameters p = Parameters.create();
    p.set("retrievalGroup", "all");
    p.set("index", indexFile.getAbsolutePath());
    LocalRetrieval retrieval = (LocalRetrieval) RetrievalFactory.create(p);

    try {
        Node parsedTree = StructuredQuery.parse("#any( #counts:cat:part=postings() #counts:program:part=postings() )");
        ExistentialIndicatorIterator eii = (ExistentialIndicatorIterator) retrieval.createIterator(Parameters.create(), parsedTree);
        ScoringContext sc = new ScoringContext();

        // initial state
        assertEquals(2, eii.currentCandidate());
        sc.document = 2;
        assertTrue(eii.indicator(sc));
        assertTrue(eii.hasMatch(sc));

        // syncing to a document without a match lands on the next candidate
        eii.syncTo(3);
        assertEquals(4, eii.currentCandidate());
        sc.document = 4;
        assertTrue(eii.indicator(sc));

        eii.movePast(eii.currentCandidate());
        assertTrue(eii.isDone());
    } finally {
        // Close the retrieval even when an assertion fails.
        retrieval.close();
    }
}
示例10: testUniversalIndicator
import org.lemurproject.galago.core.retrieval.RetrievalFactory; //导入方法依赖的package包/类
/**
 * Checks that an {@code #all} query over the "document" and "sample" postings produces a
 * {@code UniversalIndicatorIterator} whose candidates are documents 0, 2 and 4, and that
 * the iterator is done after moving past the last one.
 */
@Test
public void testUniversalIndicator() throws Exception {
    // Create a retrieval object for use by the traversal
    Parameters p = Parameters.create();
    p.set("retrievalGroup", "all");
    p.set("index", indexFile.getAbsolutePath());
    LocalRetrieval retrieval = (LocalRetrieval) RetrievalFactory.create(p);

    try {
        Node parsedTree = StructuredQuery.parse("#all( #counts:document:part=postings() #counts:sample:part=postings() )");
        UniversalIndicatorIterator uii = (UniversalIndicatorIterator) retrieval.createIterator(Parameters.create(), parsedTree);
        ScoringContext sc = new ScoringContext();

        // initial state
        assertEquals(0, uii.currentCandidate());
        sc.document = 0;
        assertTrue(uii.indicator(sc));
        assertTrue(uii.hasMatch(sc));

        // syncing to a document without a match lands on the next candidate
        uii.syncTo(1);
        assertEquals(2, uii.currentCandidate());
        sc.document = 2;
        assertTrue(uii.indicator(sc));
        assertTrue(uii.hasMatch(sc));

        uii.movePast(uii.currentCandidate());
        assertEquals(4, uii.currentCandidate());

        uii.movePast(uii.currentCandidate());
        assertTrue(uii.isDone());
    } finally {
        // Close the retrieval even when an assertion fails.
        retrieval.close();
    }
}
示例11: testComplexIterator
import org.lemurproject.galago.core.retrieval.RetrievalFactory; //导入方法依赖的package包/类
/**
 * Checks that a {@code #require} query wrapping an {@code #all} over the "document"
 * postings produces a {@code RequireIterator} whose candidates are documents 0, 2 and 4,
 * and that the iterator is done after moving past the last one.
 */
@Test
public void testComplexIterator() throws Exception {
    // Create a retrieval object for use by the traversal
    Parameters p = Parameters.create();
    p.set("index", indexFile.getAbsolutePath());
    LocalRetrieval retrieval = (LocalRetrieval) RetrievalFactory.create(p);

    try {
        Node root = StructuredQuery.parse("#require(#all( #counts:document:part=postings() ) #counts:document:part=postings() )");
        root = retrieval.transformQuery(root, p);

        RequireIterator mi = (RequireIterator) retrieval.createIterator(Parameters.create(), root);

        assertEquals(0, mi.currentCandidate());
        assertFalse(mi.isDone());

        mi.movePast(mi.currentCandidate());
        assertEquals(2, mi.currentCandidate());
        assertFalse(mi.isDone());

        mi.movePast(mi.currentCandidate());
        assertEquals(4, mi.currentCandidate());
        assertFalse(mi.isDone());

        mi.movePast(mi.currentCandidate());
        assertTrue(mi.isDone());
    } finally {
        // The retrieval was previously never closed in this test.
        retrieval.close();
    }
}
示例12: testRelevanceModel1Traversal
import org.lemurproject.galago.core.retrieval.RetrievalFactory; //导入方法依赖的package包/类
/**
 * Checks the expansion produced by {@code RelevanceModelTraversal} for an {@code #rm}
 * query on "jumped" when {@code RelevanceModel1} is configured with the
 * "sentiwordlist.txt" whitelist.
 *
 * <p>The expected expansion is
 * {@code #combine:0=0.05001660577881102:1=0.04165282851765748( #text:ugly() #text:moon() )}.
 * Without the whitelist the expansion would instead contain four terms
 * (sample, ugly, cat, moon) with weights 0.0500166..., 0.0500166..., 0.0416528... and
 * 0.0416528....
 */
@Test
public void testRelevanceModel1Traversal() throws Exception {
    // Create a retrieval object for use by the traversal
    Parameters p = Parameters.create();
    p.set("index", indexFile.getAbsolutePath());
    p.set("stemmedPostings", false);
    p.set("fbOrigWeight", 0.5);
    p.set("relevanceModel", RelevanceModel1.class.getName());
    p.set("rmwhitelist", "sentiwordlist.txt");
    LocalRetrieval retrieval = (LocalRetrieval) RetrievalFactory.create(p);

    try {
        RelevanceModelTraversal traversal = new RelevanceModelTraversal(retrieval);

        Node parsedTree = StructuredQuery.parse("#rm:fbDocs=10:fbTerms=4( #dirichlet( #extents:jumped:part=postings() ) )");
        Node transformed = traversal.traverse(parsedTree, Parameters.create());

        // Expected value first, then actual, so failure messages read correctly.
        assertEquals(0.05001, transformed.getNodeParameters().get("0", -1.0), 0.00001);
        assertEquals(0.04165, transformed.getNodeParameters().get("1", -1.0), 0.00001);

        assertEquals("text", transformed.getChild(0).getOperator());
        assertEquals("ugly", transformed.getChild(0).getDefaultParameter());
        assertEquals("text", transformed.getChild(1).getOperator());
        assertEquals("moon", transformed.getChild(1).getDefaultParameter());
    } finally {
        // Close the retrieval even when an assertion fails.
        retrieval.close();
    }
}
示例13: testRelevanceModelEmptyTraversal
import org.lemurproject.galago.core.retrieval.RetrievalFactory; //导入方法依赖的package包/类
/**
 * Checks that an {@code #rm} query on a term with no occurrences
 * ("neverawordinedgewise") is transformed into a plain {@code #combine} over a
 * {@code #dirichlet} node, and that executing it yields no results.
 */
@Test
public void testRelevanceModelEmptyTraversal() throws Exception {
    // Create a retrieval object for use by the traversal
    Parameters p = Parameters.create();
    p.set("index", indexFile.getAbsolutePath());
    p.set("stemmedPostings", false);
    p.set("fbOrigWeight", 0.9);
    p.set("relevanceModel", RelevanceModel3.class.getName());
    p.set("rmwhitelist", "sentiwordlist.txt");
    LocalRetrieval retrieval = (LocalRetrieval) RetrievalFactory.create(p);

    try {
        Node parsedTree = StructuredQuery.parse("#rm:fbDocs=10:fbTerms=4( neverawordinedgewise )");
        Node transformed = retrieval.transformQuery(parsedTree, p);

        // truth data
        String correct = "#combine:fbDocs=10:fbTerms=4:w=1.0( "
                + "#dirichlet:collectionLength=70:maximumCount=0:nodeFrequency=0:w=1.0( #lengths:document:part=lengths() #counts:neverawordinedgewise:part=postings() ) )";

        System.err.println(transformed.toString());
        System.err.println(correct);

        assertEquals(correct, transformed.toString());

        List<ScoredDocument> results = retrieval.executeQuery(transformed).scoredDocuments;
        assertTrue(results.isEmpty());
    } finally {
        // Close the retrieval even when an assertion fails.
        retrieval.close();
    }
}
示例14: Search
import org.lemurproject.galago.core.retrieval.RetrievalFactory; //导入方法依赖的package包/类
/**
 * Creates a search front end backed by the retrieval described by {@code params} and a
 * fresh snippet generator.
 *
 * @param params parameters handed to {@code RetrievalFactory.create}
 * @throws Exception if the retrieval cannot be created
 */
public Search(Parameters params) throws Exception {
    // NOTE: initialising a document store from the "corpus" parameter is disabled here:
    // this.store = getDocumentStore(params.getAsList("corpus"));
    this.retrieval = RetrievalFactory.create(params);
    this.generator = new SnippetGenerator();
}
示例15: run
import org.lemurproject.galago.core.retrieval.RetrievalFactory; //导入方法依赖的package包/类
/**
 * Runs every query collected from {@code parameters} on a fixed-size thread pool and
 * waits until all of them have counted down the shared latch.
 *
 * <p>If neither "query" nor "queries" is present, the help string is printed and nothing
 * else happens. When "outputFile" is a string, results are written to that file as UTF-8
 * (appending if "appendFile" is true) instead of to {@code out}. "requested" (default
 * 1000) is copied into every query, and "threadCount" defaults to the number of available
 * processors. While waiting, a progress message is logged once per second.
 *
 * <p>NOTE(review): exceptions that the runners add to the shared list are not inspected
 * or reported by this method; confirm whether {@code QueryRunner} reports them itself.
 *
 * @param parameters run parameters describing the index, the queries and the output
 * @param out        default output stream, replaced when "outputFile" is set
 * @throws Exception if setup fails or the wait for the runners is interrupted
 */
@Override
public void run(Parameters parameters, PrintStream out) throws Exception {
    if (!(parameters.containsKey("query")
            || parameters.containsKey("queries"))) {
        out.println(this.getHelpString());
        return;
    }

    // ensure we can print to a file instead of the commandline
    boolean writesToFile = parameters.isString("outputFile");
    if (writesToFile) {
        boolean append = parameters.get("appendFile", false);
        out = new PrintStream(new BufferedOutputStream(
                new FileOutputStream(parameters.getString("outputFile"), append)), true, "UTF-8");
    }

    // get queries
    List<Parameters> queries = JSONQueryFormat.collectQueries(parameters);

    // open index
    Retrieval retrieval = RetrievalFactory.create(parameters);

    // record results requested
    int requested = (int) parameters.get("requested", 1000);

    CountDownLatch latch = new CountDownLatch(queries.size());

    // exception list
    List<Exception> exceptions = new ArrayList<>();

    // prepare thread pool
    int threadCount = (int) parameters.get("threadCount", Runtime.getRuntime().availableProcessors());
    ExecutorService threadPool = Executors.newFixedThreadPool(threadCount);

    try {
        // for each query, create a runner
        for (Parameters query : queries) {
            query.setBackoff(parameters);
            query.set("requested", requested);
            threadPool.submit(new QueryRunner(retrieval, query, out, exceptions, latch));
        }

        // poll the latch so progress can be reported while queries are running
        while (!latch.await(1000, TimeUnit.MILLISECONDS)) {
            synchronized (logger) {
                logger.info("Still running... " + latch.getCount() + " to go.");
            }
        }
    } finally {
        // Always stop accepting work so the pool threads can exit, even if the
        // wait above is interrupted.
        threadPool.shutdown();
    }

    // Reached only once the latch has hit zero.
    try {
        retrieval.close();
    } finally {
        if (writesToFile) {
            out.close();
        }
    }
}