本文整理汇总了 Java 中 org.lemurproject.galago.core.retrieval.RetrievalFactory 类的典型用法代码示例。如果您正苦于以下问题:Java RetrievalFactory 类的具体用法?Java RetrievalFactory 怎么用?Java RetrievalFactory 使用的例子?那么,这里精选的类代码示例或许可以为您提供帮助。
RetrievalFactory 类属于 org.lemurproject.galago.core.retrieval 包,在下文中一共展示了 RetrievalFactory 类的 15 个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的 Java 代码示例。
示例1: verifyIndexStructures
import org.lemurproject.galago.core.retrieval.RetrievalFactory; //导入依赖的package包/类
/**
 * Asserts that {@code indexPath} is a directory whose retrieval reports the "lengths", "names",
 * "names.reverse" and "postings" parts, and that every reported part exists as a child of that
 * directory.
 *
 * @param indexPath root directory of the index to verify
 * @throws Exception if the retrieval cannot be opened, queried, or closed
 */
public static void verifyIndexStructures(File indexPath) throws Exception {
  // Check main path
  assertTrue(indexPath.isDirectory());

  // Time to check standard parts
  Retrieval ret = RetrievalFactory.instance(indexPath.getAbsolutePath(), Parameters.create());
  try {
    Parameters availableParts = ret.getAvailableParts();
    assertNotNull(availableParts);

    // ensure that we have (at least) the basic parts
    assertTrue(availableParts.containsKey("lengths"));
    assertTrue(availableParts.containsKey("names"));
    assertTrue(availableParts.containsKey("names.reverse"));
    assertTrue(availableParts.containsKey("postings"));

    // every advertised part must be present under the index directory
    for (String part : availableParts.getKeys()) {
      File childPath = new File(indexPath, part);
      assertTrue(childPath.exists());
    }
  } finally {
    // Release the retrieval even when one of the assertions above fails.
    ret.close();
  }
}
示例2: run
import org.lemurproject.galago.core.retrieval.RetrievalFactory; //导入依赖的package包/类
/**
 * For every query listed under the "x" key, prints its {@code nodeFrequency} statistic, a tab,
 * and the query text to {@code output}. Prints the help string instead when either the "index"
 * or the "x" key is missing.
 *
 * @param p tool parameters; "printTransformation" (default false) additionally dumps the raw,
 *     parsed and transformed query to stderr
 * @param output stream receiving the help text or the count lines
 * @throws Exception if parsing, transformation, statistics lookup, or closing fails
 */
@Override
public void run(Parameters p, PrintStream output) throws Exception {
  if (!p.containsKey("index") || !p.containsKey("x")) {
    output.println(this.getHelpString());
    return;
  }

  Retrieval r = RetrievalFactory.create(p);
  try {
    for (String query : (List<String>) p.getAsList("x")) {
      Node parsed = StructuredQuery.parse(query);
      parsed.getNodeParameters().set("queryType", "count");
      Node transformed = r.transformQuery(parsed, Parameters.create());

      if (p.get("printTransformation", false)) {
        System.err.println(query);
        System.err.println(parsed);
        System.err.println(transformed);
      }

      long count = r.getNodeStatistics(transformed).nodeFrequency;
      output.println(count + "\t" + query);
    }
  } finally {
    // Close the retrieval even if one of the queries fails part-way through.
    r.close();
  }
}
示例3: run
import org.lemurproject.galago.core.retrieval.RetrievalFactory; //导入依赖的package包/类
/**
 * Runs the "query" parameter against the retrieval, extracts weighted terms from the scored
 * documents via {@code RelevanceModel1.extractGrams}, and prints up to "numTerms" (default 10)
 * of them as {@code term<TAB>weight} lines, in the order the list is returned.
 *
 * @param p tool parameters; "rmstopwords" (default "rmstop") names the exclusion word list
 * @param output stream receiving the term/weight lines
 * @throws Exception if query processing, term extraction, or closing the retrieval fails
 */
@Override
public void run(Parameters p, PrintStream output) throws Exception {
  int numTerms = p.get("numTerms", 10);
  Node query = StructuredQuery.parse(p.getString("query"));

  Retrieval ret = RetrievalFactory.create(p);
  try {
    Stemmer stemmer = RelevanceModel1.getStemmer(p, ret);

    Node xquery = ret.transformQuery(query, p);
    List<ScoredDocument> initialResults = ret.executeQuery(xquery, p).scoredDocuments;
    // Diagnostic only, so it stays on stderr and out of the result stream.
    System.err.println("Found "+initialResults.size()+" results for "+query);

    Set<String> stemmedQueryTerms = RelevanceModel1.stemTerms(stemmer, StructuredQuery.findQueryTerms(xquery));
    Set<String> exclusions = WordLists.getWordList(p.get("rmstopwords", "rmstop"));
    Set<String> inclusions = null; // no whitelist

    List<WeightedTerm> weightedTerms = RelevanceModel1.extractGrams(ret, initialResults, stemmer, p, stemmedQueryTerms, exclusions, inclusions);

    int limit = Math.min(numTerms, weightedTerms.size());
    for (int i = 0; i < limit; i++) {
      WeightedTerm wt = weightedTerms.get(i);
      // Write to the caller-supplied stream rather than System.out so redirected output works.
      output.printf("%s\t%f\n", wt.getTerm(), wt.getWeight());
    }
  } finally {
    // The retrieval was previously never released.
    ret.close();
  }
}
示例4: run
import org.lemurproject.galago.core.retrieval.RetrievalFactory; //导入依赖的package包/类
/**
 * For every query listed under the "x" key, prints its {@code nodeDocumentCount} statistic, a
 * tab, and the query text to {@code output}. Prints the help string instead when either the
 * "index" or the "x" key is missing.
 *
 * @param p tool parameters; "printTransformation" (default false) additionally dumps the raw,
 *     parsed and transformed query to stderr
 * @param output stream receiving the help text or the count lines
 * @throws Exception if parsing, transformation, statistics lookup, or closing fails
 */
@Override
public void run(Parameters p, PrintStream output) throws Exception {
  if (!p.containsKey("index") || !p.containsKey("x")) {
    output.println(this.getHelpString());
    return;
  }

  Retrieval r = RetrievalFactory.create(p);
  try {
    // getAsList rather than getList: presumably also accepts a single scalar value for "x"
    // -- confirm against the Parameters API.
    for (String query : (List<String>) p.getAsList("x")) {
      Node parsed = StructuredQuery.parse(query);
      parsed.getNodeParameters().set("queryType", "count");
      Node transformed = r.transformQuery(parsed, Parameters.create());

      if (p.get("printTransformation", false)) {
        System.err.println(query);
        System.err.println(parsed);
        System.err.println(transformed);
      }

      long count = r.getNodeStatistics(transformed).nodeDocumentCount;
      output.println(count + "\t" + query);
    }
  } finally {
    // Close the retrieval even if one of the queries fails part-way through.
    r.close();
  }
}
示例5: run
import org.lemurproject.galago.core.retrieval.RetrievalFactory; //导入依赖的package包/类
/**
 * For every query listed under the "q" key, prints the raw query text, its parsed form, and
 * its transformed form, one per line. Prints the help string instead when either the "index"
 * or the "q" key is missing.
 *
 * @param p tool parameters
 * @param output stream receiving the help text or the three lines per query
 * @throws Exception if parsing, transformation, or closing the retrieval fails
 */
@Override
public void run(Parameters p, PrintStream output) throws Exception {
  if (!p.containsKey("index") || !p.containsKey("q")) {
    output.println(this.getHelpString());
    return;
  }

  Retrieval r = RetrievalFactory.create(p);
  try {
    for (String query : (List<String>) p.getAsList("q")) {
      // All results go to the caller-supplied stream, matching where the help text is written.
      output.println(query);

      Node parsed = StructuredQuery.parse(query);
      output.println(parsed);

      Node transformed = r.transformQuery(parsed, Parameters.create());
      output.println(transformed);
    }
  } finally {
    // Close the retrieval even if one of the queries fails to parse or transform.
    r.close();
  }
}
示例6: run
import org.lemurproject.galago.core.retrieval.RetrievalFactory; //导入依赖的package包/类
/**
 * Reads identifiers from the stream named by the "input" parameter, fetches the matching
 * documents in batches of 100 (text component only), and prints one Parameters object per
 * returned document with its "id" and "content".
 *
 * @param p tool parameters; "input" names the source of the identifiers
 * @param output stream receiving one line per returned document
 * @throws Exception if reading the input, fetching documents, or closing the retrieval fails
 */
@Override
public void run(Parameters p, PrintStream output) throws Exception {
  Retrieval index = RetrievalFactory.create(p);
  try {
    // NOTE(review): the input stream is handed straight to readStreamToStringSet; whether that
    // helper closes it cannot be seen from here -- confirm.
    List<String> ids = new ArrayList<>(Utility.readStreamToStringSet(StreamCreator.openInputStream(p.getString("input"))));

    for (int i = 0; i < ids.size(); i+=100) {
      // subList end is clamped so the final batch may hold fewer than 100 ids
      List<String> batch = ids.subList(i, Math.min(ids.size(), i+100));
      Map<String, Document> docs = index.getDocuments(batch, Document.DocumentComponents.JustText);

      for (Document document : docs.values()) {
        Parameters docP = Parameters.create();
        docP.put("id", document.name);
        docP.put("content", document.text);
        output.println(docP);
      }
    }
  } finally {
    // The retrieval was previously never released.
    index.close();
  }
}
示例7: verifyIndexStructures
import org.lemurproject.galago.core.retrieval.RetrievalFactory; //导入依赖的package包/类
/**
 * Asserts that {@code indexPath} is a directory whose retrieval reports the "lengths", "names",
 * "names.reverse" and "postings" parts, and that every reported part exists as a child of that
 * directory.
 *
 * @param indexPath root directory of the index to verify
 * @throws Exception if the retrieval cannot be opened, queried, or closed
 */
public static void verifyIndexStructures(File indexPath) throws Exception {
  // Check main path
  assertTrue(indexPath.isDirectory());

  // Time to check standard parts
  Retrieval ret = RetrievalFactory.instance(indexPath.getAbsolutePath(), Parameters.create());
  try {
    Parameters availableParts = ret.getAvailableParts();
    assertNotNull(availableParts);

    // ensure that we have (at least) the basic parts
    assertTrue(availableParts.containsKey("lengths"));
    assertTrue(availableParts.containsKey("names"));
    assertTrue(availableParts.containsKey("names.reverse"));
    assertTrue(availableParts.containsKey("postings"));

    // every advertised part must be present under the index directory
    for (String part : availableParts.getKeys()) {
      File childPath = new File(indexPath, part);
      assertTrue(childPath.exists());
    }
  } finally {
    // Release the retrieval even when one of the assertions above fails.
    ret.close();
  }
}
示例8: run
import org.lemurproject.galago.core.retrieval.RetrievalFactory; //导入依赖的package包/类
/**
 * Builds a learner over a retrieval created from {@code p}, runs it, and prints the learned
 * model instance (converted to Parameters) to {@code output}. The learner is closed whether or
 * not learning succeeds.
 *
 * @param p tool parameters, passed to both the retrieval factory and the learner factory
 * @param output stream receiving the string form of the learned parameters
 * @throws Exception if construction, learning, or closing the learner fails
 */
@Override
public void run(Parameters p, PrintStream output) throws Exception {
  // NOTE(review): no parameter validation happens here, and the retrieval itself is not closed
  // in this method -- confirm whether the learner takes ownership of it.
  final Retrieval searcher = RetrievalFactory.create(p);
  final Learner tuner = LearnerFactory.instance(p, searcher);

  try {
    final String learned = tuner.learn().toParameters().toString();
    output.println(learned);
  } finally {
    // closing the learner makes sure its buffered streams are flushed
    tuner.close();
  }
}
示例9: run
import org.lemurproject.galago.core.retrieval.RetrievalFactory; //导入依赖的package包/类
/**
 * Tokenizes the text read from the "input" parameter with the retrieval's tokenizer and, for
 * each distinct term, looks up its node statistics. Prints a Parameters object holding the
 * collection length ("clen") and a "terms" list with "term", "cf", "maxTF" and "df" per term.
 *
 * @param p tool parameters; "pretty" (default true) selects pretty-printed output
 * @param output stream receiving the resulting Parameters object
 * @throws Exception if reading the input, any statistics lookup, or closing the retrieval fails
 */
@Override
public void run(Parameters p, PrintStream output) throws Exception {
  Retrieval index = RetrievalFactory.create(p);
  try {
    Tokenizer tokenizer = index.getTokenizer();

    String text = StreamUtil.copyStreamToString(StreamCreator.openInputStream(p.getString("input")));
    Document doc = tokenizer.tokenize(text);

    // de-duplicate so each term is looked up only once
    HashSet<String> uniq = new HashSet<>(doc.terms);

    List<Parameters> termInfos = new ArrayList<>();
    for (String query : uniq) {
      Parameters termStats = Parameters.create();
      NodeStatistics counts = index.getNodeStatistics(new Node("counts", query));
      termStats.set("term", query);
      termStats.set("cf", counts.nodeFrequency);
      termStats.set("maxTF", counts.maximumCount);
      termStats.set("df", counts.nodeDocumentCount);
      termInfos.add(termStats);
    }

    Parameters overall = Parameters.create();
    FieldStatistics lengths = index.getCollectionStatistics(new Node("lengths"));
    overall.put("clen", lengths.collectionLength);
    overall.put("terms", termInfos);

    if (p.get("pretty", true)) {
      output.println(overall.toPrettyString());
    } else {
      output.println(overall);
    }
  } finally {
    // The retrieval was previously never released.
    index.close();
  }
}
示例10: run
import org.lemurproject.galago.core.retrieval.RetrievalFactory; //导入依赖的package包/类
/**
 * Transforms every query collected by {@code JSONQueryFormat.collectQueries} and prints the
 * transformed tree in the form selected by the "format" parameter (default "pretty"): a value
 * starting with "m" uses {@code toString}, "p" uses {@code toPrettyString}, "s" uses
 * {@code toSimplePrettyString}. Any other value prints an error line and stops.
 *
 * @param p tool parameters; "id" (default true) prefixes each line with the query number and a
 *     tab
 * @param output stream receiving the transformed queries or the error line
 * @throws Exception if collecting, parsing, transforming, or closing the retrieval fails
 */
@Override
public void run(Parameters p, PrintStream output) throws Exception {
  Retrieval r = RetrievalFactory.create(p);
  try {
    List<Parameters> queries = JSONQueryFormat.collectQueries(p);
    String printMode = p.get("format", "pretty");

    for (Parameters q : queries) {
      String id = q.getString("number");
      String txt = q.getString("text");

      Node raw = StructuredQuery.parse(txt);
      // each query's own Parameters object is used for its transformation
      Node trans = r.transformQuery(raw, q);

      if (p.get("id", true)) {
        output.print(id + "\t");
      }

      if (printMode.startsWith("m")) {
        output.println(trans.toString());
      } else if (printMode.startsWith("p")) {
        output.println(trans.toPrettyString());
      } else if (printMode.startsWith("s")) {
        output.println(trans.toSimplePrettyString());
      } else {
        output.println("format: " + printMode + " unknown -- exiting.");
        return;
      }
    }
  } finally {
    // Runs on normal completion, on the early return above, and on failure.
    r.close();
  }
}
示例11: testAnnotatedNodes
import org.lemurproject.galago.core.retrieval.RetrievalFactory; //导入依赖的package包/类
/**
 * Runs "#combine( sample document )" with annotation enabled against a freshly built 10-document
 * index and checks that every result carries an annotation flagged as at-candidate, and that the
 * annotations' return values are strictly decreasing in result order.
 *
 * @throws Exception if building, querying, closing, or deleting the index fails
 */
@Test
public void testAnnotatedNodes() throws Exception {
  File[] files = LocalRetrievalTest.make10DocIndex();
  // Only the index (files[2]) is needed; the other two artifacts are removed right away.
  files[0].delete();
  FSUtil.deleteDirectory(files[1]);
  File indexFile = files[2];

  LocalRetrieval r = null;
  try {
    r = (LocalRetrieval) RetrievalFactory.instance(indexFile.getAbsolutePath(), Parameters.create());

    String qtext = "#combine( sample document )";
    Node qnode = StructuredQuery.parse(qtext);
    qnode = r.transformQuery(qnode, Parameters.create());

    ProcessingModel proc = new RankedDocumentModel(r);
    Parameters p = Parameters.create();
    p.set("requested", 100);
    p.set("annotate", true);
    ScoredDocument[] results = proc.execute(qnode, p);

    AnnotatedNode prev = null;
    for (ScoredDocument d : results) {
      assertNotNull(d.annotation);
      AnnotatedNode anode = d.annotation;
      assertTrue(anode.atCandidate);
      if (prev != null) {
        // assertTrue instead of the `assert` keyword, so the check runs even without -ea.
        assertTrue(Double.parseDouble(prev.returnValue) > Double.parseDouble(anode.returnValue));
      }
      prev = anode;
    }
  } finally {
    try {
      // Close the retrieval before its backing directory is removed.
      if (r != null) {
        r.close();
      }
    } finally {
      if (indexFile != null) {
        FSUtil.deleteDirectory(indexFile);
      }
    }
  }
}
示例12: testExistentialIndicator
import org.lemurproject.galago.core.retrieval.RetrievalFactory; //导入依赖的package包/类
/**
 * Builds an iterator for an {@code #any} node over the "cat" and "program" count nodes and
 * checks that it starts at document 2, moves to document 4 after {@code syncTo(3)}, and is done
 * once moved past document 4.
 *
 * @throws Exception if the retrieval or iterator cannot be created, used, or closed
 */
@Test
public void testExistentialIndicator() throws Exception {
  // Create a retrieval object for use by the traversal
  Parameters p = Parameters.create();
  p.set("retrievalGroup", "all");
  p.set("index", indexFile.getAbsolutePath());
  LocalRetrieval retrieval = (LocalRetrieval) RetrievalFactory.create(p);

  try {
    Node parsedTree = StructuredQuery.parse("#any( #counts:cat:part=postings() #counts:program:part=postings() )");
    ExistentialIndicatorIterator eii = (ExistentialIndicatorIterator) retrieval.createIterator(Parameters.create(), parsedTree);
    ScoringContext sc = new ScoringContext();

    // initial state
    assertEquals(2, eii.currentCandidate());
    sc.document = 2;
    assertTrue(eii.indicator(sc));
    assertTrue(eii.hasMatch(sc));

    eii.syncTo(3);
    assertEquals(4, eii.currentCandidate());
    sc.document = 4;
    assertTrue(eii.indicator(sc));

    eii.movePast(eii.currentCandidate());
    assertTrue(eii.isDone());
  } finally {
    // Close the retrieval even when an assertion above fails.
    retrieval.close();
  }
}
示例13: testUniversalIndicator
import org.lemurproject.galago.core.retrieval.RetrievalFactory; //导入依赖的package包/类
/**
 * Builds an iterator for an {@code #all} node over the "document" and "sample" count nodes and
 * checks that it visits documents 0, 2 and 4 in turn and is done once moved past document 4.
 *
 * @throws Exception if the retrieval or iterator cannot be created, used, or closed
 */
@Test
public void testUniversalIndicator() throws Exception {
  // Create a retrieval object for use by the traversal
  Parameters p = Parameters.create();
  p.set("retrievalGroup", "all");
  p.set("index", indexFile.getAbsolutePath());
  LocalRetrieval retrieval = (LocalRetrieval) RetrievalFactory.create(p);

  try {
    Node parsedTree = StructuredQuery.parse("#all( #counts:document:part=postings() #counts:sample:part=postings() )");
    UniversalIndicatorIterator uii = (UniversalIndicatorIterator) retrieval.createIterator(Parameters.create(), parsedTree);
    ScoringContext sc = new ScoringContext();

    // initial state
    assertEquals(0, uii.currentCandidate());
    sc.document = 0;
    assertTrue(uii.indicator(sc));
    assertTrue(uii.hasMatch(sc));

    uii.syncTo(1);
    assertEquals(2, uii.currentCandidate());
    sc.document = 2;
    assertTrue(uii.indicator(sc));
    assertTrue(uii.hasMatch(sc));

    uii.movePast(uii.currentCandidate());
    assertEquals(4, uii.currentCandidate());

    uii.movePast(uii.currentCandidate());
    assertTrue(uii.isDone());
  } finally {
    // Close the retrieval even when an assertion above fails.
    retrieval.close();
  }
}
示例14: testComplexIterator
import org.lemurproject.galago.core.retrieval.RetrievalFactory; //导入依赖的package包/类
/**
 * Builds an iterator for a {@code #require} node wrapping an {@code #all} node over the
 * "document" count node and checks that it visits documents 0, 2 and 4 in turn and is done once
 * moved past document 4.
 *
 * @throws Exception if the retrieval or iterator cannot be created, used, or closed
 */
@Test
public void testComplexIterator() throws Exception {
  // Create a retrieval object for use by the traversal
  Parameters p = Parameters.create();
  p.set("index", indexFile.getAbsolutePath());
  LocalRetrieval retrieval = (LocalRetrieval) RetrievalFactory.create(p);

  try {
    Node root = StructuredQuery.parse("#require(#all( #counts:document:part=postings() ) #counts:document:part=postings() )");
    root = retrieval.transformQuery(root, p);

    // The ScoringContext the test used to populate was never passed to the iterator, so it has
    // been dropped; only the candidate positions are checked.
    RequireIterator mi = (RequireIterator) retrieval.createIterator(Parameters.create(), root);

    assertEquals(0, mi.currentCandidate());
    assertFalse(mi.isDone());

    mi.movePast(mi.currentCandidate());
    assertEquals(2, mi.currentCandidate());
    assertFalse(mi.isDone());

    mi.movePast(mi.currentCandidate());
    assertEquals(4, mi.currentCandidate());
    assertFalse(mi.isDone());

    mi.movePast(mi.currentCandidate());
    assertTrue(mi.isDone());
  } finally {
    // The retrieval was previously never released.
    retrieval.close();
  }
}
示例15: testRelevanceModel1Traversal
import org.lemurproject.galago.core.retrieval.RetrievalFactory; //导入依赖的package包/类
/**
 * Runs a {@code RelevanceModelTraversal} over an {@code #rm} query with the "rmwhitelist"
 * parameter set to "sentiwordlist.txt" and checks the first two weights and the first two child
 * nodes of the transformed query.
 *
 * <p>Reference output recorded with this test: with the whitelist the expansion is
 * {@code #combine:0=0.05001660577881102:1=0.04165282851765748( #text:ugly() #text:moon() )};
 * without it the expansion also contained {@code #text:sample()} and {@code #text:cat()}.
 *
 * @throws Exception if the retrieval cannot be created, traversed, or closed
 */
@Test
public void testRelevanceModel1Traversal() throws Exception {
  // Create a retrieval object for use by the traversal
  Parameters p = Parameters.create();
  p.set("index", indexFile.getAbsolutePath());
  p.set("stemmedPostings", false);
  p.set("fbOrigWeight", 0.5);
  p.set("relevanceModel", RelevanceModel1.class.getName());
  p.set("rmwhitelist", "sentiwordlist.txt");
  LocalRetrieval retrieval = (LocalRetrieval) RetrievalFactory.create(p);

  try {
    RelevanceModelTraversal traversal = new RelevanceModelTraversal(retrieval);

    Node parsedTree = StructuredQuery.parse("#rm:fbDocs=10:fbTerms=4( #dirichlet( #extents:jumped:part=postings() ) )");
    Node transformed = traversal.traverse(parsedTree, Parameters.create());

    // Expected value goes first so a failure message reports expected and actual correctly.
    assertEquals(0.05001, transformed.getNodeParameters().get("0", -1.0), 0.00001);
    assertEquals(0.04165, transformed.getNodeParameters().get("1", -1.0), 0.00001);

    assertEquals("text", transformed.getChild(0).getOperator());
    assertEquals("ugly", transformed.getChild(0).getDefaultParameter());
    assertEquals("text", transformed.getChild(1).getOperator());
    assertEquals("moon", transformed.getChild(1).getDefaultParameter());
  } finally {
    // Close the retrieval even when an assertion above fails.
    retrieval.close();
  }
}