本文整理汇总了Java中org.lemurproject.galago.core.util.WordLists类的典型用法代码示例。如果您正苦于以下问题：Java WordLists类的具体用法？Java WordLists怎么用？Java WordLists使用的例子？那么，这里精选的类代码示例或许可以为您提供帮助。
WordLists类属于org.lemurproject.galago.core.util包，在下文中一共展示了WordLists类的8个代码示例，这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞，您的评价将有助于系统推荐出更棒的Java代码示例。
示例1: afterNode
import org.lemurproject.galago.core.util.WordLists; //导入依赖的package包/类
/**
 * Rewrites a {@code #stopword} operator node into a {@code #combine} node over the
 * same children, after stripping stopwords from the text nodes beneath it.
 *
 * <p>The stopword set is {@code defaultStopwords} unless {@code queryParameters}
 * holds a string-valued {@code stopwordlist} entry, in which case that named list is
 * fetched through {@code WordLists.getWordList}. The list is resolved for every
 * visited node, before the operator is inspected.
 *
 * @param original the node that has just been visited
 * @param queryParameters per-query settings; may name an alternative stopword list
 * @return a new {@code combine} node when {@code original} is a {@code stopword}
 *     operator, otherwise {@code original} itself
 * @throws Exception propagated from the word-list lookup or from
 *     {@code recFindStopWords}
 */
@Override
public Node afterNode(Node original, Parameters queryParameters) throws Exception {
  // A per-query "stopwordlist" overrides the traversal-wide default.
  final Set<String> activeStopwords = queryParameters.isString("stopwordlist")
      ? WordLists.getWordList(queryParameters.getString("stopwordlist"))
      : defaultStopwords;

  // Only #stopword operators are rewritten; everything else passes through.
  if (!original.getOperator().equals("stopword")) {
    return original;
  }

  // Swap the #stopword wrapper for #combine over the same children ...
  Node replacement = new Node("combine", original.getInternalNodes());
  // ... then recursively find and remove stopwords from the #text nodes.
  recFindStopWords(replacement, activeStopwords);
  return replacement;
}
示例2: run
import org.lemurproject.galago.core.util.WordLists; //导入依赖的package包/类
/**
 * Runs the query in {@code p}, extracts weighted expansion terms from the initial
 * result list and prints the top ones, one per line, as {@code term<TAB>weight}.
 *
 * <p>Parameters read directly here: {@code query} (the query string),
 * {@code numTerms} (maximum number of terms to print, default 10) and
 * {@code rmstopwords} (name of the exclusion word list, default {@code "rmstop"}).
 * {@code p} is also handed on to retrieval creation, query transformation, query
 * execution, stemmer selection and gram extraction.
 *
 * <p>A short "Found N results" diagnostic goes to {@code System.err}; the term list
 * itself goes to {@code output}.
 *
 * @param p the run parameters
 * @param output the stream that receives the term/weight lines
 * @throws Exception propagated from query parsing, retrieval, word-list loading or
 *     gram extraction
 */
@Override
public void run(Parameters p, PrintStream output) throws Exception {
  int numTerms = p.get("numTerms", 10);

  Node query = StructuredQuery.parse(p.getString("query"));
  Retrieval ret = RetrievalFactory.create(p);
  Stemmer stemmer = RelevanceModel1.getStemmer(p, ret);

  // First pass: run the transformed query to obtain the feedback documents.
  Node xquery = ret.transformQuery(query, p);
  List<ScoredDocument> initialResults = ret.executeQuery(xquery, p).scoredDocuments;
  System.err.println("Found "+initialResults.size()+" results for "+query);

  Set<String> stemmedQueryTerms = RelevanceModel1.stemTerms(stemmer, StructuredQuery.findQueryTerms(xquery));
  Set<String> exclusions = WordLists.getWordList(p.get("rmstopwords", "rmstop"));
  Set<String> inclusions = null; // no whitelist

  List<WeightedTerm> weightedTerms = RelevanceModel1.extractGrams(ret, initialResults, stemmer, p, stemmedQueryTerms, exclusions, inclusions);

  // Print at most numTerms entries. The results are written to the caller-supplied
  // stream rather than System.out, so callers that redirect `output` actually
  // receive them.
  int limit = Math.min(numTerms, weightedTerms.size());
  for (int i = 0; i < limit; i++) {
    WeightedTerm wt = weightedTerms.get(i);
    output.printf("%s\t%f\n", wt.getTerm(), wt.getWeight());
  }
}
示例3: StopStructureTraversal
import org.lemurproject.galago.core.util.WordLists; //导入依赖的package包/类
/**
 * Creates the traversal and, if {@code defaultStopStructures} has not been set yet,
 * fills it from the word list named by the global {@code stopstructurelist}
 * parameter (falling back to the {@code stopStructure} list).
 *
 * <p>Each entry is trimmed and then given exactly one trailing space before being
 * stored in a sorted set.
 *
 * @param retrieval source of the global parameters
 * @throws IOException propagated from the word-list lookup
 */
public StopStructureTraversal(Retrieval retrieval) throws IOException {
  // Already populated: nothing left to do.
  if (defaultStopStructures != null) {
    return;
  }

  // Default to the 'stopStructure' list when no list name is configured.
  final String listName = retrieval.getGlobalParameters().get("stopstructurelist", "stopStructure");
  final Set<String> rawPhrases = WordLists.getWordList(listName);

  final Set<String> normalized = new TreeSet<>();
  for (String phrase : rawPhrases) {
    // The trailing space ensures terms are not cut off mid-word.
    normalized.add(phrase.trim() + " ");
  }
  defaultStopStructures = normalized;
}
示例4: afterNode
import org.lemurproject.galago.core.util.WordLists; //导入依赖的package包/类
/**
 * Rewrites a {@code #stopstructure} operator node into a {@code #combine} node and
 * asks {@code removeStopStructure} to strip a stop structure from it.
 *
 * <p>Starting at the new {@code combine} node, the method descends through
 * single-child nodes until it reaches a node whose first child is a {@code text}
 * node. If no such node is found, an info message is logged and the {@code combine}
 * node is returned without further changes.
 *
 * <p>The stop-structure set is {@code defaultStopStructures} unless
 * {@code queryParameters} holds a string-valued {@code stopstructurelist} entry; in
 * that case the named list is loaded and every entry is trimmed and suffixed with a
 * single space.
 *
 * @param original the node that has just been visited
 * @param queryParameters per-query settings; may name an alternative list
 * @return a new {@code combine} node when {@code original} is a
 *     {@code stopstructure} operator, otherwise {@code original} itself
 * @throws Exception propagated from the word-list lookup or from
 *     {@code removeStopStructure}
 */
@Override
public Node afterNode(Node original, Parameters queryParameters) throws Exception {
  if (!original.getOperator().equals("stopstructure")) {
    return original;
  }

  final Node replacement = new Node("combine", original.getInternalNodes());

  // Descend through single-child wrappers to the first node that starts with text.
  Node cursor = replacement;
  while (cursor.numChildren() == 1 && !cursor.getChild(0).getOperator().equals("text")) {
    cursor = cursor.getChild(0);
  }

  final boolean startsWithText =
      cursor.numChildren() >= 1 && cursor.getChild(0).getOperator().equals("text");
  if (!startsWithText) {
    logger.info("Unable to remove stop structure, could not find array of text-only nodes in :\n" + original.toPrettyString());
    return replacement;
  }

  Set<String> activeStructures = defaultStopStructures;
  if (queryParameters.isString("stopstructurelist")) {
    final Set<String> requested = WordLists.getWordList(queryParameters.getString("stopstructurelist"));
    final Set<String> normalized = new TreeSet<>();
    for (String phrase : requested) {
      // The trailing space ensures terms are not cut off mid-word.
      normalized.add(phrase.trim() + " ");
    }
    activeStructures = normalized;
  }

  removeStopStructure(cursor, activeStructures);
  return replacement;
}
示例5: StopWordTraversal
import org.lemurproject.galago.core.util.WordLists; //导入依赖的package包/类
/**
 * Creates the traversal and, if {@code defaultStopwords} has not been set yet, loads
 * it from the word list named by the global {@code stopwordlist} parameter (falling
 * back to the {@code inquery} list).
 *
 * @param retrieval source of the global parameters
 * @throws IOException propagated from the word-list lookup
 */
public StopWordTraversal(Retrieval retrieval) throws IOException {
  // Already populated: keep the existing set.
  if (defaultStopwords != null) {
    return;
  }
  // Default to the 'inquery' list when no list name is configured.
  defaultStopwords =
      WordLists.getWordList(retrieval.getGlobalParameters().get("stopwordlist", "inquery"));
}
示例6: RelevanceModel1
import org.lemurproject.galago.core.util.WordLists; //导入依赖的package包/类
/**
 * Builds a relevance model bound to {@code r}, taking its defaults from the
 * retrieval's global parameters.
 *
 * <p>Global parameters read:
 * <ul>
 *   <li>{@code fbDocs} – default number of feedback documents (20 if absent)</li>
 *   <li>{@code fbTerm} – default number of feedback terms (100 if absent); note the
 *       key is singular</li>
 *   <li>{@code rmstopwords} – name of the exclusion word list ({@code "rmstop"} if
 *       absent)</li>
 *   <li>{@code rmwhitelist} – optional name of an inclusion word list; when it is not
 *       a string parameter, {@code inclusionTerms} is left {@code null}</li>
 * </ul>
 *
 * @param r the retrieval used for feedback and as the source of global parameters
 * @throws Exception propagated from word-list loading or stemmer creation
 */
public RelevanceModel1(Retrieval r) throws Exception {
  this.retrieval = r;

  // Fetch the global parameters once and reuse them for every lookup below.
  final Parameters gblParms = r.getGlobalParameters();

  defaultFbDocs = (int) Math.round(gblParms.get("fbDocs", 20.0));
  defaultFbTerms = (int) Math.round(gblParms.get("fbTerm", 100.0));

  exclusionTerms = WordLists.getWordList(gblParms.get("rmstopwords", "rmstop"));
  // A null whitelist means no whitelist is configured.
  inclusionTerms = gblParms.isString("rmwhitelist")
      ? WordLists.getWordList(gblParms.getString("rmwhitelist"))
      : null;

  this.stemmer = getStemmer(gblParms, retrieval);
}
示例7: getStopwords
import org.lemurproject.galago.core.util.WordLists; //导入依赖的package包/类
/**
 * Returns the stopword set: the {@code inquery} word list plus the entries of
 * {@code customStop}. The set is built on the first call and cached in
 * {@code stopwords} for later calls.
 *
 * <p>The set is assembled in a local variable and stored in the field only once it
 * is complete. If loading fails, the field stays {@code null}, so the next call
 * retries instead of returning an empty or half-filled set.
 *
 * <p>NOTE(review): the lazy initialisation is not synchronised; confirm whether this
 * method can be called from several threads.
 *
 * @return the cached stopword set
 * @throws RuntimeException wrapping the {@link IOException} if the word list cannot
 *     be loaded
 */
public static Set<String> getStopwords() {
  if (stopwords == null) {
    try {
      // Typed as HashSet so it is assignable however the field is declared.
      HashSet<String> loaded = new HashSet<>();
      loaded.addAll(WordLists.getWordList("inquery"));
      loaded.addAll(customStop);
      // Publish only the fully built set.
      stopwords = loaded;
    } catch (IOException ioe) {
      throw new RuntimeException(ioe);
    }
  }
  return stopwords;
}
示例8: expand
import org.lemurproject.galago.core.util.WordLists; //导入依赖的package包/类
@Override
public Node expand(Node root, Parameters queryParameters) throws Exception {
int fbDocs = (int) Math.round(root.getNodeParameters().get("fbDocs", queryParameters.get("fbDocs", (double) defaultFbDocs)));
int fbTerms = (int) Math.round(root.getNodeParameters().get("fbTerm", queryParameters.get("fbTerm", (double) defaultFbTerms)));
if (fbDocs <= 0 || fbTerms <= 0) {
logger.info("fbDocs, or fbTerms is invalid, no expansion possible. (<= 0)");
return root;
}
// transform query to ensure it will run
Parameters fbParams = Parameters.create();
fbParams.set("requested", fbDocs);
// first pass is asserted to be document level
fbParams.set("passageQuery", false);
fbParams.set("extentQuery", false);
fbParams.setBackoff(queryParameters);
Node transformed = retrieval.transformQuery(root.clone(), fbParams);
// get some initial results
List<ScoredDocument> initialResults = collectInitialResults(transformed, fbParams);
if (initialResults.isEmpty()) {
return root;
}
// extract grams from results
Set<String> stemmedQueryTerms = stemTerms(stemmer, StructuredQuery.findQueryTerms(transformed));
Set<String> exclusions = (fbParams.isString("rmstopwords")) ? WordLists.getWordList(fbParams.getString("rmstopwords")) : exclusionTerms;
Set<String> inclusions = null;
if (fbParams.isString("rmwhitelist")){
inclusions = WordLists.getWordList(fbParams.getString("rmwhitelist"));
} else {
inclusions = inclusionTerms;
}
List<WeightedTerm> weightedTerms = extractGrams(retrieval, initialResults, stemmer, fbParams, stemmedQueryTerms, exclusions, inclusions);
// select some terms to form exp query node
Node expNode = generateExpansionQuery(weightedTerms, fbTerms);
return expNode;
}