This article collects typical usage examples of the Java method info.ephyra.nlp.SnowballStemmer.stem. If you are unsure what SnowballStemmer.stem does, how to call it, or want concrete examples, the curated method samples below may help. You can also explore the enclosing class info.ephyra.nlp.SnowballStemmer for further details.
Below are 6 code examples of SnowballStemmer.stem, sorted by popularity by default. You can upvote the examples you like or find useful; your ratings help the system recommend better Java code samples.
Example 1: match
import info.ephyra.nlp.SnowballStemmer; // import the package/class the method depends on
/**
 * Checks whether every stemmed token of the first phrase also occurs in the
 * second phrase, i.e. whether the second phrase covers the first.
 *
 * @param npq parsed string of the first phrase
 * @param npt parsed string of the second phrase
 * @return <code>true</code> iff all stemmed tokens of the first phrase are
 *         found in the second
 */
private static boolean match(String npq, String npt) {
    String q = unparse(npq).replace("'s", "").replace("'", "");
    String t = unparse(npt).replace("'s", "").replace("'", "");
    boolean exists;
    for (String token1 : q.split(" ")) {
        token1 = SnowballStemmer.stem(token1);
        exists = false;
        for (String token2 : t.split(" ")) {
            token2 = SnowballStemmer.stem(token2);
            if (token1.equalsIgnoreCase(token2)) {
                exists = true;
                break;
            }
        }
        if (!exists) {
            return false;
        }
    }
    return true;
}
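The core trick in match is comparing stemmed tokens rather than surface forms. Here is a minimal sketch of that comparison in isolation, assuming Ephyra's SnowballStemmer is on the classpath (the class name MatchDemo and the sample words are made up for illustration):

import info.ephyra.nlp.SnowballStemmer;

public class MatchDemo {
    public static void main(String[] args) {
        // "companies" and "company" share the same Snowball stem,
        // so a stemmed comparison treats them as the same token
        String a = SnowballStemmer.stem("companies");
        String b = SnowballStemmer.stem("company");
        System.out.println(a.equalsIgnoreCase(b)); // expected: true
    }
}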
Example 2: fuzzyContainsToken
import info.ephyra.nlp.SnowballStemmer; // import the package/class the method depends on
/**
 * Does a fuzzy lookup for a token. The specified token t is considered
 * contained in the dictionary if there is a token T in the dictionary such
 * that <code>LevenshteinDistance(t, T) <= maxDistance</code>.
 *
 * @param token the token to look up
 * @param maxDistance the maximum Levenshtein edit distance for the fuzzy
 *                    comparison
 * @return <code>true</code> iff the dictionary contains a token within the
 *         given edit distance
 */
public boolean fuzzyContainsToken(String token, int maxDistance) {
    token = SnowballStemmer.stem(token.trim().toLowerCase());
    if (maxDistance == 0) return this.tokens.contains(token);
    else if (this.tokens.contains(token)) return true;
    Iterator<String> tokenIter = this.tokens.iterator();
    while (tokenIter.hasNext())
        if (getLevenshteinDistance(token, tokenIter.next(), maxDistance, true, 1, 1) <= maxDistance)
            return true;
    return false;
}
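The getLevenshteinDistance helper used above belongs to the enclosing class and is not shown in the snippet. As a rough stand-in, here is a minimal sketch of a plain dynamic-programming Levenshtein distance with unit costs (not the class's actual helper, which also takes a bound and cost parameters):

public class LevenshteinDemo {
    // classic two-row DP: distance is the minimum number of
    // insertions, deletions, and substitutions to turn a into b
    static int levenshtein(String a, String b) {
        int[] prev = new int[b.length() + 1];
        int[] curr = new int[b.length() + 1];
        for (int j = 0; j <= b.length(); j++) prev[j] = j;
        for (int i = 1; i <= a.length(); i++) {
            curr[0] = i;
            for (int j = 1; j <= b.length(); j++) {
                int cost = (a.charAt(i - 1) == b.charAt(j - 1)) ? 0 : 1;
                curr[j] = Math.min(Math.min(curr[j - 1] + 1, prev[j] + 1),
                                   prev[j - 1] + cost);
            }
            int[] tmp = prev; prev = curr; curr = tmp;
        }
        return prev[b.length()];
    }

    public static void main(String[] args) {
        // "color" vs "colour": one insertion, so distance 1
        System.out.println(levenshtein("color", "colour")); // 1
    }
}

With such a helper, fuzzyContainsToken(token, 1) would accept dictionary entries that differ from the stemmed token by a single edit.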
Example 3: normalize
import info.ephyra.nlp.SnowballStemmer; // import the package/class the method depends on
/**
* Normalizes a string. Similar strings are mapped to equal normalizations.
*
* @param s the string
* @return normalized string
*/
// TODO use noun and verb stemming (also for equals...Norm() methods)
public static String normalize(String s) {
    // convert to lower-case
    s = s.toLowerCase();
    // tokenize
    String[] tokens = NETagger.tokenize(s);
    // stem all tokens
    for (int i = 0; i < tokens.length; i++)
        tokens[i] = SnowballStemmer.stem(tokens[i]);
    return concatWithSpaces(tokens);
}
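Since NETagger and concatWithSpaces are Ephyra internals not shown here, the same normalize idea can be sketched with a plain whitespace split. This is a simplification for illustration, assuming only SnowballStemmer is available:

import info.ephyra.nlp.SnowballStemmer;

public class NormalizeDemo {
    // simplified stand-in for normalize(): lower-case, split on spaces
    // (the real code uses NETagger.tokenize), stem each token, rejoin
    static String normalize(String s) {
        String[] tokens = s.toLowerCase().split(" ");
        StringBuilder sb = new StringBuilder();
        for (int i = 0; i < tokens.length; i++) {
            if (i > 0) sb.append(' ');
            sb.append(SnowballStemmer.stem(tokens[i]));
        }
        return sb.toString();
    }

    public static void main(String[] args) {
        // "Dogs barking" and "dog barked" collapse to the same form,
        // so similar strings compare equal after normalization
        System.out.println(normalize("Dogs barking").equals(normalize("dog barked"))); // expected: true
    }
}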
Example 4: getTermCounters
import info.ephyra.nlp.SnowballStemmer; // import the package/class the method depends on
/**
 * @see info.ephyra.answerselection.filters.WebTermImportanceFilter#getTermCounters(java.lang.String[])
 */
@Override
public HashMap<String, TermCounter> getTermCounters(String[] targets) {
    HashMap<String, TermCounter> termCounters = new HashMap<String, TermCounter>();
    for (String target : targets) {
        // get snippets from Yahoo
        SearchClient client = new SearchClient(YAHOO_ID);
        // create request
        WebSearchRequest request = new WebSearchRequest(target);
        request.setLanguage("en"); // search for English pages only
        request.setStart(BigInteger.valueOf(0));
        request.setResults(MAX_RESULTS_PERQUERY);
        // perform search
        WebSearchResult[] searchResults = null;
        int retries = 0;
        while (searchResults == null)
            try {
                searchResults = client.webSearch(request).listResults();
            } catch (Exception e) {
                MsgPrinter.printSearchError(e); // print search error message
                if (retries == RETRIES) {
                    MsgPrinter.printErrorMsg("\nSearch failed.");
                    System.exit(1);
                }
                retries++;
                try {
                    YahooKM.sleep(1000);
                } catch (InterruptedException ie) {}
            }
        // parse Yahoo snippets
        int lengthSum = 0;
        for (int i = 0; i < searchResults.length; i++) {
            String summary = searchResults[i].getSummary();
            if (summary != null) {
                // tokenize and tag sentence
                String[] sentence = NETagger.tokenize(summary);
                lengthSum += sentence.length;
                // scan sentence for NPs
                for (int s = 0; s < sentence.length; s++) {
                    String term = SnowballStemmer.stem(sentence[s].toLowerCase());
                    if (term.length() > 1) {
                        if (!termCounters.containsKey(term))
                            termCounters.put(term, new TermCounter());
                        termCounters.get(term).increment();
                    }
                }
            }
        }
    }
    return termCounters;
}
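Apart from the Yahoo-specific calls, the interesting part is the retry loop: keep querying until a result arrives, sleep between attempts, and give up after RETRIES tries. A generic, self-contained sketch of that pattern follows (RetryDemo and its Search interface are made up for illustration; they are not part of Ephyra):

public class RetryDemo {
    static final int RETRIES = 3;

    interface Search { String run() throws Exception; }

    // keep trying until a result arrives; sleep one second between
    // attempts and give up after RETRIES failed tries
    static String searchWithRetries(Search search) {
        String result = null;
        int retries = 0;
        while (result == null) {
            try {
                result = search.run();
            } catch (Exception e) {
                if (retries == RETRIES) return null; // give up
                retries++;
                try { Thread.sleep(1000); } catch (InterruptedException ie) {}
            }
        }
        return result;
    }

    public static void main(String[] args) {
        System.out.println(searchWithRetries(() -> "ok")); // prints "ok"
    }
}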
Example 5: getGoogleTermCounters
import info.ephyra.nlp.SnowballStemmer; // import the package/class the method depends on
private HashMap<String, TermCounter> getGoogleTermCounters(String target) {
    HashMap<String, TermCounter> targetTermCounters = new HashMap<String, TermCounter>();
    // subsequently get the top MAX_RESULTS_TOTAL snippets, MAX_RESULTS_PERQUERY at a time
    for (int startResult = 0; startResult < MAX_RESULTS_TOTAL; startResult += MAX_RESULTS_PERQUERY) {
        // get snippets from Google
        GoogleSearch search = new GoogleSearch();
        if (TEST_TARGET_GENERATION) System.out.println("Got search ...");
        // set license key
        search.setKey(GOOGLE_KEY);
        if (TEST_TARGET_GENERATION) System.out.println(" - key is " + GOOGLE_KEY);
        // set search string
        search.setQueryString(target);
        if (TEST_TARGET_GENERATION) System.out.println(" - target is " + target);
        // set language to English only
        search.setLanguageRestricts("English");
        if (TEST_TARGET_GENERATION) System.out.println(" - language set");
        // set hit position of first search result
        search.setStartResult(startResult);
        if (TEST_TARGET_GENERATION) System.out.println(" - start result set to " + startResult);
        // set maximum number of search results
        search.setMaxResults(MAX_RESULTS_PERQUERY);
        if (TEST_TARGET_GENERATION) System.out.println(" - max results set");
        // perform search
        GoogleSearchResult googleResult = null;
        int retries = 0;
        while (googleResult == null)
            try {
                googleResult = search.doSearch();
            } catch (GoogleSearchFault e) {
                MsgPrinter.printSearchError(e); // print search error message
                if (retries == RETRIES) {
                    MsgPrinter.printErrorMsg("\nSearch failed.");
                    //System.exit(1);
                    return targetTermCounters;
                }
                retries++;
                try {
                    GoogleKM.sleep(1000);
                } catch (InterruptedException ie) {}
            }
        // get snippets
        GoogleSearchResultElement[] elements = googleResult.getResultElements();
        if (TEST_TARGET_GENERATION) System.out.println(" - got results: " + elements.length);
        // parse Google snippets
        int lengthSum = 0;
        for (int i = 0; i < elements.length; i++) {
            String plain = elements[i].getSnippet().replaceAll("\\<[^\\>]++\\>", " ");
            plain = plain.replaceAll("\\&\\#39\\;", "'");
            if (TEST_TARGET_GENERATION) System.out.println(" - plain: " + plain);
            // tokenize and tag sentence
            String[] sentence = NETagger.tokenize(plain);
            lengthSum += sentence.length;
            // scan sentence for NPs
            for (int s = 0; s < sentence.length; s++) {
                String term = SnowballStemmer.stem(sentence[s].toLowerCase());
                if (term.length() > 1) {
                    if (!targetTermCounters.containsKey(term))
                        targetTermCounters.put(term, new TermCounter());
                    targetTermCounters.get(term).increment();
                }
            }
        }
    }
    return targetTermCounters;
}
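The snippet cleanup is plain string work and can be tried in isolation. The two regexes below are copied from the method (strip HTML tags, decode &#39; to an apostrophe); the sample snippet and the class name SnippetCleanDemo are made up:

public class SnippetCleanDemo {
    public static void main(String[] args) {
        String snippet = "Ephyra&#39;s <b>question</b> answering";
        // replace each HTML tag with a space, then decode the entity
        String plain = snippet.replaceAll("\\<[^\\>]++\\>", " ")
                              .replaceAll("\\&\\#39\\;", "'");
        System.out.println(plain); // Ephyra's  question  answering
        // note the doubled spaces where tags were; the tokenizer
        // applied afterwards makes them harmless
    }
}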
Example 6: containsToken
import info.ephyra.nlp.SnowballStemmer; // import the package/class the method depends on
/**
 * Looks up a word token.
 *
 * @param token the word token to look up
 * @return <code>true</code> iff the dictionary contains the (stemmed) token
 */
public boolean containsToken(String token) {
    token = SnowballStemmer.stem(token.trim().toLowerCase());
    return tokens.contains(token);
}
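The lookup itself is a plain Set.contains; what matters is the key normalization applied first. A minimal sketch of that chain, assuming SnowballStemmer is on the classpath (the class name LookupDemo is made up for illustration):

import info.ephyra.nlp.SnowballStemmer;

public class LookupDemo {
    public static void main(String[] args) {
        // the same normalization containsToken() applies before the
        // lookup: trim whitespace, lower-case, then stem
        String key = SnowballStemmer.stem("  Running ".trim().toLowerCase());
        System.out.println(key); // "run" with the English Snowball stemmer
    }
}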