本文整理匯總了Java中info.ephyra.nlp.SnowballStemmer.stemAllTokens方法的典型用法代碼示例。如果您正苦於以下問題:Java SnowballStemmer.stemAllTokens方法的具體用法?Java SnowballStemmer.stemAllTokens怎麼用?Java SnowballStemmer.stemAllTokens使用的例子?那麼,這裡精選的方法代碼示例或許可以為您提供幫助。您也可以進一步了解該方法所在類info.ephyra.nlp.SnowballStemmer的用法示例。
在下文中一共展示了SnowballStemmer.stemAllTokens方法的4個代碼示例,這些例子默認根據受歡迎程度排序。您可以為喜歡或者感覺有用的代碼點讚,您的評價將有助於系統推薦出更棒的Java代碼示例。
示例1: add
import info.ephyra.nlp.SnowballStemmer; //導入方法依賴的package包/類
/**
 * Normalizes a word and stores it in the dictionary.
 * <p>
 * The word is trimmed, lower-cased, passed through
 * <code>NETagger.tokenizeWithSpaces</code> and then through
 * <code>SnowballStemmer.stemAllTokens</code>. The resulting string is stored
 * as a whole entry, and each of its space-separated tokens is stored
 * individually. <code>maxTokens</code> is raised if this entry has more
 * tokens than any entry seen so far. A <code>null</code> argument is ignored.
 *
 * @param word the word to add
 */
public void add(String word) {
    if (word == null) {
        return;
    }

    String normalized = SnowballStemmer.stemAllTokens(
            NETagger.tokenizeWithSpaces(word.trim().toLowerCase()));

    // store the complete (possibly multi-token) entry
    if (normalized.length() != 0) {
        words.add(normalized);
    }

    // store the individual tokens and keep track of the longest entry
    String[] parts = normalized.split(" ");
    if (maxTokens < parts.length) {
        maxTokens = parts.length;
    }
    for (String part : parts) {
        if (part.length() != 0) {
            this.tokens.add(part);
        }
    }
}
示例2: HashDictionary
import info.ephyra.nlp.SnowballStemmer; //導入方法依賴的package包/類
/**
 * Creates a <code>HashDictionary</code> from a list of words in a file.
 * <p>
 * Each line of the file is treated as one entry. Lines that start with
 * <code>//</code> (after trimming) are skipped as comments. Every other line
 * is lower-cased, passed through <code>NETagger.tokenizeWithSpaces</code> and
 * <code>SnowballStemmer.stemAllTokens</code>, and then stored both as a whole
 * entry and as individual space-separated tokens. If <code>fileName</code> is
 * <code>null</code>, the dictionary is left as created by the no-argument
 * constructor.
 *
 * @param fileName file containing a list of words, may be <code>null</code>
 * @throws IOException if the list could not be read from the file
 */
public HashDictionary(String fileName) throws IOException {
    this();
    if (fileName == null) {
        return;
    }

    File file = new File(fileName);
    BufferedReader in = new BufferedReader(new FileReader(file));
    try {
        // readLine() returns null at end of stream; ready() only reports
        // whether a read would block and is not a reliable EOF test
        String line;
        while ((line = in.readLine()) != null) {
            // read and normalize word
            String word = line.trim();
            if (word.startsWith("//")) {
                continue; // skip comments
            }
            word = NETagger.tokenizeWithSpaces(word.toLowerCase());
            word = SnowballStemmer.stemAllTokens(word);

            // add whole word
            if (word.length() > 0) {
                words.add(word);
            }

            // add tokens of word
            String[] parts = word.split(" ");
            if (parts.length > maxTokens) {
                maxTokens = parts.length;
            }
            for (String part : parts) {
                if (part.length() > 0) {
                    this.tokens.add(part);
                }
            }
        }
    } finally {
        // release the file handle even if reading or normalization fails
        in.close();
    }
}
示例3: contains
import info.ephyra.nlp.SnowballStemmer; //導入方法依賴的package包/類
/**
 * Looks up a word.
 * <p>
 * The word is normalized the same way as in <code>add</code> (trimmed,
 * lower-cased, tokenized with <code>NETagger.tokenizeWithSpaces</code> and
 * stemmed with <code>SnowballStemmer.stemAllTokens</code>) before the lookup.
 * Since <code>add</code> ignores <code>null</code>, a <code>null</code> word
 * is never contained and yields <code>false</code> instead of a
 * <code>NullPointerException</code>.
 *
 * @param word the word to look up, may be <code>null</code>
 * @return <code>true</code> iff the word was found
 */
public boolean contains(String word) {
    if (word == null) {
        return false;
    }
    word = NETagger.tokenizeWithSpaces(word.trim().toLowerCase());
    word = SnowballStemmer.stemAllTokens(word);
    return words.contains(word);
}
示例4: fuzzyContains
import info.ephyra.nlp.SnowballStemmer; //導入方法依賴的package包/類
/**
 * Does a fuzzy lookup for a word. The specified word w is considered as
 * contained in the dictionary if there is a word W in the dictionary such
 * that <code>LevenshteinDistance(w, W) &lt;= maxDistance</code>.
 * <p>
 * The word is normalized the same way as in <code>add</code> before the
 * comparison. An exact match is checked first; the dictionary is scanned
 * only if that fails and <code>maxDistance</code> is not 0. Since
 * <code>add</code> ignores <code>null</code>, a <code>null</code> word is
 * never contained and yields <code>false</code>.
 *
 * @param word the word to look up, may be <code>null</code>
 * @param maxDistance the maximum Levenshtein edit distance for fuzzy
 * comparison
 * @return <code>true</code> iff the word was found
 */
public boolean fuzzyContains(String word, int maxDistance) {
    if (word == null) {
        return false;
    }
    word = NETagger.tokenizeWithSpaces(word.trim().toLowerCase());
    word = SnowballStemmer.stemAllTokens(word);

    // an exact hit satisfies every distance bound
    if (this.words.contains(word)) {
        return true;
    }
    // with a bound of 0 only an exact hit counts, so skip the scan
    if (maxDistance == 0) {
        return false;
    }

    for (String candidate : this.words) {
        if (getLevenshteinDistance(word, candidate, maxDistance, true, 1, 1) <= maxDistance) {
            return true;
        }
    }
    return false;
}