本文整理汇总了Java中weka.core.tokenizers.Tokenizer类的典型用法代码示例。如果您正苦于以下问题:Java Tokenizer类的具体用法?Java Tokenizer怎么用?Java Tokenizer使用的例子?那么恭喜您, 这里精选的类代码示例或许可以为您提供帮助。
Tokenizer类属于weka.core.tokenizers包,在下文中一共展示了Tokenizer类的14个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Java代码示例。
示例1: getTokenizer
import weka.core.tokenizers.Tokenizer; //导入依赖的package包/类
/**
 * Returns the tokenizer that will be applied to the tweets.
 *
 * @return the configured tokenizer instance
 */
@OptionMetadata(
    displayName = "tokenizer",
    description = "The tokenizing algorithm to use on the tweets. Uses the CMU TweetNLP tokenizer as default",
    commandLineParamName = "tokenizer",
    commandLineParamSynopsis = "-tokenizer <string>",
    displayOrder = 3)
public Tokenizer getTokenizer() {
  return this.m_tokenizer;
}
示例2: getTokenizer
import weka.core.tokenizers.Tokenizer; //导入依赖的package包/类
/**
 * Returns the tokenizer that will be applied to the tweets.
 *
 * @return the configured tokenizer instance
 */
@OptionMetadata(
    displayName = "tokenizer",
    description = "The tokenizing algorithm to use on the tweets. Uses the CMU TweetNLP tokenizer as default",
    commandLineParamName = "tokenizer",
    commandLineParamSynopsis = "-tokenizer <string>",
    displayOrder = 1)
public Tokenizer getTokenizer() {
  return this.m_tokenizer;
}
示例3: tokenize
import weka.core.tokenizers.Tokenizer; //导入依赖的package包/类
/**
 * Tokenizes a String into a list of (optionally normalized and stemmed) tokens.
 * Stopwords are dropped; every surviving token is passed through the stemmer.
 *
 * @param content the content to tokenize
 * @param toLowerCase true for lowercasing the content first
 * @param standarizeUrlsUsers true for replacing URLs and user mentions with generic placeholders
 * @param reduceRepeatedLetters true for collapsing runs of a repeated letter down to two occurrences
 * @param tokenizer the tokenizer to split the content with
 * @param stemmer the stemmer applied to each kept token
 * @param stop the stopwords handler used to filter tokens
 * @return a list of processed tokens
 */
static public List<String> tokenize(String content, boolean toLowerCase, boolean standarizeUrlsUsers, boolean reduceRepeatedLetters, Tokenizer tokenizer, Stemmer stemmer, StopwordsHandler stop) {
  if (toLowerCase) {
    content = content.toLowerCase();
  }
  // A letter appearing two or more times in a row is collapsed to exactly
  // two occurrences (note: only lowercase letters are matched).
  if (reduceRepeatedLetters) {
    content = content.replaceAll("([a-z])\\1+", "$1$1");
  }
  List<String> result = new ArrayList<String>();
  tokenizer.tokenize(content);
  while (tokenizer.hasMoreElements()) {
    String token = tokenizer.nextElement();
    if (stop.isStopword(token)) {
      continue; // stopwords are discarded entirely
    }
    if (standarizeUrlsUsers) {
      if (token.matches("http.*|ww\\..*|www\\..*")) {
        // Replace URLs to a generic URL
        token = "http://www.url.com";
      } else if (token.matches("@.*")) {
        // Replaces user mentions to a generic user
        token = "@user";
      }
    }
    result.add(stemmer.stem(token));
  }
  return result;
}
示例4: setTokenizer
import weka.core.tokenizers.Tokenizer; //导入依赖的package包/类
/**
 * Sets the tokenizing algorithm to use.
 *
 * @param value the tokenizer to use
 */
public void setTokenizer(Tokenizer value) {
  // Parameter renamed from "m_tokenizer": the m_ prefix is the field naming
  // convention and shadowing the field with it invites assignment mistakes.
  this.m_tokenizer = value;
}
示例5: setTokenizer
import weka.core.tokenizers.Tokenizer; //导入依赖的package包/类
/**
 * Sets the tokenizing algorithm to be used.
 *
 * @param value the configured tokenizing algorithm
 */
public void setTokenizer(Tokenizer value) {
  this.m_tokenizer = value;
}
示例6: getTokenizer
import weka.core.tokenizers.Tokenizer; //导入依赖的package包/类
/**
 * Returns the tokenizer algorithm currently in use.
 *
 * @return the current tokenizer algorithm
 */
public Tokenizer getTokenizer() {
  return this.m_tokenizer;
}
示例7: setTokenizer
import weka.core.tokenizers.Tokenizer; //导入依赖的package包/类
/**
 * Sets the tokenizing algorithm to be used.
 *
 * @param value the configured tokenizing algorithm
 */
public void setTokenizer(Tokenizer value) {
  this.m_tokenizer = value;
}
示例8: getTokenizer
import weka.core.tokenizers.Tokenizer; //导入依赖的package包/类
/**
 * Returns the tokenizer algorithm currently in use.
 *
 * @return the current tokenizer algorithm
 */
public Tokenizer getTokenizer() {
  return this.m_tokenizer;
}
示例9: setTokenizer
import weka.core.tokenizers.Tokenizer; //导入依赖的package包/类
/**
 * Sets the tokenizing algorithm to be used.
 *
 * @param value the configured tokenizing algorithm
 */
public void setTokenizer(Tokenizer value) {
  this.m_Tokenizer = value;
}
示例10: getTokenizer
import weka.core.tokenizers.Tokenizer; //导入依赖的package包/类
/**
 * Returns the tokenizer algorithm currently in use.
 *
 * @return the current tokenizer algorithm
 */
public Tokenizer getTokenizer() {
  return this.m_Tokenizer;
}
示例11: setTokenizer
import weka.core.tokenizers.Tokenizer; //导入依赖的package包/类
/**
 * Sets the tokenizing algorithm to be used.
 *
 * @param value the configured tokenizing algorithm
 */
public void setTokenizer(Tokenizer value) {
  this.m_tokenizer = value;
}
示例12: getTokenizer
import weka.core.tokenizers.Tokenizer; //导入依赖的package包/类
/**
 * Returns the tokenizer algorithm currently in use.
 *
 * @return the current tokenizer algorithm
 */
public Tokenizer getTokenizer() {
  return this.m_tokenizer;
}
示例13: setTokenizer
import weka.core.tokenizers.Tokenizer; //导入依赖的package包/类
/**
 * Sets the tokenizing algorithm to be used.
 *
 * @param value the configured tokenizing algorithm
 */
public void setTokenizer(Tokenizer value) {
  this.m_Tokenizer = value;
}
示例14: getTokenizer
import weka.core.tokenizers.Tokenizer; //导入依赖的package包/类
/**
 * Returns the tokenizer algorithm currently in use.
 *
 * @return the current tokenizer algorithm
 */
public Tokenizer getTokenizer() {
  return this.m_Tokenizer;
}