本文整理汇总了Java中org.apache.uima.conceptMapper.support.stemmer.Stemmer类的典型用法代码示例。如果您正苦于以下问题：Java Stemmer类的具体用法？Java Stemmer怎么用？Java Stemmer使用的例子？那么，这里精选的类代码示例或许可以为您提供帮助。
Stemmer类属于org.apache.uima.conceptMapper.support.stemmer包，在下文中一共展示了Stemmer类的13个代码示例，这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞，您的评价将有助于系统推荐出更棒的Java代码示例。
示例1: buildConceptMapperAggregate
import org.apache.uima.conceptMapper.support.stemmer.Stemmer; //导入依赖的package包/类
/**
 * Builds the OffsetTokenizer + ConceptMapper aggregate description, taking every ConceptMapper
 * setting from {@code paramValues} via the sibling {@code get*} helpers.
 *
 * @param paramValues
 *            parameter values handed to each {@code get*} helper
 * @param tsd
 *            type system passed through to the aggregate factory
 * @param dictionaryFile
 *            dictionary file passed through to the aggregate factory
 * @param spanFeatureStructureClass
 *            span annotation class passed through to the aggregate factory
 * @return the description produced by
 *         {@code ConceptMapperAggregateFactory.getOffsetTokenizerConceptMapperAggregateDescription}
 * @throws UIMAException
 * @throws IOException
 */
private static AnalysisEngineDescription buildConceptMapperAggregate(List<String> paramValues,
        TypeSystemDescription tsd, File dictionaryFile, Class<? extends Annotation> spanFeatureStructureClass)
        throws UIMAException, IOException {
    final CaseMatchParamValue caseMatch = getCaseMatchParamValue(paramValues);
    final SearchStrategyParamValue searchStrategy = getSearchStrategyParamValue(paramValues);
    final Class<? extends Stemmer> stemmer = getStemmerClass(paramValues);
    final String[] stopwords = getStopWordList(paramValues);
    final boolean orderIndependent = getOrderIndependentLookup(paramValues);
    final boolean allMatches = getFindAllMatches(paramValues);

    // Comma replacement is hard-wired off and is not read from paramValues: the original author
    // noted that this parameter doesn't appear to be enabled in ConceptMapper.
    final boolean commaAsAnd = false;

    return ConceptMapperAggregateFactory.getOffsetTokenizerConceptMapperAggregateDescription(
            tsd, dictionaryFile, caseMatch, searchStrategy, spanFeatureStructureClass, stemmer,
            stopwords, orderIndependent, allMatches, commaAsAnd);
}
示例2: getOffsetTokenizerConceptMapperAggregateDescription
import org.apache.uima.conceptMapper.support.stemmer.Stemmer; //导入依赖的package包/类
/**
 * Creates the description of an aggregate engine that runs the OffsetTokenizer and then the
 * ConceptMapper.
 *
 * @param tsd
 *            type system used for both component descriptions
 * @param dictionaryFile
 *            dictionary handed to the ConceptMapper factory
 * @param caseMatchParamValue
 *            case-matching setting shared by the tokenizer and the ConceptMapper
 * @param searchStrategyParamValue
 *            search strategy handed to the ConceptMapper factory
 * @param spanFeatureStructureClass
 *            commonly edu.ucdenver.ccp.nlp.ext.uima.types.Sentence
 * @param stemmerClass
 *            optional, leave null if not desired; when null the tokenizer is configured
 *            without a stemmer
 * @param stopwordList
 *            stop words handed to the ConceptMapper factory
 * @param orderIndependentLookup
 *            passed through to the ConceptMapper factory
 * @param findAllMatches
 *            passed through to the ConceptMapper factory
 * @param replaceCommaWithAnd
 *            passed through to the ConceptMapper factory
 * @return the aggregate description (tokenizer first, ConceptMapper second)
 * @throws UIMAException
 * @throws IOException
 */
public static AnalysisEngineDescription getOffsetTokenizerConceptMapperAggregateDescription(
        TypeSystemDescription tsd, File dictionaryFile, CaseMatchParamValue caseMatchParamValue,
        SearchStrategyParamValue searchStrategyParamValue, Class<? extends Annotation> spanFeatureStructureClass,
        Class<? extends Stemmer> stemmerClass, String[] stopwordList, boolean orderIndependentLookup,
        boolean findAllMatches, boolean replaceCommaWithAnd) throws UIMAException, IOException {
    /* Tokenizer: the stemmer class is only part of the configuration when one was supplied */
    final Object[] tokenizerSettings = (stemmerClass != null)
            ? OffsetTokenizerFactory.buildConfigurationData(caseMatchParamValue, stemmerClass)
            : OffsetTokenizerFactory.buildConfigurationData(caseMatchParamValue);
    final AnalysisEngineDescription tokenizerDesc = OffsetTokenizerFactory.buildOffsetTokenizerDescription(
            tsd, tokenizerSettings);

    // Snapshot the tokenizer's operational properties BEFORE the concept mapper description is
    // built, so they can be put back afterwards if they turn out to be missing.
    final OperationalProperties savedProperties = tokenizerDesc.getAnalysisEngineMetaData()
            .getOperationalProperties();

    /* Concept mapper: built against the tokenizer description created above */
    final AnalysisEngineDescription mapperDesc = ConceptMapperFactory.buildConceptMapperDescription(tsd,
            dictionaryFile, caseMatchParamValue, searchStrategyParamValue, stemmerClass, stopwordList,
            orderIndependentLookup, findAllMatches, replaceCommaWithAnd, spanFeatureStructureClass,
            tokenizerDesc);

    // NOTE(review): presumably building the concept mapper description can leave the tokenizer
    // without operational properties -- confirm. The snapshot is restored only in that case.
    if (tokenizerDesc.getAnalysisEngineMetaData().getOperationalProperties() == null) {
        tokenizerDesc.getAnalysisEngineMetaData().setOperationalProperties(savedProperties);
    }

    return AnalysisEngineFactory.createAggregateDescription(tokenizerDesc, mapperDesc);
}
示例3: buildConfigurationData
import org.apache.uima.conceptMapper.support.stemmer.Stemmer; //导入依赖的package包/类
/**
 * Assembles the OffsetTokenizer configuration as a flat array of alternating parameter names
 * and values: case match, stemmer class name, token delimiters (in that order).
 *
 * @param caseMatchParamValue
 *            supplies the case-match value through {@code paramValue()}
 * @param stemmerClass
 *            stemmer implementation; its fully qualified name is stored. Must not be null,
 *            since {@code getName()} is invoked on it.
 * @param tokenDelimiters
 *            stored as-is as the token delimiter value
 * @return a six-element name/value array
 */
public static Object[] buildConfigurationData(CaseMatchParamValue caseMatchParamValue, Class<? extends Stemmer> stemmerClass,
        String tokenDelimiters) {
    final Object caseMatchName = OffsetTokenizerConfigParam.CASE_MATCH.paramName();
    final Object caseMatchValue = caseMatchParamValue.paramValue();
    final Object stemmerName = OffsetTokenizerConfigParam.STEMMER_CLASS_NAME.paramName();
    final Object stemmerValue = stemmerClass.getName();
    final Object delimitersName = OffsetTokenizerConfigParam.TOKEN_DELIMITERS.paramName();

    return new Object[] { caseMatchName, caseMatchValue, stemmerName, stemmerValue, delimitersName,
            tokenDelimiters };
}
示例4: getStemmerClass
import org.apache.uima.conceptMapper.support.stemmer.Stemmer; //导入依赖的package包/类
/**
 * Maps a {@code StemmerType} constant to the stemmer implementation class that backs it.
 *
 * @param stemmerType
 *            the stemmer type to resolve
 * @return {@code ConceptMapperPorterStemmer} for PORTER, {@code ConceptMapperBioLemmatizer} for
 *         BIOLEMMATIZER, {@code ConceptMapperNullStemmer} for NONE
 * @throws IllegalArgumentException
 *             if the type is not one of the handled constants
 */
public static Class<? extends Stemmer> getStemmerClass(StemmerType stemmerType) {
    final Class<? extends Stemmer> resolved;
    switch (stemmerType) {
    case PORTER:
        resolved = ConceptMapperPorterStemmer.class;
        break;
    case BIOLEMMATIZER:
        resolved = ConceptMapperBioLemmatizer.class;
        break;
    case NONE:
        resolved = ConceptMapperNullStemmer.class;
        break;
    default:
        throw new IllegalArgumentException("Unhandled stemmer type: " + stemmerType.name());
    }
    return resolved;
}
示例5: processConfigurationParameter
import org.apache.uima.conceptMapper.support.stemmer.Stemmer; //导入依赖的package包/类
/**
 * Applies a single named configuration parameter to this object.
 * <ul>
 * <li>{@code PARAM_CASE_MATCH}: a non-null value of "insensitive", "digitfold" or "ignoreall"
 * (compared ignoring case) switches on the matching case-folding flag; other values are
 * ignored.</li>
 * <li>{@code PARAM_STEMMER_CLASS} with a non-null value: the named class is loaded and
 * instantiated as the stemmer and stemming is enabled. Failures are reported on
 * {@code System.err} and are not rethrown.</li>
 * <li>{@code PARAM_TOKEN_DELIM}: a non-null value becomes the delimiter set.</li>
 * </ul>
 * Any other parameter name is ignored.
 *
 * @param configParameterName
 *            name of the parameter being applied
 * @param configParameterValue
 *            value of the parameter; cast to {@code String} for the parameters listed above
 */
public void processConfigurationParameter(String configParameterName, Object configParameterValue) {
    if (configParameterName.equals(PARAM_CASE_MATCH)) {
        final String caseMode = (String) configParameterValue;
        if (caseMode == null) {
            return;
        }
        if (caseMode.equalsIgnoreCase("insensitive")) {
            this.setCaseFoldInitCap(true);
        } else if (caseMode.equalsIgnoreCase("digitfold")) {
            this.setCaseFoldDigit(true);
        } else if (caseMode.equalsIgnoreCase("ignoreall")) {
            this.setCaseFoldAll(true);
        }
        return;
    }

    final boolean isStemmerParam = configParameterName.equals(PARAM_STEMMER_CLASS);
    if (isStemmerParam && configParameterValue != null) {
        try {
            final Class<?> loaded = Class.forName((String) configParameterValue);
            final Stemmer instance = (Stemmer) loaded.newInstance();
            setStemmer(instance);
            setStemming(true);
        } catch (Exception e) {
            // Best effort: a stemmer that cannot be created is reported, not propagated.
            System.err.println("Exception trying to instantiate stemmer class: '"
                    + (String) configParameterValue + "', original exception:" + e.getMessage());
            e.printStackTrace();
        }
        return;
    }

    // A null stemmer value falls through to here, exactly like any other parameter name.
    if (configParameterName.equals(PARAM_TOKEN_DELIM)) {
        final String delimiters = (String) configParameterValue;
        if (delimiters != null) {
            this.setDelim(delimiters);
        }
    }
}
示例6: paramValue
import org.apache.uima.conceptMapper.support.stemmer.Stemmer; //导入依赖的package包/类
/**
 * Resolves this parameter's {@code stemmerType} to its stemmer implementation class by
 * delegating to {@code ConceptMapperStemmerFactory.getStemmerClass}.
 *
 * @return the stemmer class for {@code stemmerType}
 */
@Override
public Class<? extends Stemmer> paramValue() {
    final Class<? extends Stemmer> stemmerClass = ConceptMapperStemmerFactory.getStemmerClass(stemmerType);
    return stemmerClass;
}
示例7: getStemmerClass
import org.apache.uima.conceptMapper.support.stemmer.Stemmer; //导入依赖的package包/类
/**
 * Determines the stemmer class from the third entry (index 2) of {@code paramValues}.
 * {@code STEMMER_KEY} is stripped from that entry with {@code StringUtil.removePrefix} and the
 * remainder is looked up with {@code ConceptMapperStemmerParam.valueOf}.
 *
 * @param paramValues
 *            parameter values; the entry at index 2 is the one consulted
 * @return the class reported by the matching parameter's {@code paramValue()}
 */
private static Class<? extends Stemmer> getStemmerClass(List<String> paramValues) {
    final String rawEntry = paramValues.get(2);
    final String constantName = StringUtil.removePrefix(rawEntry, STEMMER_KEY);
    return ConceptMapperStemmerParam.valueOf(constantName).paramValue();
}
示例8: testBuildCmOffsetTokAggregate
import org.apache.uima.conceptMapper.support.stemmer.Stemmer; //导入依赖的package包/类
/**
 * Builds the tokenizer and ConceptMapper descriptions through their factories, combines them
 * into an aggregate engine, runs it on {@code jcas} and checks the two expected ontology terms.
 */
@Test
public void testBuildCmOffsetTokAggregate() throws UIMAException, IOException {
    // Settings with no stemmer, no stop words and every optional lookup behaviour off
    final Class<? extends Annotation> spanType = Sentence.class;
    final Class<? extends Stemmer> noStemmer = null;
    final String[] noStopwords = new String[0];
    final boolean orderIndependent = false;
    final boolean commaAsAnd = false;
    final boolean allMatches = false;

    /* Tokenizer description */
    final CaseMatchParamValue caseMatch = CaseMatchParamValue.CASE_INSENSITIVE;
    final Object[] tokenizerSettings = OffsetTokenizerFactory.buildConfigurationData(caseMatch);
    final AnalysisEngineDescription tokenizerDesc = OffsetTokenizerFactory.buildOffsetTokenizerDescription(
            tsd, tokenizerSettings);

    /* Concept mapper description, built against the tokenizer description */
    final SearchStrategyParamValue searchStrategy = SearchStrategyParamValue.CONTIGUOUS_MATCH;
    final AnalysisEngineDescription mapperDesc = ConceptMapperFactory.buildConceptMapperDescription(tsd,
            dictionaryFile, caseMatch, searchStrategy, noStemmer, noStopwords, orderIndependent,
            allMatches, commaAsAnd, spanType, tokenizerDesc);

    // Diagnostic output about both descriptions
    System.out.println("offset == null: " + (tokenizerDesc == null));
    System.out.println("cm == null: " + (mapperDesc == null));
    System.out.println("offset: " + tokenizerDesc.toString());
    System.out.println("offset op: "
            + tokenizerDesc.getAnalysisEngineMetaData().getOperationalProperties()
                    .isMultipleDeploymentAllowed());
    System.out.println("cm op: "
            + mapperDesc.getAnalysisEngineMetaData().getOperationalProperties()
                    .isMultipleDeploymentAllowed());

    /* Aggregate engine: tokenizer first, concept mapper second */
    final AnalysisEngineDescription aggregateDesc = AnalysisEngineFactory.createAggregateDescription(
            tokenizerDesc, mapperDesc);
    final AnalysisEngine aggregateEngine = AnalysisEngineFactory.createAggregate(aggregateDesc);
    aggregateEngine.process(jcas);

    final List<OntologyTerm> terms = CollectionsUtil.createList(JCasUtil.iterator(jcas, OntologyTerm.class));
    assertEquals("Two ontology terms should have been found", 2, terms.size());

    final OntologyTerm first = terms.get(0);
    assertEquals("NEF1 complex", first.getCoveredText());
    assertEquals("GO:0000110", first.getID());

    final OntologyTerm second = terms.get(1);
    assertEquals("nucleotide-excision repair complex", second.getCoveredText());
    assertEquals("GO:0000109", second.getID());
}
示例9: testBuildCmOffsetTokAggregate_3
import org.apache.uima.conceptMapper.support.stemmer.Stemmer; //导入依赖的package包/类
/**
 * Creates the tokenizer from its XML descriptor file and the ConceptMapper through
 * {@code ConceptMapperFactory}, runs the two engines one after the other on {@code jcas} and
 * checks the two expected ontology terms.
 */
@Test
public void testBuildCmOffsetTokAggregate_3() throws UIMAException, IOException {
    // Settings with no stemmer, no stop words and every optional lookup behaviour off
    final Class<? extends Annotation> spanType = Sentence.class;
    final Class<? extends Stemmer> noStemmer = null;
    final String[] noStopwords = new String[0];
    final boolean orderIndependent = false;
    final boolean commaAsAnd = false;
    final boolean allMatches = false;

    /* Tokenizer description, loaded from the XML descriptor file */
    final CaseMatchParamValue caseMatch = CaseMatchParamValue.CASE_INSENSITIVE;
    final Object[] tokenizerSettings = OffsetTokenizerFactory.buildConfigurationData(caseMatch);
    final AnalysisEngineDescription tokenizerDesc = (AnalysisEngineDescription) ResourceCreationSpecifierFactory
            .createResourceCreationSpecifier(tokenizerXmlFile.getAbsolutePath(), tokenizerSettings);
    tokenizerDesc.getAnalysisEngineMetaData().setTypeSystem(tsd);

    /*
     * Concept mapper description via the ConceptMapperFactory method that does NOT use an XML
     * file for the mapper itself (using one for the tokenizer is more difficult to avoid).
     */
    final SearchStrategyParamValue searchStrategy = SearchStrategyParamValue.CONTIGUOUS_MATCH;
    final AnalysisEngineDescription mapperDesc = ConceptMapperFactory.buildConceptMapperDescription(tsd,
            dictionaryFile, caseMatch, searchStrategy, noStemmer, noStopwords, orderIndependent,
            allMatches, commaAsAnd, spanType, tokenizerXmlFile);

    // Two separate engines, run in sequence: tokenizer, then concept mapper
    final AnalysisEngine tokenizer = UIMAFramework.produceAnalysisEngine(tokenizerDesc);
    final AnalysisEngine conceptMapper = UIMAFramework.produceAnalysisEngine(mapperDesc);
    tokenizer.process(jcas);
    conceptMapper.process(jcas);

    final List<OntologyTerm> terms = CollectionsUtil.createList(JCasUtil.iterator(jcas, OntologyTerm.class));
    assertEquals("Two ontology terms should have been found", 2, terms.size());

    final OntologyTerm first = terms.get(0);
    assertEquals("NEF1 complex", first.getCoveredText());
    assertEquals("GO:0000110", first.getID());

    final OntologyTerm second = terms.get(1);
    assertEquals("nucleotide-excision repair complex", second.getCoveredText());
    assertEquals("GO:0000109", second.getID());
}
示例10: TokenNormalizer
import org.apache.uima.conceptMapper.support.stemmer.Stemmer; //导入依赖的package包/类
/**
 * Creates a token normalizer configured from the parameters of a UIMA context.
 * <p>
 * The comma-replacement flag defaults to {@code false} when its parameter is absent. All three
 * case-folding flags are first reset to {@code false}; the case-match parameter, if present,
 * then enables at most one of them. If a stemmer class name is configured, that class is
 * instantiated and initialized with the stemmer dictionary value; a failure is logged and
 * printed but not rethrown.
 *
 * @param uimaContext
 *            context the configuration parameter values are read from
 * @param logger
 *            receives the error message when the stemmer cannot be created or initialized
 * @throws AnnotatorContextException
 *             declared by the signature
 */
public TokenNormalizer(UimaContext uimaContext, Logger logger)
        throws AnnotatorContextException {
    super();

    // Read every configuration value up front, in a fixed order
    final Boolean commaFlag = (Boolean) uimaContext
            .getConfigParameterValue(PARAM_REPLACE_COMMA_WITH_AND);
    final String caseMatchParam = (String) uimaContext.getConfigParameterValue(PARAM_CASE_MATCH);
    final String stemmerParam = (String) uimaContext.getConfigParameterValue(PARAM_STEMMER_CLASS);
    final String stemmerDict = (String) uimaContext.getConfigParameterValue(PARAM_STEMMER_DICT);

    // A missing flag means "do not replace commas"
    this.replaceCommaWithAND = (commaFlag != null) && commaFlag.booleanValue();

    // Start with all folding off, then enable whichever mode was requested
    this.setCaseFoldInitCap(false);
    this.setCaseFoldDigit(false);
    this.setCaseFoldAll(false);
    if (caseMatchParam != null) {
        if (caseMatchParam.equalsIgnoreCase(CASE_INSENSITIVE)) {
            this.setCaseFoldInitCap(true);
        } else if (caseMatchParam.equalsIgnoreCase(CASE_FOLD_DIGITS)) {
            this.setCaseFoldDigit(true);
        } else if (caseMatchParam.equalsIgnoreCase(CASE_IGNORE)) {
            this.setCaseFoldAll(true);
        }
    }

    if (stemmerParam == null) {
        return;
    }
    try {
        final Class<?> loaded = Class.forName(stemmerParam);
        final Stemmer instance = (Stemmer) loaded.newInstance();
        setStemmer(instance);
        getStemmer().initialize(stemmerDict);
    } catch (Exception e) {
        // Best effort: construction continues without a working stemmer.
        logger.logError("Exception trying to instantiate stemmer class: '" + stemmerParam
                + "', original exception:" + e.getMessage());
        e.printStackTrace();
    }
}
示例11: getStemmer
import org.apache.uima.conceptMapper.support.stemmer.Stemmer; //导入依赖的package包/类
/**
 * Accessor for the stemmer currently held by this object.
 *
 * @return the value of the {@code stemmer} field
 */
public Stemmer getStemmer() {
    return this.stemmer;
}
示例12: doStemming
import org.apache.uima.conceptMapper.support.stemmer.Stemmer; //导入依赖的package包/类
/**
 * Stems a token after trimming it with {@link String#trim()}.
 *
 * @param token
 *            the token to stem; trimmed before it is handed to the stemmer
 * @param stemmer
 *            the stemmer whose {@code stem} method is applied
 * @return whatever {@code stemmer.stem} returns for the trimmed token
 */
public static String doStemming(String token, Stemmer stemmer) {
    final String trimmedToken = token.trim();
    return stemmer.stem(trimmedToken);
}
示例13: setStemmer
import org.apache.uima.conceptMapper.support.stemmer.Stemmer; //导入依赖的package包/类
/**
 * Replaces the stemmer held by this object. The reference is stored as given.
 *
 * @param stemmer
 *            The stemmer to set.
 */
public void setStemmer(Stemmer stemmer) {
this.stemmer = stemmer;
}