本文整理汇总了Java中org.cleartk.ml.Feature.createName方法的典型用法代码示例。如果您正苦于以下问题:Java Feature.createName方法的具体用法?Java Feature.createName怎么用?Java Feature.createName使用的例子?那么恭喜您,这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在类org.cleartk.ml.Feature的用法示例。
在下文中一共展示了Feature.createName方法的15个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Java代码示例。
示例1: LTCharacterNgramFeatureFunction
import org.cleartk.ml.Feature; //导入方法依赖的package包/类
/**
 * Creates a feature function that serves up character n-grams based on String-valued features.
 * For example, if you wanted trigram suffixes (e.g. 'ion' of 'emotion') for words that are of
 * length 7 or more you could call the constructor with the following:
 *
 * <pre>
 * new LTCharacterNgramFeatureFunction("suffix", Orientation.RIGHT_TO_LEFT, 0, 3, 7, false)
 * </pre>
 *
 * @param featureName
 *          a user-specified name for the feature function, to be included in all feature names.
 * @param orientation
 *          must be one of LEFT_TO_RIGHT or RIGHT_TO_LEFT. The orientation determines whether
 *          index 0 corresponds to the first character of the string value or the last. The
 *          orientation does not affect the ordering of the characters in the n-gram which are
 *          always returned in left-to-right order.
 * @param start
 *          the start of the n-gram (typically 0 for both orientations). Must not be negative and
 *          must not exceed {@code end}.
 * @param end
 *          the end of the n-gram (typically n for both orientations)
 * @param minimumValueLength
 *          This parameter allows you to skip string values that are too short. It must be greater
 *          than or equal to end.
 * @param lowerCase
 *          if true then the n-gram used as the feature value will be lowercased.
 * @throws IllegalArgumentException
 *           if {@code start} is negative or greater than {@code end}, or if
 *           {@code minimumValueLength} is less than {@code end}
 */
public LTCharacterNgramFeatureFunction(
    String featureName,
    Orientation orientation,
    int start,
    int end,
    int minimumValueLength,
    boolean lowerCase) {
  // Reject impossible ranges up front: a negative or inverted [start, end) range can never
  // describe a valid n-gram and would otherwise only fail once a value is actually processed.
  if (start < 0 || start > end) {
    throw new IllegalArgumentException(String.format(
        "expected 0 <= start <= end, found start=%d end=%d",
        start,
        end));
  }
  if (minimumValueLength < end) {
    throw new IllegalArgumentException(
        "minimumValueLength must be greater than or equal to the parameter end.");
  }
  // The generated name records every configuration parameter followed by the user's own name.
  this.name = Feature.createName(
      "NGram",
      orientation == Orientation.RIGHT_TO_LEFT ? "Right" : "Left",
      String.valueOf(start),
      String.valueOf(end),
      String.valueOf(minimumValueLength),
      lowerCase ? "lower" : null,
      featureName);
  this.orientation = orientation;
  this.start = start;
  this.end = end;
  this.minimumValueLength = minimumValueLength;
  this.lowerCase = lowerCase;
}
示例2: apply
import org.cleartk.ml.Feature; //导入方法依赖的package包/类
/**
 * Extracts the configured character n-gram from the String value of the given feature.
 *
 * @param feature
 *          the feature whose value supplies the characters; its name is appended to this
 *          function's name to form the name of the returned feature
 * @return a single-element list. The element's value is the n-gram, or the placeholder
 *         {@code "OUT"} if the value of the feature is not a String (including {@code null}) or
 *         is shorter than the minimumValueLength.
 */
@Override
public List<Feature> apply(Feature feature) {
  String featureName = Feature.createName(name, feature.getName());
  Object featureValue = feature.getValue();
  // instanceof is false for null, so this single test also covers a missing value.
  if (!(featureValue instanceof String)) {
    return Collections.singletonList(new Feature(featureName, "OUT"));
  }
  String value = (String) featureValue;
  if (value.length() < minimumValueLength) {
    return Collections.singletonList(new Feature(featureName, "OUT"));
  }
  String ngram;
  if (orientation == Orientation.LEFT_TO_RIGHT) {
    ngram = value.substring(start, end);
  } else {
    // Otherwise the indices count back from the end of the string; the characters themselves
    // stay in their original left-to-right order.
    ngram = value.substring(value.length() - end, value.length() - start);
  }
  if (lowerCase) {
    // Locale.ROOT keeps the produced feature values independent of the JVM's default locale
    // (e.g. a Turkish default locale would otherwise map 'I' to a dotless 'ı').
    ngram = ngram.toLowerCase(java.util.Locale.ROOT);
  }
  return Collections.singletonList(new Feature(featureName, ngram));
}
示例3: encode
import org.cleartk.ml.Feature; //导入方法依赖的package包/类
/**
 * Encodes every feature held by the FeatureCollection that is the value of the given feature.
 * Each contained feature is renamed by combining the outer feature's name with its own name,
 * passed to the sub-encoder, and the combined result is normalized before being returned.
 *
 * @param feature
 *          a feature whose value is cast to a FeatureCollection
 * @return the normalized encodings of all contained features, or an empty list when an
 *         identifier is set on this encoder and it does not equal the collection's identifier
 * @throws CleartkEncoderException
 *           declared by this method; presumably propagated from the sub-encoder
 */
public List<NameNumber> encode(Feature feature) throws CleartkEncoderException {
  FeatureCollection collection = (FeatureCollection) feature.getValue();

  // A null identifier means "accept any collection".
  boolean accepted = identifier == null || identifier.equals(collection.getIdentifier());
  if (!accepted) {
    return Collections.emptyList();
  }

  List<NameNumber> encoded = new ArrayList<NameNumber>();
  for (Feature member : collection.getFeatures()) {
    String qualifiedName = Feature.createName(feature.getName(), member.getName());
    Feature qualified = new Feature(qualifiedName, member.getValue());
    encoded.addAll(subEncoder.encode(qualified));
  }
  normalizer.normalize(encoded);
  return encoded;
}
示例4: extract
import org.cleartk.ml.Feature; //导入方法依赖的package包/类
/**
 * Produces a single "Distance" feature whose value is the number of {@code unitClass}
 * annotations covered by the span between the two given annotations.
 *
 * @param jCas
 *          the CAS that is searched for {@code unitClass} annotations
 * @param annotation1
 *          one of the two annotations (order does not matter)
 * @param annotation2
 *          the other annotation
 * @return a single-element list; the value is 0 when the later annotation begins at or before
 *         the end of the earlier one
 */
public List<Feature> extract(JCas jCas, Annotation annotation1, Annotation annotation2) {
  // Order the pair by begin offset; on a tie annotation1 counts as the earlier one.
  boolean alreadyOrdered = annotation1.getBegin() <= annotation2.getBegin();
  Annotation earlier = alreadyOrdered ? annotation1 : annotation2;
  Annotation later = alreadyOrdered ? annotation2 : annotation1;

  String featureName = Feature.createName(this.name, "Distance", this.unitClass.getSimpleName());

  int distance = 0;
  if (later.getBegin() > earlier.getEnd()) {
    // Only a real gap between the two annotations can contain units.
    distance = JCasUtil.selectCovered(
        jCas,
        unitClass,
        earlier.getEnd(),
        later.getBegin()).size();
  }
  return Collections.singletonList(new Feature(featureName, distance));
}
示例5: CharacterNgramFeatureFunction
import org.cleartk.ml.Feature; //导入方法依赖的package包/类
/**
 * Creates a feature function that serves up character n-grams based on String-valued features.
 * For example, if you wanted trigram suffixes (e.g. 'ion' of 'emotion') for words that are of
 * length 7 or more you could call the constructor with the following:
 *
 * <pre>
 * new CharacterNgramFeatureFunction("suffix", Orientation.RIGHT_TO_LEFT, 0, 3, 7, false)
 * </pre>
 *
 * @param featureName
 *          a user-specified name for the feature function, to be included in all feature names.
 * @param orientation
 *          must be one of LEFT_TO_RIGHT or RIGHT_TO_LEFT. The orientation determines whether
 *          index 0 corresponds to the first character of the string value or the last. The
 *          orientation does not affect the ordering of the characters in the n-gram which are
 *          always returned in left-to-right order.
 * @param start
 *          the start of the n-gram (typically 0 for both orientations). Must not be negative and
 *          must not exceed {@code end}.
 * @param end
 *          the end of the n-gram (typically n for both orientations)
 * @param minimumValueLength
 *          This parameter allows you to skip string values that are too short. It must be greater
 *          than or equal to end.
 * @param lowerCase
 *          if true then the n-gram used as the feature value will be lowercased.
 * @throws IllegalArgumentException
 *           if {@code start} is negative or greater than {@code end}, or if
 *           {@code minimumValueLength} is less than {@code end}
 */
public CharacterNgramFeatureFunction(
    String featureName,
    Orientation orientation,
    int start,
    int end,
    int minimumValueLength,
    boolean lowerCase) {
  // Reject impossible ranges up front: a negative or inverted [start, end) range can never
  // describe a valid n-gram and would otherwise only fail once a value is actually processed.
  if (start < 0 || start > end) {
    throw new IllegalArgumentException(String.format(
        "expected 0 <= start <= end, found start=%d end=%d",
        start,
        end));
  }
  if (minimumValueLength < end) {
    throw new IllegalArgumentException(
        "minimumValueLength must be greater than or equal to the parameter end.");
  }
  // The generated name records every configuration parameter followed by the user's own name.
  this.name = Feature.createName(
      "NGram",
      orientation == Orientation.RIGHT_TO_LEFT ? "Right" : "Left",
      String.valueOf(start),
      String.valueOf(end),
      String.valueOf(minimumValueLength),
      lowerCase ? "lower" : null,
      featureName);
  this.orientation = orientation;
  this.start = start;
  this.end = end;
  this.minimumValueLength = minimumValueLength;
  this.lowerCase = lowerCase;
}
示例6: apply
import org.cleartk.ml.Feature; //导入方法依赖的package包/类
/**
 * Emits a marker feature when the String value of the given feature contains a hyphen.
 *
 * @param feature
 *          the feature whose value is inspected; its name is appended to DEFAULT_NAME to form
 *          the name of the returned feature
 * @return a single feature with the value CONTAINS_HYPHEN if the value is a non-empty String
 *         for which {@code HyphenUtil.containsHyphen} is true; otherwise an empty list
 */
@Override
public List<Feature> apply(Feature feature) {
  String featureName = Feature.createName(DEFAULT_NAME, feature.getName());
  Object featureValue = feature.getValue();

  // instanceof is false for null, so missing and non-String values are both rejected here.
  if (!(featureValue instanceof String)) {
    return Collections.emptyList();
  }

  String text = (String) featureValue;
  if (text.length() > 0 && HyphenUtil.containsHyphen(text)) {
    return Collections.singletonList(new Feature(featureName, CONTAINS_HYPHEN));
  }
  return Collections.emptyList();
}
示例7: apply
import org.cleartk.ml.Feature; //导入方法依赖的package包/类
/**
 * Builds a character-class pattern for the String value of the given feature by classifying
 * each character with {@code classifyChar} and concatenating the results according to the
 * configured pattern type.
 *
 * @param feature
 *          the feature whose value is turned into a pattern; its name is appended to
 *          {@code getFeatureName()} to form the name of the returned feature
 * @return a single feature holding the pattern string, or an empty list if the value is not a
 *         String (including {@code null})
 */
@Override
public List<Feature> apply(Feature feature) {
  String featureName = Feature.createName(getFeatureName(), feature.getName());
  Object featureValue = feature.getValue();
  if (!(featureValue instanceof String)) {
    return Collections.emptyList();
  }

  String text = (String) featureValue;
  StringBuilder pattern = new StringBuilder();
  String previousType = null;
  // Tracks whether a '+' has already been written for the current run of equal types.
  boolean plusWritten = false;
  for (char current : text.toCharArray()) {
    String type = classifyChar(current);
    switch (this.patternType) {
      case ONE_PER_CHAR:
        // Every character contributes its type, repeats included.
        pattern.append(type);
        break;
      case REPEATS_MERGED:
        // A run of equal types is written once.
        if (!type.equals(previousType)) {
          pattern.append(type);
        }
        break;
      case REPEATS_AS_KLEENE_PLUS:
        // A run of equal types is written once, followed by a single '+' if it repeats.
        if (!type.equals(previousType)) {
          pattern.append(type);
          plusWritten = false;
        } else if (!plusWritten) {
          pattern.append('+');
          plusWritten = true;
        }
        break;
    }
    previousType = type;
  }
  return Collections.singletonList(new Feature(featureName, pattern.toString()));
}
示例8: encode
import org.cleartk.ml.Feature; //导入方法依赖的package包/类
/**
 * Encodes a feature whose value is a Counts object: one NameNumber per counted value, named by
 * combining the Counts' feature name with the value's string form and carrying that value's
 * count. The result is normalized before it is returned.
 *
 * @param feature
 *          a feature whose value is cast to Counts
 * @return the normalized list of name/number pairs
 */
public List<NameNumber> encode(Feature feature) {
  Counts counts = (Counts) feature.getValue();
  String namePrefix = counts.getFeatureName();

  List<NameNumber> encoded = new ArrayList<NameNumber>();
  for (Object countedValue : counts.getValues()) {
    String entryName = Feature.createName(namePrefix, countedValue.toString());
    encoded.add(new NameNumber(entryName, counts.getCount(countedValue)));
  }

  normalizer.normalize(encoded);
  return encoded;
}
示例9: extract
import org.cleartk.ml.Feature; //导入方法依赖的package包/类
/**
 * Produces a single signed "DDistance" feature for a pair of annotations. The magnitude is 0
 * when the annotations overlap, otherwise one more than the number of {@code unitClass}
 * annotations covered by the gap between them. The value is negated when annotation1 begins
 * after annotation2.
 *
 * @param jCas
 *          the CAS that is searched for {@code unitClass} annotations
 * @param annotation1
 *          the annotation the direction is measured from
 * @param annotation2
 *          the annotation the direction is measured to
 * @return a single-element list holding the signed distance feature
 */
public List<Feature> extract(JCas jCas, Annotation annotation1, Annotation annotation2) {
  String featureName = Feature.createName(this.name, "DDistance", this.unitClass.getSimpleName());

  // "reversed" means annotation1 starts after annotation2, which flips the sign of the result.
  boolean reversed = annotation1.getBegin() > annotation2.getBegin();
  Annotation earlier = reversed ? annotation2 : annotation1;
  Annotation later = reversed ? annotation1 : annotation2;

  int featureValue = 0;
  if (!AnnotationUtil.overlaps(annotation1, annotation2)) {
    List<? extends Annotation> unitsBetween = JCasUtil.selectCovered(
        jCas,
        unitClass,
        earlier.getEnd(),
        later.getBegin());
    int magnitude = unitsBetween.size() + 1;
    featureValue = reversed ? -magnitude : magnitude;
  }
  return Collections.singletonList(new Feature(featureName, featureValue));
}
示例10: Context_ImplBase
import org.cleartk.ml.Feature; //导入方法依赖的package包/类
/**
 * Stores the begin/end bounds of this context and derives its name from the runtime class's
 * simple name followed by the two bounds.
 *
 * @param begin
 *          the lower bound; must not be greater than {@code end}
 * @param end
 *          the upper bound; may be equal to {@code begin}
 * @throws IllegalArgumentException
 *           if {@code begin} is greater than {@code end}
 */
public Context_ImplBase(int begin, int end) {
  if (begin > end) {
    // begin == end is accepted by the check above, so the message states "<=" rather than "<".
    String message = "expected begin <= end, found begin=%d end=%d";
    throw new IllegalArgumentException(String.format(message, begin, end));
  }
  this.begin = begin;
  this.end = end;
  this.name = Feature.createName(
      this.getClass().getSimpleName(),
      String.valueOf(this.begin),
      String.valueOf(this.end));
}
示例11: Bag
import org.cleartk.ml.Feature; //导入方法依赖的package包/类
/**
 * Constructs a {@link Context} which converts the features extracted by the argument contexts
 * into a bag of features where all features have the same name. The name of this context is
 * built from the literal "Bag" followed by the names of the argument contexts, in order.
 *
 * @param contexts
 *          The contexts which should be combined into a bag.
 */
public Bag(Context... contexts) {
  this.contexts = contexts;

  // Slot 0 holds the fixed prefix; the remaining slots take the context names in order.
  String[] nameParts = new String[1 + contexts.length];
  nameParts[0] = "Bag";
  int slot = 1;
  for (Context context : contexts) {
    nameParts[slot] = context.getName();
    slot += 1;
  }
  this.name = Feature.createName(nameParts);
}
示例12: Count
import org.cleartk.ml.Feature; //导入方法依赖的package包/类
/**
 * Constructs a {@link Context} which converts the features extracted by the argument contexts
 * into a bag of count features. The name of this context is built from the literal "Count"
 * followed by the names of the argument contexts, in order.
 *
 * @param contexts
 *          The contexts which should be combined into a bag.
 */
public Count(Context... contexts) {
  this.contexts = contexts;

  // Slot 0 holds the fixed prefix; the remaining slots take the context names in order.
  String[] nameParts = new String[1 + contexts.length];
  nameParts[0] = "Count";
  int slot = 1;
  for (Context context : contexts) {
    nameParts[slot] = context.getName();
    slot += 1;
  }
  this.name = Feature.createName(nameParts);
}
示例13: apply
import org.cleartk.ml.Feature; //导入方法依赖的package包/类
/**
 * Classifies the capitalization of a String-valued feature as one of ALL_UPPERCASE,
 * ALL_LOWERCASE, INITIAL_UPPERCASE, or MIXED_CASE and returns a new feature carrying that
 * classification. If the value cannot be characterized by one of these four values, the empty
 * list is returned (e.g. the value is not a String, is an empty string, contains only white
 * space, or contains only digits, etc.)
 *
 * <P>
 * This method was inspired by CapitalizationTypeTagger.py written by Steven Bethard.
 *
 * @param feature
 *          the feature whose value is classified; its name is appended to DEFAULT_NAME to form
 *          the name of the returned feature
 * @return a feature that has a value that is one of ALL_UPPERCASE, ALL_LOWERCASE,
 *         INITIAL_UPPERCASE, or MIXED_CASE. Otherwise the empty list is returned.
 */
public List<Feature> apply(Feature feature) {
  String featureName = Feature.createName(DEFAULT_NAME, feature.getName());
  Object featureValue = feature.getValue();

  // instanceof is false for null, so missing and non-String values are both rejected here.
  if (!(featureValue instanceof String)) {
    return Collections.emptyList();
  }
  String value = (String) featureValue;
  if (value.length() == 0) {
    return Collections.emptyList();
  }

  String lowerCaseValue = value.toLowerCase();
  String upperCaseValue = value.toUpperCase();
  // When both conversions agree, the value has no characters with distinct cases.
  if (lowerCaseValue.equals(upperCaseValue)) {
    return Collections.emptyList();
  }

  String capitalType;
  if (value.equals(lowerCaseValue)) {
    capitalType = CapitalType.ALL_LOWERCASE.toString();
  } else if (value.equals(upperCaseValue)) {
    capitalType = CapitalType.ALL_UPPERCASE.toString();
  } else if (CaseUtil.isInitialUppercase(value)) {
    capitalType = CapitalType.INITIAL_UPPERCASE.toString();
  } else {
    capitalType = CapitalType.MIXED_CASE.toString();
  }
  return Collections.singletonList(new Feature(featureName, capitalType));
}
示例14: apply
import org.cleartk.ml.Feature; //导入方法依赖的package包/类
/**
 * Classifies a String-valued feature as one of DIGITS, YEAR_DIGITS, ALPHANUMERIC, SOME_DIGITS,
 * or ROMAN_NUMERAL and returns a feature carrying that classification. If the value cannot be
 * characterized by one of these five values, an empty list is returned (e.g. the value is not a
 * String, is an empty string, contains only white space, or contains only letters, etc.)
 *
 * <p>
 * This method draws heavily from NumericTypeTagger.py written by Steven Bethard. That code
 * credits <a href="http://diveintopython.org/unit_testing/stage_5.html">Dive Into Python</a> for
 * the regular expression for matching roman numerals.
 *
 * @param feature
 *          the feature whose value is classified; its name is appended to DEFAULT_NAME to form
 *          the name of the returned feature
 * @return a feature that has a value that is one of DIGITS, YEAR_DIGITS, ALPHANUMERIC,
 *         SOME_DIGITS, or ROMAN_NUMERAL. Otherwise an empty list is returned.
 */
@Override
public List<Feature> apply(Feature feature) {
  String featureName = Feature.createName(DEFAULT_NAME, feature.getName());
  Object featureValue = feature.getValue();

  // instanceof is false for null, so missing and non-String values are both rejected here.
  if (!(featureValue instanceof String)) {
    return Collections.emptyList();
  }
  String value = (String) featureValue;
  if (value.length() == 0) {
    return Collections.emptyList();
  }

  String numericType;
  if (NumericTypeUtil.isDigits(value)) {
    // All digits: distinguish values matching the year pattern from plain digit strings.
    if (yearDigitsPattern.matcher(value).matches()) {
      numericType = NumericType.YEAR_DIGITS.toString();
    } else {
      numericType = NumericType.DIGITS.toString();
    }
  } else if (NumericTypeUtil.containsDigits(value)) {
    // Some digits: alphanumeric only if the whole value matches and at least one letter occurs.
    boolean alphanumeric = alphanumericPattern.matcher(value).matches()
        && someLetters.matcher(value).find();
    if (alphanumeric) {
      numericType = NumericType.ALPHANUMERIC.toString();
    } else {
      numericType = NumericType.SOME_DIGITS.toString();
    }
  } else if (romanNumeralPattern.matcher(value).matches()) {
    numericType = NumericType.ROMAN_NUMERAL.toString();
  } else {
    return Collections.emptyList();
  }
  return Collections.singletonList(new Feature(featureName, numericType));
}
示例15: apply
import org.cleartk.ml.Feature; //导入方法依赖的package包/类
/**
 * Classifies the capitalization of a String-valued feature as one of ALL_UPPERCASE,
 * ALL_LOWERCASE, INITIAL_UPPERCASE, or MIXED_CASE and returns a new feature carrying that
 * classification. If the value cannot be characterized by one of these four values (e.g. the
 * value is not a String, is an empty string, contains only white space, or contains only
 * digits, etc.), a feature with the value INVALID is returned instead.
 *
 * <P>
 * This method was inspired by CapitalizationTypeTagger.py written by Steven Bethard.
 *
 * @param feature
 *          the feature whose value is classified; its name is appended to DEFAULT_NAME to form
 *          the name of the returned feature
 * @return a single-element list whose feature has a value that is one of ALL_UPPERCASE,
 *         ALL_LOWERCASE, INITIAL_UPPERCASE, MIXED_CASE, or INVALID.
 */
@Override
public List<Feature> apply(Feature feature)
{
  String featureName = Feature.createName(DEFAULT_NAME, feature.getName());
  Object featureValue = feature.getValue();

  String capitalType;
  if (!(featureValue instanceof String)) {
    // Covers both a missing (null) value and a value of another type.
    capitalType = CapitalType.INVALID.toString();
  }
  else {
    String value = (String) featureValue;
    if (value.length() == 0) {
      capitalType = CapitalType.INVALID.toString();
    }
    else {
      String lowerCaseValue = value.toLowerCase();
      String upperCaseValue = value.toUpperCase();
      if (lowerCaseValue.equals(upperCaseValue)) {
        // When both conversions agree, the value has no characters with distinct cases.
        capitalType = CapitalType.INVALID.toString();
      }
      else if (value.equals(lowerCaseValue)) {
        capitalType = CapitalType.ALL_LOWERCASE.toString();
      }
      else if (value.equals(upperCaseValue)) {
        capitalType = CapitalType.ALL_UPPERCASE.toString();
      }
      else if (CaseUtil.isInitialUppercase(value)) {
        capitalType = CapitalType.INITIAL_UPPERCASE.toString();
      }
      else {
        capitalType = CapitalType.MIXED_CASE.toString();
      }
    }
  }
  return Collections.singletonList(new Feature(featureName, capitalType));
}