本文整理汇总了Java中edu.stanford.nlp.international.morph.MorphoFeatureSpecification类的典型用法代码示例。如果您正苦于以下问题：Java MorphoFeatureSpecification类的具体用法？Java MorphoFeatureSpecification怎么用？Java MorphoFeatureSpecification使用的例子？那么恭喜您，这里精选的类代码示例或许可以为您提供帮助。
MorphoFeatureSpecification类属于edu.stanford.nlp.international.morph包，在下文中一共展示了MorphoFeatureSpecification类的10个代码示例，这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞，您的评价将有助于系统推荐出更棒的Java代码示例。
示例1: transformTree
import edu.stanford.nlp.international.morph.MorphoFeatureSpecification; //导入依赖的package包/类
/**
 * Relabels the node {@code t} in place and returns it.
 *
 * <p>The new label is built from the node's current value, optionally replaced by the tag
 * derived from the morphological analysis stored on its leaf (only for preterminals, and only
 * when {@code tagSpec} is set), followed by the strings produced by every active annotation
 * whose pattern matches at {@code t}.
 *
 * @param t the node to relabel
 * @param root the root of the tree containing {@code t}; annotation patterns are matched against it
 * @return the same node {@code t}, with its value (and tag, when the label supports one) updated
 * @throws RuntimeException if {@code t} is a preterminal, {@code tagSpec} is set, and the leaf
 *     label is not a CoreLabel or carries no original text
 */
@Override
public Tree transformTree(Tree t, Tree root) {
  String category = t.value();

  // Manual state splits: concatenate the output of each annotation that fires at this node.
  final StringBuilder annotationSuffix = new StringBuilder();
  for (Pair<TregexPattern,Function<TregexMatcher,String>> annotation : activeAnnotations) {
    final TregexMatcher matcher = annotation.first().matcher(root);
    if (matcher.matchesAt(t)) {
      annotationSuffix.append(annotation.second().apply(matcher));
    }
  }

  // Morphosyntactic features apply to POS tags only.
  if (t.isPreTerminal() && tagSpec != null) {
    final CoreLabel leafLabel =
        (t.firstChild().label() instanceof CoreLabel) ? (CoreLabel) t.firstChild().label() : null;
    final String morphoStr = (leafLabel == null) ? null : leafLabel.originalText();
    if (morphoStr == null) {
      throw new RuntimeException(String.format("%s: Term lacks morpho analysis: %s",this.getClass().getName(),t.toString()));
    }

    final String morphAnalysis = MorphoFeatureSpecification.splitMorphString("", morphoStr).second();
    category = tagSpec.strToFeatures(morphAnalysis).getTag(category);
  }

  // Write the combined category back onto the node.
  final String relabeled = category + annotationSuffix.toString();
  t.setValue(relabeled);
  if (t.isPreTerminal() && t.label() instanceof HasTag) {
    ((HasTag) t.label()).setTag(relabeled);
  }
  return t;
}
示例2: tokenToDatums
import edu.stanford.nlp.international.morph.MorphoFeatureSpecification; //导入依赖的package包/类
/**
 * Converts a token into one datum per character and appends the datums to {@code iobList}.
 *
 * <p>The first character is labeled with the begin symbol and every later character with the
 * continuation symbol. When rewrite rules are enabled, the first label and the label of the
 * final character (for tokens longer than one character) may be replaced by a rewrite symbol.
 *
 * @param iobList list that receives the generated datums
 * @param token the token text whose characters are emitted
 * @param tokType the token type; not read by this method
 * @param tokenLabel label supplying the word, tag, begin offset, original text and "before" text
 * @param lastToken the preceding token, consulted by rewrite rule #2
 * @param charIndex index passed to the first datum; each later datum gets the next index
 * @param applyRewriteRules whether to apply the Arabic-specific rewrite rules
 */
private static void tokenToDatums(List<CoreLabel> iobList, String token, TokenType tokType,
    CoreLabel tokenLabel, String lastToken, int charIndex, boolean applyRewriteRules) {
  String headLabel = BeginSymbol;
  String tailLabel = ContinuationSymbol;

  if (applyRewriteRules) {
    // Arabic-specific rewrite rules, driven by gender/number features parsed from the tag.
    final String surfaceForm = tokenLabel.word();
    final String posTag = tokenLabel.tag();

    final MorphoFeatureSpecification arabicSpec = new ArabicMorphoFeatureSpecification();
    arabicSpec.activate(MorphoFeatureType.NGEN);
    arabicSpec.activate(MorphoFeatureType.NNUM);
    final MorphoFeatures morph = arabicSpec.strToFeatures(posTag);

    // Rule #1 : ت --> ة
    final boolean feminineSingular = morph.getValue(MorphoFeatureType.NGEN).equals("F")
        && morph.getValue(MorphoFeatureType.NNUM).equals("SG");
    if (feminineSingular && surfaceForm.endsWith("ت-")) {
      tailLabel = RewriteTahSymbol;
    }

    // Rule #2 : لل --> ل ال
    if (lastToken.equals("ل")) {
      if (surfaceForm.startsWith("-ل")) {
        headLabel = RewriteTareefSymbol;
      }
    }
  }

  final int beginOffset = tokenLabel.get(CoreAnnotations.CharacterOffsetBeginAnnotation.class);
  final String originalText = tokenLabel.get(CoreAnnotations.OriginalTextAnnotation.class);

  // First character: carries the begin (or rewrite) label and the token's "before" text.
  final String headChar = String.valueOf(token.charAt(0));
  final String headOriginal = String.valueOf(originalText.charAt(0));
  final String precedingText = tokenLabel.get(CoreAnnotations.BeforeAnnotation.class);
  iobList.add(createDatum(headChar, headLabel, charIndex, headChar,
      headOriginal, beginOffset, beginOffset + 1, precedingText));

  // Remaining characters: each covers the next one-character span of the original text.
  final int lastPos = token.length() - 1;
  for (int pos = 1; pos <= lastPos; pos++) {
    final String currentChar = String.valueOf(token.charAt(pos));
    final String label;
    if (pos == lastPos) {
      label = tailLabel;
    } else {
      label = ContinuationSymbol;
    }
    final String currentOriginal = String.valueOf(originalText.charAt(pos));
    iobList.add(createDatum(currentChar, label, charIndex + pos, currentChar,
        currentOriginal, beginOffset + pos, beginOffset + pos + 1, ""));
  }
}
示例3: morphFeatureSpec
import edu.stanford.nlp.international.morph.MorphoFeatureSpecification; //导入依赖的package包/类
/**
 * Returns a morphological feature specification for words in this language.
 *
 * @return always {@code null} in this implementation, i.e. no specification is provided
 */
@Override
public MorphoFeatureSpecification morphFeatureSpec() {
return null;
}
示例4: FactoredLexicon
import edu.stanford.nlp.international.morph.MorphoFeatureSpecification; //导入依赖的package包/类
/**
 * Creates a factored lexicon.
 *
 * @param morphoSpec morphological feature specification, stored on this instance
 * @param wordIndex word index, forwarded to the superclass constructor
 * @param tagIndex tag index, forwarded to the superclass constructor
 */
public FactoredLexicon(MorphoFeatureSpecification morphoSpec, Index<String> wordIndex, Index<String> tagIndex) {
super(wordIndex, tagIndex);
this.morphoSpec = morphoSpec;
}
示例5: train
import edu.stanford.nlp.international.morph.MorphoFeatureSpecification; //导入依赖的package包/类
/**
 * Trains the unknown-word model on {@code trees} and accumulates word/tag, lemma/tag and
 * morph/tag counts for the factored lexicon. Populates wordIndex, tagIndex, and morphIndex
 * as a side effect.
 *
 * @param trees trees that supply the preterminal tags (and the leaves when {@code rawTrees}
 *     is null)
 * @param rawTrees optional trees consumed in step with {@code trees}; when non-null their
 *     leaves are used instead, and are cast to CoreLabel to read the morphological analysis
 *     from the original text. May be null.
 */
@Override
public void train(Collection<Tree> trees, Collection<Tree> rawTrees) {
  final double weight = 1.0;
  // The unknown-word model is trained on the words first.
  uwModelTrainer.train(trees, weight);

  final double numTrees = trees.size();
  final Iterator<Tree> rawCursor = (rawTrees == null) ? null : rawTrees.iterator();

  // The factored lexicon is trained on lemmas and morphological tags.
  int treesDone = 0;
  for (Tree tree : trees) {
    // Leaves are CoreLabels carrying the morph analysis in their original text.
    final List<Label> leaves;
    if (rawTrees == null) {
      leaves = tree.yield();
    } else {
      leaves = rawCursor.next().yield();
    }
    // Tags come from the annotated tree (labels are usually CategoryWordTag).
    final List<Label> preterminals = tree.preTerminalYield();

    int pos = 0;
    for (Label leaf : leaves) {
      final String word = leaf.value();
      final int wordId = wordIndex.indexOf(word, true); // words are only indexed here
      final String tag = preterminals.get(pos).value();
      final int tagId = tagIndex.indexOf(tag, true);

      // The word itself is passed along as the fallback when no lemma is available.
      final String featureStr = ((CoreLabel) leaf).originalText();
      final Pair<String,String> lemmaAndMorph = MorphoFeatureSpecification.splitMorphString(word, featureStr);
      final int lemmaId = wordIndex.indexOf(lemmaAndMorph.first(), true);

      String reducedMorphTag = morphoSpec.strToFeatures(lemmaAndMorph.second()).toString().trim();
      if (reducedMorphTag.length() == 0) {
        reducedMorphTag = NO_MORPH_ANALYSIS;
      }
      final int morphId = morphIndex.indexOf(reducedMorphTag, true);

      // Counts for seen events.
      wordTag.incrementCount(wordId, tagId);
      lemmaTag.incrementCount(lemmaId, tagId);
      morphTag.incrementCount(morphId, tagId);
      tagCounter.incrementCount(tagId);

      // Counts for unseen events, collected only past the configured fraction of the trees.
      if (treesDone > op.trainOptions.fractionBeforeUnseenCounting*numTrees) {
        if (! wordTag.firstKeySet().contains(wordId) || wordTag.getCounter(wordId).totalCount() < 2) {
          wordTagUnseen.incrementCount(tagId);
        }
        if (! lemmaTag.firstKeySet().contains(lemmaId) || lemmaTag.getCounter(lemmaId).totalCount() < 2) {
          lemmaTagUnseen.incrementCount(tagId);
        }
        if (! morphTag.firstKeySet().contains(morphId) || morphTag.getCounter(morphId).totalCount() < 2) {
          morphTagUnseen.incrementCount(tagId);
        }
      }
      pos++;
    }

    treesDone++;
    if (DEBUG) {
      // Progress marker every 100 trees, line break every 10000.
      if ((treesDone % 100) == 0) {
        System.err.printf("[%d]",treesDone);
      }
      if ((treesDone % 10000) == 0) {
        System.err.println();
      }
    }
  }
}
示例6: morphFeatureSpec
import edu.stanford.nlp.international.morph.MorphoFeatureSpecification; //导入依赖的package包/类
/**
 * Returns the morphological feature specification for this language.
 *
 * @return a newly created {@code ArabicMorphoFeatureSpecification} on every call
 */
@Override
public MorphoFeatureSpecification morphFeatureSpec() {
return new ArabicMorphoFeatureSpecification();
}
示例7: normalizeWholeTree
import edu.stanford.nlp.international.morph.MorphoFeatureSpecification; //导入依赖的package包/类
/**
 * Normalizes a whole tree.
 *
 * <p>Steps, in order:
 * <ol>
 * <li>Prune nodes matching {@code emptyFilter} and splice out nodes matching
 *     {@code aOverAFilter}.</li>
 * <li>Normalize every preterminal label via {@code normalizePreterminal}.</li>
 * <li>For every leaf whose value contains the morphology marker, split it into the word and
 *     its morphological analysis; the analysis is stored as the CoreLabel's original text.</li>
 * <li>Strip enclosing empty-labeled unary nodes and make sure the result is rooted at
 *     {@code rootLabel}.</li>
 * </ol>
 *
 * @param tree the tree to normalize
 * @param tf factory used for pruning, splicing and creating the new root node
 * @return the normalized tree, or {@code null} if nothing is left after filtering or the tree
 *     consisted only of enclosing brackets
 */
@Override
public Tree normalizeWholeTree(Tree tree, TreeFactory tf) {
  // Filtering can remove the whole tree, in which case prune/spliceOut hand back null
  // (see the Tree API documentation). Return null instead of failing with an NPE so that
  // readers can move on to the next tree.
  Tree pruned = tree.prune(emptyFilter, tf);
  tree = (pruned == null) ? null : pruned.spliceOut(aOverAFilter, tf);
  if (tree == null) {
    return null;
  }

  // The marker is matched literally by contains(), so it must be quoted for split(),
  // which interprets its argument as a regular expression.
  final String morphoMarkRegex = java.util.regex.Pattern.quote(MorphoFeatureSpecification.MORPHO_MARK);

  for (Tree t : tree) {
    if (t.isPreTerminal()) {
      // Map punctuation tags back like the PTB
      String posStr = normalizePreterminal(t);
      t.setValue(posStr);
      if (t.label() instanceof HasTag) {
        ((HasTag) t.label()).setTag(posStr);
      }
    } else if (t.isLeaf()) {
      // Strip off morphological analyses and place them in the OriginalTextAnnotation, which is
      // specified by HasContext.
      if (t.value().contains(MorphoFeatureSpecification.MORPHO_MARK)) {
        String[] toks = t.value().split(morphoMarkRegex);
        if (toks.length != 2) {
          System.err.printf("%s: Word contains malformed morph annotation: %s%n",this.getClass().getName(),t.value());
        } else if (t.label() instanceof CoreLabel) {
          CoreLabel leafLabel = (CoreLabel) t.label();
          String word = toks[0].trim().intern();
          leafLabel.setValue(word);
          leafLabel.setWord(word);
          leafLabel.setOriginalText(toks[1].trim().intern());
        } else {
          System.err.printf("%s: Cannot store morph analysis in non-CoreLabel: %s%n",this.getClass().getName(),t.label().getClass().getName());
        }
      }
    }
  }

  // Add start symbol so that the root has only one sub-state. Escape any enclosing brackets.
  // If the "tree" consists entirely of enclosing brackets e.g. ((())) then this method
  // will return null. In this case, readers e.g. PennTreeReader will try to read the next tree.
  while (tree != null && (tree.value() == null || tree.value().equals("")) && tree.numChildren() <= 1) {
    tree = tree.firstChild();
  }

  // Ensure that the tree has a top-level unary rewrite. The loop above can stop on a node
  // with several children whose value is still null, so the value is null-checked here.
  if (tree != null && (tree.value() == null || !tree.value().equals(rootLabel))) {
    tree = tf.newTreeNode(rootLabel, Collections.singletonList(tree));
  }
  return tree;
}
示例8: morphFeatureSpec
import edu.stanford.nlp.international.morph.MorphoFeatureSpecification; //导入依赖的package包/类
/**
 * Returns the morphological feature specification for this language.
 *
 * @return a newly created {@code FrenchMorphoFeatureSpecification} on every call
 */
@Override
public MorphoFeatureSpecification morphFeatureSpec() {
return new FrenchMorphoFeatureSpecification();
}
示例9: morphFeatureSpec
import edu.stanford.nlp.international.morph.MorphoFeatureSpecification; //导入依赖的package包/类
/**
 * Returns a morphological feature specification for words in this language.
 *
 * @return always {@code null} in this implementation, i.e. no specification is provided
 */
public MorphoFeatureSpecification morphFeatureSpec() {
return null;
}
示例10: morphFeatureSpec
import edu.stanford.nlp.international.morph.MorphoFeatureSpecification; //导入依赖的package包/类
/**
 * Returns the morphological feature specification for the language.
 *
 * <p>NOTE(review): whether implementations are allowed to return {@code null} when the
 * language has no specification is not stated here; confirm before dereferencing the result.
 *
 * @return A language-specific MorphoFeatureSpecification
 */
public abstract MorphoFeatureSpecification morphFeatureSpec();