本文整理汇总了Java中cc.mallet.types.TokenSequence.getProperty方法的典型用法代码示例。如果您正苦于以下问题：Java TokenSequence.getProperty方法的具体用法？Java TokenSequence.getProperty怎么用？Java TokenSequence.getProperty使用的例子？那么，这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在类cc.mallet.types.TokenSequence的用法示例。
在下文中一共展示了TokenSequence.getProperty方法的9个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Java代码示例。
示例1: pipe
import cc.mallet.types.TokenSequence; //导入方法依赖的package包/类
/**
 * Adds one {@code SYN_<window>@<offset>} feature per configured window to every token,
 * where the window text is taken from the ONC grams of the sequence's syllable structure.
 *
 * @param carrier instance whose data is a {@code TokenSequence} carrying a
 *                {@code PhonemeCrfTrainer.PROP_STRUCTURE} property
 * @return the same instance, with its tokens' features updated in place
 * @throws IllegalStateException if the ONC grams and the tokens differ in count, or a
 *                               token's text length differs from its ONC gram's length
 */
@Override
public Instance pipe(Instance carrier) {
    final TokenSequence tokens = (TokenSequence) carrier.getData();
    final SyllStructure structure =
            (SyllStructure) tokens.getProperty(PhonemeCrfTrainer.PROP_STRUCTURE);
    checkNotNull(structure, "no sylls", carrier);

    final List<String> oncGrams = structure.getOncGrams();
    Preconditions.checkState(tokens.size() == oncGrams.size(), "sylls and grams dont equal size");

    int tokenIndex = 0;
    while (tokenIndex < tokens.size()) {
        final Token token = tokens.get(tokenIndex);
        final int textLength = token.getText().length();
        final int gramLength = oncGrams.get(tokenIndex).length();
        if (textLength != gramLength) {
            throw new IllegalStateException("grams doesnt match syll grams " + token + " - " + oncGrams);
        }

        for (int w = 0; w < windows.size(); w++) {
            final TokenWindow win = windows.get(w);
            final String windowText =
                    TokenSeqUtil.getWindowFromStrings(oncGrams, tokenIndex, win.offset, win.width);
            // A null window produces no feature for this window.
            if (windowText != null) {
                token.setFeatureValue("SYN_" + windowText + "@" + win.offset, 1.0);
            }
        }
        tokenIndex++;
    }
    return carrier;
}
示例2: pipe
import cc.mallet.types.TokenSequence; //导入方法依赖的package包/类
/**
 * Marks syllables by their position relative to the start and end of the word.
 *
 * <p>Depending on the syllable count, {@code markAll} is invoked (in this order) for the
 * first syllable, the last, the second, the second-to-last, and finally every syllable
 * strictly between those.
 *
 * @param inst instance whose data is a {@code TokenSequence} carrying a
 *             {@code PhonemeCrfTrainer.PROP_STRUCTURE} property
 * @return the same instance
 */
@Override
public Instance pipe(Instance inst) {
    final TokenSequence tokens = (TokenSequence) inst.getData();
    final SyllStructure sylls =
            (SyllStructure) tokens.getProperty(PhonemeCrfTrainer.PROP_STRUCTURE);
    checkNotNull(sylls, "no sylls", inst);

    // Each tier only applies when there are enough syllables; bail out at the first
    // tier that is not reached.
    if (sylls.getSyllCount() < 1) {
        return inst;
    }
    markAll(tokens, sylls, 0, "SYLLREL_1");

    if (sylls.getSyllCount() < 2) {
        return inst;
    }
    markAll(tokens, sylls, sylls.getLastSyllIndex(), "SYLLREL_-1");

    if (sylls.getSyllCount() < 3) {
        return inst;
    }
    markAll(tokens, sylls, 1, "SYLLREL_2");

    if (sylls.getSyllCount() < 4) {
        return inst;
    }
    markAll(tokens, sylls, sylls.getLastSyllIndex() - 1, "SYLLREL_-2");

    if (sylls.getSyllCount() < 5) {
        return inst;
    }
    // Remaining interior syllables: indexes 2 .. lastSyllIndex - 2.
    int middle = 2;
    while (middle < (sylls.getLastSyllIndex() - 1)) {
        markAll(tokens, sylls, middle, "SYLLRELL_X");
        middle++;
    }
    return inst;
}
示例3: pipe
import cc.mallet.types.TokenSequence; //导入方法依赖的package包/类
/**
 * Flags the first token of each run of tokens that share a syllable index with a
 * {@code SYLLMARK_<syllIndex>} feature.
 *
 * @param inst instance whose data is a {@code TokenSequence} carrying a
 *             {@code PhonemeCrfTrainer.PROP_STRUCTURE} property
 * @return the same instance, with its tokens' features updated in place
 */
@Override
public Instance pipe(Instance inst) {
    final TokenSequence tokens = (TokenSequence) inst.getData();
    final SyllStructure sylls =
            (SyllStructure) tokens.getProperty(PhonemeCrfTrainer.PROP_STRUCTURE);
    checkNotNull(sylls, "no sylls", inst);

    int lastSeenSyll = -1;
    for (int pos = 0; pos < tokens.size(); pos++) {
        final Token token = tokens.get(pos);
        final int syllHere = sylls.getSyllIndexForGraphoneGramIndex(pos);
        if (syllHere == lastSeenSyll) {
            // Same syllable as the previous token: no mark.
            continue;
        }
        token.setFeatureValue("SYLLMARK_" + syllHere, 1.0);
        lastSeenSyll = syllHere;
    }
    return inst;
}
示例4: pipe
import cc.mallet.types.TokenSequence; //导入方法依赖的package包/类
/**
 * For each token that contains a syllable nucleus, adds a feature built from
 * {@code prefix} and a part of another syllable.
 *
 * <p>When {@code onlyLast} is set, the other syllable is the last one, and tokens already
 * in the last syllable get nothing. Otherwise, when {@code relativeIndex} is non-zero,
 * it is the syllable at that relative offset, provided the offset stays within range.
 *
 * @param inst instance whose data is a {@code TokenSequence} carrying a
 *             {@code PhonemeCrfTrainer.PROP_STRUCTURE} property
 * @return the same instance, with its tokens' features updated in place
 */
@Override
public Instance pipe(Instance inst) {
    final TokenSequence tokens = (TokenSequence) inst.getData();
    final SyllStructure sylls =
            (SyllStructure) tokens.getProperty(PhonemeCrfTrainer.PROP_STRUCTURE);
    checkNotNull(sylls, "cant use struct without sylls", inst);

    for (int pos = 0; pos < tokens.size(); pos++) {
        final Token token = tokens.get(pos);
        if (struct_hasNucleus(sylls, pos)) {
            final int ownSyll = sylls.getSyllIndexForGraphoneGramIndex(pos);

            // Resolve which syllable (if any) supplies the feature text.
            Integer sourceSyll = null;
            if (onlyLast) {
                if (ownSyll < sylls.getLastSyllIndex()) {
                    sourceSyll = sylls.getLastSyllIndex();
                }
            } else if (relativeIndex != 0) {
                final int candidate = ownSyll + relativeIndex;
                final boolean inRange = candidate >= 0 && candidate <= sylls.getLastSyllIndex();
                if (inRange) {
                    sourceSyll = candidate;
                }
            }

            if (sourceSyll != null) {
                final String featureName =
                        prefix + sylls.getSyllPart(sourceSyll, maxOnset, -1, maxCoda);
                token.setFeatureValue(featureName, 1.0);
            }
        }
    }
    return inst;
}

/** Returns whether the graphone gram at {@code index} contains a syllable nucleus. */
private static boolean struct_hasNucleus(SyllStructure sylls, int index) {
    return sylls.graphoneGramIndexContainsNucleus(index);
}
示例5: pipe
import cc.mallet.types.TokenSequence; //导入方法依赖的package包/类
/**
 * Adds a {@code SYLLSEQ_<n>} feature to each token for every symbol in its text, where
 * {@code n} is the syllable sequence value the structure reports for that symbol's
 * running index across the whole word.
 *
 * @param inst instance whose data is a {@code TokenSequence} carrying a
 *             {@code PhonemeCrfTrainer.PROP_STRUCTURE} property
 * @return the same instance, with its tokens' features updated in place
 */
@Override
public Instance pipe(Instance inst) {
    final TokenSequence tokens = (TokenSequence) inst.getData();
    final SyllStructure sylls =
            (SyllStructure) tokens.getProperty(PhonemeCrfTrainer.PROP_STRUCTURE);
    checkNotNull(sylls, "no sylls", inst);

    // Running grapheme index across all tokens of the word.
    int graphemeIndex = 0;
    int pos = 0;
    while (pos < tokens.size()) {
        final Token token = tokens.get(pos);
        // The symbol itself is not used; only the symbol count per token matters.
        for (String ignoredSymbol : Grams.iterateSymbols(token.getText())) {
            final String featureName =
                    "SYLLSEQ_" + sylls.getSyllSequenceForGraphemeIndex(graphemeIndex);
            token.setFeatureValue(featureName, 1.0);
            graphemeIndex++;
        }
        pos++;
    }
    return inst;
}
示例6: pipe
import cc.mallet.types.TokenSequence; //导入方法依赖的package包/类
/**
 * Tags every token with a {@code SYLLCNT_<syllIndex>} feature naming the syllable index
 * the structure reports for that token's position.
 *
 * @param inst instance whose data is a {@code TokenSequence} carrying a
 *             {@code PhonemeCrfTrainer.PROP_STRUCTURE} property
 * @return the same instance, with its tokens' features updated in place
 */
@Override
public Instance pipe(Instance inst) {
    final TokenSequence tokens = (TokenSequence) inst.getData();
    final SyllStructure sylls =
            (SyllStructure) tokens.getProperty(PhonemeCrfTrainer.PROP_STRUCTURE);
    checkNotNull(sylls, "no sylls", inst);

    int pos = 0;
    while (pos < tokens.size()) {
        final Token token = tokens.get(pos);
        final String featureName = "SYLLCNT_" + sylls.getSyllIndexForGraphoneGramIndex(pos);
        token.setFeatureValue(featureName, 1.0);
        pos++;
    }
    return inst;
}
示例7: pipe
import cc.mallet.types.TokenSequence; //导入方法依赖的package包/类
/**
 * For every symbol in every token, adds a {@code SYLLCHROL_<symbol>_<oncCode>} feature,
 * where the ONC code is looked up by token index and the symbol's position in the token.
 *
 * @param inst instance whose data is a {@code TokenSequence} carrying a
 *             {@code PhonemeCrfTrainer.PROP_STRUCTURE} property
 * @return the same instance, with its tokens' features updated in place
 */
@Override
public Instance pipe(Instance inst) {
    final TokenSequence tokens = (TokenSequence) inst.getData();
    final SyllStructure sylls =
            (SyllStructure) tokens.getProperty(PhonemeCrfTrainer.PROP_STRUCTURE);
    checkNotNull(sylls, "no sylls", inst);

    for (int tokenIndex = 0; tokenIndex < tokens.size(); tokenIndex++) {
        final Token token = tokens.get(tokenIndex);
        // Position of the current symbol inside this token.
        int symbolIndex = 0;
        for (String symbol : Grams.iterateSymbols(token.getText())) {
            final String featureName = "SYLLCHROL_" + symbol + "_"
                    + sylls.getOncCodeAtGraphoneAndSequence(tokenIndex, symbolIndex);
            token.setFeatureValue(featureName, 1.0);
            symbolIndex++;
        }
    }
    return inst;
}
示例8: pipe
import cc.mallet.types.TokenSequence; //导入方法依赖的package包/类
/**
 * For words whose final character is {@code 'y'} or {@code 'e'}, tags each token that
 * contains a nucleus vowel with {@code TE_VOWEL_<vowel>_BEFORE} or
 * {@code TE_VOWEL_<vowel>_END}, depending on whether the token's syllable precedes the
 * last syllable.
 *
 * <p>An empty token sequence, or a last token with null or empty text, has no final
 * character; the instance is returned unchanged rather than failing with an
 * index-out-of-bounds error.
 *
 * @param inst instance whose data is a {@code TokenSequence}; the
 *             {@code PhonemeCrfTrainer.PROP_STRUCTURE} property is only required when the
 *             word ends in {@code 'y'} or {@code 'e'}
 * @return the same instance, with its tokens' features updated in place
 * @throws IllegalStateException if a token's text and its ONC gram differ in length or
 *                               disagree on whitespace positions
 */
@Override
public Instance pipe(Instance inst) {
    TokenSequence ts = (TokenSequence) inst.getData();
    if (ts.size() == 0) {
        // No tokens, hence no trailing letter to inspect.
        return inst;
    }
    String lastToken = ts.get(ts.size() - 1).getText();
    if (lastToken == null || lastToken.isEmpty()) {
        // The last token has no characters, so it cannot end in 'y' or 'e'.
        return inst;
    }
    char lastChar = lastToken.charAt(lastToken.length() - 1);
    if (lastChar != 'y' && lastChar != 'e') {
        return inst;
    }

    SyllStructure struct = (SyllStructure) ts.getProperty(PhonemeCrfTrainer.PROP_STRUCTURE);
    checkNotNull(struct, "no sylls", inst);
    int lastSyllIndex = struct.getLastSyllIndex();

    for (int i = 0; i < ts.size(); i++) {
        String tag = null;
        Token t = ts.get(i);
        String s = struct.oncGramForGraphoneIndex(i);
        int thisSyllIndex = struct.getSyllIndexForGraphoneGramIndex(i);
        String text = t.getText();
        Preconditions.checkState(text.length() == s.length(), "grams doesnt match syll grams");

        // Scan the whole token even after a tag is found so that the whitespace
        // consistency check still covers every character.
        for (int j = 0; j < text.length(); j++) {
            char textChar = text.charAt(j);
            char syllChar = s.charAt(j);
            if (Character.isWhitespace(textChar) || Character.isWhitespace(syllChar)) {
                Preconditions.checkState(textChar == syllChar, "mismatched whitespace in gram");
                continue;
            }
            // We care about nucleus vowels because those are the ones influenced by
            // trailing letters; only the first one in the token determines the tag.
            if (tag == null
                    && Graphemes.isVowel(String.valueOf(textChar))
                    && syllChar == SyllTagTrainer.NucleusChar) {
                tag = "TE_VOWEL_" + textChar + "_" + (thisSyllIndex < lastSyllIndex ? "BEFORE" : "END");
            }
        }

        if (tag != null) {
            t.setFeatureValue(tag, 1.0);
        }
    }
    return inst;
}
示例9: pipe
import cc.mallet.types.TokenSequence; //导入方法依赖的package包/类
/**
 * For every grapheme in every token, adds a
 * {@code NEARSYLL_<NEXT_|PREV_><grapheme>_<relative>} feature.
 *
 * <p>With {@code isNext} set, {@code relative} is the grapheme count of the current
 * syllable minus the grapheme's sequence value within it, or {@code -1} in the last
 * syllable. Otherwise {@code relative} is the sequence value plus one, or {@code -1} in
 * the first syllable.
 *
 * @param inst instance whose data is a {@code TokenSequence} carrying a
 *             {@code PhonemeCrfTrainer.PROP_STRUCTURE} property
 * @return the same instance, with its tokens' features updated in place
 */
@Override
public Instance pipe(Instance inst) {
    final TokenSequence tokens = (TokenSequence) inst.getData();
    final SyllStructure sylls =
            (SyllStructure) tokens.getProperty(PhonemeCrfTrainer.PROP_STRUCTURE);
    checkNotNull(sylls, "no sylls", inst);

    // Running grapheme index across all tokens of the word.
    int graphemeIndex = 0;
    for (int pos = 0; pos < tokens.size(); pos++) {
        final Token token = tokens.get(pos);
        for (String grapheme : Grams.iterateSymbols(token.getText())) {
            final int syllIndex = sylls.getSyllIndexForGraphemeIndex(graphemeIndex);
            final int seqInSyll = sylls.getSyllSequenceForGraphemeIndex(graphemeIndex);

            final int relative;
            if (isNext) {
                // Syllable length is looked up before the last-syllable check.
                final int syllLength = sylls.getSyllGraphsForSyllIndex(syllIndex).length();
                final boolean inLastSyll = syllIndex == sylls.getLastSyllIndex();
                relative = inLastSyll ? -1 : (syllLength - seqInSyll);
            } else {
                relative = (syllIndex == 0) ? -1 : (seqInSyll + 1);
            }

            final String direction = isNext ? "NEXT_" : "PREV_";
            token.setFeatureValue("NEARSYLL_" + direction + grapheme + "_" + relative, 1.0);
            graphemeIndex++;
        }
    }
    return inst;
}