本文整理汇总了Java中cc.mallet.types.Instance.setSource方法的典型用法代码示例。如果您正苦于以下问题：Java Instance.setSource方法的具体用法？Java Instance.setSource怎么用？Java Instance.setSource使用的例子？那么，这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在类cc.mallet.types.Instance的用法示例。
在下文中一共展示了Instance.setSource方法的5个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Java代码示例。
示例1: pipe
import cc.mallet.types.Instance; //导入方法依赖的package包/类
/**
 * Removes single-space tokens from the carrier's token sequence and labels every
 * remaining token by whether it begins a new space-delimited run.
 *
 * <p>The carrier's data is cast to {@code TokenSequence}. On return:
 * <ul>
 *   <li>the data is a new {@code TokenSequence} holding only the tokens whose text is
 *       not exactly {@code " "};</li>
 *   <li>the source is the concatenated text of those kept tokens;</li>
 *   <li>if {@code isTargetProcessing()} is true, the target is a {@code FeatureSequence}
 *       with one label per kept token: {@code "start"} for the first kept token and for
 *       any kept token that follows a space token, {@code "notstart"} otherwise.</li>
 * </ul>
 *
 * @param carrier the instance to transform in place
 * @return the same {@code carrier} object
 */
public Instance pipe(Instance carrier) {
    TokenSequence ts = (TokenSequence) carrier.getData();
    TokenSequence newTs = new TokenSequence();
    FeatureSequence labelSeq = new FeatureSequence(getTargetAlphabet());
    // Starts true so that the very first kept token is labelled "start".
    boolean lastWasSpace = true;
    // Local, single-threaded accumulator: StringBuilder avoids StringBuffer's locking.
    StringBuilder sb = new StringBuilder();
    for (int i = 0; i < ts.size(); i++) {
        Token t = ts.get(i);
        String text = t.getText();
        if (text.equals(" ")) {
            lastWasSpace = true;
        } else {
            sb.append(text);
            newTs.add(t);
            labelSeq.add(lastWasSpace ? "start" : "notstart");
            lastWasSpace = false;
        }
    }
    if (isTargetProcessing()) {
        carrier.setTarget(labelSeq);
    }
    carrier.setData(newTs);
    carrier.setSource(sb.toString());
    return carrier;
}
示例2: pipe
import cc.mallet.types.Instance; //导入方法依赖的package包/类
/**
 * Rewrites the carrier's source string, inserting a space before every character
 * (other than the first) whose target label prints as {@code "start"}.
 *
 * <p>The carrier's source is cast to {@code String} and its target to {@code Sequence};
 * the target is indexed once per source character, so it is expected to have at least
 * as many elements as the source has characters. Each label and the final source are
 * also printed to standard output, exactly as before.
 *
 * @param carrier the instance whose source is rewritten in place
 * @return the same {@code carrier} object
 */
public Instance pipe(Instance carrier)
{
    String source = (String) carrier.getSource();
    Sequence as = (Sequence) carrier.getTarget();
    // Local, single-threaded accumulator: StringBuilder avoids StringBuffer's locking.
    StringBuilder sb = new StringBuilder();
    for (int i = 0; i < source.length(); i++) {
        // Look the label up once and reuse it for both the trace line and the test.
        String label = as.get(i).toString();
        System.out.println("target[" + i + "]=" + label);
        if (i != 0 && label.equals("start")) {
            sb.append(' ');
        }
        sb.append(source.charAt(i));
    }
    carrier.setSource(sb.toString());
    System.out.println("carrier.getSource() = " + carrier.getSource());
    return carrier;
}
示例3: pipe
import cc.mallet.types.Instance; //导入方法依赖的package包/类
/**
 * Stores the carrier's current data object as its source.
 *
 * @param carrier the instance to update in place
 * @return the same {@code carrier} object
 */
public Instance pipe (Instance carrier)
{
    final Object currentData = carrier.getData();
    carrier.setSource (currentData);
    return carrier;
}
示例4: pipe
import cc.mallet.types.Instance; //导入方法依赖的package包/类
/**
 * Splits SGML-tagged text into a sequence of tokens and a parallel sequence of tag labels.
 *
 * <p>The carrier's data must be a {@code CharSequence}. The text is scanned with
 * {@code sgmlPattern}; every stretch of text between two matches (and after the last
 * match) is handed to {@code lexer}, and each token it yields is recorded together with
 * the tag in effect for that stretch. A match whose second character is {@code '/'}
 * switches the tag back to {@code backgroundTag}; any other match switches it to the
 * match text with its first and last characters removed.
 *
 * <p>On return the carrier's data and source are both the resulting token sequence and
 * its target is the equally long sequence of tag tokens.
 *
 * @param carrier the instance to transform in place
 * @return the same {@code carrier} object
 * @throws ClassCastException if the carrier's data is not a {@code CharSequence}
 */
public Instance pipe (Instance carrier)
{
    Object data = carrier.getData();
    if (!(data instanceof CharSequence)) {
        // Guard the null case so the intended ClassCastException is raised rather than
        // a NullPointerException while building the message.
        throw new ClassCastException ("carrier.data is a " +
            (data == null ? "null" : data.getClass().getName()) +
            " not a CharSequence");
    }
    CharSequence string = (CharSequence) data;
    TokenSequence dataTokens = new TokenSequence ();
    TokenSequence targetTokens = new TokenSequence ();
    String tag = backgroundTag;
    String nextTag = backgroundTag;
    Matcher m = sgmlPattern.matcher (string);
    int textStart = 0;
    int textEnd = 0;
    int nextStart = 0;
    boolean done = false;
    while (!done) {
        // NOTE(review): findNextValidMatch presumably advances m to the next acceptable
        // tag and reports whether one was found - confirm against its definition.
        done = !findNextValidMatch (m);
        if (done) {
            // subSequence's end index is exclusive, so the trailing text runs to
            // length(); using length()-1 would drop the final character.
            textEnd = string.length();
        } else {
            String sgml = m.group();
            if (sgml.charAt(1) == '/') {
                nextTag = backgroundTag;
            } else {
                // Strip the first and last characters of the match (the delimiters).
                nextTag = sgml.substring(1, sgml.length()-1);
            }
            nextStart = m.end();   // one past the last character of the match
            textEnd = m.start();   // text before the match ends where the match begins
        }
        if (textEnd - textStart > 0) {
            lexer.setCharSequence (string.subSequence (textStart, textEnd));
            while (lexer.hasNext()) {
                dataTokens.add (new Token ((String) lexer.next()));
                targetTokens.add (new Token (tag));
            }
        }
        // The tag found by this match applies to the text that follows it.
        textStart = nextStart;
        tag = nextTag;
    }
    carrier.setData(dataTokens);
    carrier.setTarget(targetTokens);
    carrier.setSource(dataTokens);
    return carrier;
}
示例5: pipe
import cc.mallet.types.Instance; //导入方法依赖的package包/类
/**
 * Converts SGML-tagged text into a {@code StringTokenization} of spans over the original
 * text plus a parallel {@code TokenSequence} of tag labels.
 *
 * <p>The carrier's data is cast to {@code CharSequence} and scanned with
 * {@code sgmlPattern}. Each stretch of text between matches (and after the last match,
 * up to the end of the text) is tokenized by {@code lexer}; every token becomes a
 * {@code StringSpan} whose offsets are relative to the whole text, paired with the tag
 * in effect for that stretch. A match whose second character is {@code '/'} resets the
 * tag to {@code backgroundTag}; any other match sets it to the match text without its
 * first and last characters.
 *
 * <p>On return the carrier's data is the tokenization and its target the tag sequence;
 * the source is set to the tokenization only when {@code saveSource} is true.
 *
 * @param carrier the instance to transform in place
 * @return the same {@code carrier} object
 */
public Instance pipe (Instance carrier)
{
    final CharSequence text = (CharSequence) carrier.getData();
    final StringTokenization spans = new StringTokenization (text);
    final TokenSequence labels = new TokenSequence ();
    final Matcher tagMatcher = sgmlPattern.matcher (text);
    logger.fine (sgmlPattern.pattern());
    logger.finer (text.toString());

    String currentTag = backgroundTag;
    String upcomingTag = backgroundTag;
    int segmentStart = 0;
    int resumeAt = 0;
    boolean matched;
    do {
        final int segmentEnd;
        matched = tagMatcher.find();
        if (matched) {
            final String markup = tagMatcher.group();
            logger.finer ("SGML = "+markup);
            logger.finer (Integer.toString (tagMatcher.groupCount()));
            upcomingTag = (markup.charAt(1) == '/')
                ? backgroundTag
                : markup.substring (1, markup.length()-1);
            logger.finer ("nextTag: " + upcomingTag);
            resumeAt = tagMatcher.end();     // index just past the matched markup
            segmentEnd = tagMatcher.start(); // exclusive end of the text before the markup
            logger.finer ("Text start/end "+segmentStart+" "+segmentEnd);
        } else {
            // No more markup: the last segment runs to the end of the text.
            segmentEnd = text.length();
        }

        if (segmentEnd > segmentStart) {
            logger.finer ("Tag = "+currentTag);
            final CharSequence segment = text.subSequence (segmentStart, segmentEnd);
            logger.finer ("Target = "+segment);
            lexer.setCharSequence (segment);
            while (lexer.hasNext()) {
                lexer.next ();
                // Lexer offsets are relative to the segment; shift them to the full text.
                final int spanStart = segmentStart + lexer.getStartOffset ();
                final int spanEnd = segmentStart + lexer.getEndOffset ();
                spans.add (new StringSpan (text, spanStart, spanEnd));
                labels.add (new Token (currentTag));
            }
        }

        // The tag just read governs the text that follows it.
        segmentStart = resumeAt;
        currentTag = upcomingTag;
    } while (matched);

    carrier.setData (spans);
    carrier.setTarget (labels);
    if (saveSource) {
        carrier.setSource (spans);
    }
    return carrier;
}