本文整理汇总了Java中edu.umass.cs.mallet.base.types.Instance.setSource方法的典型用法代码示例。如果您正苦于以下问题：Java Instance.setSource方法的具体用法？Java Instance.setSource怎么用？Java Instance.setSource使用的例子？那么恭喜您，这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在类edu.umass.cs.mallet.base.types.Instance的用法示例。
在下文中一共展示了Instance.setSource方法的4个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Java代码示例。
示例1: pipe
import edu.umass.cs.mallet.base.types.Instance; //导入方法依赖的package包/类
/**
 * Copies the carrier's current data reference into its source slot.
 *
 * The data itself is left untouched; after this call the carrier's source is
 * whatever {@code getData()} returned at the time of the call.
 *
 * @param carrier the instance travelling through the pipe
 * @return the same {@code carrier} object that was passed in
 */
public Instance pipe (Instance carrier)
{
    final Object currentData = carrier.getData();
    carrier.setSource (currentData);
    return carrier;
}
示例2: pipe
import edu.umass.cs.mallet.base.types.Instance; //导入方法依赖的package包/类
/**
 * Splits SGML-tagged text into a token sequence and a parallel sequence of tag tokens.
 *
 * The carrier's data must be a {@link CharSequence}. The text lying between
 * successive matches accepted by {@code findNextValidMatch} is handed to
 * {@code lexer}; every token the lexer yields is appended to the data sequence
 * and paired with a target token holding the tag in effect for that stretch of
 * text. A match whose second character is {@code '/'} switches the tag back to
 * {@code backgroundTag}; any other match switches the tag to the match text
 * with its first and last characters removed.
 *
 * On return the carrier's data and source are both the token sequence and its
 * target is the tag sequence.
 *
 * @param carrier the instance whose data holds the tagged text
 * @return the same {@code carrier}, with data, target and source replaced
 * @throws ClassCastException if the carrier's data is null or not a CharSequence
 */
public Instance pipe (Instance carrier)
{
    // Read the payload once; the null-safe message avoids an NPE while reporting the bad type.
    Object rawData = carrier.getData();
    if (!(rawData instanceof CharSequence))
        throw new ClassCastException ("carrier.data is a " +
            (rawData == null ? "null" : rawData.getClass().getName()) +
            " not a CharSequence");
    TokenSequence dataTokens = new TokenSequence ();
    TokenSequence targetTokens = new TokenSequence ();
    CharSequence string = (CharSequence) rawData;
    String tag = backgroundTag;      // tag applied to the text segment being emitted
    String nextTag = backgroundTag;  // tag that takes effect after the current match
    Matcher m = sgmlPattern.matcher (string);
    int textStart = 0;
    int textEnd = 0;
    int nextStart = 0;
    boolean done = false;
    while (!done) {
        done = !findNextValidMatch (m);
        if (done) {
            // No more tags: the final segment runs to the end of the input.
            // subSequence's end index is exclusive, so length() (not length()-1)
            // is required to keep the last character.
            textEnd = string.length();
        } else {
            String sgml = m.group();
            if (sgml.charAt(1) == '/')
                nextTag = backgroundTag;
            else
                nextTag = sgml.substring(1, sgml.length()-1);
            nextStart = m.end();   // one past the last character of the match
            textEnd = m.start();   // exclusive end of the text preceding the match
        }
        if (textEnd - textStart > 0) {
            lexer.setCharSequence (string.subSequence (textStart, textEnd));
            while (lexer.hasNext()) {
                dataTokens.add (new Token ((String) lexer.next()));
                targetTokens.add (new Token (tag));
            }
        }
        textStart = nextStart;
        tag = nextTag;
    }
    carrier.setData(dataTokens);
    carrier.setTarget(targetTokens);
    carrier.setSource(dataTokens);
    return carrier;
}
示例3: pipe
import edu.umass.cs.mallet.base.types.Instance; //导入方法依赖的package包/类
/**
 * Turns SGML-tagged text into a tokenization of string spans plus a parallel
 * sequence of tag tokens.
 *
 * The carrier's data is cast to {@link CharSequence}. Each stretch of text
 * between successive matches of {@code sgmlPattern} (and after the last match,
 * up to the end of the text) is fed to {@code lexer}. For every lexer token a
 * {@code StringSpan} over the original text is added to the data tokenization,
 * and a token holding the tag in effect is added to the target sequence. A
 * match whose second character is {@code '/'} resets the tag to
 * {@code backgroundTag}; otherwise the tag becomes the match text without its
 * first and last characters.
 *
 * @param carrier the instance whose data holds the tagged text
 * @return the same {@code carrier}; its data is the tokenization, its target is
 *         the tag sequence, and, when {@code saveSource} is set, its source is
 *         the tokenization as well
 */
public Instance pipe (Instance carrier)
{
    final CharSequence text = (CharSequence) carrier.getData();
    final StringTokenization spans = new StringTokenization (text);
    final TokenSequence tags = new TokenSequence ();
    String currentTag = backgroundTag;
    String upcomingTag = backgroundTag;
    final Matcher matcher = sgmlPattern.matcher (text);
    int segmentStart = 0;
    int segmentEnd = 0;
    int resumeAt = 0;

    logger.fine(sgmlPattern.pattern());
    logger.finer(text.toString());

    boolean found;
    do {
        found = matcher.find();
        if (found) {
            final String sgml = matcher.group();
            logger.finer ("SGML = "+sgml);
            final int groupCount = matcher.groupCount();
            logger.finer(Integer.toString (groupCount));
            if (sgml.charAt(1) != '/') {
                // Opening tag: strip the first and last characters to get the tag name.
                upcomingTag = sgml.substring(1, sgml.length()-1);
            } else {
                upcomingTag = backgroundTag;
            }
            logger.finer("nextTag: " + upcomingTag);
            resumeAt = matcher.end();      // index just past the match
            segmentEnd = matcher.start();  // exclusive end of the preceding text
            logger.finer ("Text start/end "+segmentStart+" "+segmentEnd);
        } else {
            // No further match: the last segment extends to the end of the text.
            segmentEnd = text.length();
        }

        if (segmentEnd > segmentStart) {
            logger.finer ("Tag = "+currentTag);
            final CharSequence segment = text.subSequence (segmentStart, segmentEnd);
            logger.finer ("Target = "+segment);
            lexer.setCharSequence (segment);
            while (lexer.hasNext()) {
                lexer.next ();
                // Lexer offsets are relative to the segment; shift them onto the full text.
                final int spanStart = segmentStart + lexer.getStartOffset ();
                final int spanEnd = segmentStart + lexer.getEndOffset ();
                spans.add (new StringSpan (text, spanStart, spanEnd));
                tags.add (new Token (currentTag));
            }
        }

        segmentStart = resumeAt;
        currentTag = upcomingTag;
    } while (found);

    carrier.setData(spans);
    carrier.setTarget(tags);
    if (saveSource) {
        carrier.setSource(spans);
    }
    return carrier;
}
示例4: pipe
import edu.umass.cs.mallet.base.types.Instance; //导入方法依赖的package包/类
/**
 * Converts a block of tab-separated "word<TAB>label" lines into a token
 * sequence and a parallel label sequence.
 *
 * The carrier's data is cast to {@link String} and split on newlines. A
 * non-empty line must contain exactly two tab-separated fields (word, label);
 * an empty line is represented by the word {@code "-<S>-"}. Labels are reduced
 * to three values: anything starting with {@code "I-malignancy-type"} or
 * {@code "B-malignancy-type"} keeps that prefix as its label, everything else
 * becomes {@code "O"}. When {@code doDigitCollapses} is set, words matching the
 * year/number/date/suffix patterns below are replaced by placeholders; when
 * {@code doDowncasing} is set the word is lower-cased afterwards.
 *
 * On return the carrier's data is the token sequence, its target is the label
 * sequence, and its name is the array of words as read (before any
 * transformation). When {@code saveSource} is set, the source is a buffer of
 * "word label" lines built from the transformed words and reduced labels.
 *
 * @param carrier the instance whose data holds the sentence lines
 * @return the same {@code carrier}, with data, target, name (and optionally
 *         source) replaced
 * @throws IllegalStateException if a non-empty line does not split into
 *         exactly two tab-separated fields
 */
public Instance pipe (Instance carrier)
{
    String sentenceLines = (String) carrier.getData();
    String[] tokens = sentenceLines.split ("\n");
    TokenSequence data = new TokenSequence (tokens.length);
    LabelSequence target = new LabelSequence ((LabelAlphabet)getTargetAlphabet(), tokens.length);
    // Kept as StringBuffer because this very object is handed to setSource below.
    StringBuffer source = saveSource ? new StringBuffer() : null;
    String word, label;
    String[] words = new String[tokens.length];
    for (int i = 0; i < tokens.length; i++) {
        if (tokens[i].length() != 0) {
            String[] features = tokens[i].split ("\t");
            if (features.length != 2)
                throw new IllegalStateException ("Line \""+sentenceLines + " " + tokens[i] +"\" doesn't have 2 elements");
            word = features[0];
            label = features[1];
        } else {
            // Empty line: emit a placeholder word.
            word = "-<S>-";
            label = "O";
        }
        // Remember the word exactly as read, before any transformation.
        words[i] = word;

        // Reduce the label set to I-/B-malignancy-type and O.
        if (label.startsWith("I-malignancy-type"))
            label = "I-malignancy-type";
        else if (label.startsWith("B-malignancy-type"))
            label = "B-malignancy-type";
        else
            label = "O";

        // Transformations
        if (doDigitCollapses) {
            if (word.matches ("19\\d\\d"))
                word = "<YEAR>";
            else if (word.matches ("19\\d\\ds"))
                word = "<YEARDECADE>";
            else if (word.matches ("19\\d\\d-\\d+"))
                word = "<YEARSPAN>";
            else if (word.matches ("\\d+\\\\/\\d"))
                // Matches digits, a literal backslash, a slash, then one digit.
                word = "<FRACTION>";
            else if (word.matches ("\\d[\\d,\\.]*"))
                word = "<DIGITS>";
            // A former branch with the mistyped pattern "19\d\d-\d\d-\d--d" was
            // dropped here; the correctly spelled date pattern follows.
            else if (word.matches ("19\\d\\d-\\d\\d-\\d\\d"))
                word = "<DATELINEDATE>";
            else if (word.matches (".*-led"))
                word = "<LED>";
            else if (word.matches (".*-sponsored"))
                // NOTE(review): maps to the same placeholder as "-led"; confirm this is intended.
                word = "<LED>";
        }
        if (doDowncasing)
            word = word.toLowerCase();
        Token token = new Token (word);

        // Append
        data.add (token);
        target.add (label);
        if (saveSource) {
            source.append (word); source.append (" ");
            source.append (label); source.append ("\n");
        }
    }
    carrier.setData(data);
    carrier.setTarget(target);
    carrier.setName(words);
    if (saveSource)
        carrier.setSource(source);
    return carrier;
}