本文整理汇总了Java中cc.mallet.types.Instance.setData方法的典型用法代码示例。如果您正苦于以下问题：Java Instance.setData方法的具体用法？Java Instance.setData怎么用？Java Instance.setData使用的例子？那么，这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在类cc.mallet.types.Instance的用法示例。
在下文中一共展示了Instance.setData方法的15个代码示例，这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞，您的评价将有助于系统推荐出更棒的Java代码示例。
示例1: extractIndependentFeaturesHelper
import cc.mallet.types.Instance; //导入方法依赖的package包/类
/**
 * Builds an instance holding the independent features of a single instance annotation.
 *
 * <p>The instance starts out with an empty {@code AugmentableFeatureVector} over the data
 * alphabet of {@code pipe}. Every attribute returned by {@code featureInfo.getAttributes()} is
 * then handed to {@code FeatureExtraction.extractFeature}, and finally the instance data is
 * replaced by the result of {@code toFeatureVector()}.
 *
 * <p>NOTE: this method is static so that it can be used in the
 * CorpusRepresentationMalletSeq class too.
 *
 * @param instanceAnnotation the annotation passed to the feature extraction
 * @param inputAS the annotation set passed to the feature extraction
 * @param featureInfo supplies the attributes to extract; the earlier documentation of this
 *     method states that the information in this object gets updated by the extraction
 * @param pipe supplies the data alphabet used for the feature vector
 * @return a new instance created with null target, name and source, whose data is the
 *     converted feature vector
 */
static Instance extractIndependentFeaturesHelper(
    Annotation instanceAnnotation,
    AnnotationSet inputAS,
    FeatureInfo featureInfo,
    Pipe pipe) {
  // Instance constructor arguments: data, target, name, source.
  Instance result =
      new Instance(new AugmentableFeatureVector(pipe.getDataAlphabet()), null, null, null);
  for (FeatureSpecAttribute attribute : featureInfo.getAttributes()) {
    FeatureExtraction.extractFeature(result, attribute, inputAS, instanceAnnotation);
  }
  // TODO: the AugmentableFeatureVector is destructively replaced by a FeatureVector here;
  // it is not clear whether this is beneficial - the working assumption is that it is.
  AugmentableFeatureVector collected = (AugmentableFeatureVector) result.getData();
  result.setData(collected.toFeatureVector());
  return result;
}
示例2: pipe
import cc.mallet.types.Instance; //导入方法依赖的package包/类
/**
 * Makes sure every token line carries a leading label column.
 *
 * <p>The text of each token is split on tab characters. A line with exactly
 * {@code minLineLength} columns gets the prefix "O" plus a tab; a line with
 * {@code minLineLength + 1} columns is left as it is; any other column count is reported on
 * standard error and the token is left unchanged.
 *
 * @param carrier instance whose data is a {@code TokenSequence}
 * @return the same carrier, with its data set to the (modified in place) token sequence
 */
@Override
public Instance pipe(Instance carrier) {
  final TokenSequence tokens = (TokenSequence) carrier.getData();
  for (int idx = 0; idx < tokens.size(); idx++) {
    final Token token = tokens.get(idx);
    final int columns = token.getText().split("\t").length;
    if (columns == this.minLineLength) {
      // Label column is missing: prepend the default label.
      token.setText("O\t" + token.getText());
      continue;
    }
    if (columns == (this.minLineLength + 1)) {
      // Label column already present.
      continue;
    }
    System.err.println("input line does not have length " + this.minLineLength + " or "
        + (this.minLineLength + 1) + " but " + columns + ": " + token.getText());
  }
  carrier.setData(tokens);
  return carrier;
}
示例3: pipe
import cc.mallet.types.Instance; //导入方法依赖的package包/类
/**
 * Moves the first tab-separated column of every token into the target sequence.
 *
 * <p>For each token, everything before the first tab becomes one entry of the new target
 * {@code TokenSequence}, and the token text is replaced by everything after that tab. A token
 * without any tab contributes its whole text as the target entry and keeps its text.
 *
 * <p>The column is cut with {@code indexOf}/{@code substring} rather than
 * {@code split("\t")[0]}: {@code String.split} drops trailing empty strings, so a text made up
 * only of tabs yields an empty array and index 0 would throw
 * {@code ArrayIndexOutOfBoundsException}. Such a text now yields an empty target entry.
 *
 * @param carrier instance whose data is a {@code TokenSequence}
 * @return the same carrier, with the target set to the extracted first columns and the data
 *     set to the (modified in place) token sequence
 */
@Override
public Instance pipe(Instance carrier) {
  TokenSequence ts = (TokenSequence) carrier.getData();
  TokenSequence targetTokenSeq = new TokenSequence(ts.size());
  for (int i = 0; i < ts.size(); i++) {
    Token t = ts.get(i);
    String line = t.getText();
    int firstTab = line.indexOf('\t');
    // Without a tab the whole text is both the target entry and the remaining text.
    String firstColumn = (firstTab < 0) ? line : line.substring(0, firstTab);
    String lineWithoutFirst = (firstTab < 0) ? line : line.substring(firstTab + 1);
    targetTokenSeq.add(firstColumn);
    t.setText(lineWithoutFirst);
  }
  carrier.setTarget(targetTokenSeq);
  carrier.setData(ts);
  return carrier;
}
示例4: pipe
import cc.mallet.types.Instance; //导入方法依赖的package包/类
/**
 * Replaces the token sequence of the carrier by a sequence of n-grams.
 *
 * <p>For every token position, each entry of {@code gramSizes} is considered in order. Sizes
 * that are not positive, or that would reach before the first token, are skipped. Size 1 adds
 * the token object itself; a larger size adds a new entry whose text is the current token text
 * preceded by the texts of the previous tokens, oldest first, joined with "_".
 *
 * @param carrier instance whose data is a {@code TokenSequence}
 * @return the same carrier, with its data set to the newly built n-gram sequence
 */
public Instance pipe (Instance carrier)
{
  TokenSequence grams = new TokenSequence();
  TokenSequence source = (TokenSequence) carrier.getData();
  for (int pos = 0; pos < source.size(); pos++) {
    Token current = source.get(pos);
    for (int size : gramSizes) {
      boolean fits = size > 0 && size <= pos + 1;
      if (!fits) {
        continue;
      }
      if (size == 1) {
        grams.add(current);
      } else {
        // Grow the n-gram leftwards: prepend the separator, then the next older token text.
        StringBuilder joined = new StringBuilder(current.getText());
        for (int back = 1; back < size; back++) {
          joined.insert(0, "_").insert(0, source.get(pos - back).getText());
        }
        grams.add(joined.toString());
      }
    }
  }
  carrier.setData(grams);
  return carrier;
}
示例5: pipe
import cc.mallet.types.Instance; //导入方法依赖的package包/类
/**
 * Converts the data of an <CODE>Instance</CODE> from a CharSequence of
 * comma-separated values into a <CODE>double[]</CODE>, where each index stands for one
 * feature.
 *
 * <p>The number of features counted for the first instance is remembered in
 * {@code numberFeatures}; every later instance must yield the same count.
 *
 * @param carrier instance whose data is a {@code CharSequence}
 * @return the same carrier, with its data set to the parsed array
 * @throws IllegalArgumentException if the feature count differs from the remembered one
 */
public Instance pipe( Instance carrier ) {
  CharSequence line = (CharSequence) carrier.getData();
  int observed = countNumberFeatures (line);
  if (numberFeatures == -1) {
    // First instance seen: it fixes the expected vector length.
    numberFeatures = observed;
  } else if (numberFeatures != observed) {
    throw new IllegalArgumentException ("Instances must have same-length feature vectors. length_i: " + numberFeatures + " length_j: " + observed);
  }
  double[] values = new double[numberFeatures];
  lexer.setCharSequence (line);
  for (int slot = 0; lexer.hasNext(); slot++) {
    values[slot] = Double.parseDouble ((String) lexer.next());
  }
  carrier.setData (values);
  return carrier;
}
示例6: pipe
import cc.mallet.types.Instance; //导入方法依赖的package包/类
/**
 * Turns a whitespace-separated feature string into a {@code FeatureVector}.
 *
 * <p>The string form of the carrier data is split on runs of whitespace. A field containing
 * "=" is split on "=": the first part is the feature name and the second part is parsed as its
 * value. Any other field is used as the feature name with value 1.0.
 *
 * @param carrier instance whose data provides the feature string through {@code toString()}
 * @return the same carrier, with its data set to a feature vector over the data alphabet
 */
public Instance pipe (Instance carrier) {
  String[] tokens = carrier.getData().toString().split("\\s+");
  Object[] names = new Object[tokens.length];
  double[] values = new double[tokens.length];
  int slot = 0;
  for (String token : tokens) {
    if (!token.contains("=")) {
      // Bare feature name: present with weight 1.
      names[slot] = token;
      values[slot] = 1.0;
    } else {
      String[] pair = token.split("=");
      names[slot] = pair[0];
      values[slot] = Double.parseDouble(pair[1]);
    }
    slot++;
  }
  carrier.setData(new FeatureVector(getDataAlphabet(), names, values));
  return carrier;
}
开发者ID:kostagiolasn,项目名称:NucleosomePatternClassifier,代码行数:26,代码来源:FeatureValueString2FeatureVector.java
示例7: pipe
import cc.mallet.types.Instance; //导入方法依赖的package包/类
/**
 * Parses the text of every token into feature values stored on that token.
 *
 * <p>The token text is split on runs of whitespace and each piece is handled as follows:
 * <ul>
 *   <li>if {@code specifyFeatureNames} is set, the piece is split on
 *       {@code nameValueSeparator}; exactly two parts are taken as name and numeric value,
 *       anything else becomes the feature "Token=" + piece with value 1.0;</li>
 *   <li>otherwise, if {@code realValued} is set, the piece is parsed as the value of the
 *       feature "Feature#" + its position;</li>
 *   <li>otherwise the piece itself is the feature name with value 1.0.</li>
 * </ul>
 *
 * @param carrier instance whose data is a {@code TokenSequence}
 * @return the same carrier, with its data set to the (modified in place) token sequence
 */
public Instance pipe (Instance carrier) {
  TokenSequence tokens = (TokenSequence) carrier.getData ();
  for (int idx = 0; idx < tokens.size(); idx++) {
    Token token = tokens.get (idx);
    String[] pieces = token.getText().split("\\s+");
    for (int col = 0; col < pieces.length; col++) {
      if (!specifyFeatureNames) {
        if (realValued) {
          token.setFeatureValue ("Feature#" + col, Double.parseDouble (pieces[col]));
        } else {
          token.setFeatureValue (pieces[col], 1.0);
        }
        continue;
      }
      String[] nameAndValue = pieces[col].split(nameValueSeparator);
      if (nameAndValue.length == 2) {
        token.setFeatureValue (nameAndValue[0], Double.parseDouble (nameAndValue[1]));
      } else {
        // No feature name given: use the piece itself as the feature.
        token.setFeatureValue ("Token=" + pieces[col], 1.0);
      }
    }
  }
  carrier.setData (tokens);
  return carrier;
}
开发者ID:kostagiolasn,项目名称:NucleosomePatternClassifier,代码行数:26,代码来源:TokenSequenceParseFeatureString.java
示例8: pipe
import cc.mallet.types.Instance; //导入方法依赖的package包/类
/**
 * Removes stoplisted tokens from the token sequence of the carrier.
 *
 * <p>A token is dropped when its text (lower-cased unless {@code caseSensitive} is set) is
 * contained in {@code stoplist}. Kept tokens are added, as the same objects, to a new
 * sequence. When {@code markDeletions} is set, the text of a dropped token is stored as the
 * {@code FeatureSequenceWithBigrams.deletionMark} property of the most recently kept token,
 * if there is one.
 *
 * @param carrier instance whose data is a {@code TokenSequence}
 * @return the same carrier, with its data set to the filtered sequence
 */
public Instance pipe (Instance carrier)
{
  TokenSequence input = (TokenSequence) carrier.getData();
  // A fresh sequence is filled instead of removing tokens from the input in place.
  TokenSequence kept = new TokenSequence ();
  Token lastKept = null;
  for (int idx = 0; idx < input.size(); idx++) {
    Token candidate = input.get(idx);
    boolean stopped =
        stoplist.contains (caseSensitive ? candidate.getText() : candidate.getText().toLowerCase());
    if (stopped) {
      if (markDeletions && lastKept != null) {
        lastKept.setProperty (FeatureSequenceWithBigrams.deletionMark, candidate.getText());
      }
      continue;
    }
    // The token object itself is reused, not copied.
    kept.add (candidate);
    lastKept = candidate;
  }
  carrier.setData(kept);
  return carrier;
}
示例9: pipe
import cc.mallet.types.Instance; //导入方法依赖的package包/类
/**
 * Replaces the data of the carrier by the result of the matching {@code pipe} overload.
 *
 * <p>Data of type {@code URI}, {@code File} or {@code Reader} is passed to the overload of
 * {@code pipe} taking that type and the result is stored back into the carrier. Data that
 * already is a {@code CharSequence} is left untouched. A null data object ends up in the final
 * branch and fails with a {@code NullPointerException} when its class is queried.
 *
 * @param carrier instance whose data is a URI, File, Reader or CharSequence
 * @return the same carrier
 * @throws IllegalArgumentException if the data has an unsupported type, or if reading the
 *     input fails with an {@code IOException} (which is attached as the cause)
 */
public Instance pipe (Instance carrier)
{
  Object data = carrier.getData();
  try {
    if (data instanceof URI) {
      carrier.setData(pipe ((URI) data));
    } else if (data instanceof File) {
      carrier.setData(pipe ((File) data));
    } else if (data instanceof Reader) {
      carrier.setData(pipe ((Reader) data));
    } else if (data instanceof CharSequence) {
      // No conversion necessary.
    } else {
      throw new IllegalArgumentException ("Does not handle class " + data.getClass());
    }
  } catch (java.io.IOException e) {
    // Same message as before, but keep the IOException as cause so its stack trace survives.
    throw new IllegalArgumentException ("IOException " + e, e);
  }
  return carrier;
}
示例10: transduce
import cc.mallet.types.Instance; //导入方法依赖的package包/类
/**
 * Takes the input sequence from the instance data and puts the output sequence back into
 * the instance data.
 *
 * <p>If an input pipe is set, the instance is first passed through it. The data is then
 * replaced by {@code bestOutputSequence()} of a {@code MaxLatticeDefault} built for this
 * transducer and the input sequence. If an output pipe is set, the instance is finally passed
 * through it.
 *
 * @param instance instance whose data (after the optional input pipe) is a {@code Sequence}
 * @return the instance holding the output sequence, as returned by the last pipe applied
 */
public Instance transduce (Instance instance)
{
  Instance working = (inputPipe == null) ? instance : inputPipe.instanceFrom(instance);
  // TODO Use MaxLatticeFactory instead of hardcoding
  Sequence input = (Sequence) working.getData();
  working.setData(new MaxLatticeDefault(this, input).bestOutputSequence());
  return (outputPipe == null) ? working : outputPipe.instanceFrom(working);
}
示例11: pipe
import cc.mallet.types.Instance; //导入方法依赖的package包/类
/**
 * Drops single-space spans from a tokenization and labels the remaining spans.
 *
 * <p>Every span whose text is exactly " " is skipped. Each other span is copied to a new
 * {@code StringTokenization} over the same document, its text is appended to the new source
 * string, and it is labelled "start" if it is the first span or directly follows a skipped
 * space span, otherwise "notstart".
 *
 * @param carrier instance whose data is a {@code StringTokenization}
 * @return the same carrier, with data set to the filtered tokenization, source set to the
 *     concatenated span texts and, when target processing is enabled, target set to the labels
 */
public Instance pipe(Instance carrier)
{
  StringTokenization original = (StringTokenization) carrier.getData();
  StringTokenization withoutSpaces =
      new StringTokenization((CharSequence) original.getDocument ());
  final LabelAlphabet labels = (LabelAlphabet) getTargetAlphabet();
  LabelSequence labelSeq = new LabelSequence(labels);
  // These lookups are kept although their results are unused here; presumably they register
  // both labels with the alphabet in a fixed order - TODO confirm.
  labels.lookupLabel ("start");
  labels.lookupLabel ("notstart");
  boolean atStart = true;
  StringBuilder joined = new StringBuilder();
  for (int idx = 0; idx < original.size(); idx++) {
    StringSpan span = (StringSpan) original.getSpan(idx);
    if (span.getText().equals(" ")) {
      atStart = true;
      continue;
    }
    joined.append(span.getText());
    withoutSpaces.add(span);
    labelSeq.add(atStart ? "start" : "notstart");
    atStart = false;
  }
  if (isTargetProcessing()) {
    carrier.setTarget(labelSeq);
  }
  carrier.setData(withoutSpaces);
  carrier.setSource(joined.toString());
  return carrier;
}
示例12: pipe
import cc.mallet.types.Instance; //导入方法依赖的package包/类
/**
 * Tokenizes the character data of the carrier into a {@code StringTokenization}.
 *
 * <p>The lexer is run over the character sequence; for every match a {@code StringSpan}
 * covering the start and end offsets reported by the lexer is added to the tokenization.
 *
 * @param carrier instance whose data is a {@code CharSequence}
 * @return the same carrier, with its data set to the tokenization
 */
public Instance pipe (Instance carrier)
{
  CharSequence text = (CharSequence) carrier.getData();
  lexer.setCharSequence (text);
  TokenSequence spans = new StringTokenization (text);
  while (lexer.hasNext()) {
    // Advance the lexer; only the offsets of the match are used, not the returned value.
    lexer.next();
    int begin = lexer.getStartOffset ();
    int end = lexer.getEndOffset ();
    spans.add (new StringSpan (text, begin, end));
  }
  carrier.setData(spans);
  return carrier;
}
示例13: pipe
import cc.mallet.types.Instance; //导入方法依赖的package包/类
/**
 * Applies {@code ngramify} to the data of the carrier.
 *
 * <p>For {@code CharSequence} data, the data becomes a {@code TokenSequence} built from the
 * result of {@code ngramify}. For {@code TokenSequence} data, a new sequence is built that
 * holds, per token, the string form of {@code ngramify} applied to the token text.
 *
 * @param carrier instance whose data is a {@code CharSequence} or a {@code TokenSequence}
 * @return the same carrier, with its data replaced
 * @throws IllegalArgumentException if the data has any other type
 */
public Instance pipe (Instance carrier)
{
  Object data = carrier.getData();
  if (data instanceof CharSequence) {
    carrier.setData(new TokenSequence (ngramify ((CharSequence) data)));
    return carrier;
  }
  if (!(data instanceof TokenSequence)) {
    throw new IllegalArgumentException ("Unhandled type "+data.getClass());
  }
  TokenSequence tokens = (TokenSequence) data;
  TokenSequence grams = new TokenSequence ();
  for (int idx = 0; idx < tokens.size(); idx++) {
    grams.add (ngramify (tokens.get(idx).getText()).toString());
  }
  carrier.setData(grams);
  return carrier;
}
示例14: pipe
import cc.mallet.types.Instance; //导入方法依赖的package包/类
/**
 * Replaces every platform line separator in the string data of the carrier by {@code delim}.
 *
 * <p>The replacement is literal on both sides. {@code String.replaceAll} is deliberately not
 * used: it would compile the separator as a regular expression and would interpret "$" and
 * backslash characters inside {@code delim} as group references and escapes.
 *
 * @param carrier instance whose data is a {@code String}
 * @return the same carrier, with its data set to the converted string
 * @throws IllegalArgumentException if the data is a non-null object that is not a String
 */
public Instance pipe (Instance carrier)
{
  Object data = carrier.getData();
  if (!(data instanceof String))
    throw new IllegalArgumentException ("Expecting String, got " + data.getClass().getName());
  String newline = System.getProperty ("line.separator");
  String s = ((String) data).replace (newline, delim);
  carrier.setData (s);
  return carrier;
}
示例15: pipe
import cc.mallet.types.Instance; //导入方法依赖的package包/类
/**
 * Converts the token sequence of the carrier into a {@code FeatureSequence}.
 *
 * <p>A feature sequence over the data alphabet is created with the token count as its
 * capacity argument, and the text of every token is added to it in order.
 *
 * @param carrier instance whose data is a {@code TokenSequence}
 * @return the same carrier, with its data set to the feature sequence
 */
public Instance pipe (Instance carrier)
{
  TokenSequence tokens = (TokenSequence) carrier.getData();
  Alphabet alphabet = (Alphabet) getDataAlphabet();
  final int count = tokens.size();
  FeatureSequence features = new FeatureSequence (alphabet, count);
  for (int pos = 0; pos < count; pos++) {
    features.add (tokens.get(pos).getText());
  }
  carrier.setData(features);
  return carrier;
}