当前位置: 首页>>代码示例>>Java>>正文


Java Instance.setData方法代码示例

本文整理汇总了Java中cc.mallet.types.Instance.setData方法的典型用法代码示例。如果您正苦于以下问题：Java Instance.setData方法的具体用法？Java Instance.setData怎么用？Java Instance.setData使用的例子？那么，这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在类cc.mallet.types.Instance的用法示例。


在下文中一共展示了Instance.setData方法的15个代码示例，这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞，您的评价将有助于系统推荐出更棒的Java代码示例。

示例1: extractIndependentFeaturesHelper

import cc.mallet.types.Instance; //导入方法依赖的package包/类
/**
 * Build an instance that carries the independent features of one instance annotation.
 * <p>
 * A new {@code AugmentableFeatureVector} over the pipe's data alphabet is wrapped in an
 * {@code Instance} (target, name and source are all {@code null}). Every attribute returned
 * by {@code featureInfo.getAttributes()} is then handed to
 * {@code FeatureExtraction.extractFeature}. Finally the instance data is replaced by the
 * result of {@code toFeatureVector()}.
 * <p>
 * NOTE: this method is static so that it can be used in the CorpusRepresentationMalletSeq class too.
 * NOTE(review): earlier documentation states that the featureInfo object gets updated by the
 * extraction; that is not visible in this method - confirm against FeatureExtraction.
 *
 * @param instanceAnnotation the annotation passed to the feature extraction as the instance
 * @param inputAS the annotation set passed to the feature extraction
 * @param featureInfo provides the attributes to extract
 * @param pipe provides the data alphabet for the feature vector
 * @return the newly created instance whose data is the converted feature vector
 */
static Instance extractIndependentFeaturesHelper(
        Annotation instanceAnnotation,
        AnnotationSet inputAS,
        FeatureInfo featureInfo,
        Pipe pipe) {

  // Constructor parms: data, target, name, source
  Instance result =
          new Instance(new AugmentableFeatureVector(pipe.getDataAlphabet()), null, null, null);

  for (FeatureSpecAttribute attribute : featureInfo.getAttributes()) {
    FeatureExtraction.extractFeature(result, attribute, inputAS, instanceAnnotation);
  }

  // TODO: we destructively replace the AugmentableFeatureVector by a FeatureVector here,
  // but it is not clear if this is beneficial - our assumption is that yes.
  AugmentableFeatureVector collected = (AugmentableFeatureVector) result.getData();
  result.setData(collected.toFeatureVector());
  return result;
}
 
开发者ID:GateNLP,项目名称:gateplugin-LearningFramework,代码行数:32,代码来源:CorpusRepresentationMalletTarget.java

示例2: pipe

import cc.mallet.types.Instance; //导入方法依赖的package包/类
/**
 * Prefix every token that has exactly {@code minLineLength} tab-separated fields with
 * {@code "O\t"}. Tokens with {@code minLineLength + 1} fields are left alone; any other
 * field count is reported on standard error and the token is left unchanged.
 *
 * @param carrier instance whose data is a {@code TokenSequence}
 * @return the same instance, with the token sequence set as its data again
 */
@Override
public Instance pipe(Instance carrier) {

    TokenSequence lines = (TokenSequence) carrier.getData();
    for (int idx = 0; idx < lines.size(); idx++) {
        Token line = lines.get(idx);
        String text = line.getText();
        int fieldCount = text.split("\t").length;

        if (fieldCount == this.minLineLength) {
            // No leading field yet: prepend "O" as the first tab-separated field.
            line.setText("O\t" + text);
            continue;
        }
        if (fieldCount != (this.minLineLength + 1)) {
            System.err.println("input line does not have length " + this.minLineLength + " or "
                    + (this.minLineLength + 1) + " but " + fieldCount + ": " + text);
        }
    }
    carrier.setData(lines);

    return carrier;
}
 
开发者ID:exciteproject,项目名称:refext,代码行数:21,代码来源:AddTargetToLinePipe.java

示例3: pipe

import cc.mallet.types.Instance; //导入方法依赖的package包/类
/**
 * Split the first tab-separated field off every token: the first field of each token is
 * collected into a new token sequence that becomes the instance target, and the token text
 * is replaced by the remainder of the line (everything after the first tab).
 *
 * @param carrier instance whose data is a {@code TokenSequence}
 * @return the same instance with target and data set
 */
@Override
public Instance pipe(Instance carrier) {

    TokenSequence lines = (TokenSequence) carrier.getData();
    TokenSequence firstFields = new TokenSequence(lines.size());

    for (int idx = 0; idx < lines.size(); idx++) {
        Token line = lines.get(idx);
        String fullText = line.getText();
        // Drops everything up to and including the first tab; text without a tab stays as is.
        String remainder = fullText.replaceFirst("[^\\t]*\t", "");

        firstFields.add(fullText.split("\t")[0]);
        line.setText(remainder);
    }

    carrier.setTarget(firstFields);
    carrier.setData(lines);

    return carrier;
}
 
开发者ID:exciteproject,项目名称:refext,代码行数:24,代码来源:LineToTargetTextPipe.java

示例4: pipe

import cc.mallet.types.Instance; //导入方法依赖的package包/类
/**
 * Replace the instance's token sequence with a sequence of n-grams.
 * For every token position and every size in {@code gramSizes}, a size of 1 adds the
 * token itself; a larger size adds the texts of the current token and its predecessors
 * joined with "_", oldest first. Sizes that are not positive or that reach back before
 * the start of the sequence are skipped.
 *
 * @param carrier instance whose data is a {@code TokenSequence}
 * @return the same instance, its data replaced by the n-gram sequence
 */
public Instance pipe (Instance carrier)
{
	TokenSequence grams = new TokenSequence();
	TokenSequence input = (TokenSequence) carrier.getData();

	for (int pos = 0; pos < input.size(); pos++) {
		Token current = input.get(pos);
		for (int size : gramSizes) {
			boolean fits = size > 0 && size <= (pos + 1);
			if (!fits)
				continue;
			if (size == 1) {
				grams.add(current);
			} else {
				String term = new String(current.getText());
				// Walk backwards, prepending each earlier token's text.
				for (int back = 1; back < size; back++)
					term = input.get(pos - back).getText() + "_" + term;
				grams.add(term);
			}
		}
	}

	carrier.setData(grams);

	return carrier;
}
 
开发者ID:kostagiolasn,项目名称:NucleosomePatternClassifier,代码行数:24,代码来源:TokenSequenceNGrams.java

示例5: pipe

import cc.mallet.types.Instance; //导入方法依赖的package包/类
/**
 * Turn the <CODE>Instance</CODE> data from a CharSequence of
 * comma-separated values into a <CODE>double[]</CODE>, one slot per
 * value produced by the lexer.
 * The feature count of the first instance seen is remembered; a later
 * instance with a different count causes an IllegalArgumentException.
 */
public Instance pipe(  Instance carrier ) {

	CharSequence csv = (CharSequence)carrier.getData();
	int found = countNumberFeatures (csv);
	if (numberFeatures == -1) {
		// first instance seen
		numberFeatures = found;
	} else if (numberFeatures != found) {
		throw new IllegalArgumentException ("Instances must have same-length feature vectors. length_i: " + numberFeatures + " length_j: " + found);
	}

	double[] values = new double[numberFeatures];
	lexer.setCharSequence (csv);
	for (int slot = 0; lexer.hasNext(); slot++)
		values[slot] = Double.parseDouble ((String)lexer.next());

	carrier.setData (values);
	return carrier;

}
 
开发者ID:kostagiolasn,项目名称:NucleosomePatternClassifier,代码行数:22,代码来源:Csv2Array.java

示例6: pipe

import cc.mallet.types.Instance; //导入方法依赖的package包/类
/**
 * Parse the instance data's string form into a {@code FeatureVector}.
 * The string is split on whitespace; a field containing "=" is split on "=" into
 * a feature name and a numeric value, any other field becomes a feature with value 1.0.
 *
 * @param carrier instance whose data's {@code toString()} holds the whitespace-separated fields
 * @return the same instance, its data replaced by the feature vector
 */
public Instance pipe (Instance carrier) {

	String[] tokens = carrier.getData().toString().split("\\s+");

	Object[] names = new Object[tokens.length];
	double[] values = new double[tokens.length];

	int slot = 0;
	for (String token : tokens) {
		if (token.contains("=")) {
			String[] pair = token.split("=");
			names[slot] = pair[0];
			values[slot] = Double.parseDouble(pair[1]);
		} else {
			// Bare feature name: treated as present with weight 1.0.
			names[slot] = token;
			values[slot] = 1.0;
		}
		slot++;
	}

	carrier.setData(new FeatureVector(getDataAlphabet(), names, values));

	return carrier;
}
 
开发者ID:kostagiolasn,项目名称:NucleosomePatternClassifier,代码行数:26,代码来源:FeatureValueString2FeatureVector.java

示例7: pipe

import cc.mallet.types.Instance; //导入方法依赖的package包/类
/**
 * Parse each token's text as whitespace-separated feature entries and set them as
 * feature values on that token. Three modes, chosen by the fields
 * {@code specifyFeatureNames} and {@code realValued}:
 * named entries split on {@code nameValueSeparator} (falling back to "Token=" + entry
 * with value 1.0 when the split does not give exactly two parts), positional real values
 * named "Feature#" + index, or plain entries with value 1.0.
 *
 * @param carrier instance whose data is a {@code TokenSequence}
 * @return the same instance, with the token sequence set as its data again
 */
public Instance pipe (Instance carrier) {
	TokenSequence tokens = (TokenSequence) carrier.getData ();
	for (int pos = 0; pos < tokens.size(); pos++) {
		Token token = tokens.get (pos);
		String[] entries = token.getText().split("\\s+");
		for (int k = 0; k < entries.length; k++) {
			String entry = entries[k];
			if (!specifyFeatureNames) {
				if (realValued)
					token.setFeatureValue ("Feature#" + k, Double.parseDouble (entry));
				else
					token.setFeatureValue (entry, 1.0);
				continue;
			}
			String[] pair = entry.split(nameValueSeparator);
			if (pair.length == 2)
				token.setFeatureValue (pair[0], Double.parseDouble (pair[1]));
			else // no feature name. use token as feature.
				token.setFeatureValue ("Token="+entry, 1.0);
		}
	}
	carrier.setData (tokens);
	return carrier;
}
 
开发者ID:kostagiolasn,项目名称:NucleosomePatternClassifier,代码行数:26,代码来源:TokenSequenceParseFeatureString.java

示例8: pipe

import cc.mallet.types.Instance; //导入方法依赖的package包/类
/**
 * Copy the tokens that are not in {@code stoplist} into a new token sequence and make
 * it the instance data. The lookup uses the token text as is when {@code caseSensitive}
 * is set, otherwise its lower-cased form. When {@code markDeletions} is set, the text of
 * a removed token is stored on the most recently kept token under
 * {@code FeatureSequenceWithBigrams.deletionMark}.
 *
 * @param carrier instance whose data is a {@code TokenSequence}
 * @return the same instance, its data replaced by the filtered sequence
 */
public Instance pipe (Instance carrier)
{
	TokenSequence input = (TokenSequence) carrier.getData();
	// The surviving Token objects themselves are reused, not copied.
	TokenSequence kept = new TokenSequence ();
	Token lastKept = null;
	for (int pos = 0; pos < input.size(); pos++) {
		Token token = input.get(pos);
		String key = caseSensitive ? token.getText() : token.getText().toLowerCase();
		if (stoplist.contains (key)) {
			// Stopword: drop it, optionally leaving a mark on the previous survivor.
			if (markDeletions && lastKept != null)
				lastKept.setProperty (FeatureSequenceWithBigrams.deletionMark, token.getText());
			continue;
		}
		kept.add (token);
		lastKept = token;
	}
	carrier.setData(kept);
	return carrier;
}
 
开发者ID:kostagiolasn,项目名称:NucleosomePatternClassifier,代码行数:21,代码来源:TokenSequenceRemoveStopwords.java

示例9: pipe

import cc.mallet.types.Instance; //导入方法依赖的package包/类
/**
 * Convert the instance data to a character sequence.
 * Data of type {@code URI}, {@code File} or {@code Reader} is passed to the matching
 * {@code pipe} overload and replaced by its result; data that already is a
 * {@code CharSequence} is left untouched.
 *
 * @param carrier instance whose data is a URI, File, Reader or CharSequence
 * @return the same instance
 * @throws IllegalArgumentException if the data is of any other class, or if reading
 *         fails with an {@code IOException} (which is attached as the cause)
 */
public Instance pipe (Instance carrier)
{
	Object data = carrier.getData();
	try {
		if (data instanceof URI)
			carrier.setData(pipe ((URI) data));
		else if (data instanceof File)
			carrier.setData(pipe ((File) data));
		else if (data instanceof Reader)
			carrier.setData(pipe ((Reader) data));
		else if (!(data instanceof CharSequence))	// a CharSequence needs no conversion
			throw new IllegalArgumentException ("Does not handle class "+data.getClass());
	} catch (java.io.IOException e) {
		// Keep the original exception as cause so its stack trace is not lost.
		throw new IllegalArgumentException ("IOException " + e, e);
	}

	return carrier;
}
 
开发者ID:kostagiolasn,项目名称:NucleosomePatternClassifier,代码行数:22,代码来源:Input2CharSequence.java

示例10: transduce

import cc.mallet.types.Instance; //导入方法依赖的package包/类
/**
 * Read the input sequence from instance.data and store the best output sequence back
 * into instance.data. When set, {@code inputPipe} is applied before and
 * {@code outputPipe} after the transduction.
 */
public Instance transduce (Instance instance)
{
	Instance current = instance;
	if (inputPipe != null)
		current = inputPipe.instanceFrom(current);

	// TODO Use MaxLatticeFactory instead of hardcoding
	Sequence input = (Sequence) current.getData();
	current.setData(new MaxLatticeDefault(this, input).bestOutputSequence());

	return (outputPipe != null) ? outputPipe.instanceFrom(current) : current;
}
 
开发者ID:kostagiolasn,项目名称:NucleosomePatternClassifier,代码行数:12,代码来源:Transducer.java

示例11: pipe

import cc.mallet.types.Instance; //导入方法依赖的package包/类
/**
 * Remove the single-space spans from a string tokenization and label the remaining spans.
 * A span is labelled "start" when it is the first span or directly follows a space span,
 * otherwise "notstart". The label sequence becomes the target only when
 * {@code isTargetProcessing()} is true; the concatenated text of the kept spans becomes
 * the instance source.
 *
 * @param carrier instance whose data is a {@code StringTokenization}
 * @return the same instance with data, source and (optionally) target replaced
 */
public Instance pipe(Instance carrier)
{
  StringTokenization original = (StringTokenization) carrier.getData();
  StringTokenization withoutSpaces = new StringTokenization((CharSequence) original.getDocument ());
  final LabelAlphabet labels = (LabelAlphabet) getTargetAlphabet();
  LabelSequence boundaryLabels = new LabelSequence(labels);
  // The returned labels are not used here; the lookups are kept because they
  // presumably register both labels in the alphabet - verify before removing.
  labels.lookupLabel ("start");
  labels.lookupLabel ("notstart");

  boolean afterSpace = true;
  StringBuilder joined = new StringBuilder();
  for (int idx = 0; idx < original.size(); idx++) {
    StringSpan span = (StringSpan) original.getSpan(idx);
    if (span.getText().equals(" ")) {
      afterSpace = true;
      continue;
    }
    joined.append(span.getText());
    withoutSpaces.add(span);
    boundaryLabels.add(afterSpace ? "start" : "notstart");
    afterSpace = false;
  }

  if (isTargetProcessing())
    carrier.setTarget(boundaryLabels);
  carrier.setData(withoutSpaces);
  carrier.setSource(joined.toString());
  return carrier;
}
 
开发者ID:kostagiolasn,项目名称:NucleosomePatternClassifier,代码行数:29,代码来源:TestMEMM.java

示例12: pipe

import cc.mallet.types.Instance; //导入方法依赖的package包/类
/**
 * Tokenize the instance's character sequence with {@code lexer}.
 * Each lexer match becomes a {@code StringSpan} over the original text, using the
 * lexer's start and end offsets, collected in a {@code StringTokenization}.
 *
 * @param carrier instance whose data is a {@code CharSequence}
 * @return the same instance, its data replaced by the token sequence
 */
public Instance pipe (Instance carrier)
{
	CharSequence text = (CharSequence) carrier.getData();
	lexer.setCharSequence (text);

	TokenSequence spans = new StringTokenization (text);
	for (; lexer.hasNext(); ) {
		// Advance the lexer; only the offsets of the match are needed.
		lexer.next();
		StringSpan span = new StringSpan (text, lexer.getStartOffset (), lexer.getEndOffset ());
		spans.add (span);
	}

	carrier.setData(spans);
	return carrier;
}
 
开发者ID:kostagiolasn,项目名称:NucleosomePatternClassifier,代码行数:13,代码来源:CharSequence2TokenSequence.java

示例13: pipe

import cc.mallet.types.Instance; //导入方法依赖的package包/类
/**
 * Replace the instance data by its n-gram form as produced by {@code ngramify}.
 * A {@code CharSequence} is n-gramified as a whole and wrapped in a new
 * {@code TokenSequence}; for a {@code TokenSequence}, each token's text is n-gramified
 * and the string form of the result is added to a new sequence.
 *
 * @param carrier instance whose data is a CharSequence or a TokenSequence
 * @return the same instance with its data replaced
 * @throws IllegalArgumentException if the data is of any other type
 */
public Instance pipe (Instance carrier)
{
	Object data = carrier.getData();

	if (data instanceof CharSequence) {
		carrier.setData(new TokenSequence (ngramify ((CharSequence) data)));
		return carrier;
	}
	if (!(data instanceof TokenSequence))
		throw new IllegalArgumentException ("Unhandled type "+data.getClass());

	TokenSequence input = (TokenSequence) data;
	TokenSequence grams = new TokenSequence ();
	for (int pos = 0; pos < input.size(); pos++)
		grams.add (ngramify (input.get(pos).getText()).toString());
	carrier.setData(grams);
	return carrier;
}
 
开发者ID:kostagiolasn,项目名称:NucleosomePatternClassifier,代码行数:15,代码来源:CharSequence2CharNGrams.java

示例14: pipe

import cc.mallet.types.Instance; //导入方法依赖的package包/类
/**
 * Replace every occurrence of the platform line separator (system property
 * "line.separator") in the instance's String data with {@code delim}.
 * Both the separator and the delimiter are taken literally, so a delimiter
 * containing characters such as '$' or '\' is inserted unchanged.
 *
 * @param carrier instance whose data is a {@code String}
 * @return the same instance, its data replaced by the rewritten string
 * @throws IllegalArgumentException if the data is not a {@code String}
 */
public Instance pipe (Instance carrier)
{
	if (!(carrier.getData() instanceof String))
		throw new IllegalArgumentException ("Expecting String, got " + carrier.getData().getClass().getName());
	String s = (String) carrier.getData();
	String newline = System.getProperty ("line.separator");
	// String.replace is literal; replaceAll would treat the separator as a regex and
	// the delimiter as a replacement template ('$' = group reference, '\' = escape).
	s = s.replace (newline, delim);
	carrier.setData (s);
	return carrier;
}
 
开发者ID:kostagiolasn,项目名称:NucleosomePatternClassifier,代码行数:11,代码来源:StringAddNewLineDelimiter.java

示例15: pipe

import cc.mallet.types.Instance; //导入方法依赖的package包/类
/**
 * Convert the instance's token sequence into a {@code FeatureSequence} over the data
 * alphabet by adding the text of every token, in order.
 *
 * @param carrier instance whose data is a {@code TokenSequence}
 * @return the same instance, its data replaced by the feature sequence
 */
public Instance pipe (Instance carrier)
{
	TokenSequence tokens = (TokenSequence) carrier.getData();
	int count = tokens.size();
	// The token count is passed as the second constructor argument.
	FeatureSequence features = new FeatureSequence ((Alphabet)getDataAlphabet(), count);
	for (int pos = 0; pos < count; pos++) {
		Token token = tokens.get(pos);
		features.add (token.getText());
	}
	carrier.setData(features);
	return carrier;
}
 
开发者ID:kostagiolasn,项目名称:NucleosomePatternClassifier,代码行数:12,代码来源:TokenSequence2FeatureSequence.java


注:本文中的cc.mallet.types.Instance.setData方法示例由纯净天空整理自Github/MSDocs等开源代码及文档管理平台,相关代码片段筛选自各路编程大神贡献的开源项目,源码版权归原作者所有,传播和使用请参考对应项目的License;未经允许,请勿转载。