当前位置: 首页>>代码示例>>Java>>正文


Java Instance.getData方法代码示例

本文整理汇总了 Java 中 edu.umass.cs.mallet.base.types.Instance.getData 方法的典型用法代码示例。如果您正苦于以下问题：Java Instance.getData 方法的具体用法？Java Instance.getData 怎么用？Java Instance.getData 使用的例子？那么恭喜您，这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在类 edu.umass.cs.mallet.base.types.Instance 的用法示例。


在下文中一共展示了 Instance.getData 方法的 15 个代码示例，这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞，您的评价将有助于系统推荐出更棒的 Java 代码示例。

示例1: getUnnormalizedClassificationScores

import edu.umass.cs.mallet.base.types.Instance; //导入方法依赖的package包/类
/**
 * Writes one unnormalized score per label into {@code scores} for the given instance.
 *
 * <p>The flat {@code parameters} array is indexed as consecutive rows of
 * {@code defaultFeatureIndex + 1} entries, one row per label. Each score is the row's entry at
 * {@code defaultFeatureIndex} plus the row's dot product with the instance's feature vector,
 * restricted by the applicable feature selection.
 *
 * @param instance the instance to score; its data, as seen through this classifier's pipe,
 *                 is cast to a FeatureVector
 * @param scores   output array; asserted to hold exactly one slot per label
 */
public void getUnnormalizedClassificationScores (Instance instance, double[] scores)
{
    // The row width comes from defaultFeatureIndex rather than the alphabet size:
    // the alphabet may have grown since training, which would index out of bounds.
    final int rowWidth = defaultFeatureIndex + 1;
    final int labelCount = getLabelAlphabet().size();
    assert (scores.length == labelCount);

    final FeatureVector features = (FeatureVector) instance.getData (this.instancePipe);
    // The vector must be built over the same data alphabet as our pipe, otherwise
    // the feature indices would not line up with the parameter columns.
    assert (features.getAlphabet () == this.instancePipe.getDataAlphabet ());

    for (int label = 0; label < labelCount; label++) {
        // Entry stored at defaultFeatureIndex in this label's row
        // (presumably the bias / default-feature weight -- confirm against training code).
        final double defaultWeight = parameters[label * rowWidth + defaultFeatureIndex];
        // A per-class feature selection, when present, overrides the global one.
        final double weightedSum = MatrixOps.rowDotProduct (
                parameters, rowWidth, label, features, defaultFeatureIndex,
                (perClassFeatureSelection == null
                        ? featureSelection
                        : perClassFeatureSelection[label]));
        scores[label] = defaultWeight + weightedSum;
    }
}
 
开发者ID:clulab,项目名称:reach-banner,代码行数:27,代码来源:MCMaxEnt.java

示例2: pipeOutputAccumulate

import edu.umass.cs.mallet.base.types.Instance; //导入方法依赖的package包/类
/**
 * Accumulates the output of a pipe into this list, dispatching on the runtime type of the
 * carrier's data.
 *
 * <p>An InstanceList, PipeInputIterator or Instance found as the carrier's data is forwarded
 * to the matching {@code add} overload. For any other data the carrier itself is added, after
 * adopting {@code iteratedPipe} as this list's pipe if the pipe is still the
 * {@code notYetSetPipe} placeholder.
 *
 * @param carrier      the instance produced by the pipe
 * @param iteratedPipe the pipe that produced {@code carrier}
 */
public void pipeOutputAccumulate (Instance carrier, Pipe iteratedPipe)
{
	// Note: iteratedPipe is not asserted equal to this list's pipe; the original
	// author observed that this does not hold when using IteratedPipe.
	final Object data = carrier.getData();

	// The add() overloads below are responsible for checking that pipes match.
	if (data instanceof InstanceList) {
		add ((InstanceList) data);
		return;
	}
	if (data instanceof PipeInputIterator) {
		add ((PipeInputIterator) data);
		return;
	}
	if (data instanceof Instance) {
		add ((Instance) data);
		return;
	}

	if (pipe == notYetSetPipe) {
		pipe = iteratedPipe;
	}
	// Carrier has already been piped; make sure not to repipe it.
	add (carrier);
}
 
开发者ID:clulab,项目名称:reach-banner,代码行数:21,代码来源:InstanceList.java

示例3: classify

import edu.umass.cs.mallet.base.types.Instance; //导入方法依赖的package包/类
/**
 * Scores an instance against every class using Winnow's weights.
 *
 * <p>For each feature location in the instance's feature vector, the weight of that feature
 * is added to every class score; the feature's value itself is not consulted.
 *
 * @param instance an instance to be classified; its data, as seen through this classifier's
 *                 pipe, is cast to a FeatureVector
 * @return a Classification pairing the instance and this classifier with a LabelVector
 *         of the per-class scores
 */
public Classification classify (Instance instance){
	final int classCount = getLabelAlphabet().size();
	final double[] classScores = new double[classCount];
	final FeatureVector features = (FeatureVector) instance.getData (this.instancePipe);
	// The vector's alphabet must be the one our pipe produces, or the feature
	// indices would not line up with the weight columns.
	assert (instancePipe == null || features.getAlphabet () == this.instancePipe.getDataAlphabet ());

	// Accumulate, per class, the weights of the features present in the vector.
	final int locationCount = features.numLocations();
	for (int location = 0; location < locationCount; location++) {
		final int featureIndex = features.indexAtLocation (location);
		for (int cls = 0; cls < classCount; cls++) {
			classScores[cls] += this.weights[cls][featureIndex];
		}
	}

	final LabelVector labeling = new LabelVector (getLabelAlphabet(), classScores);
	return new Classification (instance, this, labeling);
}
 
开发者ID:clulab,项目名称:reach-banner,代码行数:29,代码来源:Winnow.java

示例4: pipe

import edu.umass.cs.mallet.base.types.Instance; //导入方法依赖的package包/类
/**
 * Replaces the carrier's TokenSequence with a new one holding only the tokens whose text
 * matches {@code CharSequenceLexer.LEX_ALPHA}.
 *
 * <p>When {@code markDeletions} is set, the text of each dropped token is recorded on the
 * most recently kept token under the {@code FeatureSequenceWithBigrams.deletionMark}
 * property. Tokens dropped before any token has been kept leave no mark.
 *
 * @param carrier instance whose data is a TokenSequence
 * @return the same carrier, with its data replaced by the filtered sequence
 */
public Instance pipe (Instance carrier)
{
	final TokenSequence input = (TokenSequence) carrier.getData();
	// A fresh sequence is built instead of deleting in place from the input.
	final TokenSequence kept = new TokenSequence ();
	Token lastKept = null;

	for (int pos = 0; pos < input.size(); pos++) {
		final Token token = input.getToken(pos);
		final String text = token.getText();

		if (CharSequenceLexer.LEX_ALPHA.matcher(text).matches()) {
			kept.add (token);
			lastKept = token;
			continue;
		}
		if (markDeletions && lastKept != null) {
			lastKept.setProperty (FeatureSequenceWithBigrams.deletionMark, text);
		}
	}

	carrier.setData(kept);
	return carrier;
}
 
开发者ID:clulab,项目名称:reach-banner,代码行数:21,代码来源:TokenSequenceRemoveNonAlpha.java

示例5: pipe

import edu.umass.cs.mallet.base.types.Instance; //导入方法依赖的package包/类
/**
 * Parses each token's text as a whitespace-separated list of features and sets them on
 * the token.
 *
 * <p>Three modes, checked in this order:
 * <ul>
 *   <li>{@code specifyFeatureNames}: each field is split on {@code nameValueSeparator}
 *       (used as a regular expression by {@code String.split}); exactly two parts give
 *       name and numeric value, anything else becomes the feature {@code "Token=" + field}
 *       with value 1.0;</li>
 *   <li>{@code realValued}: field {@code j} becomes the feature {@code "Feature#" + j}
 *       with the parsed value;</li>
 *   <li>otherwise the field itself is the feature name, with value 1.0.</li>
 * </ul>
 *
 * @param carrier instance whose data is a TokenSequence
 * @return the same carrier, holding the same (now annotated) sequence
 * @throws NumberFormatException if a value that must be numeric cannot be parsed
 */
public Instance pipe (Instance carrier) {
	final TokenSequence tokens = (TokenSequence) carrier.getData ();

	for (int tokenPos = 0; tokenPos < tokens.size(); tokenPos++) {
		final Token token = tokens.getToken (tokenPos);
		final String[] fields = token.getText().split("\\s+");

		for (int fieldPos = 0; fieldPos < fields.length; fieldPos++) {
			final String field = fields[fieldPos];

			if (!specifyFeatureNames) {
				if (realValued) {
					token.setFeatureValue ("Feature#" + fieldPos, Double.parseDouble (field));
				} else {
					token.setFeatureValue (field, 1.0);
				}
				continue;
			}

			final String[] nameAndValue = field.split(nameValueSeparator);
			if (nameAndValue.length == 2) {
				token.setFeatureValue (nameAndValue[0], Double.parseDouble (nameAndValue[1]));
			} else {
				// no feature name. use token as feature.
				token.setFeatureValue ("Token="+field, 1.0);
			}
		}
	}

	carrier.setData (tokens);
	return carrier;
}
 
开发者ID:clulab,项目名称:reach-banner,代码行数:26,代码来源:TokenSequenceParseFeatureString.java

示例6: pipe

import edu.umass.cs.mallet.base.types.Instance; //导入方法依赖的package包/类
/**
 * Marks every token that is a parenthesis, or that lies inside an open parenthesis, with
 * the feature {@code name} set to 1.0.
 *
 * <p>A running nesting depth is kept across the sequence: "(" increments it and ")"
 * decrements it. Both parenthesis tokens are always marked; any other token is marked
 * only while the depth is positive.
 *
 * @param carrier instance whose data is a TokenSequence
 * @return the same carrier; its tokens are annotated in place
 */
public Instance pipe(Instance carrier) {
    final TokenSequence tokens = (TokenSequence) carrier.getData();
    int nesting = 0;

    for (int idx = 0; idx < tokens.size(); idx++) {
        final Token token = tokens.getToken(idx);
        String text = token.getText();
        if (ignoreCase) {
            text = text.toLowerCase();
        }

        final boolean opens = text.equals("(");
        final boolean closes = !opens && text.equals(")");
        if (opens) {
            nesting++;
        } else if (closes) {
            nesting--;
        }

        // Parentheses themselves always get the feature; other tokens only when nested.
        if (opens || closes || nesting > 0) {
            token.setFeatureValue(name, 1.0);
        }
    }
    return carrier;
}
 
开发者ID:Network-of-BioThings,项目名称:GettinCRAFTy,代码行数:20,代码来源:InBracket.java

示例7: averageTokenAccuracy

import edu.umass.cs.mallet.base.types.Instance; //导入方法依赖的package包/类
/**
 * Computes the mean per-instance token accuracy of the Viterbi path over a list.
 *
 * <p>For every instance, the Viterbi path for its data (a Sequence) is compared with its
 * target (also a Sequence) via {@code tokenAccuracy}; each per-instance value is logged
 * at INFO level. Because the sum is a {@code double}, an empty list yields NaN.
 *
 * @param ilist instances whose data and target are equal-length Sequences
 * @return the sum of per-instance accuracies divided by the number of instances
 */
public double averageTokenAccuracy (InstanceList ilist)
{
	double total = 0;
	int index = 0;
	while (index < ilist.size()) {
		final Instance current = ilist.getInstance(index);
		final Sequence observed = (Sequence) current.getData();
		final Sequence expected = (Sequence) current.getTarget();
		assert (observed.size() == expected.size());

		final double pathAccuracy = viterbiPath(observed).tokenAccuracy(expected);
		total += pathAccuracy;
		logger.info ("Transducer path accuracy = "+pathAccuracy);
		index++;
	}
	return total / ilist.size();
}
 
开发者ID:clulab,项目名称:reach-banner,代码行数:15,代码来源:Transducer.java

示例8: pipe

import edu.umass.cs.mallet.base.types.Instance; //导入方法依赖的package包/类
/**
 * Convert the data in the given <tt>Instance</tt> from a <tt>CharSequence</tt>
 * of sparse feature-value pairs to a <tt>FeatureVector</tt>.
 *
 * <p>The data is trimmed and split on whitespace; each token must have the form
 * <tt>name:value</tt>, where the <em>last</em> colon separates a non-empty name from a
 * non-empty numeric value. Every feature name is added to the data alphabet.
 *
 * <p>On the first call where the target alphabet is not yet set, it is taken from the
 * instance's target, which must then be a <tt>Labeling</tt>.
 *
 * @param carrier instance whose data is a CharSequence of <tt>name:value</tt> pairs
 * @return the same carrier, with its data replaced by the FeatureVector
 * @throws IllegalStateException If a token is not a valid name-value pair, or if
 * <CODE>Instance.getTarget()</CODE> is not a Labeling (including null) while the
 * target alphabet is still unset
 * @throws NumberFormatException If a feature value cannot be parsed as a double
 */
 public Instance pipe(Instance carrier) 
     throws IllegalStateException
 {
     CharSequence c = (CharSequence) carrier.getData();
     String[] pairs = c.toString().trim().split("\\s+");
     String[] keys = new String[pairs.length];
     double[] values = new double[pairs.length];

     for (int i = 0; i < pairs.length; i++) {
         // Split on the last colon so that feature names may themselves contain colons.
         int delimIndex = pairs[i].lastIndexOf(":");
         if (delimIndex <= 0 || delimIndex == (pairs[i].length()-1))
             throw new IllegalStateException("token is not a valid feature name-feature value pair: "
                 + pairs[i] + "\nfaulting instance name:" + carrier.getName());

         keys[i] = pairs[i].substring(0, delimIndex);
         values[i] = Double.parseDouble(pairs[i].substring(delimIndex+1));

         dataDict.lookupIndex(keys[i], true); // add the feature name
     }

     // Sort indices beforehand to prevent the bubble sort used in
     // constructor of SparseVector from taking too much time.
     // The values MUST be permuted together with the indices: sorting the index
     // array alone would attach each value to the wrong feature whenever the
     // input is not already in alphabet-index order.
     // Assumes getObjectIndices returns one index per key, in key order -- the
     // pairing with `values` relies on that.
     int[] keyIndices = FeatureVector.getObjectIndices(keys, dataDict, true);
     // Pack (feature index, original position) into one long so that a primitive
     // sort orders by feature index and still remembers where each value lives.
     long[] order = new long[keyIndices.length];
     for (int i = 0; i < keyIndices.length; i++)
         order[i] = (((long) keyIndices[i]) << 32) | i;
     java.util.Arrays.sort(order);

     int[] sortedIndices = new int[order.length];
     double[] sortedValues = new double[order.length];
     for (int i = 0; i < order.length; i++) {
         sortedIndices[i] = (int) (order[i] >>> 32);
         sortedValues[i] = values[(int) (order[i] & 0xFFFFFFFFL)];
     }
     FeatureVector fv = new FeatureVector(dataDict, sortedIndices, sortedValues);

     // Check if we've set the target alphabet member
     if (targetDict == null) {
         Object target = carrier.getTarget();
         if (target instanceof Labeling)
             targetDict = ((Labeling) target).getLabelAlphabet();
         else
             // A null target is reported here as well, instead of failing with a
             // NullPointerException while building the message.
             throw new IllegalStateException ("Instance target is not a " +
                 "Labeling; it is " +
                 (target == null ? "null" : "a " + target.getClass().getName()));
     }

     carrier.setData( fv );
     return carrier;
 }
 
开发者ID:clulab,项目名称:reach-banner,代码行数:46,代码来源:Csv2FeatureVector.java

示例9: nextInstance

import edu.umass.cs.mallet.base.types.Instance; //导入方法依赖的package包/类
/**
 * Returns a new Instance built from the next instance of the wrapped iterator.
 *
 * <p>The new instance is constructed from the underlying instance's data, target, name
 * and source, and is then given the same property list.
 *
 * @return a newly constructed Instance mirroring the next underlying one
 */
public Instance nextInstance ()
{
  final Instance original = iter.nextInstance ();
  final Instance duplicate = new Instance (
      original.getData(),
      original.getTarget(),
      original.getName(),
      original.getSource());
  duplicate.setPropertyList (original.getPropertyList ());
  return duplicate;
}
 
开发者ID:clulab,项目名称:reach-banner,代码行数:8,代码来源:InstanceListIterator.java

示例10: pipe

import edu.umass.cs.mallet.base.types.Instance; //导入方法依赖的package包/类
/**
 * Lower-cases the text of every token in the carrier's TokenSequence, in place.
 *
 * @param carrier instance whose data is a TokenSequence
 * @return the same carrier; its tokens now hold lower-cased text
 */
public Instance pipe (Instance carrier)
{
	final TokenSequence tokens = (TokenSequence) carrier.getData();
	int position = 0;
	while (position < tokens.size()) {
		final Token token = tokens.getToken(position);
		// NOTE(review): toLowerCase() with no argument uses the JVM default locale.
		token.setText(token.getText().toLowerCase());
		position++;
	}
	return carrier;
}
 
开发者ID:clulab,项目名称:reach-banner,代码行数:10,代码来源:TokenSequenceLowercase.java

示例11: pipe

import edu.umass.cs.mallet.base.types.Instance; //导入方法依赖的package包/类
/**
 * Replaces the carrier's data with character n-grams produced by {@code ngramify}.
 *
 * <p>A CharSequence is n-gramified as a whole and wrapped in a new TokenSequence. For a
 * TokenSequence, the n-grams of each token's text are added to a fresh TokenSequence.
 *
 * @param carrier instance whose data is a CharSequence or a TokenSequence
 * @return the same carrier, with its data replaced
 * @throws IllegalArgumentException if the data is of any other type
 */
public Instance pipe (Instance carrier)
{
	final Object data = carrier.getData();

	if (data instanceof CharSequence) {
		carrier.setData(new TokenSequence (ngramify ((CharSequence) data)));
		return carrier;
	}
	if (!(data instanceof TokenSequence)) {
		throw new IllegalArgumentException ("Unhandled type "+data.getClass());
	}

	final TokenSequence source = (TokenSequence) data;
	final TokenSequence grams = new TokenSequence ();
	for (int pos = 0; pos < source.size(); pos++) {
		grams.add (ngramify (source.getToken(pos).getText()));
	}
	carrier.setData(grams);
	return carrier;
}
 
开发者ID:clulab,项目名称:reach-banner,代码行数:15,代码来源:CharSequence2CharNGrams.java

示例12: pipe

import edu.umass.cs.mallet.base.types.Instance; //导入方法依赖的package包/类
/**
 * Converts the carrier's TokenSequence into a FeatureSequence over this pipe's data
 * alphabet, adding each token's text in order.
 *
 * @param carrier instance whose data is a TokenSequence
 * @return the same carrier, with its data replaced by the FeatureSequence
 */
public Instance pipe (Instance carrier)
{
	final TokenSequence tokens = (TokenSequence) carrier.getData();
	final int tokenCount = tokens.size();
	// The token count is passed to the constructor (presumably as an initial capacity).
	final FeatureSequence features =
		new FeatureSequence ((Alphabet)getDataAlphabet(), tokenCount);

	int position = 0;
	while (position < tokenCount) {
		features.add (tokens.getToken(position).getText());
		position++;
	}

	carrier.setData(features);
	return carrier;
}
 
开发者ID:clulab,项目名称:reach-banner,代码行数:12,代码来源:TokenSequence2FeatureSequence.java

示例13: pipe

import edu.umass.cs.mallet.base.types.Instance; //导入方法依赖的package包/类
/**
 * Replaces the carrier's data (a File or BufferedReader) with the result of piping that
 * source through the matching {@code pipe} overload.
 *
 * @param carrier instance whose data is a File or a BufferedReader
 * @return the same carrier, with its data replaced
 * @throws IllegalArgumentException if the data is of any other type (including null), or
 *         if reading the source fails; in the latter case the IOException is attached as
 *         the cause
 */
public Instance pipe (Instance carrier)
{
	final Object data = carrier.getData();
	try {
		if (data instanceof File)
			carrier.setData(pipe ((File) data));
		else if (data instanceof BufferedReader)
			carrier.setData(pipe ((BufferedReader) data));
		else
			// Report the class of the offending data, not of the carrier
			// (which is always an Instance and tells the caller nothing).
			throw new IllegalArgumentException ("Doesn't handle class "
					+ (data == null ? null : data.getClass()));
	} catch (IOException e) {
		// Keep the original exception as the cause so the failure can be diagnosed.
		throw new IllegalArgumentException ("IOException", e);
	}
	return carrier;
}
 
开发者ID:clulab,项目名称:reach-banner,代码行数:15,代码来源:SourceLocation2TokenSequence.java

示例14: iteratePipe

import edu.umass.cs.mallet.base.types.Instance; //导入方法依赖的package包/类
/**
 * Feeds every instance produced by the carrier's PipeInputIterator into an accumulator.
 *
 * <p>The carrier is first registered as the iterator's parent instance. Each instance the
 * iterator yields is then re-wrapped in a new Instance constructed with
 * {@code iteratedPipe} and handed to {@code accumulator.pipeOutputAccumulate}.
 *
 * @param iteratedPipe the pipe passed both to the new instances and to the accumulator
 * @param accumulator  receiver of the piped instances
 * @param carrier      instance whose data is a PipeInputIterator
 * @return the same {@code accumulator}
 */
public static PipeOutputAccumulator iteratePipe (Pipe iteratedPipe,
																								 PipeOutputAccumulator accumulator,
																								 Instance carrier)
{
	final PipeInputIterator source = (PipeInputIterator) carrier.getData();
	source.setParentInstance (carrier);

	for (; source.hasNext(); ) {
		final Instance raw = source.nextInstance();
		// Constructing with iteratedPipe makes sure the instance's pipe field
		// gets set when piping the instance (per the original author's note).
		final Instance piped = new Instance (
				raw.getData (), raw.getTarget (), raw.getName (), raw.getSource (),
				iteratedPipe);
		accumulator.pipeOutputAccumulate (piped, iteratedPipe);
	}
	return accumulator;
}
 
开发者ID:clulab,项目名称:reach-banner,代码行数:16,代码来源:IteratingPipe.java

示例15: pipe

import edu.umass.cs.mallet.base.types.Instance; //导入方法依赖的package包/类
/**
 * Ensures the carrier's data is a Tokenization.
 *
 * <p>Data that already is a Tokenization is left untouched. A TokenSequence is converted
 * into a StringTokenization: the token texts are appended to one shared buffer, each
 * followed by a single space, and every token becomes a StringSpan over its own text in
 * that buffer.
 *
 * @param carrier instance whose data is a Tokenization or a TokenSequence
 * @return the same carrier, holding a Tokenization
 * @throws IllegalArgumentException if the data is of any other type
 */
public Instance pipe (Instance carrier)
{
  final Object data = carrier.getData ();

  // The Tokenization check comes first: such data needs no conversion.
  if (data instanceof Tokenization) {
    return carrier;
  }
  if (!(data instanceof TokenSequence)) {
    throw new IllegalArgumentException ("Can't convert "+data+" to Tokenization.");
  }

  final TokenSequence tokens = (TokenSequence) data;
  // The growing buffer doubles as the document behind the tokenization and its spans.
  final StringBuffer document = new StringBuffer ();
  final StringTokenization spans = new StringTokenization (document);

  for (int pos = 0; pos < tokens.size(); pos++) {
    final int start = document.length ();
    document.append (tokens.getToken (pos).getText());
    final int end = document.length ();

    spans.add (new StringSpan (document, start, end));
    // Separator between tokens; it lies outside every span.
    document.append (" ");
  }

  carrier.setData (spans);
  return carrier;
}
 
开发者ID:clulab,项目名称:reach-banner,代码行数:29,代码来源:TokenSequence2Tokenization.java


注:本文中的edu.umass.cs.mallet.base.types.Instance.getData方法示例由纯净天空整理自Github/MSDocs等开源代码及文档管理平台,相关代码片段筛选自各路编程大神贡献的开源项目,源码版权归原作者所有,传播和使用请参考对应项目的License;未经允许,请勿转载。