当前位置: 首页>>代码示例>>Java>>正文


Java Instance类代码示例

本文整理汇总了Java中edu.umass.cs.mallet.base.types.Instance的典型用法代码示例。如果您正苦于以下问题：Java Instance类的具体用法？Java Instance怎么用？Java Instance使用的例子？那么恭喜您，这里精选的类代码示例或许可以为您提供帮助。


Instance类属于edu.umass.cs.mallet.base.types包，在下文中一共展示了Instance类的15个代码示例，这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞，您的评价将有助于系统推荐出更棒的Java代码示例。

示例1: averageTokenAccuracy

import edu.umass.cs.mallet.base.types.Instance; //导入依赖的package包/类
/**
 * Averages the token accuracy of the Viterbi path over every instance in
 * <code>ilist</code>.
 *
 * For each instance the Viterbi path of its data sequence is scored against
 * its target sequence with <code>tokenAccuracy</code>, which is also handed a
 * writer opened on <code>fileName</code>. If that file cannot be opened a
 * warning is logged and <code>null</code> is passed as the writer instead.
 * NOTE(review): this assumes <code>tokenAccuracy</code> tolerates a null
 * writer, as the original fallback implied -- confirm.
 *
 * @param ilist    instances whose data and target are both Sequences
 * @param fileName path of the file handed to <code>tokenAccuracy</code>
 * @return the sum of per-instance accuracies divided by
 *         <code>ilist.size()</code> (NaN for an empty list)
 */
public double averageTokenAccuracy (InstanceList ilist, String fileName)
{
	double accuracy = 0;
	PrintWriter out;
	try {
		out = new PrintWriter(new FileWriter(new File(fileName)));
	}
	catch (IOException e) {
		// Best effort: keep scoring without a writer, but say why it is missing.
		logger.warning ("Could not open "+fileName+" for writing: "+e);
		out = null;
	}
	try {
		for (int i = 0; i < ilist.size(); i++) {
			Instance instance = ilist.getInstance(i);
			Sequence input = (Sequence) instance.getData();
			Sequence output = (Sequence) instance.getTarget();
			assert (input.size() == output.size());
			double pathAccuracy = viterbiPath(input).tokenAccuracy(output, out);
			accuracy += pathAccuracy;
			logger.info ("Transducer path accuracy = "+pathAccuracy);
		}
	}
	finally {
		// out is null when the file could not be opened; closing it
		// unconditionally threw NullPointerException. The finally block also
		// releases the file if scoring fails part-way through.
		if (out != null)
			out.close();
	}
	return accuracy/ilist.size();
}
 
开发者ID:clulab,项目名称:reach-banner,代码行数:24,代码来源:Transducer.java

示例2: test

import edu.umass.cs.mallet.base.types.Instance; //导入依赖的package包/类
/**
 * Transduces the data sequence of every instance in <code>data</code> with
 * <code>crf</code>, counts how many predicted sequences pass
 * <code>sequencesMatch</code> against the instance target, and logs the
 * instance count, the number correct and the per-instance accuracy.
 *
 * @param crf                 transducer used to predict an output sequence
 * @param data                instances whose data and target are Sequences
 * @param description         prefix put in front of both log lines
 * @param viterbiOutputStream not used by this method
 */
public void test (Transducer crf, InstanceList data, String description, PrintStream viterbiOutputStream)
{
  int matched = 0;
  for (int idx = 0; idx < data.size(); idx++) {
    Instance example = data.getInstance(idx);
    Sequence observed = (Sequence) example.getData();
    Sequence gold = (Sequence) example.getTarget();
    assert (observed.size() == gold.size());
    Sequence predicted = crf.transduce (observed);
    assert (predicted.size() == gold.size());
    // Only whole-sequence matches count towards the tally.
    if (sequencesMatch (gold, predicted)) {
      matched++;
    }
  }
  double perInstanceAccuracy = ((double)matched) / data.size();
  logger.info (description+" Num instances = "+data.size()+"  Num correct = "+matched);
  logger.info (description+" Per-instance accuracy = "+perInstanceAccuracy);
}
 
开发者ID:clulab,项目名称:reach-banner,代码行数:18,代码来源:InstanceAccuracyEvaluator.java

示例3: add

import edu.umass.cs.mallet.base.types.Instance; //导入依赖的package包/类
/** Appends the instance to this list. Note that since memory for
  * the Instance has already been allocated, no check is made to
  * catch OutOfMemoryError.
  *
  * If this list's pipe is still the <code>notYetSetPipe</code> placeholder it
  * is taken from the instance; otherwise the instance must carry the very
  * same pipe object. The first instance added also fixes
  * <code>dataClass</code> and, when the pipe processes targets and the
  * instance has a target, <code>targetClass</code>. The instance is locked via
  * <code>setLock()</code> and its index is flagged in <code>inMemory</code>.
  *
  * @param instance the instance to append
  * @return the value returned by the underlying <code>instances.add</code>
  * @throws IllegalArgumentException if the instance's pipe differs from this list's pipe
  */
public boolean add (Instance instance)
{
	if (pipe == notYetSetPipe)
		pipe = instance.getPipe();
	else if (instance.getPipe() != pipe)
		// Making sure that the Instance has the same pipe as us.
		// xxx This also is a good time check that the constituent data is
		// of a consistent type?
		throw new IllegalArgumentException ("pipes don't match: instance: "+
																				instance.getPipe()+" Instance.list: "+
																				this.pipe);
	if (dataClass == null) {
		dataClass = instance.data.getClass();
		// Guard against a null target: dereferencing it unconditionally
		// threw NullPointerException for instances without a target.
		if (pipe != null && pipe.isTargetProcessing() && instance.target != null)
			targetClass = instance.target.getClass();
	}
	instance.setLock();
	boolean ret = instances.add (instance);
	// The instance was just appended, so its index is size()-1.
	inMemory.set(size()-1);
	logger.finer ("Added instance " + (size()-1) + ". Free memory remaining (bytes): " +
							 Runtime.getRuntime().freeMemory());
	return ret;
}
 
开发者ID:clulab,项目名称:reach-banner,代码行数:29,代码来源:PagedInstanceList.java

示例4: pipe

import edu.umass.cs.mallet.base.types.Instance; //导入依赖的package包/类
/**
 * Adds a <code>HEADER=</code> feature to capitalized tokens of a document
 * that carries a header word.
 *
 * A header is recognized when the sequence has more than three tokens, token
 * 2 or token 3 is "-", and token 1 consists solely of the letters A-Z. The
 * header "PRESS" is skipped. Otherwise every token whose text starts with a
 * letter A-Z gets the feature <code>"HEADER=" + header</code> with value 1.0.
 *
 * @param carrier instance whose data is a TokenSequence
 * @return the same carrier, its tokens possibly carrying the new feature
 */
public Instance pipe (Instance carrier)
{
	TokenSequence tokens = (TokenSequence) carrier.getData();
	// Indices 1..3 are inspected below, so shorter sequences cannot have a header.
	if (tokens.size() <= 3)
		return carrier;
	boolean dashAtTwo = tokens.getToken(2).getText().equals("-");
	if (!dashAtTwo && !tokens.getToken(3).getText().equals("-"))
		return carrier;
	String header = tokens.getToken(1).getText();
	if (!header.matches("[A-Z]+"))
		return carrier;
	// Don't bother with "PRESS DIGEST" headers
	if (header.equals("PRESS"))
		return carrier;
	String featureName = "HEADER="+header;
	for (int pos = 0; pos < tokens.size(); pos++) {
		Token current = tokens.getToken(pos);
		// Restricting the feature to capitalized words keeps the number of
		// possible conjunctions with the HEADER= features from exploding.
		boolean capitalized = current.getText().matches("^[A-Z].*");
		if (capitalized)
			current.setFeatureValue (featureName, 1.0);
	}
	return carrier;
}
 
开发者ID:clulab,项目名称:reach-banner,代码行数:22,代码来源:TokenSequenceDocHeader.java

示例5: pipeOutputAccumulate

import edu.umass.cs.mallet.base.types.Instance; //导入依赖的package包/类
/**
 * Adds the output of a pipe to this list, dispatching on the runtime type of
 * the carrier's data.
 *
 * An InstanceList, PipeInputIterator or Instance found in the carrier's data
 * is passed to the matching <code>add</code> overload. Any other data means
 * the carrier itself is added; in that case this list's pipe is first set to
 * <code>iteratedPipe</code> if it is still the <code>notYetSetPipe</code>
 * placeholder.
 *
 * @param carrier      the instance emitted by the pipe
 * @param iteratedPipe the pipe adopted when this list has none yet and the carrier itself is added
 */
public void pipeOutputAccumulate (Instance carrier,	Pipe iteratedPipe)
{
	// No assertion that iteratedPipe == pipe here: per the original author's
	// note it would not hold when using IteratedPipe. The add() overloads
	// below are relied on to check that the pipes match.
	Object payload = carrier.getData();
	if (payload instanceof InstanceList) {
		add ((InstanceList) payload);
		return;
	}
	if (payload instanceof PipeInputIterator) {
		add ((PipeInputIterator) payload);
		return;
	}
	if (payload instanceof Instance) {
		add ((Instance) payload);
		return;
	}
	if (pipe == notYetSetPipe) {
		pipe = iteratedPipe;
	}
	// The carrier has already been piped; add it as-is so it is not piped again.
	add (carrier);
}
 
开发者ID:clulab,项目名称:reach-banner,代码行数:21,代码来源:InstanceList.java

示例6: add

import edu.umass.cs.mallet.base.types.Instance; //导入依赖的package包/类
/** Adds <code>instance</code> to the end of this list.
  *
  * A list whose pipe is still the <code>notYetSetPipe</code> placeholder
  * adopts the instance's pipe; otherwise the instance must carry the very
  * same pipe object. The first instance added also records
  * <code>dataClass</code> and, if the pipe processes targets and the target is
  * non-null, <code>targetClass</code>.
  *
  * @param instance the instance to append
  * @return the value returned by <code>instances.add</code>
  * @throws IllegalArgumentException if the instance's pipe is not this list's pipe
  */
public boolean add (Instance instance)
{
	if (pipe == notYetSetPipe) {
		pipe = instance.getPipe();
	} else if (pipe != instance.getPipe()) {
		// The Instance must have the same pipe as this list.
		// xxx This would also be a good time to check that the constituent
		// data is of a consistent type?
		String mismatch = "pipes don't match: instance: "+instance.getPipe()
			+" Instance.list: "+this.pipe;
		throw new IllegalArgumentException (mismatch);
	}
	if (dataClass == null) {
		dataClass = instance.data.getClass();
		boolean recordsTargets = pipe != null && pipe.isTargetProcessing();
		if (recordsTargets && instance.target != null) {
			targetClass = instance.target.getClass();
		}
	}
	return instances.add (instance);
}
 
开发者ID:clulab,项目名称:reach-banner,代码行数:23,代码来源:InstanceList.java

示例7: pipe

import edu.umass.cs.mallet.base.types.Instance; //导入依赖的package包/类
/**
 * Runs <code>carrier</code> through the pipes in <code>pipes</code>, in
 * order, beginning at <code>startingIndex</code>. A null entry is reported on
 * standard error and skipped.
 *
 * @param carrier       the instance to process
 * @param startingIndex index of the first pipe to apply
 * @return the instance returned by the last pipe applied, or the original
 *         carrier if none was applied
 */
public Instance pipe(Instance carrier, int startingIndex)
{
	Instance result = carrier;
	for (int stage = startingIndex; stage < pipes.size(); stage++)
	{
		Pipe current = (Pipe) pipes.get(stage);
		if (current != null)
		{
			// Each pipe receives the output of the previous one.
			result = current.pipe(result);
			continue;
		}
		System.err.println("Pipe is null");
	}
	return result;
}
 
开发者ID:clulab,项目名称:reach-banner,代码行数:18,代码来源:SerialPipes.java

示例8: pipe

import edu.umass.cs.mallet.base.types.Instance; //导入依赖的package包/类
/**
 * For every token, copies the features of the earliest preceding token with
 * identical text onto it (feature names prefixed with
 * <code>namePrefix</code>), or marks it with <code>firstMentionName</code>
 * when no preceding token has the same text.
 *
 * Tokens whose text does not fully match <code>featureRegex</code> (when it
 * is set) are left untouched. When <code>filterRegex</code> is set, a feature
 * is copied only if <code>matches(key) ^ !includeFiltered</code> is true.
 *
 * The first-mention feature used to be set inside the scan over preceding
 * tokens. It was therefore applied whenever any token before the first match
 * differed, even though a match followed, and never to token 0. It is now
 * applied once, after the scan, and only if the scan found no match.
 *
 * @param carrier instance whose data is a TokenSequence
 * @return the same carrier with features added to its tokens
 */
public Instance pipe (Instance carrier)
{
	TokenSequence ts = (TokenSequence) carrier.getData();
	int tsSize = ts.size();
	// Walking from last to first means that whenever token j < i is read,
	// this method has not yet added any features to it.
	for (int i = tsSize-1; i >= 0; i--) {
		Token t = ts.getToken (i);
		String text = t.getText();
		if (featureRegex != null && !featureRegex.matcher(text).matches())
			continue;
		boolean mentionedEarlier = false;
		for (int j = 0; j < i; j++) {
			if (ts.getToken(j).getText().equals(text)) {
				PropertyList.Iterator iter = ts.getToken(j).getFeatures().iterator();
				while (iter.hasNext()) {
					iter.next();
					String key = iter.getKey();
					if (filterRegex == null || (filterRegex.matcher(key).matches() ^ !includeFiltered))
						t.setFeatureValue (namePrefix+key, iter.getNumericValue());
				}
				mentionedEarlier = true;
				// Only the earliest mention is used.
				break;
			}
		}
		if (!mentionedEarlier && firstMentionName != null)
			t.setFeatureValue (firstMentionName, 1.0);
	}
	return carrier;
}
 
开发者ID:clulab,项目名称:reach-banner,代码行数:27,代码来源:FeaturesOfFirstMention.java

示例9: getUnnormalizedClassificationScores

import edu.umass.cs.mallet.base.types.Instance; //导入依赖的package包/类
/**
 * Fills <code>scores</code> with one unnormalized score per label for
 * <code>instance</code>.
 *
 * Each score is the parameter stored at the label's
 * <code>defaultFeatureIndex</code> slot plus
 * <code>MatrixOps.rowDotProduct</code> of that label's parameter row with the
 * instance's feature vector, restricted by the per-class feature selection
 * when one exists and by the global <code>featureSelection</code> otherwise.
 *
 * @param instance instance whose data, obtained through <code>instancePipe</code>, is a FeatureVector
 * @param scores   output array; asserted to have one slot per label
 */
public void getUnnormalizedClassificationScores (Instance instance, double[] scores)
{
    // Row width comes from defaultFeatureIndex rather than
    // getAlphabet().size() + 1: per the original author's note, the latter
    // goes out of bounds if the pipe has grown since training.
    final int rowWidth = this.defaultFeatureIndex + 1;
    final int labelCount = getLabelAlphabet().size();
    assert (scores.length == labelCount);

    FeatureVector features = (FeatureVector) instance.getData (this.instancePipe);
    // The feature vector must use the same feature dictionary as our data
    // pipe, and thus the same feature indices as the parameters.
    assert (features.getAlphabet () == this.instancePipe.getDataAlphabet ());

    for (int label = 0; label < labelCount; label++) {
        double defaultWeight = parameters[label*rowWidth + defaultFeatureIndex];
        double weightedFeatures = MatrixOps.rowDotProduct (
                parameters, rowWidth, label, features, defaultFeatureIndex,
                (perClassFeatureSelection == null
                        ? featureSelection
                        : perClassFeatureSelection[label]));
        scores[label] = defaultWeight + weightedFeatures;
    }
}
 
开发者ID:clulab,项目名称:reach-banner,代码行数:27,代码来源:MaxEnt.java

示例10: getPositionsAndTypes

import edu.umass.cs.mallet.base.types.Instance; //导入依赖的package包/类
/**
 * Tags <code>sentence</code> with <code>forwardCRF</code> and decodes each
 * predicted tag into the caller-supplied arrays.
 *
 * A tag string is split on "-": the first piece becomes the TagPosition, and
 * when there are exactly two pieces the second becomes the MentionType.
 * Entries of <code>types</code> for other tags are left as they were. With
 * <code>reverse</code> set, both arrays are reversed at the end.
 *
 * @param sentence  sentence to tag
 * @param positions output array, one slot per predicted tag
 * @param types     output array, one slot per predicted tag
 * @param reverse   passed to <code>getTrainingText</code>; also reverses both outputs
 * @throws IllegalArgumentException if either array's length differs from the number of tags
 */
private void getPositionsAndTypes(Sentence sentence, TagPosition[] positions, MentionType[] types, boolean reverse)
{
    String text = sentence.getTrainingText(format, reverse);
    Instance instance = new Instance(text, null, sentence.getTag(), null, forwardCRF.getInputPipe());
    Sequence input = (Sequence)instance.getData();
    Sequence tags = forwardCRF.viterbiPath(input).output();
    int tagCount = tags.size();
    if (positions.length != tagCount || types.length != tagCount)
        throw new IllegalArgumentException();
    int index = 0;
    while (index < tagCount)
    {
        // The tag string is e.g. "O" or "B-GENE"
        String[] pieces = tags.get(index).toString().split("-");
        positions[index] = TagPosition.valueOf(pieces[0]);
        // TODO Verify that the type stays the same
        if (pieces.length == 2)
            types[index] = MentionType.getType(pieces[1]);
        index++;
    }
    if (!reverse)
        return;
    reverse(positions);
    reverse(types);
}
 
开发者ID:leebird,项目名称:legonlp,代码行数:24,代码来源:CRFTagger.java

示例11: pipe

import edu.umass.cs.mallet.base.types.Instance; //导入依赖的package包/类
/**
 * Replaces the carrier's TokenSequence with a new one holding, for every
 * token position and every size in <code>gramSizes</code>, the n-gram ending
 * at that position.
 *
 * A size of 1 adds the original Token object. Larger sizes add the texts of
 * the <code>len</code> tokens ending at the current position joined with "_".
 * Sizes that are not positive, or that would reach before the start of the
 * sequence, are skipped.
 *
 * Earlier tokens used to be concatenated through their implicit
 * <code>toString()</code> while the current token used
 * <code>getText()</code>; every part of an n-gram now uses
 * <code>getText()</code>.
 *
 * @param carrier instance whose data is a TokenSequence
 * @return the same carrier, its data replaced by the n-gram sequence
 */
public Instance pipe (Instance carrier)
{
	TokenSequence tmpTS = new TokenSequence();
	TokenSequence ts = (TokenSequence) carrier.getData();

	for (int i = 0; i < ts.size(); i++) {
		Token t = ts.getToken(i);
		for (int j = 0; j < gramSizes.length; j++) {
			int len = gramSizes[j];
			// Not enough preceding tokens (or a nonsensical size): no n-gram here.
			if (len <= 0 || len > (i+1)) continue;
			if (len == 1) { tmpTS.add(t); continue; }
			// Build right-to-left: start from the current token and prepend
			// the text of each preceding token.
			String newTerm = t.getText();
			for (int k = 1; k < len; k++)
				newTerm = ts.getToken(i-k).getText() + "_" + newTerm;
			tmpTS.add(newTerm);
		}
	}

	carrier.setData(tmpTS);

	return carrier;
}
 
开发者ID:clulab,项目名称:reach-banner,代码行数:24,代码来源:TokenSequenceNGrams.java

示例12: train

import edu.umass.cs.mallet.base.types.Instance; //导入依赖的package包/类
/**
 * Builds a CRF4 over <code>pipe</code> and trains it on the given sentences.
 *
 * Every sentence becomes an Instance made from its training text and tag.
 * States are added with <code>addStatesForLabelsConnectedAsIn</code> for
 * order 1 and <code>addStatesForBiLabelsConnectedAsIn</code> for order 2.
 * Training then runs with or without feature induction.
 *
 * @param sentences           training sentences
 * @param order               1 or 2; selects how states are added
 * @param useFeatureInduction whether to call <code>trainWithFeatureInduction</code> instead of <code>train</code>
 * @param format              tag format passed to <code>getTrainingText</code>
 * @param pipe                pipe shared by the instance list, the instances and the CRF
 * @param reverse             passed to <code>getTrainingText</code>
 * @return the trained CRF
 * @throws IllegalArgumentException if <code>order</code> is neither 1 nor 2
 */
private static CRF4 train(List<Sentence> sentences, int order, boolean useFeatureInduction, TagFormat format, Pipe pipe, boolean reverse)
{
    InstanceList trainingSet = new InstanceList(pipe);
    for (Sentence current : sentences)
    {
        String trainingText = current.getTrainingText(format, reverse);
        Instance example = new Instance(trainingText, null, current.getTag(), null, pipe);
        trainingSet.add(example);
    }
    CRF4 model = new CRF4(pipe, null);
    switch (order)
    {
        case 1:
            model.addStatesForLabelsConnectedAsIn(trainingSet);
            break;
        case 2:
            model.addStatesForBiLabelsConnectedAsIn(trainingSet);
            break;
        default:
            throw new IllegalArgumentException("Order must be equal to 1 or 2");
    }
    if (!useFeatureInduction)
    {
        model.train(trainingSet, null, null, (MultiSegmentationEvaluator)null, 99999, 10, new double[] {.2, .5, .8});
    }
    else
    {
        model.trainWithFeatureInduction(trainingSet, null, null, null, 99999, 100, 10, 1000, 0.5, false, new double[] {.2, .5, .8});
    }
    return model;
}
 
开发者ID:leebird,项目名称:legonlp,代码行数:22,代码来源:CRFTagger.java

示例13: pipe

import edu.umass.cs.mallet.base.types.Instance; //导入依赖的package包/类
/** Replaces the CharSequence data of an <CODE>Instance</CODE> with a
 * <code>double[]</code> holding the values that <code>lexer</code> extracts
 * from it, in order.
 *
 * The first instance seen fixes <code>numberFeatures</code>; every later
 * instance must yield the same count from <code>countNumberFeatures</code>.
 *
 * @param carrier instance whose data is a CharSequence
 * @return the same carrier, its data now a <code>double[]</code>
 * @throws IllegalArgumentException if the feature count differs from that of the first instance
 */
public Instance pipe(  Instance carrier ) {

	CharSequence line = (CharSequence)carrier.getData();
	int observed = countNumberFeatures (line);
	if (numberFeatures == -1) {
		// -1 marks "no instance seen yet": adopt this instance's count.
		numberFeatures = observed;
	} else if (numberFeatures != observed) {
		throw new IllegalArgumentException ("Instances must have same-length feature vectors. length_i: " + numberFeatures + " length_j: " + observed);
	}
	double[] values = new double[numberFeatures];
	lexer.setCharSequence (line);
	for (int slot = 0; lexer.hasNext(); slot++) {
		String token = (String)lexer.next();
		values[slot] = Double.parseDouble (token);
	}
	carrier.setData (values);
	return carrier;

}
 
开发者ID:clulab,项目名称:reach-banner,代码行数:22,代码来源:Csv2Array.java

示例14: main

import edu.umass.cs.mallet.base.types.Instance; //导入依赖的package包/类
/**
 * Command-line driver: for each file name in <code>args</code>, pipes the
 * file through Input2CharSequence and CharSequence2TokenSequence and prints
 * "===", the file name and the resulting token sequence to standard output.
 * Any exception ends the loop; it is printed together with its stack trace.
 *
 * @param args paths of the files to tokenize
 */
public static void main (String[] args)
{
	try {
		int index = 0;
		while (index < args.length) {
			String fileName = args[index];
			Instance carrier = new Instance (new File(fileName), null, null, null);
			// A fresh two-stage pipeline is built for every file.
			Pipe[] stages = new Pipe[] {
				new Input2CharSequence (),
				new CharSequence2TokenSequence(new CharSequenceLexer())};
			Pipe p = new SerialPipes (stages);
			carrier = p.pipe (carrier);
			TokenSequence tokens = (TokenSequence) carrier.getData();
			System.out.println ("===");
			System.out.println (fileName);
			System.out.println (tokens.toString());
			index++;
		}
	} catch (Exception e) {
		System.out.println (e);
		e.printStackTrace();
	}
}
 
开发者ID:clulab,项目名称:reach-banner,代码行数:20,代码来源:CharSequence2TokenSequence.java

示例15: pipe

import edu.umass.cs.mallet.base.types.Instance; //导入依赖的package包/类
/**
 * Converts the carrier's data by passing it to the <code>pipe</code> overload
 * matching its runtime type (URI, File or Reader) and storing the result back
 * on the carrier. CharSequence data is left unchanged.
 *
 * @param carrier instance whose data is a URI, File, Reader or CharSequence
 * @return the same carrier
 * @throws IllegalArgumentException if the data is null or of an unsupported
 *         class, or if conversion fails with an IOException (kept as the cause)
 */
public Instance pipe (Instance carrier)
	{
		Object data = carrier.getData();
		try {
			if (data instanceof URI)
				carrier.setData(pipe ((URI)data));
			else if (data instanceof File)
				carrier.setData(pipe ((File)data));
			else if (data instanceof Reader)
				carrier.setData(pipe ((Reader)data));
			else if (data instanceof CharSequence)
				;																	// No conversion necessary
			else if (data == null)
				// Calling getClass() on null data for the message below threw a
				// bare NullPointerException; report the problem explicitly.
				throw new IllegalArgumentException ("Does not handle null data");
			else
				throw new IllegalArgumentException ("Does not handle class "+data.getClass());

		} catch (java.io.IOException e) {
			// Keep the original exception as the cause so its stack trace is not lost.
			throw new IllegalArgumentException ("IOException " + e, e);
		}

		return carrier;
	}
 
开发者ID:clulab,项目名称:reach-banner,代码行数:22,代码来源:Input2CharSequence.java


注:本文中的edu.umass.cs.mallet.base.types.Instance类示例由纯净天空整理自Github/MSDocs等开源代码及文档管理平台,相关代码片段筛选自各路编程大神贡献的开源项目,源码版权归原作者所有,传播和使用请参考对应项目的License;未经允许,请勿转载。