当前位置: 首页>>代码示例>>Java>>正文


Java Sequence.size方法代码示例

本文整理汇总了Java中cc.mallet.types.Sequence.size方法的典型用法代码示例。如果您正苦于以下问题:Java Sequence.size方法的具体用法?Java Sequence.size怎么用?Java Sequence.size使用的例子?那么, 这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在cc.mallet.types.Sequence的用法示例。


在下文中一共展示了Sequence.size方法的15个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Java代码示例。

示例1: tokenAccuracy

import cc.mallet.types.Sequence; //导入方法依赖的package包/类
/**
 * Measures how often the best output sequence agrees with a reference
 * sequence, comparing elements by their string form. When {@code out} is
 * non-null, every predicted element is also printed to it, one per line.
 *
 * @param referenceOutput the expected output sequence
 * @param out optional writer receiving each predicted element; may be null
 * @return the number of matching positions divided by the length of the best output
 */
public double tokenAccuracy (Sequence referenceOutput, PrintWriter out)
{
	final Sequence predicted = bestOutputSequence();
	assert (referenceOutput.size() == predicted.size());
	int numCorrect = 0;
	for (int pos = 0; pos < predicted.size(); pos++) {
		final String predictedText = predicted.get(pos).toString();
		if (out != null) {
			out.println(predictedText);
		}
		final String referenceText = referenceOutput.get(pos).toString();
		if (referenceText.equals (predictedText)) {
			numCorrect++;
		}
	}
	logger.info ("Number correct: " + numCorrect + " out of " + predicted.size());
	return ((double)numCorrect)/predicted.size();
}
 
开发者ID:kostagiolasn,项目名称:NucleosomePatternClassifier,代码行数:20,代码来源:MaxLatticeDefault.java

示例2: sequencesMatch

import cc.mallet.types.Sequence; //导入方法依赖的package包/类
/**
 * Tests whether two sequences agree at every position, comparing elements
 * by their string form.
 *
 * @param trueOutput the reference sequence
 * @param predOutput the predicted sequence
 * @return true only when both sequences have the same length and every
 *         pair of corresponding elements has equal string forms
 */
private boolean sequencesMatch (Sequence trueOutput, Sequence predOutput)
{
  // Sequences of different lengths cannot match. Checking first also keeps the
  // loop from indexing past the end of a shorter prediction, and prevents a
  // longer prediction from being accepted on its prefix alone.
  if (trueOutput.size() != predOutput.size()) {
    return false;
  }
  for (int j = 0; j < trueOutput.size(); j++) {
    Object tru = trueOutput.get(j);
    Object pred = predOutput.get(j);
    if (!tru.toString().equals (pred.toString())) {
      return false;
    }
  }
  return true;
}
 
开发者ID:kostagiolasn,项目名称:NucleosomePatternClassifier,代码行数:12,代码来源:InstanceAccuracyEvaluator.java

示例3: addSpansFromTags

import cc.mallet.types.Sequence; //导入方法依赖的package包/类
/**
 * Splits the tag sequence into maximal runs of identical labels and adds one
 * labeled span per run to {@code labeled}. Before each span is added,
 * {@code addBackgroundIfNecessary} is invoked with the document index at
 * which the previous span ended.
 *
 * @param labeled collection receiving the spans
 * @param input tokenization from which the spans are cut
 * @param tags one tag per token; each tag's string form is looked up in {@code dict}
 * @param dict alphabet used to resolve tag strings to labels
 * @param backgroundTag label marking background spans
 */
private void addSpansFromTags (LabeledSpans labeled, Tokenization input, Sequence tags, LabelAlphabet dict,
                              Label backgroundTag)
{
  int docidx = 0;
  int pos = 0;
  while (pos < tags.size()) {
    final int runStart = pos;
    final Label runTag = dict.lookupLabel (tags.get(pos).toString());
    // Advance to the first token whose label differs from the run's label.
    for (; pos < tags.size(); pos++) {
      Label candidate = dict.lookupLabel (tags.get(pos).toString ());
      if (candidate != runTag) {
        break;
      }
    }
    // pos is now the exclusive end of the run.
    Span span = input.subspan(runStart, pos);
    StringSpan stringSpan = (StringSpan) span;
    addBackgroundIfNecessary (labeled, stringSpan, docidx, backgroundTag);
    docidx = stringSpan.getEndIdx ();
    labeled.add (new LabeledSpan (span, runTag, runTag == backgroundTag));
  }
}
 
开发者ID:kostagiolasn,项目名称:NucleosomePatternClassifier,代码行数:21,代码来源:DefaultTokenizationFilter.java

示例4: addSpansFromTags

import cc.mallet.types.Sequence; //导入方法依赖的package包/类
/**
 * Cuts the tag sequence into spans and adds one labeled span per run to
 * {@code labeled}. A run starts at the current token and extends until a
 * following tag either is a begin tag or does not match the run's first tag.
 * Begin/inside tags are passed through {@code trimTag} before labeling.
 *
 * @param labeled collection receiving the spans
 * @param input tokenization from which the spans are created
 * @param tags one tag per token; each tag's string form is looked up in {@code dict}
 * @param dict alphabet used to resolve tag strings to labels
 * @param backgroundTag label marking background spans
 */
private void addSpansFromTags (LabeledSpans labeled, Tokenization input, Sequence tags, LabelAlphabet dict,
                               Label backgroundTag)
{
  int docidx = 0;
  int pos = 0;
  while (pos < tags.size ()) {
    final int runStart = pos;
    final Label runTag = dict.lookupLabel (tags.get (pos).toString ());
    // The first token always belongs to the run; keep extending while the
    // following tag continues it.
    for (pos++; pos < tags.size (); pos++) {
      Label candidate = dict.lookupLabel (tags.get (pos).toString ());
      boolean continuesRun = !isBeginTag (candidate) && tagsMatch (runTag, candidate);
      if (!continuesRun) {
        break;
      }
    }
    // pos is now the exclusive end of the run.
    Span span = createSpan (input, runStart, pos);
    StringSpan stringSpan = (StringSpan) span;
    addBackgroundIfNecessary (labeled, stringSpan, docidx, backgroundTag);
    docidx = stringSpan.getEndIdx ();

    Label spanLabel = runTag;
    if (isBeginTag (runTag) || isInsideTag (runTag)) {
      spanLabel = trimTag (dict, runTag);
    }
    labeled.add (new LabeledSpan (span, spanLabel, spanLabel == backgroundTag));
  }
}
 
开发者ID:kostagiolasn,项目名称:NucleosomePatternClassifier,代码行数:24,代码来源:BIOTokenizationFilter.java

示例5: annotate

import cc.mallet.types.Sequence; //导入方法依赖的package包/类
/**
 * Annotates each input line with per-state probabilities computed by the CRF.
 * The lines are joined into one text, run through this annotator's pipe
 * (instances are separated by blank lines), and for every position of every
 * resulting instance one {@link ReferenceLineAnnotation} is produced. Each
 * annotation is built from the text before the first tab of the corresponding
 * line and receives one gamma probability per CRF state.
 *
 * @param linesWithLayout input lines; the part before the first tab is used as the annotated text
 * @return one annotation per sequence position, in input order
 * @throws IOException if closing the internal reader fails
 * @throws AnalysisException declared by the signature; not thrown directly in this method
 * @throws IllegalStateException if an instance's sequence length differs from the number of input lines
 */
public List<ReferenceLineAnnotation> annotate(List<String> linesWithLayout) throws IOException, AnalysisException {

        StringBuilder lineStringBuilder = new StringBuilder();
        for (String line : linesWithLayout) {
            lineStringBuilder.append(line).append(System.lineSeparator());
        }

        InstanceList inputInstances = new InstanceList(this.pipe);
        // try-with-resources guarantees the reader is closed even if the pipe throws.
        try (BufferedReader lineReader = new BufferedReader(new StringReader(lineStringBuilder.toString()))) {
            inputInstances.addThruPipe(new LineGroupIterator(lineReader, Pattern.compile("^\\s*$"), true));
        }

        List<ReferenceLineAnnotation> referenceLineAnnotations = new ArrayList<>();

        for (Instance instance : inputInstances) {
            @SuppressWarnings("unchecked")
            Sequence<String> inputSequence = (Sequence<String>) instance.getData();
            SumLatticeDefault latticeDefault = new SumLatticeDefault(this.crf, inputSequence);
            Alphabet outputAlphabet = this.crf.getOutputAlphabet();
            // Positions are mapped back to input lines by index, so the lengths must agree.
            if (linesWithLayout.size() != inputSequence.size()) {
                throw new IllegalStateException("linesWithLayout.size()!=inputSequence.size()");
            }
            for (int i = 0; i < inputSequence.size(); i++) {
                ReferenceLineAnnotation referenceLineAnnotation = new ReferenceLineAnnotation(
                        linesWithLayout.get(i).split("\\t")[0]);
                // NOTE(review): states are read at indices 1..outputAlphabet.size();
                // confirm that skipping state 0 is intended for this CRF's state layout.
                for (int j = 1; j <= outputAlphabet.size(); j++) {
                    State state = this.crf.getState(j);
                    // The lattice is queried at i + 1 for sequence position i.
                    referenceLineAnnotation.addAnnotation(state.getName(),
                            latticeDefault.getGammaProbability(i + 1, state));
                }
                referenceLineAnnotations.add(referenceLineAnnotation);
            }
        }
        return referenceLineAnnotations;
    }
 
开发者ID:exciteproject,项目名称:refext,代码行数:36,代码来源:ReferenceLineAnnotator.java

示例6: call

import cc.mallet.types.Sequence; //导入方法依赖的package包/类
/**
 * Sums, over the training instances with indices from {@code start}
 * (inclusive) to {@code end} (exclusive), the total weight of a
 * {@code SumLatticeKL} minus the total weight of a
 * {@code SumLatticeDefaultCachedDot}. Both lattices for an instance are
 * handed the same {@code cachedDots} array.
 *
 * @return the accumulated difference
 * @throws Exception as declared by the signature
 */
public Double call() throws Exception {
	double total = 0;

	for (int idx = start; idx < end; idx++) {
		Instance inst = trainingSet.get(idx);
		Sequence input = (Sequence) inst.getData();
		double[] initProbs = initialProbList.get(idx);
		double[] finalProbs = finalProbList.get(idx);
		double[][][] transProbs = transitionProbList.get(idx);

		// Per-instance cache indexed by [position][state][state]; every cell
		// starts out at IMPOSSIBLE_WEIGHT.
		final int numStates = crf.numStates();
		double[][][] cachedDots = new double[input.size()][numStates][numStates];
		for (double[][] plane : cachedDots) {
			for (double[] row : plane) {
				for (int col = 0; col < row.length; col++) {
					row[col] = Transducer.IMPOSSIBLE_WEIGHT;
				}
			}
		}

		// The KL lattice must be built first: it receives cachedDots before
		// the second lattice does.
		double labeledWeight = new SumLatticeKL(crf, input, initProbs,
		    finalProbs, transProbs, cachedDots, null).getTotalWeight();
		total += labeledWeight;

		// An earlier variant used SumLatticeDefault with the same incrementor
		// instead of the cached-dot lattice.
		double unlabeledWeight = new SumLatticeDefaultCachedDot(crf, input, null,
		    cachedDots, expectationsCopy.new Incrementor(), false, null).getTotalWeight();
		total -= unlabeledWeight;
	}
	return total;
}
 
开发者ID:kostagiolasn,项目名称:NucleosomePatternClassifier,代码行数:33,代码来源:CRFOptimizableByKL.java

示例7: elementwiseAccuracy

import cc.mallet.types.Sequence; //导入方法依赖的package包/类
/**
 * Computes the fraction of positions at which the best output sequence
 * matches the reference, comparing elements by their string form, and logs
 * the raw count.
 *
 * @param referenceOutput the expected output sequence
 * @return the number of matching positions divided by the length of the best output
 */
public double elementwiseAccuracy (Sequence referenceOutput)
{
	int numCorrect = 0;
	final Sequence predicted = bestOutputSequence();
	assert (referenceOutput.size() == predicted.size());
	for (int pos = 0; pos < predicted.size(); pos++) {
		final String expected = referenceOutput.get(pos).toString();
		final String actual = predicted.get(pos).toString();
		if (expected.equals (actual)) {
			numCorrect++;
		}
	}
	logger.info ("Number correct: " + numCorrect + " out of " + predicted.size());
	return ((double)numCorrect)/predicted.size();
}
 
开发者ID:kostagiolasn,项目名称:NucleosomePatternClassifier,代码行数:15,代码来源:MaxLatticeDefault.java

示例8: evaluateInstanceList

import cc.mallet.types.Sequence; //导入方法依赖的package包/类
/**
 * Logs, for every entry of the target alphabet, the fraction of tokens
 * predicted with that label next to the fraction of tokens that truly carry
 * it, formatted with at most four fraction digits.
 *
 * @param transducer trainer whose transducer produces the predictions
 * @param instances instances to evaluate
 * @param description prefix for each log line
 */
@Override
 public void evaluateInstanceList(TransducerTrainer transducer,
     InstanceList instances, String description) {
   final int numLabels = instances.getTargetAlphabet().size();
   double[] predictedCounts = new double[numLabels];
   double[] actualCounts = new double[instances.getTargetAlphabet().size()];

   int tokenCount = 0;
   for (int idx = 0; idx < instances.size(); idx++) {
     Instance current = instances.get(idx);
     Sequence trueOutput = (Sequence) current.getTarget();
     Sequence predOutput = (Sequence) transducer.getTransducer().transduce((Sequence)current.getData());
     // Positions are driven by the predicted sequence's length.
     for (int pos = 0; pos < predOutput.size(); pos++) {
       tokenCount++;
       int predictedIndex = instances.getTargetAlphabet().lookupIndex(predOutput.get(pos));
       predictedCounts[predictedIndex]++;
       int actualIndex = instances.getTargetAlphabet().lookupIndex(trueOutput.get(pos));
       actualCounts[actualIndex]++;
     }
   }

   NumberFormat formatter = NumberFormat.getInstance();
   formatter.setMaximumFractionDigits(4);

   for (int label = 0; label < predictedCounts.length; label++) {
     double predictedShare = predictedCounts[label] / tokenCount;
     double actualShare = actualCounts[label] / tokenCount;
     Object labelName = instances.getTargetAlphabet().lookupObject(label);
     logger.info(description + " " + labelName + " predicted: " + formatter.format(predictedShare) + " - true: " + formatter.format(actualShare));
   }
 }
 
开发者ID:kostagiolasn,项目名称:NucleosomePatternClassifier,代码行数:28,代码来源:LabelDistributionEvaluator.java

示例9: Segment

import cc.mallet.types.Sequence; //导入方法依赖的package包/类
/**
 * Initializes the segment and, when a truth sequence is supplied, decides
 * whether the prediction for this segment is correct.
 * <p>
 * A segment is marked incorrect if any predicted element in
 * {@code [start, end]} differs from the truth, or if the truth continues
 * with {@code inTag} right after {@code end} (the segment ends prematurely).
 * Without a truth sequence the segment keeps its default of correct.
 *
 * @param input entire input sequence
 * @param pred predicted sequence
 * @param truth true sequence; may be null, in which case no comparison is made
 * @param start starting position of extracted segment
 * @param end ending position of extracted segment (inclusive)
 * @param startTag tag stored as this segment's start tag
 * @param inTag tag stored as this segment's in tag; also compared with the
 *        truth element following {@code end} to detect a premature end
 */
public Segment (Sequence input, Sequence pred, Sequence truth, int start, int end,
								Object startTag, Object inTag )
{
	this.input = input;
	this.pred = pred;
	this.truth = truth;
	this.start = start;
	this.startTag = startTag;
	this.inTag = inTag;
	this.end = end;
	this.confidence = -1;
	this.correct = true;
	this.endsPrematurely = false;
	// Both checks read from truth, so both sit behind the null guard; with no
	// truth there is nothing to compare against.
	if (truth != null) {
		for (int i=start; i <= end; i++) {
			if (!pred.get(i).equals (truth.get(i))) {
				this.correct = false;
				break;
			}
		}
		// segment can also be incorrect if it ends prematurely
		if (correct && end+1 < truth.size() && truth.get (end+1).equals (inTag)) {
			this.correct = false;
			this.endsPrematurely = true;
		}
	}
}
 
开发者ID:kostagiolasn,项目名称:NucleosomePatternClassifier,代码行数:36,代码来源:Segment.java

示例10: evaluateInstanceList

import cc.mallet.types.Sequence; //导入方法依赖的package包/类
/**
 * Computes token-level accuracy of the trainer's transducer over all
 * instances, stores it under {@code description} in the accuracy map and
 * logs it.
 *
 * @param trainer trainer whose transducer is evaluated
 * @param instances instances whose data is transduced and compared with the target
 * @param description key for the stored accuracy and prefix of the log line
 */
public void evaluateInstanceList (TransducerTrainer trainer, InstanceList instances, String description)
{
	final Transducer transducer = trainer.getTransducer();
	int correctCount = 0;
	int tokenCount = 0;
	for (int idx = 0; idx < instances.size(); idx++) {
		Instance current = instances.get(idx);
		Sequence input = (Sequence) current.getData();
		Sequence trueOutput = (Sequence) current.getTarget();
		assert (input.size() == trueOutput.size());
		Sequence predOutput = transducer.transduce (input);
		assert (predOutput.size() == trueOutput.size());

		// Positions are driven by the true sequence's length.
		final int length = trueOutput.size();
		for (int pos = 0; pos < length; pos++) {
			if (trueOutput.get(pos).equals(predOutput.get(pos))) {
				correctCount++;
			}
		}
		tokenCount += length;
	}
	double acc = ((double)correctCount)/tokenCount;
	accuracy.put(description, acc);
	logger.info (description +" accuracy="+acc);
}
 
开发者ID:kostagiolasn,项目名称:NucleosomePatternClassifier,代码行数:29,代码来源:TokenAccuracyEvaluator.java

示例11: ArraySequence

import cc.mallet.types.Sequence; //导入方法依赖的package包/类
protected ArraySequence (Sequence<E> s, boolean copy)
{
	if (s instanceof ArraySequence) {
		if (copy) {
			data = (E[])new Object[s.size()];
			System.arraycopy (((ArraySequence)s).data, 0, data, 0, data.length);
		} else
			data = ((ArraySequence<E>)s).data;
	} else {
		data = (E[])new Object[s.size()];
		for (int i = 0; i < s.size(); i++)
			data[i] = s.get(i);
	}
}
 
开发者ID:kostagiolasn,项目名称:NucleosomePatternClassifier,代码行数:15,代码来源:ArraySequence.java

示例12: elementwiseAccuracy

import cc.mallet.types.Sequence; //导入方法依赖的package包/类
/**
 * Computes the fraction of positions at which two sequences agree,
 * comparing elements by their string form.
 *
 * @param truth the reference sequence
 * @param predicted the predicted sequence; its length drives the iteration and the denominator
 * @return the number of matching positions divided by {@code predicted.size()}
 */
public static double elementwiseAccuracy (Sequence truth, Sequence predicted) {
	assert (truth.size() == predicted.size());
	int matches = 0;
	for (int pos = 0; pos < predicted.size(); pos++) {
		final String expected = truth.get(pos).toString();
		final String actual = predicted.get(pos).toString();
		if (expected.equals (actual)) {
			matches++;
		}
	}
	return ((double)matches)/predicted.size();
}
 
开发者ID:kostagiolasn,项目名称:NucleosomePatternClassifier,代码行数:12,代码来源:Sequences.java

示例13: MaxLatticeDefault

import cc.mallet.types.Sequence; //导入方法依赖的package包/类
/** Initiate Viterbi decoding of the inputSequence, constrained to match non-null parts of the outputSequence.
 * maxCaches indicates how much state information to memoize in n-best decoding.
 *
 * @param t transducer whose states and transitions are searched
 * @param inputSequence sequence to decode; asserted to be non-null
 * @param outputSequence optional output constraint, handed to every state's transition iterator; may be null
 * @param maxCaches cache budget; values below 1 are raised to 1
 */
public MaxLatticeDefault (Transducer t, Sequence inputSequence, Sequence outputSequence, int maxCaches) 
{
	// This method initializes the forward path, but does not yet do the backward pass.
	this.t = t;
	// Clamp the cache budget to at least one.
	if (maxCaches < 1)
		maxCaches = 1;
	this.maxCaches = maxCaches;
	assert (inputSequence != null);
	// Dump both sequences element by element, but only when FINE logging is on.
	if (logger.isLoggable (Level.FINE)) {
		logger.fine ("Starting ViterbiLattice");
		logger.fine ("Input: ");
		for (int ip = 0; ip < inputSequence.size(); ip++)
			logger.fine (" " + inputSequence.get(ip));
		logger.fine ("\nOutput: ");
		if (outputSequence == null)
			logger.fine ("null");
		else
			for (int op = 0; op < outputSequence.size(); op++)
				logger.fine (" " + outputSequence.get(op));
		logger.fine ("\n");
	}

	this.input = inputSequence;
	this.providedOutput = outputSequence;
	// One lattice column per input position, plus column 0 which holds the
	// initial weights before any input is consumed.
	latticeLength = input.size()+1;
	int numStates = t.numStates();
	lattice = new ViterbiNode[latticeLength][numStates];
	caches = new WeightCache[latticeLength-1];

	// Viterbi Forward
	logger.fine ("Starting Viterbi");
	// Seed column 0 with every state whose initial weight is above IMPOSSIBLE_WEIGHT.
	boolean anyInitialState = false;
	for (int i = 0; i < numStates; i++) {
		double initialWeight = t.getState(i).getInitialWeight();
		if (initialWeight > Transducer.IMPOSSIBLE_WEIGHT) {
			ViterbiNode n = getViterbiNode (0, i);
			n.delta = initialWeight;
			anyInitialState = true;
		}
	}

	if (!anyInitialState) {
		logger.warning ("Viterbi: No initial states!");
	}

	// Forward pass: extend every usable node at column ip along its
	// transitions into column ip+1.
	for (int ip = 0; ip < latticeLength-1; ip++)
		for (int i = 0; i < numStates; i++) {
			// Skip cells with no node or with an impossible weight.
			if (lattice[ip][i] == null || lattice[ip][i].delta == Transducer.IMPOSSIBLE_WEIGHT)
				continue;
			State s = t.getState(i);
			TransitionIterator iter = s.transitionIterator (input, ip, providedOutput, ip);
			if (logger.isLoggable (Level.FINE))
				logger.fine (" Starting Viterbi transition iteration from state "
						+ s.getName() + " on input " + input.get(ip));
			while (iter.hasNext()) {
				State destination = iter.next();
				if (logger.isLoggable (Level.FINE))
					logger.fine ("Viterbi[inputPos="+ip
							+"][source="+s.getName()
							+"][dest="+destination.getName()+"]");
				ViterbiNode destinationNode = getViterbiNode (ip+1, destination.getIndex());
				// NOTE(review): output is assigned for every incoming transition,
				// before the weight comparison below — confirm this is intended.
				destinationNode.output = iter.getOutput();
				double weight = lattice[ip][i].delta + iter.getWeight();
				// A transition out of the last input position also pays the
				// destination state's final weight.
				if (ip == latticeLength-2) {
					weight += destination.getFinalWeight();
				}
				// Keep only the best-scoring predecessor for the destination node.
				if (weight > destinationNode.delta) {
					if (logger.isLoggable (Level.FINE))
						logger.fine ("Viterbi[inputPos="+ip
								+"][source][dest="+destination.getName()
								+"] weight increased to "+weight+" by source="+
								s.getName());
					destinationNode.delta = weight;
					destinationNode.maxWeightPredecessor = lattice[ip][i];
				}
			}
		}
}
 
开发者ID:kostagiolasn,项目名称:NucleosomePatternClassifier,代码行数:81,代码来源:MaxLatticeDefault.java

示例14: testStateAddWeights

import cc.mallet.types.Sequence; //导入方法依赖的package包/类
/**
 * Trains a fully connected CRF on the space-prediction data and checks that
 * the "notstart" label appears in the best output for the first training
 * instance. It then attaches the "BadBad" weight set, filled with negative
 * infinity, to the "notstart" state and checks that the label no longer
 * appears.
 */
public void testStateAddWeights() {
	// The pipe and the data were once taken from MEMM
	// (MEMM.makeSpacePredictionPipe() and MEMM.data); the reason is
	// unknown -akm 12/2007
	Pipe pipe = makeSpacePredictionPipe();
	InstanceList training = new InstanceList(pipe);
	training.addThruPipe(new ArrayIterator(data));

	CRF crf = new CRF(pipe, null);
	crf.addFullyConnectedStatesForLabels();
	CRFTrainerByLabelLikelihood trainer = new CRFTrainerByLabelLikelihood(crf);
	trainer.trainIncremental(training);

	// Check that the notstart state is used at test time
	Sequence input = (Sequence) training.get(0).getData();
	Sequence unconstrained = new MaxLatticeDefault(crf, input)
			.bestOutputSequence();

	boolean sawNotstart = false;
	for (int pos = 0; pos < unconstrained.size(); pos++) {
		sawNotstart |= unconstrained.get(pos).toString().equals("notstart");
	}
	System.err.println(unconstrained.toString());
	assertTrue(sawNotstart);

	// Now add -infinite weight onto a transition, and make sure that it's
	// honored.
	CRF.State notstartState = crf.getState("notstart");
	int badWeightsIndex = crf.getWeightsIndex("BadBad");
	int featureCount = crf.getInputAlphabet().size();
	SparseVector blocking = new SparseVector(new double[featureCount]);
	blocking.setAll(Double.NEGATIVE_INFINITY);
	crf.setWeights(badWeightsIndex, blocking);

	notstartState.addWeight(0, "BadBad");
	notstartState.addWeight(1, "BadBad");

	// Verify that this effectively prevents the notstart state from being
	// used. The last position is not inspected: the loop stops at size() - 1.
	Sequence penalized = new MaxLatticeDefault(crf, input).bestOutputSequence();
	boolean stillSawNotstart = false;
	for (int pos = 0; pos < penalized.size() - 1; pos++) {
		stillSawNotstart |= penalized.get(pos).toString().equals("notstart");
	}
	assertTrue(!stillSawNotstart);
}
 
开发者ID:kostagiolasn,项目名称:NucleosomePatternClassifier,代码行数:53,代码来源:TestCRF.java

示例15: makeConstraints

import cc.mallet.types.Sequence; //导入方法依赖的package包/类
/**
 * Builds the per-position constraint array for a constrained lattice.
 * <p>
 * The array has one entry per element of {@code constrainedSequence} plus one
 * leading entry for the start state. A positive value says all paths must
 * pass through that state index, a negative value says all paths must not
 * pass through it, and 0 means no constraint. Positions inside
 * {@code requiredSegment} are pinned to the state named by the constrained
 * sequence; the position right after the segment is forbidden from using the
 * segment's in-tag state.
 *
 * @param t transducer used to resolve state names to indices
 * @param inputSequence input sequence; only its length is checked
 * @param outputSequence not used by this method
 * @param requiredSegment segment whose positions are constrained
 * @param constrainedSequence state names to enforce; elements are cast to String
 * @return the constraint array, of length {@code constrainedSequence.size() + 1}
 * @throws IllegalArgumentException if the sequence lengths differ, or if the
 *         segment's in-tag does not name a state
 */
private static int[] makeConstraints (Transducer t, Sequence inputSequence, Sequence outputSequence, Segment requiredSegment, Sequence constrainedSequence) {
		if (constrainedSequence.size () != inputSequence.size ()) {
			throw new IllegalArgumentException ("constrainedSequence.size [" + constrainedSequence.size () + "] != inputSequence.size [" + inputSequence.size () + "]");
		}

		// Java zero-initialises int arrays, so every entry starts out as
		// "don't care"; the extra leading entry belongs to the start state.
		int [] constraints = new int [constrainedSequence.size() + 1];

		final int segmentStart = requiredSegment.getStart ();
		final int segmentEnd = requiredSegment.getEnd ();

		// Positive constraints: pin each segment position to its named state.
		// An unknown state only triggers a warning and leaves the entry at 0.
		for (int pos = segmentStart; pos <= segmentEnd; pos++) {
			int stateIndex = t.stateIndexOfString ((String)constrainedSequence.get (pos));
			if (stateIndex == -1) {
				logger.warning ("Could not find state " + constrainedSequence.get (pos) + ". Check that state labels match startTages and inTags, and that all labels are seen in training data.");
			}
			constraints[pos+1] = stateIndex + 1;
		}

		// Negative constraint: the state after the segment must not be a
		// continue tag. This replaces an earlier approach that derived the
		// continue tag by rewriting a "B-" prefix to "I-", which mishandled
		// segments of length 1.
		final int afterSegment = segmentEnd + 2;
		if (afterSegment < constraints.length) {
			String endTag = requiredSegment.getInTag().toString();
			int forbiddenState = t.stateIndexOfString (endTag);
			if (forbiddenState == -1) {
				throw new IllegalArgumentException ("Could not find state " + endTag + ". Check that state labels match startTags and InTags.");
			}
			constraints[afterSegment] = - (forbiddenState + 1);
		}

		logger.fine ("Segment:\n" + requiredSegment.sequenceToString () +
				"\nconstrainedSequence:\n" + constrainedSequence +
				"\nConstraints:\n");
		for (int value : constraints) {
			logger.fine (value + "\t");
		}
		logger.fine ("");
		return constraints;
	}
 
开发者ID:kostagiolasn,项目名称:NucleosomePatternClassifier,代码行数:60,代码来源:SumLatticeBeam.java


注:本文中的cc.mallet.types.Sequence.size方法示例由纯净天空整理自Github/MSDocs等开源代码及文档管理平台,相关代码片段筛选自各路编程大神贡献的开源项目,源码版权归原作者所有,传播和使用请参考对应项目的License;未经允许,请勿转载。