本文整理汇总了Java中cc.mallet.types.Sequence.size方法的典型用法代码示例。如果您正苦于以下问题:Java Sequence.size方法的具体用法?Java Sequence.size怎么用?Java Sequence.size使用的例子?那么, 这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在类cc.mallet.types.Sequence
的用法示例。
在下文中一共展示了Sequence.size方法的15个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Java代码示例。
示例1: tokenAccuracy
import cc.mallet.types.Sequence; //导入方法依赖的package包/类
public double tokenAccuracy (Sequence referenceOutput, PrintWriter out)
{
Sequence output = bestOutputSequence();
int accuracy = 0;
String testString;
assert (referenceOutput.size() == output.size());
for (int i = 0; i < output.size(); i++) {
//logger.fine("tokenAccuracy: ref: "+referenceOutput.get(i)+" viterbi: "+output.get(i));
testString = output.get(i).toString();
if (out != null) {
out.println(testString);
}
if (referenceOutput.get(i).toString().equals (testString)) {
accuracy++;
}
}
logger.info ("Number correct: " + accuracy + " out of " + output.size());
return ((double)accuracy)/output.size();
}
示例2: sequencesMatch
import cc.mallet.types.Sequence; //导入方法依赖的package包/类
private boolean sequencesMatch (Sequence trueOutput, Sequence predOutput)
{
for (int j = 0; j < trueOutput.size(); j++) {
Object tru = trueOutput.get(j);
Object pred = predOutput.get(j);
if (!tru.toString().equals (pred.toString())) {
return false;
}
}
return true;
}
示例3: addSpansFromTags
import cc.mallet.types.Sequence; //导入方法依赖的package包/类
private void addSpansFromTags (LabeledSpans labeled, Tokenization input, Sequence tags, LabelAlphabet dict,
Label backgroundTag)
{
int i = 0;
int docidx = 0;
while (i < tags.size()) {
Label thisTag = dict.lookupLabel (tags.get(i).toString());
int startTokenIdx = i;
while (i < tags.size()) {
Label nextTag = dict.lookupLabel (tags.get(i).toString ());
if (thisTag != nextTag) break;
i++;
}
int endTokenIdx = i;
Span span = input.subspan(startTokenIdx, endTokenIdx);
addBackgroundIfNecessary (labeled, (StringSpan) span, docidx, backgroundTag);
docidx = ((StringSpan) span).getEndIdx ();
labeled.add (new LabeledSpan (span, thisTag, thisTag == backgroundTag));
}
}
示例4: addSpansFromTags
import cc.mallet.types.Sequence; //导入方法依赖的package包/类
private void addSpansFromTags (LabeledSpans labeled, Tokenization input, Sequence tags, LabelAlphabet dict,
Label backgroundTag)
{
int i = 0;
int docidx = 0;
while (i < tags.size ()) {
Label thisTag = dict.lookupLabel (tags.get (i).toString ());
int startTokenIdx = i;
while (++i < tags.size ()) {
Label nextTag = dict.lookupLabel (tags.get (i).toString ());
if (isBeginTag (nextTag) || !tagsMatch (thisTag, nextTag)) break;
}
int endTokenIdx = i;
Span span = createSpan (input, startTokenIdx, endTokenIdx);
addBackgroundIfNecessary (labeled, (StringSpan) span, docidx, backgroundTag);
docidx = ((StringSpan) span).getEndIdx ();
if (isBeginTag (thisTag) || isInsideTag (thisTag)) {
thisTag = trimTag (dict, thisTag);
}
labeled.add (new LabeledSpan (span, thisTag, thisTag == backgroundTag));
}
}
示例5: annotate
import cc.mallet.types.Sequence; //导入方法依赖的package包/类
public List<ReferenceLineAnnotation> annotate(List<String> linesWithLayout) throws IOException, AnalysisException {
StringBuilder lineStringBuilder = new StringBuilder();
for (String line : linesWithLayout) {
lineStringBuilder.append(line).append(System.lineSeparator());
}
BufferedReader lineReader = new BufferedReader(new StringReader(lineStringBuilder.toString()));
InstanceList inputInstances = new InstanceList(this.pipe);
inputInstances.addThruPipe(new LineGroupIterator(lineReader, Pattern.compile("^\\s*$"), true));
lineReader.close();
List<ReferenceLineAnnotation> referenceLineAnnotations = new ArrayList<ReferenceLineAnnotation>();
for (Instance instance : inputInstances) {
@SuppressWarnings("unchecked")
Sequence<String> inputSequence = (Sequence<String>) instance.getData();
SumLatticeDefault latticeDefault = new SumLatticeDefault(this.crf, inputSequence);
Alphabet outputAlphabet = this.crf.getOutputAlphabet();
if (linesWithLayout.size() != inputSequence.size()) {
throw new IllegalStateException("linesWithLayout.size()!=inputSequence.size()");
}
for (int i = 0; i < inputSequence.size(); i++) {
ReferenceLineAnnotation referenceLineAnnotation = new ReferenceLineAnnotation(
linesWithLayout.get(i).split("\\t")[0]);
for (int j = 1; j <= outputAlphabet.size(); j++) {
State state = this.crf.getState(j);
referenceLineAnnotation.addAnnotation(state.getName(),
latticeDefault.getGammaProbability(i + 1, state));
}
referenceLineAnnotations.add(referenceLineAnnotation);
}
}
return referenceLineAnnotations;
}
示例6: call
import cc.mallet.types.Sequence; //导入方法依赖的package包/类
public Double call() throws Exception {
double value = 0;
for (int ii = start; ii < end; ii++) {
Instance inst = trainingSet.get(ii);
Sequence input = (Sequence) inst.getData();
double initProbs[] = initialProbList.get(ii);
double finalProbs[] = finalProbList.get(ii);
double transProbs[][][] = transitionProbList.get(ii);
double[][][] cachedDots = new double[input.size()][crf.numStates()][crf.numStates()];
for (int j = 0; j < input.size(); j++) {
for (int k = 0; k < crf.numStates(); k++) {
for (int l = 0; l < crf.numStates(); l++) {
cachedDots[j][k][l] = Transducer.IMPOSSIBLE_WEIGHT;
}
}
}
double labeledWeight = new SumLatticeKL(crf, input, initProbs,
finalProbs, transProbs, cachedDots, null).getTotalWeight();
value += labeledWeight;
//double unlabeledWeight = new SumLatticeDefault(crf, input,
// expectationsCopy.new Incrementor()).getTotalWeight();
double unlabeledWeight = new SumLatticeDefaultCachedDot(crf, input, null,
cachedDots, expectationsCopy.new Incrementor(), false, null).getTotalWeight();
value -= unlabeledWeight;
}
return value;
}
示例7: elementwiseAccuracy
import cc.mallet.types.Sequence; //导入方法依赖的package包/类
public double elementwiseAccuracy (Sequence referenceOutput)
{
int accuracy = 0;
Sequence output = bestOutputSequence();
assert (referenceOutput.size() == output.size());
for (int i = 0; i < output.size(); i++) {
//logger.fine("tokenAccuracy: ref: "+referenceOutput.get(i)+" viterbi: "+output.get(i));
if (referenceOutput.get(i).toString().equals (output.get(i).toString())) {
accuracy++;
}
}
logger.info ("Number correct: " + accuracy + " out of " + output.size());
return ((double)accuracy)/output.size();
}
示例8: evaluateInstanceList
import cc.mallet.types.Sequence; //导入方法依赖的package包/类
@Override
public void evaluateInstanceList(TransducerTrainer transducer,
InstanceList instances, String description) {
double[] predCounts = new double[instances.getTargetAlphabet().size()];
double[] trueCounts = new double[instances.getTargetAlphabet().size()];
int total = 0;
for (int i = 0; i < instances.size(); i++) {
Instance instance = instances.get(i);
Sequence trueOutput = (Sequence) instance.getTarget();
Sequence predOutput = (Sequence) transducer.getTransducer().transduce((Sequence)instance.getData());
for (int j = 0; j < predOutput.size(); j++) {
total++;
predCounts[instances.getTargetAlphabet().lookupIndex(predOutput.get(j))]++;
trueCounts[instances.getTargetAlphabet().lookupIndex(trueOutput.get(j))]++;
}
}
NumberFormat formatter = NumberFormat.getInstance();
formatter.setMaximumFractionDigits(4);
for (int li = 0; li < predCounts.length; li++) {
double ppred = predCounts[li] / total;
double ptrue = trueCounts[li] / total;
logger.info(description + " " + instances.getTargetAlphabet().lookupObject(li) + " predicted: " + formatter.format(ppred) + " - true: " + formatter.format(ptrue));
}
}
示例9: Segment
import cc.mallet.types.Sequence; //导入方法依赖的package包/类
/**
* Initializes the segment.
*
* @param input entire input sequence
* @param pred predicted sequence
* @param start starting position of extracted segment
* @param end ending position of extracted segment
*/
public Segment (Sequence input, Sequence pred, Sequence truth, int start, int end,
Object startTag, Object inTag )
{
this.input = input;
this.pred = pred;
this.truth = truth;
this.start = start;
this.startTag = startTag;
this.inTag = inTag;
this.end = end;
this.confidence = -1;
this.correct = true;
this.endsPrematurely = false;
for (int i=start; i <= end; i++) {
if (!pred.get(i).equals (truth.get(i))) {
this.correct = false;
break;
}
}
// segment can also be incorrect if it ends prematurely
if (truth != null) {
if (correct && end+1 < truth.size() && truth.get (end+1).equals (inTag)) {
this.correct = false;
this.endsPrematurely = true;
}
}
}
示例10: evaluateInstanceList
import cc.mallet.types.Sequence; //导入方法依赖的package包/类
public void evaluateInstanceList (TransducerTrainer trainer, InstanceList instances, String description)
{
int numCorrectTokens;
int totalTokens;
Transducer transducer = trainer.getTransducer();
totalTokens = numCorrectTokens = 0;
for (int i = 0; i < instances.size(); i++) {
Instance instance = instances.get(i);
Sequence input = (Sequence) instance.getData();
Sequence trueOutput = (Sequence) instance.getTarget();
assert (input.size() == trueOutput.size());
//System.err.println ("TokenAccuracyEvaluator "+i+" length="+input.size());
Sequence predOutput = transducer.transduce (input);
assert (predOutput.size() == trueOutput.size());
for (int j = 0; j < trueOutput.size(); j++) {
totalTokens++;
if (trueOutput.get(j).equals(predOutput.get(j)))
numCorrectTokens++;
}
//System.err.println ("TokenAccuracyEvaluator "+i+" numCorrectTokens="+numCorrectTokens+" totalTokens="+totalTokens+" accuracy="+((double)numCorrectTokens)/totalTokens);
}
double acc = ((double)numCorrectTokens)/totalTokens;
//System.err.println ("TokenAccuracyEvaluator accuracy="+acc);
accuracy.put(description, acc);
logger.info (description +" accuracy="+acc);
}
示例11: ArraySequence
import cc.mallet.types.Sequence; //导入方法依赖的package包/类
protected ArraySequence (Sequence<E> s, boolean copy)
{
if (s instanceof ArraySequence) {
if (copy) {
data = (E[])new Object[s.size()];
System.arraycopy (((ArraySequence)s).data, 0, data, 0, data.length);
} else
data = ((ArraySequence<E>)s).data;
} else {
data = (E[])new Object[s.size()];
for (int i = 0; i < s.size(); i++)
data[i] = s.get(i);
}
}
示例12: elementwiseAccuracy
import cc.mallet.types.Sequence; //导入方法依赖的package包/类
public static double elementwiseAccuracy (Sequence truth, Sequence predicted) {
int accuracy = 0;
assert (truth.size() == predicted.size());
for (int i = 0; i < predicted.size(); i++) {
//logger.fine("tokenAccuracy: ref: "+referenceOutput.get(i)+" viterbi: "+output.get(i));
if (truth.get(i).toString().equals (predicted.get(i).toString())) {
accuracy++;
}
}
return ((double)accuracy)/predicted.size();
}
示例13: MaxLatticeDefault
import cc.mallet.types.Sequence; //导入方法依赖的package包/类
/** Initiate Viterbi decoding of the inputSequence, contrained to match non-null parts of the outputSequence.
* maxCaches indicates how much state information to memoize in n-best decoding. */
public MaxLatticeDefault (Transducer t, Sequence inputSequence, Sequence outputSequence, int maxCaches)
{
// This method initializes the forward path, but does not yet do the backward pass.
this.t = t;
if (maxCaches < 1)
maxCaches = 1;
this.maxCaches = maxCaches;
assert (inputSequence != null);
if (logger.isLoggable (Level.FINE)) {
logger.fine ("Starting ViterbiLattice");
logger.fine ("Input: ");
for (int ip = 0; ip < inputSequence.size(); ip++)
logger.fine (" " + inputSequence.get(ip));
logger.fine ("\nOutput: ");
if (outputSequence == null)
logger.fine ("null");
else
for (int op = 0; op < outputSequence.size(); op++)
logger.fine (" " + outputSequence.get(op));
logger.fine ("\n");
}
this.input = inputSequence;
this.providedOutput = outputSequence;
latticeLength = input.size()+1;
int numStates = t.numStates();
lattice = new ViterbiNode[latticeLength][numStates];
caches = new WeightCache[latticeLength-1];
// Viterbi Forward
logger.fine ("Starting Viterbi");
boolean anyInitialState = false;
for (int i = 0; i < numStates; i++) {
double initialWeight = t.getState(i).getInitialWeight();
if (initialWeight > Transducer.IMPOSSIBLE_WEIGHT) {
ViterbiNode n = getViterbiNode (0, i);
n.delta = initialWeight;
anyInitialState = true;
}
}
if (!anyInitialState) {
logger.warning ("Viterbi: No initial states!");
}
for (int ip = 0; ip < latticeLength-1; ip++)
for (int i = 0; i < numStates; i++) {
if (lattice[ip][i] == null || lattice[ip][i].delta == Transducer.IMPOSSIBLE_WEIGHT)
continue;
State s = t.getState(i);
TransitionIterator iter = s.transitionIterator (input, ip, providedOutput, ip);
if (logger.isLoggable (Level.FINE))
logger.fine (" Starting Viterbi transition iteration from state "
+ s.getName() + " on input " + input.get(ip));
while (iter.hasNext()) {
State destination = iter.next();
if (logger.isLoggable (Level.FINE))
logger.fine ("Viterbi[inputPos="+ip
+"][source="+s.getName()
+"][dest="+destination.getName()+"]");
ViterbiNode destinationNode = getViterbiNode (ip+1, destination.getIndex());
destinationNode.output = iter.getOutput();
double weight = lattice[ip][i].delta + iter.getWeight();
if (ip == latticeLength-2) {
weight += destination.getFinalWeight();
}
if (weight > destinationNode.delta) {
if (logger.isLoggable (Level.FINE))
logger.fine ("Viterbi[inputPos="+ip
+"][source][dest="+destination.getName()
+"] weight increased to "+weight+" by source="+
s.getName());
destinationNode.delta = weight;
destinationNode.maxWeightPredecessor = lattice[ip][i];
}
}
}
}
示例14: testStateAddWeights
import cc.mallet.types.Sequence; //导入方法依赖的package包/类
public void testStateAddWeights() {
Pipe p = makeSpacePredictionPipe(); // This used to be
// MEMM.makeSpacePredictionPipe(),
// but I don't know why -akm 12/2007
InstanceList training = new InstanceList(p);
training.addThruPipe(new ArrayIterator(data)); // This used to be
// MEMM.data, but I
// don't know why -akm
// 12/2007
CRF crf = new CRF(p, null);
crf.addFullyConnectedStatesForLabels();
CRFTrainerByLabelLikelihood crft = new CRFTrainerByLabelLikelihood(crf);
crft.trainIncremental(training);
// Check that the notstart state is used at test time
Sequence input = (Sequence) training.get(0).getData();
Sequence output = new MaxLatticeDefault(crf, input)
.bestOutputSequence();
boolean notstartFound = false;
for (int i = 0; i < output.size(); i++) {
if (output.get(i).toString().equals("notstart")) {
notstartFound = true;
}
}
System.err.println(output.toString());
assertTrue(notstartFound);
// Now add -infinite weight onto a transition, and make sure that it's
// honored.
CRF.State state = crf.getState("notstart");
int widx = crf.getWeightsIndex("BadBad");
int numFeatures = crf.getInputAlphabet().size();
SparseVector w = new SparseVector(new double[numFeatures]);
w.setAll(Double.NEGATIVE_INFINITY);
crf.setWeights(widx, w);
state.addWeight(0, "BadBad");
state.addWeight(1, "BadBad");
// Verify that this effectively prevents the notstart state from being
// used
output = new MaxLatticeDefault(crf, input).bestOutputSequence();
notstartFound = false;
for (int i = 0; i < output.size() - 1; i++) {
if (output.get(i).toString().equals("notstart")) {
notstartFound = true;
}
}
assertTrue(!notstartFound);
}
示例15: makeConstraints
import cc.mallet.types.Sequence; //导入方法依赖的package包/类
private static int[] makeConstraints (Transducer t, Sequence inputSequence, Sequence outputSequence, Segment requiredSegment, Sequence constrainedSequence) {
if (constrainedSequence.size () != inputSequence.size ())
throw new IllegalArgumentException ("constrainedSequence.size [" + constrainedSequence.size () + "] != inputSequence.size [" + inputSequence.size () + "]");
// constraints tells the lattice which states must emit which
// observations. positive values say all paths must pass through
// this state index, negative values say all paths must _not_
// pass through this state index. 0 means we don't
// care. initialize to 0. include 1 extra node for start state.
int [] constraints = new int [constrainedSequence.size() + 1];
for (int c = 0; c < constraints.length; c++)
constraints[c] = 0;
for (int i=requiredSegment.getStart (); i <= requiredSegment.getEnd(); i++) {
int si = t.stateIndexOfString ((String)constrainedSequence.get (i));
if (si == -1)
logger.warning ("Could not find state " + constrainedSequence.get (i) + ". Check that state labels match startTages and inTags, and that all labels are seen in training data.");
// throw new IllegalArgumentException ("Could not find state " + constrainedSequence.get(i) + ". Check that state labels match startTags and InTags.");
constraints[i+1] = si + 1;
}
// set additional negative constraint to ensure state after
// segment is not a continue tag
// xxx if segment length=1, this actually constrains the sequence
// to B-tag (B-tag)', instead of the intended constraint of B-tag
// (I-tag)'
// the fix below is unsafe, but will have to do for now.
// FIXED BELOW
/* String endTag = (String) constrainedSequence.get (requiredSegment.getEnd ());
if (requiredSegment.getEnd()+2 < constraints.length) {
if (requiredSegment.getStart() == requiredSegment.getEnd()) { // segment has length 1
if (endTag.startsWith ("B-")) {
endTag = "I" + endTag.substring (1, endTag.length());
}
else if (!(endTag.startsWith ("I-") || endTag.startsWith ("0")))
throw new IllegalArgumentException ("Constrained Lattice requires that states are tagged in B-I-O format.");
}
int statei = stateIndexOfString (endTag);
if (statei == -1) // no I- tag for this B- tag
statei = stateIndexOfString ((String)constrainedSequence.get (requiredSegment.getStart ()));
constraints[requiredSegment.getEnd() + 2] = - (statei + 1);
}
*/
if (requiredSegment.getEnd() + 2 < constraints.length) { // if
String endTag = requiredSegment.getInTag().toString();
int statei = t.stateIndexOfString (endTag);
if (statei == -1)
throw new IllegalArgumentException ("Could not find state " + endTag + ". Check that state labels match startTags and InTags.");
constraints[requiredSegment.getEnd() + 2] = - (statei + 1);
}
// printStates ();
logger.fine ("Segment:\n" + requiredSegment.sequenceToString () +
"\nconstrainedSequence:\n" + constrainedSequence +
"\nConstraints:\n");
for (int i=0; i < constraints.length; i++) {
logger.fine (constraints[i] + "\t");
}
logger.fine ("");
return constraints;
}