本文整理汇总了Java中edu.umass.cs.mallet.base.types.Instance类的典型用法代码示例。如果您正苦于以下问题:Java Instance类的具体用法?Java Instance怎么用?Java Instance使用的例子?那么恭喜您, 这里精选的类代码示例或许可以为您提供帮助。
Instance类属于edu.umass.cs.mallet.base.types包,在下文中一共展示了Instance类的15个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Java代码示例。
示例1: averageTokenAccuracy
import edu.umass.cs.mallet.base.types.Instance; //导入依赖的package包/类
/**
 * Averages the token accuracy of the Viterbi path over every instance in
 * {@code ilist}, handing a writer opened on {@code fileName} to each
 * {@code tokenAccuracy} call.
 *
 * <p>If the file cannot be opened the failure is logged and a {@code null}
 * writer is passed to {@code tokenAccuracy}, as before; whether that callee
 * tolerates a null writer is not visible from here. The writer, when one was
 * opened, is always closed, even if scoring throws.
 *
 * @param ilist instances whose data and target are both {@code Sequence}s
 * @param fileName path of the file the writer is opened on
 * @return the sum of the per-instance accuracies divided by
 *         {@code ilist.size()} (NaN for an empty list, as this is a
 *         floating-point division by zero)
 */
public double averageTokenAccuracy (InstanceList ilist, String fileName)
{
    PrintWriter out;
    try {
        out = new PrintWriter(new FileWriter(new File(fileName)));
    }
    catch (IOException e) {
        // Best effort: keep scoring without an output file, but say so
        // instead of swallowing the failure silently.
        logger.info ("Could not open "+fileName+" for writing: "+e);
        out = null;
    }
    try {
        double accuracy = 0;
        for (int i = 0; i < ilist.size(); i++) {
            Instance instance = ilist.getInstance(i);
            Sequence input = (Sequence) instance.getData();
            Sequence output = (Sequence) instance.getTarget();
            assert (input.size() == output.size());
            double pathAccuracy = viterbiPath(input).tokenAccuracy(output, out);
            accuracy += pathAccuracy;
            logger.info ("Transducer path accuracy = "+pathAccuracy);
        }
        return accuracy/ilist.size();
    }
    finally {
        // The writer is null when the file could not be opened; closing it
        // unconditionally used to throw a NullPointerException.
        if (out != null)
            out.close();
    }
}
示例2: test
import edu.umass.cs.mallet.base.types.Instance; //导入依赖的package包/类
/**
 * Transduces every instance of {@code data} with {@code crf} and logs how
 * many predicted sequences match their target sequence, as decided by
 * {@code sequencesMatch}, together with the resulting per-instance accuracy.
 *
 * @param crf transducer used to predict an output sequence for each input
 * @param data instances whose data and target are both {@code Sequence}s
 * @param description prefix placed in front of both log lines
 * @param viterbiOutputStream not used by this method
 */
public void test (Transducer crf, InstanceList data, String description, PrintStream viterbiOutputStream)
{
    int numCorrect = 0;
    int index = 0;
    while (index < data.size()) {
        Instance current = data.getInstance(index);
        Sequence observed = (Sequence) current.getData();
        Sequence gold = (Sequence) current.getTarget();
        assert (observed.size() == gold.size());
        Sequence predicted = crf.transduce (observed);
        assert (predicted.size() == gold.size());
        if (sequencesMatch (gold, predicted)) {
            numCorrect++;
        }
        index++;
    }
    double perInstanceAccuracy = ((double)numCorrect) / data.size();
    logger.info (description+" Num instances = "+data.size()+" Num correct = "+numCorrect);
    logger.info (description+" Per-instance accuracy = "+perInstanceAccuracy);
}
示例3: add
import edu.umass.cs.mallet.base.types.Instance; //导入依赖的package包/类
/** Appends the instance to this list. Note that since memory for
 * the Instance has already been allocated, no check is made to
 * catch OutOfMemoryError.
 *
 * <p>The first instance added fixes this list's pipe if it has not been
 * set yet, and records the class of its data. The class of its target is
 * recorded only when the pipe processes targets and the target is non-null.
 * The instance is locked, appended, and marked in {@code inMemory}.
 *
 * @param instance the instance to append; its pipe must be this list's pipe
 * @return <code>true</code> if successful
 * @throws IllegalArgumentException if the instance's pipe differs from the
 *         pipe already associated with this list
 */
public boolean add (Instance instance)
{
    if (pipe == notYetSetPipe)
        pipe = instance.getPipe();
    else if (instance.getPipe() != pipe)
        // Making sure that the Instance has the same pipe as us.
        // xxx This also is a good time check that the constituent data is
        // of a consistent type?
        throw new IllegalArgumentException ("pipes don't match: instance: "+
                                            instance.getPipe()+" Instance.list: "+
                                            this.pipe);
    if (dataClass == null) {
        dataClass = instance.data.getClass();
        // A target-processing pipe can still yield an instance without a
        // target; skip it rather than fail with a NullPointerException.
        if (pipe != null && pipe.isTargetProcessing() && instance.target != null)
            targetClass = instance.target.getClass();
    }
    instance.setLock();
    boolean ret = instances.add (instance);
    inMemory.set(size()-1);
    logger.finer ("Added instance " + (size()-1) + ". Free memory remaining (bytes): " +
                  Runtime.getRuntime().freeMemory());
    return ret;
}
示例4: pipe
import edu.umass.cs.mallet.base.types.Instance; //导入依赖的package包/类
/**
 * Tags capitalized tokens with a {@code HEADER=<word>} feature when the
 * token sequence looks like it starts with an all-caps header followed by
 * a dash.
 *
 * <p>The sequence qualifies when it has more than three tokens, token 2 or
 * token 3 is exactly "-", and token 1 consists only of the letters A-Z.
 * Sequences whose header word is "PRESS" are left untouched.
 *
 * @param carrier instance whose data is a {@code TokenSequence}
 * @return the same carrier, with its tokens possibly given the new feature
 */
public Instance pipe (Instance carrier)
{
    TokenSequence tokens = (TokenSequence) carrier.getData();
    if (tokens.size() <= 3) {
        return carrier;
    }
    boolean dashFollows = tokens.getToken(2).getText().equals("-")
        || tokens.getToken(3).getText().equals("-");
    if (!dashFollows || !tokens.getToken(1).getText().matches("[A-Z]+")) {
        return carrier;
    }
    String headerWord = tokens.getToken(1).getText();
    // Don't bother with "PRESS DIGEST" headers
    if (headerWord.equals("PRESS")) {
        return carrier;
    }
    String headerFeature = "HEADER="+headerWord;
    int position = 0;
    while (position < tokens.size()) {
        Token candidate = tokens.getToken(position);
        // Restricted to capitalized words: applying the feature to every
        // token would produce an immense number of feature conjunctions
        // (every word paired with each HEADER= feature).
        if (candidate.getText().matches("^[A-Z].*")) {
            candidate.setFeatureValue (headerFeature, 1.0);
        }
        position++;
    }
    return carrier;
}
示例5: pipeOutputAccumulate
import edu.umass.cs.mallet.base.types.Instance; //导入依赖的package包/类
/**
 * Adds the output of a pipe to this list, dispatching on the runtime type
 * of the carrier's data.
 *
 * <p>An {@code InstanceList}, {@code PipeInputIterator} or {@code Instance}
 * found as the carrier's data is handed to the matching {@code add}
 * overload. Otherwise the carrier itself is added, after adopting
 * {@code iteratedPipe} as this list's pipe if none has been set yet.
 *
 * @param carrier the instance produced by the pipe
 * @param iteratedPipe the pipe adopted when this list has no pipe yet
 */
public void pipeOutputAccumulate (Instance carrier, Pipe iteratedPipe)
{
    // No assertion that iteratedPipe == pipe: it would not hold when using
    // IteratedPipe.  The add() overloads check that the pipes match.
    Object payload = carrier.getData();
    if (payload instanceof InstanceList) {
        add ((InstanceList) payload);
        return;
    }
    if (payload instanceof PipeInputIterator) {
        add ((PipeInputIterator) payload);
        return;
    }
    if (payload instanceof Instance) {
        add ((Instance) payload);
        return;
    }
    if (pipe == notYetSetPipe) {
        pipe = iteratedPipe;
    }
    // Carrier has already been piped; make sure not to repipe it.
    add (carrier);
}
示例6: add
import edu.umass.cs.mallet.base.types.Instance; //导入依赖的package包/类
/**
 * Appends the instance to this list.
 *
 * <p>If this list has no pipe yet it adopts the instance's pipe. The first
 * instance added also records the class of its data and, when the pipe
 * processes targets and the target is non-null, the class of its target.
 *
 * @param instance the instance to append; its pipe must be this list's pipe
 * @return the result of adding the instance to the backing collection
 * @throws IllegalArgumentException if the instance's pipe differs from the
 *         pipe already associated with this list
 */
public boolean add (Instance instance)
{
    if (pipe == notYetSetPipe) {
        pipe = instance.getPipe();
    } else if (instance.getPipe() != pipe) {
        // The Instance must have been produced by the same pipe as this list.
        // xxx This would also be a good time to check that the constituent
        // data is of a consistent type?
        String mismatch = "pipes don't match: instance: "+
            instance.getPipe()+" Instance.list: "+
            this.pipe;
        throw new IllegalArgumentException (mismatch);
    }
    if (dataClass == null) {
        dataClass = instance.data.getClass();
        boolean recordTarget = pipe != null
            && pipe.isTargetProcessing()
            && instance.target != null;
        if (recordTarget) {
            targetClass = instance.target.getClass();
        }
    }
    return instances.add (instance);
}
示例7: pipe
import edu.umass.cs.mallet.base.types.Instance; //导入依赖的package包/类
/**
 * Runs the carrier through the pipes of this sequence in order, beginning
 * at {@code startingIndex}.
 *
 * <p>A null entry in {@code pipes} is reported on standard error and
 * skipped; the carrier passes through it unchanged.
 *
 * @param carrier the instance to process
 * @param startingIndex index of the first pipe to apply
 * @return the instance returned by the last pipe applied, or
 *         {@code carrier} itself if no pipe was applied
 */
public Instance pipe(Instance carrier, int startingIndex)
{
    Instance current = carrier;
    int index = startingIndex;
    while (index < pipes.size()) {
        Pipe stage = (Pipe) pipes.get(index);
        index++;
        if (stage != null) {
            current = stage.pipe(current);
        } else {
            System.err.println("Pipe is null");
        }
    }
    return current;
}
示例8: pipe
import edu.umass.cs.mallet.base.types.Instance; //导入依赖的package包/类
/**
 * For each token, looks for the earliest preceding token with identical
 * text and copies that token's features onto it, each copied feature being
 * named <code>namePrefix + key</code>.
 *
 * Tokens are visited from last to first. A token is skipped entirely when
 * <code>featureRegex</code> is non-null and does not match its whole text.
 *
 * @param carrier instance whose data is a <code>TokenSequence</code>
 * @return the same carrier, with its tokens modified in place
 */
public Instance pipe (Instance carrier)
{
TokenSequence ts = (TokenSequence) carrier.getData();
int tsSize = ts.size();
// Walk backwards over the sequence; each token only looks at tokens before it.
for (int i = tsSize-1; i >= 0; i--) {
Token t = ts.getToken (i);
String text = t.getText();
if (featureRegex != null && !featureRegex.matcher(text).matches())
continue;
// Scan forward from the start so the first hit is the earliest mention.
for (int j = 0; j < i; j++) {
if (ts.getToken(j).getText().equals(text)) {
PropertyList.Iterator iter = ts.getToken(j).getFeatures().iterator();
while (iter.hasNext()) {
iter.next();
String key = iter.getKey();
// With no filterRegex every feature is copied.  Otherwise the XOR copies
// keys that match the regex when includeFiltered is true, and keys that
// do not match it when includeFiltered is false.
if (filterRegex == null || (filterRegex.matcher(key).matches() ^ !includeFiltered))
t.setFeatureValue (namePrefix+key, iter.getNumericValue());
}
// Only the earliest matching token contributes features.
break;
}
// NOTE(review): this runs once for every earlier token that does NOT match,
// so the firstMentionName feature is also set on tokens for which a match is
// found later in this scan (unless the match is at j == 0), and it is never
// set on token 0 because this loop does not execute for i == 0.  If the
// feature is meant to mark tokens that have no earlier mention, this
// placement looks unintended -- confirm before relying on it.
if (firstMentionName != null)
t.setFeatureValue (firstMentionName, 1.0);
}
}
return carrier;
}
示例9: getUnnormalizedClassificationScores
import edu.umass.cs.mallet.base.types.Instance; //导入依赖的package包/类
/**
 * Fills {@code scores} with one unnormalized score per label for the given
 * instance.
 *
 * <p>Each score is the label's entry at {@code defaultFeatureIndex} in
 * {@code parameters} plus the value {@code MatrixOps.rowDotProduct} returns
 * for that label's row and the instance's feature vector. The per-class
 * feature selection is used when present, otherwise the shared one.
 *
 * @param instance instance whose data, seen through {@code instancePipe},
 *        is a {@code FeatureVector}
 * @param scores output array; expected to have one slot per label
 */
public void getUnnormalizedClassificationScores (Instance instance, double[] scores)
{
    // Row width is taken from defaultFeatureIndex rather than from the
    // alphabet size, which would run out of bounds if the pipe has grown
    // since training.
    final int rowWidth = this.defaultFeatureIndex + 1;
    final int labelCount = getLabelAlphabet().size();
    assert (scores.length == labelCount);
    FeatureVector features = (FeatureVector) instance.getData (this.instancePipe);
    // The feature vector must use the same dictionary as our data pipe,
    // otherwise the parameter indices would not line up.
    assert (features.getAlphabet ()
            == this.instancePipe.getDataAlphabet ());
    int label = 0;
    while (label < labelCount) {
        double defaultTerm = parameters[label*rowWidth + defaultFeatureIndex];
        double featureTerm = MatrixOps.rowDotProduct (parameters, rowWidth,
                                                      label, features,
                                                      defaultFeatureIndex,
                                                      (perClassFeatureSelection == null
                                                       ? featureSelection
                                                       : perClassFeatureSelection[label]));
        scores[label] = defaultTerm + featureTerm;
        label++;
    }
}
示例10: getPositionsAndTypes
import edu.umass.cs.mallet.base.types.Instance; //导入依赖的package包/类
/**
 * Tags a sentence with {@code forwardCRF} and decodes each predicted tag
 * into a position and, where present, a mention type.
 *
 * <p>Each tag string (e.g. "O" or "B-GENE") is split on "-": the first part
 * becomes the {@code TagPosition}, and the second part becomes the
 * {@code MentionType} only when the split yields exactly two parts. Entries
 * of {@code types} for other tags are left as they were.
 *
 * @param sentence the sentence to tag
 * @param positions output array, one slot per predicted tag
 * @param types output array, one slot per predicted tag
 * @param reverse passed to {@code getTrainingText}; when true both output
 *        arrays are reversed after being filled
 * @throws IllegalArgumentException if either output array's length differs
 *         from the number of predicted tags
 */
private void getPositionsAndTypes(Sentence sentence, TagPosition[] positions, MentionType[] types, boolean reverse)
{
    Instance tagged = new Instance(sentence.getTrainingText(format, reverse), null, sentence.getTag(), null, forwardCRF.getInputPipe());
    Sequence predicted = forwardCRF.viterbiPath((Sequence)tagged.getData()).output();
    if (positions.length != predicted.size() || types.length != predicted.size())
    {
        throw new IllegalArgumentException();
    }
    int index = 0;
    while (index < predicted.size())
    {
        // The tag string is e.g. "O" or "B-GENE"
        String[] parts = predicted.get(index).toString().split("-");
        positions[index] = TagPosition.valueOf(parts[0]);
        // TODO Verify that the type stays the same
        if (parts.length == 2)
        {
            types[index] = MentionType.getType(parts[1]);
        }
        index++;
    }
    if (!reverse)
    {
        return;
    }
    reverse(positions);
    reverse(types);
}
示例11: pipe
import edu.umass.cs.mallet.base.types.Instance; //导入依赖的package包/类
/**
 * Replaces the carrier's token sequence with a new sequence of n-grams.
 *
 * <p>For every token position and every size in {@code gramSizes}, the
 * n-gram ending at that position is emitted, provided the size is positive
 * and enough preceding tokens exist. A size of 1 re-adds the original
 * {@code Token} object. Larger sizes add a string made of the token texts
 * joined with "_", in sequence order.
 *
 * <p>The preceding tokens are now joined by their text
 * ({@code getText()}), matching how the current token is read. They were
 * previously concatenated as objects, which relied on
 * {@code Token.toString()} returning exactly the text.
 *
 * @param carrier instance whose data is a {@code TokenSequence}
 * @return the same carrier, its data replaced by the n-gram sequence
 */
public Instance pipe (Instance carrier)
{
    TokenSequence tmpTS = new TokenSequence();
    TokenSequence ts = (TokenSequence) carrier.getData();
    for (int i = 0; i < ts.size(); i++) {
        Token t = ts.getToken(i);
        for (int j = 0; j < gramSizes.length; j++) {
            int len = gramSizes[j];
            // Skip invalid sizes and n-grams that would start before the
            // first token.
            if (len <= 0 || len > (i+1)) continue;
            if (len == 1) { tmpTS.add(t); continue; }
            String newTerm = t.getText();
            // Prepend the len-1 preceding tokens, nearest first, so the
            // final string reads in sequence order.
            for (int k = 1; k < len; k++)
                newTerm = ts.getToken(i-k).getText() + "_" + newTerm;
            tmpTS.add(newTerm);
        }
    }
    carrier.setData(tmpTS);
    return carrier;
}
示例12: train
import edu.umass.cs.mallet.base.types.Instance; //导入依赖的package包/类
/**
 * Builds an instance list from the sentences and trains a new
 * {@code CRF4} on it.
 *
 * <p>States are added with {@code addStatesForLabelsConnectedAsIn} for
 * order 1 and {@code addStatesForBiLabelsConnectedAsIn} for order 2.
 * Training then uses either {@code trainWithFeatureInduction} or
 * {@code train}, depending on {@code useFeatureInduction}.
 *
 * @param sentences the training sentences
 * @param order 1 or 2; selects how the CRF states are built
 * @param useFeatureInduction whether to train with feature induction
 * @param format tag format passed to {@code getTrainingText}
 * @param pipe pipe used for the instance list, every instance and the CRF
 * @param reverse passed to {@code getTrainingText}
 * @return the trained CRF
 * @throws IllegalArgumentException if {@code order} is neither 1 nor 2;
 *         this is only detected after the instance list has been built
 */
private static CRF4 train(List<Sentence> sentences, int order, boolean useFeatureInduction, TagFormat format, Pipe pipe, boolean reverse)
{
    InstanceList trainingSet = new InstanceList(pipe);
    for (Sentence current : sentences)
    {
        trainingSet.add(new Instance(current.getTrainingText(format, reverse), null, current.getTag(), null, pipe));
    }
    CRF4 model = new CRF4(pipe, null);
    switch (order)
    {
        case 1:
            model.addStatesForLabelsConnectedAsIn(trainingSet);
            break;
        case 2:
            model.addStatesForBiLabelsConnectedAsIn(trainingSet);
            break;
        default:
            throw new IllegalArgumentException("Order must be equal to 1 or 2");
    }
    if (!useFeatureInduction)
    {
        model.train(trainingSet, null, null, (MultiSegmentationEvaluator)null, 99999, 10, new double[] {.2, .5, .8});
        return model;
    }
    model.trainWithFeatureInduction(trainingSet, null, null, null, 99999, 100, 10, 1000, 0.5, false, new double[] {.2, .5, .8});
    return model;
}
示例13: pipe
import edu.umass.cs.mallet.base.types.Instance; //导入依赖的package包/类
/**
 * Converts the data of an <CODE>Instance</CODE> from a CharSequence of
 * comma-separated values into a <CODE>double[]</CODE>, where each index
 * stands for one feature.
 *
 * <p>The number of features counted in the first instance seen is
 * remembered, and every later instance must have the same count.
 *
 * @param carrier instance whose data is a CharSequence
 * @return the same carrier, its data replaced by the parsed array
 * @throws IllegalArgumentException if the feature count differs from the
 *         one remembered from the first instance
 */
public Instance pipe( Instance carrier ) {
    CharSequence line = (CharSequence)carrier.getData();
    int featuresInLine = countNumberFeatures (line);
    if (numberFeatures == -1) {
        // first instance seen
        numberFeatures = featuresInLine;
    }
    if (numberFeatures != featuresInLine) {
        throw new IllegalArgumentException ("Instances must have same-length feature vectors. length_i: " + numberFeatures + " length_j: " + featuresInLine);
    }
    double[] values = new double[numberFeatures];
    lexer.setCharSequence (line);
    for (int slot = 0; lexer.hasNext(); slot++) {
        values[slot] = Double.parseDouble ((String)lexer.next());
    }
    carrier.setData (values);
    return carrier;
}
示例14: main
import edu.umass.cs.mallet.base.types.Instance; //导入依赖的package包/类
/**
 * Tokenizes each file named on the command line and prints the result.
 *
 * <p>For every argument a separator line "===", the argument itself and
 * the string form of the resulting {@code TokenSequence} are written to
 * standard output. Any exception is printed, its stack trace dumped, and
 * the remaining arguments are not processed.
 *
 * @param args paths of the files to tokenize
 */
public static void main (String[] args)
{
    try {
        for (String path : args) {
            Instance input = new Instance (new File(path), null, null, null);
            Pipe[] stages = new Pipe[] {
                new Input2CharSequence (),
                new CharSequence2TokenSequence(new CharSequenceLexer())};
            Pipe tokenizer = new SerialPipes (stages);
            Instance output = tokenizer.pipe (input);
            TokenSequence tokens = (TokenSequence) output.getData();
            System.out.println ("===");
            System.out.println (path);
            System.out.println (tokens.toString());
        }
    } catch (Exception failure) {
        System.out.println (failure);
        failure.printStackTrace();
    }
}
示例15: pipe
import edu.umass.cs.mallet.base.types.Instance; //导入依赖的package包/类
/**
 * Replaces the carrier's data with the result of the matching
 * {@code pipe} overload when the data is a {@code URI}, {@code File} or
 * {@code Reader}. Data that already is a {@code CharSequence} is left
 * unchanged.
 *
 * @param carrier instance whose data is a URI, File, Reader or CharSequence
 * @return the same carrier, with its data possibly replaced
 * @throws IllegalArgumentException if the data is null or of an
 *         unsupported class, or if reading fails; in the last case the
 *         underlying {@code IOException} is attached as the cause
 */
public Instance pipe (Instance carrier)
{
    Object data = carrier.getData();
    // Report missing data as an argument error instead of failing with a
    // NullPointerException while building the "unsupported class" message.
    if (data == null)
        throw new IllegalArgumentException ("Does not handle null data");
    try {
        if (data instanceof URI)
            carrier.setData(pipe ((URI)data));
        else if (data instanceof File)
            carrier.setData(pipe ((File)data));
        else if (data instanceof Reader)
            carrier.setData(pipe ((Reader)data));
        else if (data instanceof CharSequence)
            ; // No conversion necessary
        else
            throw new IllegalArgumentException ("Does not handle class "+data.getClass());
    } catch (java.io.IOException e) {
        // Same message as before, but keep the original exception as the
        // cause so its stack trace is not lost.
        throw new IllegalArgumentException ("IOException " + e, e);
    }
    return carrier;
}