本文整理汇总了Java中cc.mallet.types.LabelAlphabet.lookupLabel方法的典型用法代码示例。如果您正苦于以下问题:Java LabelAlphabet.lookupLabel方法的具体用法?Java LabelAlphabet.lookupLabel怎么用?Java LabelAlphabet.lookupLabel使用的例子?那么，这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在类cc.mallet.types.LabelAlphabet的用法示例。
在下文中一共展示了LabelAlphabet.lookupLabel方法的15个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Java代码示例。
示例1: toLabelsSequence
import cc.mallet.types.LabelAlphabet; //导入方法依赖的package包/类
/**
 * Converts an assignment into a {@link LabelsSequence} holding one {@link Labels}
 * entry per time step, each with one {@link Label} per slice.
 *
 * @param assn assignment queried for the outcome index of every non-null variable
 * @return a sequence with {@code maxTime()} entries of {@code numSlices()} labels each
 */
public LabelsSequence toLabelsSequence (Assignment assn)
{
  final int sliceCount = numSlices ();
  final int timeSteps = maxTime ();
  Labels[] perStep = new Labels [timeSteps];
  for (int t = 0; t < timeSteps; t++) {
    Label[] row = new Label [sliceCount];
    for (int slice = 0; slice < sliceCount; slice++) {
      Variable v = varOfIndex (t, slice);
      // A missing variable falls back to outcome index 0.
      int outcome = (v == null) ? 0 : assn.get (v);
      // NOTE(review): labelOfVar is still invoked when v is null -- confirm it tolerates that.
      row[slice] = labelOfVar (v).getLabelAlphabet ().lookupLabel (outcome);
    }
    perStep[t] = new Labels (row);
  }
  return new LabelsSequence (perStep);
}
示例2: testSerializable
import cc.mallet.types.LabelAlphabet; //导入方法依赖的package包/类
/**
 * Round-trips a two-element {@link LabelsSequence} through
 * {@code TestSerializable.cloneViaSerialization} and checks that the size and the
 * string form of each element are unchanged.
 */
public void testSerializable () throws IOException, ClassNotFoundException
{
  LabelAlphabet alphabet = new LabelAlphabet ();
  Label[] firstPair = { alphabet.lookupLabel ("A"), alphabet.lookupLabel ("B") };
  Label[] secondPair = { alphabet.lookupLabel ("C"), alphabet.lookupLabel ("A") };
  Labels[] steps = { new Labels (firstPair), new Labels (secondPair) };

  LabelsSequence original = new LabelsSequence (steps);
  LabelsSequence copy = (LabelsSequence) TestSerializable.cloneViaSerialization (original);

  assertEquals (original.size(), copy.size());
  for (int pos = 0; pos < 2; pos++) {
    assertEquals (original.getLabels(pos).toString(), copy.getLabels(pos).toString ());
  }
}
示例3: testReadResolve
import cc.mallet.types.LabelAlphabet; //导入方法依赖的package包/类
/**
 * Tests how serializing labels separately can lead to big losses: after a
 * serialization round trip the copy is expected to share the very same alphabet
 * and label instances as the original.
 * Original author's note: this currently fails, and it is unclear what to do about it. -cas
 */
public void testReadResolve () throws IOException, ClassNotFoundException
{
  LabelAlphabet alphabet = new LabelAlphabet ();
  for (String entry : new String[] { "TEST1", "TEST2", "TEST3" }) {
    alphabet.lookupIndex (entry);
  }

  Label first = alphabet.lookupLabel ("TEST1");
  Labelee original = new Labelee (alphabet, first);
  Labelee copy = (Labelee) TestSerializable.cloneViaSerialization (original);

  // Identity (not equality) is what is being verified here.
  assertTrue (original.dict == copy.dict);
  assertTrue (alphabet.lookupLabel("TEST1") == original.theLabel);
  assertTrue (alphabet.lookupLabel("TEST1") == copy.theLabel);
  assertTrue (original.theLabel == copy.theLabel);
}
示例4: addSpansFromTags
import cc.mallet.types.LabelAlphabet; //导入方法依赖的package包/类
/**
 * Groups consecutive tokens that carry the same label into spans and appends one
 * {@link LabeledSpan} per run to {@code labeled}.
 *
 * @param labeled       collection that receives the spans
 * @param input         tokenization the spans are cut from
 * @param tags          per-token tags; each is resolved through {@code dict} by its string form
 * @param dict          alphabet used to resolve tags to {@link Label} instances
 * @param backgroundTag label marking background; a run whose label is this instance is flagged as background
 */
private void addSpansFromTags (LabeledSpans labeled, Tokenization input, Sequence tags, LabelAlphabet dict,
                               Label backgroundTag)
{
  int pos = 0;
  int lastEnd = 0;
  while (pos < tags.size()) {
    final int runStart = pos;
    Label runTag = dict.lookupLabel (tags.get(runStart).toString());

    // Advance to the first token whose label is a different instance.
    for (; pos < tags.size(); pos++) {
      Label candidate = dict.lookupLabel (tags.get(pos).toString ());
      if (candidate != runTag) {
        break;
      }
    }

    Span run = input.subspan(runStart, pos);
    StringSpan runText = (StringSpan) run;
    addBackgroundIfNecessary (labeled, runText, lastEnd, backgroundTag);
    lastEnd = runText.getEndIdx ();

    boolean isBackground = (runTag == backgroundTag);
    labeled.add (new LabeledSpan (run, runTag, isBackground));
  }
}
示例5: testToXml
import cc.mallet.types.LabelAlphabet; //导入方法依赖的package包/类
/**
 * Builds a {@link DocumentExtraction} from a flat label sequence with background
 * tag "O" and checks the XML string it renders.
 */
public void testToXml () {
  final String text = "the quick brown fox leapt over the lazy dog";
  LabelAlphabet alphabet = new LabelAlphabet ();
  StringTokenization tokens = new StringTokenization (text, new CharSequenceLexer ());

  Label outside = alphabet.lookupLabel ("O");
  Label animal = alphabet.lookupLabel ("ANIMAL");
  Label verb = alphabet.lookupLabel ("VERB");

  // One label per token of the sentence above.
  Label[] perToken = { outside, animal, animal, animal, verb, outside, outside, animal, animal };
  LabelSequence tagSeq = new LabelSequence (perToken);

  DocumentExtraction extraction = new DocumentExtraction ("Test", alphabet, tokens, tagSeq, "O");
  String actual = extraction.toXmlString();

  String expected = "<?xml version=\"1.0\" encoding=\"UTF-8\"?>\r\n"
      + "<doc>the <ANIMAL>quick brown fox </ANIMAL><VERB>leapt </VERB>over the <ANIMAL>lazy dog</ANIMAL></doc>\r\n";
  assertEquals (expected, actual);
}
示例6: testToXmlBIO
import cc.mallet.types.LabelAlphabet; //导入方法依赖的package包/类
/**
 * Builds a {@link DocumentExtraction} through a {@link BIOTokenizationFilter}
 * and checks the XML string it renders.
 */
public void testToXmlBIO () {
  final String text = "the quick brown fox leapt over the lazy dog";
  LabelAlphabet alphabet = new LabelAlphabet ();
  StringTokenization tokens = new StringTokenization (text, new CharSequenceLexer ());

  Label outside = alphabet.lookupLabel ("O");
  Label beginAnimal = alphabet.lookupLabel ("B-ANIMAL");
  Label animal = alphabet.lookupLabel ("ANIMAL");
  Label beginVerb = alphabet.lookupLabel ("B-VERB");
  Label insideVerb = alphabet.lookupLabel ("I-VERB");

  // One label per token of the sentence above.
  Label[] perToken = {
    outside, beginAnimal, animal, beginAnimal, beginVerb, insideVerb, outside, animal, animal
  };
  LabelSequence tagSeq = new LabelSequence (perToken);

  DocumentExtraction extraction =
      new DocumentExtraction ("Test", alphabet, tokens, tagSeq, null, "O", new BIOTokenizationFilter());
  String actual = extraction.toXmlString();

  String expected = "<?xml version=\"1.0\" encoding=\"UTF-8\"?>\r\n"
      + "<doc>the <ANIMAL>quick brown </ANIMAL><ANIMAL>fox </ANIMAL><VERB>leapt over </VERB>the <ANIMAL>lazy dog</ANIMAL></doc>\r\n";
  assertEquals (expected, actual);
}
示例7: addSpansFromTags
import cc.mallet.types.LabelAlphabet; //导入方法依赖的package包/类
/**
 * Groups tokens into spans and appends one {@link LabeledSpan} per span to
 * {@code labeled}. A span ends when the next tag is a begin tag (per
 * {@code isBeginTag}) or does not match the span's first tag (per {@code tagsMatch}).
 * Begin/inside tags are passed through {@code trimTag} before the span is recorded.
 *
 * @param labeled       collection that receives the spans
 * @param input         tokenization the spans are created from
 * @param tags          per-token tags; each is resolved through {@code dict} by its string form
 * @param dict          alphabet used to resolve tags to {@link Label} instances
 * @param backgroundTag label marking background; a span whose final label is this instance is flagged as background
 */
private void addSpansFromTags (LabeledSpans labeled, Tokenization input, Sequence tags, LabelAlphabet dict,
                               Label backgroundTag)
{
  int pos = 0;
  int lastEnd = 0;
  while (pos < tags.size ()) {
    final int spanStart = pos;
    Label spanTag = dict.lookupLabel (tags.get (spanStart).toString ());

    // Extend the span from the token after the start until a boundary tag is seen.
    for (pos++; pos < tags.size (); pos++) {
      Label following = dict.lookupLabel (tags.get (pos).toString ());
      if (isBeginTag (following) || !tagsMatch (spanTag, following)) {
        break;
      }
    }

    Span span = createSpan (input, spanStart, pos);
    StringSpan spanText = (StringSpan) span;
    addBackgroundIfNecessary (labeled, spanText, lastEnd, backgroundTag);
    lastEnd = spanText.getEndIdx ();

    if (isBeginTag (spanTag) || isInsideTag (spanTag)) {
      spanTag = trimTag (dict, spanTag);
    }
    boolean isBackground = (spanTag == backgroundTag);
    labeled.add (new LabeledSpan (span, spanTag, isBackground));
  }
}
示例8: deserializeObject
import cc.mallet.types.LabelAlphabet; //导入方法依赖的package包/类
/**
 * Reads back an object written by
 * {@link #serializeObject(ObjectOutputStream, Object)}.
 * A leading type character selects the decoding: a label is read as its name and
 * resolved in the target alphabet, a feature vector is read as index and value
 * arrays over the data alphabet, and anything else is read as a plain object.
 *
 * @param in stream positioned at the type character
 * @return the decoded object
 * @throws IOException if the stream fails or the type character is not recognized
 * @throws ClassNotFoundException if a serialized class cannot be resolved
 */
private Object deserializeObject (ObjectInputStream in)
    throws IOException, ClassNotFoundException {
  final char tag = in.readChar ();
  switch (tag) {
    case TYPE_LABEL: {
      LabelAlphabet labels = (LabelAlphabet) getTargetAlphabet ();
      String labelName = (String) in.readObject ();
      return labels.lookupLabel (labelName);
    }
    case TYPE_FEATURE_VECTOR: {
      int[] featureIndices = (int[]) in.readObject ();
      double[] featureValues = (double[]) in.readObject ();
      return new FeatureVector(getDataAlphabet (), featureIndices, featureValues);
    }
    case TYPE_OBJECT:
      return in.readObject ();
    default:
      throw new IOException ("Unknown object type " + tag);
  }
}
示例9: pipe
import cc.mallet.types.LabelAlphabet; //导入方法依赖的package包/类
/**
 * Drops single-space tokens from the instance's tokenization and labels every
 * remaining token "start" when it is the first token or follows a dropped space,
 * and "notstart" otherwise.
 * The instance's data becomes the filtered tokenization, its source becomes the
 * concatenated text of the kept tokens, and its target becomes the label sequence
 * when target processing is enabled.
 *
 * @param carrier instance whose data is a {@link StringTokenization}
 * @return the same instance, modified in place
 */
public Instance pipe(Instance carrier)
{
  StringTokenization original = (StringTokenization) carrier.getData();
  StringTokenization kept = new StringTokenization((CharSequence) original.getDocument ());
  final LabelAlphabet dict = (LabelAlphabet) getTargetAlphabet();
  LabelSequence labels = new LabelSequence(dict);

  // Results are unused; the lookups are kept as in the original
  // (presumably they register both labels in the alphabet up front -- confirm).
  dict.lookupLabel ("start");
  dict.lookupLabel ("notstart");

  boolean afterSpace = true;
  StringBuffer text = new StringBuffer();
  for (int i = 0; i < original.size(); i++) {
    StringSpan token = (StringSpan) original.getSpan(i);
    if (token.getText().equals(" ")) {
      afterSpace = true;
      continue;
    }
    text.append(token.getText());
    kept.add(token);
    String tag = afterSpace ? "start" : "notstart";
    labels.add(tag);
    afterSpace = false;
  }

  if (isTargetProcessing()) {
    carrier.setTarget(labels);
  }
  carrier.setData(kept);
  carrier.setSource(text.toString());
  return carrier;
}
示例10: testNestedToXML
import cc.mallet.types.LabelAlphabet; //导入方法依赖的package包/类
/**
 * Adds two extra labeled spans on top of the spans produced by a
 * {@link DefaultTokenizationFilter} and checks that {@link DocumentExtraction}
 * renders them as nested XML elements.
 */
public void testNestedToXML ()
{
  final String text = "the quick brown fox leapt over the lazy dog";
  LabelAlphabet alphabet = new LabelAlphabet ();
  StringTokenization tokens = new StringTokenization (text, new CharSequenceLexer ());

  Label outside = alphabet.lookupLabel ("O");
  Label animal = alphabet.lookupLabel ("ANIMAL");
  Label verb = alphabet.lookupLabel ("VERB");
  Label adjective = alphabet.lookupLabel ("ADJ");
  Label mammal = alphabet.lookupLabel ("MAMMAL");

  // One label per token of the sentence above.
  Label[] perToken = { outside, animal, animal, animal, verb, outside, animal, animal, animal };
  LabelSequence tagSeq = new LabelSequence (perToken);
  LabeledSpans labeledSpans =
      new DefaultTokenizationFilter ().constructLabeledSpans (alphabet, text, outside, tokens, tagSeq);

  // Extra single-token spans layered over the ANIMAL spans.
  Span fox = tokens.subspan (3, 4);
  labeledSpans.add (new LabeledSpan (fox, mammal, false));
  Span lazy = tokens.subspan (7, 8);
  labeledSpans.add (new LabeledSpan (lazy, adjective, false));

  DocumentExtraction extraction = new DocumentExtraction ("Test", alphabet, tokens, labeledSpans, null, "O");
  String actual = extraction.toXmlString();

  String expected = "<?xml version=\"1.0\" encoding=\"UTF-8\"?>\r\n"
      + "<doc>the <ANIMAL>quick brown <MAMMAL>fox </MAMMAL></ANIMAL><VERB>leapt </VERB>over <ANIMAL>the <ADJ>lazy </ADJ>dog</ANIMAL></doc>\r\n";
  assertEquals (expected, actual);
}
示例11: testNestedXMLTokenizationFilter
import cc.mallet.types.LabelAlphabet; //导入方法依赖的package包/类
/**
 * Checks the XML rendered by {@link DocumentExtraction} with a
 * {@link HierarchicalTokenizationFilter} for "|"-separated labels, first with the
 * default filter and then with a filter constructed with the pattern "AD.*".
 */
public void testNestedXMLTokenizationFilter ()
{
  final String text = "the quick brown fox leapt over the lazy dog";
  LabelAlphabet alphabet = new LabelAlphabet ();
  StringTokenization tokens = new StringTokenization (text, new CharSequenceLexer ());

  Label outside = alphabet.lookupLabel ("O");
  Label animal = alphabet.lookupLabel ("ANIMAL");
  Label animalMammal = alphabet.lookupLabel ("ANIMAL|MAMMAL");
  Label verb = alphabet.lookupLabel ("VERB");
  Label animalAdj = alphabet.lookupLabel ("ANIMAL|ADJ");
  Label animalAdjMammal = alphabet.lookupLabel ("ANIMAL|ADJ|MAMMAL");

  // One label per token of the sentence above.
  Label[] perToken = {
    outside, animal, animal, animalMammal, verb, outside, animal, animalAdj, animalAdjMammal
  };
  LabelSequence tagSeq = new LabelSequence (perToken);

  // Default hierarchical filter: every level becomes an element.
  DocumentExtraction extraction =
      new DocumentExtraction ("Test", alphabet, tokens, tagSeq, null, "O", new HierarchicalTokenizationFilter ());
  String actual = extraction.toXmlString();
  String expected = "<?xml version=\"1.0\" encoding=\"UTF-8\"?>\r\n"
      + "<doc>the <ANIMAL>quick brown <MAMMAL>fox </MAMMAL></ANIMAL><VERB>leapt </VERB>over <ANIMAL>the <ADJ>lazy <MAMMAL>dog</MAMMAL></ADJ></ANIMAL></doc>\r\n";
  assertEquals (expected, actual);

  // Test the ignore function: no ADJ element is expected in the output.
  HierarchicalTokenizationFilter ignoring = new HierarchicalTokenizationFilter (Pattern.compile ("AD.*"));
  extraction = new DocumentExtraction ("Test", alphabet, tokens, tagSeq, null, "O", ignoring);
  actual = extraction.toXmlString();
  expected = "<?xml version=\"1.0\" encoding=\"UTF-8\"?>\r\n"
      + "<doc>the <ANIMAL>quick brown <MAMMAL>fox </MAMMAL></ANIMAL><VERB>leapt </VERB>over <ANIMAL>the lazy <MAMMAL>dog</MAMMAL></ANIMAL></doc>\r\n";
  assertEquals (expected, actual);
}
示例12: eval
import cc.mallet.types.LabelAlphabet; //导入方法依赖的package包/类
/**
 * Classifies a set of string features and returns one score per label.
 * Features that are not present in the classifier's data alphabet are skipped.
 *
 * @param features feature names describing the instance to classify
 * @return an array indexed by label index in the classifier's label alphabet,
 *         holding the value the classification assigns to that label
 */
public double[] eval(String[] features) {
  Alphabet dataAlphabet = classifer.getAlphabet();

  List<Integer> malletFeatureList = new ArrayList<>(features.length);
  for (String feature : features) {
    // Pure lookup: the one-argument overload would insert unseen features into the
    // classifier's alphabet whenever growth has not been stopped, so evaluation
    // would mutate the model and the -1 filter below would never trigger.
    int featureId = dataAlphabet.lookupIndex(feature, false);
    if (featureId != -1) {
      malletFeatureList.add(featureId);
    }
  }

  int[] malletFeatures = new int[malletFeatureList.size()];
  for (int i = 0; i < malletFeatures.length; i++) {
    malletFeatures[i] = malletFeatureList.get(i);
  }

  FeatureVector fv = new FeatureVector(dataAlphabet, malletFeatures);
  Instance instance = new Instance(fv, null, null, null);
  Classification result = classifer.classify(instance);
  LabelVector labeling = result.getLabelVector();

  LabelAlphabet targetAlphabet = classifer.getLabelAlphabet();
  double[] outcomes = new double[targetAlphabet.size()];
  for (int i = 0; i < outcomes.length; i++) {
    // Read the value via the label's rank so the output follows alphabet order.
    Label label = targetAlphabet.lookupLabel(i);
    int rank = labeling.getRank(label);
    outcomes[i] = labeling.getValueAtRank(rank);
  }
  return outcomes;
}
示例13: ignoretestNestedToXML
import cc.mallet.types.LabelAlphabet; //导入方法依赖的package包/类
/**
 * Adds two extra labeled spans on top of the spans produced by a
 * {@link DefaultTokenizationFilter} and checks that {@link DocumentExtraction}
 * renders them as nested XML elements.
 */
public void ignoretestNestedToXML ()
{
  final String sentence = "the quick brown fox leapt over the lazy dog";
  LabelAlphabet labelDict = new LabelAlphabet ();
  StringTokenization tokenization = new StringTokenization (sentence, new CharSequenceLexer ());

  Label background = labelDict.lookupLabel ("O");
  Label animal = labelDict.lookupLabel ("ANIMAL");
  Label verb = labelDict.lookupLabel ("VERB");
  Label adjective = labelDict.lookupLabel ("ADJ");
  Label mammal = labelDict.lookupLabel ("MAMMAL");

  // One label per token of the sentence above.
  Label[] tokenLabels = { background, animal, animal, animal, verb, background, animal, animal, animal };
  LabelSequence labelSeq = new LabelSequence (tokenLabels);
  LabeledSpans allSpans =
      new DefaultTokenizationFilter ().constructLabeledSpans (labelDict, sentence, background, tokenization, labelSeq);

  // Extra single-token spans layered over the ANIMAL spans.
  Span foxSpan = tokenization.subspan (3, 4);
  allSpans.add (new LabeledSpan (foxSpan, mammal, false));
  Span lazySpan = tokenization.subspan (7, 8);
  allSpans.add (new LabeledSpan (lazySpan, adjective, false));

  DocumentExtraction extraction = new DocumentExtraction ("Test", labelDict, tokenization, allSpans, null, "O");
  String rendered = extraction.toXmlString();

  String wanted = "<?xml version=\"1.0\" encoding=\"UTF-8\"?>\r\n"
      + "<doc>the <ANIMAL>quick brown <MAMMAL>fox </MAMMAL></ANIMAL><VERB>leapt </VERB>over <ANIMAL>the <ADJ>lazy </ADJ>dog</ANIMAL></doc>\r\n";
  assertEquals (wanted, rendered);
}
示例14: ignoretestNestedXMLTokenizationFilter
import cc.mallet.types.LabelAlphabet; //导入方法依赖的package包/类
/**
 * Checks the XML rendered by {@link DocumentExtraction} with a
 * {@link HierarchicalTokenizationFilter} for "|"-separated labels, first with the
 * default filter and then with a filter constructed with the pattern "AD.*".
 */
public void ignoretestNestedXMLTokenizationFilter ()
{
  final String sentence = "the quick brown fox leapt over the lazy dog";
  LabelAlphabet labelDict = new LabelAlphabet ();
  StringTokenization tokenization = new StringTokenization (sentence, new CharSequenceLexer ());

  Label background = labelDict.lookupLabel ("O");
  Label animal = labelDict.lookupLabel ("ANIMAL");
  Label animalMammal = labelDict.lookupLabel ("ANIMAL|MAMMAL");
  Label verb = labelDict.lookupLabel ("VERB");
  Label animalAdj = labelDict.lookupLabel ("ANIMAL|ADJ");
  Label animalAdjMammal = labelDict.lookupLabel ("ANIMAL|ADJ|MAMMAL");

  // One label per token of the sentence above.
  Label[] tokenLabels = {
    background, animal, animal, animalMammal, verb, background, animal, animalAdj, animalAdjMammal
  };
  LabelSequence labelSeq = new LabelSequence (tokenLabels);

  // Default hierarchical filter: every level becomes an element.
  DocumentExtraction extraction =
      new DocumentExtraction ("Test", labelDict, tokenization, labelSeq, null, "O", new HierarchicalTokenizationFilter ());
  String rendered = extraction.toXmlString();
  String wanted = "<?xml version=\"1.0\" encoding=\"UTF-8\"?>\r\n"
      + "<doc>the <ANIMAL>quick brown <MAMMAL>fox </MAMMAL></ANIMAL><VERB>leapt </VERB>over <ANIMAL>the <ADJ>lazy <MAMMAL>dog</MAMMAL></ADJ></ANIMAL></doc>\r\n";
  assertEquals (wanted, rendered);

  // Test the ignore function: no ADJ element is expected in the output.
  HierarchicalTokenizationFilter ignoringFilter = new HierarchicalTokenizationFilter (Pattern.compile ("AD.*"));
  extraction = new DocumentExtraction ("Test", labelDict, tokenization, labelSeq, null, "O", ignoringFilter);
  rendered = extraction.toXmlString();
  wanted = "<?xml version=\"1.0\" encoding=\"UTF-8\"?>\r\n"
      + "<doc>the <ANIMAL>quick brown <MAMMAL>fox </MAMMAL></ANIMAL><VERB>leapt </VERB>over <ANIMAL>the lazy <MAMMAL>dog</MAMMAL></ANIMAL></doc>\r\n";
  assertEquals (wanted, rendered);
}
示例15: updateTarget
import cc.mallet.types.LabelAlphabet; //导入方法依赖的package包/类
/**
 * Sets the instance's target to a {@link LabelSequence} built from the expected
 * tags of {@code data}. Does nothing when {@code data} reports no expected phones.
 *
 * @param data source of the expected tags
 * @param inst instance whose target is replaced
 */
private void updateTarget(PartialTagging data, Instance inst) {
  if (data.hasExpectedPhones()) {
    LabelAlphabet alphabet = (LabelAlphabet) this.getTargetAlphabet();
    List<String> tags = data.getExpectedPredictedTags();

    // Resolve each tag to its Label, in order, straight into the target array.
    Label[] labels = new Label[tags.size()];
    int next = 0;
    for (String tag : tags) {
      labels[next++] = alphabet.lookupLabel(tag, true);
    }
    inst.setTarget(new LabelSequence(labels));
  }
}