当前位置: 首页>>代码示例>>Java>>正文


Java LabelAlphabet.lookupLabel方法代码示例

本文整理汇总了Java中cc.mallet.types.LabelAlphabet.lookupLabel方法的典型用法代码示例。如果您正苦于以下问题:Java LabelAlphabet.lookupLabel方法的具体用法?Java LabelAlphabet.lookupLabel怎么用?Java LabelAlphabet.lookupLabel使用的例子?那么,这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在类cc.mallet.types.LabelAlphabet的用法示例。


在下文中一共展示了LabelAlphabet.lookupLabel方法的15个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Java代码示例。

示例1: toLabelsSequence

import cc.mallet.types.LabelAlphabet; //导入方法依赖的package包/类
/**
 * Converts an assignment into a {@code LabelsSequence} holding one
 * {@code Labels} entry per time step, each with one {@code Label} per slice.
 *
 * @param assn assignment queried for the value index of every non-null variable
 * @return a sequence with {@code maxTime ()} entries of {@code numSlices ()} labels each
 */
public LabelsSequence toLabelsSequence (Assignment assn)
{
  final int sliceCount = numSlices ();
  final int stepCount = maxTime ();
  Labels[] perStep = new Labels [stepCount];

  for (int step = 0; step < stepCount; step++) {
    Label[] sliceLabels = new Label [sliceCount];
    for (int slice = 0; slice < sliceCount; slice++) {
      Variable var = varOfIndex (step, slice);
      // A missing variable falls back to value index 0.
      int valueIdx = (var == null) ? 0 : assn.get (var);

      // NOTE(review): labelOfVar is still invoked when var is null -- confirm it tolerates that.
      LabelAlphabet alphabet = labelOfVar (var).getLabelAlphabet ();
      sliceLabels[slice] = alphabet.lookupLabel (valueIdx);
    }
    perStep[step] = new Labels (sliceLabels);
  }

  return new LabelsSequence (perStep);
}
 
开发者ID:mimno,项目名称:GRMM,代码行数:27,代码来源:LabelsAssignment.java

示例2: testSerializable

import cc.mallet.types.LabelAlphabet; //导入方法依赖的package包/类
/**
 * Round-trips a two-entry {@code LabelsSequence} through
 * {@code TestSerializable.cloneViaSerialization} and checks that the copy has
 * the same size and the same string form for each entry.
 */
public void testSerializable () throws IOException, ClassNotFoundException
{
  LabelAlphabet alphabet = new LabelAlphabet ();

  Label[] firstPair = { alphabet.lookupLabel ("A"), alphabet.lookupLabel ("B") };
  Labels first = new Labels (firstPair);
  Label[] secondPair = { alphabet.lookupLabel ("C"), alphabet.lookupLabel ("A") };
  Labels second = new Labels (secondPair);

  LabelsSequence original = new LabelsSequence (new Labels[] { first, second });
  LabelsSequence copy = (LabelsSequence) TestSerializable.cloneViaSerialization (original);

  assertEquals (original.size(), copy.size());
  // Compare both entries by their string rendering.
  for (int entry = 0; entry < 2; entry++) {
    assertEquals (original.getLabels(entry).toString(), copy.getLabels(entry).toString ());
  }
}
 
开发者ID:kostagiolasn,项目名称:NucleosomePatternClassifier,代码行数:18,代码来源:TestLabelsSequence.java

示例3: testReadResolve

import cc.mallet.types.LabelAlphabet; //导入方法依赖的package包/类
/**
 * Tests how serializing labels separately can lead to big losses: a
 * {@code Labelee} cloned through serialization is expected to share both the
 * alphabet instance and the label instance of the original.
 * This currently fails.  I'm not sure what to do about this. -cas
 */
public void testReadResolve () throws IOException, ClassNotFoundException
{
  LabelAlphabet alphabet = new LabelAlphabet ();
  for (String entry : new String[] { "TEST1", "TEST2", "TEST3" }) {
    alphabet.lookupIndex (entry);
  }

  Label firstLabel = alphabet.lookupLabel ("TEST1");
  Labelee original = new Labelee (alphabet, firstLabel);
  Labelee copy = (Labelee) TestSerializable.cloneViaSerialization (original);

  // All checks are identity checks, not equals().
  assertTrue (original.dict == copy.dict);
  assertTrue (alphabet.lookupLabel("TEST1") == original.theLabel);
  assertTrue (alphabet.lookupLabel("TEST1") == copy.theLabel);
  assertTrue (original.theLabel == copy.theLabel);
}
 
开发者ID:kostagiolasn,项目名称:NucleosomePatternClassifier,代码行数:21,代码来源:TestLabelAlphabet.java

示例4: addSpansFromTags

import cc.mallet.types.LabelAlphabet; //导入方法依赖的package包/类
/**
 * Walks the tag sequence, groups consecutive positions that resolve to the
 * same {@code Label} object, and adds one {@code LabeledSpan} per group to
 * {@code labeled}. Before each group is added, addBackgroundIfNecessary is
 * called with the end offset of the previous group (0 for the first one).
 *
 * @param labeled       collection that receives the spans
 * @param input         tokenization from which sub-spans are taken
 * @param tags          tag sequence; each element is resolved through its string form
 * @param dict          alphabet used to resolve tag strings to labels
 * @param backgroundTag label whose spans are flagged as background (compared by identity)
 */
private void addSpansFromTags (LabeledSpans labeled, Tokenization input, Sequence tags, LabelAlphabet dict,
                              Label backgroundTag)
{
  int docOffset = 0;
  int cursor = 0;
  while (cursor < tags.size()) {
    final int runStart = cursor;
    final Label runTag = dict.lookupLabel (tags.get(runStart).toString());

    // Move the cursor to the first position whose label is a different object.
    for (; cursor < tags.size(); cursor++) {
      Label candidate = dict.lookupLabel (tags.get(cursor).toString ());
      if (candidate != runTag) break;
    }

    Span span = input.subspan(runStart, cursor);
    StringSpan textSpan = (StringSpan) span;
    addBackgroundIfNecessary (labeled, textSpan, docOffset, backgroundTag);
    docOffset = textSpan.getEndIdx ();
    labeled.add (new LabeledSpan (span, runTag, runTag == backgroundTag));
  }
}
 
开发者ID:kostagiolasn,项目名称:NucleosomePatternClassifier,代码行数:21,代码来源:DefaultTokenizationFilter.java

示例5: testToXml

import cc.mallet.types.LabelAlphabet; //导入方法依赖的package包/类
/**
 * Builds a {@code DocumentExtraction} from a flat tag sequence with "O" as the
 * background tag and compares its XML string with the expected markup.
 */
public void testToXml () {
  final String text = "the quick brown fox leapt over the lazy dog";
  LabelAlphabet alphabet = new LabelAlphabet ();
  StringTokenization tokens = new StringTokenization (text, new CharSequenceLexer ());

  Label outside = alphabet.lookupLabel ("O");
  Label animal = alphabet.lookupLabel ("ANIMAL");
  Label verb = alphabet.lookupLabel ("VERB");
  // One tag per token of the sentence.
  Label[] perToken = { outside, animal, animal, animal, verb, outside, outside, animal, animal };
  LabelSequence tags = new LabelSequence (perToken);

  String expected = "<?xml version=\"1.0\" encoding=\"UTF-8\"?>\r\n" +
          "<doc>the <ANIMAL>quick brown fox </ANIMAL><VERB>leapt </VERB>over the <ANIMAL>lazy dog</ANIMAL></doc>\r\n";
  String actual = new DocumentExtraction ("Test", alphabet, tokens, tags, "O").toXmlString();
  assertEquals (expected, actual);
}
 
开发者ID:kostagiolasn,项目名称:NucleosomePatternClassifier,代码行数:17,代码来源:TestDocumentExtraction.java

示例6: testToXmlBIO

import cc.mallet.types.LabelAlphabet; //导入方法依赖的package包/类
/**
 * Builds a {@code DocumentExtraction} from a tag sequence that mixes
 * "B-"/"I-" prefixed and plain labels, using a {@code BIOTokenizationFilter},
 * and compares its XML string with the expected markup.
 */
public void testToXmlBIO () {
  final String text = "the quick brown fox leapt over the lazy dog";
  LabelAlphabet alphabet = new LabelAlphabet ();
  StringTokenization tokens = new StringTokenization (text, new CharSequenceLexer ());

  Label outside = alphabet.lookupLabel ("O");
  Label beginAnimal = alphabet.lookupLabel ("B-ANIMAL");
  Label animal = alphabet.lookupLabel ("ANIMAL");
  Label beginVerb = alphabet.lookupLabel ("B-VERB");
  Label insideVerb = alphabet.lookupLabel ("I-VERB");
  // One tag per token of the sentence.
  Label[] perToken = {
    outside, beginAnimal, animal, beginAnimal, beginVerb, insideVerb, outside, animal, animal
  };
  LabelSequence tags = new LabelSequence (perToken);

  String expected = "<?xml version=\"1.0\" encoding=\"UTF-8\"?>\r\n" +
          "<doc>the <ANIMAL>quick brown </ANIMAL><ANIMAL>fox </ANIMAL><VERB>leapt over </VERB>the <ANIMAL>lazy dog</ANIMAL></doc>\r\n";
  DocumentExtraction extraction =
      new DocumentExtraction ("Test", alphabet, tokens, tags, null, "O", new BIOTokenizationFilter());
  String actual = extraction.toXmlString();
  assertEquals (expected, actual);
}
 
开发者ID:kostagiolasn,项目名称:NucleosomePatternClassifier,代码行数:19,代码来源:TestDocumentExtraction.java

示例7: addSpansFromTags

import cc.mallet.types.LabelAlphabet; //导入方法依赖的package包/类
/**
 * Walks the tag sequence and adds one {@code LabeledSpan} per group of
 * positions to {@code labeled}. A group starts at the current position and
 * extends until a following tag either satisfies isBeginTag or fails
 * tagsMatch against the group's first tag. If the group's tag satisfies
 * isBeginTag or isInsideTag it is passed through trimTag before the span is
 * recorded. Before each group is added, addBackgroundIfNecessary is called
 * with the end offset of the previous group (0 for the first one).
 *
 * @param labeled       collection that receives the spans
 * @param input         tokenization handed to createSpan
 * @param tags          tag sequence; each element is resolved through its string form
 * @param dict          alphabet used to resolve tag strings to labels
 * @param backgroundTag label whose spans are flagged as background (compared by identity)
 */
private void addSpansFromTags (LabeledSpans labeled, Tokenization input, Sequence tags, LabelAlphabet dict,
                               Label backgroundTag)
{
  int docOffset = 0;
  int cursor = 0;
  while (cursor < tags.size ()) {
    final int spanStart = cursor;
    Label spanTag = dict.lookupLabel (tags.get (spanStart).toString ());

    // The first position always belongs to the group; scan from the next one.
    for (cursor++; cursor < tags.size (); cursor++) {
      Label candidate = dict.lookupLabel (tags.get (cursor).toString ());
      boolean continuesSpan = !isBeginTag (candidate) && tagsMatch (spanTag, candidate);
      if (!continuesSpan) break;
    }

    Span span = createSpan (input, spanStart, cursor);
    StringSpan textSpan = (StringSpan) span;
    addBackgroundIfNecessary (labeled, textSpan, docOffset, backgroundTag);
    docOffset = textSpan.getEndIdx ();

    boolean hasPrefix = isBeginTag (spanTag) || isInsideTag (spanTag);
    if (hasPrefix) {
      spanTag = trimTag (dict, spanTag);
    }
    labeled.add (new LabeledSpan (span, spanTag, spanTag == backgroundTag));
  }
}
 
开发者ID:kostagiolasn,项目名称:NucleosomePatternClassifier,代码行数:24,代码来源:BIOTokenizationFilter.java

示例8: deserializeObject

import cc.mallet.types.LabelAlphabet; //导入方法依赖的package包/类
/**
 * Reads back one object written by
 * {@link #serializeObject(ObjectOutputStream, Object)}.
 * A leading type character selects the decoding: a label is rebuilt by
 * looking its name up in the target alphabet, a feature vector is rebuilt
 * from its index and value arrays over the data alphabet, and any other
 * supported object is read as-is.
 *
 * @param in stream positioned at the type character of the next object
 * @return the decoded object
 * @throws IOException if reading fails or the type character is not recognized
 * @throws ClassNotFoundException if the class of a serialized object cannot be found
 */
private Object deserializeObject (ObjectInputStream in)
throws IOException, ClassNotFoundException {
    final char tag = in.readChar ();

    switch (tag) {
    case TYPE_LABEL: {
        LabelAlphabet targets = (LabelAlphabet) getTargetAlphabet ();
        String labelName = (String) in.readObject ();
        return targets.lookupLabel (labelName);
    }
    case TYPE_FEATURE_VECTOR: {
        // Indices are read first, then the matching values.
        int[] featureIndices = (int[]) in.readObject ();
        double[] featureValues = (double[]) in.readObject ();
        return new FeatureVector(getDataAlphabet (), featureIndices, featureValues);
    }
    case TYPE_OBJECT:
        return in.readObject ();
    default:
        throw new IOException ("Unknown object type " + tag);
    }
}
 
开发者ID:iamxiatian,项目名称:wikit,代码行数:31,代码来源:PagedInstanceList.java

示例9: pipe

import cc.mallet.types.LabelAlphabet; //导入方法依赖的package包/类
/**
 * Rebuilds the carrier's tokenization without the single-space tokens and
 * tags every remaining token "start" if it is the first token or follows a
 * space token, and "notstart" otherwise.
 * The carrier's data becomes the filtered tokenization, its source becomes
 * the concatenated text of the kept tokens, and its target becomes the tag
 * sequence when target processing is enabled.
 *
 * @param carrier instance whose data is a {@code StringTokenization}
 * @return the same instance, updated in place
 */
public Instance pipe(Instance carrier)
{
  StringTokenization original = (StringTokenization) carrier.getData();
  StringTokenization withoutSpaces = new StringTokenization((CharSequence) original.getDocument ());
  final LabelAlphabet dict = (LabelAlphabet) getTargetAlphabet();
  LabelSequence labelSeq = new LabelSequence(dict);
  // The returned labels are not needed here; the lookups are kept because
  // they may register both entries in the alphabet -- TODO confirm.
  dict.lookupLabel ("start");
  dict.lookupLabel ("notstart");

  boolean atWordStart = true;
  StringBuffer keptText = new StringBuffer();
  for (int i = 0; i < original.size(); i++) {
    StringSpan token = (StringSpan) original.getSpan(i);
    if (token.getText().equals(" ")) {
      // Space tokens are dropped; the next kept token starts a word.
      atWordStart = true;
      continue;
    }
    keptText.append(token.getText());
    withoutSpaces.add(token);
    labelSeq.add(atWordStart ? "start" : "notstart");
    atWordStart = false;
  }

  if (isTargetProcessing()) {
    carrier.setTarget(labelSeq);
  }
  carrier.setData(withoutSpaces);
  carrier.setSource(keptText.toString());
  return carrier;
}
 
开发者ID:kostagiolasn,项目名称:NucleosomePatternClassifier,代码行数:29,代码来源:TestMEMM.java

示例10: testNestedToXML

import cc.mallet.types.LabelAlphabet; //导入方法依赖的package包/类
/**
 * Builds labeled spans from a flat tag sequence with a
 * {@code DefaultTokenizationFilter}, adds two extra single-token spans on top
 * of them, and compares the XML string of the resulting
 * {@code DocumentExtraction} with the expected nested markup.
 */
public void testNestedToXML ()
{
  final String text = "the quick brown fox leapt over the lazy dog";
  LabelAlphabet alphabet = new LabelAlphabet ();
  StringTokenization tokens = new StringTokenization (text, new CharSequenceLexer ());

  Label outside = alphabet.lookupLabel ("O");
  Label animal = alphabet.lookupLabel ("ANIMAL");
  Label verb = alphabet.lookupLabel ("VERB");
  Label adjective = alphabet.lookupLabel ("ADJ");
  Label mammal = alphabet.lookupLabel ("MAMMAL");

  // One tag per token of the sentence.
  Label[] perToken = { outside, animal, animal, animal, verb, outside, animal, animal, animal };
  LabelSequence tags = new LabelSequence (perToken);

  DefaultTokenizationFilter filter = new DefaultTokenizationFilter ();
  LabeledSpans spans = filter.constructLabeledSpans (alphabet, text, outside, tokens, tags);

  // Extra spans over token 3 and token 7.
  Span tokenThree = tokens.subspan (3, 4);
  spans.add (new LabeledSpan (tokenThree, mammal, false));
  Span tokenSeven = tokens.subspan (7, 8);
  spans.add (new LabeledSpan (tokenSeven, adjective, false));

  String expected = "<?xml version=\"1.0\" encoding=\"UTF-8\"?>\r\n" +
          "<doc>the <ANIMAL>quick brown <MAMMAL>fox </MAMMAL></ANIMAL><VERB>leapt </VERB>over <ANIMAL>the <ADJ>lazy </ADJ>dog</ANIMAL></doc>\r\n";
  DocumentExtraction extraction = new DocumentExtraction ("Test", alphabet, tokens, spans, null, "O");
  String actual = extraction.toXmlString();
  assertEquals (expected, actual);
}
 
开发者ID:kostagiolasn,项目名称:NucleosomePatternClassifier,代码行数:29,代码来源:TestDocumentExtraction.java

示例11: testNestedXMLTokenizationFilter

import cc.mallet.types.LabelAlphabet; //导入方法依赖的package包/类
/**
 * Checks the XML produced through a {@code HierarchicalTokenizationFilter}
 * for tags whose names are joined with '|', first with the default filter and
 * then with a filter constructed from the pattern "AD.*".
 */
public void testNestedXMLTokenizationFilter ()
{
  final String text = "the quick brown fox leapt over the lazy dog";
  LabelAlphabet alphabet = new LabelAlphabet ();
  StringTokenization tokens = new StringTokenization (text, new CharSequenceLexer ());

  Label outside = alphabet.lookupLabel ("O");
  Label animal = alphabet.lookupLabel ("ANIMAL");
  Label animalMammal = alphabet.lookupLabel ("ANIMAL|MAMMAL");
  Label verb = alphabet.lookupLabel ("VERB");
  Label animalAdj = alphabet.lookupLabel ("ANIMAL|ADJ");
  Label animalAdjMammal = alphabet.lookupLabel ("ANIMAL|ADJ|MAMMAL");

  // One tag per token of the sentence.
  Label[] perToken = {
    outside, animal, animal, animalMammal, verb, outside, animal, animalAdj, animalAdjMammal
  };
  LabelSequence tags = new LabelSequence (perToken);

  // Default hierarchical filter.
  DocumentExtraction nested =
      new DocumentExtraction ("Test", alphabet, tokens, tags, null, "O", new HierarchicalTokenizationFilter ());
  String nestedActual = nested.toXmlString();
  String nestedExpected = "<?xml version=\"1.0\" encoding=\"UTF-8\"?>\r\n" +
          "<doc>the <ANIMAL>quick brown <MAMMAL>fox </MAMMAL></ANIMAL><VERB>leapt </VERB>over <ANIMAL>the <ADJ>lazy <MAMMAL>dog</MAMMAL></ADJ></ANIMAL></doc>\r\n";
  assertEquals (nestedExpected, nestedActual);

  // Test the ignore function
  DocumentExtraction ignoring =
      new DocumentExtraction ("Test", alphabet, tokens, tags, null, "O", new HierarchicalTokenizationFilter (Pattern.compile ("AD.*")));
  String ignoringActual = ignoring.toXmlString();
  String ignoringExpected = "<?xml version=\"1.0\" encoding=\"UTF-8\"?>\r\n" +
          "<doc>the <ANIMAL>quick brown <MAMMAL>fox </MAMMAL></ANIMAL><VERB>leapt </VERB>over <ANIMAL>the lazy <MAMMAL>dog</MAMMAL></ANIMAL></doc>\r\n";
  assertEquals (ignoringExpected, ignoringActual);
}
 
开发者ID:kostagiolasn,项目名称:NucleosomePatternClassifier,代码行数:34,代码来源:TestDocumentExtraction.java

示例12: eval

import cc.mallet.types.LabelAlphabet; //导入方法依赖的package包/类
/**
 * Classifies a bag of feature names with the wrapped classifier and returns
 * one value per label.
 *
 * <p>Feature names that are not in the classifier's data alphabet are
 * skipped. The lookup is done without inserting, so evaluation never adds
 * entries to the alphabet; the one-argument lookup would insert unseen names
 * whenever the alphabet is still allowed to grow, which made the {@code -1}
 * check ineffective.
 *
 * @param features feature names describing the instance to classify
 * @return an array with one entry per label of the classifier's label
 *         alphabet; entry {@code i} is the value the classification assigns
 *         to the label with alphabet index {@code i}
 */
public double[] eval(String[] features) {
  Alphabet dataAlphabet = classifer.getAlphabet();

  List<Integer> knownFeatureIds = new ArrayList<>(features.length);
  for (String feature : features) {
    // addIfNotPresent = false: unknown names yield -1 instead of being added.
    int featureId = dataAlphabet.lookupIndex(feature, false);
    if (featureId != -1) {
      knownFeatureIds.add(featureId);
    }
  }

  // Unbox into the primitive array the FeatureVector constructor takes.
  int[] featureIds = new int[knownFeatureIds.size()];
  for (int i = 0; i < featureIds.length; i++) {
    featureIds[i] = knownFeatureIds.get(i);
  }

  FeatureVector fv = new FeatureVector(dataAlphabet, featureIds);
  Instance instance = new Instance(fv, null, null, null);

  Classification result = classifer.classify(instance);
  LabelVector labeling = result.getLabelVector();

  LabelAlphabet targetAlphabet = classifer.getLabelAlphabet();

  double[] outcomes = new double[targetAlphabet.size()];
  for (int i = 0; i < outcomes.length; i++) {
    Label label = targetAlphabet.lookupLabel(i);

    // Map the label to its rank in the result, then read the value at that rank.
    int rank = labeling.getRank(label);
    outcomes[i] = labeling.getValueAtRank(rank);
  }

  return outcomes;
}
 
开发者ID:kottmann,项目名称:opennlp-mallet-addon,代码行数:39,代码来源:ClassifierModel.java

示例13: ignoretestNestedToXML

import cc.mallet.types.LabelAlphabet; //导入方法依赖的package包/类
/**
 * Builds labeled spans from a flat tag sequence with a
 * {@code DefaultTokenizationFilter}, adds two extra single-token spans on top
 * of them, and compares the XML string of the resulting
 * {@code DocumentExtraction} with the expected nested markup.
 */
public void ignoretestNestedToXML ()
{
  final String text = "the quick brown fox leapt over the lazy dog";
  LabelAlphabet alphabet = new LabelAlphabet ();
  StringTokenization tokens = new StringTokenization (text, new CharSequenceLexer ());

  Label outside = alphabet.lookupLabel ("O");
  Label animal = alphabet.lookupLabel ("ANIMAL");
  Label verb = alphabet.lookupLabel ("VERB");
  Label adjective = alphabet.lookupLabel ("ADJ");
  Label mammal = alphabet.lookupLabel ("MAMMAL");

  // One tag per token of the sentence.
  Label[] perToken = { outside, animal, animal, animal, verb, outside, animal, animal, animal };
  LabelSequence tags = new LabelSequence (perToken);

  DefaultTokenizationFilter filter = new DefaultTokenizationFilter ();
  LabeledSpans spans = filter.constructLabeledSpans (alphabet, text, outside, tokens, tags);

  // Extra spans over token 3 and token 7.
  Span tokenThree = tokens.subspan (3, 4);
  spans.add (new LabeledSpan (tokenThree, mammal, false));
  Span tokenSeven = tokens.subspan (7, 8);
  spans.add (new LabeledSpan (tokenSeven, adjective, false));

  String expected = "<?xml version=\"1.0\" encoding=\"UTF-8\"?>\r\n" +
          "<doc>the <ANIMAL>quick brown <MAMMAL>fox </MAMMAL></ANIMAL><VERB>leapt </VERB>over <ANIMAL>the <ADJ>lazy </ADJ>dog</ANIMAL></doc>\r\n";
  DocumentExtraction extraction = new DocumentExtraction ("Test", alphabet, tokens, spans, null, "O");
  String actual = extraction.toXmlString();
  assertEquals (expected, actual);
}
 
开发者ID:cmoen,项目名称:mallet,代码行数:29,代码来源:TestDocumentExtraction.java

示例14: ignoretestNestedXMLTokenizationFilter

import cc.mallet.types.LabelAlphabet; //导入方法依赖的package包/类
/**
 * Checks the XML produced through a {@code HierarchicalTokenizationFilter}
 * for tags whose names are joined with '|', first with the default filter and
 * then with a filter constructed from the pattern "AD.*".
 */
public void ignoretestNestedXMLTokenizationFilter ()
{
  final String text = "the quick brown fox leapt over the lazy dog";
  LabelAlphabet alphabet = new LabelAlphabet ();
  StringTokenization tokens = new StringTokenization (text, new CharSequenceLexer ());

  Label outside = alphabet.lookupLabel ("O");
  Label animal = alphabet.lookupLabel ("ANIMAL");
  Label animalMammal = alphabet.lookupLabel ("ANIMAL|MAMMAL");
  Label verb = alphabet.lookupLabel ("VERB");
  Label animalAdj = alphabet.lookupLabel ("ANIMAL|ADJ");
  Label animalAdjMammal = alphabet.lookupLabel ("ANIMAL|ADJ|MAMMAL");

  // One tag per token of the sentence.
  Label[] perToken = {
    outside, animal, animal, animalMammal, verb, outside, animal, animalAdj, animalAdjMammal
  };
  LabelSequence tags = new LabelSequence (perToken);

  // Default hierarchical filter.
  DocumentExtraction nested =
      new DocumentExtraction ("Test", alphabet, tokens, tags, null, "O", new HierarchicalTokenizationFilter ());
  String nestedActual = nested.toXmlString();
  String nestedExpected = "<?xml version=\"1.0\" encoding=\"UTF-8\"?>\r\n" +
          "<doc>the <ANIMAL>quick brown <MAMMAL>fox </MAMMAL></ANIMAL><VERB>leapt </VERB>over <ANIMAL>the <ADJ>lazy <MAMMAL>dog</MAMMAL></ADJ></ANIMAL></doc>\r\n";
  assertEquals (nestedExpected, nestedActual);

  // Test the ignore function
  DocumentExtraction ignoring =
      new DocumentExtraction ("Test", alphabet, tokens, tags, null, "O", new HierarchicalTokenizationFilter (Pattern.compile ("AD.*")));
  String ignoringActual = ignoring.toXmlString();
  String ignoringExpected = "<?xml version=\"1.0\" encoding=\"UTF-8\"?>\r\n" +
          "<doc>the <ANIMAL>quick brown <MAMMAL>fox </MAMMAL></ANIMAL><VERB>leapt </VERB>over <ANIMAL>the lazy <MAMMAL>dog</MAMMAL></ANIMAL></doc>\r\n";
  assertEquals (ignoringExpected, ignoringActual);
}
 
开发者ID:cmoen,项目名称:mallet,代码行数:34,代码来源:TestDocumentExtraction.java

示例15: updateTarget

import cc.mallet.types.LabelAlphabet; //导入方法依赖的package包/类
/**
 * Sets the instance's target to a {@code LabelSequence} built from the
 * tagging's expected predicted tags. Does nothing when
 * {@code data.hasExpectedPhones()} is false.
 *
 * @param data tagging that supplies the expected tags
 * @param inst instance whose target is replaced
 */
private void updateTarget(PartialTagging data, Instance inst) {
  if (data.hasExpectedPhones()) {
    LabelAlphabet targetLabels = (LabelAlphabet) this.getTargetAlphabet();
    List<String> expectedTags = data.getExpectedPredictedTags();

    // Resolve each tag in order, passing true as lookupLabel's second argument.
    Label[] resolved = new Label[expectedTags.size()];
    int next = 0;
    for (String tag : expectedTags) {
      resolved[next++] = targetLabels.lookupLabel(tag, true);
    }

    inst.setTarget(new LabelSequence(resolved));
  }
}
 
开发者ID:steveash,项目名称:jg2p,代码行数:16,代码来源:RetaggerMasterPipe.java


注:本文中的cc.mallet.types.LabelAlphabet.lookupLabel方法示例由纯净天空整理自Github/MSDocs等开源代码及文档管理平台,相关代码片段筛选自各路编程大神贡献的开源项目,源码版权归原作者所有,传播和使用请参考对应项目的License;未经允许,请勿转载。