本文整理汇总了Java中cc.mallet.types.LabelSequence类的典型用法代码示例。如果您正苦于以下问题：Java LabelSequence类的具体用法？Java LabelSequence怎么用？Java LabelSequence使用的例子？那么，这里精选的类代码示例或许可以为您提供帮助。
LabelSequence类属于cc.mallet.types包，在下文中一共展示了LabelSequence类的14个代码示例，这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞，您的评价将有助于系统推荐出更棒的Java代码示例。
示例1: printTheta
import cc.mallet.types.LabelSequence; //导入依赖的package包/类
/**
 * Writes, for every document in {@code dataset}, the document name followed by one
 * line per topic of the form {@code topic<k>\t<prob>}, then a blank line.
 *
 * <p>The probability is the smoothed estimate
 * {@code (n(t|d) + alpha[t]) / (docLen + alphaSum)}, where {@code n(t|d)} is the number
 * of tokens in the document assigned to topic {@code t}.
 *
 * @param dataset   documents whose topic assignments are reported
 * @param f         output file; it is opened with {@link FileWriter} and overwritten
 * @param threshold not used by this implementation
 * @param max       not used by this implementation
 * @throws IOException if the output file cannot be opened
 */
public void printTheta(ArrayList<Topication> dataset, File f, double threshold, int max) throws IOException{
    int[] topicCounts = new int[ numTopics ];

    // try-with-resources: the writer is closed even when a document fails midway,
    // which the previous explicit close() at the end of the method did not guarantee.
    try (PrintWriter pw = new PrintWriter(new FileWriter(f))) {
        for (Topication doc : dataset) {
            int[] currentDocTopics = doc.topicSequence.getFeatures();
            int docLen = currentDocTopics.length;

            // Count how many tokens of this document fall into each topic.
            for (int token = 0; token < docLen; token++) {
                topicCounts[ currentDocTopics[token] ]++;
            }

            pw.println(doc.instance.getName());

            // n(t|d)+alpha(t) / docLen + alphaSum
            for (int topic = 0; topic < numTopics; topic++) {
                double prob = (double) (topicCounts[topic] + alpha[topic]) / (docLen + alphaSum);
                pw.println("topic" + topic + "\t" + prob);
            }
            pw.println();

            // Reset the shared counter array before the next document.
            Arrays.fill(topicCounts, 0);
        }
    }
}
示例2: printState
import cc.mallet.types.LabelSequence; //导入依赖的package包/类
/**
 * Dumps the per-token topic assignment of every document to {@code out}.
 *
 * <p>The first line is the header {@code #doc source pos typeindex type topic}; each
 * following line holds those six space-separated fields for one token. The source
 * column is {@code NA} when the instance has no source.
 *
 * @param dataset documents with their token and topic sequences
 * @param out     stream that receives the dump
 */
public void printState (ArrayList<Topication> dataset, PrintStream out) {
    out.println ("#doc source pos typeindex type topic");

    for (int docIndex = 0; docIndex < dataset.size(); docIndex++) {
        Topication doc = dataset.get(docIndex);
        FeatureSequence tokens = (FeatureSequence) doc.instance.getData();
        LabelSequence topics = doc.topicSequence;

        Object rawSource = doc.instance.getSource();
        String source = (rawSource == null) ? "NA" : rawSource.toString();

        int length = topics.getLength();
        for (int position = 0; position < length; position++) {
            int type = tokens.getIndexAtPosition(position);
            int topic = topics.getIndexAtPosition(position);

            // Leading numeric/source columns, each followed by a single space.
            out.print(docIndex + " " + source + " " + position + " " + type + " ");
            // The type's dictionary entry, then the topic id that terminates the line.
            out.print(alphabet.lookupObject(type));
            out.println(" " + topic);
        }
    }
}
示例3: testToXml
import cc.mallet.types.LabelSequence; //导入依赖的package包/类
/**
 * Builds a nine-token document tagged with O / ANIMAL / VERB labels, wraps it in a
 * {@code DocumentExtraction} with background tag "O", and checks that
 * {@code toXmlString()} produces exactly the expected XML text.
 */
public void testToXml () {
    final String xmlHeader = "<?xml version=\"1.0\" encoding=\"UTF-8\"?>\r\n";
    final String expected = xmlHeader +
        "<doc>the <ANIMAL>quick brown fox </ANIMAL><VERB>leapt </VERB>over the <ANIMAL>lazy dog</ANIMAL></doc>\r\n";

    final String text = "the quick brown fox leapt over the lazy dog";
    final StringTokenization tokenization = new StringTokenization (text, new CharSequenceLexer ());

    // Labels are looked up in the same order as before: O, ANIMAL, VERB.
    final LabelAlphabet labelDict = new LabelAlphabet ();
    final Label outside = labelDict.lookupLabel ("O");
    final Label animal = labelDict.lookupLabel ("ANIMAL");
    final Label verb = labelDict.lookupLabel ("VERB");

    // One label per token of the document.
    final Label[] perToken = { outside, animal, animal, animal, verb, outside, outside, animal, animal };
    final LabelSequence tagSequence = new LabelSequence (perToken);

    final DocumentExtraction extraction =
        new DocumentExtraction ("Test", labelDict, tokenization, tagSequence, "O");

    assertEquals (expected, extraction.toXmlString());
}
示例4: testToXmlBIO
import cc.mallet.types.LabelSequence; //导入依赖的package包/类
/**
 * Same document as the plain XML test, but tagged with B-/I- style labels and rendered
 * through a {@code BIOTokenizationFilter}; checks {@code toXmlString()} against the
 * expected XML text.
 */
public void testToXmlBIO () {
    final String text = "the quick brown fox leapt over the lazy dog";
    final LabelAlphabet labelDict = new LabelAlphabet ();
    final StringTokenization tokenization = new StringTokenization (text, new CharSequenceLexer ());

    // Lookup order preserved: O, B-ANIMAL, ANIMAL, B-VERB, I-VERB.
    final Label outside = labelDict.lookupLabel ("O");
    final Label beginAnimal = labelDict.lookupLabel ("B-ANIMAL");
    final Label animal = labelDict.lookupLabel ("ANIMAL");
    final Label beginVerb = labelDict.lookupLabel ("B-VERB");
    final Label insideVerb = labelDict.lookupLabel ("I-VERB");

    final Label[] perToken = {
        outside, beginAnimal, animal, beginAnimal, beginVerb, insideVerb, outside, animal, animal
    };
    final LabelSequence tagSequence = new LabelSequence (perToken);

    final DocumentExtraction extraction = new DocumentExtraction (
        "Test", labelDict, tokenization, tagSequence, null, "O", new BIOTokenizationFilter());
    final String actual = extraction.toXmlString();

    final String expected = "<?xml version=\"1.0\" encoding=\"UTF-8\"?>\r\n" +
        "<doc>the <ANIMAL>quick brown </ANIMAL><ANIMAL>fox </ANIMAL><VERB>leapt over </VERB>the <ANIMAL>lazy dog</ANIMAL></doc>\r\n";
    assertEquals (expected, actual);
}
示例5: ignoretestToXml
import cc.mallet.types.LabelSequence; //导入依赖的package包/类
/**
 * Checks the XML rendering of a flat O / ANIMAL / VERB tagging of a nine-token
 * document via {@code DocumentExtraction.toXmlString()}.
 */
public void ignoretestToXml () {
    final LabelAlphabet alphabet = new LabelAlphabet ();
    final String sentence = "the quick brown fox leapt over the lazy dog";
    final StringTokenization tokens = new StringTokenization (sentence, new CharSequenceLexer ());

    // Lookup order preserved: O, ANIMAL, VERB.
    final Label none = alphabet.lookupLabel ("O");
    final Label animal = alphabet.lookupLabel ("ANIMAL");
    final Label verb = alphabet.lookupLabel ("VERB");

    final LabelSequence labelling = new LabelSequence (
        new Label[] { none, animal, animal, animal, verb, none, none, animal, animal });

    final String produced =
        new DocumentExtraction ("Test", alphabet, tokens, labelling, "O").toXmlString();

    final String wanted = "<?xml version=\"1.0\" encoding=\"UTF-8\"?>\r\n" +
        "<doc>the <ANIMAL>quick brown fox </ANIMAL><VERB>leapt </VERB>over the <ANIMAL>lazy dog</ANIMAL></doc>\r\n";
    assertEquals (wanted, produced);
}
示例6: ignoretestToXmlBIO
import cc.mallet.types.LabelSequence; //导入依赖的package包/类
/**
 * Checks the XML rendering of a B-/I- style tagging of a nine-token document when the
 * extraction is built with a {@code BIOTokenizationFilter}.
 */
public void ignoretestToXmlBIO () {
    final String wanted = "<?xml version=\"1.0\" encoding=\"UTF-8\"?>\r\n" +
        "<doc>the <ANIMAL>quick brown </ANIMAL><ANIMAL>fox </ANIMAL><VERB>leapt over </VERB>the <ANIMAL>lazy dog</ANIMAL></doc>\r\n";

    final String sentence = "the quick brown fox leapt over the lazy dog";
    final StringTokenization tokens = new StringTokenization (sentence, new CharSequenceLexer ());

    // Lookup order preserved: O, B-ANIMAL, ANIMAL, B-VERB, I-VERB.
    final LabelAlphabet alphabet = new LabelAlphabet ();
    final Label none = alphabet.lookupLabel ("O");
    final Label animalStart = alphabet.lookupLabel ("B-ANIMAL");
    final Label animal = alphabet.lookupLabel ("ANIMAL");
    final Label verbStart = alphabet.lookupLabel ("B-VERB");
    final Label verbInside = alphabet.lookupLabel ("I-VERB");

    final LabelSequence labelling = new LabelSequence (new Label[] {
        none, animalStart, animal, animalStart, verbStart, verbInside, none, animal, animal
    });

    final DocumentExtraction extraction = new DocumentExtraction (
        "Test", alphabet, tokens, labelling, null, "O", new BIOTokenizationFilter());

    assertEquals (wanted, extraction.toXmlString());
}
示例7: pipe
import cc.mallet.types.LabelSequence; //导入依赖的package包/类
/**
 * Rewrites the carrier's {@code StringTokenization} without its single-space spans.
 *
 * <p>Every retained span is labelled {@code "start"} when it is the first span or
 * directly follows a space span, and {@code "notstart"} otherwise. The carrier's data
 * becomes the filtered tokenization and its source becomes the concatenated text of
 * the retained spans. The target is replaced by the label sequence only when
 * {@code isTargetProcessing()} is true.
 *
 * @param carrier instance whose data is a {@code StringTokenization}
 * @return the same instance, modified in place
 */
public Instance pipe(Instance carrier)
{
    final StringTokenization original = (StringTokenization) carrier.getData();
    final StringTokenization filtered = new StringTokenization((CharSequence) original.getDocument ());
    final LabelAlphabet dict = (LabelAlphabet) getTargetAlphabet();
    final LabelSequence labels = new LabelSequence(dict);

    // Both labels are looked up before any token is processed, "start" first,
    // exactly as before; the returned Label objects themselves are not needed.
    dict.lookupLabel ("start");
    dict.lookupLabel ("notstart");

    final StringBuilder joinedText = new StringBuilder();
    boolean previousWasSpace = true;
    final int spanCount = original.size();
    for (int index = 0; index < spanCount; index++) {
        final StringSpan span = (StringSpan) original.getSpan(index);
        final String text = span.getText();

        if (text.equals(" ")) {
            previousWasSpace = true;
            continue;
        }

        joinedText.append(text);
        filtered.add(span);
        if (previousWasSpace) {
            labels.add("start");
        } else {
            labels.add("notstart");
        }
        previousWasSpace = false;
    }

    if (isTargetProcessing()) {
        carrier.setTarget(labels);
    }
    carrier.setData(filtered);
    carrier.setSource(joinedText.toString());
    return carrier;
}
示例8: convert
import cc.mallet.types.LabelSequence; //导入依赖的package包/类
/**
 * Splits one sequence instance into a list of per-position instances.
 *
 * @param inst input instance, with FeatureVectorSequence as data and a LabelSequence
 *          as target.
 * @param alphabetsPipe a Noop pipe containing the data and target alphabets for
 *          the resulting InstanceList and AugmentableFeatureVectors
 * @return list of instances, each with one AugmentableFeatureVector as data, the
 *          label at the same position as target, and the input's source as source
 */
public static InstanceList convert(Instance inst, Noop alphabetsPipe)
{
    final InstanceList result = new InstanceList(alphabetsPipe);

    final Object rawData = inst.getData();
    assert(rawData instanceof FeatureVectorSequence);
    final FeatureVectorSequence vectors = (FeatureVectorSequence) rawData;
    final LabelSequence labels = (LabelSequence) inst.getTarget();
    assert(vectors.size() == labels.size());

    // Fall back to a placeholder when the input instance is unnamed.
    Object baseName = inst.getName();
    if (baseName == null) {
        baseName = "NONAME";
    }

    for (int pos = 0; pos < vectors.size(); pos++) {
        final FeatureVector original = vectors.getFeatureVector(pos);
        final int[] featureIndices = original.getIndices();
        final FeatureVector copy = new AugmentableFeatureVector (alphabetsPipe.getDataAlphabet(),
                featureIndices, original.getValues(), featureIndices.length);
        final Labeling label = labels.getLabelAtPosition(pos);

        // Positions in the generated name are 1-based.
        // NOTE(review): the "[email protected]" fragment looks like an e-mail obfuscation
        // artifact from a web scrape rather than the intended separator - confirm
        // against the upstream source before relying on these names.
        final String positionName = baseName.toString() + "[email protected]_POS_" + (pos + 1);

        result.add(alphabetsPipe.pipe(new Instance(copy, label, positionName, inst.getSource())));
    }
    return result;
}
示例9: next
import cc.mallet.types.LabelSequence; //导入依赖的package包/类
/**
 * Returns the next per-token instance.
 *
 * <p>When the current sequence's data iterator is exhausted, the next instance is
 * pulled from {@code superIterator} and fresh data/target sub-iterators are opened on
 * it. The returned instance pairs the next feature vector with the next label; its
 * name is the super-instance's source followed by {@code " tokensequence:"} and a
 * running counter, and its source is {@code null}.
 *
 * @return the instance for the next position
 */
public Instance next () {
    final boolean currentSequenceExhausted = !dataSubiterator.hasNext();
    if (currentSequenceExhausted) {
        assert (superIterator.hasNext());
        superInstance = superIterator.next();
        final FeatureVectorSequence nextData = (FeatureVectorSequence) superInstance.getData();
        dataSubiterator = nextData.iterator();
        final LabelSequence nextTarget = (LabelSequence) superInstance.getTarget();
        targetSubiterator = nextTarget.iterator();
    }

    // We are assuming sequences don't have zero length
    assert (dataSubiterator.hasNext());
    assert (targetSubiterator.hasNext());

    final Object data = dataSubiterator.next();
    final Object target = targetSubiterator.next();
    final String name = superInstance.getSource()+" tokensequence:"+count++;
    return new Instance (data, target, name, null);
}
开发者ID:kostagiolasn,项目名称:NucleosomePatternClassifier,代码行数:14,代码来源:FeatureVectorSequence2FeatureVectors.java
示例10: testNestedToXML
import cc.mallet.types.LabelSequence; //导入依赖的package包/类
/**
 * Builds labeled spans from a flat tagging with {@code DefaultTokenizationFilter}, adds
 * two further spans (MAMMAL over token 3, ADJ over token 7) on top, and checks that
 * {@code DocumentExtraction.toXmlString()} renders them as nested elements.
 */
public void testNestedToXML ()
{
    final String text = "the quick brown fox leapt over the lazy dog";
    final LabelAlphabet labelDict = new LabelAlphabet ();
    final StringTokenization tokenization = new StringTokenization (text, new CharSequenceLexer ());

    // Lookup order preserved: O, ANIMAL, VERB, ADJ, MAMMAL.
    final Label outside = labelDict.lookupLabel ("O");
    final Label animal = labelDict.lookupLabel ("ANIMAL");
    final Label verb = labelDict.lookupLabel ("VERB");
    final Label adjective = labelDict.lookupLabel ("ADJ");
    final Label mammal = labelDict.lookupLabel ("MAMMAL");

    final Label[] perToken = { outside, animal, animal, animal, verb, outside, animal, animal, animal };
    final LabelSequence tagSequence = new LabelSequence (perToken);

    final LabeledSpans labeledSpans = new DefaultTokenizationFilter ()
        .constructLabeledSpans (labelDict, text, outside, tokenization, tagSequence);

    // Extra spans added on top of those derived from the tag sequence.
    final Span foxSpan = tokenization.subspan (3, 4);
    labeledSpans.add (new LabeledSpan (foxSpan, mammal, false));
    final Span lazySpan = tokenization.subspan (7, 8);
    labeledSpans.add (new LabeledSpan (lazySpan, adjective, false));

    final DocumentExtraction extraction =
        new DocumentExtraction ("Test", labelDict, tokenization, labeledSpans, null, "O");
    final String actual = extraction.toXmlString();

    final String expected = "<?xml version=\"1.0\" encoding=\"UTF-8\"?>\r\n" +
        "<doc>the <ANIMAL>quick brown <MAMMAL>fox </MAMMAL></ANIMAL><VERB>leapt </VERB>over <ANIMAL>the <ADJ>lazy </ADJ>dog</ANIMAL></doc>\r\n";
    assertEquals (expected, actual);
}
示例11: testNestedXMLTokenizationFilter
import cc.mallet.types.LabelSequence; //导入依赖的package包/类
/**
 * Tags a document with pipe-separated composite labels (e.g. {@code ANIMAL|ADJ|MAMMAL})
 * and checks the XML produced through {@code HierarchicalTokenizationFilter} in two
 * configurations: the default filter, and a filter constructed with the pattern
 * {@code AD.*}, for which the expected output contains no ADJ element.
 */
public void testNestedXMLTokenizationFilter ()
{
    final String xmlHeader = "<?xml version=\"1.0\" encoding=\"UTF-8\"?>\r\n";
    final String text = "the quick brown fox leapt over the lazy dog";
    final LabelAlphabet labelDict = new LabelAlphabet ();
    final StringTokenization tokenization = new StringTokenization (text, new CharSequenceLexer ());

    // Lookup order preserved: O, ANIMAL, ANIMAL|MAMMAL, VERB, ANIMAL|ADJ, ANIMAL|ADJ|MAMMAL.
    final Label outside = labelDict.lookupLabel ("O");
    final Label animal = labelDict.lookupLabel ("ANIMAL");
    final Label animalMammal = labelDict.lookupLabel ("ANIMAL|MAMMAL");
    final Label verb = labelDict.lookupLabel ("VERB");
    final Label animalAdj = labelDict.lookupLabel ("ANIMAL|ADJ");
    final Label animalAdjMammal = labelDict.lookupLabel ("ANIMAL|ADJ|MAMMAL");

    final Label[] perToken = {
        outside, animal, animal, animalMammal, verb, outside, animal, animalAdj, animalAdjMammal
    };
    final LabelSequence tagSequence = new LabelSequence (perToken);

    // Case 1: default hierarchical filter.
    final DocumentExtraction nested = new DocumentExtraction (
        "Test", labelDict, tokenization, tagSequence, null, "O", new HierarchicalTokenizationFilter ());
    final String nestedActual = nested.toXmlString();
    final String nestedExpected = xmlHeader +
        "<doc>the <ANIMAL>quick brown <MAMMAL>fox </MAMMAL></ANIMAL><VERB>leapt </VERB>over <ANIMAL>the <ADJ>lazy <MAMMAL>dog</MAMMAL></ADJ></ANIMAL></doc>\r\n";
    assertEquals (nestedExpected, nestedActual);

    // Case 2: test the ignore function.
    final DocumentExtraction ignoring = new DocumentExtraction (
        "Test", labelDict, tokenization, tagSequence, null, "O",
        new HierarchicalTokenizationFilter (Pattern.compile ("AD.*")));
    final String ignoringActual = ignoring.toXmlString();
    final String ignoringExpected = xmlHeader +
        "<doc>the <ANIMAL>quick brown <MAMMAL>fox </MAMMAL></ANIMAL><VERB>leapt </VERB>over <ANIMAL>the lazy <MAMMAL>dog</MAMMAL></ANIMAL></doc>\r\n";
    assertEquals (ignoringExpected, ignoringActual);
}
示例12: pipe
import cc.mallet.types.LabelSequence; //导入依赖的package包/类
/**
 * Converts a space-separated line into a token sequence (data) and a B-GENE / I-GENE / O
 * label sequence (target).
 *
 * <p>The first space-separated field is skipped; every following field must fully match
 * {@code tokenPattern}, whose group 1 is used as the word and group 2 as the tag. A tag
 * contained in {@code targets} becomes {@code "I-GENE"} when it equals the previous
 * field's original tag and {@code "B-GENE"} otherwise; any other tag becomes {@code "O"}.
 *
 * @param carrier instance whose data is the input line as a {@code String}
 * @return the same instance with its data and target replaced
 * @throws IllegalStateException if a field does not match {@code tokenPattern}
 */
@Override
public Instance pipe(Instance carrier) {
    String line = (String) carrier.getData();
    String[] tokens = line.split(" ");
    // Check the precondition before the token count is used to size the target.
    assert (tokens.length > 1);

    LabelAlphabet labels = (LabelAlphabet) getTargetAlphabet();
    TokenSequence ts = new TokenSequence();
    LabelSequence target = new LabelSequence(labels, tokens.length - 1);

    String prevTag = null;
    // Index 0 is deliberately skipped; only the remaining fields are word/tag pairs.
    for (int i = 1; i < tokens.length; i++) {
        Matcher matcher = tokenPattern.matcher(tokens[i]);
        // Fail fast with context instead of letting group() raise a bare
        // "No match found"; the exception type is the one group() would throw.
        if (!matcher.matches()) {
            throw new IllegalStateException(
                "Token " + i + " (\"" + tokens[i] + "\") does not match the expected pattern in line: " + line);
        }
        String word = matcher.group(1);
        String originalTag = matcher.group(2);

        Token tok = new Token(word);
        tok.setFeatureValue(word, 1.0);
        ts.add(tok);

        String label;
        if (targets.contains(originalTag)) {
            // A repeated target tag continues the current entity; a new one begins it.
            label = originalTag.equals(prevTag) ? "I-GENE" : "B-GENE";
        } else {
            label = "O";
        }
        target.add(label);

        // Compare against the original tag, not the mapped label, on the next round.
        prevTag = originalTag;
    }

    carrier.setData(ts);
    carrier.setTarget(target);
    return carrier;
}
示例13: getStatistics
import cc.mallet.types.LabelSequence; //导入依赖的package包/类
/**
 * Computes two per-instance statistics under {@code crf} and stores them in the given maps.
 *
 * <p>{@code probMap} receives {@code exp(labeled - unlabeled)}, where {@code labeled} is
 * the total weight of a sum lattice built with the best output sequence and
 * {@code unlabeled} is the total weight of a sum lattice built without an output.
 * {@code entropyMap} receives the negated value of {@code EntropyLattice.getEntropy()}.
 *
 * @param crf        model used to build the lattices
 * @param inst       instance whose data is a {@code FeatureVectorSequence}
 * @param entropyMap receives the entropy value keyed by {@code inst}
 * @param probMap    receives the probability value keyed by {@code inst}
 * @param setTarget  when true, the instance is unlocked, its target is replaced by the
 *                   best output sequence, and it is locked again
 */
private void getStatistics(CRF crf, Instance inst,
        Map<Instance, Double> entropyMap, Map<Instance, Double> probMap,
        boolean setTarget) {
    final FeatureVectorSequence features = (FeatureVectorSequence) inst.getData();

    // Best labelling and the weight of the lattice built with it.
    final Sequence best = new MaxLatticeDefault(crf, features).bestOutputSequence();
    final SumLatticeDefault constrained = new SumLatticeDefault(crf, features, best);
    final double labeledWeight = constrained.getTotalWeight();

    // Lattice built without an output sequence.
    final SumLattice free = new SumLatticeDefault(crf, features, true);
    final double unlabeledWeight = free.getTotalWeight();

    final EntropyLattice entropyLattice = new EntropyLattice(
            features, free.getGammas(), free.getXis(), crf, null, 1);
    final double negatedEntropy = -entropyLattice.getEntropy();
    final double probability = Math.exp(labeledWeight - unlabeledWeight);

    entropyMap.put(inst, negatedEntropy);
    probMap.put(inst, probability);

    if (!setTarget) {
        return;
    }

    // Replace the target with the predicted sequence; the instance must be unlocked first.
    inst.unLock();
    final int length = best.size();
    final LabelSequence predicted = new LabelSequence(
            (LabelAlphabet) crf.getOutputAlphabet(), length);
    for (int pos = 0; pos < best.size(); pos++) {
        predicted.add(best.get(pos));
    }
    inst.setTarget(predicted);
    inst.lock();
}
示例14: TextInstance
import cc.mallet.types.LabelSequence; //导入依赖的package包/类
/**
 * Creates an instance from a sentence: the data is a {@code TokenSequence} with one
 * token per {@code TextToken} text, and the target is a {@code LabelSequence} over
 * {@code targetAlphabet} with one entry per {@code TextToken} tag. The sentence id is
 * used as the instance name and the sentence itself as its source.
 *
 * @param textSentence   sentence supplying the tokens, tags and id
 * @param targetAlphabet alphabet backing the label sequence
 */
public TextInstance(TextSentence textSentence, Alphabet targetAlphabet) {
    super(new TokenSequence(), new LabelSequence(targetAlphabet), textSentence.getId(), textSentence);

    // The sequences passed to super() start empty and are filled in place here.
    final TokenSequence words = (TokenSequence) getData();
    final LabelSequence tags = (LabelSequence) getTarget();

    for (TextToken current : textSentence) {
        final String word = current.getText();
        final String tag = current.getTag();
        words.add(new Token(word));
        tags.add(tag);
    }
}