本文整理汇总了 Java 中 cc.mallet.types.Instance.setTarget 方法的典型用法代码示例。如果您正苦于以下问题：Java Instance.setTarget 方法的具体用法是什么？Java Instance.setTarget 怎么用？有哪些 Java Instance.setTarget 的使用例子？那么，这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在类 cc.mallet.types.Instance 的用法示例。
在下文中一共展示了Instance.setTarget方法的15个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Java代码示例。
示例1: extractNumericTarget
import cc.mallet.types.Instance; //导入方法依赖的package包/类
/**
 * Reads a numeric target value from a feature of the instance annotation and stores it as
 * the target of the given instance.
 *
 * <p>A feature value that is a {@link Number} is used through {@code doubleValue()}; any other
 * value is converted with {@code toString()} and parsed by {@link Double#parseDouble(String)}.
 *
 * @param inst the instance whose target is set
 * @param targetFeature name of the feature on {@code instanceAnnotation} holding the target value
 * @param instanceAnnotation the annotation the feature is read from
 * @param inputAS annotation set used to obtain the document (only needed for error messages)
 * @throws GateRuntimeException if the feature has no value or the value cannot be parsed as a
 *     double; in the latter case the parse failure is attached as the cause
 */
public static void extractNumericTarget(Instance inst, String targetFeature, Annotation instanceAnnotation, AnnotationSet inputAS) {
    Document doc = inputAS.getDocument();
    Object obj = instanceAnnotation.getFeatures().get(targetFeature);
    // A missing target value cannot be repaired here, so fail loudly with the location.
    if (obj == null) {
        throw new GateRuntimeException("No target value for feature " + targetFeature
                + " for instance at offset " + gate.Utils.start(instanceAnnotation) + " in document " + doc.getName());
    }
    double value;
    if (obj instanceof Number) {
        value = ((Number) obj).doubleValue();
    } else {
        String asString = obj.toString();
        try {
            value = Double.parseDouble(asString);
        } catch (NumberFormatException ex) {
            // Keep the original parse failure as the cause so the offending text is not lost.
            throw new GateRuntimeException("Could not convert target value to a double for feature " + targetFeature
                    + " for instance at offset " + gate.Utils.start(instanceAnnotation) + " in document " + doc.getName(), ex);
        }
    }
    inst.setTarget(value);
}
示例2: pipe
import cc.mallet.types.Instance; //导入方法依赖的package包/类
/**
 * Splits the text of every data token at its first tab character: the part before the tab is
 * added to a new target token sequence, the part after it becomes the token's new text.
 *
 * <p>A token without any tab contributes its whole text as the target entry and keeps its text.
 * A token that starts with a tab (including one consisting only of tabs) contributes an empty
 * target entry instead of failing with an {@code ArrayIndexOutOfBoundsException}.
 *
 * @param carrier instance whose data is a {@code TokenSequence}
 * @return the same instance, with the new target sequence set and the data tokens rewritten
 */
@Override
public Instance pipe(Instance carrier) {
    TokenSequence ts = (TokenSequence) carrier.getData();
    TokenSequence targetTokenSeq = new TokenSequence(ts.size());
    for (int i = 0; i < ts.size(); i++) {
        Token t = ts.get(i);
        String text = t.getText();
        // Plain index search instead of regex replace + split: no per-token pattern
        // compilation and no empty-array result for tab-only text.
        int tab = text.indexOf('\t');
        String label = (tab < 0) ? text : text.substring(0, tab);
        String remainder = (tab < 0) ? text : text.substring(tab + 1);
        targetTokenSeq.add(label);
        t.setText(remainder);
    }
    carrier.setTarget(targetTokenSeq);
    carrier.setData(ts);
    return carrier;
}
示例3: pipe
import cc.mallet.types.Instance; //导入方法依赖的package包/类
/**
 * Rewrites target labels in place: every target token whose text is a key of
 * {@code replacementMap} gets the mapped value as its new text.
 *
 * @param carrier instance whose target is a {@code TokenSequence}
 * @return the same instance with the (mutated) target sequence set again
 */
@Override
public Instance pipe(Instance carrier) {
    final TokenSequence labelTokens = (TokenSequence) carrier.getTarget();
    for (int pos = 0; pos < labelTokens.size(); pos++) {
        final Token labelToken = labelTokens.get(pos);
        final String currentLabel = labelToken.getText();
        if (!this.replacementMap.containsKey(currentLabel)) {
            // Labels without a mapping are left untouched.
            continue;
        }
        labelToken.setText(this.replacementMap.get(currentLabel));
    }
    carrier.setTarget(labelTokens);
    return carrier;
}
示例4: pipe
import cc.mallet.types.Instance; //导入方法依赖的package包/类
/**
 * Drops single-space tokens from the data token sequence and labels every remaining token
 * "start" (first kept token, or directly preceded by a space token) or "notstart".
 *
 * <p>The kept tokens become the new data, the concatenation of their texts becomes the source,
 * and the label sequence becomes the target when {@code isTargetProcessing()} is true.
 *
 * @param carrier instance whose data is a {@code TokenSequence}
 * @return the same instance with data, source and (optionally) target replaced
 */
public Instance pipe(Instance carrier) {
    TokenSequence ts = (TokenSequence) carrier.getData();
    TokenSequence newTs = new TokenSequence();
    FeatureSequence labelSeq = new FeatureSequence(getTargetAlphabet());
    boolean lastWasSpace = true;
    // The buffer never leaves this method, so the unsynchronized StringBuilder is sufficient.
    StringBuilder sb = new StringBuilder();
    for (int i = 0; i < ts.size(); i++) {
        Token t = ts.get(i);
        if (t.getText().equals(" ")) {
            lastWasSpace = true;
        } else {
            sb.append(t.getText());
            newTs.add(t);
            labelSeq.add(lastWasSpace ? "start" : "notstart");
            lastWasSpace = false;
        }
    }
    if (isTargetProcessing()) {
        carrier.setTarget(labelSeq);
    }
    carrier.setData(newTs);
    carrier.setSource(sb.toString());
    return carrier;
}
示例5: pipe
import cc.mallet.types.Instance; //导入方法依赖的package包/类
/**
 * Matches every data token against {@code regex}; on a full match the group
 * {@code targetGroup} is appended to a new target sequence and the token text is replaced by
 * the group {@code dataGroup}. Tokens that do not match are logged with a warning.
 *
 * @param carrier instance whose data is a {@code TokenSequence}
 * @return the same instance with the target sequence set and the data sequence set again
 */
public Instance pipe (Instance carrier)
{
    final TokenSequence dataTokens = (TokenSequence) carrier.getData();
    final TokenSequence labelTokens = new TokenSequence (dataTokens.size());
    for (int pos = 0; pos < dataTokens.size(); pos++) {
        final Token token = dataTokens.get(pos);
        final Matcher m = regex.matcher (token.getText());
        if (!m.matches()) {
            // NOTE(review): the unmatched token stays in the data sequence but gets no target
            // entry, so data and target lengths can differ - confirm this is intended.
            logger.warning ("Skipping token: No match of "+regex.pattern()
                    +" at token #"+pos+" with text "+token.getText());
            continue;
        }
        labelTokens.add (m.group(targetGroup));
        token.setText (m.group (dataGroup));
    }
    carrier.setTarget(labelTokens);
    carrier.setData(dataTokens);
    return carrier;
}
开发者ID:kostagiolasn,项目名称:NucleosomePatternClassifier,代码行数:20,代码来源:TokenSequenceMatchDataAndTarget.java
示例6: pipe
import cc.mallet.types.Instance; //导入方法依赖的package包/类
/**
 * Turns an {@code AgglomerativeNeighbor} (a proposed merge of two clusters) into a feature
 * vector with a "YES"/"NO" target.
 *
 * <p>Features come from the exact, approximate and substring match helpers applied to the
 * records of the merged cluster. The target is "YES" when the first members of the two old
 * clusters carry the same label in the original clustering, otherwise "NO".
 *
 * @param carrier instance whose data is an {@code AgglomerativeNeighbor}
 * @return the same instance with data replaced by a {@code FeatureVector} and target by a label
 */
public Instance pipe(Instance carrier) {
    final AgglomerativeNeighbor merge = (AgglomerativeNeighbor) carrier.getData();
    final Clustering before = merge.getOriginal();
    final int[] firstCluster = merge.getOldClusters()[0];
    final int[] secondCluster = merge.getOldClusters()[1];
    final InstanceList instances = before.getInstances();
    final int[] mergedMembers = merge.getNewCluster();
    final Record[] mergedRecords = array2Records(mergedMembers, instances);

    // All records are taken to share the alphabets of the first one.
    final Alphabet fields = mergedRecords[0].fieldAlphabet();
    final Alphabet values = mergedRecords[0].valueAlphabet();

    // Each helper receives the property list built so far and returns the extended list.
    PropertyList props = addExactMatch(mergedRecords, fields, values, (PropertyList) null);
    props = addApproxMatch(mergedRecords, fields, values, props);
    props = addSubstringMatch(mergedRecords, fields, values, props);
    carrier.setData(new FeatureVector(getDataAlphabet(), props, true));

    final LabelAlphabet targetLabels = (LabelAlphabet) getTargetAlphabet();
    final boolean sameLabel =
            before.getLabel(firstCluster[0]) == before.getLabel(secondCluster[0]);
    final String answer;
    if (sameLabel) {
        answer = "YES";
    } else {
        answer = "NO";
    }
    carrier.setTarget(targetLabels.lookupLabel(answer));
    return carrier;
}
示例7: label
import cc.mallet.types.Instance; //导入方法依赖的package包/类
/**
 * Reads the input sequence from instance.data and stores the best output sequence in
 * instance.target. Like transduce(Instance), except that the result goes into instance.target
 * rather than instance.data. When set, the input pipe is applied before labeling and the
 * output pipe afterwards.
 */
// TODO Consider a different method name.
public Instance label (Instance instance)
{
    Instance current = instance;
    if (inputPipe != null) {
        current = inputPipe.instanceFrom(current);
    }
    // TODO Use MaxLatticeFactory instead of hardcoding
    Sequence input = (Sequence) current.getData();
    MaxLatticeDefault lattice = new MaxLatticeDefault(this, input);
    current.setTarget(lattice.bestOutputSequence());
    if (outputPipe == null) {
        return current;
    }
    return outputPipe.instanceFrom(current);
}
示例8: pipe
import cc.mallet.types.Instance; //导入方法依赖的package包/类
/**
 * Drops single-space spans from a {@code StringTokenization} and labels every remaining span
 * "start" (first kept span, or directly preceded by a space span) or "notstart".
 *
 * <p>The kept spans become the new data, the concatenation of their texts becomes the source,
 * and the label sequence becomes the target when {@code isTargetProcessing()} is true.
 *
 * @param carrier instance whose data is a {@code StringTokenization}
 * @return the same instance with data, source and (optionally) target replaced
 */
public Instance pipe(Instance carrier)
{
    StringTokenization ts = (StringTokenization) carrier.getData();
    StringTokenization newTs = new StringTokenization((CharSequence) ts.getDocument ());
    final LabelAlphabet dict = (LabelAlphabet) getTargetAlphabet();
    LabelSequence labelSeq = new LabelSequence(dict);
    // The results are not needed, but the lookups are kept as in the original code:
    // presumably they register both labels in the alphabet in a fixed order - verify
    // against LabelAlphabet before removing.
    dict.lookupLabel ("start");
    dict.lookupLabel ("notstart");
    boolean lastWasSpace = true;
    // The buffer never leaves this method, so the unsynchronized StringBuilder is sufficient.
    StringBuilder sb = new StringBuilder();
    for (int i = 0; i < ts.size(); i++) {
        StringSpan t = (StringSpan) ts.getSpan(i);
        if (t.getText().equals(" ")) {
            lastWasSpace = true;
        } else {
            sb.append(t.getText());
            newTs.add(t);
            labelSeq.add(lastWasSpace ? "start" : "notstart");
            lastWasSpace = false;
        }
    }
    if (isTargetProcessing()) {
        carrier.setTarget(labelSeq);
    }
    carrier.setData(newTs);
    carrier.setSource(sb.toString());
    return carrier;
}
示例9: pipe
import cc.mallet.types.Instance; //导入方法依赖的package包/类
/**
 * Converts a String target into a {@link Double}. An instance without a target is returned
 * unchanged.
 *
 * @param carrier instance whose target is {@code null} or a String holding a decimal number
 * @return the same instance, with the target replaced by the parsed {@code Double}
 * @throws IllegalArgumentException if the target is present but not a String
 *     (a String that is not a valid number raises its subclass {@link NumberFormatException})
 */
public Instance pipe (Instance carrier) {
    Object target = carrier.getTarget();
    if (target != null) {
        if (! (target instanceof String)) {
            throw new IllegalArgumentException ("Target must be a string for conversion to Double");
        }
        // Double.valueOf replaces the deprecated Double(String) constructor; parsing is identical.
        carrier.setTarget( Double.valueOf((String) target) );
    }
    return carrier;
}
示例10: pipe
import cc.mallet.types.Instance; //导入方法依赖的package包/类
/**
 * Converts a String target into an {@link Integer}. An instance without a target is returned
 * unchanged.
 *
 * @param carrier instance whose target is {@code null} or a String holding a decimal integer
 * @return the same instance, with the target replaced by the parsed {@code Integer}
 * @throws IllegalArgumentException if the target is present but not a String
 *     (a String that is not a valid integer raises its subclass {@link NumberFormatException})
 */
public Instance pipe (Instance carrier) {
    Object target = carrier.getTarget();
    if (target != null) {
        if (! (target instanceof String)) {
            throw new IllegalArgumentException ("Target must be a String for conversion to Integer");
        }
        // Integer.valueOf replaces the deprecated Integer(String) constructor; parsing is identical.
        carrier.setTarget( Integer.valueOf((String) target) );
    }
    return carrier;
}
示例11: pipe
import cc.mallet.types.Instance; //导入方法依赖的package包/类
/**
 * Replaces a non-null target by the label that the target alphabet returns for it.
 * An instance without a target is returned unchanged.
 *
 * @param carrier instance whose target is looked up in the target alphabet
 * @return the same instance, with the target replaced by the looked-up label
 * @throws IllegalArgumentException if the target already is a {@code Label}
 */
public Instance pipe (Instance carrier)
{
    if (carrier.getTarget() == null) {
        return carrier;
    }
    if (carrier.getTarget() instanceof Label) {
        throw new IllegalArgumentException ("Already a label.");
    }
    final LabelAlphabet targetLabels = (LabelAlphabet) getTargetAlphabet();
    carrier.setTarget(targetLabels.lookupLabel (carrier.getTarget()));
    return carrier;
}
示例12: pipe
import cc.mallet.types.Instance; //导入方法依赖的package包/类
/**
 * Wraps the {@code LabelsSequence} target of the instance in a {@code LabelsAssignment}.
 *
 * @param carrier instance whose target is a {@code LabelsSequence}
 * @return the same instance with the wrapped target
 */
public Instance pipe (Instance carrier)
{
    final LabelsSequence labelColumns = (LabelsSequence) carrier.getTarget ();
    final LabelsAssignment assignment = new LabelsAssignment (labelColumns);
    carrier.setTarget (assignment);
    return carrier;
}
示例13: extractClassForSeqTagging
import cc.mallet.types.Instance; //导入方法依赖的package包/类
/**
 * Determine the class label of an instance for sequence tagging and set it as the target.
 *
 * For sequence tagging the class is derived from where the instance lies relative to the
 * class annotations: at the start of a class annotation it is a "beginning", in the middle or
 * at the end it is an "inside", and an instance not covered by any class annotation is an
 * "outside".
 *
 * @param inst the instance whose target is set to the resulting label
 * @param alph the label alphabet to use, must be an instance of LabelAlphabet
 * @param classAS the annotation set holding the class annotations
 * @param instanceAnnotation the instance annotation, e.g. "Token".
 * @param seqEncoder the encoder that converts the overlapping class annotations into a label
 */
public static void extractClassForSeqTagging(Instance inst, Alphabet alph, AnnotationSet classAS, Annotation instanceAnnotation, SeqEncoder seqEncoder) {
    final Document doc = classAS.getDocument();
    if (!(alph instanceof LabelAlphabet)) {
        throw new GateRuntimeException("LF extractClassForSeqTagging: the alphabet must be of type LabelAlphabet"
                + " for instance annotation at offset " + gate.Utils.start(instanceAnnotation)
                + " in document " + doc.getName());
    }
    final LabelAlphabet labelalph = (LabelAlphabet) alph;
    final AnnotationSet overlaps = Utils.getOverlappingAnnotations(classAS, instanceAnnotation);

    // Design notes:
    // - Earlier versions allowed at most one class annotation per instance. Now any number of
    //   class annotations of any number of types may overlap, and the label is generated from
    //   everything that overlaps (e.g. beginning of T1, beginning of another T1, continuation
    //   of T2 and end of T3).
    // - Combined labels only come into existence when such an overlap actually occurs; with
    //   cleanly separated annotations they are never generated.
    // - Labels are built dynamically as pipe-separated type names with a flag appended
    //   (beginning=B, inside=I), or the class "O" when outside of all types.
    // - TODO: the ordering of types inside a label name must be made consistent.
    // - This should become one of several selectable strategies (e.g. chosen through the
    //   "algorithmParameter" settings), such as BIO, BMEWO, BMEWO+; see
    //   https://lingpipe-blog.com/2009/10/14/coding-chunkers-as-taggers-io-bio-bmewo-and-bmewo/
    //   and http://cs229.stanford.edu/proj2005/KrishnanGanapathy-NamedEntityRecognition.pdf
    //   Ideally each strategy is an instance of a Seq2Class subclass, which would also allow
    //   strategies that remember something about the previously processed instance.
    // - Whatever strategy is chosen has to be stored in the model info file and re-used at
    //   application time.
    // - Still to check: is the label alphabet growth setting handled correctly?
    final String target;
    if (overlaps.size() == 0) {
        // Nothing overlaps, so this instance is outside of all class annotations.
        target = seqEncoder.CODE_OUTSIDE;
    } else {
        // At least one class annotation overlaps: encode the overlapping set as type|code names.
        target = seqEncoder.seqAnns2ClassLabel(overlaps, instanceAnnotation);
    }

    // With debugging enabled, record the target class on the instance annotation.
    if (debugSequenceClass) {
        instanceAnnotation.getFeatures().put("LF_sequenceClass", target);
    }

    // Convert the label string into the actual label object and set it as the target.
    // The target alphabet for such an instance MUST be a LabelAlphabet.
    inst.setTarget(labelalph.lookupLabel(target));
}
示例14: pipe
import cc.mallet.types.Instance; //导入方法依赖的package包/类
/**
 * Converts SGML-tagged character data into two parallel token sequences: the lexer tokens of
 * the text between tags (data) and, for each of them, the tag that was in effect (target).
 *
 * <p>Text is in effect of {@code backgroundTag} until an opening tag is found; a closing tag
 * (second character {@code '/'}) switches back to {@code backgroundTag}. The tag name is the
 * matched markup without its first and last character.
 *
 * @param carrier instance whose data is a {@code CharSequence}
 * @return the same instance with data and source set to the data tokens and target set to the
 *     tag tokens
 * @throws ClassCastException if the data of the instance is not a {@code CharSequence}
 */
public Instance pipe (Instance carrier)
{
    Object data = carrier.getData();
    if (!(data instanceof CharSequence)) {
        // Null data is reported through the same exception instead of a NullPointerException.
        throw new ClassCastException ("carrier.data is a " + (data == null ? "null" : data.getClass().getName()) +
                " not a CharSequence");
    }
    TokenSequence dataTokens = new TokenSequence ();
    TokenSequence targetTokens = new TokenSequence ();
    CharSequence string = (CharSequence) data;
    String tag = backgroundTag;
    String nextTag = backgroundTag;
    Matcher m = sgmlPattern.matcher (string);
    int textStart = 0;
    int textEnd = 0;
    int nextStart = 0;
    boolean done = false;
    while (!done) {
        done = !findNextValidMatch (m);
        if (done) {
            // subSequence's end index is exclusive, so the trailing text runs to length();
            // length()-1 silently dropped the last character of the input.
            textEnd = string.length();
        } else {
            String sgml = m.group();
            if (sgml.charAt(1) == '/') {
                nextTag = backgroundTag;
            } else {
                // Strip the surrounding delimiter characters from the matched markup.
                nextTag = sgml.substring(1, sgml.length()-1);
            }
            nextStart = m.end();
            textEnd = m.start();
        }
        if (textEnd - textStart > 0) {
            lexer.setCharSequence (string.subSequence (textStart, textEnd));
            while (lexer.hasNext()) {
                dataTokens.add (new Token ((String) lexer.next()));
                targetTokens.add (new Token (tag));
            }
        }
        // The tag found in this round applies to the text that follows it.
        textStart = nextStart;
        tag = nextTag;
    }
    carrier.setData(dataTokens);
    carrier.setTarget(targetTokens);
    carrier.setSource(dataTokens);
    return carrier;
}
示例15: pipe
import cc.mallet.types.Instance; //导入方法依赖的package包/类
/**
 * Converts SGML-tagged character data into a {@code StringTokenization} of the text between
 * tags (data) and a parallel token sequence holding, per span, the tag in effect (target).
 *
 * <p>Text is in effect of {@code backgroundTag} until an opening tag is found; a closing tag
 * (second character {@code '/'}) switches back to {@code backgroundTag}. Spans refer to
 * offsets in the complete input. The source is only set when {@code saveSource} is true.
 *
 * @param carrier instance whose data is a {@code CharSequence}
 * @return the same instance with data, target and (optionally) source replaced
 */
public Instance pipe (Instance carrier)
{
    final CharSequence text = (CharSequence) carrier.getData();
    final StringTokenization spans = new StringTokenization (text);
    final TokenSequence tags = new TokenSequence ();
    String currentTag = backgroundTag;
    String upcomingTag = backgroundTag;
    final Matcher sgmlMatcher = sgmlPattern.matcher (text);
    int segmentStart = 0;
    int segmentEnd = 0;
    int resumeAt = 0;
    logger.fine(sgmlPattern.pattern());
    logger.finer(text.toString());

    boolean found;
    do {
        found = sgmlMatcher.find();
        if (found) {
            final String markup = sgmlMatcher.group();
            logger.finer ("SGML = "+markup);
            logger.finer(Integer.toString (sgmlMatcher.groupCount()));
            // A closing tag returns to the background tag; otherwise the tag name is the
            // markup without its first and last character.
            upcomingTag = (markup.charAt(1) == '/')
                    ? backgroundTag
                    : markup.substring(1, markup.length()-1);
            logger.finer("nextTag: " + upcomingTag);
            resumeAt = sgmlMatcher.end();      // one beyond the last character of the match
            segmentEnd = sgmlMatcher.start();  // exclusive end of the text before the match
            logger.finer ("Text start/end "+segmentStart+" "+segmentEnd);
        } else {
            // No further markup: the remaining text runs to the end of the input
            // (culotta: changed from length()-1).
            segmentEnd = text.length();
        }

        if (segmentEnd > segmentStart) {
            final CharSequence segment = text.subSequence (segmentStart, segmentEnd);
            logger.finer ("Tag = "+currentTag);
            logger.finer ("Target = "+segment);
            lexer.setCharSequence (segment);
            while (lexer.hasNext()) {
                lexer.next ();
                // Lexer offsets are relative to the segment; shift them to input offsets.
                final int spanStart = segmentStart + lexer.getStartOffset ();
                final int spanEnd = segmentStart + lexer.getEndOffset ();
                spans.add (new StringSpan (text, spanStart, spanEnd));
                tags.add (new Token (currentTag));
            }
        }

        // The tag found in this round applies to the text that follows it.
        segmentStart = resumeAt;
        currentTag = upcomingTag;
    } while (found);

    carrier.setData(spans);
    carrier.setTarget(tags);
    if (saveSource) {
        carrier.setSource(spans);
    }
    return carrier;
}