当前位置: 首页>>代码示例>>Java>>正文


Java Instance.setTarget方法代码示例

本文整理汇总了Java中cc.mallet.types.Instance.setTarget方法的典型用法代码示例。如果您正苦于以下问题:Java Instance.setTarget方法的具体用法?Java Instance.setTarget怎么用?Java Instance.setTarget使用的例子?那么, 这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在cc.mallet.types.Instance的用法示例。


在下文中一共展示了Instance.setTarget方法的15个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Java代码示例。

示例1: extractNumericTarget

import cc.mallet.types.Instance; //导入方法依赖的package包/类
/**
 * Reads the value of feature {@code targetFeature} from the instance annotation, converts
 * it to a double and stores it as the target of {@code inst}.
 *
 * A {@link Number} value is used directly via {@code doubleValue()}; any other value is
 * converted through its {@code toString()} representation and {@link Double#parseDouble(String)}.
 *
 * @param inst the Mallet instance whose target gets set
 * @param targetFeature name of the annotation feature that holds the target value
 * @param instanceAnnotation the annotation whose feature map is read
 * @param inputAS annotation set used to obtain the document (only its name is used, in error messages)
 * @throws GateRuntimeException if the feature is missing (null) or cannot be converted to a double;
 *         in the latter case the original conversion failure is attached as the cause
 */
public static void extractNumericTarget(Instance inst, String targetFeature, Annotation instanceAnnotation, AnnotationSet inputAS) {
  Document doc = inputAS.getDocument();
  Object obj = instanceAnnotation.getFeatures().get(targetFeature);
  // A missing target value cannot be turned into a number, so fail loudly rather than guess.
  if (obj == null) {
    throw new GateRuntimeException("No target value for feature " + targetFeature
            + " for instance at offset " + gate.Utils.start(instanceAnnotation) + " in document " + doc.getName());
  }
  double value;
  if (obj instanceof Number) {
    value = ((Number) obj).doubleValue();
  } else {
    String asString = obj.toString();
    try {
      value = Double.parseDouble(asString);
    } catch (RuntimeException ex) {
      // parseDouble only fails with unchecked exceptions; keep the cause so the offending
      // value remains visible in the stack trace.
      throw new GateRuntimeException("Could not convert target value to a double for feature " + targetFeature
              + " for instance at offset " + gate.Utils.start(instanceAnnotation) + " in document " + doc.getName(), ex);
    }
  }
  inst.setTarget(value);
}
 
开发者ID:GateNLP,项目名称:gateplugin-LearningFramework,代码行数:23,代码来源:FeatureExtraction.java

示例2: pipe

import cc.mallet.types.Instance; //导入方法依赖的package包/类
/**
 * Splits every token of the data sequence at its first tab character: the part before the
 * tab is appended to a new target token sequence, the part after it becomes the token's new text.
 *
 * A token without any tab contributes its whole text as the target and keeps its text unchanged.
 * A token that starts with a tab (including one consisting only of tabs) yields an empty target
 * string instead of failing with an ArrayIndexOutOfBoundsException, which is what
 * {@code split("\t")[0]} does when every field is empty.
 *
 * @param carrier instance whose data is a TokenSequence
 * @return the same instance, with the target set to the extracted sequence and the data tokens modified in place
 */
@Override
public Instance pipe(Instance carrier) {

    TokenSequence ts = (TokenSequence) carrier.getData();
    TokenSequence targetTokenSeq = new TokenSequence(ts.size());

    for (int i = 0; i < ts.size(); i++) {

        Token t = ts.get(i);
        String text = t.getText();
        // Plain index search avoids compiling a regular expression for every token.
        int tabPos = text.indexOf('\t');
        String label;
        String remainder;
        if (tabPos < 0) {
            label = text;
            remainder = text;
        } else {
            label = text.substring(0, tabPos);
            remainder = text.substring(tabPos + 1);
        }

        targetTokenSeq.add(label);
        t.setText(remainder);

    }
    carrier.setTarget(targetTokenSeq);
    carrier.setData(ts);

    return carrier;
}
 
开发者ID:exciteproject,项目名称:refext,代码行数:24,代码来源:LineToTargetTextPipe.java

示例3: pipe

import cc.mallet.types.Instance; //导入方法依赖的package包/类
/**
 * Rewrites target labels in place: every target token whose text is a key of
 * {@code replacementMap} gets the mapped value as its new text; all other tokens are untouched.
 *
 * @param carrier instance whose target is a TokenSequence
 * @return the same instance, with the (mutated) sequence set as its target again
 */
@Override
public Instance pipe(Instance carrier) {

    TokenSequence labelTokens = (TokenSequence) carrier.getTarget();

    for (int idx = 0; idx < labelTokens.size(); idx++) {
        Token labelToken = labelTokens.get(idx);
        String currentLabel = labelToken.getText();
        if (!this.replacementMap.containsKey(currentLabel)) {
            // No replacement configured for this label.
            continue;
        }
        labelToken.setText(this.replacementMap.get(currentLabel));
    }

    carrier.setTarget(labelTokens);
    return carrier;
}
 
开发者ID:exciteproject,项目名称:refext,代码行数:21,代码来源:TargetReplacementPipe.java

示例4: pipe

import cc.mallet.types.Instance; //导入方法依赖的package包/类
/**
 * Drops single-space tokens from the data sequence and labels every remaining token
 * "start" when it is the first token or directly follows a space token, "notstart" otherwise.
 *
 * The instance data is replaced by the filtered token sequence, the source by the
 * concatenated text of the kept tokens, and (only when target processing is enabled)
 * the target by the label sequence.
 *
 * @param carrier instance whose data is a TokenSequence
 * @return the same instance after modification
 */
public Instance pipe(Instance carrier) {
	TokenSequence ts = (TokenSequence) carrier.getData();
	TokenSequence newTs = new TokenSequence();
	FeatureSequence labelSeq = new FeatureSequence(getTargetAlphabet());
	// The first kept token counts as a start, as if a space preceded it.
	boolean lastWasSpace = true;
	// Purely local buffer, so the unsynchronized StringBuilder is sufficient.
	StringBuilder sb = new StringBuilder();
	for (int i = 0; i < ts.size(); i++) {
		Token t = ts.get(i);
		String text = t.getText();
		if (text.equals(" ")) {
			lastWasSpace = true;
		} else {
			sb.append(text);
			newTs.add(t);
			labelSeq.add(lastWasSpace ? "start" : "notstart");
			lastWasSpace = false;
		}
	}
	if (isTargetProcessing()) {
		carrier.setTarget(labelSeq);
	}
	carrier.setData(newTs);
	carrier.setSource(sb.toString());
	return carrier;
}
 
开发者ID:kostagiolasn,项目名称:NucleosomePatternClassifier,代码行数:24,代码来源:TestCRF.java

示例5: pipe

import cc.mallet.types.Instance; //导入方法依赖的package包/类
/**
 * Matches each data token against {@code regex}; on a full match the capture group
 * {@code targetGroup} is appended to a new target sequence and the token text is replaced
 * by capture group {@code dataGroup}. Tokens that do not match only produce a warning.
 *
 * NOTE(review): a non-matching token stays in the data sequence but gets no target entry,
 * so data and target can end up with different lengths - confirm this is intended.
 *
 * @param carrier instance whose data is a TokenSequence
 * @return the same instance with target and data set
 */
public Instance pipe (Instance carrier)
{
	TokenSequence tokens = (TokenSequence) carrier.getData();
	TokenSequence labels = new TokenSequence (tokens.size());
	for (int idx = 0; idx < tokens.size(); idx++) {
		Token token = tokens.get(idx);
		Matcher m = regex.matcher (token.getText());
		if (!m.matches()) {
			logger.warning ("Skipping token: No match of "+regex.pattern()
											+" at token #"+idx+" with text "+token.getText());
			continue;
		}
		labels.add (m.group(targetGroup));
		token.setText (m.group (dataGroup));
	}
	carrier.setTarget(labels);
	carrier.setData(tokens);
	return carrier;
}
 
开发者ID:kostagiolasn,项目名称:NucleosomePatternClassifier,代码行数:20,代码来源:TokenSequenceMatchDataAndTarget.java

示例6: pipe

import cc.mallet.types.Instance; //导入方法依赖的package包/类
/**
 * Turns an AgglomerativeNeighbor (a proposed merge of two clusters) into a classification
 * instance: the data becomes a feature vector built from exact, approximate and substring
 * match features over the records of the merged cluster, and the target becomes the label
 * "YES" when the first elements of both old clusters carry the same label in the original
 * clustering, "NO" otherwise.
 *
 * @param carrier instance whose data is an AgglomerativeNeighbor
 * @return the same instance with data and target replaced
 */
public Instance pipe(Instance carrier) {
	AgglomerativeNeighbor candidate = (AgglomerativeNeighbor) carrier.getData();
	Clustering baseClustering = candidate.getOriginal();
	int[] firstCluster = candidate.getOldClusters()[0];
	int[] secondCluster = candidate.getOldClusters()[1];
	InstanceList instances = baseClustering.getInstances();
	int[] merged = candidate.getNewCluster();
	Record[] records = array2Records(merged, instances);
	Alphabet fieldAlph = records[0].fieldAlphabet();
	Alphabet valueAlph = records[0].valueAlphabet();

	// Each helper receives the list built so far and returns the extended list.
	PropertyList matchFeatures = null;
	matchFeatures = addExactMatch(records, fieldAlph, valueAlph, matchFeatures);
	matchFeatures = addApproxMatch(records, fieldAlph, valueAlph, matchFeatures);
	matchFeatures = addSubstringMatch(records, fieldAlph, valueAlph, matchFeatures);
	FeatureVector vector = new FeatureVector(getDataAlphabet(), matchFeatures, true);
	carrier.setData(vector);

	LabelAlphabet ldict = (LabelAlphabet) getTargetAlphabet();
	String verdict;
	if (baseClustering.getLabel(firstCluster[0]) == baseClustering.getLabel(secondCluster[0])) {
		verdict = "YES";
	} else {
		verdict = "NO";
	}
	carrier.setTarget(ldict.lookupLabel(verdict));
	return carrier;
}
 
开发者ID:kostagiolasn,项目名称:NucleosomePatternClassifier,代码行数:27,代码来源:Clusterings2Clusterer.java

示例7: label

import cc.mallet.types.Instance; //导入方法依赖的package包/类
/** Reads the input sequence from instance.data and stores the best output sequence in instance.target.
 *  Counterpart of transduce(Instance), which puts the best output sequence into instance.data instead.
 *  The input pipe (if any) is applied before labeling and the output pipe (if any) afterwards. */
// TODO Consider a different method name.
public Instance label (Instance instance)
{
	Instance current = instance;
	if (inputPipe != null) {
		current = inputPipe.instanceFrom(current);
	}
	// TODO Use MaxLatticeFactory instead of hardcoding
	Sequence input = (Sequence) current.getData();
	current.setTarget(new MaxLatticeDefault(this, input).bestOutputSequence());
	if (outputPipe != null) {
		current = outputPipe.instanceFrom(current);
	}
	return current;
}
 
开发者ID:kostagiolasn,项目名称:NucleosomePatternClassifier,代码行数:14,代码来源:Transducer.java

示例8: pipe

import cc.mallet.types.Instance; //导入方法依赖的package包/类
/**
 * Drops single-space spans from the tokenization and labels every remaining span
 * "start" when it is the first span or directly follows a space span, "notstart" otherwise.
 *
 * The instance data is replaced by a new StringTokenization over the same document that
 * holds only the kept spans, the source by the concatenated text of the kept spans, and
 * (only when target processing is enabled) the target by the label sequence.
 *
 * @param carrier instance whose data is a StringTokenization
 * @return the same instance after modification
 */
public Instance pipe(Instance carrier)
{
  StringTokenization ts = (StringTokenization) carrier.getData();
  StringTokenization newTs = new StringTokenization((CharSequence) ts.getDocument ());
  final LabelAlphabet dict = (LabelAlphabet) getTargetAlphabet();
  LabelSequence labelSeq = new LabelSequence(dict);
  // The results are not needed here, but the calls are kept: presumably they register
  // both labels in the alphabet even when one never occurs in the data - TODO confirm.
  dict.lookupLabel ("start");
  dict.lookupLabel ("notstart");

  // The first kept span counts as a start, as if a space preceded it.
  boolean lastWasSpace = true;
  // Purely local buffer, so the unsynchronized StringBuilder is sufficient.
  StringBuilder sb = new StringBuilder();
  for (int i = 0; i < ts.size(); i++) {
    StringSpan t = (StringSpan) ts.getSpan(i);
    String text = t.getText();
    if (text.equals(" ")) {
      lastWasSpace = true;
    } else {
      sb.append(text);
      newTs.add(t);
      labelSeq.add(lastWasSpace ? "start" : "notstart");
      lastWasSpace = false;
    }
  }
  if (isTargetProcessing()) {
    carrier.setTarget(labelSeq);
  }
  carrier.setData(newTs);
  carrier.setSource(sb.toString());
  return carrier;
}
 
开发者ID:kostagiolasn,项目名称:NucleosomePatternClassifier,代码行数:29,代码来源:TestMEMM.java

示例9: pipe

import cc.mallet.types.Instance; //导入方法依赖的package包/类
/**
 * Converts a String target into a Double. An instance without a target (null) is returned unchanged.
 *
 * @param carrier instance whose target is either null or a String holding a decimal number
 * @return the same instance, with the target replaced by the parsed Double
 * @throws IllegalArgumentException if the target is non-null but not a String
 * @throws NumberFormatException if the String is not a parsable double
 */
public Instance pipe (Instance carrier) {
	Object target = carrier.getTarget();
	if (target != null) {
		if (! (target instanceof String)) {
			throw new IllegalArgumentException ("Target must be a string for conversion to Double");
		}
		// Double.valueOf replaces the deprecated Double(String) constructor; parsing rules are the same.
		carrier.setTarget( Double.valueOf((String) target) );
	}
	return carrier;
}
 
开发者ID:kostagiolasn,项目名称:NucleosomePatternClassifier,代码行数:10,代码来源:Target2Double.java

示例10: pipe

import cc.mallet.types.Instance; //导入方法依赖的package包/类
/**
 * Converts a String target into an Integer. An instance without a target (null) is returned unchanged.
 *
 * @param carrier instance whose target is either null or a String holding a decimal integer
 * @return the same instance, with the target replaced by the parsed Integer
 * @throws IllegalArgumentException if the target is non-null but not a String
 * @throws NumberFormatException if the String is not a parsable integer
 */
public Instance pipe (Instance carrier) {
	Object target = carrier.getTarget();
	if (target != null) {
		if (! (target instanceof String)) {
			throw new IllegalArgumentException ("Target must be a String for conversion to Integer");
		}
		// Integer.valueOf replaces the deprecated Integer(String) constructor; parsing rules are the same.
		carrier.setTarget( Integer.valueOf((String) target) );
	}
	return carrier;
}
 
开发者ID:kostagiolasn,项目名称:NucleosomePatternClassifier,代码行数:10,代码来源:Target2Integer.java

示例11: pipe

import cc.mallet.types.Instance; //导入方法依赖的package包/类
/**
 * Replaces a non-null target by the Label that the target alphabet returns for it.
 * An instance without a target is returned unchanged.
 *
 * @param carrier instance whose target should be converted
 * @return the same instance
 * @throws IllegalArgumentException if the target already is a Label
 */
public Instance pipe (Instance carrier)
{
	Object rawTarget = carrier.getTarget();
	if (rawTarget == null) {
		return carrier;
	}
	if (rawTarget instanceof Label) {
		throw new IllegalArgumentException ("Already a label.");
	}
	LabelAlphabet labels = (LabelAlphabet) getTargetAlphabet();
	carrier.setTarget(labels.lookupLabel (rawTarget));
	return carrier;
}
 
开发者ID:kostagiolasn,项目名称:NucleosomePatternClassifier,代码行数:11,代码来源:Target2Label.java

示例12: pipe

import cc.mallet.types.Instance; //导入方法依赖的package包/类
/**
 * Wraps the instance's LabelsSequence target in a LabelsAssignment and stores that as the new target.
 *
 * @param carrier instance whose target is a LabelsSequence
 * @return the same instance
 */
public Instance pipe (Instance carrier)
{
  carrier.setTarget (new LabelsAssignment ((LabelsSequence) carrier.getTarget ()));
  return carrier;
}
 
开发者ID:mimno,项目名称:GRMM,代码行数:7,代码来源:LabelsSequence2Assignment.java

示例13: extractClassForSeqTagging

import cc.mallet.types.Instance; //导入方法依赖的package包/类
/**
 * Determines the sequence-tagging class of an instance and sets it as the instance target.
 *
 * The class label is derived from the class annotations that overlap the instance
 * annotation: if there is at least one, the sequence encoder turns them into a label;
 * if there is none, the encoder's "outside" code is used.
 *
 * @param inst the Mallet instance whose target gets set
 * @param alph the label alphabet to use, must be an instance of LabelAlphabet
 * @param classAS the annotation set that holds the class annotations
 * @param instanceAnnotation the instance annotation, e.g. "Token"
 * @param seqEncoder encoder that converts the overlapping class annotations into a class label
 * @throws GateRuntimeException if {@code alph} is not a LabelAlphabet
 */
public static void extractClassForSeqTagging(Instance inst, Alphabet alph, AnnotationSet classAS, Annotation instanceAnnotation, SeqEncoder seqEncoder) {
  Document document = classAS.getDocument();
  if (!(alph instanceof LabelAlphabet)) {
    throw new GateRuntimeException("LF extractClassForSeqTagging: the alphabet must be of type LabelAlphabet"
            + " for instance annotation at offset " + gate.Utils.start(instanceAnnotation)
            + " in document " + document.getName());
  }
  LabelAlphabet labels = (LabelAlphabet) alph;
  AnnotationSet overlaps = Utils.getOverlappingAnnotations(classAS, instanceAnnotation);
  // Design notes:
  // - Earlier versions allowed at most one class annotation; now any number of class
  //   annotations of any number of types may overlap the instance. The label is generated
  //   from the complete list of overlaps, e.g. beginning of T1, beginning of another T1,
  //   continuation of T2 and end of T3.
  // - Combined labels only come into existence when such an overlap actually occurs, so
  //   nicely separated annotations never produce them.
  // - Labels are generated dynamically as pipe-separated type names with a flag appended
  //   (beginning=B, inside=I), or class "O" when outside of all types.
  // - TODO: the ordering of types inside the label name must be consistent!
  // - This should become one of several selectable strategies (e.g. via the
  //   "algorithmParameter" settings), such as BIO, BMEWO, BMEWO+; see
  //   https://lingpipe-blog.com/2009/10/14/coding-chunkers-as-taggers-io-bio-bmewo-and-bmewo/
  //   and http://cs229.stanford.edu/proj2005/KrishnanGanapathy-NamedEntityRecognition.pdf
  //   Whatever strategy is chosen has to be stored in the model info file and re-used at
  //   application time.
  // - Still to check: is the label alphabet growing setting handled correctly?

  // With at least one overlapping class annotation the encoder builds the type|code label
  // (ideally a parametrizable Seq2Class-style instance that could also remember state from
  // the previous instance); without any overlap the instance is an "outside".
  String classLabel = overlaps.size() > 0
          ? seqEncoder.seqAnns2ClassLabel(overlaps, instanceAnnotation)
          : seqEncoder.CODE_OUTSIDE;
  // When debugging is enabled, expose the computed class on the instance annotation.
  if (debugSequenceClass) {
    instanceAnnotation.getFeatures().put("LF_sequenceClass", classLabel);
  }
  // Convert the label string into the actual Label object; this is why the target
  // alphabet MUST be a LabelAlphabet.
  inst.setTarget(labels.lookupLabel(classLabel));
}
 
开发者ID:GateNLP,项目名称:gateplugin-LearningFramework,代码行数:62,代码来源:FeatureExtraction.java

示例14: pipe

import cc.mallet.types.Instance; //导入方法依赖的package包/类
/**
 * Splits SGML-tagged character data into two parallel token sequences: the lexed text tokens
 * (data) and, for each of them, a token carrying the tag that was open at that position (target).
 *
 * Text outside any tag, and text after a closing tag, is labeled with {@code backgroundTag}.
 * Tags are located with {@code sgmlPattern}, restricted to the matches accepted by
 * {@code findNextValidMatch}; the tag name is the match without its first and last character.
 * The instance source is set to the data token sequence.
 *
 * @param carrier instance whose data is a CharSequence
 * @return the same instance with data, target and source replaced
 * @throws ClassCastException if the instance data is not a CharSequence
 */
public Instance pipe (Instance carrier)
{
	if (!(carrier.getData() instanceof CharSequence))
		throw new ClassCastException ("carrier.data is a " + carrier.getData().getClass().getName() +
																 " not a CharSequence");
	TokenSequence dataTokens = new TokenSequence ();
	TokenSequence targetTokens = new TokenSequence ();
	CharSequence string = (CharSequence) carrier.getData();
	String tag = backgroundTag;
	String nextTag = backgroundTag;
	Matcher m = sgmlPattern.matcher (string);
	int textStart = 0;
	int textEnd = 0;
	int nextStart = 0;
	boolean done = false;

	while (!done) {
		done = !findNextValidMatch (m);
		if (done) {
			// subSequence treats the end index as exclusive, so the trailing text runs up to
			// string.length(); using length()-1 would silently drop the last character.
			textEnd = string.length();
		} else {
			String sgml = m.group();
			if (sgml.charAt(1) == '/') {
				// Closing tag: following text belongs to the background again.
				nextTag = backgroundTag;
			} else {
				// Strip the delimiter characters around the tag name.
				nextTag = sgml.substring(1, sgml.length()-1);
			}
			nextStart = m.end();
			textEnd = m.start();
		}
		if (textEnd - textStart > 0) {
			lexer.setCharSequence (string.subSequence (textStart, textEnd));
			while (lexer.hasNext()) {
				dataTokens.add (new Token ((String) lexer.next()));
				targetTokens.add (new Token (tag));
			}
		}
		textStart = nextStart;
		tag = nextTag;
	}
	carrier.setData(dataTokens);
	carrier.setTarget(targetTokens);

	carrier.setSource(dataTokens);

	return carrier;
}
 
开发者ID:kostagiolasn,项目名称:NucleosomePatternClassifier,代码行数:50,代码来源:SelectiveSGML2TokenSequence.java

示例15: pipe

import cc.mallet.types.Instance; //导入方法依赖的package包/类
/**
 * Splits SGML-tagged character data into a StringTokenization of the lexed text spans (data)
 * and a parallel token sequence carrying, for each span, the tag that was open at that
 * position (target). Text outside any tag, and text after a closing tag, is labeled with
 * {@code backgroundTag}. The tag name is the {@code sgmlPattern} match without its first and
 * last character. When {@code saveSource} is set, the source is set to the data tokenization.
 *
 * @param carrier instance whose data is a CharSequence
 * @return the same instance with data and target (and optionally source) replaced
 */
public Instance pipe (Instance carrier)
{
	CharSequence string = (CharSequence) carrier.getData();
	StringTokenization dataTokens = new StringTokenization (string);
	TokenSequence targetTokens = new TokenSequence ();
	String currentTag = backgroundTag;
	String upcomingTag = backgroundTag;
	Matcher tagMatcher = sgmlPattern.matcher (string);
	int segmentStart = 0;
	int segmentEnd = 0;
	int upcomingStart = 0;
	boolean exhausted;

	logger.fine(sgmlPattern.pattern());
	logger.finer(string.toString());

	do {
		exhausted = !(tagMatcher.find());
		if (!exhausted) {
			String sgml = tagMatcher.group();
			logger.finer ("SGML = "+sgml);

			int groupCount = tagMatcher.groupCount();
			logger.finer(Integer.toString (groupCount));

			// A '/' right after the opening delimiter marks a closing tag.
			upcomingTag = (sgml.charAt(1) == '/')
					? backgroundTag
					: sgml.substring(1, sgml.length()-1);
			logger.finer("nextTag: " + upcomingTag);

			upcomingStart = tagMatcher.end();  // one past the last character of the match
			segmentEnd = tagMatcher.start();   // exclusive end of the text before the tag
			logger.finer ("Text start/end "+segmentStart+" "+segmentEnd);
		} else {
			// No further tag: the remaining text runs to the end of the input.
			segmentEnd = string.length();
		}
		if (segmentEnd > segmentStart) {
			logger.finer ("Tag = "+currentTag);
			CharSequence segment = string.subSequence (segmentStart, segmentEnd);
			logger.finer ("Target = "+segment);
			lexer.setCharSequence (segment);
			while (lexer.hasNext()) {
				lexer.next ();
				// Lexer offsets are relative to the segment; shift them to input coordinates.
				int tokStart = segmentStart + lexer.getStartOffset ();
				int tokEnd = segmentStart + lexer.getEndOffset ();
				dataTokens.add (new StringSpan (string, tokStart, tokEnd));
				targetTokens.add (new Token (currentTag));
			}
		}
		segmentStart = upcomingStart;
		currentTag = upcomingTag;
	} while (!exhausted);

	carrier.setData(dataTokens);
	carrier.setTarget(targetTokens);

	if (saveSource) {
		carrier.setSource(dataTokens);
	}

	return carrier;
}
 
开发者ID:kostagiolasn,项目名称:NucleosomePatternClassifier,代码行数:64,代码来源:SGML2TokenSequence.java


注:本文中的cc.mallet.types.Instance.setTarget方法示例由纯净天空整理自Github/MSDocs等开源代码及文档管理平台,相关代码片段筛选自各路编程大神贡献的开源项目,源码版权归原作者所有,传播和使用请参考对应项目的License;未经允许,请勿转载。