当前位置: 首页>>代码示例>>Java>>正文


Java Instance.getData方法代码示例

本文整理汇总了Java中cc.mallet.types.Instance.getData方法的典型用法代码示例。如果您正苦于以下问题:Java Instance.getData方法的具体用法?Java Instance.getData怎么用?Java Instance.getData使用的例子?那么, 这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在cc.mallet.types.Instance的用法示例。


在下文中一共展示了Instance.getData方法的15个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Java代码示例。

示例1: pipe

import cc.mallet.types.Instance; //导入方法依赖的package包/类
/**
 * Standardizes a feature vector in place: every feature flagged in {@code normalize} is
 * replaced by {@code (value - mean) / sqrt(variance)}, using the per-feature statistics
 * stored in {@code means} and {@code variances}.
 *
 * <p>Features whose stored variance is not strictly positive are left unchanged, because
 * dividing by a zero standard deviation would write NaN or Infinity into the vector.
 *
 * @param carrier the instance to process; its data must be a {@code FeatureVector}
 * @return the same instance, with its feature vector rescaled in place
 * @throws IllegalArgumentException if the instance data is not a {@code FeatureVector}
 * @throws GateRuntimeException if the statistics arrays do not match the data alphabet size
 */
public Instance pipe(Instance carrier) {
  if (!(carrier.getData() instanceof FeatureVector)) {
    System.out.println(carrier.getData().getClass());
    throw new IllegalArgumentException("Data must be of type FeatureVector not " + carrier.getData().getClass() + " we got " + carrier.getData());
  }

  if (this.means.length != this.getDataAlphabet().size()
          || this.variances.length != this.getDataAlphabet().size()) {
    throw new GateRuntimeException("Size mismatch, alphabet="+getDataAlphabet().size()+", stats="+means.length);
  }

  FeatureVector fv = (FeatureVector) carrier.getData();
  int[] indices = fv.getIndices();
  double[] values = fv.getValues();
  for (int i = 0; i < indices.length; i++) {
    int index = indices[i];
    double variance = variances[index];
    // Skip constant features (variance == 0) and invalid statistics (negative or NaN):
    // the division below would otherwise produce NaN or Infinity.
    if (normalize[index] && variance > 0.0) {
      double value = values[i];
      double mean = means[index];
      double newvalue = (value - mean) / Math.sqrt(variance);
      fv.setValue(index, newvalue);
    }
  }
  return carrier;
}
 
开发者ID:GateNLP,项目名称:gateplugin-LearningFramework,代码行数:26,代码来源:PipeScaleMeanVarAll.java

示例2: pipe

import cc.mallet.types.Instance; //导入方法依赖的package包/类
/**
 * Replaces the carrier's {@code AgglomerativeNeighbor} data with a feature vector built
 * from the records of the merged cluster, and sets the target to the label "YES" or "NO".
 *
 * <p>The target is "YES" when the first members of the two old clusters have the same
 * label in the original clustering, and "NO" otherwise.
 *
 * @param carrier instance whose data is an {@code AgglomerativeNeighbor}
 * @return the same instance with data and target replaced
 */
public Instance pipe(Instance carrier) {
	AgglomerativeNeighbor neighbor = (AgglomerativeNeighbor) carrier.getData();
	Clustering original = neighbor.getOriginal();
	int[] firstCluster = neighbor.getOldClusters()[0];
	int[] secondCluster = neighbor.getOldClusters()[1];
	InstanceList instances = original.getInstances();
	int[] merged = neighbor.getNewCluster();
	Record[] records = array2Records(merged, instances);
	Alphabet fieldAlph = records[0].fieldAlphabet();
	Alphabet valueAlph = records[0].valueAlphabet();

	// Each helper receives the property list built so far and returns the extended list.
	PropertyList matchFeatures = null;
	matchFeatures = addExactMatch(records, fieldAlph, valueAlph, matchFeatures);
	matchFeatures = addApproxMatch(records, fieldAlph, valueAlph, matchFeatures);
	matchFeatures = addSubstringMatch(records, fieldAlph, valueAlph, matchFeatures);
	FeatureVector vector = new FeatureVector(getDataAlphabet(), matchFeatures, true);
	carrier.setData(vector);

	LabelAlphabet labels = (LabelAlphabet) getTargetAlphabet();
	String answer;
	if (original.getLabel(firstCluster[0]) == original.getLabel(secondCluster[0])) {
		answer = "YES";
	} else {
		answer = "NO";
	}
	carrier.setTarget(labels.lookupLabel(answer));
	return carrier;
}
 
开发者ID:kostagiolasn,项目名称:NucleosomePatternClassifier,代码行数:27,代码来源:Clusterings2Clusterer.java

示例3: main

import cc.mallet.types.Instance; //导入方法依赖的package包/类
/**
 * Runs every file named on the command line through an
 * {@code Input2CharSequence} + {@code SGML2TokenSequence} pipeline and logs each
 * target token text next to the corresponding data token text.
 *
 * <p>Any exception is printed to standard output together with its stack trace.
 *
 * @param args paths of the files to process
 */
public static void main (String[] args)
{
	try {
		Pipe[] stages = new Pipe[] {
			new Input2CharSequence (),
			new SGML2TokenSequence()
		};
		Pipe p = new SerialPipes (stages);

		for (String path : args) {
			Instance carrier = p.instanceFrom(new Instance (new File(path), null, null, null));
			TokenSequence data = (TokenSequence) carrier.getData();
			TokenSequence target = (TokenSequence) carrier.getTarget();
			logger.finer ("===");
			logger.info (path);
			int j = 0;
			while (j < data.size()) {
				logger.info (target.get(j).getText()+" "+data.get(j).getText());
				j++;
			}
		}
	} catch (Exception e) {
		System.out.println (e);
		e.printStackTrace();
	}
}
 
开发者ID:kostagiolasn,项目名称:NucleosomePatternClassifier,代码行数:24,代码来源:SGML2TokenSequence.java

示例4: pipe

import cc.mallet.types.Instance; //导入方法依赖的package包/类
/**
 * Marks every token whose leading field contains fewer matches of {@code pattern} than
 * the leading field of the preceding token, by setting {@code feature} to 1.0 on it.
 *
 * <p>The leading field is the part of the token text before the first match of
 * {@code csvSeparator}. The first token is compared against a count of zero, so it is
 * never marked.
 *
 * @param carrier instance whose data is a {@code TokenSequence}
 * @return the same instance, with the feature set on the qualifying tokens
 */
@Override
public Instance pipe(Instance carrier) {
    TokenSequence tokenSequence = (TokenSequence) carrier.getData();
    int prevCount = 0;
    for (int i = 0; i < tokenSequence.size(); i++) {
        Token token = tokenSequence.get(i);
        // A limit of 2 always yields at least one element. The default limit discards
        // trailing empty strings, so a text made only of separators produced an empty
        // array and the [0] access threw ArrayIndexOutOfBoundsException.
        String tokenText = token.getText().split(this.csvSeparator, 2)[0];
        int count = 0;
        Matcher matcher = this.pattern.matcher(tokenText);
        while (matcher.find()) {
            count++;
        }
        if (count < prevCount) {
            token.setFeatureValue(this.feature, 1.0);
        }
        prevCount = count;
    }
    return carrier;
}
 
开发者ID:exciteproject,项目名称:refext,代码行数:21,代码来源:ShorterLinePipe.java

示例5: pipe

import cc.mallet.types.Instance; //导入方法依赖的package包/类
/**
 * Detects a document header and propagates it as a feature.
 *
 * <p>A header is recognized when the sequence has more than three tokens, token 2 or
 * token 3 is "-", and token 1 consists only of upper-case ASCII letters. Unless that
 * header is "PRESS", every token starting with an upper-case ASCII letter receives the
 * feature {@code HEADER=<header>} with value 1.0.
 *
 * @param carrier instance whose data is a {@code TokenSequence}
 * @return the same instance
 */
public Instance pipe (Instance carrier)
{
	TokenSequence ts = (TokenSequence) carrier.getData();
	if (ts.size() <= 3)
		return carrier;

	boolean dashFollows = ts.get(2).getText().equals("-") || ts.get(3).getText().equals("-");
	if (!dashFollows)
		return carrier;
	if (!ts.get(1).getText().matches("[A-Z]+"))
		return carrier;

	String header = ts.get(1).getText();
	// "PRESS DIGEST" headers are deliberately ignored.
	if (header.equals("PRESS"))
		return carrier;

	String featureName = "HEADER="+header;
	// Restricting the feature to capitalized words keeps the number of possible
	// feature conjunctions (every word crossed with every HEADER= feature) manageable.
	int idx = 0;
	while (idx < ts.size()) {
		Token current = ts.get(idx);
		if (current.getText().matches("^[A-Z].*"))
			current.setFeatureValue (featureName, 1.0);
		idx++;
	}
	return carrier;
}
 
开发者ID:kostagiolasn,项目名称:NucleosomePatternClassifier,代码行数:22,代码来源:TokenSequenceDocHeader.java

示例6: pipe

import cc.mallet.types.Instance; //导入方法依赖的package包/类
/**
 * Removes single-space spans from a {@code StringTokenization} and derives a label
 * sequence marking each remaining span as "start" (first span, or a span that followed a
 * space) or "notstart".
 *
 * <p>The carrier's data becomes the filtered tokenization and its source becomes the
 * concatenated text of the kept spans. The target is replaced by the label sequence only
 * when {@code isTargetProcessing()} is true.
 *
 * @param carrier instance whose data is a {@code StringTokenization}
 * @return the same instance with data, source and possibly target replaced
 */
public Instance pipe(Instance carrier)
{
  StringTokenization ts = (StringTokenization) carrier.getData();
  StringTokenization filtered = new StringTokenization((CharSequence) ts.getDocument ());
  final LabelAlphabet dict = (LabelAlphabet) getTargetAlphabet();
  LabelSequence labelSeq = new LabelSequence(dict);
  // The results are unused, but the lookups are kept: presumably they register both
  // labels in the alphabet in a fixed order - TODO confirm against LabelAlphabet.
  dict.lookupLabel ("start");
  dict.lookupLabel ("notstart");

  StringBuffer text = new StringBuffer();
  boolean afterSpace = true;
  for (int i = 0; i < ts.size(); i++) {
    StringSpan span = (StringSpan) ts.getSpan(i);
    if (span.getText().equals(" ")) {
      afterSpace = true;
      continue;
    }
    text.append(span.getText());
    filtered.add(span);
    if (afterSpace) {
      labelSeq.add("start");
    } else {
      labelSeq.add("notstart");
    }
    afterSpace = false;
  }

  if (isTargetProcessing())
    carrier.setTarget(labelSeq);
  carrier.setData(filtered);
  carrier.setSource(text.toString());
  return carrier;
}
 
开发者ID:kostagiolasn,项目名称:NucleosomePatternClassifier,代码行数:29,代码来源:TestMEMM.java

示例7: preProcess

import cc.mallet.types.Instance; //导入方法依赖的package包/类
/**
 * Counts how often each constrained feature occurs in the data and records which
 * instances contain at least one of them.
 *
 * <p>For every feature vector of every instance, the {@code count} of the matching
 * entry in {@code constraints} is incremented once per occurrence. An entry keyed by
 * the alphabet size, if present, is incremented once per feature vector (presumably a
 * default or bias feature - verify against the code that fills {@code constraints}).
 *
 * @param data instances whose data are {@code FeatureVectorSequence}s
 * @return a bit set with bit {@code i} set when instance {@code i} touched a constraint
 */
public BitSet preProcess(InstanceList data) {
  BitSet touched = new BitSet(data.size());
  int instanceIndex = 0;
  for (Instance instance : data) {
    FeatureVectorSequence sequence = (FeatureVectorSequence)instance.getData();
    for (int pos = 0; pos < sequence.size(); pos++) {
      FeatureVector vector = sequence.get(pos);
      int numLocations = vector.numLocations();
      for (int loc = 0; loc < numLocations; loc++) {
        int featureIndex = vector.indexAtLocation(loc);
        if (!constraints.containsKey(featureIndex)) {
          continue;
        }
        constraints.get(featureIndex).count += 1;
        touched.set(instanceIndex);
      }
      int alphabetSize = vector.getAlphabet().size();
      if (constraints.containsKey(alphabetSize)) {
        touched.set(instanceIndex);
        constraints.get(alphabetSize).count += 1;
      }
    }
    instanceIndex++;
  }
  return touched;
}
 
开发者ID:kostagiolasn,项目名称:NucleosomePatternClassifier,代码行数:28,代码来源:OneLabelGEConstraints.java

示例8: pipe

import cc.mallet.types.Instance; //导入方法依赖的package包/类
/**
 * Rescales a feature vector in place with min-max scaling: every feature flagged in
 * {@code normalize} whose range {@code max - min} is positive is replaced by
 * {@code (value - min) / (max - min)}.
 *
 * @param carrier the instance to process; its data must be a {@code FeatureVector}
 * @return the same instance, with its feature vector rescaled in place
 * @throws IllegalArgumentException if the instance data is not a {@code FeatureVector}
 * @throws GateRuntimeException if the statistics arrays do not match the data alphabet size
 */
public Instance pipe(Instance carrier) {
  if (!(carrier.getData() instanceof FeatureVector)) {
    System.out.println(carrier.getData().getClass());
    throw new IllegalArgumentException("Data must be of type FeatureVector not " + carrier.getData().getClass() + " we got " + carrier.getData());
  }

  boolean minMatches = min.length == getDataAlphabet().size();
  if (!minMatches || max.length != getDataAlphabet().size()) {
    throw new GateRuntimeException("Size mismatch, alphabet="+getDataAlphabet().size()+", stats="+min.length);
  }

  FeatureVector vector = (FeatureVector) carrier.getData();
  int[] featureIndices = vector.getIndices();
  double[] featureValues = vector.getValues();
  for (int loc = 0; loc < featureIndices.length; loc++) {
    int featureIndex = featureIndices[loc];
    double lower = min[featureIndex];
    double upper = max[featureIndex];
    double range = upper - lower;
    // Written as !(range > 0.0) so that a NaN range is skipped as well.
    if (!normalize[featureIndex] || !(range > 0.0)) {
      continue;
    }
    // NOTE: the subtraction could in theory overflow; this is knowingly ignored.
    double scaled = (featureValues[loc] - lower) / range;
    vector.setValue(featureIndex, scaled);
  }
  return carrier;
}
 
开发者ID:GateNLP,项目名称:gateplugin-LearningFramework,代码行数:29,代码来源:PipeScaleMinMaxAll.java

示例9: getFromMallet

import cc.mallet.types.Instance; //导入方法依赖的package包/类
/**
 * Builds a LibSVM {@code svm_problem} from a Mallet corpus representation.
 *
 * <p>Each Mallet instance becomes one row. A {@code Label} target is converted to its
 * index and a {@code Double} target is used as is. The instance data is read as a
 * {@code SparseVector} and its feature indices are shifted by one, since LibSVM
 * indices start at 1.
 *
 * @param crm the Mallet corpus representation to convert
 * @return the populated LibSVM problem
 * @throws GateRuntimeException if a target is neither a {@code Label} nor a {@code Double}
 */
public static svm_problem getFromMallet(CorpusRepresentationMallet crm) {
  InstanceList instances = crm.getRepresentationMallet();
  int rowCount = instances.size();
  svm_problem prob = new svm_problem();
  prob.l = rowCount;
  prob.y = new double[prob.l];
  prob.x = new svm_node[prob.l][];

  for (int row = 0; row < rowCount; row++) {
    Instance instance = instances.get(row);

    // Target: label index for classification, the raw number for regression targets.
    Object target = instance.getTarget();
    if (target instanceof Label) {
      prob.y[row] = ((Label) target).getIndex();
    } else if (target instanceof Double) {
      prob.y[row] = ((Double) target).doubleValue();
    } else {
      throw new GateRuntimeException("Odd target in mallet instance, cannot convert to LIBSVM: " + target);
    }

    // Features: one svm_node per stored location of the sparse vector.
    SparseVector vector = (SparseVector) instance.getData();
    int[] featureIndices = vector.getIndices();
    double[] featureValues = vector.getValues();
    svm_node[] nodes = new svm_node[featureIndices.length];
    prob.x[row] = nodes;
    for (int col = 0; col < featureIndices.length; col++) {
      svm_node node = new svm_node();
      node.index = featureIndices[col] + 1; // LibSVM indices are 1-based
      node.value = featureValues[col];
      nodes[col] = node;
    }
  }
  return prob;
}
 
开发者ID:GateNLP,项目名称:gateplugin-LearningFramework,代码行数:44,代码来源:CorpusRepresentationLibSVM.java

示例10: gatherConstraints

import cc.mallet.types.Instance; //导入方法依赖的package包/类
/**
 * Recomputes the cached per-instance probabilities and the constraint factors.
 *
 * <p>For every training instance a {@code SumLatticePR} is run; its first and last
 * gamma rows and its xis are converted with {@code toProbabilities} and appended to
 * {@code initialProbList}, {@code finalProbList} and {@code transitionProbList}. A
 * {@code SumLatticeKL} is then constructed with an incrementor of {@code constraints}.
 *
 * @param auxModel auxiliary model passed to each {@code SumLatticePR}
 * @param cachedDots cached values, indexed first by training instance
 */
@SuppressWarnings("unchecked")
protected void gatherConstraints(
		PRAuxiliaryModel auxModel, double[][][][] cachedDots) {
	initialProbList = new ArrayList<double[]>();
	finalProbList = new ArrayList<double[]>();
	transitionProbList = new ArrayList<double[][][]>();

	constraints = new CRF.Factors(crf.getParameters());
	expectations = new CRF.Factors(crf.getParameters());
	constraints.zero();

	int idx = 0;
	while (idx < trainingSet.size()) {
		Instance inst = trainingSet.get(idx);
		Sequence input = (Sequence) inst.getData();

		SumLatticePR lattice = new SumLatticePR(
				crf, idx, input, null, auxModel, cachedDots[idx], false, null, null, true);
		double[][] gammas = lattice.getGammas();

		double[] initialProbs = toProbabilities(gammas[0]);
		initialProbList.add(initialProbs);

		double[] finalProbs = toProbabilities(gammas[gammas.length - 1]);
		finalProbList.add(finalProbs);

		// The xis array is passed to toProbabilities and then stored; the return value
		// of that call, if any, is not used.
		double[][][] transitionProbs = lattice.getXis();
		toProbabilities(transitionProbs);
		transitionProbList.add(transitionProbs);

		// Constructed for its side effects only; presumably it accumulates into
		// constraints through the incrementor - TODO confirm in SumLatticeKL.
		new SumLatticeKL(crf, input, initialProbs,
				finalProbs, transitionProbs, null, constraints.new Incrementor());
		idx++;
	}
}
 
开发者ID:kostagiolasn,项目名称:NucleosomePatternClassifier,代码行数:34,代码来源:CRFOptimizableByKL.java

示例11: pipe

import cc.mallet.types.Instance; //导入方法依赖的package包/类
/**
 * Replaces the carrier's character data with one group of the first {@code regex}
 * match, or with the empty string when the regex does not match anywhere.
 *
 * @param carrier instance whose data is a {@code CharSequence}
 * @return the same instance, with its data replaced
 */
public Instance pipe (Instance carrier)
{
	CharSequence text = (CharSequence) carrier.getData();
	Matcher matcher = regex.matcher(text);
	// Group groupIndex of the first match, or "" when there is no match at all.
	String extracted = matcher.find() ? matcher.group(groupIndex) : "";
	carrier.setData(extracted);
	return carrier;
}
 
开发者ID:kostagiolasn,项目名称:NucleosomePatternClassifier,代码行数:15,代码来源:CharSubsequence.java

示例12: pipe

import cc.mallet.types.Instance; //导入方法依赖的package包/类
/**
 * Log-scales the stored values of an {@code AugmentableFeatureVector} in place: every
 * value {@code v >= 1} becomes {@code log(v) + 1}, smaller values are left untouched.
 *
 * @param carrier instance whose data is an {@code AugmentableFeatureVector}
 * @return the same instance
 */
public Instance pipe (Instance carrier)
{
	AugmentableFeatureVector vector = (AugmentableFeatureVector)carrier.getData();
	// Walk the locations from last to first.
	int loc = vector.numLocations();
	while (--loc >= 0) {
		double current = vector.valueAtLocation (loc);
		if (current >= 1) {
			vector.setValueAtLocation (loc, Math.log(current)+1);
		}
	}
	return carrier;
}
 
开发者ID:kostagiolasn,项目名称:NucleosomePatternClassifier,代码行数:12,代码来源:AugmentableFeatureVectorLogScale.java

示例13: pipe

import cc.mallet.types.Instance; //导入方法依赖的package包/类
/**
 * Adds to every token the features of the tokens in a window around it.
 *
 * <p>The window for token {@code i} covers positions {@code i + leftBoundary}
 * (inclusive) to {@code i + rightBoundary} (exclusive). Positions before the sequence
 * read from {@code startfs}, positions past its end read from {@code endfs}. Only keys
 * accepted by {@code featureRegex} are copied (all keys when it is null), and each
 * copied key is prefixed with {@code namePrefix}, or with {@code namePrefixLeft} for
 * positions at or left of the token when that prefix is set.
 *
 * <p>The new lists are collected first and installed afterwards, so the window always
 * reads the tokens' original features.
 *
 * @param carrier instance whose data is a {@code TokenSequence}
 * @return the same instance
 */
public Instance pipe (Instance carrier)
{
	TokenSequence ts = (TokenSequence) carrier.getData();
	final int length = ts.size();
	PropertyList[] windowed = new PropertyList[length];

	for (int center = 0; center < length; center++) {
		// Start from the token's own features and prepend the window features to them.
		windowed[center] = ts.get (center).getFeatures();
		for (int position = center + leftBoundary; position < center + rightBoundary; position++) {
			if (position == center && !includeCurrentToken)
				continue;

			PropertyList source;
			if (position < 0) {
				source = startfs[-position];
			} else if (position >= length) {
				source = endfs[position-length];
			} else {
				source = ts.get(position).getFeatures ();
			}

			for (PropertyList.Iterator it = source.iterator(); it.hasNext(); ) {
				it.next();
				String key = it.getKey();
				if (featureRegex != null && !featureRegex.matcher(key).matches())
					continue;
				windowed[center] = PropertyList.add (
						(namePrefixLeft == null || position-center>0 ? namePrefix : namePrefixLeft)+key,
						it.getNumericValue(), windowed[center]);
			}
		}
	}

	// Install the new property lists only after every window has been computed.
	for (int center = 0; center < length; center++) {
		ts.get (center).setFeatures (windowed[center]);
	}
	return carrier;
}
 
开发者ID:kostagiolasn,项目名称:NucleosomePatternClassifier,代码行数:37,代码来源:FeaturesInWindow.java

示例14: getClassificationScores

import cc.mallet.types.Instance; //导入方法依赖的package包/类
/**
 * Scores every feature vector of a {@code FeatureVectorSequence} and normalizes the
 * scores into a distribution.
 *
 * <p>Each raw score is the default-feature parameter of row 0 plus the row-0 dot
 * product with the feature vector. The scores are then shifted by their maximum,
 * exponentiated and divided by their sum.
 *
 * @param instance instance whose data is a {@code FeatureVectorSequence}
 * @param scores output array, one entry per feature vector in the sequence
 */
public void getClassificationScores (Instance instance, double[] scores)
{
	FeatureVectorSequence fvs = (FeatureVectorSequence)instance.getData();
	int numFeatures = instance.getDataAlphabet().size()+1;
	int numLabels = fvs.size();
	assert (scores.length == fvs.size());

	int candidate = 0;
	while (candidate < fvs.size()) {
		FeatureVector fv = (FeatureVector)fvs.get(candidate);
		// The vector's alphabet must be the one of our data pipe, otherwise the
		// parameter indices would not line up with the feature indices.
		assert (fv.getAlphabet ()
						== this.instancePipe.getDataAlphabet ());

		// Row 0 is the only parameter row used here.
		double bias = parameters[0*numFeatures + defaultFeatureIndex];
		double dot = MatrixOps.rowDotProduct (parameters, numFeatures,
				0, fv,
				defaultFeatureIndex,
				(perClassFeatureSelection == null
				 ? featureSelection
				 : perClassFeatureSelection[0]));
		scores[candidate] = bias + dot;
		candidate++;
	}

	// Shift by the maximum so that exp() stays accurate, then normalize.
	double max = MatrixOps.max (scores);
	double sum = 0;
	for (int li = 0; li < numLabels; li++) {
		scores[li] = Math.exp (scores[li] - max);
		sum += scores[li];
	}
	for (int li = 0; li < numLabels; li++) {
		scores[li] /= sum;
	}
}
 
开发者ID:kostagiolasn,项目名称:NucleosomePatternClassifier,代码行数:36,代码来源:RankMaxEnt.java

示例15: evaluateInstanceList

import cc.mallet.types.Instance; //导入方法依赖的package包/类
/**
 * Computes the token-level accuracy of the trainer's transducer on the given instances,
 * stores it in {@code accuracy} under {@code description} and logs it.
 *
 * <p>Accuracy is the number of positions where the predicted output equals the true
 * output, divided by the total number of true-output positions.
 *
 * @param trainer trainer providing the transducer to evaluate
 * @param instances instances whose data and target are {@code Sequence}s
 * @param description key under which the accuracy is stored and logged
 */
public void evaluateInstanceList (TransducerTrainer trainer, InstanceList instances, String description) 
{
	Transducer transducer = trainer.getTransducer();
	int numCorrectTokens = 0;
	int totalTokens = 0;

	for (int i = 0; i < instances.size(); i++) {
		Instance instance = instances.get(i);
		Sequence input = (Sequence) instance.getData();
		Sequence trueOutput = (Sequence) instance.getTarget();
		assert (input.size() == trueOutput.size());
		Sequence predOutput = transducer.transduce (input);
		assert (predOutput.size() == trueOutput.size());

		int j = 0;
		while (j < trueOutput.size()) {
			totalTokens++;
			boolean correct = trueOutput.get(j).equals(predOutput.get(j));
			if (correct) {
				numCorrectTokens++;
			}
			j++;
		}
	}

	double acc = ((double)numCorrectTokens)/totalTokens;
	accuracy.put(description, acc);
	logger.info (description +" accuracy="+acc);
}
 
开发者ID:kostagiolasn,项目名称:NucleosomePatternClassifier,代码行数:29,代码来源:TokenAccuracyEvaluator.java


注:本文中的cc.mallet.types.Instance.getData方法示例由纯净天空整理自Github/MSDocs等开源代码及文档管理平台,相关代码片段筛选自各路编程大神贡献的开源项目,源码版权归原作者所有,传播和使用请参考对应项目的License;未经允许,请勿转载。