本文整理汇总了Java中cc.mallet.types.Instance.getData方法的典型用法代码示例。如果您正苦于以下问题：Java Instance.getData方法的具体用法？Java Instance.getData怎么用？Java Instance.getData使用的例子？那么，这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在类cc.mallet.types.Instance的用法示例。
在下文中一共展示了Instance.getData方法的15个代码示例，这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞，您的评价将有助于系统推荐出更棒的Java代码示例。
示例1: pipe
import cc.mallet.types.Instance; //导入方法依赖的package包/类
/**
 * Standardizes the values of the instance's {@code FeatureVector} in place.
 *
 * <p>For every stored feature whose {@code normalize} flag is set, the value is replaced by
 * {@code (value - mean) / sqrt(variance)}, using the per-feature statistics in {@code means}
 * and {@code variances}. A feature whose variance is not strictly positive is left unchanged:
 * dividing by a zero standard deviation would write NaN or an infinite value into the vector.
 *
 * @param carrier the instance to process; its data must be a {@code FeatureVector}
 * @return the same instance, with its feature vector rescaled in place
 * @throws IllegalArgumentException if the instance data is not a {@code FeatureVector}
 * @throws GateRuntimeException if {@code means} or {@code variances} does not have one entry
 *     per data alphabet entry
 */
public Instance pipe(Instance carrier) {
  Object data = carrier.getData();
  if (!(data instanceof FeatureVector)) {
    System.out.println(data.getClass());
    throw new IllegalArgumentException(
        "Data must be of type FeatureVector not " + data.getClass() + " we got " + data);
  }
  int alphabetSize = this.getDataAlphabet().size();
  if (this.means.length != alphabetSize || this.variances.length != alphabetSize) {
    throw new GateRuntimeException(
        "Size mismatch, alphabet=" + getDataAlphabet().size() + ", stats=" + means.length);
  }
  FeatureVector fv = (FeatureVector) data;
  int[] indices = fv.getIndices();
  double[] values = fv.getValues();
  for (int i = 0; i < indices.length; i++) {
    int index = indices[i];
    if (!normalize[index]) {
      continue;
    }
    double variance = variances[index];
    // Written as a negated comparison so that a NaN variance is skipped as well.
    if (!(variance > 0.0)) {
      continue;
    }
    double newvalue = (values[i] - means[index]) / Math.sqrt(variance);
    fv.setValue(index, newvalue);
  }
  return carrier;
}
示例2: pipe
import cc.mallet.types.Instance; //导入方法依赖的package包/类
/**
 * Replaces the {@code AgglomerativeNeighbor} carried by the instance with a feature vector and
 * sets a "YES"/"NO" target label.
 *
 * <p>The features come from the records of the neighbor's new (merged) cluster, via the
 * exact-match, approximate-match and substring-match helpers applied in that order. The target
 * is "YES" when the first members of the two old clusters have the same label in the original
 * clustering, and "NO" otherwise.
 *
 * @param carrier instance whose data is an {@code AgglomerativeNeighbor}
 * @return the same instance with its data and target replaced
 */
public Instance pipe(Instance carrier) {
  AgglomerativeNeighbor neighbor = (AgglomerativeNeighbor) carrier.getData();
  Clustering clustering = neighbor.getOriginal();
  int[] firstCluster = neighbor.getOldClusters()[0];
  int[] secondCluster = neighbor.getOldClusters()[1];
  InstanceList instances = clustering.getInstances();

  // The first record supplies the field and value alphabets used by all helpers.
  Record[] records = array2Records(neighbor.getNewCluster(), instances);
  Alphabet fieldAlphabet = records[0].fieldAlphabet();
  Alphabet valueAlphabet = records[0].valueAlphabet();

  // Each helper receives the list built so far and returns the extended list.
  PropertyList features = null;
  features = addExactMatch(records, fieldAlphabet, valueAlphabet, features);
  features = addApproxMatch(records, fieldAlphabet, valueAlphabet, features);
  features = addSubstringMatch(records, fieldAlphabet, valueAlphabet, features);
  carrier.setData(new FeatureVector(getDataAlphabet(), features, true));

  LabelAlphabet labelAlphabet = (LabelAlphabet) getTargetAlphabet();
  String label;
  if (clustering.getLabel(firstCluster[0]) == clustering.getLabel(secondCluster[0])) {
    label = "YES";
  } else {
    label = "NO";
  }
  carrier.setTarget(labelAlphabet.lookupLabel(label));
  return carrier;
}
示例3: main
import cc.mallet.types.Instance; //导入方法依赖的package包/类
/**
 * Command-line driver: runs every file named in {@code args} through an
 * {@code Input2CharSequence} + {@code SGML2TokenSequence} pipeline and logs, for each token
 * position, the target token text followed by the data token text.
 *
 * <p>Any exception is printed to standard output together with its stack trace; it is not
 * rethrown.
 *
 * @param args paths of the files to process
 */
public static void main(String[] args) {
  try {
    // Alternative second stage, kept for reference:
    //   new SGML2TokenSequence(new CharSequenceLexer(Pattern.compile(".")), "O")
    Pipe[] stages = {new Input2CharSequence(), new SGML2TokenSequence()};
    Pipe pipeline = new SerialPipes(stages);
    for (String path : args) {
      Instance carrier = pipeline.instanceFrom(new Instance(new File(path), null, null, null));
      TokenSequence data = (TokenSequence) carrier.getData();
      TokenSequence target = (TokenSequence) carrier.getTarget();
      logger.finer("===");
      logger.info(path);
      for (int j = 0; j < data.size(); j++) {
        logger.info(target.get(j).getText() + " " + data.get(j).getText());
      }
    }
  } catch (Exception e) {
    System.out.println(e);
    e.printStackTrace();
  }
}
示例4: pipe
import cc.mallet.types.Instance; //导入方法依赖的package包/类
/**
 * Marks every token whose pattern-match count is lower than that of the preceding token.
 *
 * <p>For each token, only the part of its text before the first match of {@code csvSeparator}
 * (used as a regular expression by {@code String.split}) is examined. The number of matches of
 * {@code pattern} in that part is counted, and if it is smaller than the previous token's
 * count, {@code feature} is set to 1.0 on the token. The first token is compared against a
 * count of 0 and is therefore never marked.
 *
 * @param carrier instance whose data is a {@code TokenSequence}
 * @return the same instance, with features added to its tokens in place
 */
@Override
public Instance pipe(Instance carrier) {
  TokenSequence tokenSequence = (TokenSequence) carrier.getData();
  int prevCount = 0;
  for (int i = 0; i < tokenSequence.size(); i++) {
    Token token = tokenSequence.get(i);
    // A limit of 2 keeps trailing empty strings, so the result always has an element 0.
    // Without the limit, text made up only of separators (e.g. ",") splits into an empty
    // array and indexing [0] throws ArrayIndexOutOfBoundsException.
    String tokenText = token.getText().split(this.csvSeparator, 2)[0];
    int count = 0;
    Matcher matcher = this.pattern.matcher(tokenText);
    while (matcher.find()) {
      count++;
    }
    if (count < prevCount) {
      token.setFeatureValue(this.feature, 1.0);
    }
    prevCount = count;
  }
  return carrier;
}
示例5: pipe
import cc.mallet.types.Instance; //导入方法依赖的package包/类
/**
 * Adds a {@code HEADER=<word>} feature to capitalized tokens of sequences that start with an
 * upper-case header.
 *
 * <p>A sequence qualifies when it has more than three tokens, the token at index 2 or index 3
 * is "-", and the token at index 1 consists only of the letters A-Z. Sequences whose header
 * is "PRESS" are returned unchanged.
 *
 * @param carrier instance whose data is a {@code TokenSequence}
 * @return the same instance, with features added to its tokens in place
 */
public Instance pipe(Instance carrier) {
  TokenSequence tokens = (TokenSequence) carrier.getData();
  if (tokens.size() <= 3) {
    return carrier;
  }
  boolean dashFollows =
      tokens.get(2).getText().equals("-") || tokens.get(3).getText().equals("-");
  if (!dashFollows) {
    return carrier;
  }
  String header = tokens.get(1).getText();
  if (!header.matches("[A-Z]+")) {
    return carrier;
  }
  if (header.equals("PRESS")) {
    // Don't bother with "PRESS DIGEST" headers.
    return carrier;
  }
  String featureName = "HEADER=" + header;
  for (int i = 0; i < tokens.size(); i++) {
    Token token = tokens.get(i);
    // Restricted to capitalized words: applying the feature to every token would create an
    // immense number of possible feature conjunctions (every word with each HEADER= feature).
    if (token.getText().matches("^[A-Z].*")) {
      token.setFeatureValue(featureName, 1.0);
    }
  }
  return carrier;
}
示例6: pipe
import cc.mallet.types.Instance; //导入方法依赖的package包/类
/**
 * Removes single-space spans from a {@code StringTokenization} and labels each remaining span
 * as "start" or "notstart".
 *
 * <p>A span is labelled "start" when it is the first span or directly follows a span whose
 * text is exactly " "; otherwise it is labelled "notstart". The instance data becomes the
 * filtered tokenization, the source becomes the concatenated text of the kept spans, and the
 * target becomes the label sequence when target processing is enabled.
 *
 * @param carrier instance whose data is a {@code StringTokenization}
 * @return the same instance with data, source and (optionally) target replaced
 */
public Instance pipe(Instance carrier) {
  StringTokenization original = (StringTokenization) carrier.getData();
  StringTokenization filtered =
      new StringTokenization((CharSequence) original.getDocument());
  final LabelAlphabet dict = (LabelAlphabet) getTargetAlphabet();
  LabelSequence labels = new LabelSequence(dict);
  // The returned labels are not used below.
  // NOTE(review): presumably these calls register both labels in the alphabet up front - confirm.
  dict.lookupLabel("start");
  dict.lookupLabel("notstart");

  boolean atWordStart = true;
  StringBuilder text = new StringBuilder();
  for (int i = 0; i < original.size(); i++) {
    StringSpan span = (StringSpan) original.getSpan(i);
    String spanText = span.getText();
    if (spanText.equals(" ")) {
      atWordStart = true;
      continue;
    }
    text.append(spanText);
    filtered.add(span);
    if (atWordStart) {
      labels.add("start");
    } else {
      labels.add("notstart");
    }
    atWordStart = false;
  }

  if (isTargetProcessing()) {
    carrier.setTarget(labels);
  }
  carrier.setData(filtered);
  carrier.setSource(text.toString());
  return carrier;
}
示例7: preProcess
import cc.mallet.types.Instance; //导入方法依赖的package包/类
/**
 * Counts constrained feature occurrences over the data and reports which instances contain
 * any of them.
 *
 * <p>For every feature vector of every instance, each stored feature index that is a key of
 * {@code constraints} increments that constraint's {@code count}. In addition, once per
 * feature vector, a constraint keyed by the alphabet size is incremented if present.
 *
 * @param data instances whose data are {@code FeatureVectorSequence}s
 * @return a bit set with bit {@code i} set when instance {@code i} touched at least one
 *     constraint
 */
public BitSet preProcess(InstanceList data) {
  BitSet touched = new BitSet(data.size());
  for (int instanceIndex = 0; instanceIndex < data.size(); instanceIndex++) {
    FeatureVectorSequence sequence =
        (FeatureVectorSequence) data.get(instanceIndex).getData();
    for (int position = 0; position < sequence.size(); position++) {
      FeatureVector vector = sequence.get(position);
      for (int loc = 0; loc < vector.numLocations(); loc++) {
        int featureIndex = vector.indexAtLocation(loc);
        if (constraints.containsKey(featureIndex)) {
          constraints.get(featureIndex).count += 1;
          touched.set(instanceIndex);
        }
      }
      // NOTE(review): the key one past the last alphabet index presumably denotes a
      // default/bias feature that applies to every position - confirm.
      int alphabetSize = vector.getAlphabet().size();
      if (constraints.containsKey(alphabetSize)) {
        touched.set(instanceIndex);
        constraints.get(alphabetSize).count += 1;
      }
    }
  }
  return touched;
}
示例8: pipe
import cc.mallet.types.Instance; //导入方法依赖的package包/类
/**
 * Rescales the values of the instance's {@code FeatureVector} in place to
 * {@code (value - min) / (max - min)}.
 *
 * <p>Only features whose {@code normalize} flag is set and whose {@code max - min} span is
 * strictly positive are changed; all other values are left as they are.
 *
 * @param carrier the instance to process; its data must be a {@code FeatureVector}
 * @return the same instance, with its feature vector rescaled in place
 * @throws IllegalArgumentException if the instance data is not a {@code FeatureVector}
 * @throws GateRuntimeException if {@code min} or {@code max} does not have one entry per data
 *     alphabet entry
 */
public Instance pipe(Instance carrier) {
  Object data = carrier.getData();
  if (!(data instanceof FeatureVector)) {
    System.out.println(data.getClass());
    throw new IllegalArgumentException(
        "Data must be of type FeatureVector not " + data.getClass() + " we got " + data);
  }
  int alphabetSize = getDataAlphabet().size();
  if (min.length != alphabetSize || max.length != alphabetSize) {
    throw new GateRuntimeException(
        "Size mismatch, alphabet=" + getDataAlphabet().size() + ", stats=" + min.length);
  }
  FeatureVector vector = (FeatureVector) data;
  int[] featureIndices = vector.getIndices();
  double[] featureValues = vector.getValues();
  for (int loc = 0; loc < featureIndices.length; loc++) {
    int feature = featureIndices[loc];
    double lower = min[feature];
    double upper = max[feature];
    double span = upper - lower;
    if (!normalize[feature] || !(span > 0.0)) {
      continue;
    }
    // NOTE: this could in theory cause an overflow error but we ignore this here!
    vector.setValue(feature, (featureValues[loc] - lower) / span);
  }
  return carrier;
}
示例9: getFromMallet
import cc.mallet.types.Instance; //导入方法依赖的package包/类
/**
 * Creates the LIBSVM problem representation from a Mallet corpus representation.
 *
 * <p>Each Mallet instance becomes one row. A {@code Label} target is stored as its label
 * index and a {@code Double} target is stored as is. The instance data is read as a
 * {@code SparseVector}, and every stored (index, value) pair becomes an {@code svm_node} whose
 * index is shifted by one.
 *
 * @param crm the corpus representation whose Mallet instance list is converted
 * @return the LIBSVM problem with {@code l}, {@code y} and {@code x} filled in
 * @throws GateRuntimeException if an instance target is neither a {@code Label} nor a
 *     {@code Double}
 */
public static svm_problem getFromMallet(CorpusRepresentationMallet crm) {
  InstanceList instances = crm.getRepresentationMallet();
  svm_problem prob = new svm_problem();
  int count = instances.size();
  prob.l = count;
  prob.y = new double[prob.l];
  prob.x = new svm_node[prob.l][];
  for (int row = 0; row < count; row++) {
    Instance instance = instances.get(row);

    // Target: a label is converted to its index, a double is used directly.
    Object target = instance.getTarget();
    if (target instanceof Label) {
      prob.y[row] = ((Label) target).getIndex();
    } else if (target instanceof Double) {
      prob.y[row] = (Double) target;
    } else {
      throw new GateRuntimeException(
          "Odd target in mallet instance, cannot convert to LIBSVM: " + target);
    }

    // Features: one node per stored location of the sparse vector.
    SparseVector vector = (SparseVector) instance.getData();
    int[] indices = vector.getIndices();
    double[] values = vector.getValues();
    svm_node[] nodes = new svm_node[indices.length];
    for (int col = 0; col < indices.length; col++) {
      svm_node node = new svm_node();
      node.index = indices[col] + 1; // NOTE: LibSVM location indices have to start with 1
      node.value = values[col];
      nodes[col] = node;
    }
    prob.x[row] = nodes;
  }
  return prob;
}
示例10: gatherConstraints
import cc.mallet.types.Instance; //导入方法依赖的package包/类
/**
 * Rebuilds the per-instance probability lists and the constraint factors from the training
 * set.
 *
 * <p>For each training instance a {@code SumLatticePR} is run with the auxiliary model. Its
 * first and last gamma rows and its xis are passed through {@code toProbabilities} and stored
 * in the initial, final and transition lists. A {@code SumLatticeKL} is then constructed with
 * those probabilities and an incrementor over {@code constraints}.
 *
 * @param auxModel the auxiliary model passed to each {@code SumLatticePR}
 * @param cachedDots cached values, indexed first by training instance, passed to each
 *     {@code SumLatticePR}
 */
@SuppressWarnings("unchecked")
protected void gatherConstraints(PRAuxiliaryModel auxModel, double[][][][] cachedDots) {
  initialProbList = new ArrayList<double[]>();
  finalProbList = new ArrayList<double[]>();
  transitionProbList = new ArrayList<double[][][]>();

  constraints = new CRF.Factors(crf.getParameters());
  expectations = new CRF.Factors(crf.getParameters());
  constraints.zero();

  for (int index = 0; index < trainingSet.size(); index++) {
    Instance instance = trainingSet.get(index);
    Sequence input = (Sequence) instance.getData();
    SumLatticePR lattice =
        new SumLatticePR(
            crf, index, input, null, auxModel, cachedDots[index], false, null, null, true);

    double[][] gammas = lattice.getGammas();
    double[] initialProbs = toProbabilities(gammas[0]);
    initialProbList.add(initialProbs);
    double[] finalProbs = toProbabilities(gammas[gammas.length - 1]);
    finalProbList.add(finalProbs);

    // The return value is ignored here and the same array is stored and reused below.
    double[][][] transitionProbs = lattice.getXis();
    toProbabilities(transitionProbs);
    transitionProbList.add(transitionProbs);

    // Constructed only for its effect on the incrementor; the lattice object is discarded.
    new SumLatticeKL(
        crf, input, initialProbs, finalProbs, transitionProbs, null,
        constraints.new Incrementor());
  }
}
示例11: pipe
import cc.mallet.types.Instance; //导入方法依赖的package包/类
/**
 * Replaces the instance's character data with one capture group of the first match of
 * {@code regex}.
 *
 * <p>When the regex is found, the data becomes group {@code groupIndex} of that match; when it
 * is not found, the data becomes the empty string.
 *
 * @param carrier instance whose data is a {@code CharSequence}
 * @return the same instance with its data replaced
 */
public Instance pipe(Instance carrier) {
  Matcher matcher = regex.matcher((CharSequence) carrier.getData());
  String replacement = matcher.find() ? matcher.group(groupIndex) : "";
  carrier.setData(replacement);
  return carrier;
}
示例12: pipe
import cc.mallet.types.Instance; //导入方法依赖的package包/类
/**
 * Log-scales the values of the instance's {@code AugmentableFeatureVector} in place.
 *
 * <p>Every stored value {@code v >= 1} is replaced by {@code ln(v) + 1}; smaller values are
 * left unchanged.
 *
 * @param carrier instance whose data is an {@code AugmentableFeatureVector}
 * @return the same instance, with its feature vector modified in place
 */
public Instance pipe(Instance carrier) {
  AugmentableFeatureVector vector = (AugmentableFeatureVector) carrier.getData();
  // Walk the locations from last to first.
  int location = vector.numLocations();
  while (--location >= 0) {
    double value = vector.valueAtLocation(location);
    if (value >= 1) {
      vector.setValueAtLocation(location, Math.log(value) + 1);
    }
  }
  return carrier;
}
开发者ID:kostagiolasn,项目名称:NucleosomePatternClassifier,代码行数:12,代码来源:AugmentableFeatureVectorLogScale.java
示例13: pipe
import cc.mallet.types.Instance; //导入方法依赖的package包/类
/**
 * Adds to every token the features of the tokens inside a window around it.
 *
 * <p>The window covers offsets {@code leftBoundary} (inclusive) to {@code rightBoundary}
 * (exclusive) relative to the token. Offset 0 is skipped unless {@code includeCurrentToken} is
 * set. Positions before the sequence start take their features from {@code startfs}, and
 * positions past the end take them from {@code endfs}. Each copied feature is renamed with
 * {@code namePrefix}, or with {@code namePrefixLeft} when that is non-null and the offset is
 * not positive. When {@code featureRegex} is non-null, only matching keys are copied.
 *
 * <p>The new feature lists are collected first and installed in a second pass, so every window
 * reads the tokens' original features.
 *
 * @param carrier instance whose data is a {@code TokenSequence}
 * @return the same instance, with the feature lists of its tokens replaced
 */
public Instance pipe(Instance carrier) {
  TokenSequence tokens = (TokenSequence) carrier.getData();
  int length = tokens.size();
  PropertyList[] augmented = new PropertyList[length];

  for (int i = 0; i < length; i++) {
    augmented[i] = tokens.get(i).getFeatures();
    for (int offset = leftBoundary; offset < rightBoundary; offset++) {
      if (offset == 0 && !includeCurrentToken) {
        continue;
      }
      int position = i + offset;
      PropertyList windowFeatures;
      if (position < 0) {
        windowFeatures = startfs[-position];
      } else if (position >= length) {
        windowFeatures = endfs[position - length];
      } else {
        windowFeatures = tokens.get(position).getFeatures();
      }

      String prefix = (namePrefixLeft == null || offset > 0) ? namePrefix : namePrefixLeft;
      PropertyList.Iterator iterator = windowFeatures.iterator();
      while (iterator.hasNext()) {
        iterator.next();
        String key = iterator.getKey();
        if (featureRegex != null && !featureRegex.matcher(key).matches()) {
          continue;
        }
        augmented[i] = PropertyList.add(prefix + key, iterator.getNumericValue(), augmented[i]);
      }
    }
  }

  // Put the new PropertyLists in place.
  for (int i = 0; i < length; i++) {
    tokens.get(i).setFeatures(augmented[i]);
  }
  return carrier;
}
示例14: getClassificationScores
import cc.mallet.types.Instance; //导入方法依赖的package包/类
/**
 * Fills {@code scores} with a normalized score for each feature vector of the instance's
 * {@code FeatureVectorSequence}.
 *
 * <p>Each raw score is the parameter at {@code defaultFeatureIndex} plus the dot product of
 * parameter row 0 with the feature vector. The raw scores are then shifted by their maximum,
 * exponentiated and divided by their sum.
 *
 * @param instance instance whose data is a {@code FeatureVectorSequence}
 * @param scores output array, overwritten in place; asserted to have one slot per feature
 *     vector
 */
public void getClassificationScores(Instance instance, double[] scores) {
  FeatureVectorSequence fvs = (FeatureVectorSequence) instance.getData();
  int numFeatures = instance.getDataAlphabet().size() + 1;
  int numLabels = fvs.size();
  assert (scores.length == fvs.size());

  for (int k = 0; k < fvs.size(); k++) {
    FeatureVector fv = (FeatureVector) fvs.get(k);
    // The feature vector's dictionary must be the one our data pipe uses (and thus the one our
    // parameters are indexed by).
    assert (fv.getAlphabet() == this.instancePipe.getDataAlphabet());

    // Default-feature weight plus the weighted features; parameter row 0 is always used.
    double defaultWeight = parameters[0 * numFeatures + defaultFeatureIndex];
    double weighted =
        MatrixOps.rowDotProduct(
            parameters,
            numFeatures,
            0,
            fv,
            defaultFeatureIndex,
            (perClassFeatureSelection == null ? featureSelection : perClassFeatureSelection[0]));
    scores[k] = defaultWeight + weighted;
  }

  // Move scores to a range where exp() is accurate, and normalize.
  double max = MatrixOps.max(scores);
  double sum = 0;
  for (int li = 0; li < numLabels; li++) {
    double exponentiated = Math.exp(scores[li] - max);
    scores[li] = exponentiated;
    sum += exponentiated;
  }
  for (int li = 0; li < numLabels; li++) {
    scores[li] /= sum;
  }
}
示例15: evaluateInstanceList
import cc.mallet.types.Instance; //导入方法依赖的package包/类
/**
 * Computes token-level accuracy of the trainer's transducer on a list of instances.
 *
 * <p>Each instance's input sequence is transduced and compared position by position with its
 * target sequence using {@code equals}. The ratio of matching positions to all positions is
 * stored in {@code accuracy} under {@code description} and logged. With no tokens at all the
 * ratio is the floating-point result of 0.0 / 0, i.e. NaN.
 *
 * @param trainer supplies the transducer to evaluate
 * @param instances instances whose data and target are {@code Sequence}s
 * @param description key under which the accuracy is stored and logged
 */
public void evaluateInstanceList(
    TransducerTrainer trainer, InstanceList instances, String description) {
  Transducer transducer = trainer.getTransducer();
  int totalTokens = 0;
  int numCorrectTokens = 0;

  for (Instance instance : instances) {
    Sequence input = (Sequence) instance.getData();
    Sequence trueOutput = (Sequence) instance.getTarget();
    assert (input.size() == trueOutput.size());
    Sequence predOutput = transducer.transduce(input);
    assert (predOutput.size() == trueOutput.size());

    int length = trueOutput.size();
    totalTokens += length;
    for (int j = 0; j < length; j++) {
      if (trueOutput.get(j).equals(predOutput.get(j))) {
        numCorrectTokens++;
      }
    }
  }

  double acc = ((double) numCorrectTokens) / totalTokens;
  accuracy.put(description, acc);
  logger.info(description +" accuracy="+acc);
}