本文整理汇总了Java中cc.mallet.types.InstanceList.getPipe方法的典型用法代码示例。如果您正苦于以下问题:Java InstanceList.getPipe方法的具体用法?Java InstanceList.getPipe怎么用?Java InstanceList.getPipe使用的例子?那么, 这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在类cc.mallet.types.InstanceList
的用法示例。
在下文中一共展示了InstanceList.getPipe方法的11个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Java代码示例。
示例1: train
import cc.mallet.types.InstanceList; //导入方法依赖的package包/类
/**
 * Grows (and optionally prunes) a C4.5 decision tree from the given data.
 *
 * @param trainingList labeled instances to learn from; must not carry a
 *                     FeatureSelection (unsupported)
 * @return the learned {@code C45} classifier, also stored in {@code this.classifier}
 * @throws UnsupportedOperationException if the training list has a feature selection
 */
public C45 train (InstanceList trainingList)
{
    // Feature selection is not implemented for this trainer; fail fast.
    if (trainingList.getFeatureSelection() != null)
        throw new UnsupportedOperationException ("FeatureSelection not yet implemented.");

    C45.Node rootNode = new C45.Node(trainingList, null, m_minNumInsts);
    splitTree(rootNode, 0);

    C45 learnedTree = new C45 (trainingList.getPipe(), rootNode);
    logger.info("C45 learned: (size=" + learnedTree.getSize() + ")\n");
    learnedTree.print();

    if (m_doPruning) {
        learnedTree.prune();
        logger.info("\nPruned C45: (size=" + learnedTree.getSize() + ")\n");
        rootNode.print();
    }

    // Freeze the tree so later queries cannot expand it further.
    rootNode.stopGrowth();
    this.classifier = learnedTree;
    return classifier;
}
示例2: getCRF
import cc.mallet.types.InstanceList; //导入方法依赖的package包/类
/**
 * Builds an order-N CRF over the training data's pipe with a dense weight layout.
 *
 * @param training     instances supplying the pipe and label alphabet
 * @param orders       Markov orders of the state transitions to add
 * @param defaultLabel label used for default transitions
 * @param forbidden    regex of forbidden label transitions
 * @param allowed      regex of allowed label transitions
 * @param connected    whether to fully connect the generated states
 * @return a CRF whose only possible start state is the one returned by addOrderNStates
 */
public static CRF getCRF(InstanceList training, int[] orders, String defaultLabel, String forbidden, String allowed, boolean connected) {
    Pattern forbiddenPattern = Pattern.compile(forbidden);
    Pattern allowedPattern = Pattern.compile(allowed);
    CRF result = new CRF(training.getPipe(), (Pipe)null);
    String start = result.addOrderNStates(training, orders, null,
                                          defaultLabel, forbiddenPattern, allowedPattern, connected);
    // Make every state an impossible start except the designated one.
    for (int s = 0; s < result.numStates(); s++) {
        result.getState(s).setInitialWeight (Transducer.IMPOSSIBLE_WEIGHT);
    }
    result.getState(start).setInitialWeight(0.0);
    result.setWeightsDimensionDensely();
    return result;
}
示例3: testRandomTrainedOn
import cc.mallet.types.InstanceList; //导入方法依赖的package包/类
/**
 * Fills the given list with synthetic random token sequences, trains a MaxEnt
 * classifier on it, and reports accuracy on a fresh random test set.
 *
 * @param training an (initially empty) list to populate and train on
 * @return accuracy on the held-out random test set
 */
private double testRandomTrainedOn (InstanceList training)
{
    ClassifierTrainer trainer = new MaxEntTrainer ();
    Alphabet dict = dictOfSize (3);
    String[] classNames = new String[] {"class0", "class1", "class2"};
    // Fixed seed keeps this test deterministic.
    Randoms random = new Randoms (1);

    training.addThruPipe (new RandomTokenSequenceIterator (random, new Dirichlet(dict, 2.0),
                                                           30, 0, 10, 200, classNames));
    InstanceList testing = new InstanceList (training.getPipe ());
    testing.addThruPipe (new RandomTokenSequenceIterator (random, new Dirichlet(dict, 2.0),
                                                          30, 0, 10, 200, classNames));
    System.out.println ("Training set size = "+training.size());
    System.out.println ("Testing set size = "+testing.size());

    Classifier classifier = trainer.train (training);
    String classifierName = classifier.getClass().getName();

    System.out.println ("Accuracy on training set:");
    System.out.println (classifierName + ": " + new Trial (classifier, training).getAccuracy());

    double testAccuracy = new Trial (classifier, testing).getAccuracy();
    System.out.println ("Accuracy on testing set:");
    System.out.println (classifierName + ": " + testAccuracy);
    return testAccuracy;
}
示例4: TokenClassifiers
import cc.mallet.types.InstanceList; //导入方法依赖的package包/类
/**
 * Builds a token-classifier ensemble over the training list's pipe and
 * immediately trains it with cross-validation.
 *
 * @param trainer   the underlying per-token classifier trainer
 * @param trainList labeled training instances
 * @param randSeed  seed for the cross-validation split
 * @param numCV     number of cross-validation folds
 */
public TokenClassifiers(ClassifierTrainer trainer, InstanceList trainList, int randSeed, int numCV)
{
    super(trainList.getPipe());
    // Record the configuration before kicking off training.
    m_randSeed = randSeed;
    m_numCV = numCV;
    m_trainer = trainer;
    m_table = new HashMap();
    doTraining(trainList);
}
示例5: MaxEntOptimizableByGE
import cc.mallet.types.InstanceList; //导入方法依赖的package包/类
/**
 * Builds a GE (generalized expectation) training objective for a MaxEnt model.
 *
 * @param trainingList   list with unlabeled training instances
 * @param constraints    feature expectation constraints
 * @param initClassifier initial classifier, or {@code null} to start from zero weights
 */
public MaxEntOptimizableByGE(InstanceList trainingList, ArrayList<MaxEntGEConstraint> constraints, MaxEnt initClassifier) {
    temperature = 1.0;
    objWeight = 1.0;
    gaussianPriorVariance = 1.0;
    this.trainingList = trainingList;

    // Parameter layout: one weight per (feature, label) pair plus one
    // "default feature" slot per label, hence (numFeatures + 1) * numLabels.
    int numFeatures = trainingList.getDataAlphabet().size();
    int numLabels = trainingList.getTargetAlphabet().size();
    defaultFeatureIndex = numFeatures;
    cachedValue = 0;
    cachedGradient = new double[(numFeatures + 1) * numLabels];

    if (initClassifier == null) {
        this.parameters = new double[(numFeatures + 1) * numLabels];
        this.classifier = new MaxEnt(trainingList.getPipe(), parameters);
    } else {
        // Share the initial classifier's parameter array so optimization
        // updates it in place.
        this.parameters = initClassifier.parameters;
        this.classifier = initClassifier;
    }

    this.constraints = constraints;
    // Let each constraint precompute whatever it needs from the data.
    for (MaxEntGEConstraint constraint : constraints) {
        constraint.preProcess(trainingList);
    }
}
示例6: train
import cc.mallet.types.InstanceList; //导入方法依赖的package包/类
/**
 * Learns a decision tree from the training data, honoring any feature
 * selection attached to the list.
 *
 * @param trainingList labeled instances to learn from
 * @return the learned {@code DecisionTree}, also stored in {@code this.classifier}
 */
public DecisionTree train (InstanceList trainingList) {
    FeatureSelection features = trainingList.getFeatureSelection();
    DecisionTree.Node rootNode = new DecisionTree.Node (trainingList, null, features);
    splitTree (rootNode, features, 0);
    // Freeze the tree and mark training as complete before publishing it.
    rootNode.stopGrowth();
    finished = true;
    System.out.println ("DecisionTree learned:");
    rootNode.print();
    this.classifier = new DecisionTree (trainingList.getPipe(), rootNode);
    return classifier;
}
示例7: train
import cc.mallet.types.InstanceList; //导入方法依赖的package包/类
/**
 * Semi-supervised EM training of a Naive Bayes classifier: trains on the
 * labeled instances, then repeatedly relabels the unlabeled instances with the
 * current model (weighted by {@code unlabeledDataWeight}) and retrains until
 * the log-likelihood stabilizes.
 *
 * @param trainingSet mix of labeled instances (non-null labeling) and
 *                    unlabeled instances (null labeling)
 * @return the converged NaiveBayes classifier
 */
public NaiveBayes train (InstanceList trainingSet)
{
    // Get a classifier trained on the labeled examples only
    NaiveBayes c = (NaiveBayes) nbTrainer.newClassifierTrainer().train (trainingSet);
    double prevLogLikelihood = 0, logLikelihood = 0;
    boolean converged = false;
    int iteration = 0;
    while (!converged) {
        // E-step: build a fully-labeled copy of the training set, filling in
        // missing labels with the current model's predictions.
        InstanceList trainingSet2 = new InstanceList (trainingSet.getPipe());
        for (int ii = 0; ii < trainingSet.size(); ii++) {
            Instance inst = trainingSet.get(ii);
            if (inst.getLabeling() != null)
                trainingSet2.add(inst, 1.0);
            else {
                Instance inst2 = inst.shallowCopy();
                inst2.unLock();
                inst2.setLabeling(c.classify(inst).getLabeling());
                inst2.lock();
                // Model-imputed labels count less than true labels.
                trainingSet2.add(inst2, unlabeledDataWeight);
            }
        }
        // M-step: retrain on the completed data.
        c = (NaiveBayes) nbTrainer.newClassifierTrainer().train (trainingSet2);
        logLikelihood = c.dataLogLikelihood (trainingSet2);
        System.err.println ("Loglikelihood = "+logLikelihood);
        iteration++;
        // Wait for a relative change in log-likelihood below 0.01% and at
        // least 10 iterations. (Bug fix: the iteration floor promised by the
        // original comment was never enforced, and the old division-based
        // test could go NaN when logLikelihood was 0, looping forever.)
        if (iteration >= 10
                && Math.abs(logLikelihood - prevLogLikelihood) <= 0.0001 * Math.abs(logLikelihood))
            converged = true;
        prevLogLikelihood = logLikelihood;
    }
    return c;
}
示例8: combineLists
import cc.mallet.types.InstanceList; //导入方法依赖的package包/类
/**
 * Concatenates two instance lists into a fresh list built on the first
 * list's pipe.
 *
 * @param li base list; its pipe is reused for the result
 * @param lj list whose instances follow those of {@code li}
 * @return A new {@link InstanceList} where <code>lj</code> is appended to <code>li</code>.
 */
public static InstanceList combineLists (InstanceList li,
                                         InstanceList lj) {
    InstanceList merged = new InstanceList(li.getPipe());
    for (Instance inst : li) {
        merged.add(inst);
    }
    for (Instance inst : lj) {
        merged.add(inst);
    }
    return merged;
}
示例9: DMROptimizable
import cc.mallet.types.InstanceList; //导入方法依赖的package包/类
/**
 * Builds the optimizable objective around a MaxEnt classifier, either adopting
 * the supplied initial classifier's parameters or allocating fresh zeroed ones,
 * then scans the training data for NaN feature values.
 * (Presumably used for Dirichlet-Multinomial Regression, per the class name —
 * the target of each instance is read as a FeatureVector, not a Label.)
 *
 * @param instances         training list; supplies data/target alphabets and the pipe
 * @param initialClassifier optional warm-start classifier; may be null
 */
public DMROptimizable (InstanceList instances, MaxEnt initialClassifier) {
this.trainingList = instances;
Alphabet alphabet = instances.getDataAlphabet();
Alphabet labelAlphabet = instances.getTargetAlphabet();
this.numLabels = labelAlphabet.size();
// Add one feature for the "default feature".
this.numFeatures = alphabet.size() + 1; // add a spot for the intercept term
//System.out.println("num features: " + numFeatures + " numLabels: " + numLabels);
this.defaultFeatureIndex = numFeatures - 1;
this.parameters = new double [numLabels * numFeatures];
//this.constraints = new double [numLabels * numFeatures];
this.cachedGradient = new double [numLabels * numFeatures];
if (initialClassifier != null) {
// Warm start: adopt the classifier's own parameter array (the freshly
// allocated one above is discarded) so optimization updates it in place.
this.classifier = initialClassifier;
this.parameters = classifier.getParameters();
this.defaultFeatureIndex = classifier.getDefaultFeatureIndex();
assert (initialClassifier.getInstancePipe() == instances.getPipe());
}
else if (this.classifier == null) {
this.classifier =
new MaxEnt (instances.getPipe(), parameters);
}
formatter = new DecimalFormat("0.###E0");
cachedValueStale = true;
cachedGradientStale = true;
// Initialize the constraints
logger.fine("Number of instances in training list = " + trainingList.size());
// Diagnostic pass only: log any NaN feature values. Instances whose target
// is null are skipped (treated as unusable for the objective).
for (Instance instance : trainingList) {
FeatureVector multinomialValues = (FeatureVector) instance.getTarget();
if (multinomialValues == null)
continue;
FeatureVector features = (FeatureVector) instance.getData();
assert (features.getAlphabet() == alphabet);
boolean hasNaN = false;
for (int i = 0; i < features.numLocations(); i++) {
if (Double.isNaN(features.valueAtLocation(i))) {
logger.info("NaN for feature " + alphabet.lookupObject(features.indexAtLocation(i)).toString());
hasNaN = true;
}
}
if (hasNaN) {
logger.info("NaN in instance: " + instance.getName());
}
}
//TestMaximizable.testValueAndGradientCurrentParameters (this);
}
示例10: train
import cc.mallet.types.InstanceList; //导入方法依赖的package包/类
/**
 * Trains winnow on the instance list in a single pass, updating
 * {@link #weights weights} according to errors.
 * Treats multi-class classification as one binary winnow per label:
 * a label "fires" when its summed weights exceed the threshold theta.
 * @param trainingList Instance list to be trained on
 * @return Classifier object containing learned weights
 */
public Winnow train (InstanceList trainingList)
{
FeatureSelection selectedFeatures = trainingList.getFeatureSelection();
if (selectedFeatures != null)
// xxx Attend to FeatureSelection!!!
throw new UnsupportedOperationException ("FeatureSelection not yet implemented.");
// if "train" is run more than once,
// we will be reinitializing the weights
// TODO: provide method to save weights
// Freeze both alphabets so the weight matrix dimensions stay valid.
trainingList.getDataAlphabet().stopGrowth();
trainingList.getTargetAlphabet().stopGrowth();
Pipe dataPipe = trainingList.getPipe ();
Alphabet dict = (Alphabet) trainingList.getDataAlphabet ();
int numLabels = trainingList.getTargetAlphabet().size();
int numFeats = dict.size();
// Threshold scales with the feature count; nfactor is the multiplier.
this.theta = numFeats * this.nfactor;
this.weights = new double [numLabels][numFeats];
// init weights to 1
for(int i=0; i<numLabels; i++)
for(int j=0; j<numFeats; j++)
this.weights[i][j] = 1.0;
//System.out.println("Init weights to 1. Theta= "+theta);
// loop through all instances (one online pass; no repeated epochs)
for (int ii = 0; ii < trainingList.size(); ii++){
Instance inst = (Instance) trainingList.get(ii);
Labeling labeling = inst.getLabeling ();
FeatureVector fv = (FeatureVector) inst.getData ();
double[] results = new double [numLabels];
int fvisize = fv.numLocations();
int correctIndex = labeling.getBestIndex();
for(int rpos=0; rpos < numLabels; rpos++)
results[rpos]=0;
// sum up xi*wi for each class
// (only the features present in this instance contribute)
for(int fvi=0; fvi < fvisize; fvi++){
int fi = fv.indexAtLocation(fvi);
//System.out.println("feature index "+fi);
for(int lpos=0; lpos < numLabels; lpos++)
results[lpos] += this.weights[lpos][fi];
}
//System.out.println("In instance " + ii);
// make guess for each label using threshold
// update weights according to alpha and beta
// upon incorrect guess
for(int ri=0; ri < numLabels; ri++){
if(results[ri] > this.theta){ // guess 1
if(correctIndex != ri) // correct is 0: false positive, shrink weights
demote(ri, fv);
}
else{ // guess 0
if(correctIndex == ri) // correct is 1: false negative, grow weights
promote(ri, fv);
}
}
// System.out.println("Results guessed:")
// for(int x=0; x<numLabels; x++)
// System.out.println(results[x]);
// System.out.println("Correct label: "+correctIndex );
// System.out.println("Weights are");
// for(int h=0; h<numLabels; h++){
// for(int g=0; g<numFeats; g++)
// System.out.println(weights[h][g]);
// System.out.println("");
// }
}
classifier = new Winnow (dataPipe, weights, theta, numLabels, numFeats);
return classifier;
}
示例11: setup
import cc.mallet.types.InstanceList; //导入方法依赖的package包/类
/**
 * Lazily initializes (or re-validates) the trainer's alphabets, pipe, and
 * per-label Multinomial estimators so that incremental training calls can
 * interleave lists and single instances. Safe to call repeatedly: it grows
 * the estimator array when the target alphabet has gained labels.
 *
 * @param instances training list, or null when training on a lone instance
 * @param instance  a single instance, or null; at least one argument non-null
 * @throws IllegalArgumentException if alphabets or pipe do not match this trainer's
 */
private void setup (InstanceList instances, Instance instance) {
assert (instances != null || instance != null);
// Use the list's first instance as the representative for alphabet checks.
if (instance == null && instances != null)
instance = instances.get(0);
// Initialize the alphabets
if (dataAlphabet == null) {
this.dataAlphabet = instance.getDataAlphabet();
this.targetAlphabet = instance.getTargetAlphabet();
} else if (!Alphabet.alphabetsMatch(instance, this))
// Make sure the alphabets match
throw new IllegalArgumentException ("Training set alphabets do not match those of NaiveBayesTrainer.");
// Initialize or check the instancePipe
if (instances != null) {
if (instancePipe == null)
instancePipe = instances.getPipe();
else if (instancePipe != instances.getPipe())
// Make sure that this pipes match. Is this really necessary??
// I don't think so, but it could be confusing to have each returned classifier have a different pipe? -akm 1/08
// (Note: identity comparison, not equals — only the exact same Pipe object passes.)
throw new IllegalArgumentException ("Training set pipe does not match that of NaiveBayesTrainer.");
}
// First-time setup: one feature estimator per label, plus a prior estimator.
if (me == null) {
int numLabels = targetAlphabet.size();
me = new Multinomial.Estimator[numLabels];
for (int i = 0; i < numLabels; i++) {
me[i] = (Multinomial.Estimator) featureEstimator.clone();
me[i].setAlphabet(dataAlphabet);
}
pe = (Multinomial.Estimator) priorEstimator.clone();
}
if (targetAlphabet.size() > me.length) {
// target alphabet grew. increase size of our multinomial array
int targetAlphabetSize = targetAlphabet.size();
// copy over old values
Multinomial.Estimator[] newMe = new Multinomial.Estimator[targetAlphabetSize];
System.arraycopy (me, 0, newMe, 0, me.length);
// initialize new expanded space
for (int i= me.length; i<targetAlphabetSize; i++){
Multinomial.Estimator mest = (Multinomial.Estimator)featureEstimator.clone ();
mest.setAlphabet (dataAlphabet);
newMe[i] = mest;
}
me = newMe;
}
}