本文整理汇总了Java中cc.mallet.pipe.Target2Label类的典型用法代码示例。如果您正苦于以下问题：Java Target2Label类的具体用法？Java Target2Label怎么用？Java Target2Label使用的例子？那么，这里精选的类代码示例或许可以为您提供帮助。
Target2Label类属于cc.mallet.pipe包，在下文中一共展示了Target2Label类的9个代码示例，这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞，您的评价将有助于系统推荐出更棒的Java代码示例。
示例1: InstanceList
import cc.mallet.pipe.Target2Label; //导入依赖的package包/类
/**
 * Creates a list consisting of randomly-generated
 * <code>FeatureVector</code>s.
 *
 * <p>The list is set up with a pipe that runs
 * <code>TokenSequence2FeatureSequence</code>,
 * <code>FeatureSequence2FeatureVector</code> and <code>Target2Label</code>
 * in that order, and is then filled from a
 * <code>RandomTokenSequenceIterator</code> built from the arguments below.
 *
 * @param r the generator of all randomness used here
 * @param classCentroidDistribution the Dirichlet handed to the random
 *        iterator; includes an Alphabet
 * @param classCentroidAverageAlphaMean Gaussian mean on the sum of alphas
 * @param classCentroidAverageAlphaVariance Gaussian variance on the sum of alphas
 * @param featureVectorSizePoissonLambda forwarded unchanged to the random
 *        iterator (presumably the Poisson rate for vector sizes; confirm
 *        against RandomTokenSequenceIterator)
 * @param classInstanceCountPoissonLambda forwarded unchanged to the random
 *        iterator (presumably the Poisson rate for instances per class;
 *        confirm against RandomTokenSequenceIterator)
 * @param classNames the class names forwarded to the random iterator
 */
// xxx Perhaps split these out into a utility class
public InstanceList (Randoms r,
                     Dirichlet classCentroidDistribution,
                     double classCentroidAverageAlphaMean,
                     double classCentroidAverageAlphaVariance,
                     double featureVectorSizePoissonLambda,
                     double classInstanceCountPoissonLambda,
                     String[] classNames)
{
    // Constructor chaining has to be the first statement, so the pipe is
    // assembled inline.
    this (new SerialPipes (new Pipe[] {
            new TokenSequence2FeatureSequence (),
            new FeatureSequence2FeatureVector (),
            new Target2Label ()
    }));
    // Feed every randomly generated instance through the pipe just installed.
    addThruPipe (new RandomTokenSequenceIterator (
            r,
            classCentroidDistribution,
            classCentroidAverageAlphaMean,
            classCentroidAverageAlphaVariance,
            featureVectorSizePoissonLambda,
            classInstanceCountPoissonLambda,
            classNames));
}
示例2: main
import cc.mallet.pipe.Target2Label; //导入依赖的package包/类
/**
 * Trains an SVM classifier with a linear kernel on
 * {@code webkb-train-stemmed.txt} and prints its accuracy on
 * {@code webkb-test-stemmed.txt}.
 *
 * <p>Both files are read line by line with the regex {@code (.*)\t(.*)};
 * the group indices 2, 1, -1 are passed to {@code CsvIterator} as written
 * in the original code (per the CsvIterator constructor these should be the
 * data group, the target group and "no name group" respectively).
 *
 * @param args unused
 * @throws IOException if either input file cannot be opened or closed
 * @throws Exception declared for compatibility with the original signature
 */
public static void main(String[] args) throws IOException, Exception {
    ArrayList<Pipe> pipes = new ArrayList<Pipe>();
    pipes.add(new Target2Label());
    pipes.add(new CharSequence2TokenSequence());
    pipes.add(new TokenSequence2FeatureSequence());
    pipes.add(new FeatureSequence2FeatureVector());
    SerialPipes pipe = new SerialPipes(pipes);

    // Prepare training instances. The reader is closed in a finally block so
    // the file handle is released even when piping fails part-way through.
    // NOTE(review): relies on addThruPipe draining the iterator before it returns.
    InstanceList trainingInstanceList = new InstanceList(pipe);
    FileReader trainingReader = new FileReader("webkb-train-stemmed.txt");
    try {
        trainingInstanceList.addThruPipe(new CsvIterator(trainingReader,
                "(.*)\t(.*)", 2, 1, -1));
    } finally {
        trainingReader.close();
    }

    // Prepare test instances through the same pipe so that both lists share
    // the alphabets held by that pipe.
    InstanceList testingInstanceList = new InstanceList(pipe);
    FileReader testingReader = new FileReader("webkb-test-stemmed.txt");
    try {
        testingInstanceList.addThruPipe(new CsvIterator(testingReader,
                "(.*)\t(.*)", 2, 1, -1));
    } finally {
        testingReader.close();
    }

    ClassifierTrainer trainer = new SVMClassifierTrainer(new LinearKernel());
    Classifier classifier = trainer.train(trainingInstanceList);
    System.out.println("Accuracy: " + classifier.getAccuracy(testingInstanceList));
}
示例3: makePipe
import cc.mallet.pipe.Target2Label; //导入依赖的package包/类
/**
 * Assembles the serial pipeline used by this class.
 *
 * <p>A fresh feature {@code Alphabet} and the label alphabet owned by a new
 * {@code Target2Label} are shared between the first stage
 * ({@code AlignToStressPipe}) and the later vectorising / labelling stages.
 *
 * @return a {@code SerialPipes} running the stages in the order listed below
 */
private Pipe makePipe() {
    Alphabet featureAlphabet = new Alphabet();
    Target2Label targetToLabel = new Target2Label();
    LabelAlphabet labelAlphabet = (LabelAlphabet) targetToLabel.getTargetAlphabet();

    // No stress features are supplied to the alignment stage.
    ImmutableList<StressFeature> noStressFeatures = ImmutableList.<StressFeature>of();

    ImmutableList<Pipe> stages = ImmutableList.<Pipe>of(
            // convert to token sequence
            new AlignToStressPipe(featureAlphabet, labelAlphabet, noStressFeatures),
            // make all lowercase
            new TokenSequenceLowercase(),
            // grab neighboring graphemes
            new NeighborTokenFeature(true, makeNeighbors()),
            new SurroundingTokenFeature(false),
            new SurroundingTokenFeature(true),
            new NeighborShapeFeature(true, makeShapeNeighs()),
            new LeadingTrailingFeature(),
            // convert the strings in the text to features
            new TokenSequenceToFeature(),
            new TokenSequence2FeatureVectorSequence(featureAlphabet, true, false),
            // the same Target2Label instance whose alphabet was handed to the first stage
            targetToLabel);

    return new SerialPipes(stages);
}
示例4: getPipe
import cc.mallet.pipe.Target2Label; //导入依赖的package包/类
/**
 * Builds the pre-processing pipe: the target is turned into a label, the
 * original data is saved in the instance source, the input is read as UTF-8
 * text, split into runs of alphabetic characters, lowercased, stripped of
 * stopwords, and finally mapped to a feature sequence.
 *
 * @return a {@code SerialPipes} that runs the stages in the order they are
 *         added below
 */
private Pipe getPipe() {
    // A token is one or more consecutive alphabetic characters.
    Pattern alphabeticRun = Pattern.compile("\\p{Alpha}+");

    ArrayList<Pipe> stages = new ArrayList<Pipe>();
    stages.add(new Target2Label());
    stages.add(new SaveDataInSource());
    stages.add(new Input2CharSequence("UTF-8"));
    stages.add(new CharSequence2TokenSequence(alphabeticRun));
    stages.add(new TokenSequenceLowercase());
    stages.add(new TokenSequenceRemoveStopwords(false, false));
    stages.add(new TokenSequence2FeatureSequence());
    // Debugging aid, left disabled:
    // stages.add(new PrintInputAndTarget());

    return new SerialPipes(stages);
}
示例5: testRandomTrained
import cc.mallet.pipe.Target2Label; //导入依赖的package包/类
/**
 * Checks that the value returned by <code>testRandomTrainedOn</code> is the
 * same (within 0.01) for a plain <code>InstanceList</code> and for a
 * <code>PagedInstanceList</code> that share one pipe.
 */
public void testRandomTrained ()
{
    Pipe[] stages = {
        new TokenSequence2FeatureSequence(),
        new FeatureSequence2FeatureVector(),
        new Target2Label()
    };
    Pipe sharedPipe = new SerialPipes(stages);

    // Plain list first, then the paged variant, matching the original order.
    InstanceList plainList = new InstanceList (sharedPipe);
    double plainAccuracy = testRandomTrainedOn (plainList);

    // Paged list constructed with 700, 200 and the current directory.
    PagedInstanceList pagedList = new PagedInstanceList (sharedPipe, 700, 200, new File("."));
    double pagedAccuracy = testRandomTrainedOn (pagedList);

    assertEquals (plainAccuracy, pagedAccuracy, 0.01);
}
示例6: testThree
import cc.mallet.pipe.Target2Label; //导入依赖的package包/类
/**
 * Pipes the files found by a <code>FileIterator</code> over
 * <code>foo/bar</code> through a text-to-feature-vector pipeline.
 * The test makes no assertions; it only exercises the loading path.
 */
public void testThree ()
{
    Pipe[] stages = {
        new Target2Label(),
        new CharSequence2TokenSequence(),
        new TokenSequenceLowercase(),
        new TokenSequenceRemoveStopwords(),
        new TokenSequence2FeatureSequence(),
        new FeatureSequence2FeatureVector()
    };
    InstanceList il = new InstanceList (new SerialPipes(stages));

    // Captures everything before the first '/' of the string it is matched against.
    Pattern leadingSegment = Pattern.compile("^([^/]*)/");
    File directory = new File("foo/bar");

    // A null second argument is passed, exactly as in the original call.
    il.addThruPipe (new FileIterator(directory, null, leadingSegment));
}
示例7: getPipes
import cc.mallet.pipe.Target2Label; //导入依赖的package包/类
/**
 * Returns the ordered pipe stages: label conversion, regex tokenisation via
 * {@code MyInput2RegexTokens}, feature-sequence lookup, and feature-vector
 * conversion.
 *
 * @return a new list holding the four stages in execution order
 */
static List<Pipe> getPipes() {
    List<Pipe> stages = newArrayList();

    stages.add(new Target2Label());
    stages.add(new MyInput2RegexTokens());
    // Debugging aid, left disabled:
    // stages.add(new PrintInputAndTarget());
    stages.add(new TokenSequence2FeatureSequence());
    stages.add(new FeatureSequence2FeatureVector());

    return stages;
}
示例8: buildPipe
import cc.mallet.pipe.Target2Label; //导入依赖的package包/类
/**
 * Builds the import pipeline: read input as UTF-8 text, tokenize, lowercase,
 * remove stopwords, map tokens to feature indices, convert the target to a
 * label, and finally turn the feature sequence into a feature vector.
 *
 * @return a {@code SerialPipes} running the stages in the order added below
 */
public Pipe buildPipe() {
    // Parameterised rather than raw, so passing the list to SerialPipes does
    // not need an unchecked conversion.
    ArrayList<Pipe> pipeList = new ArrayList<Pipe>();

    // Read data from File objects
    pipeList.add(new Input2CharSequence("UTF-8"));

    // Regular expression for what constitutes a token.
    //  This pattern includes Unicode letters, Unicode numbers,
    //  and the underscore character. Alternatives:
    //    "\\S+"   (anything not whitespace)
    //    "\\w+"    ( A-Z, a-z, 0-9, _ )
    //    "[\\p{L}\\p{N}_]+|[\\p{P}]+"   (a group of only letters and numbers OR
    //                                    a group of only punctuation marks)
    Pattern tokenPattern =
        Pattern.compile("[\\p{L}\\p{N}_]+");

    // Tokenize raw strings
    pipeList.add(new CharSequence2TokenSequence(tokenPattern));

    // Normalize all tokens to all lowercase
    pipeList.add(new TokenSequenceLowercase());

    // Remove stopwords from a standard English stoplist.
    //  options: [case sensitive] [mark deletions]
    pipeList.add(new TokenSequenceRemoveStopwords(false, false));

    // Rather than storing tokens as strings, convert
    //  them to integers by looking them up in an alphabet.
    pipeList.add(new TokenSequence2FeatureSequence());

    // Do the same thing for the "target" field:
    //  convert a class label string to a Label object,
    //  which has an index in a Label alphabet.
    pipeList.add(new Target2Label());

    // Now convert the sequence of features to a sparse vector,
    //  mapping feature IDs to counts.
    pipeList.add(new FeatureSequence2FeatureVector());

    // Print out the features and the label
    //pipeList.add(new PrintInputAndTarget());

    return new SerialPipes(pipeList);
}
示例9: createInstanceList
import cc.mallet.pipe.Target2Label; //导入依赖的package包/类
/**
 * Loads the instances in {@code dataFile} into a new {@code InstanceList}
 * whose pipe converts the target to a label and then runs
 * {@code Csv2FeatureVector}.
 *
 * @param dataFile the file to read instances from
 * @return the populated instance list
 * @throws IOException if the file cannot be opened or the reader cannot be closed
 */
public InstanceList createInstanceList(File dataFile) throws IOException {
    InstanceList instanceList = new InstanceList(new SerialPipes(new Pipe[] {
            new Target2Label(),
            new Csv2FeatureVector() }));
    Reader fileReader = new FileReader(dataFile);
    try {
        instanceList.addThruPipe(new DataIterator(fileReader));
    } finally {
        // Release the file handle even when piping throws; the original only
        // closed the reader on the success path.
        fileReader.close();
    }
    return instanceList;
}