本文整理汇总了 Java 中 cc.mallet.pipe.CharSequence2TokenSequence 类的典型用法代码示例。如果您正苦于以下问题:Java CharSequence2TokenSequence 类的具体用法?Java CharSequence2TokenSequence 怎么用?Java CharSequence2TokenSequence 使用的例子?那么,这里精选的类代码示例或许可以为您提供帮助。
CharSequence2TokenSequence 类属于 cc.mallet.pipe 包,下文一共展示了 CharSequence2TokenSequence 类的 13 个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者觉得有用的代码点赞,您的评价将有助于系统推荐出更棒的 Java 代码示例。
示例1: testPrint
import cc.mallet.pipe.CharSequence2TokenSequence; //导入依赖的package包/类
/**
 * Pushes the single string "ABCDE" through a tokenizing pipeline, builds a
 * CRF on the resulting instance list, overwrites every optimizable parameter
 * with its own index, and prints the model.
 */
public void testPrint() {
    // Pipeline stages, applied in the order listed.
    Pipe[] stages = {
        new CharSequence2TokenSequence("."),
        new TokenText(),
        new TestCRFTokenSequenceRemoveSpaces(),
        new TokenSequence2FeatureVectorSequence(),
        new PrintInputAndTarget(),
    };
    Pipe p = new SerialPipes(stages);

    InstanceList instances = new InstanceList(p);
    String[] rawData = { "ABCDE" };
    instances.addThruPipe(new ArrayIterator(rawData));

    CRF crf = new CRF(p, null);
    crf.addFullyConnectedStatesForThreeQuarterLabels(instances);
    CRFTrainerByLabelLikelihood trainer = new CRFTrainerByLabelLikelihood(crf);
    crf.setWeightsDimensionAsIn(instances, false);

    Optimizable optimizable = trainer.getOptimizableCRF(instances);
    int parameterCount = optimizable.getNumParameters();
    double[] values = new double[parameterCount];
    // Parameter k receives the value k.
    int k = 0;
    while (k < values.length) {
        values[k] = k;
        k++;
    }
    optimizable.setParameters(values);

    crf.print();
}
示例2: makeSpacePredictionPipe
import cc.mallet.pipe.CharSequence2TokenSequence; //导入依赖的package包/类
/**
 * Assembles the pipeline used for space prediction: tokenize with the
 * pattern ".", lowercase, strip spaces via TestMEMMTokenSequenceRemoveSpaces,
 * add token text, add offset conjunctions, and convert to a feature vector
 * sequence.
 *
 * @return the assembled pipeline
 */
public static Pipe makeSpacePredictionPipe ()
{
    // Active conjunction offsets. Other combinations (e.g. {0},
    // {-2,-1,0}, {0,1,2}, {-3,-2,-1}, {1,2,3}) were tried previously
    // and are currently disabled.
    int[][] conjunctionOffsets = {
        {1},
        {-1, 0},
        {0, 1},
    };

    // A PrintInputAndTarget stage can be inserted before the final
    // stage when debugging.
    return new SerialPipes(new Pipe[] {
        new CharSequence2TokenSequence("."),
        new TokenSequenceLowercase(),
        new TestMEMMTokenSequenceRemoveSpaces(),
        new TokenText(),
        new OffsetConjunctions(true, conjunctionOffsets),
        new TokenSequence2FeatureVectorSequence()
    });
}
示例3: disabledtestPrint
import cc.mallet.pipe.CharSequence2TokenSequence; //导入依赖的package包/类
/**
 * Builds a MEMM over the single string "ABCDE", sets each optimizable
 * parameter to its own index, and prints the model.
 * NOTE(review): the "disabled" prefix presumably keeps the test runner from
 * picking this method up -- confirm.
 */
public void disabledtestPrint ()
{
    Pipe[] stages = {
        new CharSequence2TokenSequence("."),
        new TokenText(),
        new TestMEMM.TestMEMMTokenSequenceRemoveSpaces(),
        new TokenSequence2FeatureVectorSequence(),
        new PrintInputAndTarget(),
    };
    Pipe p = new SerialPipes (stages);

    InstanceList instances = new InstanceList (p);
    instances.addThruPipe (new ArrayIterator (new String[] { "ABCDE" }));

    MEMM memm = new MEMM (p, null);
    memm.addFullyConnectedStatesForLabels();
    memm.setWeightsDimensionAsIn (instances);

    MEMMTrainer trainer = new MEMMTrainer (memm);
    MEMMTrainer.MEMMOptimizableByLabelLikelihood optimizable = trainer.getOptimizableMEMM(instances);

    // Fill the parameter vector with 0, 1, 2, ...
    double[] values = new double[optimizable.getNumParameters()];
    for (int idx = values.length - 1; idx >= 0; idx--) {
        values[idx] = idx;
    }
    optimizable.setParameters (values);

    memm.print ();
}
示例4: getPipelist
import cc.mallet.pipe.CharSequence2TokenSequence; //导入依赖的package包/类
/**
 * Builds the list of pipes that turns raw text into feature sequences:
 * lowercase, tokenize, remove stopwords, map tokens to features.
 *
 * @param stopwords extra stopwords added on top of the list loaded from
 *                  "mallet/stoplists/en.txt"; must not be null, may be empty
 * @return the pipes in the order they are to be applied
 */
private static List<Pipe> getPipelist(Collection<String> stopwords) {
    List<Pipe> stages = new ArrayList<Pipe>();

    // 1. Lowercase the raw character sequence.
    stages.add(new CharSequenceLowercase());

    // 2. Tokenize with the pattern below.
    Pattern tokenPattern = Pattern.compile("\\p{L}[\\p{L}\\p{P}]+\\p{L}");
    stages.add(new CharSequence2TokenSequence(tokenPattern));

    // 3. Stopword removal: file-based list plus any caller-supplied words.
    File stoplist = new File("mallet/stoplists/en.txt");
    TokenSequenceRemoveStopwords stopwordFilter =
        new TokenSequenceRemoveStopwords(stoplist, "UTF-8", false, false, false);
    if (!stopwords.isEmpty()) {
        String[] extraWords = stopwords.toArray(new String[0]);
        stopwordFilter.addStopWords(extraWords);
    }
    stages.add(stopwordFilter);

    // 4. Map tokens to features.
    stages.add(new TokenSequence2FeatureSequence());

    return stages;
}
示例5: main
import cc.mallet.pipe.CharSequence2TokenSequence; //导入依赖的package包/类
/**
 * Trains an SVM classifier with a linear kernel on "webkb-train-stemmed.txt"
 * and prints its accuracy on "webkb-test-stemmed.txt".
 *
 * Both files are read line by line with the regex "(.*)\t(.*)"; the group
 * indices 2, 1, -1 are handed to CsvIterator unchanged.
 * NOTE(review): presumably these mean data = group 2, target = group 1,
 * no URI group -- confirm against the CsvIterator API.
 *
 * @param args unused
 * @throws IOException if either input file cannot be opened or closed
 * @throws Exception   kept from the original signature
 */
public static void main(String[] args) throws IOException, Exception {
    ArrayList<Pipe> pipes = new ArrayList<Pipe>();
    pipes.add(new Target2Label());
    pipes.add(new CharSequence2TokenSequence());
    pipes.add(new TokenSequence2FeatureSequence());
    pipes.add(new FeatureSequence2FeatureVector());
    SerialPipes pipe = new SerialPipes(pipes);

    // Prepare training instances. The reader is closed once the list is
    // populated (or loading fails), so the file handle is never leaked.
    InstanceList trainingInstanceList = new InstanceList(pipe);
    FileReader trainingReader = new FileReader("webkb-train-stemmed.txt");
    try {
        trainingInstanceList.addThruPipe(new CsvIterator(trainingReader,
            "(.*)\t(.*)", 2, 1, -1));
    } finally {
        trainingReader.close();
    }

    // Prepare test instances through the same pipe, with the same cleanup.
    InstanceList testingInstanceList = new InstanceList(pipe);
    FileReader testingReader = new FileReader("webkb-test-stemmed.txt");
    try {
        testingInstanceList.addThruPipe(new CsvIterator(testingReader,
            "(.*)\t(.*)", 2, 1, -1));
    } finally {
        testingReader.close();
    }

    ClassifierTrainer trainer = new SVMClassifierTrainer(new LinearKernel());
    Classifier classifier = trainer.train(trainingInstanceList);
    System.out.println("Accuracy: " + classifier.getAccuracy(testingInstanceList));
}
示例6: disabledtestPrint
import cc.mallet.pipe.CharSequence2TokenSequence; //导入依赖的package包/类
/**
 * Runs "ABCDE" through a tokenizing pipeline, creates a fully connected
 * MEMM on it, assigns parameter i the value i, and prints the model.
 * NOTE(review): the "disabled" name prefix presumably hides this method
 * from the test runner -- confirm.
 */
public void disabledtestPrint ()
{
    Pipe p = new SerialPipes (new Pipe[] {
        new CharSequence2TokenSequence("."),
        new TokenText(),
        new TestMEMMTokenSequenceRemoveSpaces(),
        new TokenSequence2FeatureVectorSequence(),
        new PrintInputAndTarget(),
    });

    String[] samples = { "ABCDE" };
    InstanceList sampleList = new InstanceList (p);
    sampleList.addThruPipe (new ArrayIterator (samples));

    MEMM model = new MEMM (p, null);
    model.addFullyConnectedStatesForLabels();
    model.setWeightsDimensionAsIn (sampleList);

    MEMMTrainer modelTrainer = new MEMMTrainer (model);
    MEMMTrainer.MEMMOptimizableByLabelLikelihood likelihood = modelTrainer.getOptimizableMEMM(sampleList);

    final int count = likelihood.getNumParameters();
    double[] indexed = new double[count];
    int pos = 0;
    while (pos < indexed.length) {
        // Each parameter takes its own position as its value.
        indexed[pos] = pos;
        pos++;
    }
    likelihood.setParameters (indexed);

    model.print ();
}
示例7: createInstanceList
import cc.mallet.pipe.CharSequence2TokenSequence; //导入依赖的package包/类
/**
 * Creates a list of Mallet instances from a list of documents.
 * Each document is tokenized with the default CharSequence2TokenSequence,
 * lowercased, filtered with the default TokenSequenceRemoveStopwords, and
 * converted to a feature sequence.
 *
 * @param texts a list of documents
 * @return a list of Mallet instances, built by passing {@code texts}
 *         through the pipeline
 * @throws IOException declared by the signature; no statement visible in
 *                     this method throws it directly
 */
private InstanceList createInstanceList(List<String> texts) throws IOException
{
    ArrayList<Pipe> stages = new ArrayList<Pipe>();
    stages.add(new CharSequence2TokenSequence());      // tokenize
    stages.add(new TokenSequenceLowercase());          // lowercase
    stages.add(new TokenSequenceRemoveStopwords());    // drop stopwords
    stages.add(new TokenSequence2FeatureSequence());   // tokens -> features

    SerialPipes pipeline = new SerialPipes(stages);
    InstanceList result = new InstanceList(pipeline);
    result.addThruPipe(new ArrayIterator(texts));
    return result;
}
示例8: makeSpacePredictionPipe
import cc.mallet.pipe.CharSequence2TokenSequence; //导入依赖的package包/类
/**
 * Assembles the space-prediction pipeline: tokenize with the pattern ".",
 * lowercase, strip spaces via TestCRFTokenSequenceRemoveSpaces, add token
 * text, add offset conjunctions, and convert to a feature vector sequence.
 *
 * @return the assembled pipeline
 */
private Pipe makeSpacePredictionPipe() {
    // Active conjunction offsets.
    //
    // History: the original test also had the conjunctions
    //   {1},{-1,0},{0,1}  and  {0, 1}
    // Removing conjunctions makes the test run faster, but likelihood and
    // accuracy test values would then need adjusting. -akm 12/2007
    // TODO re-enable: {-2, -1, 0}, {0, 1, 2}, {-3, -2, -1}, {1, 2, 3}
    // (Disabled even earlier:
    //   {-2,-1}, {-1,0}, {0,1}, {1,2},
    //   {-3,-2,-1}, {-2,-1,0}, {-1,0,1}, {0,1,2}, {1,2,3})
    int[][] conjunctionOffsets = {
        { 0 },
        { 1 },
        { -1, 0 },
    };

    // A PrintInputAndTarget stage can be inserted before the last stage
    // when debugging.
    Pipe[] stages = {
        new CharSequence2TokenSequence("."),
        new TokenSequenceLowercase(),
        new TestCRFTokenSequenceRemoveSpaces(),
        new TokenText(),
        new OffsetConjunctions(true, conjunctionOffsets),
        new TokenSequence2FeatureVectorSequence(),
    };
    return new SerialPipes(stages);
}
示例9: createInstanceList
import cc.mallet.pipe.CharSequence2TokenSequence; //导入依赖的package包/类
/**
 * Creates a MALLET instance list from raw document strings using a custom
 * tokenizer, lowercasing, and conversion to feature sequences.
 *
 * A token is a maximal run of ASCII letters and digits, the German
 * characters ä ü ö Ä Ö Ü ß, or any of {@code - ~ _ . / < >}.
 *
 * @param texts the documents to convert, one string per document
 * @return the instances produced by passing {@code texts} through the pipeline
 * @throws IOException declared by the signature; no statement visible in
 *                     this method throws it directly
 */
private InstanceList createInstanceList(List<String> texts) throws IOException {
    ArrayList<Pipe> pipes = new ArrayList<Pipe>();
    // The default tokenizer (new CharSequence2TokenSequence()) is not used here.
    //
    // The pattern is a single character class. It accepts the same
    // characters as the former group of one-character alternatives, but
    // that group ended in an empty alternative ("|)+") and could therefore
    // match the empty string; a character class with '+' always consumes
    // at least one character and needs no per-character alternation.
    // The '-' is placed last in the class so it is taken literally.
    pipes.add(new CharSequence2TokenSequence("[a-zA-Z0-9~_./äüöÄÖÜß<>-]+"));
    pipes.add(new TokenSequenceLowercase());
    // Stopword removal (TokenSequenceRemoveStopwords) is not applied here.
    pipes.add(new TokenSequence2FeatureSequence());
    InstanceList instanceList = new InstanceList(new SerialPipes(pipes));
    instanceList.addThruPipe(new ArrayIterator(texts));
    return instanceList;
}
示例10: getPipe
import cc.mallet.pipe.CharSequence2TokenSequence; //导入依赖的package包/类
/**
 * Builds the serial pipeline applied to each instance, in this order:
 * Target2Label, SaveDataInSource, Input2CharSequence("UTF-8"),
 * tokenization on runs of alphabetic characters ({@code \p{Alpha}+}),
 * TokenSequenceLowercase, TokenSequenceRemoveStopwords(false, false),
 * and TokenSequence2FeatureSequence.
 *
 * @return a SerialPipes chaining the stages listed above
 */
private Pipe getPipe() {
    ArrayList<Pipe> stages = new ArrayList<Pipe>();

    // Target and source bookkeeping.
    stages.add(new Target2Label());
    stages.add(new SaveDataInSource());

    // Text extraction and tokenization.
    stages.add(new Input2CharSequence("UTF-8"));
    Pattern alphabeticRun = Pattern.compile("\\p{Alpha}+");
    stages.add(new CharSequence2TokenSequence(alphabeticRun));

    // Token normalization and feature mapping.
    stages.add(new TokenSequenceLowercase());
    stages.add(new TokenSequenceRemoveStopwords(false, false));
    stages.add(new TokenSequence2FeatureSequence());

    // A PrintInputAndTarget stage can be appended here when debugging.
    SerialPipes pipeline = new SerialPipes(stages);
    return pipeline;
}
示例11: testThree
import cc.mallet.pipe.CharSequence2TokenSequence; //导入依赖的package包/类
/**
 * Creates an instance list with a label/tokenize/lowercase/stopword/feature
 * pipeline and feeds it from a FileIterator over the directory "foo/bar",
 * passing the regex "^([^/]*)/" as the iterator's pattern argument.
 */
public void testThree ()
{
    Pipe[] stages = new Pipe[] {
        new Target2Label(),
        new CharSequence2TokenSequence(),
        new TokenSequenceLowercase(),
        new TokenSequenceRemoveStopwords(),
        new TokenSequence2FeatureSequence(),
        new FeatureSequence2FeatureVector()
    };
    SerialPipes pipeline = new SerialPipes(stages);
    InstanceList il = new InstanceList (pipeline);

    File root = new File("foo/bar");
    Pattern pathPattern = Pattern.compile("^([^/]*)/");
    // The second argument is passed as null, exactly as before.
    Iterator<Instance> source = new FileIterator(root, null, pathPattern);

    il.addThruPipe (source);
}
示例12: createPipe
import cc.mallet.pipe.CharSequence2TokenSequence; //导入依赖的package包/类
/**
 * Creates the pipeline that tokenizes text, lowercases the tokens, maps
 * them to a feature sequence, and converts that to a feature vector.
 *
 * @return a new SerialPipes over the four stages, in that order
 */
public Pipe createPipe () {
    Pipe[] stages = {
        new CharSequence2TokenSequence(),
        new TokenSequenceLowercase(),
        new TokenSequence2FeatureSequence(),
        new FeatureSequence2FeatureVector(),
    };
    Pipe pipeline = new SerialPipes(stages);
    return pipeline;
}
示例13: testSpacePipe
import cc.mallet.pipe.CharSequence2TokenSequence; //导入依赖的package包/类
/**
 * Runs the first two TestCRF data strings through the space pipeline and
 * checks that what PrintInputAndTarget writes to System.out equals
 * spacePipeOutput.
 *
 * System.out is redirected to an in-memory buffer while the pipe runs and
 * is restored in a finally block, so a failure inside the pipe cannot leave
 * the JVM's standard output pointing at the buffer.
 */
public void testSpacePipe () {
    Pipe p = new SerialPipes(new Pipe[] {
        new CharSequence2TokenSequence("."),
        new TokenSequenceLowercase(),
        new TestCRF.TestCRFTokenSequenceRemoveSpaces (),
        new TokenText(),
        new OffsetConjunctions(false,
            new int[][] {{0},
                {1},{-1,0},{0,1},
                {-2,-1,0}, {0,1,2}, {-3,-2,-1}, {1,2,3},
            }),
        new PrintInputAndTarget(),
    });
    // Print to a string
    ByteArrayOutputStream out = new ByteArrayOutputStream ();
    PrintStream oldOut = System.out;
    System.setOut (new PrintStream (out));
    try {
        InstanceList lst = new InstanceList (p);
        lst.addThruPipe (new ArrayIterator(new String[] { TestCRF.data[0],
            TestCRF.data[1], }));
    } finally {
        // Always restore the real stdout, even when the pipe throws.
        System.setOut (oldOut);
    }
    assertEquals (spacePipeOutput, out.toString());
}