当前位置: 首页>>代码示例>>Java>>正文


Java CharSequence2TokenSequence类代码示例

本文整理汇总了Java中cc.mallet.pipe.CharSequence2TokenSequence的典型用法代码示例。如果您正苦于以下问题：Java CharSequence2TokenSequence类的具体用法？Java CharSequence2TokenSequence怎么用？Java CharSequence2TokenSequence使用的例子？那么，这里精选的类代码示例或许可以为您提供帮助。


CharSequence2TokenSequence类属于cc.mallet.pipe包，在下文中一共展示了CharSequence2TokenSequence类的13个代码示例，这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞，您的评价将有助于系统推荐出更棒的Java代码示例。

示例1: testPrint

import cc.mallet.pipe.CharSequence2TokenSequence; //导入依赖的package包/类
/**
 * Pushes the single string "ABCDE" through a five-stage pipe, builds a CRF
 * on top of it, overwrites every optimizable parameter with its own index
 * and finally calls {@code crf.print()}.
 */
public void testPrint() {
	Pipe[] stages = {
			new CharSequence2TokenSequence("."),
			new TokenText(),
			new TestCRFTokenSequenceRemoveSpaces(),
			new TokenSequence2FeatureVectorSequence(),
			new PrintInputAndTarget() };
	Pipe p = new SerialPipes(stages);

	// One-instance data set, run through the pipe above.
	InstanceList instances = new InstanceList(p);
	String[] rawData = { "ABCDE" };
	instances.addThruPipe(new ArrayIterator(rawData));

	CRF crf = new CRF(p, null);
	crf.addFullyConnectedStatesForThreeQuarterLabels(instances);
	CRFTrainerByLabelLikelihood trainer = new CRFTrainerByLabelLikelihood(crf);
	crf.setWeightsDimensionAsIn(instances, false);

	// Parameter k is set to the value k so the printed model is predictable.
	Optimizable optimizable = trainer.getOptimizableCRF(instances);
	double[] weights = new double[optimizable.getNumParameters()];
	int index = 0;
	while (index < weights.length) {
		weights[index] = index;
		index++;
	}
	optimizable.setParameters(weights);

	crf.print();
}
 
开发者ID:kostagiolasn,项目名称:NucleosomePatternClassifier,代码行数:22,代码来源:TestCRF.java

示例2: makeSpacePredictionPipe

import cc.mallet.pipe.CharSequence2TokenSequence; //导入依赖的package包/类
/**
 * Assembles the pipe used by the space-prediction tests.
 *
 * Stages, in order: CharSequence2TokenSequence with the pattern ".",
 * TokenSequenceLowercase, TestMEMMTokenSequenceRemoveSpaces, TokenText,
 * OffsetConjunctions and TokenSequence2FeatureVectorSequence.
 *
 * @return the assembled SerialPipes instance
 */
public static Pipe makeSpacePredictionPipe ()
{
  // Only these three offset conjunctions are active. The wider windows that
  // earlier revisions listed ({0}, {-2,-1,0}, {0,1,2}, ...) stay disabled.
  int[][] conjunctions = new int[][] { {1}, {-1, 0}, {0, 1} };

  // A PrintInputAndTarget stage is deliberately not part of this pipe.
  return new SerialPipes(new Pipe[] {
      new CharSequence2TokenSequence("."),
      new TokenSequenceLowercase(),
      new TestMEMMTokenSequenceRemoveSpaces(),
      new TokenText(),
      new OffsetConjunctions(true, conjunctions),
      new TokenSequence2FeatureVectorSequence()
  });
}
 
开发者ID:kostagiolasn,项目名称:NucleosomePatternClassifier,代码行数:20,代码来源:TestMEMM.java

示例3: disabledtestPrint

import cc.mallet.pipe.CharSequence2TokenSequence; //导入依赖的package包/类
/**
 * Builds an MEMM over the single string "ABCDE", sets each optimizable
 * parameter to its own index and calls {@code print()} on the model.
 * The "disabled" name prefix presumably keeps a name-based test runner
 * from picking this method up -- confirm against the test harness.
 */
public void disabledtestPrint ()
{
	Pipe[] stages = {
		new CharSequence2TokenSequence("."),
		new TokenText(),
		new TestMEMM.TestMEMMTokenSequenceRemoveSpaces(),
		new TokenSequence2FeatureVectorSequence(),
		new PrintInputAndTarget()
	};
	Pipe p = new SerialPipes (stages);

	// A single training instance is enough to size the weight vectors.
	InstanceList instances = new InstanceList (p);
	String[] rawData = { "ABCDE" };
	instances.addThruPipe (new ArrayIterator (rawData));

	MEMM memm = new MEMM (p, null);
	memm.addFullyConnectedStatesForLabels();
	memm.setWeightsDimensionAsIn (instances);

	MEMMTrainer trainer = new MEMMTrainer (memm);
	MEMMTrainer.MEMMOptimizableByLabelLikelihood optimizable = trainer.getOptimizableMEMM(instances);

	// Parameter k receives the value k.
	double[] weights = new double[optimizable.getNumParameters()];
	int index = 0;
	while (index < weights.length) {
		weights[index] = index;
		index++;
	}
	optimizable.setParameters (weights);

	memm.print ();
}
 
开发者ID:kostagiolasn,项目名称:NucleosomePatternClassifier,代码行数:25,代码来源:TestMEMM.java

示例4: getPipelist

import cc.mallet.pipe.CharSequence2TokenSequence; //导入依赖的package包/类
/**
 * Builds the ordered pipe list that turns raw document text into feature
 * sequences: CharSequenceLowercase, a regex tokenizer, stopword removal and
 * TokenSequence2FeatureSequence.
 *
 * @param stopwords extra stopwords added to the ones loaded from
 *                  "mallet/stoplists/en.txt"; must not be null, and nothing
 *                  is added when it is empty
 * @return the pipes, in the order they should be applied
 */
private static List<Pipe> getPipelist(Collection<String> stopwords) {
	List<Pipe> stages = new ArrayList<Pipe>();

	// Lowercase the raw text first, then tokenize it.
	stages.add(new CharSequenceLowercase());
	// Token = a letter, one or more letters/punctuation marks, then a letter.
	Pattern tokenPattern = Pattern.compile("\\p{L}[\\p{L}\\p{P}]+\\p{L}");
	stages.add(new CharSequence2TokenSequence(tokenPattern));

	// The stoplist file is read as UTF-8; the three boolean flags are passed as false.
	File stoplistFile = new File("mallet/stoplists/en.txt");
	TokenSequenceRemoveStopwords stopwordFilter =
			new TokenSequenceRemoveStopwords(stoplistFile, "UTF-8", false, false, false);
	if (!stopwords.isEmpty()) {
		String[] extraWords = stopwords.toArray(new String[0]);
		stopwordFilter.addStopWords(extraWords);
	}
	stages.add(stopwordFilter);

	stages.add(new TokenSequence2FeatureSequence());
	return stages;
}
 
开发者ID:MKLab-ITI,项目名称:mgraph-summarization,代码行数:19,代码来源:LDA.java

示例5: main

import cc.mallet.pipe.CharSequence2TokenSequence; //导入依赖的package包/类
/**
 * Trains an SVM classifier with a linear kernel on "webkb-train-stemmed.txt",
 * evaluates it on "webkb-test-stemmed.txt" and prints the accuracy to
 * standard output.
 *
 * Both files are read relative to the working directory, one instance per
 * line, and are closed as soon as their instances have been loaded.
 *
 * @param args ignored
 * @throws IOException if either data file cannot be opened, read or closed
 * @throws Exception declared by the original signature and kept for compatibility
 */
public static void main(String[] args) throws IOException, Exception {
    ArrayList<Pipe> pipes = new ArrayList<Pipe>();
    pipes.add(new Target2Label());
    pipes.add(new CharSequence2TokenSequence());
    pipes.add(new TokenSequence2FeatureSequence());
    pipes.add(new FeatureSequence2FeatureVector());
    SerialPipes pipe = new SerialPipes(pipes);

    // Training and test data share the same pipe.
    InstanceList trainingInstanceList = loadTabSeparatedInstances(pipe, "webkb-train-stemmed.txt");
    InstanceList testingInstanceList = loadTabSeparatedInstances(pipe, "webkb-test-stemmed.txt");

    ClassifierTrainer trainer = new SVMClassifierTrainer(new LinearKernel());
    Classifier classifier = trainer.train(trainingInstanceList);
    System.out.println("Accuracy: " + classifier.getAccuracy(testingInstanceList));
}

/**
 * Loads every line of {@code path} into a new instance list bound to
 * {@code pipe}, then closes the file even when loading fails.
 *
 * @param pipe the pipe each line is pushed through
 * @param path file with two tab-separated fields per line
 * @return the populated instance list
 * @throws IOException if the file cannot be opened or closed
 */
private static InstanceList loadTabSeparatedInstances(Pipe pipe, String path) throws IOException {
    InstanceList instances = new InstanceList(pipe);
    // NOTE(review): FileReader decodes with the platform default charset, as the original did.
    FileReader reader = new FileReader(path);
    try {
        // Group indices 2, 1, -1 are passed unchanged; presumably data = group 2,
        // target = group 1, no name group -- confirm against CsvIterator.
        instances.addThruPipe(new CsvIterator(reader, "(.*)\t(.*)", 2, 1, -1));
    } finally {
        // The original never closed its readers; release the file handle here.
        reader.close();
    }
    return instances;
}
 
开发者ID:iamxiatian,项目名称:wikit,代码行数:24,代码来源:Main.java

示例6: disabledtestPrint

import cc.mallet.pipe.CharSequence2TokenSequence; //导入依赖的package包/类
/**
 * Constructs an MEMM from one "ABCDE" instance, fills the optimizable
 * parameter vector with 0, 1, 2, ... and calls {@code print()} on the model.
 */
public void disabledtestPrint ()
{
	Pipe p = new SerialPipes (new Pipe[] {
		new CharSequence2TokenSequence("."),
		new TokenText(),
		new TestMEMMTokenSequenceRemoveSpaces(),
		new TokenSequence2FeatureVectorSequence(),
		new PrintInputAndTarget()
	});

	InstanceList single = new InstanceList (p);
	single.addThruPipe (new ArrayIterator (new String[] { "ABCDE" }));

	MEMM model = new MEMM (p, null);
	model.addFullyConnectedStatesForLabels();
	model.setWeightsDimensionAsIn (single);

	MEMMTrainer trainer = new MEMMTrainer (model);
	MEMMTrainer.MEMMOptimizableByLabelLikelihood objective = trainer.getOptimizableMEMM(single);

	// Each parameter takes its own position as its value.
	int count = objective.getNumParameters();
	double[] values = new double[count];
	for (int k = 0; k < count; k++) {
		values[k] = k;
	}
	objective.setParameters (values);

	model.print ();
}
 
开发者ID:shalomeir,项目名称:tctm,代码行数:25,代码来源:TestMEMM.java

示例7: createInstanceList

import cc.mallet.pipe.CharSequence2TokenSequence; //导入依赖的package包/类
/**
 * Converts raw documents into a Mallet instance list.
 *
 * Each text is run through, in order, CharSequence2TokenSequence,
 * TokenSequenceLowercase, TokenSequenceRemoveStopwords and
 * TokenSequence2FeatureSequence, all with their default constructors.
 *
 * @param texts the documents to convert
 * @return the instance list holding one processed instance per document
 * @throws IOException declared by the signature; no call visible here throws it
 */
private InstanceList createInstanceList(List<String> texts) throws IOException
{
	Pipe[] stages = {
		new CharSequence2TokenSequence(),
		new TokenSequenceLowercase(),
		new TokenSequenceRemoveStopwords(),
		new TokenSequence2FeatureSequence()
	};
	InstanceList result = new InstanceList(new SerialPipes(stages));
	result.addThruPipe(new ArrayIterator(texts));
	return result;
}
 
开发者ID:socialsensor,项目名称:topic-detection,代码行数:18,代码来源:LDA.java

示例8: makeSpacePredictionPipe

import cc.mallet.pipe.CharSequence2TokenSequence; //导入依赖的package包/类
/**
 * Assembles the pipe used by the CRF space-prediction tests.
 *
 * Stages, in order: CharSequence2TokenSequence with the pattern ".",
 * TokenSequenceLowercase, TestCRFTokenSequenceRemoveSpaces, TokenText,
 * OffsetConjunctions and TokenSequence2FeatureVectorSequence.
 *
 * @return the assembled pipe
 */
private Pipe makeSpacePredictionPipe() {
	// Active conjunctions. The original test also listed {0, 1} and wider
	// windows such as {-2, -1, 0}, {0, 1, 2}, {-3, -2, -1}, {1, 2, 3}; they
	// are disabled here. An earlier note (akm, 12/2007) warns that changing
	// this set means adjusting the likelihood and accuracy values the tests
	// expect, and carries a TODO to re-enable the wider windows.
	int[][] conjunctions = new int[][] {
			{ 0 },
			{ 1 },
			{ -1, 0 } };

	// A PrintInputAndTarget stage is deliberately not included.
	Pipe[] stages = {
			new CharSequence2TokenSequence("."),
			new TokenSequenceLowercase(),
			new TestCRFTokenSequenceRemoveSpaces(),
			new TokenText(),
			new OffsetConjunctions(true, conjunctions),
			new TokenSequence2FeatureVectorSequence() };
	return new SerialPipes(stages);
}
 
开发者ID:kostagiolasn,项目名称:NucleosomePatternClassifier,代码行数:28,代码来源:TestCRF.java

示例9: createInstanceList

import cc.mallet.pipe.CharSequence2TokenSequence; //导入依赖的package包/类
/**
 * Converts raw documents into a Mallet instance list using a custom tokenizer.
 *
 * A token is a maximal run of ASCII letters and digits, the characters
 * {@code - ~ _ . / < >} and the German letters ä, ü, ö, Ä, Ö, Ü, ß. Tokens
 * are then passed through TokenSequenceLowercase and
 * TokenSequence2FeatureSequence. No stopword-removal pipe is installed.
 *
 * @param texts the documents to convert
 * @return the instance list holding one processed instance per document
 * @throws IOException declared by the signature; no call visible here throws it
 */
private InstanceList createInstanceList(List<String> texts) throws IOException {
	ArrayList<Pipe> pipes = new ArrayList<Pipe>();
	// The previous pattern was a group of single-character alternatives that
	// ended in an empty alternative ("...|>|)+"), so it could also match the
	// empty string at every position. This character class accepts exactly
	// the same characters but never produces an empty match.
	pipes.add(new CharSequence2TokenSequence("[a-zA-Z0-9~_./äüöÄÖÜß<>-]+"));
	pipes.add(new TokenSequenceLowercase());
	pipes.add(new TokenSequence2FeatureSequence());
	InstanceList instanceList = new InstanceList(new SerialPipes(pipes));
	instanceList.addThruPipe(new ArrayIterator(texts));
	return instanceList;
}
 
开发者ID:HendrikStrobelt,项目名称:ditop_wrangler,代码行数:12,代码来源:MalletLDA.java

示例10: getPipe

import cc.mallet.pipe.CharSequence2TokenSequence; //导入依赖的package包/类
/**
 * Builds the preprocessing pipe.
 *
 * Stages, in order: Target2Label, SaveDataInSource,
 * Input2CharSequence("UTF-8"), CharSequence2TokenSequence matching
 * {@code \p{Alpha}+}, TokenSequenceLowercase,
 * TokenSequenceRemoveStopwords(false, false) and
 * TokenSequence2FeatureSequence.
 *
 * @return the assembled SerialPipes instance
 */
private Pipe getPipe() {
	// A PrintInputAndTarget stage can be appended while debugging; it is left out here.
	Pipe[] stages = {
		new Target2Label(),
		new SaveDataInSource(),
		new Input2CharSequence("UTF-8"),
		new CharSequence2TokenSequence(Pattern.compile("\\p{Alpha}+")),
		new TokenSequenceLowercase(),
		new TokenSequenceRemoveStopwords(false, false),
		new TokenSequence2FeatureSequence()
	};
	return new SerialPipes(stages);
}
 
开发者ID:hakchul77,项目名称:irnlp_toolkit,代码行数:19,代码来源:MalletWrapper.java

示例11: testThree

import cc.mallet.pipe.CharSequence2TokenSequence; //导入依赖的package包/类
/**
 * Builds a six-stage pipe, wraps it in an instance list and feeds that list
 * from a FileIterator rooted at "foo/bar" whose third argument is the
 * pattern {@code ^([^/]*)/}.
 */
public void testThree ()
{
	Pipe[] stages = {
		new Target2Label(),
		new CharSequence2TokenSequence(),
		new TokenSequenceLowercase(),
		new TokenSequenceRemoveStopwords(),
		new TokenSequence2FeatureSequence(),
		new FeatureSequence2FeatureVector()
	};
	InstanceList instances = new InstanceList (new SerialPipes(stages));

	File root = new File("foo/bar");
	Pattern leadingSegment = Pattern.compile("^([^/]*)/");
	Iterator<Instance> source = new FileIterator(root, null, leadingSegment);
	instances.addThruPipe (source);
}
 
开发者ID:mimno,项目名称:Mallet,代码行数:15,代码来源:TestRainbowStyle.java

示例12: createPipe

import cc.mallet.pipe.CharSequence2TokenSequence; //导入依赖的package包/类
/**
 * Creates the pipe for this test: CharSequence2TokenSequence,
 * TokenSequenceLowercase, TokenSequence2FeatureSequence and
 * FeatureSequence2FeatureVector, chained in that order.
 *
 * @return a new SerialPipes over the four stages
 */
public Pipe createPipe () {
	Pipe[] stages = {
		new CharSequence2TokenSequence(),
		new TokenSequenceLowercase(),
		new TokenSequence2FeatureSequence(),
		new FeatureSequence2FeatureVector()
	};
	return new SerialPipes(stages);
}
 
开发者ID:mimno,项目名称:Mallet,代码行数:8,代码来源:TestInstancePipe.java

示例13: testSpacePipe

import cc.mallet.pipe.CharSequence2TokenSequence; //导入依赖的package包/类
/**
 * Runs the first two TestCRF data strings through the space-prediction pipe
 * while System.out is redirected to a buffer, then compares the captured
 * text with {@code spacePipeOutput}.
 *
 * System.out is restored in a finally block, so a failure inside the pipe
 * cannot leave standard output redirected for later tests.
 */
public void testSpacePipe () {
	Pipe p = new SerialPipes(new Pipe[] {
		new CharSequence2TokenSequence("."),
		new TokenSequenceLowercase(),
		new TestCRF.TestCRFTokenSequenceRemoveSpaces (),
		new TokenText(),
		new OffsetConjunctions(false,
				new int[][] {{0},
						{1},{-1,0},{0,1},
						{-2,-1,0}, {0,1,2}, {-3,-2,-1}, {1,2,3},
				}),
		new PrintInputAndTarget(),
	});

	// Capture whatever the pipe writes to System.out (presumably the
	// PrintInputAndTarget stage) in a byte buffer.
	ByteArrayOutputStream out = new ByteArrayOutputStream ();
	PrintStream oldOut = System.out;
	PrintStream capture = new PrintStream (out);
	System.setOut (capture);
	try {
		InstanceList lst = new InstanceList (p);
		lst.addThruPipe (new ArrayIterator(new String[] { TestCRF.data[0],
				TestCRF.data[1], }));
		// Make sure all pending text reaches the buffer before it is read.
		capture.flush ();
	} finally {
		// Always undo the redirection, even when the pipe throws.
		System.setOut (oldOut);
	}

	assertEquals (spacePipeOutput, out.toString());
}
 
开发者ID:mimno,项目名称:Mallet,代码行数:28,代码来源:TestSpacePipe.java


注:本文中的cc.mallet.pipe.CharSequence2TokenSequence类示例由纯净天空整理自Github/MSDocs等开源代码及文档管理平台,相关代码片段筛选自各路编程大神贡献的开源项目,源码版权归原作者所有,传播和使用请参考对应项目的License;未经允许,请勿转载。