当前位置: 首页>>代码示例>>Java>>正文


Java TokenSequence2FeatureVectorSequence类代码示例

本文整理汇总了Java中cc.mallet.pipe.TokenSequence2FeatureVectorSequence的典型用法代码示例。如果您正苦于以下问题：Java TokenSequence2FeatureVectorSequence类的具体用法？Java TokenSequence2FeatureVectorSequence怎么用？Java TokenSequence2FeatureVectorSequence使用的例子？那么，这里精选的类代码示例或许可以为您提供帮助。


TokenSequence2FeatureVectorSequence类属于cc.mallet.pipe包,在下文中一共展示了TokenSequence2FeatureVectorSequence类的15个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Java代码示例。

示例1: testPrint

import cc.mallet.pipe.TokenSequence2FeatureVectorSequence; //导入依赖的package包/类
/**
 * Pushes the single string "ABCDE" through a small pipe, builds a CRF on the
 * resulting instance list, sets every optimizable parameter to its own index,
 * and prints the CRF.
 */
public void testPrint() {
	Pipe[] stages = {
			new CharSequence2TokenSequence("."),
			new TokenText(),
			new TestCRFTokenSequenceRemoveSpaces(),
			new TokenSequence2FeatureVectorSequence(),
			new PrintInputAndTarget() };
	Pipe pipeline = new SerialPipes(stages);

	// One-element data set.
	InstanceList instances = new InstanceList(pipeline);
	String[] samples = { "ABCDE" };
	instances.addThruPipe(new ArrayIterator(samples));

	CRF model = new CRF(pipeline, null);
	model.addFullyConnectedStatesForThreeQuarterLabels(instances);
	CRFTrainerByLabelLikelihood trainer = new CRFTrainerByLabelLikelihood(model);
	model.setWeightsDimensionAsIn(instances, false);
	Optimizable optimizable = trainer.getOptimizableCRF(instances);

	// Parameter k receives the value k.
	int parameterCount = optimizable.getNumParameters();
	double[] values = new double[parameterCount];
	for (int k = parameterCount - 1; k >= 0; k--) {
		values[k] = k;
	}
	optimizable.setParameters(values);

	model.print();
}
 
开发者ID:kostagiolasn,项目名称:NucleosomePatternClassifier,代码行数:22,代码来源:TestCRF.java

示例2: makeSpacePredictionPipe

import cc.mallet.pipe.TokenSequence2FeatureVectorSequence; //导入依赖的package包/类
/**
 * Builds the serial pipe returned to callers: tokenization on ".", lowercasing,
 * TestMEMMTokenSequenceRemoveSpaces, TokenText, offset conjunctions, and finally
 * conversion to a feature vector sequence.
 *
 * @return the assembled {@link SerialPipes}
 */
public static Pipe makeSpacePredictionPipe ()
{
  // Offset groups handed to OffsetConjunctions. Other groups such as {0},
  // {-2, -1, 0}, {0, 1, 2}, {-3, -2, -1} and {1, 2, 3} are not enabled.
  int[][] conjunctionOffsets = { {1}, {-1, 0}, {0, 1} };

  // A PrintInputAndTarget stage may be inserted before the last stage for debugging.
  return new SerialPipes(new Pipe[] {
      new CharSequence2TokenSequence("."),
      new TokenSequenceLowercase(),
      new TestMEMMTokenSequenceRemoveSpaces(),
      new TokenText(),
      new OffsetConjunctions(true, conjunctionOffsets),
      new TokenSequence2FeatureVectorSequence()
  });
}
 
开发者ID:kostagiolasn,项目名称:NucleosomePatternClassifier,代码行数:20,代码来源:TestMEMM.java

示例3: disabledtestPrint

import cc.mallet.pipe.TokenSequence2FeatureVectorSequence; //导入依赖的package包/类
/**
 * Pushes the single string "ABCDE" through a small pipe, builds an MEMM with
 * fully connected label states, sets every optimizable parameter to its own
 * index, and prints the model.
 */
public void disabledtestPrint ()
{
	Pipe[] stages = {
			new CharSequence2TokenSequence("."),
			new TokenText(),
			new TestMEMM.TestMEMMTokenSequenceRemoveSpaces(),
			new TokenSequence2FeatureVectorSequence(),
			new PrintInputAndTarget() };
	Pipe pipeline = new SerialPipes(stages);

	// One-element data set.
	InstanceList instances = new InstanceList(pipeline);
	String[] samples = { "ABCDE" };
	instances.addThruPipe(new ArrayIterator(samples));

	MEMM model = new MEMM(pipeline, null);
	model.addFullyConnectedStatesForLabels();
	model.setWeightsDimensionAsIn(instances);

	MEMMTrainer trainer = new MEMMTrainer(model);
	MEMMTrainer.MEMMOptimizableByLabelLikelihood optimizable = trainer.getOptimizableMEMM(instances);

	// Parameter k receives the value k.
	int parameterCount = optimizable.getNumParameters();
	double[] values = new double[parameterCount];
	int k = 0;
	while (k < parameterCount) {
		values[k] = k;
		k++;
	}
	optimizable.setParameters(values);

	model.print();
}
 
开发者ID:kostagiolasn,项目名称:NucleosomePatternClassifier,代码行数:25,代码来源:TestMEMM.java

示例4: build

import cc.mallet.pipe.TokenSequence2FeatureVectorSequence; //导入依赖的package包/类
/**
 * Fills the {@code pipes} list and wraps it in a {@link SerialPipes}.
 *
 * <p>Without SCL the list ends with
 * {@code TokenSequence2FeatureVectorSequence(true, false)}. With SCL enabled,
 * that intermediate pipe is first used to load the source and target instances
 * and train the SCL model; its last stage is then swapped for
 * {@code TokenSequence2FeatureVectorSequence(false, true)} followed by an
 * {@code SCLAugment} stage.
 *
 * @return the serial pipe over the assembled stages
 */
public Pipe build() {
	pipes = new LinkedList<Pipe>();
	pipes.add(new SimpleTaggerSentence2TokenSequence(false));
	addFeatures();
	pipes.add(new TokenSequence2FeatureVectorSequence(true, false));

	if (!useSCL) {
		return new SerialPipes(pipes);
	}

	// Train SCL on instances produced by the pipe as assembled so far.
	Pipe bootstrapPipe = new SerialPipes(pipes);
	InstanceList sourceInstances = new InstanceList(bootstrapPipe);
	sourceInstances.addThruPipe(sclSourceIt);
	InstanceList targetInstances = new InstanceList(bootstrapPipe);
	targetInstances.addThruPipe(sclTargetIt);
	SCL trainedScl = trainSCL(sourceInstances, targetInstances);

	// Replace the final vectorizing stage and append the SCL augmentation.
	pipes.removeLast();
	pipes.add(new TokenSequence2FeatureVectorSequence(false, true));
	pipes.add(new SCLAugment(trainedScl));
	return new SerialPipes(pipes);
}
 
开发者ID:siqil,项目名称:udaner,代码行数:19,代码来源:PipeBuilder.java

示例5: disabledtestPrint

import cc.mallet.pipe.TokenSequence2FeatureVectorSequence; //导入依赖的package包/类
/**
 * Runs the one-element data set {"ABCDE"} through a five-stage pipe, creates an
 * MEMM with fully connected label states over it, assigns each optimizable
 * parameter the value of its index, and prints the MEMM.
 */
public void disabledtestPrint ()
{
	Pipe pipeline = new SerialPipes(new Pipe[] {
			new CharSequence2TokenSequence("."),
			new TokenText(),
			new TestMEMMTokenSequenceRemoveSpaces(),
			new TokenSequence2FeatureVectorSequence(),
			new PrintInputAndTarget() });

	InstanceList instances = new InstanceList(pipeline);
	String[] samples = new String[] { "ABCDE" };
	instances.addThruPipe(new ArrayIterator(samples));

	MEMM memm = new MEMM(pipeline, null);
	memm.addFullyConnectedStatesForLabels();
	memm.setWeightsDimensionAsIn(instances);

	MEMMTrainer trainer = new MEMMTrainer(memm);
	MEMMTrainer.MEMMOptimizableByLabelLikelihood optimizable = trainer.getOptimizableMEMM(instances);

	// values[idx] == idx for every parameter slot.
	double[] values = new double[optimizable.getNumParameters()];
	for (int idx = values.length - 1; idx >= 0; idx--) {
		values[idx] = idx;
	}
	optimizable.setParameters(values);

	memm.print();
}
 
开发者ID:shalomeir,项目名称:tctm,代码行数:25,代码来源:TestMEMM.java

示例6: makePipe

import cc.mallet.pipe.TokenSequence2FeatureVectorSequence; //导入依赖的package包/类
/**
 * Assembles the serial feature pipe. A fresh feature alphabet and the label
 * alphabet of the target pipe are shared between the token-sequence stage and
 * the final vectorizing stage.
 *
 * @return the assembled {@link SerialPipes}
 */
private Pipe makePipe() {
  Alphabet featureAlphabet = new Alphabet();
  Target2LabelSequence targetToLabels = new Target2LabelSequence();
  LabelAlphabet labelAlphabet = (LabelAlphabet) targetToLabels.getTargetAlphabet();

  // Not enabled: SurroundingTokenFeature(false), SurroundingTokenFeature(true),
  // NeighborShapeFeature(true, makeShapeNeighs()), AppendEndPipe().
  ImmutableList<Pipe> stages = ImmutableList.of(
      new SWordConverterPipe(),
      // produce the token sequence
      new StringListToTokenSequence(featureAlphabet, labelAlphabet),
      // lowercase everything
      new TokenSequenceLowercase(),
      // neighboring graphemes
      new PhoneNeighborPipe(true, makeNeighbors()),
      new PhoneClassPipe(true, makeClassNeighbors()),
      new VowelNeighborPipe(),
      new IsFirstPipe(),
      new ThisPhoneClassPipe(),
      // turn the token text strings into features
      new TokenSequenceToFeature(),
      new TokenSequence2FeatureVectorSequence(featureAlphabet, true, false),
      targetToLabels
  );
  return new SerialPipes(stages);
}
 
开发者ID:steveash,项目名称:jg2p,代码行数:24,代码来源:PhoneSyllTagTrainer.java

示例7: makePipe

import cc.mallet.pipe.TokenSequence2FeatureVectorSequence; //导入依赖的package包/类
/**
 * Assembles the serial feature pipe. A fresh feature alphabet and the label
 * alphabet of the target pipe are shared between the token-sequence stage and
 * the final vectorizing stage.
 *
 * @return the assembled {@link SerialPipes}
 */
private Pipe makePipe() {
  Alphabet featureAlphabet = new Alphabet();
  Target2LabelSequence targetToLabels = new Target2LabelSequence();
  LabelAlphabet labelAlphabet = (LabelAlphabet) targetToLabels.getTargetAlphabet();

  // Not enabled: SurroundingTokenFeature(true).
  ImmutableList<Pipe> stages = ImmutableList.of(
      // produce the token sequence
      new StringListToTokenSequence(featureAlphabet, labelAlphabet),
      // lowercase everything
      new TokenSequenceLowercase(),
      // neighboring graphemes
      new NeighborTokenFeature(true, makeNeighbors()),
      new SurroundingTokenFeature(false),
      new NeighborShapeFeature(true, makeShapeNeighs()),
      new LeadingTrailingFeature(),
      // turn the token text strings into features
      new TokenSequenceToFeature(),
      new TokenSequence2FeatureVectorSequence(featureAlphabet, true, true),
      targetToLabels
  );
  return new SerialPipes(stages);
}
 
开发者ID:steveash,项目名称:jg2p,代码行数:19,代码来源:SyllTagTrainer.java

示例8: makePipe

import cc.mallet.pipe.TokenSequence2FeatureVectorSequence; //导入依赖的package包/类
/**
 * Assembles the serial feature pipe. The first stage is an
 * {@code AlignToStressPipe} given an empty stress-feature list, and the target
 * is converted with a {@code Target2Label} stage at the end.
 *
 * @return the assembled {@link SerialPipes}
 */
private Pipe makePipe() {
  Alphabet featureAlphabet = new Alphabet();
  Target2Label targetToLabel = new Target2Label();
  LabelAlphabet labelAlphabet = (LabelAlphabet) targetToLabel.getTargetAlphabet();

  ImmutableList<Pipe> stages = ImmutableList.of(
      // produce the token sequence
      new AlignToStressPipe(featureAlphabet, labelAlphabet, ImmutableList.<StressFeature>of()),
      // lowercase everything
      new TokenSequenceLowercase(),
      // neighboring graphemes
      new NeighborTokenFeature(true, makeNeighbors()),
      new SurroundingTokenFeature(false),
      new SurroundingTokenFeature(true),
      new NeighborShapeFeature(true, makeShapeNeighs()),
      new LeadingTrailingFeature(),
      // turn the token text strings into features
      new TokenSequenceToFeature(),
      new TokenSequence2FeatureVectorSequence(featureAlphabet, true, false),
      targetToLabel
  );
  return new SerialPipes(stages);
}
 
开发者ID:steveash,项目名称:jg2p,代码行数:21,代码来源:StressTrainer.java

示例9: makePipe

import cc.mallet.pipe.TokenSequence2FeatureVectorSequence; //导入依赖的package包/类
/**
 * Assembles the serial feature pipe. A fresh feature alphabet and the label
 * alphabet of the target pipe are shared between the token-sequence stage and
 * the final vectorizing stage.
 *
 * @return the assembled {@link SerialPipes}
 */
private Pipe makePipe() {
  Alphabet featureAlphabet = new Alphabet();
  Target2LabelSequence targetToLabels = new Target2LabelSequence();
  LabelAlphabet labelAlphabet = (LabelAlphabet) targetToLabels.getTargetAlphabet();

  ImmutableList<Pipe> stages = ImmutableList.of(
      // produce the token sequence
      new StringListToTokenSequence(featureAlphabet, labelAlphabet),
      // lowercase everything
      new TokenSequenceLowercase(),
      // neighboring graphemes
      new NeighborTokenFeature(true, makeNeighbors()),
      new SurroundingTokenFeature(false),
      new SurroundingTokenFeature(true),
      new NeighborShapeFeature(true, makeShapeNeighs()),
      new LeadingTrailingFeature(),
      // turn the token text strings into features
      new TokenSequenceToFeature(),
      new TokenSequence2FeatureVectorSequence(featureAlphabet, true, false),
      targetToLabels
  );
  return new SerialPipes(stages);
}
 
开发者ID:steveash,项目名称:jg2p,代码行数:19,代码来源:AlignTagTrainer.java

示例10: makePipe

import cc.mallet.pipe.TokenSequence2FeatureVectorSequence; //导入依赖的package包/类
/**
 * Assembles the serial feature pipe. After the target has been converted by
 * the {@code Target2LabelSequence} stage, a
 * {@code LabelSequenceToLabelsAssignment} stage built on the same two
 * alphabets runs last.
 *
 * @return the assembled {@link SerialPipes}
 */
private static Pipe makePipe() {
  Alphabet featureAlphabet = new Alphabet();
  Target2LabelSequence targetToLabels = new Target2LabelSequence();
  LabelAlphabet labelAlphabet = (LabelAlphabet) targetToLabels.getTargetAlphabet();

  ImmutableList<Pipe> stages = ImmutableList.of(
      // produce the token sequence
      new StringListToTokenSequence(featureAlphabet, labelAlphabet),
      // lowercase everything
      new TokenSequenceLowercase(),
      // neighboring graphemes
      new NeighborTokenFeature(true, makeNeighbors()),
      new NeighborShapeFeature(true, makeShapeNeighs()),
      // turn the token text strings into features
      new TokenSequenceToFeature(),
      new TokenSequence2FeatureVectorSequence(featureAlphabet, true, true),
      targetToLabels,
      new LabelSequenceToLabelsAssignment(featureAlphabet, labelAlphabet)
  );
  return new SerialPipes(stages);
}
 
开发者ID:steveash,项目名称:jg2p,代码行数:17,代码来源:PhonemeACrfTrainer.java

示例11: main

import cc.mallet.pipe.TokenSequence2FeatureVectorSequence; //导入依赖的package包/类
/**
 * Trains an ACRF and writes it, gzipped, to disk.
 *
 * @param args {@code args[0]} training file, {@code args[1]} test file,
 *             {@code args[2]} file the trained ACRF is written to
 * @throws FileNotFoundException if the training or test file cannot be opened
 */
public static void main (String[] args) throws FileNotFoundException
{
  File trainFile = new File (args[0]);
  File testFile = new File (args[1]);
  File crfFile = new File (args[2]);

  Pipe[] stages = {
      new GenericAcrfData2TokenSequence (2),
      new TokenSequence2FeatureVectorSequence (true, true),
  };
  Pipe pipe = new SerialPipes (stages);

  // The same pattern is handed to both line-group iterators.
  Pattern lineBoundary = Pattern.compile ("\\s*");

  InstanceList training = new InstanceList (pipe);
  FileReader trainReader = new FileReader (trainFile);
  training.addThruPipe (new LineGroupIterator (trainReader, lineBoundary, true));

  InstanceList testing = new InstanceList (pipe);
  FileReader testReader = new FileReader (testFile);
  testing.addThruPipe (new LineGroupIterator (testReader, lineBoundary, true));

  ACRF.Template[] templates = {
      new ACRF.BigramTemplate (0),
      new ACRF.BigramTemplate (1),
      new ACRF.PairwiseFactorTemplate (0,1),
      new CrossTemplate1 (0,1)
  };
  ACRF acrf = new ACRF (pipe, templates);

  ACRFTrainer trainer = new DefaultAcrfTrainer ();
  trainer.train (acrf, training, null, testing, 99999);

  FileUtils.writeGzippedObject (crfFile, acrf);
}
 
开发者ID:mimno,项目名称:GRMM,代码行数:36,代码来源:SimpleCrfExample.java

示例12: buildSerialPipes

import cc.mallet.pipe.TokenSequence2FeatureVectorSequence; //导入依赖的package包/类
/**
 * Builds the serial pipe: four fixed preprocessing stages, one stage per
 * requested feature name, an {@code OffsetConjunctions} stage, and finally the
 * feature-vector and label-sequence conversions.
 *
 * @param featureNames names resolved to pipes through a {@code FeaturePipeProvider}
 * @param replacements passed to {@code TargetReplacementPipe}
 * @param conjunctions one entry per conjunction; each entry is a
 *                     {@code ;}-separated list of integers in which the text
 *                     {@code min} stands for a minus sign
 * @return the assembled serial pipe
 * @throws NumberFormatException if a conjunction element is not an integer
 */
private SerialPipes buildSerialPipes(List<String> featureNames, List<String> replacements,
        List<String> conjunctions) throws LangDetectException, IOException {
    ArrayList<Pipe> stages = new ArrayList<Pipe>();
    stages.add(new LineGroupString2TokenSequence());
    stages.add(new AddTargetToLinePipe(6));
    stages.add(new LineToTargetTextPipe());
    stages.add(new TargetReplacementPipe(replacements));

    FeaturePipeProvider provider = new FeaturePipeProvider();
    for (String name : featureNames) {
        stages.add(provider.getPipe(name));
    }

    // Parse each conjunction specification into one row of offsets.
    int[][] offsets = new int[conjunctions.size()][];
    int row = 0;
    for (String spec : conjunctions) {
        String[] parts = spec.replaceAll("min", "-").split(";");
        int[] parsed = new int[parts.length];
        int col = 0;
        for (String part : parts) {
            parsed[col++] = Integer.parseInt(part);
        }
        offsets[row++] = parsed;
    }
    stages.add(new OffsetConjunctions(offsets));

    stages.add(new TokenSequence2FeatureVectorSequence(false, false));
    stages.add(new Target2LabelSequence());

    // A PrintInputAndTarget stage may be appended here for debugging.
    SerialPipes result = new SerialPipes(stages);
    return result;
}
 
开发者ID:exciteproject,项目名称:refext,代码行数:34,代码来源:ReferenceExtractorTrainer.java

示例13: main

import cc.mallet.pipe.TokenSequence2FeatureVectorSequence; //导入依赖的package包/类
/**
 * Trains an ACRF on the training file, passing the test file's instances to
 * the trainer separately, and writes the trained ACRF, gzipped, to disk.
 *
 * @param args {@code args[0]} training file, {@code args[1]} test file,
 *             {@code args[2]} file the trained ACRF is written to
 * @throws FileNotFoundException if the training or test file cannot be opened
 */
public static void main (String[] args) throws FileNotFoundException
{
  File trainFile = new File (args[0]);
  File testFile = new File (args[1]);
  File crfFile = new File (args[2]);

  Pipe pipe = new SerialPipes (new Pipe[] {
      new GenericAcrfData2TokenSequence (2),
      new TokenSequence2FeatureVectorSequence (true, true),
  });

  InstanceList training = new InstanceList (pipe);
  training.addThruPipe (new LineGroupIterator (new FileReader (trainFile),
                                       Pattern.compile ("\\s*"),
                                       true));

  // The test file must go into `testing`, not `training`: otherwise the test
  // data is trained on and the list handed to the trainer as test set is empty.
  InstanceList testing = new InstanceList (pipe);
  testing.addThruPipe (new LineGroupIterator (new FileReader (testFile),
                                       Pattern.compile ("\\s*"),
                                       true));

  ACRF.Template[] tmpls = new ACRF.Template[] {
          new ACRF.BigramTemplate (0),
          new ACRF.BigramTemplate (1),
          new ACRF.PairwiseFactorTemplate (0,1),
          new CrossTemplate1 (0,1)
  };

  ACRF acrf = new ACRF (pipe, tmpls);

  ACRFTrainer trainer = new DefaultAcrfTrainer ();
  trainer.train (acrf, training, null, testing, 99999);

  FileUtils.writeGzippedObject (crfFile, acrf);
}
 
开发者ID:kostagiolasn,项目名称:NucleosomePatternClassifier,代码行数:36,代码来源:SimpleCrfExample.java

示例14: makeSpacePredictionPipe

import cc.mallet.pipe.TokenSequence2FeatureVectorSequence; //导入依赖的package包/类
/**
 * Builds the serial pipe returned to callers: tokenization on ".", lowercasing,
 * TestCRFTokenSequenceRemoveSpaces, TokenText, offset conjunctions, and finally
 * conversion to a feature vector sequence.
 *
 * @return the assembled {@link SerialPipes}
 */
private Pipe makeSpacePredictionPipe() {
	// Active conjunction offsets. The group {0, 1} that the original test also
	// used is not enabled, nor are the wider groups {-2, -1, 0}, {0, 1, 2},
	// {-3, -2, -1}, {1, 2, 3}.
	// TODO (akm 12/2007): re-enable the wider groups; changing this set means the
	// expected likelihood and accuracy test values have to be adjusted as well.
	int[][] conjunctionOffsets = { { 0 }, { 1 }, { -1, 0 } };

	// A PrintInputAndTarget stage may be inserted before the last stage for debugging.
	Pipe[] stages = {
			new CharSequence2TokenSequence("."),
			new TokenSequenceLowercase(),
			new TestCRFTokenSequenceRemoveSpaces(),
			new TokenText(),
			new OffsetConjunctions(true, conjunctionOffsets),
			new TokenSequence2FeatureVectorSequence() };
	return new SerialPipes(stages);
}
 
开发者ID:kostagiolasn,项目名称:NucleosomePatternClassifier,代码行数:28,代码来源:TestCRF.java

示例15: makePipe

import cc.mallet.pipe.TokenSequence2FeatureVectorSequence; //导入依赖的package包/类
/**
 * Assembles the serial feature pipe on top of the given feature alphabet.
 *
 * @param alpha alphabet shared by the alignment stage and the final
 *              vectorizing stage
 * @return the assembled serial pipe
 */
private SerialPipes makePipe(Alphabet alpha) {
  Target2LabelSequence targetToLabels = new Target2LabelSequence();
  LabelAlphabet labelAlphabet = (LabelAlphabet) targetToLabels.getTargetAlphabet();

  // Not enabled: WindowFeature(false, 4), WindowFeature(true, 6),
  // NearSyllFeature(true/false), SyllMarkingFeature, SyllSequenceFeature,
  // SyllRelativeMarkFeature, SonorityFeature2(true/false),
  // VowelWindowFeature(3, 2, "LSTSYL_", 0, true),
  // SurroundingTokenFeature2(true, 1, 1), (false, 3, 2), (true, 4, 4),
  // LeadingTrailingFeature.
  SerialPipes pipeline = new SerialPipes(ImmutableList.of(
      // produce the token sequence
      new AlignmentToTokenSequence(alpha, labelAlphabet, true, true, false),
      // lowercase everything
      new TokenSequenceLowercase(),
      // neighboring graphemes
      new NeighborTokenFeature(true, makeNeighbors()),
      new NeighborShapeFeature(true, makeShapeNeighs()),
      new NeighborSyllableFeature(-2, -1, 1, 2),
      new SyllCountingFeature(),
      new SyllCharRoleFeature(),
      new EndingVowelFeature(),
      new VowelWindowFeature(2, 1, "PRESYL_", -1, false),
      new VowelWindowFeature(2, 1, "PSTSYL_", 1, false),
      new SurroundingTokenFeature2(false, 1, 1),
      new SurroundingTokenFeature2(false, 2, 2),
      new SurroundingTokenFeature2(true, 3, 3),
      // turn the token text strings into features
      new TokenSequenceToFeature(),
      new TokenSequence2FeatureVectorSequence(alpha, true, false),
      targetToLabels
  ));
  return pipeline;
}
 
开发者ID:steveash,项目名称:jg2p,代码行数:39,代码来源:PhonemeCrfTrainer.java


注:本文中的cc.mallet.pipe.TokenSequence2FeatureVectorSequence类示例由纯净天空整理自Github/MSDocs等开源代码及文档管理平台,相关代码片段筛选自各路编程大神贡献的开源项目,源码版权归原作者所有,传播和使用请参考对应项目的License;未经允许,请勿转载。