当前位置: 首页>>代码示例>>Java>>正文


Java MalletCrfStringOutcomeDataWriter类代码示例

本文整理汇总了Java中org.cleartk.ml.mallet.MalletCrfStringOutcomeDataWriter的典型用法代码示例。如果您正苦于以下问题：Java MalletCrfStringOutcomeDataWriter类的具体用法？Java MalletCrfStringOutcomeDataWriter怎么用？Java MalletCrfStringOutcomeDataWriter使用的例子？那么，这里精选的类代码示例或许可以为您提供帮助。


MalletCrfStringOutcomeDataWriter类属于org.cleartk.ml.mallet包,在下文中一共展示了MalletCrfStringOutcomeDataWriter类的8个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Java代码示例。

示例1: main

import org.cleartk.ml.mallet.MalletCrfStringOutcomeDataWriter; //导入依赖的package包/类
/**
 * Entry point: runs a training pipeline over the XMI files matching
 * {@code <train directory>/*.xmi}, writing Mallet CRF training data to the model directory,
 * and then invokes {@code Train.main} on that directory.
 *
 * @param args command-line arguments, parsed into {@code Options}
 * @throws Exception if argument parsing, the pipeline run, or training fails
 */
public static void main(String[] args) throws Exception {
	Options options = CliFactory.parseArguments(Options.class, args);

	// reader over the training files; the location is the train directory plus a *.xmi pattern
	String xmiPattern = options.getTrainDirectory() + "/*.xmi";
	CollectionReaderDescription reader = CollectionReaderFactory.createReaderDescription(
			XmiReader.class,
			XmiReader.PARAM_SOURCE_LOCATION, xmiPattern,
			XmiReader.PARAM_LENIENT, true);

	// pipeline stages, in order:
	// 1. ContextWindowAnnotator configured with FigureMention / Speech / TrainingArea
	// 2. ClearTkMentionAnnotator in training mode, writing Mallet CRF data to the model directory
	// 3. XmiWriter writing to "target/"
	SimplePipeline.runPipeline(
			reader,
			createEngineDescription(
					ContextWindowAnnotator.class,
					ContextWindowAnnotator.PARAM_BASE_ANNOTATION, FigureMention.class,
					ContextWindowAnnotator.PARAM_CONTEXT_CLASS, Speech.class,
					ContextWindowAnnotator.PARAM_TARGET_ANNOTATION, TrainingArea.class),
			createEngineDescription(
					ClearTkMentionAnnotator.class,
					CleartkSequenceAnnotator.PARAM_IS_TRAINING, true,
					DirectoryDataWriterFactory.PARAM_OUTPUT_DIRECTORY, options.getModelDirectory(),
					DefaultSequenceDataWriterFactory.PARAM_DATA_WRITER_CLASS_NAME,
					MalletCrfStringOutcomeDataWriter.class),
			createEngineDescription(
					XmiWriter.class,
					XmiWriter.PARAM_TARGET_LOCATION, "target/"));

	// build the Mallet CRF model from the data written above
	Train.main(options.getModelDirectory());
}
 
开发者ID:quadrama,项目名称:DramaNLP,代码行数:22,代码来源:MentionDetectionTraining.java

示例2: testMalletCRF

import org.cleartk.ml.mallet.MalletCrfStringOutcomeDataWriter; //导入依赖的package包/类
/**
 * Long-running test: writes part-of-speech training data with the Mallet CRF data writer,
 * runs {@code testClassifier} on it, and checks the first line of the tagged output file.
 *
 * @throws Exception if any step of the train/classify round trip fails
 */
@Test
public void testMalletCRF() throws Exception {
  this.assumeLongTestsEnabled();
  this.logger.info(LONG_TEST_MESSAGE);

  String malletCrfDirectory = outputDirectoryName + "/malletcrf";
  AnalysisEngineDescription posDataWriter = AnalysisEngineFactory.createEngineDescription(
      ExamplePosAnnotator.class,
      DefaultSequenceDataWriterFactory.PARAM_DATA_WRITER_CLASS_NAME,
      MalletCrfStringOutcomeDataWriter.class.getName(),
      DirectoryDataWriterFactory.PARAM_OUTPUT_DIRECTORY,
      malletCrfDirectory);

  // The viterbi stack size is meaningless here, so an invalid value (-1) is passed in to make
  // sure it is ignored.
  testClassifier(posDataWriter, malletCrfDirectory, -1);

  // only the first line of the tagged output is compared, after trimming
  File taggedFile = new File(malletCrfDirectory + "/2008_Sichuan_earthquake.txt.pos");
  String[] taggedLines = FileUtil.loadListOfStrings(taggedFile);
  String firstLine = taggedLines[0].trim();
  assertEquals(
      "2008/NN Sichuan/CD earthquake/NNS From/IN Wikipedia/NN ,/, the/DT free/NN encyclopedia/NN",
      firstLine);
}
 
开发者ID:ClearTK,项目名称:cleartk,代码行数:24,代码来源:ExamplePosClassifierTest.java

示例3: getWriterDescription

import org.cleartk.ml.mallet.MalletCrfStringOutcomeDataWriter; //导入依赖的package包/类
/**
 * Creates the engine description for {@code ReasonAnnotator} in training mode, configured to
 * write Mallet CRF training data to the given directory.
 *
 * @param outputDirectory value passed as the data writer's output directory parameter
 * @return the configured analysis engine description
 * @throws ResourceInitializationException if the description cannot be created
 */
public static AnalysisEngineDescription getWriterDescription(String outputDirectory)
		throws ResourceInitializationException {
	// configuration is passed as (parameter name, value) pairs, one pair per line
	AnalysisEngineDescription trainingDescription = AnalysisEngineFactory.createEngineDescription(
			ReasonAnnotator.class,
			CleartkSequenceAnnotator.PARAM_IS_TRAINING, true,
			DirectoryDataWriterFactory.PARAM_OUTPUT_DIRECTORY, outputDirectory,
			DefaultSequenceDataWriterFactory.PARAM_DATA_WRITER_CLASS_NAME,
			MalletCrfStringOutcomeDataWriter.class);
	return trainingDescription;
}
 
开发者ID:IE4OpenData,项目名称:Octroy,代码行数:8,代码来源:ReasonAnnotator.java

示例4: train

import org.cleartk.ml.mallet.MalletCrfStringOutcomeDataWriter; //导入依赖的package包/类
// Strong references to the Mallet loggers configured in train(). The LogManager may retain
// loggers only weakly, so a logger referenced solely from a local variable can be garbage
// collected while training is still running, silently discarding the level set on it.
private static final Logger MALLET_LOGGER = Logger.getLogger("cc.mallet");
private static final Logger MALLET_LIKELIHOOD_LOGGER = Logger
		.getLogger("cc.mallet.fst.CRFOptimizableByLabelLikelihood");

/**
 * Runs the training pipeline over the given reader, writing Mallet CRF training data to
 * {@code outputDirectory}, and then invokes {@code Train.main} on that directory.
 *
 * @param collectionReader source of the training documents
 * @param outputDirectory  directory that receives the training data and is passed to the trainer
 * @throws Exception if the pipeline or the training step fails
 */
@Override
public void train(CollectionReader collectionReader, File outputDirectory) throws Exception {
	// assemble the training pipeline
	AggregateBuilder aggregate = new AggregateBuilder();

	aggregate
			.add(createEngineDescription(ContextWindowAnnotator.class, ContextWindowAnnotator.PARAM_BASE_ANNOTATION,
					FigureMention.class, ContextWindowAnnotator.PARAM_CONTEXT_CLASS, Speech.class,
					ContextWindowAnnotator.PARAM_TARGET_ANNOTATION, TrainingArea.class));
	// the mention annotator, configured to write Mallet CRF training data
	aggregate.add(AnalysisEngineFactory.createEngineDescription(ClearTkMentionAnnotator.class,
			CleartkSequenceAnnotator.PARAM_IS_TRAINING, true, DirectoryDataWriterFactory.PARAM_OUTPUT_DIRECTORY,
			outputDirectory, DefaultSequenceDataWriterFactory.PARAM_DATA_WRITER_CLASS_NAME,
			MalletCrfStringOutcomeDataWriter.class));

	// run the pipeline over the training corpus
	SimplePipeline.runPipeline(collectionReader, aggregate.createAggregateDescription());

	// quiet Mallet down a bit (but still leave likelihoods so you can see
	// progress); the loggers live in static fields so the levels survive
	// for the whole training run
	MALLET_LOGGER.setLevel(Level.WARNING);
	MALLET_LIKELIHOOD_LOGGER.setLevel(Level.INFO);

	// train a Mallet CRF model on the training data
	Train.main(outputDirectory);

}
 
开发者ID:quadrama,项目名称:DramaNLP,代码行数:31,代码来源:MentionDetectionEvaluation.java

示例5: train

import org.cleartk.ml.mallet.MalletCrfStringOutcomeDataWriter; //导入依赖的package包/类
// Strong references to the Mallet loggers configured in train(). The LogManager may retain
// loggers only weakly, so a logger referenced solely from a local variable can be garbage
// collected while training is still running, silently discarding the level set on it.
private static final Logger MALLET_LOGGER = Logger.getLogger("cc.mallet");
private static final Logger MALLET_LIKELIHOOD_LOGGER =
    Logger.getLogger("cc.mallet.fst.CRFOptimizableByLabelLikelihood");

/**
 * Runs the named-entity training pipeline over the given reader, writing Mallet CRF training
 * data to {@code outputDirectory}, and then invokes {@code Train.main} on that directory.
 *
 * @param collectionReader source of the training file URIs
 * @param outputDirectory directory that receives the training data and is passed to the trainer
 * @throws Exception if the pipeline or the training step fails
 */
@Override
public void train(CollectionReader collectionReader, File outputDirectory) throws Exception {
  // assemble the training pipeline
  AggregateBuilder aggregate = new AggregateBuilder();

  // an annotator that loads the text from the training file URIs
  aggregate.add(UriToDocumentTextAnnotator.getDescription());

  // an annotator that parses and loads MASC named entity annotations (and tokens)
  aggregate.add(MascGoldAnnotator.getDescription());

  // an annotator that adds part-of-speech tags
  aggregate.add(PosTaggerAnnotator.getDescription());

  // our NamedEntityChunker annotator, configured to write Mallet CRF training data
  aggregate.add(AnalysisEngineFactory.createEngineDescription(
      NamedEntityChunker.class,
      CleartkSequenceAnnotator.PARAM_IS_TRAINING,
      true,
      DirectoryDataWriterFactory.PARAM_OUTPUT_DIRECTORY,
      outputDirectory,
      DefaultSequenceDataWriterFactory.PARAM_DATA_WRITER_CLASS_NAME,
      MalletCrfStringOutcomeDataWriter.class));

  // run the pipeline over the training corpus
  SimplePipeline.runPipeline(collectionReader, aggregate.createAggregateDescription());

  // quiet Mallet down a bit (but still leave likelihoods so you can see progress); the loggers
  // live in static fields so the levels survive for the whole training run
  MALLET_LOGGER.setLevel(Level.WARNING);
  MALLET_LIKELIHOOD_LOGGER.setLevel(Level.INFO);

  // train a Mallet CRF model on the training data
  Train.main(outputDirectory);

}
 
开发者ID:ClearTK,项目名称:cleartk,代码行数:38,代码来源:EvaluateNamedEntityChunker.java

示例6: main

import org.cleartk.ml.mallet.MalletCrfStringOutcomeDataWriter; //导入依赖的package包/类
/**
 * Entry point: writes Mallet CRF training data for {@code NamedEntityChunker} from the files in
 * the training directory, then invokes {@code Train.main} on the model directory.
 *
 * @param args command-line arguments, parsed into {@code Options}
 * @throws Exception if argument parsing, the pipeline run, or training fails
 */
public static void main(String[] args) throws Exception {
  Options options = CliFactory.parseArguments(Options.class, args);

  // reader over the URIs of the training files, selected with MascTextFileFilter
  CollectionReaderDescription trainingReader = UriCollectionReader.getDescriptionFromDirectory(
      options.getTrainDirectory(),
      MascTextFileFilter.class,
      null);

  // the training pipeline is built up stage by stage
  AggregateBuilder pipeline = new AggregateBuilder();

  // stage 1: load the document text from each training file URI
  pipeline.add(UriToDocumentTextAnnotator.getDescription());

  // stage 2: parse and load the MASC named entity annotations (and tokens)
  pipeline.add(MascGoldAnnotator.getDescription());

  // stage 3: add part-of-speech tags (so they can be used for features)
  pipeline.add(PosTaggerAnnotator.getDescription());

  // stage 4: the NamedEntityChunker in training mode, writing Mallet CRF training data
  pipeline.add(AnalysisEngineFactory.createEngineDescription(
      NamedEntityChunker.class,
      CleartkSequenceAnnotator.PARAM_IS_TRAINING, true,
      DirectoryDataWriterFactory.PARAM_OUTPUT_DIRECTORY, options.getModelDirectory(),
      DefaultSequenceDataWriterFactory.PARAM_DATA_WRITER_CLASS_NAME,
      MalletCrfStringOutcomeDataWriter.class));

  // process the whole training corpus
  SimplePipeline.runPipeline(trainingReader, pipeline.createAggregateDescription());

  // build the Mallet CRF model from the data written above
  Train.main(options.getModelDirectory());
}
 
开发者ID:ClearTK,项目名称:cleartk,代码行数:38,代码来源:TrainNamedEntityChunker.java

示例7: testSequenceDataWriterAnnotator

import org.cleartk.ml.mallet.MalletCrfStringOutcomeDataWriter; //导入依赖的package包/类
/**
 * Processes one short text with the Mallet CRF data writer and checks the first lines of the
 * resulting training-data file: each token line must start with the token text and end with its
 * part-of-speech tag, and there must be an empty line between "large" and ",".
 *
 * @throws IOException if the training-data file cannot be read
 * @throws UIMAException if the engine cannot be created or fails while processing
 */
@Test
public void testSequenceDataWriterAnnotator() throws IOException, UIMAException {
  AnalysisEngine engine = AnalysisEngineFactory.createEngine(
      TestAnnotator.class,
      DirectoryDataWriterFactory.PARAM_OUTPUT_DIRECTORY,
      outputDirectoryName,
      DefaultSequenceDataWriterFactory.PARAM_DATA_WRITER_CLASS_NAME,
      MalletCrfStringOutcomeDataWriter.class.getName());

  // create some tokens and sentences
  // add part-of-speech and stems to tokens

  String text = "What if we built a large\r\n, wooden badger?";
  tokenBuilder.buildTokens(
      jCas,
      text,
      "What if we built a large \n, wooden badger ?",
      "WDT TO PRP VBN DT JJ , JJ NN .");
  engine.process(jCas);
  engine.collectionProcessComplete();

  File trainFile = new MalletCrfStringOutcomeClassifierBuilder().getTrainingDataFile(this.outputDirectory);
  BufferedReader input = new BufferedReader(new FileReader(trainFile));
  // close the reader even when an assertion fails or a read throws, so the file handle is not
  // leaked for the rest of the test run
  try {
    String line = input.readLine();
    assertNotNull(line);
    assertTrue(line.endsWith(" WDT"));
    assertTrue(line.startsWith("What "));
    line = input.readLine();
    assertNotNull(line);
    assertTrue(line.endsWith(" TO"));
    assertTrue(line.startsWith("if "));
    line = input.readLine();
    assertNotNull(line);
    assertTrue(line.endsWith(" PRP"));
    assertTrue(line.startsWith("we "));
    line = input.readLine();
    assertNotNull(line);
    assertTrue(line.endsWith(" VBN"));
    assertTrue(line.startsWith("built "));
    line = input.readLine();
    assertNotNull(line);
    assertTrue(line.endsWith(" DT"));
    assertTrue(line.startsWith("a "));
    line = input.readLine();
    assertNotNull(line);
    assertTrue(line.endsWith(" JJ"));
    assertTrue(line.startsWith("large "));
    // an empty line is expected here, between "large" and ","
    line = input.readLine();
    assertNotNull(line);
    assertEquals("", line.trim());
    line = input.readLine();
    assertNotNull(line);
    assertTrue(line.endsWith(" ,"));
    assertTrue(line.startsWith(", "));
  } finally {
    input.close();
  }
}
 
开发者ID:ClearTK,项目名称:cleartk,代码行数:57,代码来源:MalletStringOutcomeDataWriterTest.java

示例8: runTest1

import org.cleartk.ml.mallet.MalletCrfStringOutcomeDataWriter; //导入依赖的package包/类
/**
 * Round trip for the Mallet CRF classifier: writes training data, trains a model, loads the
 * classifier from the training directory, checks the labels predicted for the sequence from
 * {@code createInstances()}, and finally runs the annotator again with the packaged model jar.
 *
 * @throws Exception if any step of the write/train/classify round trip fails
 */
@Test
public void runTest1() throws Exception {

  AnalysisEngine dataWriterAnnotator = AnalysisEngineFactory.createEngine(
      TestAnnotator.class,
      DirectoryDataWriterFactory.PARAM_OUTPUT_DIRECTORY,
      outputDirectoryName,
      DefaultSequenceDataWriterFactory.PARAM_DATA_WRITER_CLASS_NAME,
      MalletCrfStringOutcomeDataWriter.class.getName());

  dataWriterAnnotator.process(jCas);
  dataWriterAnnotator.collectionProcessComplete();

  // check that the training-data file can be opened and read; the line itself is not inspected
  File trainFile = new MalletCrfStringOutcomeClassifierBuilder().getTrainingDataFile(this.outputDirectory);
  BufferedReader reader = new BufferedReader(new FileReader(trainFile));
  try {
    reader.readLine();
  } finally {
    // release the file handle even if the read fails
    reader.close();
  }

  // NOTE(review): HideOutput presumably silences console output while training - confirm.
  // Restoring in a finally block keeps a training failure from leaving the output hidden for
  // the tests that run afterwards.
  HideOutput hider = new HideOutput();
  try {
    Train.main(outputDirectoryName);
  } finally {
    hider.restoreOutput();
  }

  MalletCrfStringOutcomeClassifierBuilder builder = new MalletCrfStringOutcomeClassifierBuilder();
  MalletCrfStringOutcomeClassifier classifier;
  classifier = builder.loadClassifierFromTrainingDirectory(this.outputDirectory);

  // collect the feature list of every instance into one sequence for the classifier
  List<List<Feature>> sequenceFeatures = new ArrayList<List<Feature>>();
  List<Instance<String>> instances = createInstances();
  for (Instance<String> instance : instances) {
    sequenceFeatures.add(instance.getFeatures());
  }

  // one outcome is expected per element of the sequence
  List<String> outcomes = classifier.classify(sequenceFeatures);
  assertEquals(sequenceFeatures.size(), outcomes.size());
  testLabels(
      outcomes,
      "O O O O O O O O O O O O O O O B-GENE I-GENE I-GENE O B-GENE I-GENE O O O O O O O O O O O O O O O O O O O O O");

  // run the same annotator again, this time configured with the model jar instead of a data writer
  AnalysisEngine classifierAnnotator = AnalysisEngineFactory.createEngine(
      TestAnnotator.class,
      GenericJarClassifierFactory.PARAM_CLASSIFIER_JAR_PATH,
      JarClassifierBuilder.getModelJarFile(outputDirectoryName));
  jCas.reset();
  classifierAnnotator.process(jCas);
  classifierAnnotator.collectionProcessComplete();

}
 
开发者ID:ClearTK,项目名称:cleartk,代码行数:47,代码来源:MalletCrfStringOutcomeClassifierTest.java


注:本文中的org.cleartk.ml.mallet.MalletCrfStringOutcomeDataWriter类示例由纯净天空整理自Github/MSDocs等开源代码及文档管理平台,相关代码片段筛选自各路编程大神贡献的开源项目,源码版权归原作者所有,传播和使用请参考对应项目的License;未经允许,请勿转载。