当前位置: 首页>>代码示例>>Java>>正文


Java DirectoryDataWriterFactory类代码示例

本文整理汇总了Java中org.cleartk.ml.jar.DirectoryDataWriterFactory的典型用法代码示例。如果您正苦于以下问题:Java DirectoryDataWriterFactory类的具体用法?Java DirectoryDataWriterFactory怎么用?Java DirectoryDataWriterFactory使用的例子?那么恭喜您,这里精选的类代码示例或许可以为您提供帮助。


DirectoryDataWriterFactory类属于org.cleartk.ml.jar包,在下文中一共展示了DirectoryDataWriterFactory类的15个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Java代码示例。

示例1: writeModel

import org.cleartk.ml.jar.DirectoryDataWriterFactory; //导入依赖的package包/类
/**
 * Runs the NER training pipeline over a single POS-tagged file.
 *
 * <p>The pipeline consists of a {@code NERReader}, a {@code SnowballStemmer} and a
 * {@code NERAnnotator} configured in training mode, with
 * {@code CrfSuiteStringOutcomeDataWriter} as data writer class and {@code modelDirectory}
 * as the data writer's output directory.
 *
 * @param posTagFile     input file; its absolute path and its name are both handed to the
 *                       collection reader factory
 * @param modelDirectory value passed as {@code DirectoryDataWriterFactory.PARAM_OUTPUT_DIRECTORY}
 * @param language       value passed as {@code SnowballStemmer.PARAM_LANGUAGE}
 * @throws UIMAException declared for the UIMA component creation and pipeline calls
 * @throws IOException   declared for the reader creation and pipeline calls
 */
public static void writeModel(File posTagFile, String modelDirectory, String language) throws UIMAException, IOException {

    CollectionReader reader = FilesCollectionReader.getCollectionReaderWithSuffixes(
            posTagFile.getAbsolutePath(),
            NERReader.CONLL_VIEW,
            posTagFile.getName());

    AnalysisEngine stemmer = createEngine(
            SnowballStemmer.class,
            SnowballStemmer.PARAM_LANGUAGE, language);

    // Training mode: the annotator emits training data through the configured data writer.
    AnalysisEngine trainingAnnotator = createEngine(
            NERAnnotator.class,
            NERAnnotator.PARAM_FEATURE_EXTRACTION_FILE, "src/main/resources/feature/features.xml",
            NERAnnotator.PARAM_IS_TRAINING, true,
            DirectoryDataWriterFactory.PARAM_OUTPUT_DIRECTORY, modelDirectory,
            DefaultDataWriterFactory.PARAM_DATA_WRITER_CLASS_NAME, CrfSuiteStringOutcomeDataWriter.class);

    AnalysisEngine conllParser = createEngine(NERReader.class);

    runPipeline(reader, conllParser, stemmer, trainingAnnotator);
}
 
开发者ID:floschne,项目名称:NLP_ProjectNER,代码行数:28,代码来源:ExecuteNER.java

示例2: writeModel

import org.cleartk.ml.jar.DirectoryDataWriterFactory; //导入依赖的package包/类
/**
 * Runs the NER training pipeline over a single POS-tagged file, writing the training data
 * into the directory returned by {@code getModelDir()}.
 *
 * <p>The model directory is created first if it does not exist yet. The pipeline consists of
 * a {@code NERReader}, a {@code SnowballStemmer} and a {@code NERAnnotator} configured in
 * training mode with {@code CrfSuiteStringOutcomeDataWriter} as data writer class.
 *
 * @param posTagFile     input file; its absolute path and its name are both handed to the
 *                       collection reader factory
 * @param language       value passed as {@code SnowballStemmer.PARAM_LANGUAGE}
 * @param configFileName file name appended to {@code FEATURE_EXTRACTOR_CONFIG_DIRECTORY} to
 *                       form the feature extraction file location
 * @throws UIMAException declared for the UIMA component creation and pipeline calls
 * @throws IOException   if the model directory does not exist and cannot be created, or if
 *                       raised by the reader creation or pipeline calls
 */
private void writeModel(File posTagFile, String language, String configFileName) throws UIMAException, IOException {

    // mkdirs() returns false both on failure and when the directory already exists, so only
    // treat it as an error when the path is still not a directory afterwards.
    File modelDir = new File(getModelDir());
    if (!modelDir.mkdirs() && !modelDir.isDirectory()) {
        throw new IOException("Could not create model directory: " + modelDir.getAbsolutePath());
    }

    CollectionReader posTagFileReader = FilesCollectionReader.getCollectionReaderWithSuffixes(
            posTagFile.getAbsolutePath(), NERReader.CONLL_VIEW, posTagFile.getName());

    AnalysisEngine snowballStemmer = createEngine(SnowballStemmer.class, SnowballStemmer.PARAM_LANGUAGE, language);

    // Training mode: the annotator emits training data through the configured data writer.
    AnalysisEngine nerAnnotator = createEngine(NERAnnotator.class,
            NERAnnotator.PARAM_FEATURE_EXTRACTION_FILE, FEATURE_EXTRACTOR_CONFIG_DIRECTORY + configFileName,
            NERAnnotator.PARAM_IS_TRAINING, true,
            DirectoryDataWriterFactory.PARAM_OUTPUT_DIRECTORY, getModelDir(),
            DefaultDataWriterFactory.PARAM_DATA_WRITER_CLASS_NAME, CrfSuiteStringOutcomeDataWriter.class);

    runPipeline(
            posTagFileReader,
            createEngine(NERReader.class),
            snowballStemmer,
            nerAnnotator
    );
}
 
开发者ID:floschne,项目名称:NLP_ProjectNER,代码行数:30,代码来源:ExecuteFeatureAblationTest.java

示例3: train

import org.cleartk.ml.jar.DirectoryDataWriterFactory; //导入依赖的package包/类
/**
 * Writes training data for the question category classifier and then trains a model from it.
 *
 * <p>The preprocessing pipeline (document text, sentences, tokens, POS tags, stemming, gold
 * question categories) is followed by a {@code QuestionCategoryAnnotator} in training mode
 * that writes through {@code LibSvmStringOutcomeDataWriter} into {@code outputDirectory}.
 * Afterwards {@code Train.main} is invoked on that directory with this instance's
 * {@code trainingArguments}.
 *
 * @param collectionReader reader supplying the training documents
 * @param outputDirectory  directory used as data writer output and as the argument to
 *                         {@code Train.main}
 * @throws Exception if building or running the pipeline, or the training call, fails
 */
@Override
public void train(CollectionReader collectionReader, File outputDirectory) throws Exception {
  AggregateBuilder builder = new AggregateBuilder();
  builder.add(UriToDocumentTextAnnotator.getDescription());
  builder.add(SentenceAnnotator.getDescription());
  builder.add(TokenAnnotator.getDescription());
  builder.add(PosTaggerAnnotator.getDescription());
  builder.add(DefaultSnowballStemmer.getDescription("English"));
  builder.add(AnalysisEngineFactory.createEngineDescription(GoldQuestionCategoryAnnotator.class));
  AnalysisEngineDescription documentClassificationAnnotator = AnalysisEngineFactory.createEngineDescription(
      QuestionCategoryAnnotator.class, CleartkAnnotator.PARAM_IS_TRAINING, true,
      DirectoryDataWriterFactory.PARAM_OUTPUT_DIRECTORY, outputDirectory,
      DefaultDataWriterFactory.PARAM_DATA_WRITER_CLASS_NAME, LibSvmStringOutcomeDataWriter.class.getName());
  builder.add(documentClassificationAnnotator);
  SimplePipeline.runPipeline(collectionReader, builder.createAggregateDescription());
  System.err.println("Train model and write model.jar file.");
  HideOutput hider = new HideOutput();
  try {
    Train.main(outputDirectory, this.trainingArguments.toArray(new String[this.trainingArguments.size()]));
  } finally {
    // Restore output even when training throws; otherwise whatever HideOutput changed
    // would stay in effect for the rest of the run.
    hider.restoreOutput();
  }
}
 
开发者ID:utk4rsh,项目名称:question-classifier,代码行数:21,代码来源:QuestionCategoryEvaluation.java

示例4: train

import org.cleartk.ml.jar.DirectoryDataWriterFactory; //导入依赖的package包/类
/**
 * Writes gender-classification training data into {@code directory} and trains on it.
 *
 * <p>The aggregate prepares the {@code "DP"} view from {@code DramatisPersonae} /
 * {@code Figure} annotations, segments that view, runs {@code ClearTkGenderAnnotator} on it
 * with {@code LibSvmStringOutcomeDataWriter}, and writes XMI to {@code target/xmi}.
 * Finally {@code Train.main} is called on {@code directory} with the arguments {@code -t 0}.
 *
 * @param collectionReader reader supplying the training documents
 * @param directory        data writer output directory, also passed to {@code Train.main}
 * @throws Exception if building or running the pipeline, or the training call, fails
 */
@Override
protected void train(CollectionReader collectionReader, File directory) throws Exception {
	String viewName = "DP";
	AggregateBuilder pipeline = new AggregateBuilder();

	// Build the temporary view the following components operate on.
	pipeline.add(AnalysisEngineFactory.createEngineDescription(
			PrepareClearTk.class,
			PrepareClearTk.PARAM_VIEW_NAME, viewName,
			PrepareClearTk.PARAM_ANNOTATION_TYPE, DramatisPersonae.class,
			PrepareClearTk.PARAM_SUBANNOTATIONS, Arrays.asList(Figure.class)));

	// Segmenter and classifier have their default sofa mapped onto the temporary view.
	pipeline.add(
			AnalysisEngineFactory.createEngineDescription(BreakIteratorSegmenter.class),
			CAS.NAME_DEFAULT_SOFA, viewName);
	pipeline.add(
			AnalysisEngineFactory.createEngineDescription(
					ClearTkGenderAnnotator.class,
					DefaultDataWriterFactory.PARAM_DATA_WRITER_CLASS_NAME, LibSvmStringOutcomeDataWriter.class,
					DirectoryDataWriterFactory.PARAM_OUTPUT_DIRECTORY, directory),
			CAS.NAME_DEFAULT_SOFA, viewName);

	pipeline.add(AnalysisEngineFactory.createEngineDescription(
			XmiWriter.class,
			XmiWriter.PARAM_TARGET_LOCATION, "target/xmi"));

	SimplePipeline.runPipeline(collectionReader, pipeline.createAggregate());

	String[] trainingArgs = new String[] { "-t", "0" };
	Train.main(directory, trainingArgs);
}
 
开发者ID:quadrama,项目名称:DramaNLP,代码行数:22,代码来源:Evaluation.java

示例5: main

import org.cleartk.ml.jar.DirectoryDataWriterFactory; //导入依赖的package包/类
/**
 * Writes gender-classification training data from the bundled XMI files and trains on it.
 *
 * <p>Reads {@code src/main/resources/gender/*&#47;*.xmi} leniently, runs the same component
 * chain on the {@code "DP"} view as the evaluation code, writes the training data to
 * {@code target/models} and XMI output to {@code target/xmi}, then calls {@code Train.main}
 * on {@code target/models} with the arguments {@code -t 0}.
 *
 * @param args command-line arguments (not read by this method)
 * @throws Exception if building or running the pipeline, or the training call, fails
 */
public static void main(String[] args) throws Exception {
	String viewName = "DP";
	String modelDirectory = "target/models";

	CollectionReaderDescription xmiReader = CollectionReaderFactory.createReaderDescription(
			XmiReader.class,
			XmiReader.PARAM_SOURCE_LOCATION, "src/main/resources/gender/*/*.xmi",
			XmiReader.PARAM_LENIENT, true);

	AggregateBuilder pipeline = new AggregateBuilder();

	// Build the temporary view the following components operate on.
	pipeline.add(AnalysisEngineFactory.createEngineDescription(
			PrepareClearTk.class,
			PrepareClearTk.PARAM_VIEW_NAME, viewName,
			PrepareClearTk.PARAM_ANNOTATION_TYPE, DramatisPersonae.class,
			PrepareClearTk.PARAM_SUBANNOTATIONS, Arrays.asList(Figure.class)));

	// Segmenter and classifier have their default sofa mapped onto the temporary view.
	pipeline.add(
			AnalysisEngineFactory.createEngineDescription(BreakIteratorSegmenter.class),
			CAS.NAME_DEFAULT_SOFA, viewName);
	pipeline.add(
			AnalysisEngineFactory.createEngineDescription(
					ClearTkGenderAnnotator.class,
					DefaultDataWriterFactory.PARAM_DATA_WRITER_CLASS_NAME, LibSvmStringOutcomeDataWriter.class,
					DirectoryDataWriterFactory.PARAM_OUTPUT_DIRECTORY, modelDirectory),
			CAS.NAME_DEFAULT_SOFA, viewName);

	pipeline.add(AnalysisEngineFactory.createEngineDescription(
			XmiWriter.class,
			XmiWriter.PARAM_TARGET_LOCATION, "target/xmi"));

	SimplePipeline.runPipeline(xmiReader, pipeline.createAggregateDescription());

	String[] trainingArgs = new String[] { "-t", "0" };
	Train.main(new File(modelDirectory), trainingArgs);
}
 
开发者ID:quadrama,项目名称:DramaNLP,代码行数:23,代码来源:Training.java

示例6: main

import org.cleartk.ml.jar.DirectoryDataWriterFactory; //导入依赖的package包/类
/**
 * Writes mention-detection training data and trains a model in the configured directory.
 *
 * <p>Command-line arguments are parsed into {@code Options}; the training directory and the
 * model directory are taken from there.
 *
 * @param args command-line arguments parsed via {@code CliFactory}
 * @throws Exception if argument parsing, the pipeline run or the training call fails
 */
public static void main(String[] args) throws Exception {
	Options cli = CliFactory.parseArguments(Options.class, args);

	// reader description over the *.xmi files directly inside the training directory
	String xmiPattern = cli.getTrainDirectory() + "/*.xmi";
	CollectionReaderDescription xmiReader = CollectionReaderFactory.createReaderDescription(
			XmiReader.class,
			XmiReader.PARAM_SOURCE_LOCATION, xmiPattern,
			XmiReader.PARAM_LENIENT, true);

	// process the training corpus: context windows, training-data writer, XMI dump
	SimplePipeline.runPipeline(
			xmiReader,
			createEngineDescription(
					ContextWindowAnnotator.class,
					ContextWindowAnnotator.PARAM_BASE_ANNOTATION, FigureMention.class,
					ContextWindowAnnotator.PARAM_CONTEXT_CLASS, Speech.class,
					ContextWindowAnnotator.PARAM_TARGET_ANNOTATION, TrainingArea.class),
			createEngineDescription(
					ClearTkMentionAnnotator.class,
					CleartkSequenceAnnotator.PARAM_IS_TRAINING, true,
					DirectoryDataWriterFactory.PARAM_OUTPUT_DIRECTORY, cli.getModelDirectory(),
					DefaultSequenceDataWriterFactory.PARAM_DATA_WRITER_CLASS_NAME, MalletCrfStringOutcomeDataWriter.class),
			createEngineDescription(
					XmiWriter.class,
					XmiWriter.PARAM_TARGET_LOCATION, "target/"));

	// train from the data written by MalletCrfStringOutcomeDataWriter
	Train.main(cli.getModelDirectory());
}
 
开发者ID:quadrama,项目名称:DramaNLP,代码行数:22,代码来源:MentionDetectionTraining.java

示例7: testIssue339

import org.cleartk.ml.jar.DirectoryDataWriterFactory; //导入依赖的package包/类
/**
 * Regression test for issue 339: after processing one CAS with the Weka data writer, the
 * generated {@code training-data.arff} must contain a non-empty data section and the
 * {@code "0 NN"} entry.
 *
 * @throws Exception if engine creation, processing or reading the output file fails
 */
@Test
public void testIssue339() throws Exception {
  AnalysisEngine writerEngine = AnalysisEngineFactory.createEngine(
      TestIssue339Annotator.class,
      DefaultDataWriterFactory.PARAM_DATA_WRITER_CLASS_NAME, WekaStringOutcomeDataWriter.class.getName(),
      DirectoryDataWriterFactory.PARAM_OUTPUT_DIRECTORY, this.outputDirectory);
  writerEngine.process(jCas);
  writerEngine.collectionProcessComplete();

  File arffFile = new File(this.outputDirectory, "training-data.arff");
  String arffText = Files.toString(arffFile, Charsets.US_ASCII);

  // an "@data {}" section would mean that no instance was written at all
  Pattern emptyDataSection = Pattern.compile("@data\\s*\\{\\}");
  Assert.assertFalse(emptyDataSection.matcher(arffText).find());

  // the "NN" value has to appear in the written data
  Assert.assertTrue(arffText.contains("0 NN"));
}
 
开发者ID:ClearTK,项目名称:cleartk,代码行数:22,代码来源:WekaDataWriterTest.java

示例8: testMalletCRF

import org.cleartk.ml.jar.DirectoryDataWriterFactory; //导入依赖的package包/类
/**
 * Long-running test: runs {@code testClassifier} with a Mallet CRF sequence data writer and
 * compares the first line of the resulting {@code .pos} file (trimmed) with the expected
 * tagging.
 *
 * @throws Exception if the classifier run or reading the output file fails
 */
@Test
public void testMalletCRF() throws Exception {
  this.assumeLongTestsEnabled();
  this.logger.info(LONG_TEST_MESSAGE);

  String crfDirectoryName = outputDirectoryName + "/malletcrf";
  AnalysisEngineDescription writerDescription = AnalysisEngineFactory.createEngineDescription(
      ExamplePosAnnotator.class,
      DefaultSequenceDataWriterFactory.PARAM_DATA_WRITER_CLASS_NAME, MalletCrfStringOutcomeDataWriter.class.getName(),
      DirectoryDataWriterFactory.PARAM_OUTPUT_DIRECTORY, crfDirectoryName);

  // The viterbi stack size has no meaning for this writer, so an invalid value (-1) is
  // passed to make sure it is ignored.
  testClassifier(writerDescription, crfDirectoryName, -1);

  File taggedFile = new File(crfDirectoryName + "/2008_Sichuan_earthquake.txt.pos");
  String[] taggedLines = FileUtil.loadListOfStrings(taggedFile);
  String firstTaggedLine = taggedLines[0].trim();

  String expected =
      "2008/NN Sichuan/CD earthquake/NNS From/IN Wikipedia/NN ,/, the/DT free/NN encyclopedia/NN";
  assertEquals(expected, firstTaggedLine);
}
 
开发者ID:ClearTK,项目名称:cleartk,代码行数:24,代码来源:ExamplePosClassifierTest.java

示例9: testMaxent

import org.cleartk.ml.jar.DirectoryDataWriterFactory; //导入依赖的package包/类
/**
 * Runs {@code testClassifier} with a {@code ViterbiDataWriterFactory} delegating to
 * {@code MaxentStringOutcomeDataWriter} and compares the first line of the resulting
 * {@code .pos} file with the expected tagging.
 *
 * @throws Exception if the classifier run or reading the output file fails
 */
@Test
public void testMaxent() throws Exception {
  String maxentDirectoryName = outputDirectoryName + "/maxent";
  String[] outcomeExtractorNames = new String[] { DefaultOutcomeFeatureExtractor.class.getName() };

  AnalysisEngineDescription writerDescription = AnalysisEngineFactory.createEngineDescription(
      ExamplePosAnnotator.class,
      CleartkSequenceAnnotator.PARAM_DATA_WRITER_FACTORY_CLASS_NAME, ViterbiDataWriterFactory.class.getName(),
      DirectoryDataWriterFactory.PARAM_OUTPUT_DIRECTORY, maxentDirectoryName,
      DefaultDataWriterFactory.PARAM_DATA_WRITER_CLASS_NAME, MaxentStringOutcomeDataWriter.class.getName(),
      ViterbiDataWriterFactory.PARAM_OUTCOME_FEATURE_EXTRACTOR_NAMES, outcomeExtractorNames);
  testClassifier(writerDescription, maxentDirectoryName, 10);

  File taggedFile = new File(maxentDirectoryName + "/2008_Sichuan_earthquake.txt.pos");
  String[] taggedLines = FileUtil.loadListOfStrings(taggedFile);
  String firstTaggedLine = taggedLines[0];

  String expected =
      "2008/CD Sichuan/JJ earthquake/NNS From/IN Wikipedia/NN ,/, the/DT free/NN encyclopedia/IN";
  assertEquals(expected, firstTaggedLine);
}
 
开发者ID:ClearTK,项目名称:cleartk,代码行数:23,代码来源:ExamplePosClassifierTest.java

示例10: testMalletMaxent

import org.cleartk.ml.jar.DirectoryDataWriterFactory; //导入依赖的package包/类
/**
 * Runs {@code testClassifier} with a {@code ViterbiDataWriterFactory} delegating to
 * {@code MalletStringOutcomeDataWriter}, passing {@code "MaxEnt"} as extra argument, and
 * compares the first line of the resulting {@code .pos} file with the expected tagging.
 *
 * @throws Exception if the classifier run or reading the output file fails
 */
@Test
public void testMalletMaxent() throws Exception {
  String malletDirectoryName = outputDirectoryName + "/mallet-maxent";
  String[] outcomeExtractorNames = new String[] { DefaultOutcomeFeatureExtractor.class.getName() };

  AnalysisEngineDescription writerDescription = AnalysisEngineFactory.createEngineDescription(
      ExamplePosAnnotator.class,
      CleartkSequenceAnnotator.PARAM_DATA_WRITER_FACTORY_CLASS_NAME, ViterbiDataWriterFactory.class.getName(),
      DirectoryDataWriterFactory.PARAM_OUTPUT_DIRECTORY, malletDirectoryName,
      DefaultDataWriterFactory.PARAM_DATA_WRITER_CLASS_NAME, MalletStringOutcomeDataWriter.class.getName(),
      ViterbiDataWriterFactory.PARAM_OUTCOME_FEATURE_EXTRACTOR_NAMES, outcomeExtractorNames);
  testClassifier(writerDescription, malletDirectoryName, 10, "MaxEnt");

  File taggedFile = new File(malletDirectoryName + "/2008_Sichuan_earthquake.txt.pos");
  String[] taggedLines = FileUtil.loadListOfStrings(taggedFile);
  String firstTaggedLine = taggedLines[0];

  String expected =
      "2008/DT Sichuan/JJ earthquake/NN From/IN Wikipedia/NN ,/, the/DT free/NN encyclopedia/NN";
  assertEquals(expected, firstTaggedLine);
}
 
开发者ID:ClearTK,项目名称:cleartk,代码行数:23,代码来源:ExamplePosClassifierTest.java

示例11: testMalletNaiveBayes

import org.cleartk.ml.jar.DirectoryDataWriterFactory; //导入依赖的package包/类
/**
 * Runs {@code testClassifier} with a {@code ViterbiDataWriterFactory} delegating to
 * {@code MalletStringOutcomeDataWriter}, passing {@code "NaiveBayes"} as extra argument, and
 * compares the first line of the resulting {@code .pos} file with the expected tagging.
 *
 * @throws Exception if the classifier run or reading the output file fails
 */
@Test
public void testMalletNaiveBayes() throws Exception {
  String bayesDirectoryName = outputDirectoryName + "/mallet-naive-bayes";
  String[] outcomeExtractorNames = new String[] { DefaultOutcomeFeatureExtractor.class.getName() };

  AnalysisEngineDescription writerDescription = AnalysisEngineFactory.createEngineDescription(
      ExamplePosAnnotator.class,
      CleartkSequenceAnnotator.PARAM_DATA_WRITER_FACTORY_CLASS_NAME, ViterbiDataWriterFactory.class.getName(),
      DirectoryDataWriterFactory.PARAM_OUTPUT_DIRECTORY, bayesDirectoryName,
      DefaultDataWriterFactory.PARAM_DATA_WRITER_CLASS_NAME, MalletStringOutcomeDataWriter.class.getName(),
      ViterbiDataWriterFactory.PARAM_OUTCOME_FEATURE_EXTRACTOR_NAMES, outcomeExtractorNames);
  testClassifier(writerDescription, bayesDirectoryName, 10, "NaiveBayes");

  File taggedFile = new File(bayesDirectoryName + "/2008_Sichuan_earthquake.txt.pos");
  String[] taggedLines = FileUtil.loadListOfStrings(taggedFile);
  String firstTaggedLine = taggedLines[0];

  String expected =
      "2008/DT Sichuan/JJ earthquake/NN From/IN Wikipedia/NN ,/, the/DT free/NN encyclopedia/IN";
  assertEquals(expected, firstTaggedLine);
}
 
开发者ID:ClearTK,项目名称:cleartk,代码行数:23,代码来源:ExamplePosClassifierTest.java

示例12: testMalletC45

import org.cleartk.ml.jar.DirectoryDataWriterFactory; //导入依赖的package包/类
/**
 * Long-running test: runs {@code testClassifier} with a {@code ViterbiDataWriterFactory}
 * delegating to {@code MalletStringOutcomeDataWriter}, passing {@code "C45"} as extra
 * argument, and compares the first line of the resulting {@code .pos} file with the
 * expected tagging.
 *
 * @throws Exception if the classifier run or reading the output file fails
 */
@Test
public void testMalletC45() throws Exception {
  this.assumeLongTestsEnabled();
  this.logger.info(LONG_TEST_MESSAGE);

  String c45DirectoryName = outputDirectoryName + "/mallet-c45";
  String[] outcomeExtractorNames = new String[] { DefaultOutcomeFeatureExtractor.class.getName() };

  AnalysisEngineDescription writerDescription = AnalysisEngineFactory.createEngineDescription(
      ExamplePosAnnotator.class,
      CleartkSequenceAnnotator.PARAM_DATA_WRITER_FACTORY_CLASS_NAME, ViterbiDataWriterFactory.class.getName(),
      DirectoryDataWriterFactory.PARAM_OUTPUT_DIRECTORY, c45DirectoryName,
      DefaultDataWriterFactory.PARAM_DATA_WRITER_CLASS_NAME, MalletStringOutcomeDataWriter.class.getName(),
      ViterbiDataWriterFactory.PARAM_OUTCOME_FEATURE_EXTRACTOR_NAMES, outcomeExtractorNames);
  testClassifier(writerDescription, c45DirectoryName, 10, "C45");

  File taggedFile = new File(c45DirectoryName + "/2008_Sichuan_earthquake.txt.pos");
  String[] taggedLines = FileUtil.loadListOfStrings(taggedFile);
  String firstTaggedLine = taggedLines[0];

  String expected =
      "2008/CD Sichuan/JJ earthquake/NN From/NN Wikipedia/NN ,/, the/DT free/NN encyclopedia/NN";
  assertEquals(expected, firstTaggedLine);
}
 
开发者ID:ClearTK,项目名称:cleartk,代码行数:26,代码来源:ExamplePosClassifierTest.java

示例13: testMaxent

import org.cleartk.ml.jar.DirectoryDataWriterFactory; //导入依赖的package包/类
/**
 * Runs {@code testClassifier} for the non-sequence POS annotator with
 * {@code MaxentStringOutcomeDataWriter} and passes the cleaned first line of the resulting
 * {@code .pos} file to {@code checkPOS}.
 *
 * @throws Exception if the classifier run or reading the output file fails
 */
@Test
public void testMaxent() throws Exception {
  String modelDirectoryName = outputDirectoryName + "/maxent";

  AnalysisEngineDescription writerDescription = AnalysisEngineFactory.createEngineDescription(
      NonSequenceExamplePosAnnotator.class,
      DefaultDataWriterFactory.PARAM_DATA_WRITER_CLASS_NAME, MaxentStringOutcomeDataWriter.class.getName(),
      DirectoryDataWriterFactory.PARAM_OUTPUT_DIRECTORY, modelDirectoryName);
  testClassifier(writerDescription, modelDirectoryName);

  File taggedFile = new File(modelDirectoryName + "/2008_Sichuan_earthquake.txt.pos");
  String[] taggedLines = FileUtil.loadListOfStrings(taggedFile);

  // It is unclear why "_SPLIT" shows up in the output; it is stripped before checking.
  String firstTaggedLine = taggedLines[0].trim().replace("_SPLIT", "");
  checkPOS(firstTaggedLine);
}
 
开发者ID:ClearTK,项目名称:cleartk,代码行数:17,代码来源:NonSequenceExamplePosAnnotatorTest.java

示例14: testSVMLIGHT

import org.cleartk.ml.jar.DirectoryDataWriterFactory; //导入依赖的package包/类
/**
 * Conditionally enabled test: runs {@code testClassifier} for the non-sequence POS annotator
 * with {@code SvmLightStringOutcomeDataWriter} and the extra arguments {@code -c 0.1}, then
 * passes the trimmed first line of the resulting {@code .pos} file to {@code checkPOS}.
 *
 * @throws Exception if the classifier run or reading the output file fails
 */
@Test
public void testSVMLIGHT() throws Exception {
  this.assumeTestsEnabled(ExamplePosClassifierTest.SVMLIGHT_TESTS_PROPERTY_VALUE);
  this.logger.info(ExamplePosClassifierTest.SVMLIGHT_TESTS_ENABLED_MESSAGE);

  String modelDirectoryName = outputDirectoryName + "/svmlight";

  AnalysisEngineDescription writerDescription = AnalysisEngineFactory.createEngineDescription(
      NonSequenceExamplePosAnnotator.class,
      DefaultDataWriterFactory.PARAM_DATA_WRITER_CLASS_NAME, SvmLightStringOutcomeDataWriter.class.getName(),
      DirectoryDataWriterFactory.PARAM_OUTPUT_DIRECTORY, modelDirectoryName);
  testClassifier(writerDescription, modelDirectoryName, "-c", "0.1");

  File taggedFile = new File(modelDirectoryName + "/2008_Sichuan_earthquake.txt.pos");
  String[] taggedLines = FileUtil.loadListOfStrings(taggedFile);
  String firstTaggedLine = taggedLines[0].trim();
  checkPOS(firstTaggedLine);
}
 
开发者ID:ClearTK,项目名称:cleartk,代码行数:19,代码来源:NonSequenceExamplePosAnnotatorTest.java

示例15: getWriterDescription

import org.cleartk.ml.jar.DirectoryDataWriterFactory; //导入依赖的package包/类
/**
 * Returns the base engine description configured to write training data into the given
 * directory using this factory's data writer class.
 *
 * <p>The configuration parameter that carries the data writer class name depends on whether
 * the class is a {@code SequenceDataWriter} or a plain {@code DataWriter}; the sequence
 * variant takes precedence when both apply.
 *
 * @param outputDirectory directory whose path is set as the data writer output directory
 * @return the description obtained from {@code getBaseDescription()} with the output
 *         directory and data writer class name parameters set
 * @throws ResourceInitializationException declared for the description creation and
 *         configuration calls
 * @throws RuntimeException if the data writer class is neither a {@code SequenceDataWriter}
 *         nor a {@code DataWriter}
 */
public AnalysisEngineDescription getWriterDescription(File outputDirectory)
    throws ResourceInitializationException {
  Class<?> writerType = this.getDataWriterClass();

  boolean isSequenceWriter = SequenceDataWriter.class.isAssignableFrom(writerType);
  if (!isSequenceWriter && !DataWriter.class.isAssignableFrom(writerType)) {
    throw new RuntimeException("Invalid data writer class: " + writerType);
  }

  String writerClassParam = isSequenceWriter
      ? DefaultSequenceDataWriterFactory.PARAM_DATA_WRITER_CLASS_NAME
      : DefaultDataWriterFactory.PARAM_DATA_WRITER_CLASS_NAME;

  AnalysisEngineDescription description = getBaseDescription();
  ResourceCreationSpecifierFactory.setConfigurationParameters(
      description,
      DirectoryDataWriterFactory.PARAM_OUTPUT_DIRECTORY, outputDirectory.getPath(),
      writerClassParam, writerType.getName());
  return description;
}
 
开发者ID:ClearTK,项目名称:cleartk,代码行数:21,代码来源:CleartkInternalModelFactory.java


注:本文中的org.cleartk.ml.jar.DirectoryDataWriterFactory类示例由纯净天空整理自Github/MSDocs等开源代码及文档管理平台,相关代码片段筛选自各路编程大神贡献的开源项目,源码版权归原作者所有,传播和使用请参考对应项目的License;未经允许,请勿转载。