This article collects typical usage examples of the Java class org.cleartk.ml.mallet.MalletCrfStringOutcomeDataWriter. If you are wondering what MalletCrfStringOutcomeDataWriter does, how to use it, or what real code that uses it looks like, the curated examples below should help.
MalletCrfStringOutcomeDataWriter belongs to the org.cleartk.ml.mallet package. Eight code examples are shown below, sorted by popularity by default.
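Before the individual examples, here is a minimal sketch of the pattern they all share, assembled from the code below: at training time a CleartkSequenceAnnotator is configured with MalletCrfStringOutcomeDataWriter as its data writer, the pipeline is run to write Mallet training data, and Train then builds the CRF model. The method name and MySequenceAnnotator are placeholders, not code from any of the projects below.

public static void trainCrfModel(CollectionReaderDescription reader, String modelDir) throws Exception {
    // any CleartkSequenceAnnotator subclass can stand in for MySequenceAnnotator (a placeholder name)
    AnalysisEngineDescription writer = AnalysisEngineFactory.createEngineDescription(
        MySequenceAnnotator.class,
        CleartkSequenceAnnotator.PARAM_IS_TRAINING, true,
        DirectoryDataWriterFactory.PARAM_OUTPUT_DIRECTORY, modelDir,
        DefaultSequenceDataWriterFactory.PARAM_DATA_WRITER_CLASS_NAME,
        MalletCrfStringOutcomeDataWriter.class);
    // running the pipeline writes the Mallet CRF training data into modelDir
    SimplePipeline.runPipeline(reader, writer);
    // org.cleartk.ml.jar.Train reads that data and packages the trained model
    Train.main(modelDir);
}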
Example 1: main
import org.cleartk.ml.mallet.MalletCrfStringOutcomeDataWriter; // import the required package/class
public static void main(String[] args) throws Exception {
    Options options = CliFactory.parseArguments(Options.class, args);
    // a reader that loads the XMI files from the training directory
    CollectionReaderDescription reader = CollectionReaderFactory.createReaderDescription(XmiReader.class,
        XmiReader.PARAM_SOURCE_LOCATION, options.getTrainDirectory() + "/*.xmi",
        XmiReader.PARAM_LENIENT, true);
    // run the pipeline over the training corpus
    SimplePipeline.runPipeline(reader,
        createEngineDescription(ContextWindowAnnotator.class,
            ContextWindowAnnotator.PARAM_BASE_ANNOTATION, FigureMention.class,
            ContextWindowAnnotator.PARAM_CONTEXT_CLASS, Speech.class,
            ContextWindowAnnotator.PARAM_TARGET_ANNOTATION, TrainingArea.class),
        createEngineDescription(ClearTkMentionAnnotator.class,
            CleartkSequenceAnnotator.PARAM_IS_TRAINING, true,
            DirectoryDataWriterFactory.PARAM_OUTPUT_DIRECTORY, options.getModelDirectory(),
            DefaultSequenceDataWriterFactory.PARAM_DATA_WRITER_CLASS_NAME,
            MalletCrfStringOutcomeDataWriter.class),
        createEngineDescription(XmiWriter.class, XmiWriter.PARAM_TARGET_LOCATION, "target/"));
    // train a Mallet CRF model on the training data
    Train.main(options.getModelDirectory());
}
Example 2: testMalletCRF
import org.cleartk.ml.mallet.MalletCrfStringOutcomeDataWriter; // import the required package/class
@Test
public void testMalletCRF() throws Exception {
    this.assumeLongTestsEnabled();
    this.logger.info(LONG_TEST_MESSAGE);
    String outDirectoryName = outputDirectoryName + "/malletcrf";
    AnalysisEngineDescription dataWriter = AnalysisEngineFactory.createEngineDescription(
        ExamplePosAnnotator.class,
        DefaultSequenceDataWriterFactory.PARAM_DATA_WRITER_CLASS_NAME,
        MalletCrfStringOutcomeDataWriter.class.getName(),
        DirectoryDataWriterFactory.PARAM_OUTPUT_DIRECTORY,
        outDirectoryName);
    // the viterbi stack size is meaningless here, so pass in an invalid value to make sure it is ignored
    testClassifier(dataWriter, outDirectoryName, -1);
    String firstLine = FileUtil.loadListOfStrings(new File(outDirectoryName
        + "/2008_Sichuan_earthquake.txt.pos"))[0].trim();
    assertEquals(
        "2008/NN Sichuan/CD earthquake/NNS From/IN Wikipedia/NN ,/, the/DT free/NN encyclopedia/NN",
        firstLine);
}
Example 3: getWriterDescription
import org.cleartk.ml.mallet.MalletCrfStringOutcomeDataWriter; // import the required package/class
public static AnalysisEngineDescription getWriterDescription(String outputDirectory)
        throws ResourceInitializationException {
    return AnalysisEngineFactory.createEngineDescription(ReasonAnnotator.class,
        CleartkSequenceAnnotator.PARAM_IS_TRAINING, true,
        DirectoryDataWriterFactory.PARAM_OUTPUT_DIRECTORY, outputDirectory,
        DefaultSequenceDataWriterFactory.PARAM_DATA_WRITER_CLASS_NAME,
        MalletCrfStringOutcomeDataWriter.class);
}
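As a hedged usage note, a description like the one returned above is typically combined with a training reader, run through SimplePipeline to write the training data, and followed by a call to Train, just as the other examples on this page do. The method below is a placeholder caller, not code from the original project.

// hypothetical caller of getWriterDescription(...); the reader and model directory come from elsewhere
public static void trainReasonModel(CollectionReaderDescription trainingReader, String modelDirectory)
        throws Exception {
    // write Mallet CRF training data using the description returned above
    SimplePipeline.runPipeline(trainingReader, getWriterDescription(modelDirectory));
    // build and package the CRF model from that training data
    Train.main(modelDirectory);
}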
Example 4: train
import org.cleartk.ml.mallet.MalletCrfStringOutcomeDataWriter; // import the required package/class
@Override
public void train(CollectionReader collectionReader, File outputDirectory) throws Exception {
    // assemble the training pipeline
    AggregateBuilder aggregate = new AggregateBuilder();
    aggregate.add(createEngineDescription(ContextWindowAnnotator.class,
        ContextWindowAnnotator.PARAM_BASE_ANNOTATION, FigureMention.class,
        ContextWindowAnnotator.PARAM_CONTEXT_CLASS, Speech.class,
        ContextWindowAnnotator.PARAM_TARGET_ANNOTATION, TrainingArea.class));
    // our ClearTkMentionAnnotator, configured to write Mallet CRF training data
    aggregate.add(AnalysisEngineFactory.createEngineDescription(ClearTkMentionAnnotator.class,
        CleartkSequenceAnnotator.PARAM_IS_TRAINING, true,
        DirectoryDataWriterFactory.PARAM_OUTPUT_DIRECTORY, outputDirectory,
        DefaultSequenceDataWriterFactory.PARAM_DATA_WRITER_CLASS_NAME,
        MalletCrfStringOutcomeDataWriter.class));
    // run the pipeline over the training corpus
    SimplePipeline.runPipeline(collectionReader, aggregate.createAggregateDescription());
    // quiet Mallet down a bit (but still leave likelihoods so you can see progress)
    Logger malletLogger = Logger.getLogger("cc.mallet");
    malletLogger.setLevel(Level.WARNING);
    Logger likelihoodLogger = Logger.getLogger("cc.mallet.fst.CRFOptimizableByLabelLikelihood");
    likelihoodLogger.setLevel(Level.INFO);
    // train a Mallet CRF model on the training data
    Train.main(outputDirectory);
}
Example 5: train
import org.cleartk.ml.mallet.MalletCrfStringOutcomeDataWriter; // import the required package/class
@Override
public void train(CollectionReader collectionReader, File outputDirectory) throws Exception {
    // assemble the training pipeline
    AggregateBuilder aggregate = new AggregateBuilder();
    // an annotator that loads the text from the training file URIs
    aggregate.add(UriToDocumentTextAnnotator.getDescription());
    // an annotator that parses and loads MASC named entity annotations (and tokens)
    aggregate.add(MascGoldAnnotator.getDescription());
    // an annotator that adds part-of-speech tags
    aggregate.add(PosTaggerAnnotator.getDescription());
    // our NamedEntityChunker annotator, configured to write Mallet CRF training data
    aggregate.add(AnalysisEngineFactory.createEngineDescription(
        NamedEntityChunker.class,
        CleartkSequenceAnnotator.PARAM_IS_TRAINING, true,
        DirectoryDataWriterFactory.PARAM_OUTPUT_DIRECTORY, outputDirectory,
        DefaultSequenceDataWriterFactory.PARAM_DATA_WRITER_CLASS_NAME,
        MalletCrfStringOutcomeDataWriter.class));
    // run the pipeline over the training corpus
    SimplePipeline.runPipeline(collectionReader, aggregate.createAggregateDescription());
    // quiet Mallet down a bit (but still leave likelihoods so you can see progress)
    Logger malletLogger = Logger.getLogger("cc.mallet");
    malletLogger.setLevel(Level.WARNING);
    Logger likelihoodLogger = Logger.getLogger("cc.mallet.fst.CRFOptimizableByLabelLikelihood");
    likelihoodLogger.setLevel(Level.INFO);
    // train a Mallet CRF model on the training data
    Train.main(outputDirectory);
}
Example 6: main
import org.cleartk.ml.mallet.MalletCrfStringOutcomeDataWriter; // import the required package/class
public static void main(String[] args) throws Exception {
    Options options = CliFactory.parseArguments(Options.class, args);
    // a reader that loads the URIs of the training files
    CollectionReaderDescription reader = UriCollectionReader.getDescriptionFromDirectory(
        options.getTrainDirectory(),
        MascTextFileFilter.class,
        null);
    // assemble the training pipeline
    AggregateBuilder aggregate = new AggregateBuilder();
    // an annotator that loads the text from the training file URIs
    aggregate.add(UriToDocumentTextAnnotator.getDescription());
    // an annotator that parses and loads MASC named entity annotations (and tokens)
    aggregate.add(MascGoldAnnotator.getDescription());
    // an annotator that adds part-of-speech tags (so we can use them for features)
    aggregate.add(PosTaggerAnnotator.getDescription());
    // our NamedEntityChunker annotator, configured to write Mallet CRF training data
    aggregate.add(AnalysisEngineFactory.createEngineDescription(
        NamedEntityChunker.class,
        CleartkSequenceAnnotator.PARAM_IS_TRAINING, true,
        DirectoryDataWriterFactory.PARAM_OUTPUT_DIRECTORY, options.getModelDirectory(),
        DefaultSequenceDataWriterFactory.PARAM_DATA_WRITER_CLASS_NAME,
        MalletCrfStringOutcomeDataWriter.class));
    // run the pipeline over the training corpus
    SimplePipeline.runPipeline(reader, aggregate.createAggregateDescription());
    // train a Mallet CRF model on the training data
    Train.main(options.getModelDirectory());
}
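For completeness, a sketch of the classification-time counterpart to Example 6: once Train.main has packaged the model, the same annotator is configured with the model jar instead of a data writer. The wiring below is assumed from the generic ClearTK pattern that also appears in Example 8, not taken from the original project.

// hedged sketch: configure NamedEntityChunker for tagging with the model that Train.main packaged
public static AnalysisEngineDescription getTaggerDescription(File modelDirectory)
        throws ResourceInitializationException {
    return AnalysisEngineFactory.createEngineDescription(
        NamedEntityChunker.class,
        CleartkSequenceAnnotator.PARAM_IS_TRAINING, false,
        GenericJarClassifierFactory.PARAM_CLASSIFIER_JAR_PATH,
        JarClassifierBuilder.getModelJarFile(modelDirectory));
}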
Example 7: testSequenceDataWriterAnnotator
import org.cleartk.ml.mallet.MalletCrfStringOutcomeDataWriter; // import the required package/class
@Test
public void testSequenceDataWriterAnnotator() throws IOException, UIMAException {
    AnalysisEngine engine = AnalysisEngineFactory.createEngine(
        TestAnnotator.class,
        DirectoryDataWriterFactory.PARAM_OUTPUT_DIRECTORY,
        outputDirectoryName,
        DefaultSequenceDataWriterFactory.PARAM_DATA_WRITER_CLASS_NAME,
        MalletCrfStringOutcomeDataWriter.class.getName());

    // create some tokens and sentences; add part-of-speech and stems to tokens
    String text = "What if we built a large\r\n, wooden badger?";
    tokenBuilder.buildTokens(
        jCas,
        text,
        "What if we built a large \n, wooden badger ?",
        "WDT TO PRP VBN DT JJ , JJ NN .");
    engine.process(jCas);
    engine.collectionProcessComplete();

    File trainFile = new MalletCrfStringOutcomeClassifierBuilder().getTrainingDataFile(this.outputDirectory);
    BufferedReader input = new BufferedReader(new FileReader(trainFile));
    String line = input.readLine();
    assertNotNull(line);
    assertTrue(line.endsWith(" WDT"));
    assertTrue(line.startsWith("What "));
    line = input.readLine();
    assertNotNull(line);
    assertTrue(line.endsWith(" TO"));
    assertTrue(line.startsWith("if "));
    line = input.readLine();
    assertNotNull(line);
    assertTrue(line.endsWith(" PRP"));
    assertTrue(line.startsWith("we "));
    line = input.readLine();
    assertNotNull(line);
    assertTrue(line.endsWith(" VBN"));
    assertTrue(line.startsWith("built "));
    line = input.readLine();
    assertNotNull(line);
    assertTrue(line.endsWith(" DT"));
    assertTrue(line.startsWith("a "));
    line = input.readLine();
    assertNotNull(line);
    assertTrue(line.endsWith(" JJ"));
    assertTrue(line.startsWith("large "));
    line = input.readLine();
    assertNotNull(line);
    assertEquals("", line.trim());
    line = input.readLine();
    assertNotNull(line);
    assertTrue(line.endsWith(" ,"));
    assertTrue(line.startsWith(", "));
    input.close();
}
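As the assertions suggest, the training data file written by MalletCrfStringOutcomeDataWriter contains one whitespace-separated line per token, with the feature values first and the outcome label last, and a blank line separating sequences. A hypothetical fragment consistent with this test might look like the following, where <features...> stands in for the actual feature columns, which depend on TestAnnotator and are not shown in the source:

What <features...> WDT
if <features...> TO
we <features...> PRP
built <features...> VBN
a <features...> DT
large <features...> JJ

, <features...> ,
wooden <features...> JJ
badger <features...> NN
? <features...> .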
Example 8: runTest1
import org.cleartk.ml.mallet.MalletCrfStringOutcomeDataWriter; // import the required package/class
@Test
public void runTest1() throws Exception {
    AnalysisEngine dataWriterAnnotator = AnalysisEngineFactory.createEngine(
        TestAnnotator.class,
        DirectoryDataWriterFactory.PARAM_OUTPUT_DIRECTORY,
        outputDirectoryName,
        DefaultSequenceDataWriterFactory.PARAM_DATA_WRITER_CLASS_NAME,
        MalletCrfStringOutcomeDataWriter.class.getName());
    dataWriterAnnotator.process(jCas);
    dataWriterAnnotator.collectionProcessComplete();

    File trainFile = new MalletCrfStringOutcomeClassifierBuilder().getTrainingDataFile(this.outputDirectory);
    BufferedReader reader = new BufferedReader(new FileReader(trainFile));
    reader.readLine();
    reader.close();

    HideOutput hider = new HideOutput();
    Train.main(outputDirectoryName);
    hider.restoreOutput();

    MalletCrfStringOutcomeClassifierBuilder builder = new MalletCrfStringOutcomeClassifierBuilder();
    MalletCrfStringOutcomeClassifier classifier = builder.loadClassifierFromTrainingDirectory(this.outputDirectory);

    List<List<Feature>> sequenceFeatures = new ArrayList<List<Feature>>();
    List<Instance<String>> instances = createInstances();
    for (Instance<String> instance : instances) {
        sequenceFeatures.add(instance.getFeatures());
    }

    List<String> outcomes = classifier.classify(sequenceFeatures);
    assertEquals(sequenceFeatures.size(), outcomes.size());
    testLabels(
        outcomes,
        "O O O O O O O O O O O O O O O B-GENE I-GENE I-GENE O B-GENE I-GENE O O O O O O O O O O O O O O O O O O O O O");

    AnalysisEngine classifierAnnotator = AnalysisEngineFactory.createEngine(
        TestAnnotator.class,
        GenericJarClassifierFactory.PARAM_CLASSIFIER_JAR_PATH,
        JarClassifierBuilder.getModelJarFile(outputDirectoryName));
    jCas.reset();
    classifierAnnotator.process(jCas);
    classifierAnnotator.collectionProcessComplete();
}