本文整理汇总了Java中org.cleartk.ml.CleartkSequenceAnnotator类的典型用法代码示例。如果您正在寻找以下问题的答案:Java CleartkSequenceAnnotator类的具体用法?Java CleartkSequenceAnnotator怎么用?Java CleartkSequenceAnnotator使用的例子?那么, 这里精选的类代码示例或许可以为您提供帮助。
CleartkSequenceAnnotator类属于org.cleartk.ml包, 在下文中一共展示了CleartkSequenceAnnotator类的15个代码示例, 这些示例默认按受欢迎程度排序。您可以为喜欢或者觉得有用的代码点赞, 您的评价将有助于系统推荐出更优质的Java代码示例。
示例1: main
import org.cleartk.ml.CleartkSequenceAnnotator; //导入依赖的package包/类
public static void main(String[] args) throws Exception {
    Options opts = CliFactory.parseArguments(Options.class, args);
    // Reader over the serialized XMI training documents.
    CollectionReaderDescription xmiReader = CollectionReaderFactory.createReaderDescription(XmiReader.class,
            XmiReader.PARAM_SOURCE_LOCATION, opts.getTrainDirectory() + "/*.xmi", XmiReader.PARAM_LENIENT, true);
    // Marks a TrainingArea context window around each FigureMention inside a Speech.
    AnalysisEngineDescription contextWindows = createEngineDescription(ContextWindowAnnotator.class,
            ContextWindowAnnotator.PARAM_BASE_ANNOTATION, FigureMention.class,
            ContextWindowAnnotator.PARAM_CONTEXT_CLASS, Speech.class,
            ContextWindowAnnotator.PARAM_TARGET_ANNOTATION, TrainingArea.class);
    // Mention annotator in training mode: writes Mallet CRF training data to the model directory.
    AnalysisEngineDescription trainingDataWriter = createEngineDescription(ClearTkMentionAnnotator.class,
            CleartkSequenceAnnotator.PARAM_IS_TRAINING, true,
            DirectoryDataWriterFactory.PARAM_OUTPUT_DIRECTORY, opts.getModelDirectory(),
            DefaultSequenceDataWriterFactory.PARAM_DATA_WRITER_CLASS_NAME,
            MalletCrfStringOutcomeDataWriter.class);
    // Dumps the processed CASes for inspection.
    AnalysisEngineDescription xmiDump = createEngineDescription(XmiWriter.class,
            XmiWriter.PARAM_TARGET_LOCATION, "target/");
    // Run the whole pipeline over the training corpus, then train the CRF model.
    SimplePipeline.runPipeline(xmiReader, contextWindows, trainingDataWriter, xmiDump);
    Train.main(opts.getModelDirectory());
}
示例2: testMaxent
import org.cleartk.ml.CleartkSequenceAnnotator; //导入依赖的package包/类
@Test
public void testMaxent() throws Exception {
    String maxentDir = outputDirectoryName + "/maxent";
    // POS annotator configured to write Viterbi-wrapped maxent training data.
    AnalysisEngineDescription writerDesc = AnalysisEngineFactory.createEngineDescription(
            ExamplePosAnnotator.class,
            CleartkSequenceAnnotator.PARAM_DATA_WRITER_FACTORY_CLASS_NAME,
            ViterbiDataWriterFactory.class.getName(),
            DirectoryDataWriterFactory.PARAM_OUTPUT_DIRECTORY,
            maxentDir,
            DefaultDataWriterFactory.PARAM_DATA_WRITER_CLASS_NAME,
            MaxentStringOutcomeDataWriter.class.getName(),
            ViterbiDataWriterFactory.PARAM_OUTCOME_FEATURE_EXTRACTOR_NAMES,
            new String[] { DefaultOutcomeFeatureExtractor.class.getName() });
    testClassifier(writerDesc, maxentDir, 10);
    // The trained tagger's first output line must match the expected tag sequence.
    File taggedFile = new File(maxentDir + "/2008_Sichuan_earthquake.txt.pos");
    String firstLine = FileUtil.loadListOfStrings(taggedFile)[0];
    String expected =
            "2008/CD Sichuan/JJ earthquake/NNS From/IN Wikipedia/NN ,/, the/DT free/NN encyclopedia/IN";
    assertEquals(expected, firstLine);
}
示例3: testMalletMaxent
import org.cleartk.ml.CleartkSequenceAnnotator; //导入依赖的package包/类
@Test
public void testMalletMaxent() throws Exception {
    String malletMaxentDir = outputDirectoryName + "/mallet-maxent";
    // POS annotator writing Viterbi-wrapped Mallet training data.
    AnalysisEngineDescription writerDesc = AnalysisEngineFactory.createEngineDescription(
            ExamplePosAnnotator.class,
            CleartkSequenceAnnotator.PARAM_DATA_WRITER_FACTORY_CLASS_NAME,
            ViterbiDataWriterFactory.class.getName(),
            DirectoryDataWriterFactory.PARAM_OUTPUT_DIRECTORY,
            malletMaxentDir,
            DefaultDataWriterFactory.PARAM_DATA_WRITER_CLASS_NAME,
            MalletStringOutcomeDataWriter.class.getName(),
            ViterbiDataWriterFactory.PARAM_OUTCOME_FEATURE_EXTRACTOR_NAMES,
            new String[] { DefaultOutcomeFeatureExtractor.class.getName() });
    // Train with Mallet's MaxEnt trainer and check the first tagged line.
    testClassifier(writerDesc, malletMaxentDir, 10, "MaxEnt");
    File taggedFile = new File(malletMaxentDir + "/2008_Sichuan_earthquake.txt.pos");
    String firstLine = FileUtil.loadListOfStrings(taggedFile)[0];
    assertEquals(
            "2008/DT Sichuan/JJ earthquake/NN From/IN Wikipedia/NN ,/, the/DT free/NN encyclopedia/NN",
            firstLine);
}
示例4: testMalletNaiveBayes
import org.cleartk.ml.CleartkSequenceAnnotator; //导入依赖的package包/类
@Test
public void testMalletNaiveBayes() throws Exception {
    String naiveBayesDir = outputDirectoryName + "/mallet-naive-bayes";
    // Same Viterbi/Mallet writer setup as the other Mallet tests.
    AnalysisEngineDescription writerDesc = AnalysisEngineFactory.createEngineDescription(
            ExamplePosAnnotator.class,
            CleartkSequenceAnnotator.PARAM_DATA_WRITER_FACTORY_CLASS_NAME,
            ViterbiDataWriterFactory.class.getName(),
            DirectoryDataWriterFactory.PARAM_OUTPUT_DIRECTORY,
            naiveBayesDir,
            DefaultDataWriterFactory.PARAM_DATA_WRITER_CLASS_NAME,
            MalletStringOutcomeDataWriter.class.getName(),
            ViterbiDataWriterFactory.PARAM_OUTCOME_FEATURE_EXTRACTOR_NAMES,
            new String[] { DefaultOutcomeFeatureExtractor.class.getName() });
    // Train with Mallet's NaiveBayes trainer and verify the first tagged line.
    testClassifier(writerDesc, naiveBayesDir, 10, "NaiveBayes");
    File taggedFile = new File(naiveBayesDir + "/2008_Sichuan_earthquake.txt.pos");
    String firstLine = FileUtil.loadListOfStrings(taggedFile)[0];
    String expected =
            "2008/DT Sichuan/JJ earthquake/NN From/IN Wikipedia/NN ,/, the/DT free/NN encyclopedia/IN";
    assertEquals(expected, firstLine);
}
示例5: testMalletC45
import org.cleartk.ml.CleartkSequenceAnnotator; //导入依赖的package包/类
@Test
public void testMalletC45() throws Exception {
    // C4.5 training is slow, so this test only runs when long tests are enabled.
    this.assumeLongTestsEnabled();
    this.logger.info(LONG_TEST_MESSAGE);
    String c45Dir = outputDirectoryName + "/mallet-c45";
    AnalysisEngineDescription writerDesc = AnalysisEngineFactory.createEngineDescription(
            ExamplePosAnnotator.class,
            CleartkSequenceAnnotator.PARAM_DATA_WRITER_FACTORY_CLASS_NAME,
            ViterbiDataWriterFactory.class.getName(),
            DirectoryDataWriterFactory.PARAM_OUTPUT_DIRECTORY,
            c45Dir,
            DefaultDataWriterFactory.PARAM_DATA_WRITER_CLASS_NAME,
            MalletStringOutcomeDataWriter.class.getName(),
            ViterbiDataWriterFactory.PARAM_OUTCOME_FEATURE_EXTRACTOR_NAMES,
            new String[] { DefaultOutcomeFeatureExtractor.class.getName() });
    // Train with Mallet's C45 trainer and verify the first tagged line.
    testClassifier(writerDesc, c45Dir, 10, "C45");
    File taggedFile = new File(c45Dir + "/2008_Sichuan_earthquake.txt.pos");
    String firstLine = FileUtil.loadListOfStrings(taggedFile)[0];
    String expected =
            "2008/CD Sichuan/JJ earthquake/NN From/NN Wikipedia/NN ,/, the/DT free/NN encyclopedia/NN";
    assertEquals(expected, firstLine);
}
示例6: getWriterDescription
import org.cleartk.ml.CleartkSequenceAnnotator; //导入依赖的package包/类
/**
 * Builds a ReasonAnnotator description configured for training: it writes
 * Mallet CRF training data into the given output directory instead of classifying.
 */
public static AnalysisEngineDescription getWriterDescription(String outputDirectory)
        throws ResourceInitializationException {
    return AnalysisEngineFactory.createEngineDescription(
            ReasonAnnotator.class,
            CleartkSequenceAnnotator.PARAM_IS_TRAINING, true,
            DirectoryDataWriterFactory.PARAM_OUTPUT_DIRECTORY, outputDirectory,
            DefaultSequenceDataWriterFactory.PARAM_DATA_WRITER_CLASS_NAME,
            MalletCrfStringOutcomeDataWriter.class);
}
示例7: train
import org.cleartk.ml.CleartkSequenceAnnotator; //导入依赖的package包/类
@Override
public void train(CollectionReader collectionReader, File outputDirectory) throws Exception {
    // Build the training pipeline.
    AggregateBuilder builder = new AggregateBuilder();
    // Mark a TrainingArea window around each FigureMention within a Speech.
    builder.add(createEngineDescription(ContextWindowAnnotator.class,
            ContextWindowAnnotator.PARAM_BASE_ANNOTATION, FigureMention.class,
            ContextWindowAnnotator.PARAM_CONTEXT_CLASS, Speech.class,
            ContextWindowAnnotator.PARAM_TARGET_ANNOTATION, TrainingArea.class));
    // Mention annotator in training mode: emits Mallet CRF training data.
    builder.add(AnalysisEngineFactory.createEngineDescription(ClearTkMentionAnnotator.class,
            CleartkSequenceAnnotator.PARAM_IS_TRAINING, true,
            DirectoryDataWriterFactory.PARAM_OUTPUT_DIRECTORY, outputDirectory,
            DefaultSequenceDataWriterFactory.PARAM_DATA_WRITER_CLASS_NAME,
            MalletCrfStringOutcomeDataWriter.class));
    // Run the pipeline over the training corpus.
    SimplePipeline.runPipeline(collectionReader, builder.createAggregateDescription());
    // Silence most of Mallet's logging, but keep the likelihood logger at INFO
    // so training progress remains visible.
    Logger.getLogger("cc.mallet").setLevel(Level.WARNING);
    Logger.getLogger("cc.mallet.fst.CRFOptimizableByLabelLikelihood").setLevel(Level.INFO);
    // Train a Mallet CRF model on the written training data.
    Train.main(outputDirectory);
}
示例8: getWriterDescription
import org.cleartk.ml.CleartkSequenceAnnotator; //导入依赖的package包/类
/**
 * Builds an ExamplePosAnnotator description that writes maxent training data
 * through a Viterbi data writer (outcome features come from the default extractor).
 */
public static AnalysisEngineDescription getWriterDescription(String outputDirectory)
        throws ResourceInitializationException {
    String[] outcomeExtractors = { DefaultOutcomeFeatureExtractor.class.getName() };
    return AnalysisEngineFactory.createEngineDescription(
            ExamplePosAnnotator.class,
            CleartkSequenceAnnotator.PARAM_DATA_WRITER_FACTORY_CLASS_NAME,
            ViterbiDataWriterFactory.class.getName(),
            DirectoryDataWriterFactory.PARAM_OUTPUT_DIRECTORY,
            outputDirectory,
            ViterbiDataWriterFactory.PARAM_DELEGATED_DATA_WRITER_FACTORY_CLASS,
            DefaultDataWriterFactory.class.getName(),
            DefaultDataWriterFactory.PARAM_DATA_WRITER_CLASS_NAME,
            MaxentStringOutcomeDataWriter.class.getName(),
            ViterbiDataWriterFactory.PARAM_OUTCOME_FEATURE_EXTRACTOR_NAMES,
            outcomeExtractors);
}
示例9: train
import org.cleartk.ml.CleartkSequenceAnnotator; //导入依赖的package包/类
@Override
public void train(CollectionReader collectionReader, File outputDirectory) throws Exception {
    // Assemble the training pipeline.
    AggregateBuilder pipeline = new AggregateBuilder();
    // Load document text from the training-file URIs.
    pipeline.add(UriToDocumentTextAnnotator.getDescription());
    // Parse MASC gold named-entity annotations (and tokens).
    pipeline.add(MascGoldAnnotator.getDescription());
    // Add part-of-speech tags.
    pipeline.add(PosTaggerAnnotator.getDescription());
    // NamedEntityChunker in training mode: writes Mallet CRF training data.
    pipeline.add(AnalysisEngineFactory.createEngineDescription(
            NamedEntityChunker.class,
            CleartkSequenceAnnotator.PARAM_IS_TRAINING, true,
            DirectoryDataWriterFactory.PARAM_OUTPUT_DIRECTORY, outputDirectory,
            DefaultSequenceDataWriterFactory.PARAM_DATA_WRITER_CLASS_NAME,
            MalletCrfStringOutcomeDataWriter.class));
    // Run the pipeline over the training corpus.
    SimplePipeline.runPipeline(collectionReader, pipeline.createAggregateDescription());
    // Quiet Mallet down, but keep likelihoods at INFO so progress is visible.
    Logger.getLogger("cc.mallet").setLevel(Level.WARNING);
    Logger.getLogger("cc.mallet.fst.CRFOptimizableByLabelLikelihood").setLevel(Level.INFO);
    // Train a Mallet CRF model on the training data.
    Train.main(outputDirectory);
}
示例10: main
import org.cleartk.ml.CleartkSequenceAnnotator; //导入依赖的package包/类
public static void main(String[] args) throws Exception {
    Options opts = CliFactory.parseArguments(Options.class, args);
    // Reader over the URI of the single text file to classify.
    CollectionReader uriReader =
            UriCollectionReader.getCollectionReaderFromFiles(Arrays.asList(opts.getTextFile()));
    // Assemble the classification pipeline.
    AggregateBuilder pipeline = new AggregateBuilder();
    // Load the document text from the file URI.
    pipeline.add(UriToDocumentTextAnnotator.getDescription());
    // Identify sentences, tokens, and part-of-speech tags.
    pipeline.add(SentenceAnnotator.getDescription());
    pipeline.add(TokenAnnotator.getDescription());
    pipeline.add(PosTaggerAnnotator.getDescription());
    // NamedEntityChunker in classification mode, loading the trained model jar.
    pipeline.add(AnalysisEngineFactory.createEngineDescription(
            NamedEntityChunker.class,
            CleartkSequenceAnnotator.PARAM_IS_TRAINING, false,
            GenericJarClassifierFactory.PARAM_CLASSIFIER_JAR_PATH,
            JarClassifierBuilder.getModelJarFile(opts.getModelDirectory())));
    // Print any named entities that were found.
    pipeline.add(AnalysisEngineFactory.createEngineDescription(PrintNamedEntityMentions.class));
    // Run the classification pipeline over the new text.
    SimplePipeline.runPipeline(uriReader, pipeline.createAggregateDescription());
}
示例11: main
import org.cleartk.ml.CleartkSequenceAnnotator; //导入依赖的package包/类
public static void main(String[] args) throws Exception {
    Options opts = CliFactory.parseArguments(Options.class, args);
    // Reader that yields the URIs of the MASC training files.
    CollectionReaderDescription uriReader = UriCollectionReader.getDescriptionFromDirectory(
            opts.getTrainDirectory(),
            MascTextFileFilter.class,
            null);
    // Assemble the training pipeline.
    AggregateBuilder pipeline = new AggregateBuilder();
    // Load the document text from each training-file URI.
    pipeline.add(UriToDocumentTextAnnotator.getDescription());
    // Parse MASC gold named-entity annotations (and tokens).
    pipeline.add(MascGoldAnnotator.getDescription());
    // Add part-of-speech tags so they can be used as features.
    pipeline.add(PosTaggerAnnotator.getDescription());
    // NamedEntityChunker in training mode: writes Mallet CRF training data.
    pipeline.add(AnalysisEngineFactory.createEngineDescription(
            NamedEntityChunker.class,
            CleartkSequenceAnnotator.PARAM_IS_TRAINING, true,
            DirectoryDataWriterFactory.PARAM_OUTPUT_DIRECTORY, opts.getModelDirectory(),
            DefaultSequenceDataWriterFactory.PARAM_DATA_WRITER_CLASS_NAME,
            MalletCrfStringOutcomeDataWriter.class));
    // Run the pipeline over the training corpus, then train the CRF model.
    SimplePipeline.runPipeline(uriReader, pipeline.createAggregateDescription());
    Train.main(opts.getModelDirectory());
}
示例12: testLibsvm
import org.cleartk.ml.CleartkSequenceAnnotator; //导入依赖的package包/类
@Test
public void testLibsvm() throws Exception {
    String libsvmDir = outputDirectoryName + "/libsvm";
    AnalysisEngineDescription writerDesc = AnalysisEngineFactory.createEngineDescription(
            ExamplePosAnnotator.class,
            CleartkSequenceAnnotator.PARAM_DATA_WRITER_FACTORY_CLASS_NAME,
            ViterbiDataWriterFactory.class.getName(),
            DirectoryDataWriterFactory.PARAM_OUTPUT_DIRECTORY,
            libsvmDir,
            DefaultDataWriterFactory.PARAM_DATA_WRITER_CLASS_NAME,
            LibSvmStringOutcomeDataWriter.class.getName(),
            ViterbiDataWriterFactory.PARAM_OUTCOME_FEATURE_EXTRACTOR_NAMES,
            new String[] { DefaultOutcomeFeatureExtractor.class.getName() });
    // Stack size must be 1: MultiClassLIBSVMClassifier.score is not implemented,
    // so larger Viterbi stacks are unsupported.
    testClassifier(writerDesc, libsvmDir, 1, "-t", "0");
    File taggedFile = new File(libsvmDir + "/2008_Sichuan_earthquake.txt.pos");
    String firstLine = FileUtil.loadListOfStrings(taggedFile)[0].trim();
    // Guard against the degenerate all-NN tagging before comparing exactly.
    String allNN =
            "2008/NN Sichuan/NN earthquake/NN From/NN Wikipedia/NN ,/NN the/NN free/NN encyclopedia/NN";
    assertFalse(firstLine.equals(allNN));
    assertEquals(
            "2008/NN Sichuan/NN earthquake/NN From/IN Wikipedia/NN ,/, the/DT free/NN encyclopedia/NN",
            firstLine);
}
示例13: testSVMLIGHT
import org.cleartk.ml.CleartkSequenceAnnotator; //导入依赖的package包/类
@Test
public void testSVMLIGHT() throws Exception {
    // Only runs when the SVMlight tests are explicitly enabled.
    this.assumeTestsEnabled(SVMLIGHT_TESTS_PROPERTY_VALUE);
    this.logger.info(SVMLIGHT_TESTS_ENABLED_MESSAGE);
    String svmlightDir = outputDirectoryName + "/svmlight";
    AnalysisEngineDescription writerDesc = AnalysisEngineFactory.createEngineDescription(
            ExamplePosAnnotator.class,
            CleartkSequenceAnnotator.PARAM_DATA_WRITER_FACTORY_CLASS_NAME,
            ViterbiDataWriterFactory.class.getName(),
            DirectoryDataWriterFactory.PARAM_OUTPUT_DIRECTORY,
            svmlightDir,
            DefaultDataWriterFactory.PARAM_DATA_WRITER_CLASS_NAME,
            SvmLightStringOutcomeDataWriter.class.getName(),
            ViterbiDataWriterFactory.PARAM_OUTCOME_FEATURE_EXTRACTOR_NAMES,
            new String[] { DefaultOutcomeFeatureExtractor.class.getName() });
    testClassifier(writerDesc, svmlightDir, 1);
    File taggedFile = new File(svmlightDir + "/2008_Sichuan_earthquake.txt.pos");
    String firstLine = FileUtil.loadListOfStrings(taggedFile)[0].trim();
    // Guard against the degenerate all-NN tagging before comparing exactly.
    String allNN =
            "2008/NN Sichuan/NN earthquake/NN From/NN Wikipedia/NN ,/NN the/NN free/NN encyclopedia/NN";
    assertFalse(firstLine.equals(allNN));
    assertEquals(
            "2008/CD Sichuan/JJ earthquake/NNS From/IN Wikipedia/NN ,/, the/DT free/NN encyclopedia/IN",
            firstLine);
}
示例14: testDataWriterDescriptor
import org.cleartk.ml.CleartkSequenceAnnotator; //导入依赖的package包/类
@Test
public void testDataWriterDescriptor() throws UIMAException {
    // Instantiate the writer descriptor and check its configuration round-trips.
    AnalysisEngine engine = AnalysisEngineFactory.createEngine(
            ExamplePosAnnotator.getWriterDescription(ExamplePosAnnotator.DEFAULT_OUTPUT_DIRECTORY));
    // Normalize path separators so the comparison is platform-independent.
    String configuredDir =
            (String) engine.getConfigParameterValue(DirectoryDataWriterFactory.PARAM_OUTPUT_DIRECTORY);
    configuredDir = configuredDir.replace(File.separatorChar, '/');
    Assert.assertEquals(ExamplePosAnnotator.DEFAULT_OUTPUT_DIRECTORY, configuredDir);
    // The descriptor must name the Viterbi data writer factory.
    Object configuredFactory =
            engine.getConfigParameterValue(CleartkSequenceAnnotator.PARAM_DATA_WRITER_FACTORY_CLASS_NAME);
    Assert.assertEquals(ViterbiDataWriterFactory.class.getName(), configuredFactory);
    engine.collectionProcessComplete();
}
示例15: getWriterDescription
import org.cleartk.ml.CleartkSequenceAnnotator; //导入依赖的package包/类
/**
 * Builds a training-time analysis engine description for this model's annotator.
 *
 * <p>When {@code params.nViterbiOutcomes > 0}, the annotator is configured with a
 * Viterbi data writer that adds previous-outcome features (from 1 up to
 * {@code nViterbiOutcomes} outcomes back). Otherwise the data writer class is wired
 * directly, choosing the sequence or non-sequence factory parameter based on which
 * interface {@code params.dataWriterClass} implements.
 *
 * @param directory base directory under which the model directory is resolved
 * @param params model parameters: the data writer class and Viterbi outcome count
 * @return the configured engine description
 * @throws ResourceInitializationException if the description cannot be created
 * @throws RuntimeException if {@code params.dataWriterClass} implements neither
 *     {@code SequenceDataWriter} nor {@code DataWriter}
 */
public AnalysisEngineDescription getWriterDescription(File directory, Model.Params params)
        throws ResourceInitializationException {
    AnalysisEngineDescription desc;
    if (params.nViterbiOutcomes > 0) {
        desc = AnalysisEngineFactory.createEngineDescription(
                this.annotatorClass,
                CleartkSequenceAnnotator.PARAM_DATA_WRITER_FACTORY_CLASS_NAME,
                ViterbiDataWriterFactory.class,
                ViterbiDataWriterFactory.PARAM_DELEGATED_DATA_WRITER_FACTORY_CLASS,
                DefaultDataWriterFactory.class,
                DefaultDataWriterFactory.PARAM_DATA_WRITER_CLASS_NAME,
                params.dataWriterClass,
                ViterbiDataWriterFactory.PARAM_OUTCOME_FEATURE_EXTRACTOR_NAMES,
                DefaultOutcomeFeatureExtractor.class,
                DefaultOutcomeFeatureExtractor.PARAM_MOST_RECENT_OUTCOME,
                1,
                DefaultOutcomeFeatureExtractor.PARAM_LEAST_RECENT_OUTCOME,
                params.nViterbiOutcomes,
                DirectoryDataWriterFactory.PARAM_OUTPUT_DIRECTORY,
                this.getModelDirectory(directory, params));
    } else {
        // Pick the factory parameter that matches the writer's interface:
        // sequence writers and plain writers are configured through different factories.
        String dataWriterParamName; // fixed typo: was "datatWriterParamName"
        if (SequenceDataWriter.class.isAssignableFrom(params.dataWriterClass)) {
            dataWriterParamName = DefaultSequenceDataWriterFactory.PARAM_DATA_WRITER_CLASS_NAME;
        } else if (DataWriter.class.isAssignableFrom(params.dataWriterClass)) {
            dataWriterParamName = DefaultDataWriterFactory.PARAM_DATA_WRITER_CLASS_NAME;
        } else {
            throw new RuntimeException("Invalid data writer class: " + params.dataWriterClass);
        }
        desc = AnalysisEngineFactory.createEngineDescription(
                this.annotatorClass,
                dataWriterParamName,
                params.dataWriterClass,
                DirectoryDataWriterFactory.PARAM_OUTPUT_DIRECTORY,
                this.getModelDirectory(directory, params));
    }
    return desc;
}