This article collects typical usage examples of the Java method org.apache.uima.fit.factory.AnalysisEngineFactory.createEngineDescription. If you have been wondering what AnalysisEngineFactory.createEngineDescription does, how to use it, or where to find examples of it, the curated code samples below may help. You can also explore further usage examples of the containing class, org.apache.uima.fit.factory.AnalysisEngineFactory.
Below are 15 code examples of AnalysisEngineFactory.createEngineDescription, sorted by popularity by default. You can upvote the examples you like or find useful; your votes help the system recommend better Java code samples.
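Before the project-specific examples, here is a minimal, self-contained sketch of the basic call pattern: build a declarative description from an annotator class plus (parameter name, value) pairs, instantiate it, and process one document. The annotator MyAnnotator and its PARAM_MODEL parameter are hypothetical placeholders invented for this sketch, not part of any project shown below.

import org.apache.uima.analysis_engine.AnalysisEngine;
import org.apache.uima.analysis_engine.AnalysisEngineDescription;
import org.apache.uima.fit.component.JCasAnnotator_ImplBase;
import org.apache.uima.fit.descriptor.ConfigurationParameter;
import org.apache.uima.fit.factory.AnalysisEngineFactory;
import org.apache.uima.fit.factory.JCasFactory;
import org.apache.uima.jcas.JCas;

// Hypothetical minimal annotator, defined here only so the sketch is self-contained.
public class MyAnnotator extends JCasAnnotator_ImplBase {
    public static final String PARAM_MODEL = "modelPath";

    @ConfigurationParameter(name = PARAM_MODEL, mandatory = false)
    private String modelPath;

    @Override
    public void process(JCas jcas) {
        // a real annotator would create annotations here
    }

    public static void main(String[] args) throws Exception {
        // build a declarative description, instantiate it, and process one document
        AnalysisEngineDescription desc = AnalysisEngineFactory.createEngineDescription(
                MyAnnotator.class,
                MyAnnotator.PARAM_MODEL, "path/to/model");
        AnalysisEngine engine = AnalysisEngineFactory.createEngine(desc);
        JCas jcas = JCasFactory.createJCas();
        jcas.setDocumentText("Some input text.");
        engine.process(jcas);
    }
}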
Example 1: main
import org.apache.uima.fit.factory.AnalysisEngineFactory; // import the package/class the method depends on
public static void main(String[] args) throws Exception {
    // TypeSystemDescription tsd = TypeSystemDescriptionFactory.createTypeSystemDescriptionFromPath("../desc/TypeSystem.xml");
    JCas jcas = JCasFactory.createJCas();
    jcas.setDocumentText("Patient is a 30-year-old man named Leroy Butler from Green Bay, WI.");
    AnalysisEngineDescription aed = AnalysisEngineFactory.createEngineDescription(MistAnalysisEngine.class,
            MistAnalysisEngine.PARAM_MODEL_PATH,
            "SHARP/model/model");
    SimplePipeline.runPipeline(jcas, aed);
    for (Annotation annot : JCasUtil.select(jcas, Annotation.class)) {
        System.out.println("Found annotation: " + annot.getCoveredText());
    }
    JCas deidView = jcas.getView(MistAnalysisEngine.DEID_VIEW_NAME);
    System.out.println("Deidentified version:");
    System.out.println(deidView.getDocumentText());
}
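Because createEngineDescription returns a declarative AnalysisEngineDescription rather than an instantiated engine, the description can also be serialized as a classic XML descriptor. A small follow-up sketch, not part of the original example (the output file name is an assumption):

import java.io.FileOutputStream;
import java.io.OutputStream;

// Sketch: persist the description built above as an XML descriptor.
try (OutputStream os = new FileOutputStream("MistAnalysisEngine.xml")) {
    aed.toXML(os); // toXML is inherited from org.apache.uima.util.XMLizable
}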
Example 2: getPipeline
import org.apache.uima.fit.factory.AnalysisEngineFactory; // import the package/class the method depends on
/**
 * Creates a tokenizing pipeline.
 *
 * @throws IOException if the pipeline cannot be initialized
 */
private static AnalysisEngineDescription getPipeline()
        throws IOException
{
    if (pipelineSingleton == null) {
        try {
            pipelineSingleton = AnalysisEngineFactory.createEngineDescription(
                    AnalysisEngineFactory.createEngineDescription(ParagraphSplitter.class,
                            ParagraphSplitter.PARAM_SPLIT_PATTERN,
                            ParagraphSplitter.SINGLE_LINE_BREAKS_PATTERN),
                    AnalysisEngineFactory.createEngineDescription(ArkTweetTokenizerFixed.class),
                    AnalysisEngineFactory.createEngineDescription(StanfordSegmenter.class,
                            StanfordSegmenter.PARAM_WRITE_TOKEN, false,
                            StanfordSegmenter.PARAM_ZONE_TYPES,
                            Paragraph.class.getCanonicalName()));
        }
        catch (ResourceInitializationException e) {
            // preserve the cause instead of throwing a bare IOException
            throw new IOException(e);
        }
    }
    return pipelineSingleton;
}
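A hypothetical caller of this lazily initialized singleton might look like the sketch below, assuming it runs in the same class as the pipelineSingleton field:

// Sketch: run the tokenizing pipeline over a two-paragraph document.
JCas jcas = JCasFactory.createJCas();
jcas.setDocumentText("First paragraph.\nSecond paragraph.");
SimplePipeline.runPipeline(jcas, getPipeline());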
Example 3: main
import org.apache.uima.fit.factory.AnalysisEngineFactory; // import the package/class the method depends on
public static void main(String[] args) throws UIMAException, IOException {
    Logger.getRootLogger().setLevel(Level.INFO);
    if (args.length > 0)
        textFolder = args[0];
    // read preprocessed documents
    CollectionReaderDescription reader = CollectionReaderFactory.createReaderDescription(BinaryCasReader.class,
            BinaryCasReader.PARAM_SOURCE_LOCATION, textFolder, BinaryCasReader.PARAM_PATTERNS, textPattern,
            BinaryCasReader.PARAM_LANGUAGE, "en");
    // find Open IE tuples
    AnalysisEngineDescription openIE = AnalysisEngineFactory.createEngineDescription(OpenIEAnnotator.class);
    // write annotated data to file
    AnalysisEngineDescription writer = AnalysisEngineFactory.createEngineDescription(BinaryCasWriter.class,
            BinaryCasWriter.PARAM_TARGET_LOCATION, textFolder, BinaryCasWriter.PARAM_STRIP_EXTENSION, true,
            BinaryCasWriter.PARAM_FILENAME_EXTENSION, ".oie.bin6", BinaryCasWriter.PARAM_OVERWRITE, true);
    // run pipeline
    SimplePipeline.runPipeline(reader, openIE, writer);
}
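The two engine descriptions could equally be bundled into a single aggregate first; as Examples 2 and 6 also show, createEngineDescription accepts other descriptions as varargs. A sketch of the equivalent form, not in the original:

// Sketch: wrap both engines in one aggregate description, then run it.
AnalysisEngineDescription aggregate = AnalysisEngineFactory.createEngineDescription(openIE, writer);
SimplePipeline.runPipeline(reader, aggregate);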
Example 4: setUp
import org.apache.uima.fit.factory.AnalysisEngineFactory; // import the package/class the method depends on
@Before
public void setUp() throws ResourceInitializationException, ResourceAccessException {
    // Create a description of an external resource - a fongo instance, in the
    // same way we would have created a shared mongo resource
    final ExternalResourceDescription erd = ExternalResourceFactory.createExternalResourceDescription(
            SharedFongoResource.class, "fongo.collection", "test", "fongo.data", "[]");
    // Create the analysis engine
    final AnalysisEngineDescription aed = AnalysisEngineFactory.createEngineDescription(MongoPatternSaver.class,
            MongoPatternSaver.KEY_MONGO, erd,
            "collection", "test");
    ae = AnalysisEngineFactory.createEngine(aed);
    ae.initialize(new CustomResourceSpecifier_impl(), Collections.emptyMap());
    sfr = (SharedFongoResource) ae.getUimaContext()
            .getResourceObject(MongoPatternSaver.KEY_MONGO);
}
Example 5: run
import org.apache.uima.fit.factory.AnalysisEngineFactory; // import the package/class the method depends on
public void run()
        throws Exception
{
    String modelFolder = "target/theModel";
    // train the model we will use later
    trainModel(modelFolder);
    CollectionReaderDescription reader = CollectionReaderFactory.createReaderDescription(
            TextReader.class, TextReader.PARAM_SOURCE_LOCATION,
            "src/main/resources/raw/rawText.txt", TextReader.PARAM_LANGUAGE, "en");
    AnalysisEngineDescription seg = AnalysisEngineFactory
            .createEngineDescription(BreakIteratorSegmenter.class);
    AnalysisEngineDescription tagger = AnalysisEngineFactory.createEngineDescription(
            FlexTagUima.class, FlexTagUima.PARAM_LANGUAGE, "en",
            FlexTagUima.PARAM_MODEL_LOCATION, modelFolder);
    // a helper class which simply iterates each token and prints the predicted POS tag
    AnalysisEngineDescription printer = AnalysisEngineFactory
            .createEngineDescription(OutputPrinter.class);
    SimplePipeline.runPipeline(reader, seg, tagger, printer);
}
Example 6: getPreprocessing
import org.apache.uima.fit.factory.AnalysisEngineFactory; // import the package/class the method depends on
/**
 * Gets the current pre-processing setup.
 *
 * @return the pre-processing pipeline
 * @throws ResourceInitializationException
 *             if the configuration is erroneous
 */
public AnalysisEngineDescription getPreprocessing()
        throws ResourceInitializationException
{
    List<AnalysisEngineDescription> preprocessing = new ArrayList<>();
    if (userPreprocessing != null) {
        preprocessing.addAll(Arrays.asList(userPreprocessing));
    }
    preprocessing.add(AnalysisEngineFactory.createEngineDescription(TcPosTaggingWrapper.class,
            TcPosTaggingWrapper.PARAM_USE_COARSE_GRAINED, useCoarse));
    return AnalysisEngineFactory
            .createEngineDescription(preprocessing.toArray(new AnalysisEngineDescription[0]));
}
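A hypothetical caller could turn the assembled aggregate into a runnable engine and apply it to a CAS. A sketch under the assumption that getPreprocessing() is reachable from the call site:

// Sketch: instantiate the aggregated description and process one document.
JCas jcas = JCasFactory.createJCas();
jcas.setDocumentText("A short test sentence.");
AnalysisEngine preprocessing = AnalysisEngineFactory.createEngine(getPreprocessing());
preprocessing.process(jcas);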
Example 7: runMetaCollection
import org.apache.uima.fit.factory.AnalysisEngineFactory; // import the package/class the method depends on
private void runMetaCollection(File luceneFolder)
        throws Exception
{
    Object[] parameters = new Object[] { LuceneUniGramMetaCollector.PARAM_UNIQUE_EXTRACTOR_NAME,
            EXTRACTOR_NAME,
            TokenContext.PARAM_SOURCE_LOCATION, luceneFolder.toString(),
            LuceneUniGramMetaCollector.PARAM_TARGET_LOCATION, luceneFolder.toString()
    };
    List<Object> parameterList = new ArrayList<Object>(Arrays.asList(parameters));
    CollectionReaderDescription reader = CollectionReaderFactory.createReaderDescription(
            TestReaderSingleLabel.class, TestReaderSingleLabel.PARAM_LANGUAGE, "en",
            TestReaderSingleLabel.PARAM_SOURCE_LOCATION, "src/test/resources/text/input.txt");
    AnalysisEngineDescription segmenter = AnalysisEngineFactory
            .createEngineDescription(BreakIteratorSegmenter.class);
    AnalysisEngineDescription metaCollector = AnalysisEngineFactory
            .createEngineDescription(LuceneUniGramMetaCollector.class, parameterList.toArray());
    // run meta collector
    SimplePipeline.runPipeline(reader, segmenter, metaCollector);
}
Example 8: testFigureReferenceAnnotator1
import org.apache.uima.fit.factory.AnalysisEngineFactory; // import the package/class the method depends on
@Test
public void testFigureReferenceAnnotator1() throws ResourceInitializationException {
    CollectionReaderDescription reader = CollectionReaderFactory.createReaderDescription(XmiReader.class,
            XmiReader.PARAM_SOURCE_LOCATION, "src/test/resources/FigureReferenceAnnotator/rfxf.0.xmi",
            XmiReader.PARAM_LENIENT, true);
    AnalysisEngineDescription engine = AnalysisEngineFactory
            .createEngineDescription(FigureReferenceAnnotator.class);
    JCasIterator iterator = SimplePipeline.iteratePipeline(reader, engine).iterator();
    while (iterator.hasNext()) {
        JCas jcas = iterator.next();
        assertTrue(JCasUtil.exists(jcas, Figure.class));
        for (Figure figure : JCasUtil.select(jcas, Figure.class)) {
            assertNotNull(figure.getReference());
            assertFalse(figure.getReference().contains(","));
        }
    }
}
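Since SimplePipeline.iteratePipeline returns an Iterable over JCas objects, the explicit JCasIterator above can also be written as an enhanced for loop. A sketch of the equivalent form:

// Sketch: same iteration as above, as an enhanced for loop.
for (JCas jcas : SimplePipeline.iteratePipeline(reader, engine)) {
    assertTrue(JCasUtil.exists(jcas, Figure.class));
}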
Example 9: testAccentInsensitive
import org.apache.uima.fit.factory.AnalysisEngineFactory; // import the package/class the method depends on
@Test
public void testAccentInsensitive() throws Exception {
    AnalysisEngineDescription description = AnalysisEngineFactory.createEngineDescription(DictionaryAnnotator.class,
            DictionaryAnnotator.PARAM_DICTIONARY_LOCATION, "classpath:language-dictionary.csv",
            DictionaryAnnotator.PARAM_TOKENIZER_CLASS, SimpleOpenNlpTokenizer.class.getName(),
            DictionaryAnnotator.PARAM_ANNOTATION_TYPE, DictionaryEntry.class.getName(),
            DictionaryAnnotator.PARAM_DICTIONARY_CASE_SENSITIVE, false,
            DictionaryAnnotator.PARAM_DICTIONARY_ACCENT_SENSITIVE, false,
            DictionaryAnnotator.PARAM_FEATURE_MAPPING, asList(
                    "1 -> base"));
    JCas jcas = process(description, loadText("wiki-language-with-accents.txt"));
    Collection<DictionaryEntry> entries = JCasUtil.select(jcas, DictionaryEntry.class);
    assertEquals(8, entries.size());
    assertEquals(Lists.newArrayList("capacité d'exprimer", "lingvistinių ženklų",
            "programmeringsspråk og språk", "gemäß ihrer genetischen"),
            entries.stream().map(DictionaryEntry::getCoveredText).distinct().collect(Collectors.toList()));
    assertEquals(Lists.newArrayList("fr", "fr-no-accents", "lt", "lt-no-accents",
            "no", "no-no-accents", "de", "de-no-accents"),
            entries.stream().map(DictionaryEntry::getBase).distinct().collect(Collectors.toList()));
}
Example 10: testAccentSensitive
import org.apache.uima.fit.factory.AnalysisEngineFactory; // import the package/class the method depends on
@Test
public void testAccentSensitive() throws Exception {
    AnalysisEngineDescription description = AnalysisEngineFactory.createEngineDescription(DictionaryAnnotator.class,
            DictionaryAnnotator.PARAM_DICTIONARY_LOCATION, "classpath:language-dictionary.csv",
            DictionaryAnnotator.PARAM_TOKENIZER_CLASS, SimpleOpenNlpTokenizer.class.getName(),
            DictionaryAnnotator.PARAM_ANNOTATION_TYPE, DictionaryEntry.class.getName(),
            DictionaryAnnotator.PARAM_DICTIONARY_CASE_SENSITIVE, false,
            DictionaryAnnotator.PARAM_DICTIONARY_ACCENT_SENSITIVE, true,
            DictionaryAnnotator.PARAM_FEATURE_MAPPING, asList(
                    "1 -> base"));
    JCas jcas = process(description, loadText("wiki-language-with-accents.txt"));
    Collection<DictionaryEntry> entries = JCasUtil.select(jcas, DictionaryEntry.class);
    assertEquals(4, entries.size());
    assertEquals(Lists.newArrayList("capacité d'exprimer", "lingvistinių ženklų",
            "programmeringsspråk og språk", "gemäß ihrer genetischen"),
            entries.stream().map(DictionaryEntry::getCoveredText).distinct().collect(Collectors.toList()));
    assertEquals(Lists.newArrayList("fr", "lt", "no", "de"),
            entries.stream().map(DictionaryEntry::getBase).distinct().collect(Collectors.toList()));
}
Example 11: train
import org.apache.uima.fit.factory.AnalysisEngineFactory; // import the package/class the method depends on
@Override
public void train(CollectionReader collectionReader, File outputDirectory) throws Exception {
    AggregateBuilder builder = new AggregateBuilder();
    builder.add(UriToDocumentTextAnnotator.getDescription());
    builder.add(SentenceAnnotator.getDescription());
    builder.add(TokenAnnotator.getDescription());
    builder.add(PosTaggerAnnotator.getDescription());
    builder.add(DefaultSnowballStemmer.getDescription("English"));
    builder.add(AnalysisEngineFactory.createEngineDescription(GoldQuestionCategoryAnnotator.class));
    AnalysisEngineDescription documentClassificationAnnotator = AnalysisEngineFactory.createEngineDescription(
            QuestionCategoryAnnotator.class, CleartkAnnotator.PARAM_IS_TRAINING, true,
            DirectoryDataWriterFactory.PARAM_OUTPUT_DIRECTORY, outputDirectory,
            DefaultDataWriterFactory.PARAM_DATA_WRITER_CLASS_NAME, LibSvmStringOutcomeDataWriter.class.getName());
    builder.add(documentClassificationAnnotator);
    SimplePipeline.runPipeline(collectionReader, builder.createAggregateDescription());
    System.err.println("Train model and write model.jar file.");
    HideOutput hider = new HideOutput();
    Train.main(outputDirectory, this.trainingArguments.toArray(new String[this.trainingArguments.size()]));
    hider.restoreOutput();
}
Example 12: testmultipleHitsWithText
import org.apache.uima.fit.factory.AnalysisEngineFactory; // import the package/class the method depends on
@Test
public void testmultipleHitsWithText() throws Exception {
    ExternalResourceDescription erd = ExternalResourceFactory.createExternalResourceDescription(FILE_GAZETTEER, SharedFileResource.class);
    AnalysisEngineDescription aed = AnalysisEngineFactory.createEngineDescription(File.class, FILE_GAZETTEER, erd, FILE_NAME, getClass().getResource(GAZETTEER_TXT).getPath(), TYPE, LOCATION);
    AnalysisEngine ae = AnalysisEngineFactory.createEngine(aed);
    // the same search term appears multiple times in text...
    jCas.setDocumentText("Hello world, and hello world again.");
    // but then subset using a Text annotation
    new Text(jCas, 10, jCas.getDocumentText().length()).addToIndexes();
    ae.process(jCas);
    assertEquals(1, JCasUtil.select(jCas, Location.class).size());
    Location l = JCasUtil.selectByIndex(jCas, Location.class, 0);
    assertEquals(WORLD, l.getValue());
    assertEquals(WORLD, l.getCoveredText());
    assertTrue(l.getBegin() > 10);
    ae.destroy();
}
Example 13: testmultipleHits
import org.apache.uima.fit.factory.AnalysisEngineFactory; // import the package/class the method depends on
@Test
public void testmultipleHits() throws Exception {
    ExternalResourceDescription erd = ExternalResourceFactory.createExternalResourceDescription(FILE_GAZETTEER, SharedFileResource.class);
    AnalysisEngineDescription aed = AnalysisEngineFactory.createEngineDescription(File.class, FILE_GAZETTEER, erd, FILE_NAME, getClass().getResource(GAZETTEER_TXT).getPath(), TYPE, LOCATION);
    AnalysisEngine ae = AnalysisEngineFactory.createEngine(aed);
    // the same search term appears multiple times in text...
    jCas.setDocumentText("Hello world, and hello world again.");
    ae.process(jCas);
    assertEquals(2, JCasUtil.select(jCas, Location.class).size());
    Location l = JCasUtil.selectByIndex(jCas, Location.class, 0);
    assertEquals(WORLD, l.getValue());
    assertEquals(WORLD, l.getCoveredText());
    ae.destroy();
}
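When the intermediate description is not needed, uimaFIT also lets you build the engine in one step: createEngine accepts the same class-plus-parameters argument list as createEngineDescription. A sketch of the equivalent call for the test above:

// Sketch: skip the explicit description and create the engine directly.
AnalysisEngine ae = AnalysisEngineFactory.createEngine(File.class, FILE_GAZETTEER, erd,
        FILE_NAME, getClass().getResource(GAZETTEER_TXT).getPath(), TYPE, LOCATION);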
Example 14: createSubNormalizerAEDesc
import org.apache.uima.fit.factory.AnalysisEngineFactory; // import the package/class the method depends on
private static AnalysisEngineDescription createSubNormalizerAEDesc(String target, URL mappingFile) {
    try {
        AnalysisEngineDescription ae = AnalysisEngineFactory.createEngineDescription(
                Mapper.class,
                Mapper.PARAM_SOURCE, "fr.univnantes.termsuite.types.WordAnnotation:tag",
                Mapper.PARAM_TARGET, target,
                Mapper.PARAM_UPDATE, true
        );
        ExternalResourceDescription mappingRes = ExternalResourceFactory.createExternalResourceDescription(
                MappingResource.class,
                mappingFile
        );
        ExternalResourceFactory.bindResource(
                ae,
                Mapping.KEY_MAPPING,
                mappingRes
        );
        return ae;
    } catch (Exception e) {
        throw new PreparationPipelineException(e);
    }
}
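As Examples 4, 12, and 13 illustrate, uimaFIT can also bind an external resource inline by placing the resource key and its ExternalResourceDescription directly in the parameter list, instead of a separate bindResource call. A sketch of that alternative for this method, under the assumption that Mapper declares the Mapping.KEY_MAPPING dependency in a way uimaFIT can bind (this is not TermSuite's own code):

// Sketch: bind the mapping resource inline in the parameter list.
AnalysisEngineDescription ae = AnalysisEngineFactory.createEngineDescription(
        Mapper.class,
        Mapping.KEY_MAPPING, mappingRes,
        Mapper.PARAM_SOURCE, "fr.univnantes.termsuite.types.WordAnnotation:tag",
        Mapper.PARAM_TARGET, target,
        Mapper.PARAM_UPDATE, true);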
Example 15: testMalletC45
import org.apache.uima.fit.factory.AnalysisEngineFactory; // import the package/class the method depends on
@Test
public void testMalletC45() throws Exception {
    this.assumeLongTestsEnabled();
    this.logger.info(LONG_TEST_MESSAGE);
    String outDirectoryName = outputDirectoryName + "/mallet-c45";
    AnalysisEngineDescription dataWriter = AnalysisEngineFactory.createEngineDescription(
            ExamplePosAnnotator.class,
            CleartkSequenceAnnotator.PARAM_DATA_WRITER_FACTORY_CLASS_NAME,
            ViterbiDataWriterFactory.class.getName(),
            DirectoryDataWriterFactory.PARAM_OUTPUT_DIRECTORY,
            outDirectoryName,
            DefaultDataWriterFactory.PARAM_DATA_WRITER_CLASS_NAME,
            MalletStringOutcomeDataWriter.class.getName(),
            ViterbiDataWriterFactory.PARAM_OUTCOME_FEATURE_EXTRACTOR_NAMES,
            new String[] { DefaultOutcomeFeatureExtractor.class.getName() });
    testClassifier(dataWriter, outDirectoryName, 10, "C45");
    String firstLine = FileUtil.loadListOfStrings(new File(outDirectoryName
            + "/2008_Sichuan_earthquake.txt.pos"))[0];
    assertEquals(
            "2008/CD Sichuan/JJ earthquake/NN From/NN Wikipedia/NN ,/, the/DT free/NN encyclopedia/NN",
            firstLine);
}