This article collects typical usage examples of the Java class org.apache.uima.fit.factory.AnalysisEngineFactory. If you are wondering what AnalysisEngineFactory is for or how to use it, the curated code examples below should help.
The AnalysisEngineFactory class belongs to the org.apache.uima.fit.factory package. Fifteen code examples of the class are shown below, sorted by popularity by default. You can upvote the examples you like or find useful; your feedback helps the system recommend better Java code examples.
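Before working through the examples, here is a minimal self-contained sketch of the two factory methods used throughout this page: createEngineDescription builds a declarative engine description, and createEngine instantiates a runnable engine from it. This is a sketch assuming uimaFIT 2.x on the classpath; the demo class name and sample text are placeholders, and NoOpAnnotator is uimaFIT's built-in do-nothing component.

import org.apache.uima.analysis_engine.AnalysisEngine;
import org.apache.uima.analysis_engine.AnalysisEngineDescription;
import org.apache.uima.fit.component.NoOpAnnotator;
import org.apache.uima.fit.factory.AnalysisEngineFactory;
import org.apache.uima.fit.factory.JCasFactory;
import org.apache.uima.jcas.JCas;

public class AnalysisEngineFactoryDemo {
    public static void main(String[] args) throws Exception {
        // Describe an engine: the component class plus optional configuration parameters
        AnalysisEngineDescription desc =
                AnalysisEngineFactory.createEngineDescription(NoOpAnnotator.class);
        // Instantiate a runnable engine from the description
        AnalysisEngine engine = AnalysisEngineFactory.createEngine(desc);
        // Process a document
        JCas jcas = JCasFactory.createJCas();
        jcas.setDocumentText("UIMA analyses unstructured text.");
        engine.process(jcas);
    }
}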
Example 1: main
import org.apache.uima.fit.factory.AnalysisEngineFactory; // import the required package/class
public static void main(String[] args) throws ResourceInitializationException, UIMAException, IOException {
    SimplePipeline.runPipeline(
            CollectionReaderFactory.createReaderDescription(TextgridTEIUrlReader.class,
                    TextgridTEIUrlReader.PARAM_INPUT, "http://www.textgridrep.org/textgrid:tx4z.0",
                    TextgridTEIUrlReader.PARAM_CLEANUP, true),
            AnalysisEngineFactory.createEngineDescription(XmiWriter.class,
                    XmiWriter.PARAM_USE_DOCUMENT_ID, true,
                    XmiWriter.PARAM_TARGET_LOCATION, "src/test/resources/SpeakerAssignmentRules/"));
    SimplePipeline.runPipeline(
            CollectionReaderFactory.createReaderDescription(TextgridTEIUrlReader.class,
                    TextgridTEIUrlReader.PARAM_INPUT, "http://www.textgridrep.org/textgrid:w3zd.0",
                    TextgridTEIUrlReader.PARAM_CLEANUP, true),
            AnalysisEngineFactory.createEngineDescription(XmiWriter.class,
                    XmiWriter.PARAM_USE_DOCUMENT_ID, true,
                    XmiWriter.PARAM_TARGET_LOCATION, "src/test/resources/SpeakerAssignmentRules/"));
    // remove the type system descriptor that the writer places alongside the XMI files
    new File("src/test/resources/SpeakerAssignmentRules/typesystem.xml").delete();
}
Example 2: main
import org.apache.uima.fit.factory.AnalysisEngineFactory; // import the required package/class
public static void main(String[] args) throws Exception {
    // TypeSystemDescription tsd = TypeSystemDescriptionFactory.createTypeSystemDescriptionFromPath("../desc/TypeSystem.xml");
    JCas jcas = JCasFactory.createJCas();
    jcas.setDocumentText("Patient is a 30-year-old man named Leroy Butler from Green Bay, WI.");
    AnalysisEngineDescription aed = AnalysisEngineFactory.createEngineDescription(MistAnalysisEngine.class,
            MistAnalysisEngine.PARAM_MODEL_PATH, "SHARP/model/model");
    SimplePipeline.runPipeline(jcas, aed);
    for (Annotation annot : JCasUtil.select(jcas, Annotation.class)) {
        System.out.println("Found annotation: " + annot.getCoveredText());
    }
    JCas deidView = jcas.getView(MistAnalysisEngine.DEID_VIEW_NAME);
    System.out.println("Deidentified version:");
    System.out.println(deidView.getDocumentText());
}
Example 3: exportToHTML
import org.apache.uima.fit.factory.AnalysisEngineFactory; // import the required package/class
public static void exportToHTML(File inputFile, File outputFile) throws Exception {
    File intermediateXMIsFile = File.createTempFile("temp", ".xmi.tar.gz");
    SingleXMLToXMIExporter.exportToXMIs(inputFile, intermediateXMIsFile);
    SimplePipeline.runPipeline(
            CollectionReaderFactory.createReaderDescription(CompressedXmiReader.class,
                    CompressedXmiReader.PARAM_SOURCE_LOCATION, intermediateXMIsFile),
            AnalysisEngineFactory.createEngineDescription(ArgumentsToHTMLExporter.class,
                    ArgumentsToHTMLExporter.PARAM_OUTPUT_FILE, outputFile),
            AnalysisEngineFactory.createEngineDescription(ArgumentDumpWriter.class));
    Files.delete(intermediateXMIsFile.toPath());
}
Developer ID: UKPLab, Project: argument-reasoning-comprehension-task, Lines of code: 22, Source file: Step09AnnotatedDataHTMLExporter.java
Example 4: main
import org.apache.uima.fit.factory.AnalysisEngineFactory; // import the required package/class
public static void main(String[] args) throws Exception {
    String in = "/tmp/temp-in";
    String out = "/tmp/out2.tar.gz";
    // test it
    SimplePipeline.runPipeline(
            CollectionReaderFactory.createReaderDescription(XmiReader.class,
                    XmiReader.PARAM_SOURCE_LOCATION, in,
                    XmiReader.PARAM_PATTERNS, XmiReader.INCLUDE_PREFIX + "*.xmi"),
            AnalysisEngineFactory.createEngineDescription(NoOpAnnotator.class),
            AnalysisEngineFactory.createEngineDescription(CompressedXmiWriter.class,
                    CompressedXmiWriter.PARAM_OUTPUT_FILE, out));
}
Example 5: getPipeline
import org.apache.uima.fit.factory.AnalysisEngineFactory; // import the required package/class
/**
 * Creates a tokenizing pipeline.
 *
 * @throws IOException if the pipeline components cannot be initialized
 */
private static AnalysisEngineDescription getPipeline() throws IOException {
    if (pipelineSingleton == null) {
        try {
            pipelineSingleton = AnalysisEngineFactory.createEngineDescription(
                    AnalysisEngineFactory.createEngineDescription(ParagraphSplitter.class,
                            ParagraphSplitter.PARAM_SPLIT_PATTERN,
                            ParagraphSplitter.SINGLE_LINE_BREAKS_PATTERN),
                    AnalysisEngineFactory.createEngineDescription(ArkTweetTokenizerFixed.class),
                    AnalysisEngineFactory.createEngineDescription(StanfordSegmenter.class,
                            StanfordSegmenter.PARAM_WRITE_TOKEN, false,
                            StanfordSegmenter.PARAM_ZONE_TYPES,
                            Paragraph.class.getCanonicalName()));
        }
        catch (ResourceInitializationException e) {
            throw new IOException(e); // preserve the original cause
        }
    }
    return pipelineSingleton;
}
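A minimal usage sketch for getPipeline(), assuming the DKPro Core TextReader that also appears in Example 9; the source location and pattern here are hypothetical placeholders:

CollectionReaderDescription reader = CollectionReaderFactory.createReaderDescription(
        TextReader.class,
        TextReader.PARAM_SOURCE_LOCATION, "src/main/resources/tweets/",
        TextReader.PARAM_PATTERNS, "*.txt",
        TextReader.PARAM_LANGUAGE, "en");
SimplePipeline.runPipeline(reader, getPipeline());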
Example 6: main
import org.apache.uima.fit.factory.AnalysisEngineFactory; // import the required package/class
public static void main(String[] args) throws UIMAException, IOException {
    Logger.getRootLogger().setLevel(Level.INFO);
    if (args.length > 0) {
        textFolder = args[0];
    }
    // read preprocessed documents
    CollectionReaderDescription reader = CollectionReaderFactory.createReaderDescription(BinaryCasReader.class,
            BinaryCasReader.PARAM_SOURCE_LOCATION, textFolder,
            BinaryCasReader.PARAM_PATTERNS, textPattern,
            BinaryCasReader.PARAM_LANGUAGE, "en");
    // find Open IE tuples
    AnalysisEngineDescription openIE = AnalysisEngineFactory.createEngineDescription(OpenIEAnnotator.class);
    // write annotated data to file
    AnalysisEngineDescription writer = AnalysisEngineFactory.createEngineDescription(BinaryCasWriter.class,
            BinaryCasWriter.PARAM_TARGET_LOCATION, textFolder,
            BinaryCasWriter.PARAM_STRIP_EXTENSION, true,
            BinaryCasWriter.PARAM_FILENAME_EXTENSION, ".oie.bin6",
            BinaryCasWriter.PARAM_OVERWRITE, true);
    // run pipeline
    SimplePipeline.runPipeline(reader, openIE, writer);
}
Example 7: beforeTest
import org.apache.uima.fit.factory.AnalysisEngineFactory; // import the required package/class
@Override
public void beforeTest() throws UIMAException {
    super.beforeTest();
    final ExternalResourceDescription tokensDesc = ExternalResourceFactory.createExternalResourceDescription(
            "lexica", ClearNlpLexica.class);
    final AnalysisEngineDescription tokeniserDesc = AnalysisEngineFactory.createEngineDescription(
            ClearNlpTokeniser.class,
            "lexica", tokensDesc);
    tokeniserAe = AnalysisEngineFactory.createEngine(tokeniserDesc);
    final AnalysisEngineDescription parserDesc = AnalysisEngineFactory.createEngineDescription(
            ClearNlpParser.class,
            "lexica", tokensDesc);
    ae = AnalysisEngineFactory.createEngine(parserDesc);
}
Example 8: setUp
import org.apache.uima.fit.factory.AnalysisEngineFactory; // import the required package/class
@Before
public void setUp() throws ResourceInitializationException, ResourceAccessException {
    // Create a description of an external resource - a fongo instance, in the same way
    // we would have created a shared mongo resource
    final ExternalResourceDescription erd = ExternalResourceFactory.createExternalResourceDescription(
            SharedFongoResource.class, "fongo.collection", "test", "fongo.data", "[]");
    // Create the analysis engine
    final AnalysisEngineDescription aed = AnalysisEngineFactory.createEngineDescription(MongoPatternSaver.class,
            MongoPatternSaver.KEY_MONGO, erd,
            "collection", "test");
    ae = AnalysisEngineFactory.createEngine(aed);
    ae.initialize(new CustomResourceSpecifier_impl(), Collections.emptyMap());
    sfr = (SharedFongoResource) ae.getUimaContext().getResourceObject(MongoPatternSaver.KEY_MONGO);
}
Example 9: run
import org.apache.uima.fit.factory.AnalysisEngineFactory; // import the required package/class
public void run() throws Exception {
    String modelFolder = "target/theModel";
    // train the model we will use later
    trainModel(modelFolder);
    CollectionReaderDescription reader = CollectionReaderFactory.createReaderDescription(
            TextReader.class,
            TextReader.PARAM_SOURCE_LOCATION, "src/main/resources/raw/rawText.txt",
            TextReader.PARAM_LANGUAGE, "en");
    AnalysisEngineDescription seg = AnalysisEngineFactory
            .createEngineDescription(BreakIteratorSegmenter.class);
    AnalysisEngineDescription tagger = AnalysisEngineFactory.createEngineDescription(
            FlexTagUima.class,
            FlexTagUima.PARAM_LANGUAGE, "en",
            FlexTagUima.PARAM_MODEL_LOCATION, modelFolder);
    // a helper class which simply iterates over each token and prints the predicted POS tag
    AnalysisEngineDescription printer = AnalysisEngineFactory
            .createEngineDescription(OutputPrinter.class);
    SimplePipeline.runPipeline(reader, seg, tagger, printer);
}
Example 10: getPreprocessing
import org.apache.uima.fit.factory.AnalysisEngineFactory; // import the required package/class
/**
 * Gets the currently configured pre-processing pipeline.
 *
 * @return the pre-processing pipeline
 * @throws ResourceInitializationException if the configuration is erroneous
 */
public AnalysisEngineDescription getPreprocessing() throws ResourceInitializationException {
    List<AnalysisEngineDescription> preprocessing = new ArrayList<>();
    if (userPreprocessing != null) {
        preprocessing.addAll(Arrays.asList(userPreprocessing));
    }
    preprocessing.add(AnalysisEngineFactory.createEngineDescription(TcPosTaggingWrapper.class,
            TcPosTaggingWrapper.PARAM_USE_COARSE_GRAINED, useCoarse));
    return AnalysisEngineFactory
            .createEngineDescription(preprocessing.toArray(new AnalysisEngineDescription[0]));
}
Example 11: runMetaCollection
import org.apache.uima.fit.factory.AnalysisEngineFactory; // import the required package/class
private void runMetaCollection(File luceneFolder) throws Exception {
    Object[] parameters = new Object[] {
            LuceneUniGramMetaCollector.PARAM_UNIQUE_EXTRACTOR_NAME, EXTRACTOR_NAME,
            TokenContext.PARAM_SOURCE_LOCATION, luceneFolder.toString(),
            LuceneUniGramMetaCollector.PARAM_TARGET_LOCATION, luceneFolder.toString()
    };
    List<Object> parameterList = new ArrayList<Object>(Arrays.asList(parameters));
    CollectionReaderDescription reader = CollectionReaderFactory.createReaderDescription(
            TestReaderSingleLabel.class,
            TestReaderSingleLabel.PARAM_LANGUAGE, "en",
            TestReaderSingleLabel.PARAM_SOURCE_LOCATION, "src/test/resources/text/input.txt");
    AnalysisEngineDescription segmenter = AnalysisEngineFactory
            .createEngineDescription(BreakIteratorSegmenter.class);
    AnalysisEngineDescription metaCollector = AnalysisEngineFactory
            .createEngineDescription(LuceneUniGramMetaCollector.class, parameterList.toArray());
    // run meta collector
    SimplePipeline.runPipeline(reader, segmenter, metaCollector);
}
Example 12: createDictionaryAnnotatorEngine
import org.apache.uima.fit.factory.AnalysisEngineFactory; // import the required package/class
private static AnalysisEngine createDictionaryAnnotatorEngine() throws Exception {
    AggregateBuilder builder = new AggregateBuilder();
    builder.add(AnalysisEngineFactory.createEngineDescription(SimpleTokenizer.class,
            UimaUtil.SENTENCE_TYPE_PARAMETER, Sentence.class.getName(),
            UimaUtil.TOKEN_TYPE_PARAMETER, Token.class.getName()));
    builder.add(AnalysisEngineFactory.createEngineDescription(DictionaryAnnotator.class,
            DictionaryAnnotator.PARAM_DICTIONARY_LOCATION, "classpath:benchmark-dictionary.csv",
            DictionaryAnnotator.PARAM_TOKENIZER_CLASS, SimpleOpenNlpTokenizer.class.getName(),
            DictionaryAnnotator.PARAM_ANNOTATION_TYPE, DictionaryEntry.class.getName(),
            DictionaryAnnotator.PARAM_CSV_SEPARATOR, ";",
            DictionaryAnnotator.PARAM_DICTIONARY_CASE_SENSITIVE, true,
            DictionaryAnnotator.PARAM_DICTIONARY_ACCENT_SENSITIVE, true));
    return AnalysisEngineFactory.createEngine(builder.createAggregateDescription());
}
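A short usage sketch for the aggregate engine above, assuming the DictionaryEntry type system is discoverable on the classpath; the sample text is a hypothetical placeholder:

AnalysisEngine engine = createDictionaryAnnotatorEngine();
JCas jcas = JCasFactory.createJCas();
jcas.setDocumentText("Machine learning is widely used in natural language search.");
engine.process(jcas);
for (DictionaryEntry entry : JCasUtil.select(jcas, DictionaryEntry.class)) {
    System.out.println("Matched: " + entry.getCoveredText());
}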
Example 13: testFeatureAssignment
import org.apache.uima.fit.factory.AnalysisEngineFactory; // import the required package/class
@Test
public void testFeatureAssignment() throws Exception {
    AnalysisEngineDescription description = AnalysisEngineFactory.createEngineDescription(DictionaryAnnotator.class,
            DictionaryAnnotator.PARAM_DICTIONARY_LOCATION, "classpath:nlproc-dictionary.csv",
            DictionaryAnnotator.PARAM_TOKENIZER_CLASS, SimpleOpenNlpTokenizer.class.getName(),
            DictionaryAnnotator.PARAM_ANNOTATION_TYPE, DictionaryEntry.class.getName(),
            DictionaryAnnotator.PARAM_FEATURE_MAPPING, asList("1 -> base"));
    JCas jcas = process(description, loadText("wiki-nlproc.txt"));
    Collection<DictionaryEntry> entries = JCasUtil.select(jcas, DictionaryEntry.class);
    assertEquals(8, entries.size());
    assertEquals(Lists.newArrayList("hand-written rules", "machine learning", "Natural language generation",
            "Natural language understanding", "Natural language search"),
            entries.stream().map(DictionaryEntry::getCoveredText).distinct().collect(Collectors.toList()));
    assertEquals(Lists.newArrayList("method", "method", "method", "method", "task", "task", "task", "task"),
            entries.stream().map(DictionaryEntry::getBase).collect(Collectors.toList()));
    assertEquals(Lists.newArrayList(0),
            entries.stream().map(DictionaryEntry::getId).distinct().collect(Collectors.toList()));
}
Example 14: testCaseInsensitive
import org.apache.uima.fit.factory.AnalysisEngineFactory; // import the required package/class
@Test
public void testCaseInsensitive() throws Exception {
    AnalysisEngineDescription description = AnalysisEngineFactory.createEngineDescription(DictionaryAnnotator.class,
            DictionaryAnnotator.PARAM_DICTIONARY_LOCATION, "classpath:nlproc-dictionary.csv",
            DictionaryAnnotator.PARAM_TOKENIZER_CLASS, SimpleOpenNlpTokenizer.class.getName(),
            DictionaryAnnotator.PARAM_ANNOTATION_TYPE, DictionaryEntry.class.getName(),
            DictionaryAnnotator.PARAM_DICTIONARY_CASE_SENSITIVE, false,
            DictionaryAnnotator.PARAM_FEATURE_MAPPING, asList("1 -> base", "2 -> id"));
    JCas jcas = process(description, loadText("wiki-nlproc.txt"));
    Collection<DictionaryEntry> entries = JCasUtil.select(jcas, DictionaryEntry.class);
    assertEquals(11, entries.size());
    assertEquals(Lists.newArrayList("Computing Machinery and Intelligence", "hand-written rules",
            "machine learning", "Anaphora resolution",
            "Natural language generation", "Natural language understanding", "Natural language search"),
            entries.stream().map(DictionaryEntry::getCoveredText).distinct().collect(Collectors.toList()));
    assertEquals(Lists.newArrayList("computing machinery", "computing intelligence", "method", "task"),
            entries.stream().map(DictionaryEntry::getBase).distinct().collect(Collectors.toList()));
    assertEquals(Lists.newArrayList(3, 2, 1),
            entries.stream().map(DictionaryEntry::getId).distinct().collect(Collectors.toList()));
}
Example 15: testAccentInsensitive
import org.apache.uima.fit.factory.AnalysisEngineFactory; // import the required package/class
@Test
public void testAccentInsensitive() throws Exception {
    AnalysisEngineDescription description = AnalysisEngineFactory.createEngineDescription(DictionaryAnnotator.class,
            DictionaryAnnotator.PARAM_DICTIONARY_LOCATION, "classpath:language-dictionary.csv",
            DictionaryAnnotator.PARAM_TOKENIZER_CLASS, SimpleOpenNlpTokenizer.class.getName(),
            DictionaryAnnotator.PARAM_ANNOTATION_TYPE, DictionaryEntry.class.getName(),
            DictionaryAnnotator.PARAM_DICTIONARY_CASE_SENSITIVE, false,
            DictionaryAnnotator.PARAM_DICTIONARY_ACCENT_SENSITIVE, false,
            DictionaryAnnotator.PARAM_FEATURE_MAPPING, asList("1 -> base"));
    JCas jcas = process(description, loadText("wiki-language-with-accents.txt"));
    Collection<DictionaryEntry> entries = JCasUtil.select(jcas, DictionaryEntry.class);
    assertEquals(8, entries.size());
    assertEquals(Lists.newArrayList("capacité d'exprimer", "lingvistinių ženklų",
            "programmeringsspråk og språk", "gemäß ihrer genetischen"),
            entries.stream().map(DictionaryEntry::getCoveredText).distinct().collect(Collectors.toList()));
    assertEquals(Lists.newArrayList("fr", "fr-no-accents", "lt", "lt-no-accents",
            "no", "no-no-accents", "de", "de-no-accents"),
            entries.stream().map(DictionaryEntry::getBase).distinct().collect(Collectors.toList()));
}