This article collects typical usage examples of the Java method org.apache.uima.fit.factory.CollectionReaderFactory.createReaderDescription. If you are wondering what CollectionReaderFactory.createReaderDescription does, how to call it, or where to find examples, the curated code samples below should help. You can also explore the containing class, org.apache.uima.fit.factory.CollectionReaderFactory, for further usage.
Fifteen code examples of CollectionReaderFactory.createReaderDescription are shown below, ordered by popularity by default.
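Before the examples, here is a minimal, self-contained sketch of the pattern they all share: describe a reader with createReaderDescription (reader class first, then alternating parameter name/value pairs), pair it with one or more analysis engine descriptions, and run everything through SimplePipeline. The TextReader and BreakIteratorSegmenter classes are DKPro Core components assumed purely for illustration, and the source location and patterns are placeholder values.
import org.apache.uima.analysis_engine.AnalysisEngineDescription;
import org.apache.uima.collection.CollectionReaderDescription;
import org.apache.uima.fit.factory.AnalysisEngineFactory;
import org.apache.uima.fit.factory.CollectionReaderFactory;
import org.apache.uima.fit.pipeline.SimplePipeline;
import de.tudarmstadt.ukp.dkpro.core.io.text.TextReader;
import de.tudarmstadt.ukp.dkpro.core.tokit.BreakIteratorSegmenter;

public class CreateReaderDescriptionSketch {
    public static void main(String[] args) throws Exception {
        // First argument is the reader class; the rest are name/value parameter pairs.
        CollectionReaderDescription reader = CollectionReaderFactory.createReaderDescription(
                TextReader.class,
                TextReader.PARAM_SOURCE_LOCATION, "data/texts",
                TextReader.PARAM_PATTERNS, "*.txt",
                TextReader.PARAM_LANGUAGE, "en");
        // Any analysis engine description can be chained after the reader.
        AnalysisEngineDescription segmenter = AnalysisEngineFactory
                .createEngineDescription(BreakIteratorSegmenter.class);
        // runPipeline instantiates the descriptions and streams one CAS per document.
        SimplePipeline.runPipeline(reader, segmenter);
    }
}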
Example 1: main
import org.apache.uima.fit.factory.CollectionReaderFactory; // import required by the method
public static void main(String[] args) throws UIMAException, IOException {
    Logger.getRootLogger().setLevel(Level.INFO);
    if (args.length > 0)
        textFolder = args[0];
    // read preprocessed documents
    CollectionReaderDescription reader = CollectionReaderFactory.createReaderDescription(BinaryCasReader.class,
            BinaryCasReader.PARAM_SOURCE_LOCATION, textFolder, BinaryCasReader.PARAM_PATTERNS, textPattern,
            BinaryCasReader.PARAM_LANGUAGE, "en");
    // find Open IE tuples
    AnalysisEngineDescription openIE = AnalysisEngineFactory.createEngineDescription(OpenIEAnnotator.class);
    // write annotated data to file
    AnalysisEngineDescription writer = AnalysisEngineFactory.createEngineDescription(BinaryCasWriter.class,
            BinaryCasWriter.PARAM_TARGET_LOCATION, textFolder, BinaryCasWriter.PARAM_STRIP_EXTENSION, true,
            BinaryCasWriter.PARAM_FILENAME_EXTENSION, ".oie.bin6", BinaryCasWriter.PARAM_OVERWRITE, true);
    // run pipeline
    SimplePipeline.runPipeline(reader, openIE, writer);
}
Example 2: run
import org.apache.uima.fit.factory.CollectionReaderFactory; // import required by the method
public void run() throws Exception {
    String language = "en";
    String trainCorpora = DemoConstants.TRAIN_FOLDER_CROSS_VALIDATION;
    String trainFileSuffix = "*.txt";
    CollectionReaderDescription crd = CollectionReaderFactory.createReaderDescription(
            LineTokenTagReader.class, LineTokenTagReader.PARAM_LANGUAGE, language,
            LineTokenTagReader.PARAM_SOURCE_LOCATION, trainCorpora,
            LineTokenTagReader.PARAM_PATTERNS, trainFileSuffix);
    FlexTagCrossValidation flex = new FlexTagCrossValidation(crd, 2);
    if (System.getProperty("DKPRO_HOME") == null) {
        flex.setDKProHomeFolder("target/home");
    }
    flex.setExperimentName("WekaCrossValidationDemo");
    flex.setFeatures(TcFeatureFactory.create(LuceneCharacterNGram.class));
    List<Object> configuration = asList(new Object[] { J48.class.getName() });
    flex.setClassifier(Classifier.WEKA, configuration);
    flex.addReport(CvWekafAvgKnownUnknownAccuracyReport.class);
    flex.execute();
}
Example 3: runMetaCollection
import org.apache.uima.fit.factory.CollectionReaderFactory; // import required by the method
private void runMetaCollection(File luceneFolder) throws Exception {
    Object[] parameters = new Object[] {
            LuceneUniGramMetaCollector.PARAM_UNIQUE_EXTRACTOR_NAME, EXTRACTOR_NAME,
            TokenContext.PARAM_SOURCE_LOCATION, luceneFolder.toString(),
            LuceneUniGramMetaCollector.PARAM_TARGET_LOCATION, luceneFolder.toString() };
    List<Object> parameterList = new ArrayList<Object>(Arrays.asList(parameters));
    CollectionReaderDescription reader = CollectionReaderFactory.createReaderDescription(
            TestReaderSingleLabel.class, TestReaderSingleLabel.PARAM_LANGUAGE, "en",
            TestReaderSingleLabel.PARAM_SOURCE_LOCATION, "src/test/resources/text/input.txt");
    AnalysisEngineDescription segmenter = AnalysisEngineFactory
            .createEngineDescription(BreakIteratorSegmenter.class);
    AnalysisEngineDescription metaCollector = AnalysisEngineFactory
            .createEngineDescription(LuceneUniGramMetaCollector.class, parameterList.toArray());
    // run meta collector
    SimplePipeline.runPipeline(reader, segmenter, metaCollector);
}
Example 4: trainModel
import org.apache.uima.fit.factory.CollectionReaderFactory; // import required by the method
private static void trainModel(String folder) throws Exception {
    String language = "en";
    String corpora = DemoConstants.TRAIN_FOLDER;
    String fileSuffix = "*.txt";
    CollectionReaderDescription trainReader = CollectionReaderFactory.createReaderDescription(
            LineTokenTagReader.class, LineTokenTagReader.PARAM_LANGUAGE, language,
            LineTokenTagReader.PARAM_SOURCE_LOCATION, corpora,
            LineTokenTagReader.PARAM_PATTERNS, fileSuffix);
    FlexTagTrainSaveModel flex = new FlexTagTrainSaveModel(trainReader, new File(folder));
    flex.setFeatures(TcFeatureFactory.create(BrownCluster.class,
            BrownCluster.PARAM_RESOURCE_LOCATION, DemoConstants.BROWN_CLUSTER));
    if (System.getProperty("DKPRO_HOME") == null) {
        flex.setDKProHomeFolder("target/home");
    }
    flex.setExperimentName("ExampleUseModelDemo");
    flex.execute();
}
Example 5: main
import org.apache.uima.fit.factory.CollectionReaderFactory; // import required by the method
public static void main(String[] args) throws Exception {
    Options options = CliFactory.parseArguments(Options.class, args);
    // a reader that loads the URIs of the training files
    CollectionReaderDescription reader = CollectionReaderFactory.createReaderDescription(XmiReader.class,
            XmiReader.PARAM_SOURCE_LOCATION, options.getTrainDirectory() + "/*.xmi",
            XmiReader.PARAM_LENIENT, true);
    // run the pipeline over the training corpus
    SimplePipeline.runPipeline(reader,
            createEngineDescription(ContextWindowAnnotator.class,
                    ContextWindowAnnotator.PARAM_BASE_ANNOTATION, FigureMention.class,
                    ContextWindowAnnotator.PARAM_CONTEXT_CLASS, Speech.class,
                    ContextWindowAnnotator.PARAM_TARGET_ANNOTATION, TrainingArea.class),
            createEngineDescription(ClearTkMentionAnnotator.class,
                    CleartkSequenceAnnotator.PARAM_IS_TRAINING, true,
                    DirectoryDataWriterFactory.PARAM_OUTPUT_DIRECTORY, options.getModelDirectory(),
                    DefaultSequenceDataWriterFactory.PARAM_DATA_WRITER_CLASS_NAME,
                    MalletCrfStringOutcomeDataWriter.class),
            createEngineDescription(XmiWriter.class, XmiWriter.PARAM_TARGET_LOCATION, "target/"));
    // train a Mallet CRF model on the training data
    Train.main(options.getModelDirectory());
}
Example 6: test3
import org.apache.uima.fit.factory.CollectionReaderFactory; // import required by the method
@Test
public void test3() throws Exception {
    File file = new File(inputDir, "b-test2.dat");
    CollectionReaderDescription reader = CollectionReaderFactory.createReaderDescription(
            LineReader.class,
            null,
            LineReader.PARAM_FILE_OR_DIRECTORY_NAME, file.getPath(),
            LineReader.PARAM_COMMENT_SPECIFIERS, new String[] { "//" },
            LineReader.PARAM_SKIP_BLANK_LINES, false);
    JCasIterator jCasIterator = new JCasIterable(reader).iterator();
    test(jCasIterator, "", file.toURI() + "#1");
    test(jCasIterator, "", file.toURI() + "#2");
    test(jCasIterator, "", file.toURI() + "#3");
    test(jCasIterator, "1234|a bc def ghij klmno pqrstu vwxyz", file.toURI() + "#4");
    test(jCasIterator, "", file.toURI() + "#5");
    test(jCasIterator, " ", file.toURI() + "#6");
    assertFalse(jCasIterator.hasNext());
}
Example 7: test
import org.apache.uima.fit.factory.CollectionReaderFactory; // import required by the method
@Test
public void test() throws UIMAException, IOException {
    TypeSystemDescription tsDesc = createTypeSystemDescription(
            "com.textocat.textokit.commons.Commons-TypeSystem");
    CollectionReaderDescription readerDesc = CollectionReaderFactory.createReaderDescription(
            JdbcCollectionReader.class, tsDesc,
            JdbcCollectionReader.PARAM_DATABASE_URL,
            "jdbc:hsqldb:mem:jdbc-collection-reader-test;ifexists=true",
            JdbcCollectionReader.PARAM_USERNAME, "SA",
            JdbcCollectionReader.PARAM_PASSWORD, "",
            JdbcCollectionReader.PARAM_DRIVER_CLASS, "org.hsqldb.jdbc.JDBCDriver",
            JdbcCollectionReader.PARAM_QUERY, "SELECT url, txt FROM doc ORDER BY id OFFSET ? LIMIT ?",
            JdbcCollectionReader.PARAM_OFFSET_PARAM_INDEX, 1,
            JdbcCollectionReader.PARAM_LIMIT_PARAM_INDEX, 2,
            JdbcCollectionReader.PARAM_DOCUMENT_URL_COLUMN, "url",
            JdbcCollectionReader.PARAM_TEXT_COLUMN, "txt",
            JdbcCollectionReader.PARAM_BATCH_SIZE, 2,
            JdbcCollectionReader.PARAM_COUNT_QUERY, "SELECT count(*) FROM doc");
    AnalysisEngineDescription aeDesc = createEngineDescription(AnnotationLogger.class);
    SimplePipeline.runPipeline(readerDesc, aeDesc);
}
Example 8: setUp
import org.apache.uima.fit.factory.CollectionReaderFactory; // import required by the method
@Before
public void setUp() throws Exception {
    daoDesc = ExternalResourceFactory.createExternalResourceDescription(
            XmiFileTreeCorpusDAOResource.class, corpusPathString);
    tsd = CasCreationUtils.mergeTypeSystems(Sets.newHashSet(
            XmiFileTreeCorpusDAO.getTypeSystem(corpusPathString),
            TypeSystemDescriptionFactory.createTypeSystemDescription(),
            TokenizerAPI.getTypeSystemDescription(),
            SentenceSplitterAPI.getTypeSystemDescription()));
    readerDesc = CollectionReaderFactory.createReaderDescription(
            CorpusDAOCollectionReader.class, tsd,
            CorpusDAOCollectionReader.CORPUS_DAO_KEY, daoDesc);
    CAS aCAS = CasCreationUtils.createCas(tsd, null, null, null);
    tokenizerSentenceSplitterDesc = AnalysisEngineFactory
            .createEngineDescription(Unitizer.createTokenizerSentenceSplitterAED());
    unitAnnotatorDesc = AnalysisEngineFactory.createEngineDescription(
            UnitAnnotator.class, UnitAnnotator.PARAM_UNIT_TYPE_NAMES, unitTypes);
}
Example 9: setUp
import org.apache.uima.fit.factory.CollectionReaderFactory; // import required by the method
@Before
public void setUp() throws Exception {
    daoDesc = ExternalResourceFactory.createExternalResourceDescription(
            XmiFileTreeCorpusDAOResource.class, corpusPathString);
    tsd = CasCreationUtils.mergeTypeSystems(Sets.newHashSet(
            XmiFileTreeCorpusDAO.getTypeSystem(corpusPathString),
            TypeSystemDescriptionFactory.createTypeSystemDescription(),
            TokenizerAPI.getTypeSystemDescription(),
            SentenceSplitterAPI.getTypeSystemDescription()));
    reader = CollectionReaderFactory.createReaderDescription(
            CorpusDAOCollectionReader.class, tsd,
            CorpusDAOCollectionReader.CORPUS_DAO_KEY, daoDesc);
    CAS aCAS = CasCreationUtils.createCas(tsd, null, null, null);
    tokenizerSentenceSplitter = Unitizer.createTokenizerSentenceSplitterAED();
    unitAnnotator = AnalysisEngineFactory.createEngineDescription(
            UnitAnnotator.class, UnitAnnotator.PARAM_UNIT_TYPE_NAMES, unitTypes);
    unitClassifier = AnalysisEngineFactory.createEngineDescription(
            UnitClassifier.class, UnitClassifier.PARAM_CLASS_TYPE_NAMES, classTypes);
}
Example 10: run
import org.apache.uima.fit.factory.CollectionReaderFactory; // import required by the method
private void run() throws Exception {
    // make TypeSystemDesc
    TypeSystemDescription tsd = TypeSystemDescriptionFactory
            .createTypeSystemDescriptionFromPath(tsFile.toURI().toString());
    // configure CollectionReader
    CollectionReaderDescription colReaderDesc = CollectionReaderFactory.createReaderDescription(
            BratCollectionReader.class, tsd,
            BratCollectionReader.PARAM_BRAT_COLLECTION_DIR, bratCorpusDir.getPath(),
            BratCollectionReader.PARAM_MAPPING_FACTORY_CLASS,
            AutoBratUimaMappingFactory.class.getName());
    // configure AE
    AnalysisEngineDescription aeDesc = createEngineDescription(XmiWriter.class,
            XmiWriter.PARAM_OUTPUTDIR, outputDir.getPath());
    SimplePipeline.runPipeline(colReaderDesc, aeDesc);
}
Example 11: testNoTLINKs
import org.apache.uima.fit.factory.CollectionReaderFactory; // import required by the method
@Test
public void testNoTLINKs() throws Exception {
    CollectionReaderDescription reader = CollectionReaderFactory.createReaderDescription(
            FilesCollectionReader.class,
            FilesCollectionReader.PARAM_VIEW_NAME, TimeMlGoldAnnotator.TIMEML_VIEW_NAME,
            FilesCollectionReader.PARAM_ROOT_FILE, "src/test/resources/data/timeml",
            FilesCollectionReader.PARAM_SUFFIXES, new String[] { ".tml" });
    AnalysisEngineDescription engine = AnalysisEngineFactory.createEngineDescription(
            TimeMlGoldAnnotator.class,
            TimeMlGoldAnnotator.PARAM_LOAD_TLINKS, false);
    for (JCas jcas : new JCasIterable(reader, engine)) {
        Assert.assertTrue(JCasUtil.select(jcas, Event.class).size() > 0);
        Assert.assertTrue(JCasUtil.select(jcas, Time.class).size() > 0);
        Assert.assertEquals(0, JCasUtil.select(jcas, TemporalLink.class).size());
    }
}
Example 12: testAuto
import org.apache.uima.fit.factory.CollectionReaderFactory; // import required by the method
@Test
public void testAuto() throws Exception {
    TypeSystemDescription tsd = TypeSystemDescriptionFactory
            .createTypeSystemDescription("desc.types.brat-news-tutorial-TypeSystem");
    CollectionReaderDescription colReaderDesc = CollectionReaderFactory.createReaderDescription(
            BratCollectionReader.class, tsd,
            BratCollectionReader.PARAM_BRAT_COLLECTION_DIR, "data/brat-news-tutorial",
            BratCollectionReader.PARAM_MAPPING_FACTORY_CLASS,
            AutoBratUimaMappingFactory.class.getName(),
            AutoBratUimaMappingFactory.PARAM_NAMESPACES_TO_SCAN, "ace");
    // configure AE
    AnalysisEngineDescription aeDesc = XmiFileWriter.createDescription(
            new File("target/brat-news-tutorial.xmi"));
    SimplePipeline.runPipeline(colReaderDesc, aeDesc);
}
Example 13: testReaderXcas
import org.apache.uima.fit.factory.CollectionReaderFactory; // import required by the method
@Test
public void testReaderXcas() throws Exception {
    tokenBuilder.buildTokens(jCas, "I like\nspam!", "I like spam !", "PRP VB NN .");
    File outputFile = new File(outputDirectory, "test.xcas");
    FileOutputStream out = new FileOutputStream(outputFile);
    XCASSerializer ser = new XCASSerializer(jCas.getTypeSystem());
    XMLSerializer xmlSer = new XMLSerializer(out, false);
    ser.serialize(jCas.getCas(), xmlSer.getContentHandler());
    CollectionReaderDescription desc = CollectionReaderFactory.createReaderDescription(
            XReader.class,
            FilesCollectionReader.PARAM_ROOT_FILE, new File(outputDirectory, "test.xcas").getPath(),
            XReader.PARAM_XML_SCHEME, XReader.XCAS);
    jCas = new JCasIterable(desc).iterator().next();
    String jCasText = jCas.getDocumentText();
    String docText = "I like\nspam!";
    Assert.assertEquals(jCasText, docText);
    Token token = JCasUtil.selectByIndex(jCas, Token.class, 0);
    Assert.assertEquals("I", token.getCoveredText());
}
Example 14: testReaderXmi
import org.apache.uima.fit.factory.CollectionReaderFactory; // import required by the method
@Test
public void testReaderXmi() throws Exception {
    tokenBuilder.buildTokens(jCas, "I like\nspam!", "I like spam !", "PRP VB NN .");
    File outputFile = new File(outputDirectory, "test.xmi");
    CasIOUtil.writeXmi(jCas, outputFile);
    CollectionReaderDescription desc = CollectionReaderFactory.createReaderDescription(
            XReader.class,
            FilesCollectionReader.PARAM_ROOT_FILE, outputFile.getPath());
    jCas = new JCasIterable(desc).iterator().next();
    String jCasText = jCas.getDocumentText();
    String docText = "I like\nspam!";
    Assert.assertEquals(jCasText, docText);
    Token token = JCasUtil.selectByIndex(jCas, Token.class, 0);
    Assert.assertEquals("I", token.getCoveredText());
}
Example 15: AggregateCollectionReader
import org.apache.uima.fit.factory.CollectionReaderFactory; // import required by the method
public AggregateCollectionReader(List<CollectionReader> readers, TypeSystemDescription tsd) {
    try {
        CollectionReaderDescription crd = CollectionReaderFactory
                .createReaderDescription(AggregateCollectionReader.class, tsd);
        ResourceMetaData metaData = crd.getMetaData();
        ConfigurationParameterSettings paramSettings = metaData.getConfigurationParameterSettings();
        Map<String, Object> additionalParameters = new HashMap<String, Object>();
        additionalParameters.put(CollectionReader.PARAM_CONFIG_PARAM_SETTINGS, paramSettings);
        initialize(crd, additionalParameters);
        this.readers = readers;
        this.readerIterator = this.readers.iterator();
        currentReader = this.readerIterator.next();
    } catch (ResourceInitializationException rie) {
        throw new RuntimeException(rie);
    }
}
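For orientation, a hypothetical usage of this constructor might look like the sketch below. The concrete reader class (DKPro Core's TextReader), the corpus paths, and the patterns are assumptions for illustration, not taken from the original project; only the constructor signature comes from the example above.
List<CollectionReader> readers = Arrays.asList(
        CollectionReaderFactory.createReader(TextReader.class,
                TextReader.PARAM_SOURCE_LOCATION, "corpus-a", TextReader.PARAM_PATTERNS, "*.txt"),
        CollectionReaderFactory.createReader(TextReader.class,
                TextReader.PARAM_SOURCE_LOCATION, "corpus-b", TextReader.PARAM_PATTERNS, "*.txt"));
TypeSystemDescription tsd = TypeSystemDescriptionFactory.createTypeSystemDescription();
// The aggregate reader drains corpus-a first, then continues with corpus-b.
AggregateCollectionReader aggregate = new AggregateCollectionReader(readers, tsd);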