

Java CollectionReaderFactory.createReaderDescription method code examples

This article collects typical code examples of the Java method org.apache.uima.fit.factory.CollectionReaderFactory.createReaderDescription. If you are wondering what exactly CollectionReaderFactory.createReaderDescription does, how to call it, or what real-world usage looks like, the selected examples below should help. You may also want to explore further usage examples of the enclosing class, org.apache.uima.fit.factory.CollectionReaderFactory.


The sections below present 15 code examples of CollectionReaderFactory.createReaderDescription, sorted by popularity by default.
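Before the individual examples, here is a minimal, self-contained sketch of the basic call pattern: the reader class is passed first, followed by alternating parameter-name/value pairs. SingleTextReader below is a toy reader written solely for this illustration (it is not part of uimaFIT or of any project shown later); only standard uimaFIT factory methods and base classes are used.

import java.io.IOException;

import org.apache.uima.collection.CollectionException;
import org.apache.uima.collection.CollectionReaderDescription;
import org.apache.uima.fit.component.JCasCollectionReader_ImplBase;
import org.apache.uima.fit.descriptor.ConfigurationParameter;
import org.apache.uima.fit.factory.CollectionReaderFactory;
import org.apache.uima.jcas.JCas;
import org.apache.uima.util.Progress;

public class CreateReaderDescriptionSketch {

    // A toy reader that emits a single document; it exists only to illustrate the factory call.
    public static class SingleTextReader extends JCasCollectionReader_ImplBase {

        public static final String PARAM_TEXT = "text";

        @ConfigurationParameter(name = PARAM_TEXT, mandatory = true)
        private String text;

        private boolean done = false;

        @Override
        public boolean hasNext() throws IOException, CollectionException {
            return !done;
        }

        @Override
        public void getNext(JCas jCas) throws IOException, CollectionException {
            jCas.setDocumentText(text);
            done = true;
        }

        @Override
        public Progress[] getProgress() {
            return new Progress[0];
        }
    }

    public static void main(String[] args) throws Exception {
        // Typical call pattern: reader class first, then alternating
        // parameter-name / parameter-value pairs.
        CollectionReaderDescription reader = CollectionReaderFactory.createReaderDescription(
                SingleTextReader.class,
                SingleTextReader.PARAM_TEXT, "Hello UIMA.");

        // The resulting description is normally handed to
        // SimplePipeline.runPipeline(reader, <analysis engines...>),
        // as the examples below show.
        System.out.println(reader.getImplementationName());
    }
}

All of the real-world examples that follow use this same pattern, differing only in the reader class and the PARAM_* constants it declares.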

Example 1: main

import org.apache.uima.fit.factory.CollectionReaderFactory; // import the package/class that the method depends on
public static void main(String[] args) throws UIMAException, IOException {

		Logger.getRootLogger().setLevel(Level.INFO);

		if (args.length > 0)
			textFolder = args[0];

		// read preprocessed documents
		CollectionReaderDescription reader = CollectionReaderFactory.createReaderDescription(BinaryCasReader.class,
				BinaryCasReader.PARAM_SOURCE_LOCATION, textFolder, BinaryCasReader.PARAM_PATTERNS, textPattern,
				BinaryCasReader.PARAM_LANGUAGE, "en");

		// find Open IE tuples
		AnalysisEngineDescription openIE = AnalysisEngineFactory.createEngineDescription(OpenIEAnnotator.class);

		// write annotated data to file
		AnalysisEngineDescription writer = AnalysisEngineFactory.createEngineDescription(BinaryCasWriter.class,
				BinaryCasWriter.PARAM_TARGET_LOCATION, textFolder, BinaryCasWriter.PARAM_STRIP_EXTENSION, true,
				BinaryCasWriter.PARAM_FILENAME_EXTENSION, ".oie.bin6", BinaryCasWriter.PARAM_OVERWRITE, true);

		// run pipeline
		SimplePipeline.runPipeline(reader, openIE, writer);
	}
 
Developer: UKPLab | Project: ijcnlp2017-cmaps | Lines: 24 | Source: PipelineOpenIE.java

Example 2: run

import org.apache.uima.fit.factory.CollectionReaderFactory; // import the package/class that the method depends on
public void run()
    throws Exception
{
    String language = "en";
    String trainCorpora = DemoConstants.TRAIN_FOLDER_CROSS_VALIDATION;
    String trainFileSuffix = "*.txt";

    CollectionReaderDescription crd = CollectionReaderFactory.createReaderDescription(
            LineTokenTagReader.class, LineTokenTagReader.PARAM_LANGUAGE, language,
            LineTokenTagReader.PARAM_SOURCE_LOCATION, trainCorpora,
            LineTokenTagReader.PARAM_PATTERNS, trainFileSuffix);

    FlexTagCrossValidation flex = new FlexTagCrossValidation(crd, 2);

    if (System.getProperty("DKPRO_HOME") == null) {
        flex.setDKProHomeFolder("target/home");
    }
    flex.setExperimentName("WekaCrossValidationDemo");

    flex.setFeatures(TcFeatureFactory.create(LuceneCharacterNGram.class));
    
    List<Object> configuration = asList(new Object[] { J48.class.getName() });
    flex.setClassifier(Classifier.WEKA, configuration);
    flex.addReport(CvWekafAvgKnownUnknownAccuracyReport.class);
    flex.execute();
}
 
Developer: Horsmann | Project: FlexTag | Lines: 27 | Source: ExampleWekaCrossValidation.java

Example 3: runMetaCollection

import org.apache.uima.fit.factory.CollectionReaderFactory; // import the package/class that the method depends on
private void runMetaCollection(File luceneFolder)
    throws Exception
{

    Object[] parameters = new Object[] { LuceneUniGramMetaCollector.PARAM_UNIQUE_EXTRACTOR_NAME,
            EXTRACTOR_NAME,
            TokenContext.PARAM_SOURCE_LOCATION, luceneFolder.toString(),
            LuceneUniGramMetaCollector.PARAM_TARGET_LOCATION, luceneFolder.toString()
            };

    List<Object> parameterList = new ArrayList<Object>(Arrays.asList(parameters));

    CollectionReaderDescription reader = CollectionReaderFactory.createReaderDescription(
            TestReaderSingleLabel.class, TestReaderSingleLabel.PARAM_LANGUAGE, "en",
            TestReaderSingleLabel.PARAM_SOURCE_LOCATION, "src/test/resources/text/input.txt");

    AnalysisEngineDescription segmenter = AnalysisEngineFactory
            .createEngineDescription(BreakIteratorSegmenter.class);

    AnalysisEngineDescription metaCollector = AnalysisEngineFactory
            .createEngineDescription(LuceneUniGramMetaCollector.class, parameterList.toArray());

    // run meta collector
    SimplePipeline.runPipeline(reader, segmenter, metaCollector);
}
 
Developer: Horsmann | Project: FlexTag | Lines: 26 | Source: LuceneNgramUnitTest.java

Example 4: trainModel

import org.apache.uima.fit.factory.CollectionReaderFactory; // import the package/class that the method depends on
private static void trainModel(String folder)
    throws Exception
{
    String language = "en";
    String corpora = DemoConstants.TRAIN_FOLDER;
    String fileSuffix = "*.txt";

    CollectionReaderDescription trainReader = CollectionReaderFactory.createReaderDescription(
            LineTokenTagReader.class, LineTokenTagReader.PARAM_LANGUAGE, language,
            LineTokenTagReader.PARAM_SOURCE_LOCATION, corpora,
            LineTokenTagReader.PARAM_PATTERNS, fileSuffix);

    FlexTagTrainSaveModel flex = new FlexTagTrainSaveModel(trainReader, new File(folder));

    flex.setFeatures(TcFeatureFactory.create(BrownCluster.class,
            BrownCluster.PARAM_RESOURCE_LOCATION, DemoConstants.BROWN_CLUSTER));

    if (System.getProperty("DKPRO_HOME") == null) {
        flex.setDKProHomeFolder("target/home");
    }
    flex.setExperimentName("ExampleUseModelDemo");
    flex.execute();
}
 
Developer: Horsmann | Project: FlexTag | Lines: 24 | Source: ExampleUseModel.java

Example 5: main

import org.apache.uima.fit.factory.CollectionReaderFactory; // import the package/class that the method depends on
public static void main(String[] args) throws Exception {
	Options options = CliFactory.parseArguments(Options.class, args);

	// a reader that loads the URIs of the training files
	CollectionReaderDescription reader = CollectionReaderFactory.createReaderDescription(XmiReader.class,
			XmiReader.PARAM_SOURCE_LOCATION, options.getTrainDirectory() + "/*.xmi", XmiReader.PARAM_LENIENT, true);

	// run the pipeline over the training corpus
	SimplePipeline.runPipeline(reader,
			createEngineDescription(ContextWindowAnnotator.class, ContextWindowAnnotator.PARAM_BASE_ANNOTATION,
					FigureMention.class, ContextWindowAnnotator.PARAM_CONTEXT_CLASS, Speech.class,
					ContextWindowAnnotator.PARAM_TARGET_ANNOTATION, TrainingArea.class),
			createEngineDescription(ClearTkMentionAnnotator.class, CleartkSequenceAnnotator.PARAM_IS_TRAINING, true,
					DirectoryDataWriterFactory.PARAM_OUTPUT_DIRECTORY, options.getModelDirectory(),
					DefaultSequenceDataWriterFactory.PARAM_DATA_WRITER_CLASS_NAME,
					MalletCrfStringOutcomeDataWriter.class),
			createEngineDescription(XmiWriter.class, XmiWriter.PARAM_TARGET_LOCATION, "target/"));

	// train a Mallet CRF model on the training data
	Train.main(options.getModelDirectory());
}
 
Developer: quadrama | Project: DramaNLP | Lines: 22 | Source: MentionDetectionTraining.java

Example 6: test3

import org.apache.uima.fit.factory.CollectionReaderFactory; // import the package/class that the method depends on
@Test
public void test3() throws Exception {
  File file = new File(inputDir, "b-test2.dat");

  CollectionReaderDescription reader = CollectionReaderFactory.createReaderDescription(
      LineReader.class,
      null,
      LineReader.PARAM_FILE_OR_DIRECTORY_NAME,
      file.getPath(),
      LineReader.PARAM_COMMENT_SPECIFIERS,
      new String[] { "//" },
      LineReader.PARAM_SKIP_BLANK_LINES,
      false);

  JCasIterator jCasIterator = new JCasIterable(reader).iterator();

  test(jCasIterator, "", file.toURI() + "#1");
  test(jCasIterator, "", file.toURI() + "#2");
  test(jCasIterator, "", file.toURI() + "#3");
  test(jCasIterator, "1234|a bc def ghij klmno pqrstu vwxyz", file.toURI() + "#4");
  test(jCasIterator, "", file.toURI() + "#5");
  test(jCasIterator, "    	", file.toURI() + "#6");
  assertFalse(jCasIterator.hasNext());
}
 
Developer: ClearTK | Project: cleartk | Lines: 25 | Source: LineReaderTest.java

Example 7: test

import org.apache.uima.fit.factory.CollectionReaderFactory; // import the package/class that the method depends on
@Test
public void test() throws UIMAException, IOException {
    TypeSystemDescription tsDesc = createTypeSystemDescription("com.textocat.textokit.commons.Commons-TypeSystem");
    // reader that pulls (url, txt) rows from an in-memory HSQLDB database,
    // paging through the result set two documents at a time
    CollectionReaderDescription readerDesc =
            CollectionReaderFactory.createReaderDescription(JdbcCollectionReader.class, tsDesc,
                    JdbcCollectionReader.PARAM_DATABASE_URL,
                    "jdbc:hsqldb:mem:jdbc-collection-reader-test;ifexists=true",
                    JdbcCollectionReader.PARAM_USERNAME, "SA",
                    JdbcCollectionReader.PARAM_PASSWORD, "",
                    JdbcCollectionReader.PARAM_DRIVER_CLASS, "org.hsqldb.jdbc.JDBCDriver",
                    JdbcCollectionReader.PARAM_QUERY, "SELECT url, txt FROM doc ORDER BY id OFFSET ? LIMIT ?",
                    JdbcCollectionReader.PARAM_OFFSET_PARAM_INDEX, 1,
                    JdbcCollectionReader.PARAM_LIMIT_PARAM_INDEX, 2,
                    JdbcCollectionReader.PARAM_DOCUMENT_URL_COLUMN, "url",
                    JdbcCollectionReader.PARAM_TEXT_COLUMN, "txt",
                    JdbcCollectionReader.PARAM_BATCH_SIZE, 2,
                    JdbcCollectionReader.PARAM_COUNT_QUERY, "SELECT count(*) FROM doc");
    // log the annotations of every CAS produced by the reader
    AnalysisEngineDescription aeDesc = createEngineDescription(AnnotationLogger.class);
    SimplePipeline.runPipeline(readerDesc, aeDesc);
}
 
Developer: textocat | Project: textokit-core | Lines: 21 | Source: JdbcCollectionReaderTest.java

Example 8: setUp

import org.apache.uima.fit.factory.CollectionReaderFactory; // import the package/class that the method depends on
@Before
public void setUp() throws Exception {
    daoDesc = ExternalResourceFactory.createExternalResourceDescription(
            XmiFileTreeCorpusDAOResource.class, corpusPathString);
    tsd = CasCreationUtils
            .mergeTypeSystems(Sets.newHashSet(
                    XmiFileTreeCorpusDAO.getTypeSystem(corpusPathString),
                    TypeSystemDescriptionFactory
                            .createTypeSystemDescription(),
                    TokenizerAPI.getTypeSystemDescription(),
                    SentenceSplitterAPI.getTypeSystemDescription()));
    readerDesc = CollectionReaderFactory.createReaderDescription(
            CorpusDAOCollectionReader.class, tsd,
            CorpusDAOCollectionReader.CORPUS_DAO_KEY, daoDesc);
    CAS aCAS = CasCreationUtils.createCas(tsd, null, null, null);
    tokenizerSentenceSplitterDesc = AnalysisEngineFactory
            .createEngineDescription(Unitizer.createTokenizerSentenceSplitterAED());
    unitAnnotatorDesc = AnalysisEngineFactory.createEngineDescription(
            UnitAnnotator.class, UnitAnnotator.PARAM_UNIT_TYPE_NAMES,
            unitTypes);
}
 
Developer: textocat | Project: textokit-core | Lines: 22 | Source: UnitAnnotatorTest.java

Example 9: setUp

import org.apache.uima.fit.factory.CollectionReaderFactory; // import the package/class that the method depends on
@Before
public void setUp() throws Exception {
    daoDesc = ExternalResourceFactory.createExternalResourceDescription(
            XmiFileTreeCorpusDAOResource.class, corpusPathString);
    tsd = CasCreationUtils
            .mergeTypeSystems(Sets.newHashSet(
                    XmiFileTreeCorpusDAO.getTypeSystem(corpusPathString),
                    TypeSystemDescriptionFactory
                            .createTypeSystemDescription(),
                    TokenizerAPI.getTypeSystemDescription(),
                    SentenceSplitterAPI.getTypeSystemDescription()));
    reader = CollectionReaderFactory.createReaderDescription(
            CorpusDAOCollectionReader.class, tsd,
            CorpusDAOCollectionReader.CORPUS_DAO_KEY, daoDesc);
    CAS aCAS = CasCreationUtils.createCas(tsd, null, null, null);
    tokenizerSentenceSplitter = Unitizer.createTokenizerSentenceSplitterAED();
    unitAnnotator = AnalysisEngineFactory.createEngineDescription(
            UnitAnnotator.class, UnitAnnotator.PARAM_UNIT_TYPE_NAMES,
            unitTypes);
    unitClassifier = AnalysisEngineFactory.createEngineDescription(
            UnitClassifier.class, UnitClassifier.PARAM_CLASS_TYPE_NAMES,
            classTypes);
}
 
Developer: textocat | Project: textokit-core | Lines: 24 | Source: UnitClassifierTest.java

Example 10: run

import org.apache.uima.fit.factory.CollectionReaderFactory; // import the package/class that the method depends on
private void run() throws Exception {
    // make TypeSystemDesc
    TypeSystemDescription tsd = TypeSystemDescriptionFactory
            .createTypeSystemDescriptionFromPath(tsFile.toURI().toString());
    // configure CollectionReader
    CollectionReaderDescription colReaderDesc = CollectionReaderFactory.createReaderDescription(
            BratCollectionReader.class, tsd,
            BratCollectionReader.PARAM_BRAT_COLLECTION_DIR, bratCorpusDir.getPath(),
            BratCollectionReader.PARAM_MAPPING_FACTORY_CLASS,
            AutoBratUimaMappingFactory.class.getName());
    // configure AE
    AnalysisEngineDescription aeDesc = createEngineDescription(XmiWriter.class,
            XmiWriter.PARAM_OUTPUTDIR, outputDir.getPath());

    SimplePipeline.runPipeline(colReaderDesc, aeDesc);
}
 
Developer: textocat | Project: textokit-core | Lines: 17 | Source: B2U.java

Example 11: testNoTLINKs

import org.apache.uima.fit.factory.CollectionReaderFactory; // import the package/class that the method depends on
@Test
public void testNoTLINKs() throws Exception {
  CollectionReaderDescription reader = CollectionReaderFactory.createReaderDescription(
      FilesCollectionReader.class,
      FilesCollectionReader.PARAM_VIEW_NAME,
      TimeMlGoldAnnotator.TIMEML_VIEW_NAME,
      FilesCollectionReader.PARAM_ROOT_FILE,
      "src/test/resources/data/timeml",
      FilesCollectionReader.PARAM_SUFFIXES,
      new String[] { ".tml" });
  AnalysisEngineDescription engine = AnalysisEngineFactory.createEngineDescription(
      TimeMlGoldAnnotator.class,
      TimeMlGoldAnnotator.PARAM_LOAD_TLINKS,
      false);
  for (JCas jcas : new JCasIterable(reader, engine)) {
    Assert.assertTrue(JCasUtil.select(jcas, Event.class).size() > 0);
    Assert.assertTrue(JCasUtil.select(jcas, Time.class).size() > 0);
    Assert.assertEquals(0, JCasUtil.select(jcas, TemporalLink.class).size());
  }
}
 
Developer: ClearTK | Project: cleartk | Lines: 21 | Source: TimeMlGoldAnnotatorTest.java

Example 12: testAuto

import org.apache.uima.fit.factory.CollectionReaderFactory; // import the package/class that the method depends on
@Test
public void testAuto() throws Exception {
    TypeSystemDescription tsd = TypeSystemDescriptionFactory
            .createTypeSystemDescription("desc.types.brat-news-tutorial-TypeSystem");

    CollectionReaderDescription colReaderDesc = CollectionReaderFactory.createReaderDescription(
            BratCollectionReader.class, tsd,
            BratCollectionReader.PARAM_BRAT_COLLECTION_DIR, "data/brat-news-tutorial",
            BratCollectionReader.PARAM_MAPPING_FACTORY_CLASS,
            AutoBratUimaMappingFactory.class.getName(),
            AutoBratUimaMappingFactory.PARAM_NAMESPACES_TO_SCAN, "ace");

    // configure AE
    AnalysisEngineDescription aeDesc = XmiFileWriter.createDescription(
            new File("target/brat-news-tutorial.xmi"));

    SimplePipeline.runPipeline(colReaderDesc, aeDesc);
}
 
Developer: textocat | Project: textokit-core | Lines: 19 | Source: B2UTest.java

Example 13: testReaderXcas

import org.apache.uima.fit.factory.CollectionReaderFactory; // import the package/class that the method depends on
@Test
public void testReaderXcas() throws Exception {

  tokenBuilder.buildTokens(jCas, "I like\nspam!", "I like spam !", "PRP VB NN .");
  File outputFile = new File(outputDirectory, "test.xcas");
  FileOutputStream out = new FileOutputStream(outputFile);
  XCASSerializer ser = new XCASSerializer(jCas.getTypeSystem());
  XMLSerializer xmlSer = new XMLSerializer(out, false);
  ser.serialize(jCas.getCas(), xmlSer.getContentHandler());

  CollectionReaderDescription desc = CollectionReaderFactory.createReaderDescription(
      XReader.class,
      FilesCollectionReader.PARAM_ROOT_FILE,
      new File(outputDirectory, "test.xcas").getPath(),
      XReader.PARAM_XML_SCHEME,
      XReader.XCAS);

  jCas = new JCasIterable(desc).iterator().next();

  String jCasText = jCas.getDocumentText();
  String docText = "I like\nspam!";
  Assert.assertEquals(jCasText, docText);

  Token token = JCasUtil.selectByIndex(jCas, Token.class, 0);
  Assert.assertEquals("I", token.getCoveredText());
}
 
Developer: ClearTK | Project: cleartk | Lines: 27 | Source: XReaderTest.java

Example 14: testReaderXmi

import org.apache.uima.fit.factory.CollectionReaderFactory; // import the package/class that the method depends on
@Test
public void testReaderXmi() throws Exception {

  tokenBuilder.buildTokens(jCas, "I like\nspam!", "I like spam !", "PRP VB NN .");
  File outputFile = new File(outputDirectory, "test.xmi");
  CasIOUtil.writeXmi(jCas, outputFile);

  CollectionReaderDescription desc = CollectionReaderFactory.createReaderDescription(
      XReader.class,
      FilesCollectionReader.PARAM_ROOT_FILE,
      outputFile.getPath());

  jCas = new JCasIterable(desc).iterator().next();

  String jCasText = jCas.getDocumentText();
  String docText = "I like\nspam!";
  Assert.assertEquals(jCasText, docText);

  Token token = JCasUtil.selectByIndex(jCas, Token.class, 0);
  Assert.assertEquals("I", token.getCoveredText());

}
 
Developer: ClearTK | Project: cleartk | Lines: 23 | Source: XReaderTest.java

Example 15: AggregateCollectionReader

import org.apache.uima.fit.factory.CollectionReaderFactory; // import the package/class that the method depends on
public AggregateCollectionReader(List<CollectionReader> readers,
        TypeSystemDescription tsd) {
    try {
        // build a reader description for this aggregate reader itself
        CollectionReaderDescription crd = CollectionReaderFactory
                .createReaderDescription(AggregateCollectionReader.class, tsd);
        // pass the description's configuration parameter settings on to initialize()
        ResourceMetaData metaData = crd.getMetaData();
        ConfigurationParameterSettings paramSettings = metaData
                .getConfigurationParameterSettings();
        Map<String, Object> additionalParameters = new HashMap<String, Object>();
        additionalParameters.put(CollectionReader.PARAM_CONFIG_PARAM_SETTINGS,
                paramSettings);
        initialize(crd, additionalParameters);

        // delegate reading to the wrapped readers, one after another
        this.readers = readers;
        this.readerIterator = this.readers.iterator();
        currentReader = this.readerIterator.next();
    } catch (ResourceInitializationException rie) {
        throw new RuntimeException(rie);
    }
}
 
Developer: BlueBrain | Project: bluima | Lines: 22 | Source: AggregateCollectionReader.java


Note: The examples of org.apache.uima.fit.factory.CollectionReaderFactory.createReaderDescription in this article were compiled by 纯净天空 from open-source code and documentation platforms such as GitHub and MSDocs. The snippets were selected from open-source projects contributed by their respective developers; copyright remains with the original authors, and any distribution or use should follow the license of the corresponding project. Please do not republish without permission.