当前位置: 首页>>代码示例>>Java>>正文


Java CollectionReader类代码示例

本文整理汇总了Java中org.apache.uima.collection.CollectionReader的典型用法代码示例。如果您正苦于以下问题:Java CollectionReader类的具体用法?Java CollectionReader怎么用?Java CollectionReader使用的例子?那么, 这里精选的类代码示例或许可以为您提供帮助。


CollectionReader类属于org.apache.uima.collection包,在下文中一共展示了CollectionReader类的15个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Java代码示例。

示例1: writeModel

import org.apache.uima.collection.CollectionReader; //导入依赖的package包/类
/**
 * Trains a CRFsuite NER model from a POS-tagged CoNLL file and writes the
 * training data into the given model directory.
 *
 * @param posTagFile     POS-tagged input file in CoNLL format
 * @param modelDirectory directory the CRFsuite training data is written to
 * @param language       language code handed to the Snowball stemmer
 * @throws UIMAException if a UIMA component cannot be created or fails
 * @throws IOException   if the input file cannot be read
 */
public static void writeModel(File posTagFile, String modelDirectory, String language) throws UIMAException, IOException {

    // Reader exposing the CoNLL file under the dedicated CoNLL view.
    CollectionReader conllReader = FilesCollectionReader.getCollectionReaderWithSuffixes(
            posTagFile.getAbsolutePath(), NERReader.CONLL_VIEW, posTagFile.getName());

    // NER annotator in training mode: extracts features and emits CRFsuite
    // training data into modelDirectory instead of classifying.
    AnalysisEngine trainingAnnotator = createEngine(NERAnnotator.class,
            NERAnnotator.PARAM_FEATURE_EXTRACTION_FILE, "src/main/resources/feature/features.xml",
            NERAnnotator.PARAM_IS_TRAINING, true,
            DirectoryDataWriterFactory.PARAM_OUTPUT_DIRECTORY, modelDirectory,
            DefaultDataWriterFactory.PARAM_DATA_WRITER_CLASS_NAME, CrfSuiteStringOutcomeDataWriter.class);

    AnalysisEngine stemmer = createEngine(SnowballStemmer.class, SnowballStemmer.PARAM_LANGUAGE, language);

    runPipeline(
            conllReader,
            createEngine(NERReader.class),
            stemmer,
            trainingAnnotator
    );
}
 
开发者ID:floschne,项目名称:NLP_ProjectNER,代码行数:28,代码来源:ExecuteNER.java

示例2: classifyTestFile

import org.apache.uima.collection.CollectionReader; //导入依赖的package包/类
/**
 * Classifies a POS-tagged test file with a previously trained NER model and
 * writes the evaluation output to the eval file.
 *
 * @param modelDirectory directory containing the trained {@code model.jar}
 * @param testPosFile    POS-tagged test file in CoNLL format
 * @param language       language code handed to the Snowball stemmer
 * @throws UIMAException if a UIMA component cannot be created or fails
 *                       (ResourceInitializationException is a subtype, so it
 *                       no longer needs its own throws entry)
 * @throws IOException   if the test file cannot be read
 */
public static void classifyTestFile(String modelDirectory, File testPosFile, String language)
		throws UIMAException, IOException {

	CollectionReader testPosFileReader = FilesCollectionReader.getCollectionReaderWithSuffixes(testPosFile.getAbsolutePath(),
			NERReader.CONLL_VIEW, testPosFile.getName());

	AnalysisEngine nerReader = createEngine(NERReader.class);
	AnalysisEngine snowballStemmer = createEngine(SnowballStemmer.class, SnowballStemmer.PARAM_LANGUAGE, language);
	// Join the jar path with File so a missing trailing separator in
	// modelDirectory does not yield a broken path like "dirmodel.jar".
	AnalysisEngine nerAnnotator = createEngine(NERAnnotator.class,
			NERAnnotator.PARAM_FEATURE_EXTRACTION_FILE, "src/main/resources/feature/features.xml",
			GenericJarClassifierFactory.PARAM_CLASSIFIER_JAR_PATH, new File(modelDirectory, "model.jar").getPath());
	AnalysisEngine nerWriter = createEngine(NERWriter.class,
			NERWriter.PARAM_NULL_TYPE, "O",
			NERWriter.PARAM_EXPECTED_ENTITY_TYPE_NUM, 9,
			NERWriter.PARAM_FILENAME, "src/test/resources/evaluation/eval.txt",
			NERWriter.PARAM_VERBOSE, true);

	runPipeline(
			testPosFileReader,
			nerReader,
			snowballStemmer,
			nerAnnotator,
			nerWriter);
}
 
开发者ID:floschne,项目名称:NLP_ProjectNER,代码行数:26,代码来源:ExecuteNER.java

示例3: writeModel

import org.apache.uima.collection.CollectionReader; //导入依赖的package包/类
/**
 * Trains a CRFsuite NER model using the feature extractor configuration with
 * the given file name and writes the training data to the model directory.
 *
 * @param posTagFile     POS-tagged input file in CoNLL format
 * @param language       language code handed to the Snowball stemmer
 * @param configFileName name of the feature extractor configuration file,
 *                       resolved against FEATURE_EXTRACTOR_CONFIG_DIRECTORY
 * @throws UIMAException if a UIMA component cannot be created or fails
 * @throws IOException   if the input file cannot be read or the model
 *                       directory cannot be created
 */
private void writeModel(File posTagFile, String language, String configFileName) throws UIMAException, IOException {

    // Fail fast when the model directory cannot be created instead of
    // silently ignoring the mkdirs() result as before.
    File modelDir = new File(getModelDir());
    if (!modelDir.isDirectory() && !modelDir.mkdirs()) {
        throw new IOException("Could not create model directory: " + modelDir.getAbsolutePath());
    }

    CollectionReader posTagFileReader = FilesCollectionReader.getCollectionReaderWithSuffixes(
            posTagFile.getAbsolutePath(), NERReader.CONLL_VIEW, posTagFile.getName());

    AnalysisEngine snowballStemmer = createEngine(SnowballStemmer.class, SnowballStemmer.PARAM_LANGUAGE, language);

    // Training mode: feature extraction writes CRFsuite training data.
    AnalysisEngine nerAnnotator = createEngine(NERAnnotator.class,
            NERAnnotator.PARAM_FEATURE_EXTRACTION_FILE, FEATURE_EXTRACTOR_CONFIG_DIRECTORY + configFileName,
            NERAnnotator.PARAM_IS_TRAINING, true,
            DirectoryDataWriterFactory.PARAM_OUTPUT_DIRECTORY, getModelDir(),
            DefaultDataWriterFactory.PARAM_DATA_WRITER_CLASS_NAME, CrfSuiteStringOutcomeDataWriter.class);

    runPipeline(
            posTagFileReader,
            createEngine(NERReader.class),
            snowballStemmer,
            nerAnnotator
    );
}
 
开发者ID:floschne,项目名称:NLP_ProjectNER,代码行数:30,代码来源:ExecuteFeatureAblationTest.java

示例4: train

import org.apache.uima.collection.CollectionReader; //导入依赖的package包/类
/**
 * Trains a question-category classifier: runs the preprocessing pipeline in
 * training mode so the classification annotator writes LIBSVM training data,
 * then invokes the trainer to package the model into a model.jar file.
 *
 * @param collectionReader reader producing the training documents
 * @param outputDirectory  directory for training data and the packaged model
 * @throws Exception if a pipeline component or the trainer fails
 */
@Override
public void train(CollectionReader collectionReader, File outputDirectory) throws Exception {
  AggregateBuilder builder = new AggregateBuilder();
  builder.add(UriToDocumentTextAnnotator.getDescription());
  builder.add(SentenceAnnotator.getDescription());
  builder.add(TokenAnnotator.getDescription());
  builder.add(PosTaggerAnnotator.getDescription());
  builder.add(DefaultSnowballStemmer.getDescription("English"));
  builder.add(AnalysisEngineFactory.createEngineDescription(GoldQuestionCategoryAnnotator.class));
  // Classifier annotator in training mode: writes LIBSVM training data into
  // the output directory instead of predicting.
  AnalysisEngineDescription documentClassificationAnnotator = AnalysisEngineFactory.createEngineDescription(
      QuestionCategoryAnnotator.class, CleartkAnnotator.PARAM_IS_TRAINING, true,
      DirectoryDataWriterFactory.PARAM_OUTPUT_DIRECTORY, outputDirectory,
      DefaultDataWriterFactory.PARAM_DATA_WRITER_CLASS_NAME, LibSvmStringOutcomeDataWriter.class.getName());
  builder.add(documentClassificationAnnotator);
  SimplePipeline.runPipeline(collectionReader, builder.createAggregateDescription());
  System.err.println("Train model and write model.jar file.");
  HideOutput hider = new HideOutput();
  // Idiomatic toArray: pass a zero-length array instead of pre-sizing.
  Train.main(outputDirectory, this.trainingArguments.toArray(new String[0]));
  hider.restoreOutput();
}
 
开发者ID:utk4rsh,项目名称:question-classifier,代码行数:21,代码来源:QuestionCategoryEvaluation.java

示例5: main

import org.apache.uima.collection.CollectionReader; //导入依赖的package包/类
/**
 * Classifies the documents in the configured test directory with a trained
 * model and prints the predicted categories.
 *
 * @param args command line arguments parsed into {@code Options}
 * @throws Exception if the pipeline cannot be built or fails while running
 */
public static void main(String[] args) throws Exception {
  Options options = CliFactory.parseArguments(Options.class, args);

  // Read every non-system file in the test directory as a document URI.
  CollectionReader uriReader = UriCollectionReader.getCollectionReaderFromDirectory(options.getTestDirectory(),
      UriCollectionReader.RejectSystemFiles.class, UriCollectionReader.RejectSystemDirectories.class);

  // Preprocessing chain: document text, sentences, tokens, POS tags, stems.
  AggregateBuilder pipeline = new AggregateBuilder();
  pipeline.add(UriToDocumentTextAnnotator.getDescription());
  pipeline.add(SentenceAnnotator.getDescription());
  pipeline.add(TokenAnnotator.getDescription());
  pipeline.add(PosTaggerAnnotator.getDescription());
  pipeline.add(DefaultSnowballStemmer.getDescription("English"));

  // Classifier in prediction mode, loading the packaged model.jar.
  pipeline.add(AnalysisEngineFactory.createEngineDescription(QuestionCategoryAnnotator.class,
      CleartkAnnotator.PARAM_IS_TRAINING, false, GenericJarClassifierFactory.PARAM_CLASSIFIER_JAR_PATH,
      JarClassifierBuilder.getModelJarFile(options.getModelsDirectory())));

  SimplePipeline.runPipeline(uriReader, pipeline.createAggregateDescription(),
      AnalysisEngineFactory.createEngineDescription(PrintClassificationsAnnotator.class));
}
 
开发者ID:utk4rsh,项目名称:question-classifier,代码行数:17,代码来源:App.java

示例6: main

import org.apache.uima.collection.CollectionReader; //导入依赖的package包/类
/**
 * Runs a UIMA pipeline described by an XML descriptor over a folder of text
 * files and writes the resulting CASes as XMI files.
 *
 * @param args {@code args[0]} classpath location of the XML pipeline
 *             descriptor, {@code args[1]} folder with input text files,
 *             {@code args[2]} output folder for XMI files
 * @throws Exception if the pipeline cannot be built or fails while running
 */
public static void main(String[] args) throws Exception {
  if (args.length != 3) {
    // Usage errors belong on stderr, not stdout.
    System.err.println("arguments: " //
            + "path to XML descriptor for pipeline, " //
            + "folder with text files, " //
            + "folder to output xmi files");
    System.exit(-1);
  }

  // A collection reader that reads text files
  CollectionReader reader = CollectionReaderFactory.createReader(FilesCollectionReader.class,
          null, FilesCollectionReader.PARAM_ROOT_FILE, args[1]);

  AggregateBuilder builder = new AggregateBuilder();
  // getResourceAsStream returns null for a missing resource; fail with a
  // clear message instead of a confusing NullPointerException.
  java.io.InputStream descriptorStream =
          RunPipelineXmi.class.getClassLoader().getResourceAsStream(args[0]);
  if (descriptorStream == null) {
    throw new java.io.FileNotFoundException("Pipeline descriptor not found on classpath: " + args[0]);
  }
  AnalysisEngineDescription descriptor = (AnalysisEngineDescription) createResourceCreationSpecifier(
          new XMLInputSource(descriptorStream, new File(".")), new Object[0]);
  builder.add(descriptor);
  builder.add(XmiWriter.getDescription(new File(args[2])));

  SimplePipeline.runPipeline(reader, builder.createAggregateDescription());
}
 
开发者ID:IE4OpenData,项目名称:Octroy,代码行数:23,代码来源:RunPipelineXmi.java

示例7: main

import org.apache.uima.collection.CollectionReader; //导入依赖的package包/类
/**
 * Runs a UIMA pipeline described by an XML descriptor over a folder of text
 * files and writes the results to a TSV file.
 *
 * @param args {@code args[0]} classpath location of the XML pipeline
 *             descriptor, {@code args[1]} folder with input text files,
 *             {@code args[2]} path of the output TSV file
 * @throws Exception if the pipeline cannot be built or fails while running
 */
public static void main(String[] args) throws Exception {
  if (args.length != 3) {
    // Usage errors belong on stderr, not stdout.
    System.err.println("arguments: " //
            + "path to XML descriptor for pipeline, " //
            + "folder with text files, " //
            + "path to output tsv file");
    System.exit(-1);
  }

  // A collection reader that reads text files
  CollectionReader reader = CollectionReaderFactory.createReader(FilesCollectionReader.class,
          null, FilesCollectionReader.PARAM_ROOT_FILE, args[1]);

  AggregateBuilder builder = new AggregateBuilder();
  // getResourceAsStream returns null for a missing resource; fail with a
  // clear message instead of a confusing NullPointerException.
  java.io.InputStream descriptorStream =
          RunPipelineTsv.class.getClassLoader().getResourceAsStream(args[0]);
  if (descriptorStream == null) {
    throw new java.io.FileNotFoundException("Pipeline descriptor not found on classpath: " + args[0]);
  }
  AnalysisEngineDescription descriptor = (AnalysisEngineDescription) createResourceCreationSpecifier(
          new XMLInputSource(descriptorStream, new File(".")), new Object[0]);
  builder.add(descriptor);
  builder.add(TsvWriter.getDescription(new File(args[2])));

  SimplePipeline.runPipeline(reader, builder.createAggregateDescription());
}
 
开发者ID:IE4OpenData,项目名称:Octroy,代码行数:23,代码来源:RunPipelineTsv.java

示例8: main

import org.apache.uima.collection.CollectionReader; //导入依赖的package包/类
/**
 * Trains the reason classifier: reads serialized XMI CASes, runs a French
 * token/sentence annotator, writes classifier training data, then trains and
 * packages the model into a .jar file.
 *
 * @param args {@code args[0]} is the root folder of the XMI input files
 * @throws Exception if the pipeline or the trainer fails
 */
public static void main(String[] args) throws Exception {
	// Reader over previously serialized XMI CASes.
	CollectionReader xmiReader = CollectionReaderFactory.createReader(XReader.class, null, XReader.PARAM_ROOT_FILE,
			args[0]);

	// Assemble the annotator pipeline.
	AggregateBuilder pipeline = new AggregateBuilder();

	// French tokenizer / sentence splitter loaded from its XML descriptor.
	pipeline.add(UIMAFramework.getXMLParser().parseAnalysisEngineDescription(
			new XMLInputSource("src/main/resources/org/ie4opendata/octroy/SimpleFrenchTokenAndSentenceAnnotator.xml")));

	// Use this to get the parameters for the descriptor
	//System.out.println(ReasonAnnotator.getClassifierDescription("org/ie4opendata/octroy/reason/model.jar"));
	//System.exit(0);

	// Reason classifier configured to emit training data instead of predictions.
	pipeline.add(ReasonAnnotator.getWriterDescription("src/main/resources/org/ie4opendata/octroy/reason"));

	// Produce the training data from every CAS the reader yields...
	SimplePipeline.runPipeline(xmiReader, pipeline.createAggregateDescription());

	// ...then train a classifier on it and package it into a .jar file.
	Train.main("src/main/resources/org/ie4opendata/octroy/reason");
}
 
开发者ID:IE4OpenData,项目名称:Octroy,代码行数:25,代码来源:ReasonTrainer.java

示例9: testCollectionReader

import org.apache.uima.collection.CollectionReader; //导入依赖的package包/类
/**
 * Verifies that the reader yields one CAS per document, in order, when
 * nothing is skipped and all documents are processed.
 */
@Test
public void testCollectionReader() throws IOException, UIMAException {
	File collectionFile = createSampleCollectionFile();
	int skip = 0;
	int limit = -1; // a negative limit means: process every document
	CollectionReader reader = DocumentPerLineCollectionReader.createCollectionReader(TypeSystemUtil.getCcpTypeSystem(),
			collectionFile, skip, limit, TabDocumentExtractor.class, CcpDocumentMetadataHandler.class);

	JCasIterable cases = new JCasIterable(reader);

	// All three documents come back, in file order.
	for (String expectedText : new String[] { DOC1_TEXT, DOC2_TEXT, DOC3_TEXT }) {
		assertTrue(cases.hasNext());
		assertEquals(expectedText, cases.next().getDocumentText());
	}

	assertFalse(cases.hasNext());
}
 
开发者ID:UCDenver-ccp,项目名称:ccp-nlp,代码行数:25,代码来源:DocumentPerLineCollectionReaderTest.java

示例10: testCollectionReader_LimitingNumberProcessed

import org.apache.uima.collection.CollectionReader; //导入依赖的package包/类
/**
 * Verifies that limiting the number of processed documents to one yields
 * only the first document of the collection.
 */
@Test
public void testCollectionReader_LimitingNumberProcessed() throws IOException, UIMAException {
	File collectionFile = createSampleCollectionFile();
	int skip = 0;
	int limit = 1; // stop after the first document
	CollectionReader reader = DocumentPerLineCollectionReader.createCollectionReader(TypeSystemUtil.getCcpTypeSystem(),
			collectionFile, skip, limit, TabDocumentExtractor.class, CcpDocumentMetadataHandler.class);

	JCasIterable cases = new JCasIterable(reader);

	assertTrue(cases.hasNext());
	assertEquals(DOC1_TEXT, cases.next().getDocumentText());

	assertFalse(cases.hasNext());
}
 
开发者ID:UCDenver-ccp,项目名称:ccp-nlp,代码行数:17,代码来源:DocumentPerLineCollectionReaderTest.java

示例11: testCollectionReader_SkippingOneAndLimitingNumberProcessed

import org.apache.uima.collection.CollectionReader; //导入依赖的package包/类
/**
 * Verifies that skipping one document and processing one yields exactly the
 * second document of the collection.
 */
@Test
public void testCollectionReader_SkippingOneAndLimitingNumberProcessed() throws IOException, UIMAException {
	File collectionFile = createSampleCollectionFile();
	int skip = 1;
	int limit = 1; // skip the first document, then stop after one
	CollectionReader reader = DocumentPerLineCollectionReader.createCollectionReader(TypeSystemUtil.getCcpTypeSystem(),
			collectionFile, skip, limit, TabDocumentExtractor.class, CcpDocumentMetadataHandler.class);

	JCasIterable cases = new JCasIterable(reader);

	assertTrue(cases.hasNext());
	assertEquals(DOC2_TEXT, cases.next().getDocumentText());

	assertFalse(cases.hasNext());
}
 
开发者ID:UCDenver-ccp,项目名称:ccp-nlp,代码行数:17,代码来源:DocumentPerLineCollectionReaderTest.java

示例12: testClasspathCollectionReader

import org.apache.uima.collection.CollectionReader; //导入依赖的package包/类
/**
 * Verifies that the classpath reader yields all documents of the sample
 * classpath collection, in order, when nothing is skipped.
 */
@Test
public void testClasspathCollectionReader() throws UIMAException, IOException {
	int skip = 0;
	int limit = -1; // a negative limit means: process every document
	CollectionReader reader = ClasspathCollectionReader.createCollectionReader(TypeSystemUtil.getCcpTypeSystem(),
			SAMPLE_CLASSPATH_COLLECTION_PATH, skip, limit, CcpDocumentMetadataHandler.class);

	JCasIterable cases = new JCasIterable(reader);

	// All three documents come back, in collection order.
	for (String expectedText : new String[] { DOC1_TEXT, DOC2_TEXT, DOC3_TEXT }) {
		assertTrue(cases.hasNext());
		assertEquals(expectedText, cases.next().getDocumentText());
	}

	assertFalse(cases.hasNext());
}
 
开发者ID:UCDenver-ccp,项目名称:ccp-nlp,代码行数:24,代码来源:ClasspathCollectionReaderTest.java

示例13: testClasspathCollectionReader_SkippingOne

import org.apache.uima.collection.CollectionReader; //导入依赖的package包/类
/**
 * Verifies that skipping one document yields the remaining documents of the
 * classpath collection, in order.
 */
@Test
public void testClasspathCollectionReader_SkippingOne() throws UIMAException, IOException {
	int skip = 1;
	int limit = -1; // process everything after the skipped document
	CollectionReader reader = ClasspathCollectionReader.createCollectionReader(TypeSystemUtil.getCcpTypeSystem(),
			SAMPLE_CLASSPATH_COLLECTION_PATH, skip, limit, CcpDocumentMetadataHandler.class);

	JCasIterable cases = new JCasIterable(reader);

	// The first document is skipped; the rest come back in order.
	for (String expectedText : new String[] { DOC2_TEXT, DOC3_TEXT }) {
		assertTrue(cases.hasNext());
		assertEquals(expectedText, cases.next().getDocumentText());
	}

	assertFalse(cases.hasNext());
}
 
开发者ID:UCDenver-ccp,项目名称:ccp-nlp,代码行数:20,代码来源:ClasspathCollectionReaderTest.java

示例14: testClasspathCollectionReader_ProcessTwo

import org.apache.uima.collection.CollectionReader; //导入依赖的package包/类
/**
 * Verifies that limiting the number of processed documents to two yields
 * only the first two documents of the classpath collection.
 */
@Test
public void testClasspathCollectionReader_ProcessTwo() throws UIMAException, IOException {
	int skip = 0;
	int limit = 2; // stop after the second document
	CollectionReader reader = ClasspathCollectionReader.createCollectionReader(TypeSystemUtil.getCcpTypeSystem(),
			SAMPLE_CLASSPATH_COLLECTION_PATH, skip, limit, CcpDocumentMetadataHandler.class);

	JCasIterable cases = new JCasIterable(reader);

	// Exactly the first two documents come back, in order.
	for (String expectedText : new String[] { DOC1_TEXT, DOC2_TEXT }) {
		assertTrue(cases.hasNext());
		assertEquals(expectedText, cases.next().getDocumentText());
	}

	assertFalse(cases.hasNext());
}
 
开发者ID:UCDenver-ccp,项目名称:ccp-nlp,代码行数:20,代码来源:ClasspathCollectionReaderTest.java

示例15: testPubmedXmlCollectionReader

import org.apache.uima.collection.CollectionReader; //导入依赖的package包/类
/**
 * Verifies that each record in the sample PubMed XML file is converted into
 * a CAS whose document text is the title followed by the abstract, covering
 * the structured-abstract (record 8) and book (record 11) special cases.
 */
@Test
public void testPubmedXmlCollectionReader() throws UIMAException, IOException {
	int skip = 0;
	int limit = -1; // process all
	CollectionReader reader = PubmedXmlFileCollectionReader.createCollectionReader(TypeSystemUtil.getCcpTypeSystem(),
			samplePubmedXmlFile, skip, limit, CcpDocumentMetadataHandler.class);

	JCasIterable cases = new JCasIterable(reader);

	int docNum = 1;
	while (cases.hasNext()) {
		String expectedText;
		switch (docNum) {
		case 8:
			// Structured abstract: section labels are kept in the text.
			expectedText = "Title 8\n\nBACKGROUND: Abstract 8a\nOBJECTIVE: Abstract 8b\nMETHODS: Abstract 8c\nMETHODS: Abstract 8d\nMETHODS: Abstract 8e\nRESULTS: Abstract 8f\nCONCLUSIONS: Abstract 8";
			break;
		case 11:
			// Book entry: title plus four abstract paragraphs.
			expectedText = "Book Title 1\n\nBook Abstract 1a\n\nBook Abstract 1b\n\nBook Abstract 1c\n\nBook Abstract 1d";
			break;
		default:
			expectedText = String.format("Title %d\n\nAbstract %d", docNum, docNum);
		}
		assertEquals(expectedText, cases.next().getDocumentText());
		docNum++;
	}

	assertFalse(cases.hasNext());
}
 
开发者ID:UCDenver-ccp,项目名称:ccp-nlp,代码行数:24,代码来源:PubmedXmlFileCollectionReaderTest.java


注:本文中的org.apache.uima.collection.CollectionReader类示例由纯净天空整理自Github/MSDocs等开源代码及文档管理平台,相关代码片段筛选自各路编程大神贡献的开源项目,源码版权归原作者所有,传播和使用请参考对应项目的License;未经允许,请勿转载。