本文整理汇总了Java中org.apache.uima.collection.CollectionReader类的典型用法代码示例。如果您正苦于以下问题：Java CollectionReader类的具体用法？Java CollectionReader怎么用？Java CollectionReader使用的例子？那么，这里精选的类代码示例或许可以为您提供帮助。
CollectionReader类属于org.apache.uima.collection包，在下文中一共展示了CollectionReader类的15个代码示例，这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞，您的评价将有助于系统推荐出更棒的Java代码示例。
示例1: writeModel
import org.apache.uima.collection.CollectionReader; //导入依赖的package包/类
/**
 * Runs the NER training pipeline over a single POS-tagged file.
 *
 * <p>The pipeline is: files collection reader, {@code NERReader}, Snowball stemmer,
 * {@code NERAnnotator} configured with {@code PARAM_IS_TRAINING = true} and a
 * CRFsuite string-outcome data writer.
 *
 * @param posTagFile     input file; its absolute path and its name are both handed to the
 *                       files collection reader
 * @param modelDirectory value passed as the data writer's output directory
 * @param language       value passed as the Snowball stemmer language
 * @throws UIMAException declared; propagated from engine creation or pipeline execution
 * @throws IOException   declared; propagated from reader creation or pipeline execution
 */
public static void writeModel(File posTagFile, String modelDirectory, String language) throws UIMAException, IOException {
    CollectionReader trainingReader = FilesCollectionReader.getCollectionReaderWithSuffixes(
            posTagFile.getAbsolutePath(),
            NERReader.CONLL_VIEW,
            posTagFile.getName());

    AnalysisEngine stemmer = createEngine(
            SnowballStemmer.class,
            SnowballStemmer.PARAM_LANGUAGE, language);

    // Training mode: the annotator is wired to a data writer targeting modelDirectory.
    AnalysisEngine trainingAnnotator = createEngine(
            NERAnnotator.class,
            NERAnnotator.PARAM_FEATURE_EXTRACTION_FILE, "src/main/resources/feature/features.xml",
            NERAnnotator.PARAM_IS_TRAINING, true,
            DirectoryDataWriterFactory.PARAM_OUTPUT_DIRECTORY, modelDirectory,
            DefaultDataWriterFactory.PARAM_DATA_WRITER_CLASS_NAME, CrfSuiteStringOutcomeDataWriter.class);

    AnalysisEngine conllReader = createEngine(NERReader.class);

    runPipeline(trainingReader, conllReader, stemmer, trainingAnnotator);
}
示例2: classifyTestFile
import org.apache.uima.collection.CollectionReader; //导入依赖的package包/类
/**
 * Runs the NER classification pipeline over a single POS-tagged test file.
 *
 * <p>The pipeline is: files collection reader, {@code NERReader}, Snowball stemmer,
 * {@code NERAnnotator} configured with a classifier jar, and {@code NERWriter}, which is
 * configured with the file name {@code src/test/resources/evaluation/eval.txt}.
 *
 * @param modelDirectory directory containing {@code model.jar}; a trailing separator is
 *                       optional
 * @param testPosFile    input file; its absolute path and its name are both handed to the
 *                       files collection reader
 * @param language       value passed as the Snowball stemmer language
 * @throws ResourceInitializationException declared; propagated from engine creation
 * @throws UIMAException declared; propagated from engine creation or pipeline execution
 * @throws IOException   declared; propagated from reader creation or pipeline execution
 */
public static void classifyTestFile(String modelDirectory, File testPosFile, String language)
        throws ResourceInitializationException, UIMAException, IOException {
    // Plain concatenation yields e.g. "modelsmodel.jar" when the directory has no trailing
    // separator. Insert one only when it is missing; an empty directory keeps the
    // original relative "model.jar" result.
    boolean needsSeparator = !modelDirectory.isEmpty()
            && !modelDirectory.endsWith("/")
            && !modelDirectory.endsWith(File.separator);
    String modelJarPath = modelDirectory + (needsSeparator ? "/" : "") + "model.jar";

    CollectionReader testPosFileReader = FilesCollectionReader.getCollectionReaderWithSuffixes(
            testPosFile.getAbsolutePath(),
            NERReader.CONLL_VIEW,
            testPosFile.getName());
    AnalysisEngine nerReader = createEngine(NERReader.class);
    AnalysisEngine snowballStemmer = createEngine(
            SnowballStemmer.class,
            SnowballStemmer.PARAM_LANGUAGE, language);
    AnalysisEngine nerAnnotator = createEngine(
            NERAnnotator.class,
            NERAnnotator.PARAM_FEATURE_EXTRACTION_FILE, "src/main/resources/feature/features.xml",
            GenericJarClassifierFactory.PARAM_CLASSIFIER_JAR_PATH, modelJarPath);
    AnalysisEngine nerWriter = createEngine(
            NERWriter.class,
            NERWriter.PARAM_NULL_TYPE, "O",
            NERWriter.PARAM_EXPECTED_ENTITY_TYPE_NUM, 9,
            NERWriter.PARAM_FILENAME, "src/test/resources/evaluation/eval.txt",
            NERWriter.PARAM_VERBOSE, true);

    runPipeline(
            testPosFileReader,
            nerReader,
            snowballStemmer,
            nerAnnotator,
            nerWriter);
}
示例3: writeModel
import org.apache.uima.collection.CollectionReader; //导入依赖的package包/类
/**
 * Runs the NER training pipeline over a single POS-tagged file, writing into the directory
 * returned by {@code getModelDir()}.
 *
 * <p>The model directory (including missing parents) is created first; the result of
 * {@code mkdirs()} is not checked.
 *
 * @param posTagFile     input file; its absolute path and its name are both handed to the
 *                       files collection reader
 * @param language       value passed as the Snowball stemmer language
 * @param configFileName name appended to {@code FEATURE_EXTRACTOR_CONFIG_DIRECTORY} to form
 *                       the feature extraction file parameter
 * @throws UIMAException declared; propagated from engine creation or pipeline execution
 * @throws IOException   declared; propagated from reader creation or pipeline execution
 */
private void writeModel(File posTagFile, String language, String configFileName) throws UIMAException, IOException {
    File modelDirectory = new File(getModelDir());
    modelDirectory.mkdirs();

    CollectionReader trainingReader = FilesCollectionReader.getCollectionReaderWithSuffixes(
            posTagFile.getAbsolutePath(),
            NERReader.CONLL_VIEW,
            posTagFile.getName());

    AnalysisEngine stemmer = createEngine(
            SnowballStemmer.class,
            SnowballStemmer.PARAM_LANGUAGE, language);

    // Training mode: the annotator is wired to a data writer targeting the model directory.
    AnalysisEngine trainingAnnotator = createEngine(
            NERAnnotator.class,
            NERAnnotator.PARAM_FEATURE_EXTRACTION_FILE, FEATURE_EXTRACTOR_CONFIG_DIRECTORY + configFileName,
            NERAnnotator.PARAM_IS_TRAINING, true,
            DirectoryDataWriterFactory.PARAM_OUTPUT_DIRECTORY, getModelDir(),
            DefaultDataWriterFactory.PARAM_DATA_WRITER_CLASS_NAME, CrfSuiteStringOutcomeDataWriter.class);

    AnalysisEngine conllReader = createEngine(NERReader.class);

    runPipeline(trainingReader, conllReader, stemmer, trainingAnnotator);
}
示例4: train
import org.apache.uima.collection.CollectionReader; //导入依赖的package包/类
/**
 * Writes training data for the question-category classifier and then trains the model.
 *
 * <p>Step 1 runs an aggregate (URI-to-text, sentence, token, POS tagger, English Snowball
 * stemmer, gold category annotator, and {@code QuestionCategoryAnnotator} in training mode
 * with a LIBSVM string-outcome data writer) over every CAS from {@code collectionReader}.
 * Step 2 calls {@code Train.main} on {@code outputDirectory} with
 * {@code this.trainingArguments}.
 *
 * @param collectionReader source of the training documents
 * @param outputDirectory  directory given to the data writer and then to {@code Train.main}
 * @throws Exception declared; anything raised by the pipeline or the trainer is propagated
 */
@Override
public void train(CollectionReader collectionReader, File outputDirectory) throws Exception {
    AggregateBuilder builder = new AggregateBuilder();
    builder.add(UriToDocumentTextAnnotator.getDescription());
    builder.add(SentenceAnnotator.getDescription());
    builder.add(TokenAnnotator.getDescription());
    builder.add(PosTaggerAnnotator.getDescription());
    builder.add(DefaultSnowballStemmer.getDescription("English"));
    builder.add(AnalysisEngineFactory.createEngineDescription(GoldQuestionCategoryAnnotator.class));

    AnalysisEngineDescription documentClassificationAnnotator = AnalysisEngineFactory.createEngineDescription(
            QuestionCategoryAnnotator.class,
            CleartkAnnotator.PARAM_IS_TRAINING, true,
            DirectoryDataWriterFactory.PARAM_OUTPUT_DIRECTORY, outputDirectory,
            DefaultDataWriterFactory.PARAM_DATA_WRITER_CLASS_NAME, LibSvmStringOutcomeDataWriter.class.getName());
    builder.add(documentClassificationAnnotator);

    SimplePipeline.runPipeline(collectionReader, builder.createAggregateDescription());

    System.err.println("Train model and write model.jar file.");
    HideOutput hider = new HideOutput();
    try {
        Train.main(outputDirectory, this.trainingArguments.toArray(new String[this.trainingArguments.size()]));
    } finally {
        // Restore output even when training fails; otherwise the failure leaves it hidden
        // for the rest of the process.
        hider.restoreOutput();
    }
}
示例5: main
import org.apache.uima.collection.CollectionReader; //导入依赖的package包/类
/**
 * Command-line entry point: classifies every document under the configured test directory
 * and hands the results to {@code PrintClassificationsAnnotator}.
 *
 * @param args command-line arguments, parsed into {@code Options} by {@code CliFactory}
 * @throws Exception declared; anything raised during parsing, setup or the run is propagated
 */
public static void main(String[] args) throws Exception {
    Options cli = CliFactory.parseArguments(Options.class, args);

    // Reader over the test directory, configured with the two Reject* filter classes.
    CollectionReader testReader = UriCollectionReader.getCollectionReaderFromDirectory(
            cli.getTestDirectory(),
            UriCollectionReader.RejectSystemFiles.class,
            UriCollectionReader.RejectSystemDirectories.class);

    // Preprocessing followed by the classifier in non-training mode.
    AggregateBuilder pipeline = new AggregateBuilder();
    pipeline.add(UriToDocumentTextAnnotator.getDescription());
    pipeline.add(SentenceAnnotator.getDescription());
    pipeline.add(TokenAnnotator.getDescription());
    pipeline.add(PosTaggerAnnotator.getDescription());
    pipeline.add(DefaultSnowballStemmer.getDescription("English"));
    pipeline.add(AnalysisEngineFactory.createEngineDescription(
            QuestionCategoryAnnotator.class,
            CleartkAnnotator.PARAM_IS_TRAINING, false,
            GenericJarClassifierFactory.PARAM_CLASSIFIER_JAR_PATH,
            JarClassifierBuilder.getModelJarFile(cli.getModelsDirectory())));

    SimplePipeline.runPipeline(
            testReader,
            pipeline.createAggregateDescription(),
            AnalysisEngineFactory.createEngineDescription(PrintClassificationsAnnotator.class));
}
示例6: main
import org.apache.uima.collection.CollectionReader; //导入依赖的package包/类
/**
 * Command-line entry point: runs an XML-described pipeline over a folder of text files and
 * writes XMI output.
 *
 * <p>Expects exactly three arguments; otherwise prints a usage line and exits with status
 * {@code -1}. Note that {@code args[0]} is resolved as a classpath resource through this
 * class's class loader, not as a file-system path.
 *
 * @param args {@code [0]} pipeline descriptor resource, {@code [1]} folder with text files,
 *             {@code [2]} folder for the XMI output
 * @throws IllegalArgumentException if the descriptor resource cannot be found
 * @throws Exception declared; anything raised during setup or the run is propagated
 */
public static void main(String[] args) throws Exception {
    if (args.length != 3) {
        System.out.println("arguments: " //
                + "path to XML descriptor for pipeline, " //
                + "folder with text files, " //
                + "folder to output xmi files");
        System.exit(-1);
    }

    // A collection reader that reads text files
    CollectionReader reader = CollectionReaderFactory.createReader(FilesCollectionReader.class,
            null, FilesCollectionReader.PARAM_ROOT_FILE, args[1]);

    AggregateBuilder builder = new AggregateBuilder();

    // getResourceAsStream returns null for an unknown resource; fail with a clear message
    // instead of an obscure error from inside the XML parser.
    java.io.InputStream descriptorStream = RunPipelineXmi.class.getClassLoader().getResourceAsStream(args[0]);
    if (descriptorStream == null) {
        throw new IllegalArgumentException("pipeline descriptor not found on classpath: " + args[0]);
    }
    try {
        AnalysisEngineDescription descriptor = (AnalysisEngineDescription) createResourceCreationSpecifier(
                new XMLInputSource(descriptorStream, new File(".")), new Object[0]);
        builder.add(descriptor);
    } finally {
        // The descriptor stream is only needed while the descriptor is being parsed.
        descriptorStream.close();
    }

    builder.add(XmiWriter.getDescription(new File(args[2])));
    SimplePipeline.runPipeline(reader, builder.createAggregateDescription());
}
示例7: main
import org.apache.uima.collection.CollectionReader; //导入依赖的package包/类
/**
 * Command-line entry point: runs an XML-described pipeline over a folder of text files and
 * writes TSV output.
 *
 * <p>Expects exactly three arguments; otherwise prints a usage line and exits with status
 * {@code -1}. Note that {@code args[0]} is resolved as a classpath resource through this
 * class's class loader, not as a file-system path.
 *
 * @param args {@code [0]} pipeline descriptor resource, {@code [1]} folder with text files,
 *             {@code [2]} path of the TSV output file
 * @throws IllegalArgumentException if the descriptor resource cannot be found
 * @throws Exception declared; anything raised during setup or the run is propagated
 */
public static void main(String[] args) throws Exception {
    if (args.length != 3) {
        System.out.println("arguments: " //
                + "path to XML descriptor for pipeline, " //
                + "folder with text files, " //
                + "path to output tsv file");
        System.exit(-1);
    }

    // A collection reader that reads text files
    CollectionReader reader = CollectionReaderFactory.createReader(FilesCollectionReader.class,
            null, FilesCollectionReader.PARAM_ROOT_FILE, args[1]);

    AggregateBuilder builder = new AggregateBuilder();

    // getResourceAsStream returns null for an unknown resource; fail with a clear message
    // instead of an obscure error from inside the XML parser.
    java.io.InputStream descriptorStream = RunPipelineTsv.class.getClassLoader().getResourceAsStream(args[0]);
    if (descriptorStream == null) {
        throw new IllegalArgumentException("pipeline descriptor not found on classpath: " + args[0]);
    }
    try {
        AnalysisEngineDescription descriptor = (AnalysisEngineDescription) createResourceCreationSpecifier(
                new XMLInputSource(descriptorStream, new File(".")), new Object[0]);
        builder.add(descriptor);
    } finally {
        // The descriptor stream is only needed while the descriptor is being parsed.
        descriptorStream.close();
    }

    builder.add(TsvWriter.getDescription(new File(args[2])));
    SimplePipeline.runPipeline(reader, builder.createAggregateDescription());
}
示例8: main
import org.apache.uima.collection.CollectionReader; //导入依赖的package包/类
/**
 * Command-line entry point: writes training data for the reason classifier and trains it.
 *
 * <p>Reads input via {@code XReader} rooted at {@code args[0]}, runs the French
 * token/sentence descriptor followed by the {@code ReasonAnnotator} writer description,
 * then calls {@code Train.main} on the same output directory.
 *
 * @param args {@code [0]} root file passed to {@code XReader}; no length check is performed
 * @throws Exception declared; anything raised during setup, the run or training is propagated
 */
public static void main(String[] args) throws Exception {
    // Directory given both to the training-data writer and to the trainer.
    String reasonModelDir = "src/main/resources/org/ie4opendata/octroy/reason";

    // Reader producing one CAS per XMI found under args[0].
    CollectionReader xmiReader = CollectionReaderFactory.createReader(
            XReader.class, null, XReader.PARAM_ROOT_FILE, args[0]);

    // Annotator chain; further annotators can be added here if needed.
    AggregateBuilder annotators = new AggregateBuilder();
    annotators.add(UIMAFramework.getXMLParser().parseAnalysisEngineDescription(
            new XMLInputSource("src/main/resources/org/ie4opendata/octroy/SimpleFrenchTokenAndSentenceAnnotator.xml")));

    // To print the parameters for the classifier descriptor instead, enable:
    //System.out.println(ReasonAnnotator.getClassifierDescription("org/ie4opendata/octroy/reason/model.jar"));
    //System.exit(0);

    // Reason classifier annotator in its training-data-writing configuration.
    annotators.add(ReasonAnnotator.getWriterDescription(reasonModelDir));

    // Apply the annotators to every CAS the reader produces.
    SimplePipeline.runPipeline(xmiReader, annotators.createAggregateDescription());

    // Train on the written data and package the classifier as a .jar file.
    Train.main(reasonModelDir);
}
示例9: testCollectionReader
import org.apache.uima.collection.CollectionReader; //导入依赖的package包/类
/**
 * With no documents skipped and no processing limit, the reader must yield the three
 * sample documents in order and then be exhausted.
 */
@Test
public void testCollectionReader() throws IOException, UIMAException {
    File sampleFile = createSampleCollectionFile();
    int skipCount = 0;
    int processCount = -1; // process all
    CollectionReader reader = DocumentPerLineCollectionReader.createCollectionReader(
            TypeSystemUtil.getCcpTypeSystem(), sampleFile, skipCount, processCount,
            TabDocumentExtractor.class, CcpDocumentMetadataHandler.class);
    JCasIterable documents = new JCasIterable(reader);

    for (String expectedText : new String[] { DOC1_TEXT, DOC2_TEXT, DOC3_TEXT }) {
        assertTrue(documents.hasNext());
        assertEquals(expectedText, documents.next().getDocumentText());
    }
    assertFalse(documents.hasNext());
}
示例10: testCollectionReader_LimitingNumberProcessed
import org.apache.uima.collection.CollectionReader; //导入依赖的package包/类
/**
 * With a processing limit of one and nothing skipped, the reader must yield only the first
 * sample document.
 */
@Test
public void testCollectionReader_LimitingNumberProcessed() throws IOException, UIMAException {
    File sampleFile = createSampleCollectionFile();
    int skipCount = 0;
    int processCount = 1; // process one
    CollectionReader reader = DocumentPerLineCollectionReader.createCollectionReader(
            TypeSystemUtil.getCcpTypeSystem(), sampleFile, skipCount, processCount,
            TabDocumentExtractor.class, CcpDocumentMetadataHandler.class);
    JCasIterable documents = new JCasIterable(reader);

    assertTrue(documents.hasNext());
    assertEquals(DOC1_TEXT, documents.next().getDocumentText());
    assertFalse(documents.hasNext());
}
示例11: testCollectionReader_SkippingOneAndLimitingNumberProcessed
import org.apache.uima.collection.CollectionReader; //导入依赖的package包/类
/**
 * With one document skipped and a processing limit of one, the reader must yield only the
 * second sample document.
 */
@Test
public void testCollectionReader_SkippingOneAndLimitingNumberProcessed() throws IOException, UIMAException {
    File sampleFile = createSampleCollectionFile();
    int skipCount = 1;
    int processCount = 1; // process one
    CollectionReader reader = DocumentPerLineCollectionReader.createCollectionReader(
            TypeSystemUtil.getCcpTypeSystem(), sampleFile, skipCount, processCount,
            TabDocumentExtractor.class, CcpDocumentMetadataHandler.class);
    JCasIterable documents = new JCasIterable(reader);

    assertTrue(documents.hasNext());
    assertEquals(DOC2_TEXT, documents.next().getDocumentText());
    assertFalse(documents.hasNext());
}
示例12: testClasspathCollectionReader
import org.apache.uima.collection.CollectionReader; //导入依赖的package包/类
/**
 * With no documents skipped and no processing limit, the classpath reader must yield the
 * three sample documents in order and then be exhausted.
 */
@Test
public void testClasspathCollectionReader() throws UIMAException, IOException {
    int skipCount = 0;
    int processCount = -1;
    CollectionReader reader = ClasspathCollectionReader.createCollectionReader(
            TypeSystemUtil.getCcpTypeSystem(), SAMPLE_CLASSPATH_COLLECTION_PATH,
            skipCount, processCount, CcpDocumentMetadataHandler.class);
    JCasIterable documents = new JCasIterable(reader);

    for (String expectedText : new String[] { DOC1_TEXT, DOC2_TEXT, DOC3_TEXT }) {
        assertTrue(documents.hasNext());
        assertEquals(expectedText, documents.next().getDocumentText());
    }
    assertFalse(documents.hasNext());
}
示例13: testClasspathCollectionReader_SkippingOne
import org.apache.uima.collection.CollectionReader; //导入依赖的package包/类
/**
 * With one document skipped and no processing limit, the classpath reader must yield the
 * second and third sample documents and then be exhausted.
 */
@Test
public void testClasspathCollectionReader_SkippingOne() throws UIMAException, IOException {
    int skipCount = 1;
    int processCount = -1;
    CollectionReader reader = ClasspathCollectionReader.createCollectionReader(
            TypeSystemUtil.getCcpTypeSystem(), SAMPLE_CLASSPATH_COLLECTION_PATH,
            skipCount, processCount, CcpDocumentMetadataHandler.class);
    JCasIterable documents = new JCasIterable(reader);

    assertTrue(documents.hasNext());
    assertEquals(DOC2_TEXT, documents.next().getDocumentText());

    assertTrue(documents.hasNext());
    assertEquals(DOC3_TEXT, documents.next().getDocumentText());

    assertFalse(documents.hasNext());
}
示例14: testClasspathCollectionReader_ProcessTwo
import org.apache.uima.collection.CollectionReader; //导入依赖的package包/类
/**
 * With nothing skipped and a processing limit of two, the classpath reader must yield the
 * first and second sample documents and then be exhausted.
 */
@Test
public void testClasspathCollectionReader_ProcessTwo() throws UIMAException, IOException {
    int skipCount = 0;
    int processCount = 2;
    CollectionReader reader = ClasspathCollectionReader.createCollectionReader(
            TypeSystemUtil.getCcpTypeSystem(), SAMPLE_CLASSPATH_COLLECTION_PATH,
            skipCount, processCount, CcpDocumentMetadataHandler.class);
    JCasIterable documents = new JCasIterable(reader);

    assertTrue(documents.hasNext());
    assertEquals(DOC1_TEXT, documents.next().getDocumentText());

    assertTrue(documents.hasNext());
    assertEquals(DOC2_TEXT, documents.next().getDocumentText());

    assertFalse(documents.hasNext());
}
示例15: testPubmedXmlCollectionReader
import org.apache.uima.collection.CollectionReader; //导入依赖的package包/类
/**
 * Iterates over every document the PubMed XML reader yields and checks its text.
 *
 * <p>Documents are numbered from 1. Number 8 is expected to carry a multi-section abstract
 * and number 11 a book record; every other document is expected to follow the
 * "Title n / Abstract n" pattern.
 */
@Test
public void testPubmedXmlCollectionReader() throws UIMAException, IOException {
    int skipCount = 0;
    int processCount = -1; // process all
    CollectionReader reader = PubmedXmlFileCollectionReader.createCollectionReader(
            TypeSystemUtil.getCcpTypeSystem(), samplePubmedXmlFile,
            skipCount, processCount, CcpDocumentMetadataHandler.class);
    JCasIterable documents = new JCasIterable(reader);

    for (int docNumber = 1; documents.hasNext(); docNumber++) {
        String expectedText;
        if (docNumber == 8) {
            expectedText = "Title 8\n\nBACKGROUND: Abstract 8a\nOBJECTIVE: Abstract 8b\nMETHODS: Abstract 8c\nMETHODS: Abstract 8d\nMETHODS: Abstract 8e\nRESULTS: Abstract 8f\nCONCLUSIONS: Abstract 8";
        } else if (docNumber == 11) {
            expectedText = "Book Title 1\n\nBook Abstract 1a\n\nBook Abstract 1b\n\nBook Abstract 1c\n\nBook Abstract 1d";
        } else {
            expectedText = String.format("Title %d\n\nAbstract %d", docNumber, docNumber);
        }
        assertEquals(expectedText, documents.next().getDocumentText());
    }
    assertFalse(documents.hasNext());
}