當前位置: 首頁>>代碼示例>>Java>>正文


Java CAS.getJCas方法代碼示例

本文整理匯總了Java中org.apache.uima.cas.CAS.getJCas方法的典型用法代碼示例。如果您正苦於以下問題:Java CAS.getJCas方法的具體用法?Java CAS.getJCas怎麽用?Java CAS.getJCas使用的例子?那麽, 這裏精選的方法代碼示例或許可以為您提供幫助。您也可以進一步了解該方法所在org.apache.uima.cas.CAS的用法示例。


在下文中一共展示了CAS.getJCas方法的5個代碼示例,這些例子默認根據受歡迎程度排序。您可以為喜歡或者感覺有用的代碼點讚,您的評價將有助於系統推薦出更棒的Java代碼示例。

示例1: getNext

import org.apache.uima.cas.CAS; //導入方法依賴的package包/類
/**
 * Fills the given CAS with the next document and adds the annotations this reader provides.
 *
 * <p>After delegating to {@code super.getNext}, the method adds a {@code JCasId} carrying the
 * running {@code jcasId} counter, a {@code TextClassificationOutcome} whose value comes from
 * {@code getTextClassificationOutcome}, and, unless {@code suppress} is set, a
 * {@code TextClassificationTarget} spanning the whole document text.
 *
 * @param aCAS the CAS to populate
 * @throws IOException declared for the superclass call
 * @throws CollectionException if the JCas view cannot be obtained; the underlying
 *         {@code CASException} is attached as the cause
 */
@Override
public void getNext(CAS aCAS)
    throws IOException, CollectionException
{
    super.getNext(aCAS);

    JCas jcas;
    try {
        jcas = aCAS.getJCas();
        JCasId id = new JCasId(jcas);
        id.setId(jcasId++);
        id.addToIndexes();
    }
    catch (CASException e) {
        // Pass the original failure along as the cause instead of discarding it.
        throw new CollectionException(e);
    }

    TextClassificationOutcome outcome = new TextClassificationOutcome(jcas);
    outcome.setOutcome(getTextClassificationOutcome(jcas));
    outcome.addToIndexes();

    if (!suppress) {
        // The target covers the document from offset 0 to the end of its text.
        new TextClassificationTarget(jcas, 0, jcas.getDocumentText().length()).addToIndexes();
    }
}
 
開發者ID:Horsmann,項目名稱:FlexTag,代碼行數:26,代碼來源:TestReaderSingleLabel.java

示例2: getNext

import org.apache.uima.cas.CAS; //導入方法依賴的package包/類
/**
 * Populates the CAS from the next record of {@code recordsIterator}.
 *
 * <p>Columns as read below: 0 = document id, 1 = document text, 2 = timestamp,
 * 3 = language (optional; {@code defaultLanguage} is used when the record has three
 * columns or fewer).
 *
 * @param cas the CAS to populate
 * @throws IOException declared by the signature; not thrown by the visible code
 * @throws CollectionException if the JCas view cannot be obtained
 */
public void getNext(CAS cas) throws IOException, CollectionException {
	// The counter is advanced before anything else, including the JCas lookup.
	currentRecord++;

	JCas view;
	try {
		view = cas.getJCas();
	} catch (CASException casFailure) {
		throw new CollectionException(casFailure);
	}

	CSVRecord row = recordsIterator.next();
	String documentId = row.get(0);

	// Ideas noted by the original author, not implemented here: split the text into
	// paragraphs, annotate each paragraph, detect the language per paragraph and use
	// the majority as the document language.
	view.setDocumentText(row.get(1));
	if (row.size() > 3) {
		view.setDocumentLanguage(row.get(3));
	} else {
		view.setDocumentLanguage(defaultLanguage);
	}

	Metadata metadata = new Metadata(view);
	metadata.setDocId(documentId);
	metadata.setTimestamp(row.get(2));
	metadata.addToIndexes();

	// NOTE: an earlier, disabled draft iterated over metadataIterator at this point;
	// no additional metadata records are read by this method.
}
 
開發者ID:tudarmstadt-lt,項目名稱:newsleak-frontend,代碼行數:41,代碼來源:NewsleakCsvStreamReader.java

示例3: getNext

import org.apache.uima.cas.CAS; //導入方法依賴的package包/類
/**
 * Populates the CAS with the document whose id is stored at index {@code currentRecord}
 * of {@code totalIdList}, fetching its "Content" and "Created" fields through
 * {@code client}.
 *
 * <p>Besides text and language, a {@code Metadata} annotation (document id and timestamp)
 * and a {@code Dct} annotation (same timestamp value) are added to the indexes.
 * {@code currentRecord} is advanced only after all of that has succeeded.
 *
 * @param cas the CAS to populate
 * @throws IOException declared by the signature; not thrown by the visible code
 * @throws CollectionException if the JCas view cannot be obtained
 */
public void getNext(CAS cas) throws IOException, CollectionException {
	JCas view;
	try {
		view = cas.getJCas();
	} catch (CASException casFailure) {
		throw new CollectionException(casFailure);
	}

	String documentId = totalIdList.get(currentRecord);
	GetResponse hit = client
			.prepareGet(esIndex, ElasticsearchDocumentWriter.ES_TYPE_DOCUMENT, documentId)
			.setFields("Content", "Created")
			.get();

	String content = (String) hit.getField("Content").getValue();
	view.setDocumentText(content);
	view.setDocumentLanguage(language);

	// Document-level metadata: id and creation timestamp.
	Metadata metadata = new Metadata(view);
	metadata.setDocId(documentId);
	String created = (String) hit.getField("Created").getValue();
	metadata.setTimestamp(created);
	metadata.addToIndexes();

	// The same creation timestamp is stored as a Dct annotation (the original comment
	// labels this step "heideltime").
	Dct creationTime = new Dct(view);
	creationTime.setValue(created);
	creationTime.addToIndexes();

	currentRecord++;
}
 
開發者ID:tudarmstadt-lt,項目名稱:newsleak-frontend,代碼行數:32,代碼來源:NewsleakElasticsearchReader.java

示例4: main

import org.apache.uima.cas.CAS; //導入方法依賴的package包/類
/**
 * Reads every file of an input folder as XMI and writes the tokenized sentences to a
 * text file, one sentence per line with tokens separated by spaces. Spans covered by
 * {@code Company} annotations are wrapped in {@code <START:company>} ... {@code <END>}
 * markers, and documents are separated by an empty line.
 *
 * <p>A {@code DocumentAnnotation} is added when the XMI has none, and the analysis engine
 * is run only when the XMI contains no {@code Sentence} annotations.
 *
 * @param args {@code args[0]} is the input folder with XMI files, {@code args[1]} the
 *        output file
 * @throws IOException if the input folder cannot be listed or a file cannot be read or
 *         written
 */
public static void main(String[] args) throws IOException, InvalidXMLException,
        ResourceInitializationException, AnalysisEngineProcessException, CASException,
        SAXException {
  if (args.length != 2) {
    System.err.println("Usage: XmiToOpenNlpTrainer <input folder with xmis> <output file>");
    // Stop here: the code below indexes args[0] and args[1].
    return;
  }

  // listFiles() returns null when the path is not a directory or cannot be read.
  File[] inputFiles = new File(args[0]).listFiles();
  if (inputFiles == null) {
    throw new IOException("Cannot list input folder: " + args[0]);
  }

  AnalysisEngineDescription descriptor = (AnalysisEngineDescription) createResourceCreationSpecifier(
          new XMLInputSource(XmiToOpenNlpTrainer.class.getClassLoader().getResourceAsStream(
                  "org/ie4opendata/octroy/SimpleFrenchTokenAndSentenceAnnotator.xml"), new File(
                  ".")), new Object[0]);
  AnalysisEngine engine = AnalysisEngineFactory.createEngine(descriptor);
  CAS cas = engine.newCAS();

  PrintWriter pw = new PrintWriter(new FileWriter(args[1]));
  try {
    for (File file : inputFiles) {
      cas.reset();

      // Close the XMI stream even when deserialization fails.
      InputStream is = new FileInputStream(file);
      try {
        XmiCasDeserializer.deserialize(is, cas, true);
      } finally {
        is.close();
      }

      JCas jcas = cas.getJCas();

      if (!JCasUtil.exists(jcas, DocumentAnnotation.class)) {
        DocumentAnnotation documentAnnotation = new DocumentAnnotation(jcas);
        documentAnnotation.setDocumentName(file.getName());
        documentAnnotation.setClassified(false);
        documentAnnotation.addToIndexes();
      }

      // Only tokenize / sentence-split when the XMI does not already carry sentences.
      if (!JCasUtil.exists(jcas, Sentence.class)) {
        engine.process(cas);
      }

      // Each entry is {begin, end} of one Company annotation, in the order select() returns them.
      List<int[]> companyBoundaries = new ArrayList<int[]>();
      for (Company company : JCasUtil.select(jcas, Company.class)) {
        companyBoundaries.add(new int[] { company.getBegin(), company.getEnd() });
      }

      // Index of the company currently open or expected next, and whether we are inside it.
      int currentCompany = 0;
      boolean inCompany = false;

      // one sentence per line, one token separated by spaces
      for (Sentence sentence : JCasUtil.select(jcas, Sentence.class)) {
        if (inCompany) {
          // A company was still open when the previous sentence ended (its <END> was
          // already written there): either it is over by now, or it continues and the
          // start marker is repeated on this new line.
          if (sentence.getBegin() >= companyBoundaries.get(currentCompany)[1]) {
            inCompany = false;
            currentCompany++;
          } else {
            pw.print(" <START:company> ");
          }
        }

        for (Token token : JCasUtil.selectCovered(Token.class, sentence)) {
          if (currentCompany < companyBoundaries.size()) {
            if (inCompany) {
              if (token.getBegin() >= companyBoundaries.get(currentCompany)[1]) {
                // NOTE(review): println ends the output line in the middle of the
                // sentence here - confirm this is intended.
                pw.println(" <END> ");
                inCompany = false;
                currentCompany++;
              }
            } else {
              if (token.getBegin() >= companyBoundaries.get(currentCompany)[0]) {
                pw.print(" <START:company> ");
                inCompany = true;
              }
            }
          }
          pw.print(token.getCoveredText() + " ");
        }
        if (inCompany) {
          // Close the marker at the end of the sentence; inCompany stays set so the next
          // sentence can decide whether the company continues.
          pw.println(" <END> ");
        }
        pw.println();
      }
      // each document separated by an empty line
      pw.println();
    }
  } finally {
    // Flush and release the output file even if processing a document failed.
    pw.close();
  }
}
 
開發者ID:IE4OpenData,項目名稱:Octroy,代碼行數:79,代碼來源:XmiToOpenNlpTrainer.java

示例5: main

import org.apache.uima.cas.CAS; //導入方法依賴的package包/類
/**
 * Reads every file of an input folder as plain text, runs the analysis engine on it and
 * writes the result to a text file: one sentence per line, tokens separated by spaces,
 * documents separated by an empty line.
 *
 * <p>Each document is given the language "fr" and a {@code DocumentAnnotation} carrying
 * the file name before the engine is run.
 *
 * @param args {@code args[0]} is the input folder, {@code args[1]} the output file
 * @throws IOException if the input folder cannot be listed or a file cannot be read or
 *         written
 */
public static void main(String[] args) throws IOException, InvalidXMLException, ResourceInitializationException,
		AnalysisEngineProcessException, CASException {
	if (args.length != 2) {
		System.err.println("Usage: OpenNlpTrainerExtractor <input folder> <output file>");
		// Stop here: the code below indexes args[0] and args[1].
		return;
	}

	// listFiles() returns null when the path is not a directory or cannot be read.
	File[] inputFiles = new File(args[0]).listFiles();
	if (inputFiles == null) {
		throw new IOException("Cannot list input folder: " + args[0]);
	}

	AnalysisEngineDescription descriptor = (AnalysisEngineDescription) createResourceCreationSpecifier(
			new XMLInputSource(OpenNlpTrainerExtractor.class.getClassLoader().getResourceAsStream(
					"org/ie4opendata/octroy/SimpleFrenchTokenAndSentenceAnnotator.xml"), new File(".")),
			new Object[0]);
	AnalysisEngine engine = AnalysisEngineFactory.createEngine(descriptor);
	CAS cas = engine.newCAS();

	PrintWriter pw = new PrintWriter(new FileWriter(args[1]));
	try {
		for (File file : inputFiles) {
			// Read the whole file, normalizing every line ending to '\n'.
			// NOTE(review): FileReader decodes with the platform default charset - confirm
			// the input files match it.
			StringBuilder doc = new StringBuilder();
			BufferedReader br = new BufferedReader(new FileReader(file));
			try {
				String line = br.readLine();
				while (line != null) {
					doc.append(line).append('\n');
					line = br.readLine();
				}
			} finally {
				// Release the file handle even when reading fails.
				br.close();
			}

			cas.reset();
			cas.setDocumentText(doc.toString());
			cas.setDocumentLanguage("fr");

			JCas jcas = cas.getJCas();

			DocumentAnnotation documentAnnotation = new DocumentAnnotation(jcas);
			documentAnnotation.setDocumentName(file.getName());
			documentAnnotation.setClassified(false);
			documentAnnotation.addToIndexes();

			engine.process(cas);

			// one sentence per line, one token separated by spaces
			for (Sentence sentence : JCasUtil.select(jcas, Sentence.class)) {
				for (Token token : JCasUtil.selectCovered(Token.class, sentence)) {
					pw.print(token.getCoveredText() + " ");
				}
				pw.println();
			}
			// each document separated by an empty line
			pw.println();
		}
	} finally {
		// Flush and release the output file even if processing a document failed.
		pw.close();
	}
}
 
開發者ID:IE4OpenData,項目名稱:Octroy,代碼行數:51,代碼來源:OpenNlpTrainerExtractor.java


注:本文中的org.apache.uima.cas.CAS.getJCas方法示例由純淨天空整理自Github/MSDocs等開源代碼及文檔管理平台,相關代碼片段篩選自各路編程大神貢獻的開源項目,源碼版權歸原作者所有,傳播和使用請參考對應項目的License;未經允許,請勿轉載。