本文整理汇总了Java中eu.clarin.weblicht.wlfxb.tc.xb.TextCorpusStored类的典型用法代码示例。如果您正苦于以下问题：Java TextCorpusStored类的具体用法？Java TextCorpusStored怎么用？Java TextCorpusStored使用的例子？那么，这里精选的类代码示例或许可以为您提供帮助。
TextCorpusStored类属于eu.clarin.weblicht.wlfxb.tc.xb包，在下文中一共展示了TextCorpusStored类的15个代码示例，这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞，您的评价将有助于系统推荐出更棒的Java代码示例。
示例1: casToTcfWriter
import eu.clarin.weblicht.wlfxb.tc.xb.TextCorpusStored; //导入依赖的package包/类
/**
 * Builds a new TCF document from a CAS and serializes it to a stream.
 * <p>
 * The text corpus is created with the CAS document language, receives the
 * CAS document text as its text layer, and is then passed to
 * {@code write(aJCas, corpus)} (presumably to fill the remaining annotation
 * layers) before being wrapped in a {@code WLData} and written out.
 *
 * @param aJCas
 *            the JCas supplying language, text and annotations.
 * @param aOs
 *            the output stream that receives the serialized TCF data.
 * @throws WLFormatException
 *             if a TCF problem occurs.
 */
public void casToTcfWriter(JCas aJCas, OutputStream aOs)
    throws WLFormatException
{
    // The corpus language is taken from the CAS.
    String language = aJCas.getDocumentLanguage();
    TextCorpusStored corpus = new TextCorpusStored(language);

    // The text layer holds the document text.
    String documentText = aJCas.getDocumentText();
    corpus.createTextLayer().addText(documentText);

    write(aJCas, corpus);

    // Wrap the corpus and serialize it to the caller's stream.
    WLDObjector.write(new WLData(corpus), aOs);
}
示例2: processLayers
import eu.clarin.weblicht.wlfxb.tc.xb.TextCorpusStored; //导入依赖的package包/类
/**
 * Consumes events up to (but not including) the closing tag named
 * {@code TextCorpusStored.XML_NAME}.
 * <p>
 * Every start element encountered is handed to {@code processLayer()};
 * every other event is read via {@code xmlReaderWriter.readEvent()} and passed
 * to {@code xmlReaderWriter.add(...)}.
 *
 * @throws WLFormatException if the underlying XML stream fails or the stream
 *         ends before the closing tag is seen.
 */
private void processLayers() throws WLFormatException {
    boolean closingTagSeen = false;
    try {
        XMLEvent upcoming = xmlEventReader.peek();
        while (upcoming != null) {
            int eventType = upcoming.getEventType();
            if (eventType == XMLStreamConstants.END_ELEMENT
                    && upcoming.asEndElement().getName().getLocalPart().equals(TextCorpusStored.XML_NAME)) {
                // Leave the closing tag unconsumed and stop scanning.
                closingTagSeen = true;
                break;
            }
            if (eventType == XMLStreamConstants.START_ELEMENT) {
                processLayer();
            } else {
                xmlReaderWriter.add(xmlReaderWriter.readEvent());
            }
            upcoming = xmlEventReader.peek();
        }
    } catch (XMLStreamException e) {
        throw new WLFormatException(e.getMessage(), e);
    }
    if (!closingTagSeen) {
        throw new WLFormatException(TextCorpusStored.XML_NAME + " end tag not found");
    }
}
示例3: ExternalDataWithTextCorpusStreamed
import eu.clarin.weblicht.wlfxb.tc.xb.TextCorpusStored; //导入依赖的package包/类
/**
 * Constructs an {@code ExternalDataWithTextCorpusStreamed} that reads the
 * requested annotation layers from a TCF input stream.
 * <p>
 * On a format failure the error is logged at {@code SEVERE} level,
 * {@code cleanup()} is invoked and the exception is rethrown.
 *
 * @param inputStream the underlying input stream with linguistic annotations
 *        in TCF format.
 * @param edLayersToRead the {@code ExternalData} annotation layers that should
 *        be read into this instance.
 * @param tcLayersToRead the {@code TextCorpus} annotation layers that should
 *        be read into this instance.
 *
 * @throws WLFormatException if an error in input format or an I/O error occurs.
 */
public ExternalDataWithTextCorpusStreamed(InputStream inputStream,
        EnumSet<ExternalDataLayerTag> edLayersToRead,
        EnumSet<TextCorpusLayerTag> tcLayersToRead)
        throws WLFormatException {
    getLayersToReadWithDependencies(edLayersToRead, tcLayersToRead);

    // Empty containers; the corpus starts with the placeholder language "unknown".
    extData = new ExternalDataStored();
    textCorpus = new TextCorpusStored("unknown");

    try {
        initializeReaderAndWriter(inputStream, null, false);
        readLayers();
    } catch (WLFormatException formatProblem) {
        Logger logger = Logger.getLogger(ExternalDataWithTextCorpusStreamed.class.getName());
        logger.log(Level.SEVERE, null, formatProblem);
        cleanup();
        throw formatProblem;
    }
}
示例4: readUpToExtDataOrTextCorpus
import eu.clarin.weblicht.wlfxb.tc.xb.TextCorpusStored; //导入依赖的package包/类
/**
 * Advances the stream until the next event is the start tag of either the
 * external-data element or the text-corpus element, and records which one was
 * found in {@code hasExtData} / {@code hasTextCorpus}.
 * <p>
 * The start tag itself is only peeked, not consumed. Every event before it is
 * read via {@code xmlReaderWriter.readEvent()} and passed to
 * {@code xmlReaderWriter.add(...)}. Nothing is read if both flags are already
 * set on entry.
 *
 * @throws WLFormatException if the underlying XML stream fails, or if the
 *         stream ends before one of the two start tags is found.
 */
private void readUpToExtDataOrTextCorpus() throws WLFormatException {
    try {
        XMLEvent peekedEvent = xmlEventReader.peek();
        while (!hasExtData || !hasTextCorpus) {
            // peek() yields null once the stream is exhausted; report that as a
            // format error instead of failing with a NullPointerException.
            if (peekedEvent == null) {
                throw new WLFormatException(ExternalDataStored.XML_NAME + " or "
                        + TextCorpusStored.XML_NAME + " start tag not found");
            }
            if (peekedEvent.getEventType() == XMLStreamConstants.START_ELEMENT) {
                String localPart = peekedEvent.asStartElement().getName().getLocalPart();
                // Null-safe comparisons, equivalent to the original checks.
                boolean isExtData = localPart == null
                        ? ExternalDataStored.XML_NAME == null
                        : localPart.equals(ExternalDataStored.XML_NAME);
                boolean isTextCorpus = localPart == null
                        ? TextCorpusStored.XML_NAME == null
                        : localPart.equals(TextCorpusStored.XML_NAME);
                if (isExtData) {
                    this.hasExtData = true;
                    return;
                }
                if (isTextCorpus) {
                    this.hasTextCorpus = true;
                    return;
                }
            }
            // Not one of the two start tags: read the event and pass it on.
            XMLEvent readEvent = xmlReaderWriter.readEvent();
            xmlReaderWriter.add(readEvent);
            peekedEvent = xmlEventReader.peek();
        }
    } catch (XMLStreamException e) {
        throw new WLFormatException(e.getMessage(), e);
    }
}
示例5: createTestTextCorpus
import eu.clarin.weblicht.wlfxb.tc.xb.TextCorpusStored; //导入依赖的package包/类
/**
 * Creates a German ("de") test corpus with a text layer and a tokens layer.
 * <p>
 * The tokens are the text split on single spaces, so the token layer mirrors
 * the text layer.
 *
 * @return the populated text corpus.
 */
private TextCorpusStored createTestTextCorpus() {
    TextCorpusStored textCorpus = new TextCorpusStored("de");
    String text = "<ähm> ich würde diesmal sagen Theater das ist dann immer so aufge in letzter "
            + "Minute so was spielt man heute abend ich wäre eher dafür daß wir vielleicht ins Kino "
            + "gehen und nachher irgendwo in eine nette Kneipe";
    textCorpus.createTextLayer().addText(text);
    TokensLayer tokensLayer = textCorpus.createTokensLayer();
    // Tokenize the text itself rather than a duplicated literal; the returned
    // Token objects are not needed here.
    for (String tokenString : text.split(" ")) {
        tokensLayer.addToken(tokenString);
    }
    return textCorpus;
}
示例6: createTestTextCorpus
import eu.clarin.weblicht.wlfxb.tc.xb.TextCorpusStored; //导入依赖的package包/类
/**
 * Creates an English ("en") test corpus with text, tokens and sentences layers.
 * <p>
 * The two sentences cover tokens 0-4 and 5-10 and are registered with the
 * offsets 0-15 and 16-44.
 *
 * @return the populated text corpus.
 */
private TextCorpusStored createTestTextCorpus() {
    final String rawText = "This is a test. This is the second sentence.";
    final String spaceSeparatedTokens = "This is a test . This is the second sentence .";

    TextCorpusStored corpus = new TextCorpusStored("en");
    corpus.createTextLayer().addText(rawText);

    // Collect the created tokens so the sentences can reference them.
    TokensLayer tokenLayer = corpus.createTokensLayer();
    List<Token> created = new ArrayList<Token>();
    for (String surface : spaceSeparatedTokens.split(" ")) {
        created.add(tokenLayer.addToken(surface));
    }

    SentencesLayer sentenceLayer = corpus.createSentencesLayer();
    sentenceLayer.addSentence(created.subList(0, 5), 0, 15);
    sentenceLayer.addSentence(created.subList(5, 11), 16, 44);

    return corpus;
}
示例7: TCFWriter
import eu.clarin.weblicht.wlfxb.tc.xb.TextCorpusStored; //导入依赖的package包/类
/**
 * Creates a TCF writer backed by a new text corpus for the given language.
 * <p>
 * Sentences and tokens layers are always created. The lemmas, POS-tags and
 * dependency-parsing layers are created only when requested; otherwise the
 * corresponding field is set to {@code null}.
 *
 * @param writer destination stored for later output.
 * @param language language passed to the new {@code TextCorpusStored}.
 * @param lemmas whether a lemmas layer should be created.
 * @param posTagset tagset for the POS-tags layer, if one is wanted.
 * @param dependencyTagset tagset for the dependency-parsing layer, if one is wanted.
 */
public TCFWriter(Writer writer, String language, boolean lemmas, Optional<String> posTagset,
        Optional<String> dependencyTagset) {
    corpusWriter = writer;
    corpus = new TextCorpusStored(language);

    // Mandatory layers.
    sentencesLayer = corpus.createSentencesLayer();
    tokensLayer = corpus.createTokensLayer();

    // Optional layers.
    if (lemmas) {
        lemmasLayer = corpus.createLemmasLayer();
    } else {
        lemmasLayer = null;
    }

    if (posTagset.isPresent()) {
        posTagsLayer = corpus.createPosTagsLayer(posTagset.get());
    } else {
        posTagsLayer = null;
    }

    if (dependencyTagset.isPresent()) {
        // NOTE(review): the meaning of the two boolean flags is not visible here; confirm against the wlfxb API.
        dependencyLayer = corpus.createDependencyParsingLayer(dependencyTagset.get(), true, false);
    } else {
        dependencyLayer = null;
    }
}
示例8: testReadWrite
import eu.clarin.weblicht.wlfxb.tc.xb.TextCorpusStored; //导入依赖的package包/类
/**
 * Builds a German corpus with text, tokens and a text-structure layer, writes
 * it to the output file and compares the result with the expected XML.
 */
@Test
public void testReadWrite() throws Exception {
    // Output target.
    String outfile = testFolder.getRoot() + File.separator + OUTPUT_FILE;
    File target = new File(outfile);

    MetaData metaData = new MetaData();
    TextCorpusStored corpus = new TextCorpusStored("de");

    // Text layer.
    TextLayer textLayer = corpus.createTextLayer();
    textLayer.addText("Peter aß eine Käsepizza. Sie schmeckte ihm.");

    // Tokens layer.
    TokensLayer tokenLayer = corpus.createTokensLayer();
    for (String surface : tokenstrings) {
        tokenLayer.addToken(surface);
    }

    // Text-structure layer: spans with and without token/offset information.
    TextStructureLayer structure = corpus.createTextStructureLayer();
    TextSpan page = structure.addSpan(tokenLayer.getToken(0), tokenLayer.getToken(4), "page");
    structure.addSpan(page, null, null, "number", "1");
    structure.addSpan(null, null, "line");
    structure.addSpan(tokenLayer.getToken(0), tokenLayer.getToken(8), "paragraph", 0, 45);
    structure.addSpan(tokenLayer.getToken(0), tokenLayer.getToken(2), "line", 0, 13);
    structure.addSpan(tokenLayer.getToken(3), tokenLayer.getToken(4), "line", 14, 25);
    structure.addSpan(tokenLayer.getToken(5), tokenLayer.getToken(8), "page", 26, 45);
    structure.addSpan(tokenLayer.getToken(5), tokenLayer.getToken(6), "line", 26, 39);
    structure.addSpan(tokenLayer.getToken(7), tokenLayer.getToken(8), "line", 40, 45);
    structure.addSpan(null, null, "line");

    WLDObjector.write(metaData, corpus, target, false);
    System.out.println(corpus);

    // Compare the written XML with the expected XML.
    assertEqualXml(EXPECTED_OUTPUT_FILE, outfile);
}
示例9: testWrite
import eu.clarin.weblicht.wlfxb.tc.xb.TextCorpusStored; //导入依赖的package包/类
/**
 * Writes a German corpus containing a matches layer to the output file and
 * compares the result with the expected XML.
 */
@Test
public void testWrite() throws Exception {
    String outfile = testFolder.getRoot() + File.separator + OUTPUT_FILE;
    TextCorpusStored tc = new TextCorpusStored("de");
    queryCorporaAndAddMatchesToTextCorpus(tc);
    OutputStream os = new FileOutputStream(outfile);
    try {
        WLDObjector.write(new MetaData(), tc, os, false);
    } finally {
        // Release the file handle even when writing fails.
        os.close();
    }
    System.out.println(tc);
    // compare output xml with expected xml
    assertEqualXml(EXPECTED_OUTPUT_FILE, outfile);
}
示例10: queryCorporaAndAddMatchesToTextCorpus
import eu.clarin.weblicht.wlfxb.tc.xb.TextCorpusStored; //导入依赖的package包/类
/**
 * Fills the corpus with a tokens layer and a matches layer built from a fixed
 * token-to-original-id table.
 * <p>
 * Every token in the table is added to the tokens layer in insertion order.
 * For "Peter" and "schmeckte" a match item is also added that references
 * the new token and its original corpus id.
 *
 * @param tc the text corpus to which the layers are added.
 */
private void queryCorporaAndAddMatchesToTextCorpus(TextCorpusStored tc) {
    // LinkedHashMap keeps insertion order, which determines the token order.
    Map<String, String> token2StrId = new LinkedHashMap<String, String>();
    token2StrId.put("Peter", "5-1023");
    token2StrId.put("aß", "5-1024");
    token2StrId.put("eine", "5-1025");
    token2StrId.put("Käsepizza", "5-1026");
    token2StrId.put(".", "5-1027");
    token2StrId.put("Sie", "16-116");
    token2StrId.put("schmeckte", "16-117");
    token2StrId.put("ihm", "16-118");
    TokensLayer tokensLayer = tc.createTokensLayer();
    MatchesLayer matchesLayer = tc.createMatchesLayer("sqp", "tb_lemma=\"Peter\" | tb_lemma=\"schmecken\"");
    MatchedCorpus corpus = matchesLayer.addCorpus("wcorp-1", "some-pid");
    // Iterate over entries so the value is available without a second lookup.
    for (Map.Entry<String, String> entry : token2StrId.entrySet()) {
        String t = entry.getKey();
        Token token = tokensLayer.addToken(t);
        if (t.equals("Peter") || t.equals("schmeckte")) {
            Map<String, String> targets = new HashMap<String, String>();
            targets.put("tname", token.getID());
            Map<String, String> cats = new HashMap<String, String>();
            cats.put("cname", "cval");
            List<Token> refToks = Arrays.asList(token);
            List<String> refOrigToks = Arrays.asList(entry.getValue());
            matchesLayer.addItem(corpus,
                    refToks,
                    refOrigToks,
                    targets, cats);
        }
    }
}
示例11: createWLTestData
import eu.clarin.weblicht.wlfxb.tc.xb.TextCorpusStored; //导入依赖的package包/类
/**
 * Assembles a {@code WLData} test fixture from the metadata, external-data
 * and text-corpus fixtures produced by the sibling factory methods.
 *
 * @return the combined test data.
 */
private WLData createWLTestData() {
    // Fixtures are created in the order metadata, text corpus, external data.
    MetaData metaData = createTestMetadata();
    TextCorpusStored corpus = createTestTextCorpus();
    return new WLData(metaData, createTestExternalData(), corpus);
}
示例12: createTestTextCorpus
import eu.clarin.weblicht.wlfxb.tc.xb.TextCorpusStored; //导入依赖的package包/类
/**
 * Creates an English ("en") corpus with text, tokens, sentences and
 * POS-tags ("Tiger" tagset) layers; no content is added to any of them here.
 *
 * @return the corpus with the four layers created.
 */
private TextCorpusStored createTestTextCorpus() {
    final TextCorpusStored corpus = new TextCorpusStored("en");

    // Only the layers are created; their return values are not needed.
    corpus.createTextLayer();
    corpus.createTokensLayer();
    corpus.createSentencesLayer();
    corpus.createPosTagsLayer("Tiger");

    return corpus;
}
示例13: testOneWay
import eu.clarin.weblicht.wlfxb.tc.xb.TextCorpusStored; //导入依赖的package包/类
/**
 * Runs a TCF reader / TCF writer / CAS dump pipeline on an input file and
 * compares the written TCF with a reference file.
 * <p>
 * The comparison checks that both corpora have the same number of layers, that
 * each reference layer has the same size as its counterpart in the output, and
 * finally that the two XML documents are equal according to
 * {@code XMLAssert.assertXMLEqual}.
 *
 * @param aInputFile
 *            input file name, resolved against {@code src/test/resources/}; the
 *            pipeline output is expected under the same name in
 *            {@code target/test-output/oneway/}.
 * @param aExpectedFile
 *            reference file name, resolved against {@code src/test/resources/}.
 * @throws Exception
 *             if the pipeline, the file access or the comparison fails.
 */
public void testOneWay(String aInputFile, String aExpectedFile)
    throws Exception
{
    final String referencePath = "src/test/resources/" + aExpectedFile;
    final String actualPath = "target/test-output/oneway/" + aInputFile;

    CollectionReaderDescription reader = createReaderDescription(TcfReader.class,
            TcfReader.PARAM_SOURCE_LOCATION, "src/test/resources/",
            TcfReader.PARAM_PATTERNS, aInputFile);
    AnalysisEngineDescription writer = createEngineDescription(
            TcfWriter.class,
            TcfWriter.PARAM_TARGET_LOCATION, "target/test-output/oneway",
            TcfWriter.PARAM_FILENAME_SUFFIX, ".xml",
            TcfWriter.PARAM_STRIP_EXTENSION, true);
    AnalysisEngineDescription dumper = createEngineDescription(CasDumpWriter.class,
            CasDumpWriter.PARAM_OUTPUT_FILE, "target/test-output/oneway/dump.txt");
    runPipeline(reader, writer, dumper);

    // Both streams are closed in finally blocks so a failing assertion or read
    // does not leak file handles.
    InputStream isReference = new FileInputStream(new File(referencePath));
    try {
        InputStream isActual = new FileInputStream(new File(actualPath));
        try {
            WLData wLDataReference = WLDObjector.read(isReference);
            TextCorpusStored aCorpusDataReference = wLDataReference.getTextCorpus();
            WLData wLDataActual = WLDObjector.read(isActual);
            TextCorpusStored aCorpusDataActual = wLDataActual.getTextCorpus();
            // check if layers maintained
            assertEquals(aCorpusDataReference.getLayers().size(), aCorpusDataActual.getLayers().size());
            // Check if every layers have the same number of annotations
            for (TextCorpusLayer layer : aCorpusDataReference.getLayers()) {
                assertEquals("Layer size mismatch in [" + layer.getClass().getName() + "]",
                        layer.size(), getLayer(aCorpusDataActual, layer.getClass()).size());
            }
        } finally {
            isActual.close();
        }
    } finally {
        isReference.close();
    }

    XMLAssert.assertXMLEqual(
            new InputSource(referencePath),
            new InputSource(new File(actualPath).getPath()));
}
示例14: WLData
import eu.clarin.weblicht.wlfxb.tc.xb.TextCorpusStored; //导入依赖的package包/类
/**
 * Builds a {@code WLData} around the given text corpus annotations, with a
 * newly created, empty {@code MetaData} and the version set to
 * {@code XML_VERSION}.
 *
 * @param textCorpus the text corpus to wrap
 */
public WLData(TextCorpusStored textCorpus) {
    this.textCorpus = textCorpus;
    this.metaData = new MetaData();
    this.version = XML_VERSION;
}
示例15: write
import eu.clarin.weblicht.wlfxb.tc.xb.TextCorpusStored; //导入依赖的package包/类
/**
 * Serializes metadata and a text corpus as one TCF document to an output
 * stream, encoded as UTF-8.
 * <p>
 * Unless {@code outputAsXmlFragment} is set, the output starts with an XML
 * declaration and the WebLicht model processing instruction and ends with an
 * end-document event. The root element carries the version attribute and the
 * namespace from {@code WLData}. The metadata and the text corpus are
 * marshalled into it as JAXB fragments.
 * <p>
 * The event writer is always flushed and closed. If writing fails and closing
 * fails too, the writing failure is the one reported.
 *
 * @param md metadata to marshal first.
 * @param tc text corpus to marshal after the metadata.
 * @param outputStream destination of the XML output.
 * @param outputAsXmlFragment if {@code true}, the document prolog and the
 *        end-document event are omitted.
 * @throws WLFormatException if writing, marshalling, flushing or closing fails.
 */
public static void write(MetaData md, TextCorpus tc,
        OutputStream outputStream, boolean outputAsXmlFragment)
        throws WLFormatException {
    XMLEventFactory eventFactory = XMLEventFactory.newInstance();
    XMLOutputFactory xmlOutputFactory = XMLOutputFactory.newInstance();
    XMLEvent e;
    XMLEventWriter xmlEventWriter = null;
    // First failure seen; thrown after cleanup so a close error cannot mask it.
    WLFormatException failure = null;
    try {
        xmlEventWriter = new IndentingXMLEventWriter(xmlOutputFactory.createXMLEventWriter(outputStream, "UTF-8"));

        // Prolog: XML declaration and model processing instruction.
        if (!outputAsXmlFragment) {
            e = eventFactory.createStartDocument("UTF-8");
            xmlEventWriter.add(e);
            e = eventFactory.createIgnorableSpace(XmlReaderWriter.NEW_LINE);
            xmlEventWriter.add(e);
            e = eventFactory.createProcessingInstruction(
                    XmlReaderWriter.XML_WL1_MODEL_PI_NAME,
                    CommonConstants.XML_WL1_MODEL_PI_CONTENT);
            xmlEventWriter.add(e);
            e = eventFactory.createIgnorableSpace(XmlReaderWriter.NEW_LINE);
            xmlEventWriter.add(e);
        }

        // Root element with version attribute and namespace declaration.
        Attribute versionAttr = eventFactory.createAttribute("version", WLData.XML_VERSION);
        List<Attribute> attrs = new ArrayList<Attribute>();
        attrs.add(versionAttr);
        Namespace ns = eventFactory.createNamespace(WLData.XML_NAMESPACE);
        List<Namespace> nss = new ArrayList<Namespace>();
        nss.add(ns);
        e = eventFactory.createStartElement("", WLData.XML_NAMESPACE, WLData.XML_NAME, attrs.iterator(), nss.iterator());
        xmlEventWriter.add(e);
        e = eventFactory.createIgnorableSpace(XmlReaderWriter.NEW_LINE);
        xmlEventWriter.add(e);

        // Metadata fragment.
        JAXBContext mdContext = JAXBContext.newInstance(MetaData.class);
        Marshaller mdMarshaller = mdContext.createMarshaller();
        // JAXB_FORMATTED_OUTPUT does not work with XMLEventWriter, so it is not set.
        mdMarshaller.setProperty(Marshaller.JAXB_FRAGMENT, true);
        mdMarshaller.setProperty(Marshaller.JAXB_SCHEMA_LOCATION, CommonConstants.CMD_SCHEMA_LOCATION);
        mdMarshaller.marshal(md, xmlEventWriter);
        e = eventFactory.createIgnorableSpace(XmlReaderWriter.NEW_LINE);
        xmlEventWriter.add(e);

        // Text corpus fragment.
        JAXBContext tcContext = JAXBContext.newInstance(TextCorpusStored.class);
        Marshaller tcMarshaller = tcContext.createMarshaller();
        // JAXB_FORMATTED_OUTPUT does not work with XMLEventWriter, so it is not set.
        tcMarshaller.setProperty(Marshaller.JAXB_FRAGMENT, true);
        tcMarshaller.marshal(tc, xmlEventWriter);
        e = eventFactory.createIgnorableSpace(XmlReaderWriter.NEW_LINE);
        xmlEventWriter.add(e);

        // Close the root element and, for full documents, the document.
        e = eventFactory.createEndElement("", WLData.XML_NAMESPACE, WLData.XML_NAME);
        xmlEventWriter.add(e);
        if (!outputAsXmlFragment) {
            e = eventFactory.createEndDocument();
            xmlEventWriter.add(e);
        }
    } catch (Exception ex) {
        failure = new WLFormatException(ex.getMessage(), ex);
    } finally {
        if (xmlEventWriter != null) {
            try {
                xmlEventWriter.flush();
                xmlEventWriter.close();
            } catch (XMLStreamException ex2) {
                // Report the close failure only when nothing failed earlier.
                if (failure == null) {
                    failure = new WLFormatException(ex2.getMessage(), ex2);
                }
            }
        }
    }
    if (failure != null) {
        throw failure;
    }
}