当前位置: 首页>>代码示例>>Java>>正文


Java TextCorpusStored类代码示例

本文整理汇总了Java中eu.clarin.weblicht.wlfxb.tc.xb.TextCorpusStored的典型用法代码示例。如果您正苦于以下问题:Java TextCorpusStored类的具体用法?Java TextCorpusStored怎么用?Java TextCorpusStored使用的例子?那么, 这里精选的类代码示例或许可以为您提供帮助。


TextCorpusStored类属于eu.clarin.weblicht.wlfxb.tc.xb包,在下文中一共展示了TextCorpusStored类的15个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Java代码示例。

示例1: casToTcfWriter

import eu.clarin.weblicht.wlfxb.tc.xb.TextCorpusStored; //导入依赖的package包/类
/**
 * Builds a new TCF document for the given CAS and serializes it to a stream.
 * <p>
 * A {@code TextCorpusStored} is created with the document language of the CAS,
 * the document text is stored in its text layer, {@code write(aJCas, corpus)}
 * is invoked, and the corpus is finally wrapped in a {@code WLData} object and
 * written out.
 *
 * @param aJCas
 *            the JCas supplying the document language and the document text.
 * @param aOs
 *            the output stream that receives the serialized data.
 * @throws WLFormatException
 *             if a TCF problem occurs.
 */
public void casToTcfWriter(JCas aJCas, OutputStream aOs)
    throws WLFormatException
{
    // The corpus language is taken from the CAS.
    final String language = aJCas.getDocumentLanguage();
    final TextCorpusStored corpus = new TextCorpusStored(language);

    // The raw document text goes into the text layer of the corpus.
    corpus.createTextLayer().addText(aJCas.getDocumentText());

    // NOTE(review): write(...) is defined elsewhere in the class; presumably it
    // fills the remaining annotation layers from the CAS - confirm.
    write(aJCas, corpus);

    // Wrap the corpus and serialize it to the caller's stream.
    WLDObjector.write(new WLData(corpus), aOs);
}
 
开发者ID:webanno,项目名称:webanno,代码行数:26,代码来源:TcfWriter.java

示例2: processLayers

import eu.clarin.weblicht.wlfxb.tc.xb.TextCorpusStored; //导入依赖的package包/类
/**
 * Walks the event stream until the end tag named
 * {@code TextCorpusStored.XML_NAME} is the next event.
 * <p>
 * Each start element encountered on the way is handed to
 * {@code processLayer()}; every other event is read from
 * {@code xmlReaderWriter} and added back to it unchanged. The matching end
 * tag itself is only peeked, not consumed.
 *
 * @throws WLFormatException if the underlying XML stream fails or the stream
 * ends before the end tag is seen.
 */
private void processLayers() throws WLFormatException {
    boolean reachedCorpusEnd = false;
    try {
        XMLEvent next = xmlEventReader.peek();
        while (next != null) {
            final int eventType = next.getEventType();
            if (eventType == XMLStreamConstants.END_ELEMENT
                    && next.asEndElement().getName().getLocalPart().equals(TextCorpusStored.XML_NAME)) {
                reachedCorpusEnd = true;
                break;
            }
            if (eventType == XMLStreamConstants.START_ELEMENT) {
                processLayer();
            } else {
                // Anything that is not a start element is passed through as is.
                xmlReaderWriter.add(xmlReaderWriter.readEvent());
            }
            next = xmlEventReader.peek();
        }
    } catch (XMLStreamException e) {
        throw new WLFormatException(e.getMessage(), e);
    }

    if (!reachedCorpusEnd) {
        throw new WLFormatException(TextCorpusStored.XML_NAME + " end tag not found");
    }
}
 
开发者ID:weblicht,项目名称:wlfxb,代码行数:27,代码来源:TextCorpusStreamedWithReplaceableLayers.java

示例3: ExternalDataWithTextCorpusStreamed

import eu.clarin.weblicht.wlfxb.tc.xb.TextCorpusStored; //导入依赖的package包/类
/**
 * Creates a <tt>ExternalDataWithTextCorpusStreamed</tt> from the given TCF
 * input stream and specified annotation layers.
 * 
 * @param inputStream the underlying input stream with linguistic annotations 
 * in TCF format.
 * @param edLayersToRead the annotation layers of <tt>ExternalData</tt> that
 * should be read into this <tt>ExternalDataWithTextCorpusStreamed</tt>.
 * @param tcLayersToRead the annotation layers of <tt>TextCorpus</tt> that
 * should be read into this <tt>ExternalDataWithTextCorpusStreamed</tt>.
 * 
 * @throws WLFormatException if an error in input format or an I/O error occurs.
 */
public ExternalDataWithTextCorpusStreamed(InputStream inputStream,
        EnumSet<ExternalDataLayerTag> edLayersToRead,
        EnumSet<TextCorpusLayerTag> tcLayersToRead)
        throws WLFormatException {
    getLayersToReadWithDependencies(edLayersToRead, tcLayersToRead);
    extData = new ExternalDataStored();
    textCorpus = new TextCorpusStored("unknown");
    try {
        initializeReaderAndWriter(inputStream, null, false);
        readLayers();
    } catch (WLFormatException e) {
        Logger.getLogger(ExternalDataWithTextCorpusStreamed.class.getName()).log(Level.SEVERE, null, e);
        cleanup();
        throw e;
    }
}
 
开发者ID:weblicht,项目名称:wlfxb,代码行数:30,代码来源:ExternalDataWithTextCorpusStreamed.java

示例4: readUpToExtDataOrTextCorpus

import eu.clarin.weblicht.wlfxb.tc.xb.TextCorpusStored; //导入依赖的package包/类
/**
 * Passes events through until the next event is the start element of either
 * the external data section ({@code ExternalDataStored.XML_NAME}) or the text
 * corpus section ({@code TextCorpusStored.XML_NAME}).
 * <p>
 * When such a start element is peeked, the corresponding flag
 * ({@code hasExtData} or {@code hasTextCorpus}) is set and the method returns
 * without consuming the element. Every other event is read from
 * {@code xmlReaderWriter} and added back to it. If both flags are already set
 * on entry, no event is consumed.
 *
 * @throws WLFormatException if the underlying XML stream fails, or if the
 * stream ends before one of the two start elements is found.
 */
private void readUpToExtDataOrTextCorpus() throws WLFormatException {
    try {
        XMLEvent peekedEvent = xmlEventReader.peek();
        while (!hasExtData || !hasTextCorpus) {
            // peek() yields null at end of stream; report that as a format
            // error instead of failing with a NullPointerException.
            if (peekedEvent == null) {
                throw new WLFormatException(ExternalDataStored.XML_NAME + " or "
                        + TextCorpusStored.XML_NAME + " start tag not found");
            }

            if (peekedEvent.getEventType() == XMLStreamConstants.START_ELEMENT) {
                String localName = peekedEvent.asStartElement().getName().getLocalPart();
                // Null-safe comparisons, evaluated once per event.
                boolean isExtData = localName == null
                        ? ExternalDataStored.XML_NAME == null
                        : localName.equals(ExternalDataStored.XML_NAME);
                if (isExtData) {
                    this.hasExtData = true;
                    return;
                }
                boolean isTextCorpus = localName == null
                        ? TextCorpusStored.XML_NAME == null
                        : localName.equals(TextCorpusStored.XML_NAME);
                if (isTextCorpus) {
                    this.hasTextCorpus = true;
                    return;
                }
            }

            // Not one of the two section starts: pass the event through.
            XMLEvent readEvent = xmlReaderWriter.readEvent();
            xmlReaderWriter.add(readEvent);
            peekedEvent = xmlEventReader.peek();
        }
    } catch (XMLStreamException e) {
        throw new WLFormatException(e.getMessage(), e);
    }
}
 
开发者ID:weblicht,项目名称:wlfxb,代码行数:26,代码来源:ExternalDataWithTextCorpusStreamed.java

示例5: createTestTextCorpus

import eu.clarin.weblicht.wlfxb.tc.xb.TextCorpusStored; //导入依赖的package包/类
/**
 * Builds a German ("de") test corpus consisting of a text layer and a tokens
 * layer.
 * <p>
 * The tokens are the space-separated pieces of the very same string that is
 * stored in the text layer, added in order.
 *
 * @return the populated text corpus.
 */
private TextCorpusStored createTestTextCorpus() {
    TextCorpusStored textCorpus = new TextCorpusStored("de");
    String text = "<ähm> ich würde diesmal sagen Theater das ist dann immer so aufge in letzter "
            + "Minute so was spielt man heute abend ich wäre eher dafür daß wir vielleicht ins Kino "
            + "gehen und nachher irgendwo in eine nette Kneipe";
    textCorpus.createTextLayer().addText(text);

    // Tokenize the text itself rather than a second copy of the literal, so
    // the text layer and the tokens layer cannot drift apart.
    TokensLayer tokensLayer = textCorpus.createTokensLayer();
    for (String tokenString : text.split(" ")) {
        tokensLayer.addToken(tokenString);
    }
    return textCorpus;
}
 
开发者ID:weblicht,项目名称:wlfxb,代码行数:18,代码来源:WLDObjector2Test.java

示例6: createTestTextCorpus

import eu.clarin.weblicht.wlfxb.tc.xb.TextCorpusStored; //导入依赖的package包/类
/**
 * Builds an English ("en") test corpus with a text layer, a tokens layer and
 * a sentences layer holding two sentences.
 *
 * @return the populated text corpus.
 */
private TextCorpusStored createTestTextCorpus() {
    final TextCorpusStored corpus = new TextCorpusStored("en");

    // Character offsets of the two sentences: 0-15 and 16-44.
    final String sentenceText = "This is a test. This is the second sentence.";
    corpus.createTextLayer().addText(sentenceText);

    final TokensLayer tokenLayer = corpus.createTokensLayer();
    final String[] words = "This is a test . This is the second sentence .".split(" ");
    final List<Token> added = new ArrayList<Token>();
    for (int i = 0; i < words.length; i++) {
        added.add(tokenLayer.addToken(words[i]));
    }

    // First five tokens form sentence one, the remaining six sentence two.
    final SentencesLayer sentences = corpus.createSentencesLayer();
    sentences.addSentence(added.subList(0, 5), 0, 15);
    sentences.addSentence(added.subList(5, 11), 16, 44);

    return corpus;
}
 
开发者ID:weblicht,项目名称:wlfxb,代码行数:17,代码来源:WLDObjectorTest.java

示例7: TCFWriter

import eu.clarin.weblicht.wlfxb.tc.xb.TextCorpusStored; //导入依赖的package包/类
/**
 * Creates a writer that collects annotations in a new
 * {@code TextCorpusStored} of the given language.
 * <p>
 * Sentences and tokens layers are always created. The lemmas, POS tags and
 * dependency parsing layers are optional; the corresponding field is set to
 * {@code null} when the layer is not requested.
 *
 * @param writer the writer stored in {@code corpusWriter}.
 * @param language the language passed to the text corpus.
 * @param lemmas whether a lemmas layer is created.
 * @param posTagset tagset for the POS tags layer; no layer if absent.
 * @param dependencyTagset tagset for the dependency parsing layer; no layer
 * if absent.
 */
public TCFWriter(Writer writer, String language, boolean lemmas, Optional<String> posTagset,
                 Optional<String> dependencyTagset) {
    corpusWriter = writer;
    corpus = new TextCorpusStored(language);

    // Mandatory layers.
    sentencesLayer = corpus.createSentencesLayer();
    tokensLayer = corpus.createTokensLayer();

    // Optional layers, created in the same order as requested by the flags.
    if (lemmas) {
        lemmasLayer = corpus.createLemmasLayer();
    } else {
        lemmasLayer = null;
    }

    if (posTagset.isPresent()) {
        posTagsLayer = corpus.createPosTagsLayer(posTagset.get());
    } else {
        posTagsLayer = null;
    }

    if (dependencyTagset.isPresent()) {
        dependencyLayer = corpus.createDependencyParsingLayer(dependencyTagset.get(), true, false);
    } else {
        dependencyLayer = null;
    }
}
 
开发者ID:weblicht,项目名称:conll-utils,代码行数:12,代码来源:TCFWriter.java

示例8: testReadWrite

import eu.clarin.weblicht.wlfxb.tc.xb.TextCorpusStored; //导入依赖的package包/类
/**
 * Builds a German corpus with text, tokens and text structure layers, writes
 * it to the output file in the test folder and compares the result with the
 * expected XML file.
 */
@Test
public void testReadWrite() throws Exception {

    final TextCorpusStored corpus = new TextCorpusStored("de");
    final MetaData metaData = new MetaData();

    final String outfile = testFolder.getRoot() + File.separator + OUTPUT_FILE;
    final File target = new File(outfile);

    // Text layer.
    final TextLayer textLayer = corpus.createTextLayer();
    textLayer.addText("Peter aß eine Käsepizza. Sie schmeckte ihm.");

    // Tokens layer, filled from the test's token strings.
    final TokensLayer tokenLayer = corpus.createTokensLayer();
    for (String word : tokenstrings) {
        tokenLayer.addToken(word);
    }

    // Text structure layer: the spans are added in a fixed order.
    final TextStructureLayer structure = corpus.createTextStructureLayer();
    final TextSpan firstPage = structure.addSpan(tokenLayer.getToken(0), tokenLayer.getToken(4), "page");

    structure.addSpan(firstPage, null, null, "number", "1");

    structure.addSpan(null, null, "line");
    structure.addSpan(tokenLayer.getToken(0), tokenLayer.getToken(8), "paragraph", 0, 45);
    structure.addSpan(tokenLayer.getToken(0), tokenLayer.getToken(2), "line", 0, 13);
    structure.addSpan(tokenLayer.getToken(3), tokenLayer.getToken(4), "line", 14, 25);
    structure.addSpan(tokenLayer.getToken(5), tokenLayer.getToken(8), "page", 26, 45);
    structure.addSpan(tokenLayer.getToken(5), tokenLayer.getToken(6), "line", 26, 39);
    structure.addSpan(tokenLayer.getToken(7), tokenLayer.getToken(8), "line", 40, 45);
    structure.addSpan(null, null, "line");

    WLDObjector.write(metaData, corpus, target, false);

    System.out.println(corpus);
    // The written XML must match the expected XML.
    assertEqualXml(EXPECTED_OUTPUT_FILE, outfile);
}
 
开发者ID:weblicht,项目名称:wlfxb,代码行数:37,代码来源:TextCorpusTextStructureTest.java

示例9: testWrite

import eu.clarin.weblicht.wlfxb.tc.xb.TextCorpusStored; //导入依赖的package包/类
/**
 * Fills a German corpus with tokens and matches, writes it together with an
 * empty {@code MetaData} to the output file in the test folder and compares
 * the result with the expected XML file.
 */
@Test
public void testWrite() throws Exception {
    String outfile = testFolder.getRoot() + File.separator + OUTPUT_FILE;
    TextCorpusStored tc = new TextCorpusStored("de");

    queryCorporaAndAddMatchesToTextCorpus(tc);

    // Open the stream only once the corpus is ready and always close it,
    // even when writing fails, so the file handle is not leaked.
    OutputStream os = new FileOutputStream(outfile);
    try {
        WLDObjector.write(new MetaData(), tc, os, false);
    } finally {
        os.close();
    }

    System.out.println(tc);
    // compare output xml with expected xml
    assertEqualXml(EXPECTED_OUTPUT_FILE, outfile);
}
 
开发者ID:weblicht,项目名称:wlfxb,代码行数:16,代码来源:TextCorpusMatchesTest.java

示例10: queryCorporaAndAddMatchesToTextCorpus

import eu.clarin.weblicht.wlfxb.tc.xb.TextCorpusStored; //导入依赖的package包/类
/**
 * Adds a tokens layer and a matches layer to the given corpus.
 * <p>
 * Every key of a fixed token-to-original-id table becomes a token; for the
 * tokens "Peter" and "schmeckte" a match item is additionally registered
 * under the matched corpus "wcorp-1".
 *
 * @param tc the corpus that receives the two layers.
 */
private void queryCorporaAndAddMatchesToTextCorpus(TextCorpusStored tc) {

    // Insertion-ordered: tokens are added to the layer in exactly this order.
    final Map<String, String> originalIds = new LinkedHashMap<String, String>();
    originalIds.put("Peter", "5-1023");
    originalIds.put("aß", "5-1024");
    originalIds.put("eine", "5-1025");
    originalIds.put("Käsepizza", "5-1026");
    originalIds.put(".", "5-1027");
    originalIds.put("Sie", "16-116");
    originalIds.put("schmeckte", "16-117");
    originalIds.put("ihm", "16-118");

    final TokensLayer tokenLayer = tc.createTokensLayer();
    final MatchesLayer matches = tc.createMatchesLayer("sqp", "tb_lemma=\"Peter\" | tb_lemma=\"schmecken\"");
    final MatchedCorpus matchedCorpus = matches.addCorpus("wcorp-1", "some-pid");

    for (Map.Entry<String, String> entry : originalIds.entrySet()) {
        final String word = entry.getKey();
        final Token token = tokenLayer.addToken(word);

        // Only the two matching tokens get a match item.
        if (!word.equals("Peter") && !word.equals("schmeckte")) {
            continue;
        }

        final Map<String, String> targets = new HashMap<String, String>();
        targets.put("tname", token.getID());

        final Map<String, String> categories = new HashMap<String, String>();
        categories.put("cname", "cval");

        matches.addItem(matchedCorpus,
                Arrays.asList(token),
                Arrays.asList(entry.getValue()),
                targets, categories);
    }
}
 
开发者ID:weblicht,项目名称:wlfxb,代码行数:35,代码来源:TextCorpusMatchesTest.java

示例11: createWLTestData

import eu.clarin.weblicht.wlfxb.tc.xb.TextCorpusStored; //导入依赖的package包/类
/**
 * Assembles a {@code WLData} object from the test metadata, the test
 * external data and the test text corpus produced by the sibling factory
 * methods.
 *
 * @return the assembled test data.
 */
private WLData createWLTestData() {
    final MetaData metaData = createTestMetadata();
    final TextCorpusStored corpus = createTestTextCorpus();
    final ExternalDataStored externalData = createTestExternalData();
    return new WLData(metaData, externalData, corpus);
}
 
开发者ID:weblicht,项目名称:wlfxb,代码行数:8,代码来源:WLDObjector2Test.java

示例12: createTestTextCorpus

import eu.clarin.weblicht.wlfxb.tc.xb.TextCorpusStored; //导入依赖的package包/类
/**
 * Builds an English ("en") test corpus in which the text, tokens, sentences
 * and POS tags layers are created but nothing is added to them here.
 *
 * @return the text corpus with the four layers created.
 */
private TextCorpusStored createTestTextCorpus() {
    final TextCorpusStored corpus = new TextCorpusStored("en");

    corpus.createTextLayer();
    corpus.createTokensLayer();
    corpus.createSentencesLayer();
    // "Tiger" is the tagset name handed to the POS tags layer.
    corpus.createPosTagsLayer("Tiger");

    return corpus;
}
 
开发者ID:weblicht,项目名称:wlfxb,代码行数:9,代码来源:WLDObjectorEmptyLayersTest.java

示例13: testOneWay

import eu.clarin.weblicht.wlfxb.tc.xb.TextCorpusStored; //导入依赖的package包/类
/**
 * Runs a reader/writer/dumper pipeline over one input file and compares the
 * written result with a reference file.
 * <p>
 * The comparison checks that both corpora have the same number of layers,
 * that each reference layer has the same size as its counterpart in the
 * written corpus, and finally that the two XML documents are equal.
 *
 * @param aInputFile
 *            file name pattern below {@code src/test/resources/}; also the
 *            name under which the output is looked up in
 *            {@code target/test-output/oneway/}.
 * @param aExpectedFile
 *            name of the reference file below {@code src/test/resources/}.
 * @throws Exception
 *             if the pipeline, the file access or the comparison fails.
 */
public void testOneWay(String aInputFile, String aExpectedFile)
    throws Exception
{
    CollectionReaderDescription reader = createReaderDescription(TcfReader.class, 
            TcfReader.PARAM_SOURCE_LOCATION, "src/test/resources/",
            TcfReader.PARAM_PATTERNS, aInputFile);

    AnalysisEngineDescription writer = createEngineDescription(
            TcfWriter.class,
            TcfWriter.PARAM_TARGET_LOCATION, "target/test-output/oneway",
            TcfWriter.PARAM_FILENAME_SUFFIX, ".xml",
            TcfWriter.PARAM_STRIP_EXTENSION, true);

    AnalysisEngineDescription dumper = createEngineDescription(CasDumpWriter.class,
            CasDumpWriter.PARAM_OUTPUT_FILE, "target/test-output/oneway/dump.txt");

    runPipeline(reader, writer, dumper);

    File referenceFile = new File("src/test/resources/" + aExpectedFile);
    File actualFile = new File("target/test-output/oneway/" + aInputFile);

    TextCorpusStored aCorpusDataReference;
    TextCorpusStored aCorpusDataActual;

    // Both streams are opened before reading (as before) and are now always
    // closed, so no file handles are leaked when reading or asserting fails.
    InputStream isReference = new FileInputStream(referenceFile);
    try {
        InputStream isActual = new FileInputStream(actualFile);
        try {
            aCorpusDataReference = WLDObjector.read(isReference).getTextCorpus();
            aCorpusDataActual = WLDObjector.read(isActual).getTextCorpus();
        }
        finally {
            isActual.close();
        }
    }
    finally {
        isReference.close();
    }

    // check if layers maintained
    assertEquals(aCorpusDataReference.getLayers().size(), aCorpusDataActual.getLayers().size());

    // Check that every layer has the same number of annotations
    for (TextCorpusLayer layer : aCorpusDataReference.getLayers()) {
        assertEquals("Layer size mismatch in [" + layer.getClass().getName() + "]",
                layer.size(), getLayer(aCorpusDataActual, layer.getClass()).size());
    }

    XMLAssert.assertXMLEqual(
            new InputSource("src/test/resources/" + aExpectedFile),
            new InputSource(actualFile.getPath()));
}
 
开发者ID:webanno,项目名称:webanno,代码行数:44,代码来源:TcfReaderWriterTest.java

示例14: WLData

import eu.clarin.weblicht.wlfxb.tc.xb.TextCorpusStored; //导入依赖的package包/类
/**
 * Creates {@code WLData} around the given text corpus annotations.
 * <p>
 * The version is set to {@code XML_VERSION} and the metadata to a newly
 * created {@code MetaData} instance.
 *
 * @param textCorpus the text corpus to store in this object
 */
public WLData(TextCorpusStored textCorpus) {
    this.textCorpus = textCorpus;
    this.metaData = new MetaData();
    this.version = XML_VERSION;
}
 
开发者ID:weblicht,项目名称:wlfxb,代码行数:11,代码来源:WLData.java

示例15: write

import eu.clarin.weblicht.wlfxb.tc.xb.TextCorpusStored; //导入依赖的package包/类
/**
 * Writes metadata and text corpus as one XML document (or fragment) to the
 * given output stream, encoded as UTF-8.
 * <p>
 * Unless a fragment is requested, the output starts with the XML declaration
 * and the model processing instruction and ends with an end-document event.
 * In between, the root element {@code WLData.XML_NAME} is written with its
 * version attribute and namespace, containing the marshalled metadata
 * followed by the marshalled text corpus.
 * <p>
 * The event writer is flushed and closed in all cases. A failure while
 * flushing or closing is reported only when the document itself was written
 * successfully, so that it never hides the original failure.
 *
 * @param md the metadata to marshal.
 * @param tc the text corpus to marshal.
 * @param outputStream the stream receiving the XML.
 * @param outputAsXmlFragment if {@code true}, the XML declaration, the
 * processing instruction and the end-document event are omitted.
 * @throws WLFormatException if creating the writer, marshalling, or
 * (after an otherwise successful write) flushing/closing fails.
 */
public static void write(MetaData md, TextCorpus tc,
        OutputStream outputStream, boolean outputAsXmlFragment)
        throws WLFormatException {

    XMLEventFactory eventFactory = XMLEventFactory.newInstance();
    XMLOutputFactory xmlOutputFactory = XMLOutputFactory.newInstance();
    XMLEventWriter xmlEventWriter = null;
    // Becomes true only when the whole document has been written.
    boolean written = false;

    try {
        xmlEventWriter = new IndentingXMLEventWriter(xmlOutputFactory.createXMLEventWriter(outputStream, "UTF-8"));

        if (!outputAsXmlFragment) {
            xmlEventWriter.add(eventFactory.createStartDocument("UTF-8"));
            xmlEventWriter.add(eventFactory.createIgnorableSpace(XmlReaderWriter.NEW_LINE));
            xmlEventWriter.add(eventFactory.createProcessingInstruction(
                    XmlReaderWriter.XML_WL1_MODEL_PI_NAME,
                    CommonConstants.XML_WL1_MODEL_PI_CONTENT));
            xmlEventWriter.add(eventFactory.createIgnorableSpace(XmlReaderWriter.NEW_LINE));
        }

        // Root element with version attribute and namespace declaration.
        List<Attribute> attrs = new ArrayList<Attribute>();
        attrs.add(eventFactory.createAttribute("version", WLData.XML_VERSION));
        List<Namespace> nss = new ArrayList<Namespace>();
        nss.add(eventFactory.createNamespace(WLData.XML_NAMESPACE));
        xmlEventWriter.add(eventFactory.createStartElement("", WLData.XML_NAMESPACE, WLData.XML_NAME,
                attrs.iterator(), nss.iterator()));
        xmlEventWriter.add(eventFactory.createIgnorableSpace(XmlReaderWriter.NEW_LINE));

        // Metadata section.
        JAXBContext mdContext = JAXBContext.newInstance(MetaData.class);
        Marshaller mdMarshaller = mdContext.createMarshaller();
        // JAXB_FORMATTED_OUTPUT does not work with XMLEventWriter, so it is not set.
        mdMarshaller.setProperty(Marshaller.JAXB_FRAGMENT, true);
        mdMarshaller.setProperty(Marshaller.JAXB_SCHEMA_LOCATION, CommonConstants.CMD_SCHEMA_LOCATION);
        mdMarshaller.marshal(md, xmlEventWriter);

        xmlEventWriter.add(eventFactory.createIgnorableSpace(XmlReaderWriter.NEW_LINE));

        // Text corpus section.
        JAXBContext tcContext = JAXBContext.newInstance(TextCorpusStored.class);
        Marshaller tcMarshaller = tcContext.createMarshaller();
        // JAXB_FORMATTED_OUTPUT does not work with XMLEventWriter, so it is not set.
        tcMarshaller.setProperty(Marshaller.JAXB_FRAGMENT, true);
        tcMarshaller.marshal(tc, xmlEventWriter);

        xmlEventWriter.add(eventFactory.createIgnorableSpace(XmlReaderWriter.NEW_LINE));

        xmlEventWriter.add(eventFactory.createEndElement("", WLData.XML_NAMESPACE, WLData.XML_NAME));

        if (!outputAsXmlFragment) {
            xmlEventWriter.add(eventFactory.createEndDocument());
        }
        written = true;
    } catch (Exception ex) {
        throw new WLFormatException(ex.getMessage(), ex);
    } finally {
        if (xmlEventWriter != null) {
            try {
                xmlEventWriter.flush();
                xmlEventWriter.close();
            } catch (XMLStreamException ex2) {
                // Throwing from finally would replace an exception already in
                // flight; only report the cleanup failure if writing succeeded.
                if (written) {
                    throw new WLFormatException(ex2.getMessage(), ex2);
                }
            }
        }
    }
}
 
开发者ID:weblicht,项目名称:wlfxb,代码行数:78,代码来源:WLDObjector.java


注:本文中的eu.clarin.weblicht.wlfxb.tc.xb.TextCorpusStored类示例由纯净天空整理自Github/MSDocs等开源代码及文档管理平台,相关代码片段筛选自各路编程大神贡献的开源项目,源码版权归原作者所有,传播和使用请参考对应项目的License;未经允许,请勿转载。