当前位置: 首页>>代码示例>>Java>>正文


Java Sentence.addToIndexes方法代码示例

本文整理汇总了Java中de.tudarmstadt.ukp.dkpro.core.api.segmentation.type.Sentence.addToIndexes方法的典型用法代码示例。如果您正苦于以下问题：Java Sentence.addToIndexes方法的具体用法？Java Sentence.addToIndexes怎么用？Java Sentence.addToIndexes使用的例子？那么恭喜您，这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在类de.tudarmstadt.ukp.dkpro.core.api.segmentation.type.Sentence的用法示例。


在下文中一共展示了Sentence.addToIndexes方法的15个代码示例，这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞，您的评价将有助于系统推荐出更棒的Java代码示例。

示例1: getNext

import de.tudarmstadt.ukp.dkpro.core.api.segmentation.type.Sentence; //导入方法依赖的package包/类
/**
 * Reads the file at position {@code i} of {@code documents} into the given CAS.
 * Every line of the file becomes one {@link Sentence} annotation; the document text is
 * the lines joined (and terminated) by {@code '\n'}.
 *
 * @param aJcas the CAS that receives the document text, language and sentence annotations
 * @throws IOException if the file cannot be opened or read
 * @throws CollectionException declared by the reader contract; not thrown directly here
 */
@Override
public void getNext(JCas aJcas) throws IOException, CollectionException {
	File f = documents.get(i);
	StringBuilder sb = new StringBuilder();
	int start = 0;
	int inds = 0;
	LineIterator it = FileUtils.lineIterator(f);
	try {
		while (it.hasNext()) {
			String line = it.nextLine();
			Sentence sent = new Sentence(aJcas, start, start + line.length());
			sent.addToIndexes();
			// +1 skips the '\n' that is appended after every line below
			start = start + line.length() + 1;
			sb.append(line).append("\n");
			// count the processed line so that progress is reported every 10000 lines
			// (the counter was previously never advanced, so "R0" was printed per line)
			inds++;
			if (inds % 10000 == 0) {
				System.out.println("R" + inds);
			}
		}
	}
	finally {
		// release the underlying file handle even if reading fails
		it.close();
	}
	aJcas.setDocumentText(sb.toString());
	//had to add english as default language, one could also add another configuration parameter
	aJcas.setDocumentLanguage("en");
	i++;
}
 
开发者ID:tudarmstadt-lt,项目名称:sentiment,代码行数:22,代码来源:StanfordReader.java

示例2: testSelectImplicitComponent

import de.tudarmstadt.ukp.dkpro.core.api.segmentation.type.Sentence; //导入方法依赖的package包/类
/**
 * Checks that a zero-length ("implicit") argument component is not reported by
 * {@code JCasUtil2.selectOverlapping} for a sentence, even though the component is
 * placed at offset [0, 0].
 */
@Test
public void testSelectImplicitComponent()
        throws Exception
{
    // sentence spanning from the begin of tokenThis to the end of tokenIs
    Sentence spanning = new Sentence(jCas);
    spanning.setBegin(this.tokenThis.getBegin());
    spanning.setEnd(this.tokenIs.getEnd());
    spanning.addToIndexes();

    // read the first indexed sentence back from the CAS
    ArrayList<Sentence> indexed = new ArrayList<Sentence>(
            JCasUtil.select(jCas, Sentence.class));
    Sentence first = indexed.get(0);

    // implicit (zero-length) component at [0, 0]; the original author notes that the
    // sentence also starts at 0
    ArgumentComponent zeroLength = new ArgumentComponent(jCas, 0, 0);
    zeroLength.addToIndexes();

    // the zero-length component must be ignored
    int found = JCasUtil2.selectOverlapping(ArgumentComponent.class, first, jCas).size();
    assertEquals(0, found);
}
 
开发者ID:dkpro,项目名称:dkpro-argumentation,代码行数:17,代码来源:JCasUtil2Test.java

示例3: convertSentences

import de.tudarmstadt.ukp.dkpro.core.api.segmentation.type.Sentence; //导入方法依赖的package包/类
/**
 * Creates one {@link Sentence} annotation per entry of the corpus' sentences layer.
 * A sentence begins at the begin of its first token and ends at the end of its last token.
 *
 * @param aJCas the CAS that receives the sentence annotations
 * @param aCorpusData the corpus whose sentences layer is read
 * @param aTokens already converted tokens, looked up by the ID of the corpus token
 */
private void convertSentences(JCas aJCas, TextCorpus aCorpusData,
        Map<String, Token> aTokens)
{
    // Without a sentences layer there is nothing to convert.
    if (aCorpusData.getSentencesLayer() == null) {
        return;
    }

    for (int idx = 0; idx < aCorpusData.getSentencesLayer().size(); idx++) {
        eu.clarin.weblicht.wlfxb.tc.api.Token[] members = aCorpusData.getSentencesLayer()
                .getTokens(aCorpusData.getSentencesLayer().getSentence(idx));

        Sentence converted = new Sentence(aJCas);

        // first token of the sentence determines the begin offset
        String firstId = members[0].getID();
        converted.setBegin(aTokens.get(firstId).getBegin());

        // last token of the sentence determines the end offset
        String lastId = members[members.length - 1].getID();
        converted.setEnd(aTokens.get(lastId).getEnd());

        converted.addToIndexes();
    }
}
 
开发者ID:webanno,项目名称:webanno,代码行数:21,代码来源:TcfReader.java

示例4: createSentence

import de.tudarmstadt.ukp.dkpro.core.api.segmentation.type.Sentence; //导入方法依赖的package包/类
/**
 * Appends one sentence line (followed by {@code LF}) to {@code coveredText} and indexes a
 * {@link Sentence} annotation from {@code aBegin} to {@code aEnd}.
 *
 * @param aJCas the CAS that receives the sentence annotation
 * @param aLine the text of the sentence
 * @param aBegin begin offset of the sentence
 * @param aEnd end offset of the sentence
 * @param aPrevEnd end offset of the previous sentence
 */
private void createSentence(JCas aJCas, String aLine, int aBegin, int aEnd, int aPrevEnd)
{
    // If the next sentence immediately follows the last one without any space or line break
    // in between, then we need to chop off again the linebreak that we added at the end of the
    // last sentence - otherwise offsets will be off on a round-trip.
    if (aPrevEnd == aBegin && coveredText.length() > 0
            && (coveredText.charAt(coveredText.length() - 1) == '\n')) {
        coveredText.deleteCharAt(coveredText.length() - 1);
    }

    // One space for every offset strictly between aPrevEnd and aBegin. The padding goes
    // straight into coveredText instead of a temporary buffer; the loop runs zero times
    // when there is no gap to fill.
    for (int i = aPrevEnd + 1; i < aBegin; i++) {
        coveredText.append(" ");
    }
    coveredText.append(aLine).append(LF);

    Sentence sentence = new Sentence(aJCas, aBegin, aEnd);
    sentence.addToIndexes();
}
 
开发者ID:webanno,项目名称:webanno,代码行数:26,代码来源:WebannoTsv3Reader.java

示例5: splitSentences

import de.tudarmstadt.ukp.dkpro.core.api.segmentation.type.Sentence; //导入方法依赖的package包/类
/**
 * Splits the document text of the CAS into sentences using a {@link BreakIterator} for
 * {@link Locale#US} and indexes a {@link Sentence} for every span that is not empty after
 * being passed through {@code trim}.
 *
 * @param aJCas the CAS whose document text is segmented and which receives the sentences
 */
public static void splitSentences(JCas aJCas)
{
    BreakIterator boundaries = BreakIterator.getSentenceInstance(Locale.US);
    boundaries.setText(aJCas.getDocumentText());

    int from = boundaries.first();
    for (int to = boundaries.next(); to != BreakIterator.DONE; to = boundaries.next()) {
        // span is handed to trim() and read back afterwards
        int[] offsets = { from, to };
        trim(aJCas.getDocumentText(), offsets);
        if (!isEmpty(offsets[0], offsets[1])) {
            Sentence sentence = new Sentence(aJCas, offsets[0], offsets[1]);
            sentence.addToIndexes(aJCas);
        }
        // the next candidate starts where this boundary ended
        from = to;
    }
}
 
开发者ID:webanno,项目名称:webanno,代码行数:18,代码来源:ImportExportServiceImpl.java

示例6: terminateSentence

import de.tudarmstadt.ukp.dkpro.core.api.segmentation.type.Sentence; //导入方法依赖的package包/类
/**
 * Sets the end of the sentence to the end of the given token, adds the sentence to the
 * indexes and logs its text and offsets at FINE level.
 *
 * @param sentence the sentence annotation to finish
 * @param token the token whose end offset becomes the sentence end
 * @param docText the document text collected so far, used only for the log message
 */
private void terminateSentence(Sentence sentence, Token token, StringBuffer docText)
{
    sentence.setEnd(token.getEnd());
    sentence.addToIndexes();
    // Build the (substring-based) message only when FINE is actually enabled.
    if (logger.isLoggable(Level.FINE)) {
        logger.log(Level.FINE,
                "Sentence:[" + docText.substring(sentence.getBegin(), sentence.getEnd()) + "]\t"
                        + sentence.getBegin() + "\t" + sentence.getEnd());
    }
}
 
开发者ID:floschne,项目名称:NLP_ProjectNER,代码行数:9,代码来源:NERReader.java

示例7: setUp

import de.tudarmstadt.ukp.dkpro.core.api.segmentation.type.Sentence; //导入方法依赖的package包/类
/**
 * Builds the test fixture: a fresh CAS with a fixed English document text, four sentence
 * annotations and two premise annotations. The covered text of each annotation is printed
 * to standard output.
 */
@Before
public void setUp()
        throws Exception
{
    jCas = JCasFactory.createJCas();
    jCas.setDocumentText("s0t0 s0t2 s1t0 s2t0 s2t0 s3t0");
    jCas.setDocumentLanguage("en");

    // {begin, end} offsets of the four sentences, in document order
    int[][] sentenceOffsets = { { 0, 9 }, { 10, 14 }, { 15, 24 }, { 25, 29 } };
    Sentence[] sentences = new Sentence[sentenceOffsets.length];
    for (int k = 0; k < sentenceOffsets.length; k++) {
        sentences[k] = new Sentence(jCas, sentenceOffsets[k][0], sentenceOffsets[k][1]);
        sentences[k].addToIndexes();
    }

    // first premise spans offsets 0-14, second premise spans offsets 25-29
    Premise firstPremise = new Premise(jCas, 0, 14);
    firstPremise.addToIndexes();

    Premise secondPremise = new Premise(jCas, 25, 29);
    secondPremise.addToIndexes();

    for (Sentence sentence : sentences) {
        System.out.println("'" + sentence.getCoveredText() + "'");
    }
    System.out.println("p1: '" + firstPremise.getCoveredText() + "'");
    System.out.println("p2: '" + secondPremise.getCoveredText() + "'");
}
 
开发者ID:UKPLab,项目名称:argument-reasoning-comprehension-task,代码行数:34,代码来源:AnnotationSpansTest.java

示例8: posTag

import de.tudarmstadt.ukp.dkpro.core.api.segmentation.type.Sentence; //导入方法依赖的package包/类
/**
 * Tags a single tokenized sentence. The tokens are joined with single spaces into the
 * document text of a fresh CAS, one {@link Token} is indexed per input token and one
 * {@link Sentence} over the whole text, then {@code callFlexTag} is invoked.
 *
 * @param sentence the tokens of the sentence, in order
 * @return the POS values of all {@code POS} annotations found in the CAS afterwards
 * @throws UIMAException if creating the CAS fails or the tagger call propagates one
 */
public List<String> posTag(List<String> sentence)
    throws UIMAException
{
    JCas jCas = JCasFactory.createJCas();
    StringBuilder text = new StringBuilder();

    int remaining = sentence.size();
    for (String word : sentence) {
        int begin = text.length();
        text.append(word);

        Token current = new Token(jCas, begin, text.length());
        current.addToIndexes();

        // separate tokens by one blank, but do not add a trailing blank
        remaining--;
        if (remaining > 0) {
            text.append(" ");
        }
    }
    jCas.setDocumentText(text.toString());

    Sentence whole = new Sentence(jCas, 0, jCas.getDocumentText().length());
    whole.addToIndexes();

    callFlexTag(jCas);

    List<String> tags = new ArrayList<String>();
    for (POS pos : JCasUtil.select(jCas, POS.class)) {
        tags.add(pos.getPosValue());
    }
    return tags;
}
 
开发者ID:Horsmann,项目名称:FlexTag,代码行数:33,代码来源:UseModelFlexTag.java

示例9: tagSentences

import de.tudarmstadt.ukp.dkpro.core.api.segmentation.type.Sentence; //导入方法依赖的package包/类
/**
 * Tags several tokenized sentences in one CAS. Tokens are appended to the document text
 * without any separator; one {@link Token} is indexed per token and one {@link Sentence}
 * per input list. The trimmed text is set as document text before the engine runs.
 *
 * @param sentences the sentences, each given as its list of tokens
 * @return whatever {@code extractTags} derives from the processed CAS
 * @throws Exception if CAS creation or processing fails
 */
public List<List<String>> tagSentences(List<List<String>> sentences)
    throws Exception
{
    JCas jCas = JCasFactory.createJCas();
    StringBuilder text = new StringBuilder();

    for (List<String> words : sentences) {
        int sentenceBegin = text.length();

        for (String word : words) {
            // the token annotation is created before its text is appended
            int tokenBegin = text.length();
            int tokenEnd = tokenBegin + word.length();
            new Token(jCas, tokenBegin, tokenEnd).addToIndexes();
            text.append(word);
        }

        new Sentence(jCas, sentenceBegin, text.length()).addToIndexes();
    }

    String documentText = text.toString().trim();
    jCas.setDocumentText(documentText);

    flexTagEngine.process(jCas);

    return extractTags(jCas);
}
 
开发者ID:Horsmann,项目名称:FlexTag,代码行数:29,代码来源:UseModelDKProTC.java

示例10: createSentence

import de.tudarmstadt.ukp.dkpro.core.api.segmentation.type.Sentence; //导入方法依赖的package包/类
/**
 * Adds the sentence layer to the CAS: one {@link Sentence} annotation is created and
 * indexed per entry of {@code firstTokenInSentence}.
 * <p>
 * Tokens are looked up in {@code tokensStored} under the key {@code "t_" + number}. The
 * last sentence always ends at the end of the token stored under
 * {@code "t_" + tokensStored.size()}.
 * <p>
 * NOTE(review): the begin offset is derived differently per branch ({@code getBegin()},
 * {@code getEnd()} or {@code getEnd() + 1} of the looked-up token) - confirm against the
 * caller what the numbers in {@code firstTokenInSentence} actually denote.
 *
 * @param aJCas the CAS that receives the sentence annotations
 * @param firstTokenInSentence token numbers used to derive the sentence boundaries
 * @param tokensStored the tokens created so far, keyed by {@code "t_" + number}
 */
private void createSentence(JCas aJCas, List<Integer> firstTokenInSentence,
        Map<String, Token> tokensStored)
{
    for (int i = 0; i < firstTokenInSentence.size(); i++) {
        Sentence outSentence = new Sentence(aJCas);
        // Last sentence of a document that has more than one sentence (i != 0):
        // it runs up to the end of the token stored under "t_" + tokensStored.size().
        // NOTE(review): begin uses getEnd() without the +1 applied in the general case below.
        if (i == firstTokenInSentence.size() - 1 && i != 0) {
            outSentence.setBegin(tokensStored.get("t_" + firstTokenInSentence.get(i)).getEnd());
            outSentence.setEnd(tokensStored.get("t_" + (tokensStored.size())).getEnd());
            outSentence.addToIndexes();
            break;
        }
        // The document consists of exactly one sentence: from the begin of the referenced
        // token to the end of the token stored under "t_" + tokensStored.size().
        if (i == firstTokenInSentence.size() - 1 && i == 0) {
            outSentence.setBegin(tokensStored.get("t_" + firstTokenInSentence.get(i))
                    .getBegin());
            outSentence.setEnd(tokensStored.get("t_" + (tokensStored.size())).getEnd());
            outSentence.addToIndexes();
        }
        // First of several sentences: from the begin of the token referenced by entry i to
        // the end of the token referenced by entry i + 1.
        else if (i == 0) {
            outSentence.setBegin(tokensStored.get("t_" + firstTokenInSentence.get(i))
                    .getBegin());
            outSentence.setEnd(tokensStored.get("t_" + firstTokenInSentence.get(i + 1))
                    .getEnd());
            outSentence.addToIndexes();
        }
        // Any sentence in between: starts one offset after the end of the token referenced
        // by entry i and ends at the end of the token referenced by entry i + 1.
        else {
            outSentence.setBegin(
                    tokensStored.get("t_" + firstTokenInSentence.get(i)).getEnd() + 1);
            outSentence
                    .setEnd(tokensStored.get("t_" + firstTokenInSentence.get(i + 1)).getEnd());
            outSentence.addToIndexes();
        }
    }
}
 
开发者ID:webanno,项目名称:webanno,代码行数:38,代码来源:WebannoTsv1Reader.java

示例11: createSentence

import de.tudarmstadt.ukp.dkpro.core.api.segmentation.type.Sentence; //导入方法依赖的package包/类
/**
 * Creates and indexes a {@link Sentence} for the given span after passing the span
 * through {@code trim}.
 *
 * @param aJCas the CAS that supplies the document text and receives the sentence
 * @param aBegin begin offset of the candidate span
 * @param aEnd end offset of the candidate span
 * @return the indexed sentence, or {@code null} if {@code isEmpty} reports the span as empty
 */
protected Sentence createSentence(final JCas aJCas, final int aBegin,
        final int aEnd)
{
    // trim() receives the offsets as an array; they are read back from it afterwards
    final int[] offsets = { aBegin, aEnd };
    trim(aJCas.getDocumentText(), offsets);

    if (isEmpty(offsets[0], offsets[1])) {
        return null;
    }

    Sentence sentence = new Sentence(aJCas, offsets[0], offsets[1]);
    sentence.addToIndexes(aJCas);
    return sentence;
}
 
开发者ID:webanno,项目名称:webanno,代码行数:15,代码来源:LineOrientedTextReader.java

示例12: convert

import de.tudarmstadt.ukp.dkpro.core.api.segmentation.type.Sentence; //导入方法依赖的package包/类
/**
 * Converts one crowdsourced text sequence into CAS content. Every sequence unit becomes a
 * {@link Token} followed by a blank, plus a {@code TextClassificationUnit} and a
 * {@code TextClassificationOutcome} over the token. The whole sequence is covered by one
 * {@link Sentence} and one {@code TextClassificationSequence}, followed by a line break.
 *
 * @param aJCas the CAS to fill
 * @param crowdsourceTextSequence the sequence whose units are converted
 * @throws IOException declared for callers; not thrown directly by the visible code
 */
private void convert(JCas aJCas, CrowdsourceTextSequence crowdsourceTextSequence)
    throws IOException
{
    JCasBuilder doc = new JCasBuilder(aJCas);

    int sentenceBegin = doc.getPosition();
    // stays equal to sentenceBegin when the sequence has no units
    int sentenceEnd = sentenceBegin;

    // Tokens, NER-IOB
    for (CrowdsourceSequenceUnit sequenceUnit: crowdsourceTextSequence.getCrowdsourceSequenceUnits()) {
        String word = sequenceUnit.getText();
        // Read token
        Token token = doc.add(word, Token.class);
        sentenceEnd = token.getEnd();
        doc.add(" ");

        TextClassificationUnit unit = new TextClassificationUnit(aJCas, token.getBegin(), token.getEnd());
        unit.setSuffix("unit" + sequenceUnit.getId() + "_" + sequenceUnit.getText());
        unit.addToIndexes();

        TextClassificationOutcome outcome = new TextClassificationOutcome(aJCas, token.getBegin(), token.getEnd());
        outcome.setOutcome(sequenceUnit.useThisLabel);
        outcome.addToIndexes();
    }

    // Sentence
    Sentence sentence = new Sentence(aJCas, sentenceBegin, sentenceEnd);
    sentence.addToIndexes();

    TextClassificationSequence sequence = new TextClassificationSequence(aJCas, sentenceBegin, sentenceEnd);
    sequence.addToIndexes();

    // One sentence per line.
    doc.add("\n");

    doc.close();
}
 
开发者ID:UKPLab,项目名称:emnlp2015-crowdsourcing,代码行数:41,代码来源:PosReaderBase.java

示例13: getNext

import de.tudarmstadt.ukp.dkpro.core.api.segmentation.type.Sentence; //导入方法依赖的package包/类
/**
 * Fills the CAS with all sequences held in {@code sequences}. Each sequence entry is a
 * "token tag" pair separated by blanks; the text before the last blank is the token, the
 * text after it is the POS tag. Tokens of a sequence are joined by single blanks and each
 * sequence is covered by one {@link Sentence}; sequences are separated by a single blank.
 *
 * @param aJCas the CAS to fill with meta data, text, tokens, POS tags and sentences
 * @throws IOException declared by the reader contract; not thrown directly here
 * @throws CollectionException if configuring the POS mapping provider fails
 */
public void getNext(JCas aJCas)
    throws IOException, CollectionException
{

    DocumentMetaData md = new DocumentMetaData(aJCas);
    md.setDocumentTitle("");
    md.setDocumentId("" + (instanceId++));
    md.setLanguage(language);
    md.addToIndexes();

    try {
        posMappingProvider.configure(aJCas.getCas());
    }
    catch (AnalysisEngineProcessException e1) {
        throw new CollectionException(e1);
    }

    StringBuilder documentText = new StringBuilder();

    int seqStart = 0;
    for (int k = 0; k < sequences.size(); k++) {

        List<String> sequence = sequences.get(k);

        for (int i = 0; i < sequence.size(); i++) {
            // collapse runs of blanks so that the last blank separates token and tag
            String pairs = sequence.get(i).replaceAll(" +", " ");

            int idxLastSpace = pairs.lastIndexOf(" ");
            String token = pairs.substring(0, idxLastSpace);
            String tag = pairs.substring(idxLastSpace+1);

            if(lowerCase){
                token = token.toLowerCase();
            }

            // Record the start offset before appending. Lower-casing may change the
            // length of the string, so deriving the start from the length of the
            // original token could yield offsets that do not match the appended text.
            int tokStart = documentText.length();
            documentText.append(token);
            int tokEnd = documentText.length();
            Token t = new Token(aJCas, tokStart, tokEnd);
            t.addToIndexes();

            // blank between tokens, none after the last token of the sequence
            if (i + 1 < sequence.size()) {
                documentText.append(" ");
            }

            Type posTag = posMappingProvider.getTagType(tag);
            POS pos = (POS) aJCas.getCas().createAnnotation(posTag, t.getBegin(),
                    t.getEnd());
            pos.setPosValue(tag);
            pos.addToIndexes();
            t.setPos(pos);
        }
        Sentence sentence = new Sentence(aJCas, seqStart, documentText.length());
        sentence.addToIndexes();

        // blank between sequences, none after the last sequence
        if (k + 1 < sequences.size()) {
            documentText.append(" ");
        }
        seqStart = documentText.length();

    }
    aJCas.setDocumentText(documentText.toString());
}
 
开发者ID:Horsmann,项目名称:FlexTag,代码行数:65,代码来源:LineTokenTagReader.java

示例14: testSelectOverlapping

import de.tudarmstadt.ukp.dkpro.core.api.segmentation.type.Sentence; //导入方法依赖的package包/类
/**
 * Verifies {@code JCasUtil2.selectOverlapping} with an argument component that spans from
 * {@code tokenIs} to {@code tokenA} and thereby crosses the boundary between two
 * sentences: neither {@code selectCovered} nor {@code selectCovering} finds it for either
 * sentence, while {@code selectOverlapping} returns it for both.
 */
@Test
public void testSelectOverlapping()
        throws Exception
{
    {
        Sentence s1 = new Sentence(jCas);
        s1.setBegin(this.tokenThis.getBegin());
        s1.setEnd(this.tokenIs.getEnd());
        s1.addToIndexes();

        Sentence s2 = new Sentence(jCas);
        s2.setBegin(this.tokenA.getBegin());
        s2.setEnd(this.tokenDot.getEnd());
        s2.addToIndexes();
    }

    List<Sentence> sentences = new ArrayList<Sentence>(
            JCasUtil.select(jCas, Sentence.class));
    assertEquals(2, sentences.size());

    // annotation that covers "is" and "a" (each from different sentence)
    ArgumentComponent argumentComponent = new ArgumentComponent(jCas);
    argumentComponent.setBegin(tokenIs.getBegin());
    argumentComponent.setEnd(this.tokenA.getEnd());
    argumentComponent.addToIndexes();

    Collection<ArgumentComponent> argumentComponents = JCasUtil
            .select(jCas, ArgumentComponent.class);
    assertEquals(1, argumentComponents.size());

    ArgumentComponent component = argumentComponents.iterator().next();

    List<Token> coveredTokens = JCasUtil.selectCovered(Token.class, component);
    assertEquals(2, coveredTokens.size());
    assertEquals(this.tokenIs.getBegin(), coveredTokens.get(0).getBegin());
    assertEquals(this.tokenA.getBegin(), coveredTokens.get(1).getBegin());

    // Check both sentences; the second one was previously also read from index 0, so the
    // assertions on sent2 merely repeated those on sent1.
    Sentence sent1 = sentences.get(0);
    Sentence sent2 = sentences.get(1);

    // !!! selectCovered won't find the ArgumentComponent annotation, as it crosses
    // the boundaries!!
    assertEquals(0, JCasUtil.selectCovered(ArgumentComponent.class, sent1).size());
    assertEquals(0, JCasUtil.selectCovering(ArgumentComponent.class, sent1).size());

    assertEquals(0, JCasUtil.selectCovered(ArgumentComponent.class, sent2).size());
    assertEquals(0, JCasUtil.selectCovering(ArgumentComponent.class, sent2).size());

    // now we select overlapping -- we get the same component from both sentences
    assertEquals(1, JCasUtil2.selectOverlapping(ArgumentComponent.class, sent1, jCas).size());
    assertEquals(1, JCasUtil2.selectOverlapping(ArgumentComponent.class, sent2, jCas).size());

    // and this is indeed the "component" that overlaps both sentences
    assertEquals(component,
            JCasUtil2.selectOverlapping(ArgumentComponent.class, sent1, jCas).iterator()
                    .next());
    assertEquals(component,
            JCasUtil2.selectOverlapping(ArgumentComponent.class, sent2, jCas).iterator()
                    .next());

}
 
开发者ID:dkpro,项目名称:dkpro-argumentation,代码行数:62,代码来源:JCasUtil2Test.java

示例15: terminateSentence

import de.tudarmstadt.ukp.dkpro.core.api.segmentation.type.Sentence; //导入方法依赖的package包/类
/**
 * Finishes a {@link Sentence} annotation: its end offset is taken from the given token
 * and the annotation is then added to the indexes.
 *
 * @param sentence the {@link Sentence} annotation to finish
 * @param token the {@link Token} whose end offset becomes the end of the sentence
 */
private void terminateSentence(Sentence sentence, Token token) {
    final int endOffset = token.getEnd();
    sentence.setEnd(endOffset);
    sentence.addToIndexes();
}
 
开发者ID:uhh-lt,项目名称:LT-ABSA,代码行数:10,代码来源:ConllReader.java


注:本文中的de.tudarmstadt.ukp.dkpro.core.api.segmentation.type.Sentence.addToIndexes方法示例由纯净天空整理自Github/MSDocs等开源代码及文档管理平台,相关代码片段筛选自各路编程大神贡献的开源项目,源码版权归原作者所有,传播和使用请参考对应项目的License;未经允许,请勿转载。